lrtable/
stategraph.rs

1use std::{collections::hash_map::HashMap, fmt::Write, hash::Hash};
2
3use cfgrammar::{yacc::YaccGrammar, Symbol, TIdx};
4use num_traits::{AsPrimitive, PrimInt, Unsigned};
5
6use crate::{itemset::Itemset, StIdx};
7
8#[derive(Debug)]
9pub struct StateGraph<StorageT: Eq + Hash> {
10    /// A vector of `(core_states, closed_states)` tuples.
11    states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
12    start_state: StIdx<StorageT>,
13    /// For each state in `states`, edges is a hashmap from symbols to state offsets.
14    edges: Vec<HashMap<Symbol<StorageT>, StIdx<StorageT>>>,
15}
16
17impl<StorageT: 'static + Hash + PrimInt + Unsigned> StateGraph<StorageT>
18where
19    usize: AsPrimitive<StorageT>,
20{
21    pub(crate) fn new(
22        states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
23        start_state: StIdx<StorageT>,
24        edges: Vec<HashMap<Symbol<StorageT>, StIdx<StorageT>>>,
25    ) -> Self {
26        assert!(states.len() < num_traits::cast(StorageT::max_value()).unwrap());
27        StateGraph {
28            states,
29            start_state,
30            edges,
31        }
32    }
33
34    /// Return this state graph's start state.
35    pub fn start_state(&self) -> StIdx<StorageT> {
36        self.start_state
37    }
38
39    /// Return an iterator which produces (in order from `StorageT::zero()..self.all_states_len()`)
40    /// all this grammar's valid `StIdx`s.
41    pub fn iter_stidxs(&self) -> Box<dyn Iterator<Item = StIdx<StorageT>>> {
42        // We can use as safely, because we know that we're only generating integers from
43        // 0..self.states.len() which we've already checked fits within StIdxStorageT.
44        Box::new((0..self.states.len()).map(|x| StIdx(x.as_())))
45    }
46
47    /// Return the itemset for closed state `stidx`. Panics if `stidx` doesn't exist.
48    pub fn closed_state(&self, stidx: StIdx<StorageT>) -> &Itemset<StorageT> {
49        let (_, closed_state) = &self.states[usize::from(stidx)];
50        closed_state
51    }
52
53    /// Return an iterator over all closed states in this `StateGraph`.
54    pub fn iter_closed_states<'a>(
55        &'a self,
56    ) -> Box<dyn Iterator<Item = &'a Itemset<StorageT>> + 'a> {
57        Box::new(self.states.iter().map(|(_, x)| x))
58    }
59
60    /// Return the itemset for core state `stidx` or `None` if it doesn't exist.
61    pub fn core_state(&self, stidx: StIdx<StorageT>) -> &Itemset<StorageT> {
62        let (core_states, _) = &self.states[usize::from(stidx)];
63        core_states
64    }
65
66    /// Return an iterator over all core states in this `StateGraph`.
67    pub fn iter_core_states<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Itemset<StorageT>> + 'a> {
68        Box::new(self.states.iter().map(|(x, _)| x))
69    }
70
71    /// How many states does this `StateGraph` contain? NB: By definition the `StateGraph` contains
72    /// the same number of core and closed states.
73    pub fn all_states_len(&self) -> StIdx<StorageT> {
74        // We checked in the constructor that self.states.len() can fit into StIdxStorageT
75        StIdx(self.states.len().as_())
76    }
77
78    /// Return the state pointed to by `sym` from `stidx` or `None` otherwise.
79    pub fn edge(&self, stidx: StIdx<StorageT>, sym: Symbol<StorageT>) -> Option<StIdx<StorageT>> {
80        self.edges
81            .get(usize::from(stidx))
82            .and_then(|x| x.get(&sym))
83            .cloned()
84    }
85
86    /// Return the edges for state `stidx`. Panics if `stidx` doesn't exist.
87    pub fn edges(&self, stidx: StIdx<StorageT>) -> &HashMap<Symbol<StorageT>, StIdx<StorageT>> {
88        &self.edges[usize::from(stidx)]
89    }
90
91    /// How many edges does this `StateGraph` contain?
92    pub fn all_edges_len(&self) -> usize {
93        self.edges.iter().fold(0, |a, x| a + x.len())
94    }
95
96    /// Pretty print this stategraph as a `String`. If `core_states` is set to true, only the core
97    /// states are pretty printed; if set to false, all states (including non-core states) are
98    /// pretty printed.
99    pub fn pp(&self, grm: &YaccGrammar<StorageT>, core_states: bool) -> String {
100        fn num_digits<StorageT: 'static + Hash + PrimInt + Unsigned>(i: StIdx<StorageT>) -> usize
101        where
102            usize: AsPrimitive<StorageT>,
103        {
104            if usize::from(i) == 0 {
105                1
106            } else {
107                ((usize::from(i) as f64).log10() as usize) + 1
108            }
109        }
110
111        fn fmt_sym<StorageT: 'static + PrimInt + Unsigned>(
112            grm: &YaccGrammar<StorageT>,
113            sym: Symbol<StorageT>,
114        ) -> String
115        where
116            usize: AsPrimitive<StorageT>,
117        {
118            match sym {
119                Symbol::Rule(ridx) => grm.rule_name_str(ridx).to_string(),
120                Symbol::Token(tidx) => format!("'{}'", grm.token_name(tidx).unwrap_or("")),
121            }
122        }
123
124        let mut o = String::new();
125        for (stidx, (core_st, closed_st)) in self.iter_stidxs().zip(self.states.iter()) {
126            if stidx != self.start_state {
127                o.push('\n');
128            }
129            {
130                let padding = num_digits(self.all_states_len()) - num_digits(stidx);
131                write!(o, "{}:{}", usize::from(stidx), " ".repeat(padding)).ok();
132            }
133
134            let st = if core_states { core_st } else { closed_st };
135            for (i, (&(pidx, sidx), ctx)) in st.items.iter().enumerate() {
136                let padding = if i == 0 {
137                    0
138                } else {
139                    o.push_str("\n "); // Extra space to compensate for ":" printed above
140                    num_digits(self.all_states_len())
141                };
142                write!(
143                    o,
144                    "{} [{} ->",
145                    " ".repeat(padding),
146                    grm.rule_name_str(grm.prod_to_rule(pidx))
147                )
148                .ok();
149                for (i_sidx, i_ssym) in grm.prod(pidx).iter().enumerate() {
150                    if i_sidx == usize::from(sidx) {
151                        o.push_str(" .");
152                    }
153                    write!(o, " {}", fmt_sym(grm, *i_ssym)).ok();
154                }
155                if usize::from(sidx) == grm.prod(pidx).len() {
156                    o.push_str(" .");
157                }
158                o.push_str(", {");
159                let mut seen_b = false;
160                for bidx in ctx.iter_set_bits(..) {
161                    if seen_b {
162                        o.push_str(", ");
163                    } else {
164                        seen_b = true;
165                    }
166                    // Since ctx is exactly tokens_len bits long, the call to as_ is safe.
167                    let tidx = TIdx(bidx.as_());
168                    if tidx == grm.eof_token_idx() {
169                        o.push_str("'$'");
170                    } else {
171                        write!(o, "'{}'", grm.token_name(tidx).unwrap()).ok();
172                    }
173                }
174                o.push_str("}]");
175            }
176            let mut edges = self.edges(stidx).iter().collect::<Vec<_>>();
177            edges.sort_by(|(_, x), (_, y)| x.cmp(y));
178            for (esym, e_stidx) in edges {
179                write!(
180                    o,
181                    "\n{}{} -> {}",
182                    " ".repeat(num_digits(self.all_states_len()) + 2),
183                    fmt_sym(grm, *esym),
184                    usize::from(*e_stidx)
185                )
186                .ok();
187            }
188        }
189        o
190    }
191
192    /// Return a pretty printed version of the core states, and all edges.
193    pub fn pp_core_states(&self, grm: &YaccGrammar<StorageT>) -> String {
194        self.pp(grm, true)
195    }
196
197    /// Return a pretty printed version of the closed states, and all edges.
198    pub fn pp_closed_states(&self, grm: &YaccGrammar<StorageT>) -> String {
199        self.pp(grm, false)
200    }
201}
202
203#[cfg(test)]
204use cfgrammar::SIdx;
205
206#[cfg(test)]
207pub(crate) fn state_exists<StorageT: 'static + Hash + PrimInt + Unsigned>(
208    grm: &YaccGrammar<StorageT>,
209    is: &Itemset<StorageT>,
210    nt: &str,
211    prod_off: usize,
212    dot: SIdx<StorageT>,
213    la: Vec<&str>,
214) where
215    usize: AsPrimitive<StorageT>,
216{
217    let ab_prod_off = grm.rule_to_prods(grm.rule_idx(nt).unwrap())[prod_off];
218    let ctx = &is.items[&(ab_prod_off, dot)];
219    for tidx in grm.iter_tidxs() {
220        let bit = ctx[usize::from(tidx)];
221        let mut found = false;
222        for t in la.iter() {
223            let off = if t == &"$" {
224                grm.eof_token_idx()
225            } else {
226                grm.token_idx(t).unwrap()
227            };
228            if off == tidx {
229                if !bit {
230                    panic!("bit for token {}, dot {} is not set in production {} of {} when it should be",
231                           t, usize::from(dot), prod_off, nt);
232                }
233                found = true;
234                break;
235            }
236        }
237        if !found && bit {
238            panic!(
239                "bit for token {}, dot {} is set in production {} of {} when it shouldn't be",
240                grm.token_name(tidx).unwrap(),
241                usize::from(dot),
242                prod_off,
243                nt
244            );
245        }
246    }
247}
248
249#[cfg(test)]
250mod test {
251    use crate::{pager::pager_stategraph, StIdx};
252    use cfgrammar::{
253        yacc::{YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind},
254        Symbol,
255    };
256
257    #[test]
258    #[rustfmt::skip]
259    fn test_statetable_core() {
260        // Taken from p13 of https://link.springer.com/article/10.1007/s00236-010-0115-6
261        let grm = YaccGrammar::new(
262            YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)),
263            "
264            %start A
265            %%
266            A: 'OPEN_BRACKET' A 'CLOSE_BRACKET'
267             | 'a'
268             | 'b';
269          "
270        ).unwrap();
271        let sg = pager_stategraph(&grm);
272        assert_eq!(sg.all_states_len(), StIdx(7));
273        assert_eq!(sg.states.iter().fold(0, |a, (x, _)| a + x.items.len()), 7);
274        assert_eq!(sg.all_edges_len(), 9);
275
276        // This follows the (not particularly logical) ordering of state numbers in the paper.
277        let s0 = StIdx(0);
278        sg.edge(s0, Symbol::Rule(grm.rule_idx("A").unwrap())).unwrap(); // s1
279        let s2 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
280        let s3 = sg.edge(s0, Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
281        let s5 = sg.edge(s0, Symbol::Token(grm.token_idx("OPEN_BRACKET").unwrap())).unwrap();
282        assert_eq!(s2, sg.edge(s5, Symbol::Token(grm.token_idx("a").unwrap())).unwrap());
283        assert_eq!(s3, sg.edge(s5, Symbol::Token(grm.token_idx("b").unwrap())).unwrap());
284        assert_eq!(s5, sg.edge(s5, Symbol::Token(grm.token_idx("OPEN_BRACKET").unwrap())).unwrap());
285        let s4 = sg.edge(s5, Symbol::Rule(grm.rule_idx("A").unwrap())).unwrap();
286        sg.edge(s4, Symbol::Token(grm.token_idx("CLOSE_BRACKET").unwrap())).unwrap(); // s6
287    }
288}