1use std::{collections::hash_map::HashMap, fmt::Write, hash::Hash};
2
3use cfgrammar::{yacc::YaccGrammar, Symbol, TIdx};
4use num_traits::{AsPrimitive, PrimInt, Unsigned};
5
6use crate::{itemset::Itemset, StIdx};
7
8#[derive(Debug)]
9pub struct StateGraph<StorageT: Eq + Hash> {
10 states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
12 start_state: StIdx<StorageT>,
13 edges: Vec<HashMap<Symbol<StorageT>, StIdx<StorageT>>>,
15}
16
17impl<StorageT: 'static + Hash + PrimInt + Unsigned> StateGraph<StorageT>
18where
19 usize: AsPrimitive<StorageT>,
20{
21 pub(crate) fn new(
22 states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
23 start_state: StIdx<StorageT>,
24 edges: Vec<HashMap<Symbol<StorageT>, StIdx<StorageT>>>,
25 ) -> Self {
26 assert!(states.len() < num_traits::cast(StorageT::max_value()).unwrap());
27 StateGraph {
28 states,
29 start_state,
30 edges,
31 }
32 }
33
34 pub fn start_state(&self) -> StIdx<StorageT> {
36 self.start_state
37 }
38
39 pub fn iter_stidxs(&self) -> Box<dyn Iterator<Item = StIdx<StorageT>>> {
42 Box::new((0..self.states.len()).map(|x| StIdx(x.as_())))
45 }
46
47 pub fn closed_state(&self, stidx: StIdx<StorageT>) -> &Itemset<StorageT> {
49 let (_, closed_state) = &self.states[usize::from(stidx)];
50 closed_state
51 }
52
53 pub fn iter_closed_states<'a>(
55 &'a self,
56 ) -> Box<dyn Iterator<Item = &'a Itemset<StorageT>> + 'a> {
57 Box::new(self.states.iter().map(|(_, x)| x))
58 }
59
60 pub fn core_state(&self, stidx: StIdx<StorageT>) -> &Itemset<StorageT> {
62 let (core_states, _) = &self.states[usize::from(stidx)];
63 core_states
64 }
65
66 pub fn iter_core_states<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Itemset<StorageT>> + 'a> {
68 Box::new(self.states.iter().map(|(x, _)| x))
69 }
70
71 pub fn all_states_len(&self) -> StIdx<StorageT> {
74 StIdx(self.states.len().as_())
76 }
77
78 pub fn edge(&self, stidx: StIdx<StorageT>, sym: Symbol<StorageT>) -> Option<StIdx<StorageT>> {
80 self.edges
81 .get(usize::from(stidx))
82 .and_then(|x| x.get(&sym))
83 .cloned()
84 }
85
86 pub fn edges(&self, stidx: StIdx<StorageT>) -> &HashMap<Symbol<StorageT>, StIdx<StorageT>> {
88 &self.edges[usize::from(stidx)]
89 }
90
91 pub fn all_edges_len(&self) -> usize {
93 self.edges.iter().fold(0, |a, x| a + x.len())
94 }
95
96 pub fn pp(&self, grm: &YaccGrammar<StorageT>, core_states: bool) -> String {
100 fn num_digits<StorageT: 'static + Hash + PrimInt + Unsigned>(i: StIdx<StorageT>) -> usize
101 where
102 usize: AsPrimitive<StorageT>,
103 {
104 if usize::from(i) == 0 {
105 1
106 } else {
107 ((usize::from(i) as f64).log10() as usize) + 1
108 }
109 }
110
111 fn fmt_sym<StorageT: 'static + PrimInt + Unsigned>(
112 grm: &YaccGrammar<StorageT>,
113 sym: Symbol<StorageT>,
114 ) -> String
115 where
116 usize: AsPrimitive<StorageT>,
117 {
118 match sym {
119 Symbol::Rule(ridx) => grm.rule_name_str(ridx).to_string(),
120 Symbol::Token(tidx) => format!("'{}'", grm.token_name(tidx).unwrap_or("")),
121 }
122 }
123
124 let mut o = String::new();
125 for (stidx, (core_st, closed_st)) in self.iter_stidxs().zip(self.states.iter()) {
126 if stidx != self.start_state {
127 o.push('\n');
128 }
129 {
130 let padding = num_digits(self.all_states_len()) - num_digits(stidx);
131 write!(o, "{}:{}", usize::from(stidx), " ".repeat(padding)).ok();
132 }
133
134 let st = if core_states { core_st } else { closed_st };
135 for (i, (&(pidx, sidx), ctx)) in st.items.iter().enumerate() {
136 let padding = if i == 0 {
137 0
138 } else {
139 o.push_str("\n "); num_digits(self.all_states_len())
141 };
142 write!(
143 o,
144 "{} [{} ->",
145 " ".repeat(padding),
146 grm.rule_name_str(grm.prod_to_rule(pidx))
147 )
148 .ok();
149 for (i_sidx, i_ssym) in grm.prod(pidx).iter().enumerate() {
150 if i_sidx == usize::from(sidx) {
151 o.push_str(" .");
152 }
153 write!(o, " {}", fmt_sym(grm, *i_ssym)).ok();
154 }
155 if usize::from(sidx) == grm.prod(pidx).len() {
156 o.push_str(" .");
157 }
158 o.push_str(", {");
159 let mut seen_b = false;
160 for bidx in ctx.iter_set_bits(..) {
161 if seen_b {
162 o.push_str(", ");
163 } else {
164 seen_b = true;
165 }
166 let tidx = TIdx(bidx.as_());
168 if tidx == grm.eof_token_idx() {
169 o.push_str("'$'");
170 } else {
171 write!(o, "'{}'", grm.token_name(tidx).unwrap()).ok();
172 }
173 }
174 o.push_str("}]");
175 }
176 let mut edges = self.edges(stidx).iter().collect::<Vec<_>>();
177 edges.sort_by(|(_, x), (_, y)| x.cmp(y));
178 for (esym, e_stidx) in edges {
179 write!(
180 o,
181 "\n{}{} -> {}",
182 " ".repeat(num_digits(self.all_states_len()) + 2),
183 fmt_sym(grm, *esym),
184 usize::from(*e_stidx)
185 )
186 .ok();
187 }
188 }
189 o
190 }
191
192 pub fn pp_core_states(&self, grm: &YaccGrammar<StorageT>) -> String {
194 self.pp(grm, true)
195 }
196
197 pub fn pp_closed_states(&self, grm: &YaccGrammar<StorageT>) -> String {
199 self.pp(grm, false)
200 }
201}
202
203#[cfg(test)]
204use cfgrammar::SIdx;
205
206#[cfg(test)]
207pub(crate) fn state_exists<StorageT: 'static + Hash + PrimInt + Unsigned>(
208 grm: &YaccGrammar<StorageT>,
209 is: &Itemset<StorageT>,
210 nt: &str,
211 prod_off: usize,
212 dot: SIdx<StorageT>,
213 la: Vec<&str>,
214) where
215 usize: AsPrimitive<StorageT>,
216{
217 let ab_prod_off = grm.rule_to_prods(grm.rule_idx(nt).unwrap())[prod_off];
218 let ctx = &is.items[&(ab_prod_off, dot)];
219 for tidx in grm.iter_tidxs() {
220 let bit = ctx[usize::from(tidx)];
221 let mut found = false;
222 for t in la.iter() {
223 let off = if t == &"$" {
224 grm.eof_token_idx()
225 } else {
226 grm.token_idx(t).unwrap()
227 };
228 if off == tidx {
229 if !bit {
230 panic!("bit for token {}, dot {} is not set in production {} of {} when it should be",
231 t, usize::from(dot), prod_off, nt);
232 }
233 found = true;
234 break;
235 }
236 }
237 if !found && bit {
238 panic!(
239 "bit for token {}, dot {} is set in production {} of {} when it shouldn't be",
240 grm.token_name(tidx).unwrap(),
241 usize::from(dot),
242 prod_off,
243 nt
244 );
245 }
246 }
247}
248
249#[cfg(test)]
250mod test {
251 use crate::{pager::pager_stategraph, StIdx};
252 use cfgrammar::{
253 yacc::{YaccGrammar, YaccKind, YaccKindResolver, YaccOriginalActionKind},
254 Symbol,
255 };
256
257 #[test]
258 #[rustfmt::skip]
259 fn test_statetable_core() {
260 let grm = YaccGrammar::new(
262 YaccKindResolver::Force(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)),
263 "
264 %start A
265 %%
266 A: 'OPEN_BRACKET' A 'CLOSE_BRACKET'
267 | 'a'
268 | 'b';
269 "
270 ).unwrap();
271 let sg = pager_stategraph(&grm);
272 assert_eq!(sg.all_states_len(), StIdx(7));
273 assert_eq!(sg.states.iter().fold(0, |a, (x, _)| a + x.items.len()), 7);
274 assert_eq!(sg.all_edges_len(), 9);
275
276 let s0 = StIdx(0);
278 sg.edge(s0, Symbol::Rule(grm.rule_idx("A").unwrap())).unwrap(); let s2 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
280 let s3 = sg.edge(s0, Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
281 let s5 = sg.edge(s0, Symbol::Token(grm.token_idx("OPEN_BRACKET").unwrap())).unwrap();
282 assert_eq!(s2, sg.edge(s5, Symbol::Token(grm.token_idx("a").unwrap())).unwrap());
283 assert_eq!(s3, sg.edge(s5, Symbol::Token(grm.token_idx("b").unwrap())).unwrap());
284 assert_eq!(s5, sg.edge(s5, Symbol::Token(grm.token_idx("OPEN_BRACKET").unwrap())).unwrap());
285 let s4 = sg.edge(s5, Symbol::Rule(grm.rule_idx("A").unwrap())).unwrap();
286 sg.edge(s4, Symbol::Token(grm.token_idx("CLOSE_BRACKET").unwrap())).unwrap(); }
288}