cfgrammar/yacc/
firsts.rs

1use std::marker::PhantomData;
2
3use num_traits::{AsPrimitive, PrimInt, Unsigned};
4use vob::Vob;
5
6use super::YaccGrammar;
7use crate::{RIdx, Symbol, TIdx};
8
9/// `Firsts` stores all the first sets for a given grammar. For example, given this code and
10/// grammar:
11/// ```text
12///   let grm = YaccGrammar::new(&mut Header::new(), "
13///     %grmtools{yacckind: YaccKind::Original(YaccOriginalActionKind::GenericParseTree)}
14///     S: A 'b';
15///     A: 'a'
16///      | ;").unwrap();
17///   let firsts = Firsts::new(&grm);
18/// ```
19/// then the following assertions (and only the following assertions) about the firsts set are
20/// correct:
21/// ```text
22///   assert!(firsts.is_set(grm.rule_idx("S").unwrap(), grm.token_idx("a").unwrap()));
23///   assert!(firsts.is_set(grm.rule_idx("S").unwrap(), grm.token_idx("b").unwrap()));
24///   assert!(firsts.is_set(grm.rule_idx("A").unwrap(), grm.token_idx("a").unwrap()));
25///   assert!(firsts.is_epsilon_set(grm.rule_idx("A").unwrap()));
26/// ```
27#[derive(Debug)]
28pub struct YaccFirsts<StorageT> {
29    firsts: Vec<Vob>,
30    epsilons: Vob,
31    phantom: PhantomData<StorageT>,
32}
33
34impl<StorageT: 'static + PrimInt + Unsigned> YaccFirsts<StorageT>
35where
36    usize: AsPrimitive<StorageT>,
37{
38    /// Generates and returns the firsts set for the given grammar.
39    pub fn new(grm: &YaccGrammar<StorageT>) -> Self {
40        let mut firsts = YaccFirsts {
41            firsts: vec![
42                Vob::from_elem(false, usize::from(grm.tokens_len()));
43                usize::from(grm.rules_len())
44            ],
45            epsilons: Vob::from_elem(false, usize::from(grm.rules_len())),
46            phantom: PhantomData,
47        };
48
49        // Loop looking for changes to the firsts set, until we reach a fixed point. In essence, we
50        // look at each rule E, and see if any of the rules at the start of its productions
51        // have new elements in since we last looked. If they do, we'll have to do another round.
52        loop {
53            let mut changed = false;
54            for ridx in grm.iter_rules() {
55                // For each rule E
56                for &pidx in grm.rule_to_prods(ridx).iter() {
57                    // ...and each production A
58                    let prod = grm.prod(pidx);
59                    if prod.is_empty() {
60                        // if it's an empty production, ensure this rule's epsilon bit is
61                        // set.
62                        if !firsts.is_epsilon_set(ridx) {
63                            firsts.epsilons.set(usize::from(ridx), true);
64                            changed = true;
65                        }
66                        continue;
67                    }
68                    for (sidx, sym) in prod.iter().enumerate() {
69                        match *sym {
70                            Symbol::Token(s_tidx) => {
71                                // if symbol is a token, add to FIRSTS
72                                if !firsts.set(ridx, s_tidx) {
73                                    changed = true;
74                                }
75                                break;
76                            }
77                            Symbol::Rule(s_ridx) => {
78                                // if we're dealing with another rule, union its FIRSTs
79                                // together with the current rules FIRSTs. Note this is
80                                // (intentionally) a no-op if the two tokens are one and the
81                                // same.
82                                for tidx in grm.iter_tidxs() {
83                                    if firsts.is_set(s_ridx, tidx) && !firsts.set(ridx, tidx) {
84                                        changed = true;
85                                    }
86                                }
87
88                                // If the epsilon bit in the rule being referenced is set,
89                                // and if its the last symbol in the production, then add epsilon
90                                // to FIRSTs.
91                                if firsts.is_epsilon_set(s_ridx) && sidx == prod.len() - 1 {
92                                    // Only add epsilon if the symbol is the last in the production
93                                    if !firsts.epsilons[usize::from(ridx)] {
94                                        firsts.epsilons.set(usize::from(ridx), true);
95                                        changed = true;
96                                    }
97                                }
98
99                                // If FIRST(X) of production R : X Y2 Y3 doesn't contain epsilon,
100                                // then don't try and calculate the FIRSTS of Y2 or Y3 (i.e. stop
101                                // now).
102                                if !firsts.is_epsilon_set(s_ridx) {
103                                    break;
104                                }
105                            }
106                        }
107                    }
108                }
109            }
110            if !changed {
111                return firsts;
112            }
113        }
114    }
115
116    /// Return all the firsts for rule `ridx`.
117    pub fn firsts(&self, ridx: RIdx<StorageT>) -> &Vob {
118        &self.firsts[usize::from(ridx)]
119    }
120
121    /// Returns true if the token `tidx` is in the first set for rule `ridx`.
122    pub fn is_set(&self, ridx: RIdx<StorageT>, tidx: TIdx<StorageT>) -> bool {
123        self.firsts[usize::from(ridx)][usize::from(tidx)]
124    }
125
126    /// Returns true if the rule `ridx` has epsilon in its first set.
127    pub fn is_epsilon_set(&self, ridx: RIdx<StorageT>) -> bool {
128        self.epsilons[usize::from(ridx)]
129    }
130
131    /// Ensures that the firsts bit for token `tidx` rule `ridx` is set. Returns true if
132    /// it was already set, or false otherwise.
133    pub fn set(&mut self, ridx: RIdx<StorageT>, tidx: TIdx<StorageT>) -> bool {
134        let r = &mut self.firsts[usize::from(ridx)];
135        if r[usize::from(tidx)] {
136            true
137        } else {
138            r.set(usize::from(tidx), true);
139            false
140        }
141    }
142}
143
144#[cfg(test)]
145mod test {
146    use super::{
147        super::{YaccGrammar, YaccKind, YaccOriginalActionKind},
148        YaccFirsts,
149    };
150    use num_traits::{AsPrimitive, PrimInt, Unsigned};
151
152    fn has<StorageT: 'static + PrimInt + Unsigned>(
153        grm: &YaccGrammar<StorageT>,
154        firsts: &YaccFirsts<StorageT>,
155        rn: &str,
156        should_be: Vec<&str>,
157    ) where
158        usize: AsPrimitive<StorageT>,
159    {
160        let ridx = grm.rule_idx(rn).unwrap();
161        for tidx in grm.iter_tidxs() {
162            let n = grm.token_name(tidx).unwrap_or("<no name>");
163            match should_be.iter().position(|&x| x == n) {
164                Some(_) => {
165                    if !firsts.is_set(ridx, tidx) {
166                        panic!("{} is not set in {}", n, rn);
167                    }
168                }
169                None => {
170                    if firsts.is_set(ridx, tidx) {
171                        panic!("{} is incorrectly set in {}", n, rn);
172                    }
173                }
174            }
175        }
176        if should_be.iter().any(|x| x == &"") {
177            assert!(firsts.is_epsilon_set(ridx));
178        }
179    }
180
181    #[test]
182    fn test_first() {
183        let grm = YaccGrammar::new(
184            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
185            "
186          %start C
187          %token c d
188          %%
189          C: 'c';
190          D: 'd';
191          E: D | C;
192          F: E;
193          ",
194        )
195        .unwrap();
196        let firsts = grm.firsts();
197        has(&grm, &firsts, "^", vec!["c"]);
198        has(&grm, &firsts, "D", vec!["d"]);
199        has(&grm, &firsts, "E", vec!["d", "c"]);
200        has(&grm, &firsts, "F", vec!["d", "c"]);
201    }
202
203    #[test]
204    fn test_first_no_subsequent_rules() {
205        let grm = YaccGrammar::new(
206            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
207            "
208          %start C
209          %token c d
210          %%
211          C: 'c';
212          D: 'd';
213          E: D C;
214          ",
215        )
216        .unwrap();
217        let firsts = grm.firsts();
218        has(&grm, &firsts, "E", vec!["d"]);
219    }
220
221    #[test]
222    fn test_first_epsilon() {
223        let grm = YaccGrammar::new(
224            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
225            "
226          %start A
227          %token a b c
228          %%
229          A: B 'a';
230          B: 'b' | ;
231          C: 'c' | ;
232          D: C;
233          ",
234        )
235        .unwrap();
236        let firsts = grm.firsts();
237        has(&grm, &firsts, "A", vec!["b", "a"]);
238        has(&grm, &firsts, "C", vec!["c", ""]);
239        has(&grm, &firsts, "D", vec!["c", ""]);
240    }
241
242    #[test]
243    fn test_last_epsilon() {
244        let grm = YaccGrammar::new(
245            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
246            "
247          %start A
248          %token b c
249          %%
250          A: B C;
251          B: 'b' | ;
252          C: B 'c' B;
253          ",
254        )
255        .unwrap();
256        let firsts = grm.firsts();
257        has(&grm, &firsts, "A", vec!["b", "c"]);
258        has(&grm, &firsts, "B", vec!["b", ""]);
259        has(&grm, &firsts, "C", vec!["b", "c"]);
260    }
261
262    #[test]
263    fn test_first_no_multiples() {
264        let grm = YaccGrammar::new(
265            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
266            "
267          %start A
268          %token b c
269          %%
270          A: B 'b';
271          B: 'b' | ;
272          ",
273        )
274        .unwrap();
275        let firsts = grm.firsts();
276        has(&grm, &firsts, "A", vec!["b"]);
277    }
278
279    fn eco_grammar() -> YaccGrammar {
280        YaccGrammar::new(
281            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
282            "
283          %start S
284          %token a b c d f
285          %%
286          S: S 'b' | 'b' A 'a' | 'a';
287          A: 'a' S 'c' | 'a' | 'a' S 'b';
288          B: A S;
289          C: D A;
290          D: 'd' | ;
291          F: C D 'f';
292          ",
293        )
294        .unwrap()
295    }
296
297    #[test]
298    fn test_first_from_eco() {
299        let grm = eco_grammar();
300        let firsts = grm.firsts();
301        has(&grm, &firsts, "S", vec!["a", "b"]);
302        has(&grm, &firsts, "A", vec!["a"]);
303        has(&grm, &firsts, "B", vec!["a"]);
304        has(&grm, &firsts, "D", vec!["d", ""]);
305        has(&grm, &firsts, "C", vec!["d", "a"]);
306        has(&grm, &firsts, "F", vec!["d", "a"]);
307    }
308
309    #[test]
310    fn test_first_from_eco_bug() {
311        let grm = YaccGrammar::new(
312            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
313            "
314          %start E
315          %token a b c d e f
316          %%
317          E : T | E 'b' T;
318          T : P | T 'e' P;
319          P : 'a';
320          C: C 'c' | ;
321          D: D 'd' | F;
322          F: 'f' | ;
323          G: C D;
324          ",
325        )
326        .unwrap();
327        let firsts = grm.firsts();
328        has(&grm, &firsts, "E", vec!["a"]);
329        has(&grm, &firsts, "T", vec!["a"]);
330        has(&grm, &firsts, "P", vec!["a"]);
331        has(&grm, &firsts, "C", vec!["c", ""]);
332        has(&grm, &firsts, "D", vec!["f", "d", ""]);
333        has(&grm, &firsts, "G", vec!["c", "d", "f", ""]);
334    }
335}