cfgrammar/yacc/
follows.rs

1use std::marker::PhantomData;
2
3use num_traits::{AsPrimitive, PrimInt, Unsigned};
4use vob::Vob;
5
6use super::YaccGrammar;
7use crate::{RIdx, Symbol, TIdx};
8
9/// `Follows` stores all the Follow sets for a given grammar. For example, given this code and
10/// grammar:
11/// ```text
12///   let grm = YaccGrammar::new(&mut Header::new(), "
13///       %grmtools{yacckind: YaccKind::Original(YaccOriginalActionKind::GenericParseTree)}
14///       S: A 'b';
15///       A: 'a' | ;
16///     ").unwrap();
17///   let follows = Follows::new(&grm);
18/// ```
19/// then the following assertions (and only the following assertions) about the Follows set are
20/// correct:
21/// ```text
22///   assert!(follows.is_set(grm.rule_idx("S").unwrap(), grm.eof_token_idx());
23///   assert!(follows.is_set(grm.rule_idx("A").unwrap(), grm.token_idx("b").unwrap()));
24/// ```
25#[derive(Debug)]
26pub struct YaccFollows<StorageT> {
27    follows: Vec<Vob>,
28    phantom: PhantomData<StorageT>,
29}
30
31impl<StorageT: 'static + PrimInt + Unsigned> YaccFollows<StorageT>
32where
33    usize: AsPrimitive<StorageT>,
34{
35    /// Generates and returns the Follows set for the given grammar.
36    pub fn new(grm: &YaccGrammar<StorageT>) -> Self {
37        let mut follows = vec![
38            Vob::from_elem(false, usize::from(grm.tokens_len()));
39            usize::from(grm.rules_len())
40        ];
41        follows[usize::from(grm.start_rule_idx())].set(usize::from(grm.eof_token_idx()), true);
42
43        let firsts = grm.firsts();
44        loop {
45            let mut changed = false;
46            for pidx in grm.iter_pidxs() {
47                let ridx = grm.prod_to_rule(pidx);
48                let prod = grm.prod(pidx);
49                // Our implementation of the Follows algorithm is simple: we start from the right
50                // hand side of a production and work backwards. While epsilon is true, any
51                // nonterminals we encounter have the Follow set of the production's rule added to
52                // them. As soon as we hit a token or a nonterminal that can't produce the empty
53                // string, we set epsilon to false. At that point, we simply add the first set of
54                // the following symbol to any nonterminals we encounter.
55                let mut epsilon = true;
56                for sidx in (0..prod.len()).rev() {
57                    let sym = prod[sidx];
58                    match sym {
59                        Symbol::Token(_) => {
60                            epsilon = false;
61                        }
62                        Symbol::Rule(s_ridx) => {
63                            if epsilon {
64                                for tidx in grm.iter_tidxs() {
65                                    if follows[usize::from(ridx)][usize::from(tidx)]
66                                        && follows[usize::from(s_ridx)].set(usize::from(tidx), true)
67                                    {
68                                        changed = true;
69                                    }
70                                }
71                            }
72                            if !firsts.is_epsilon_set(s_ridx) {
73                                epsilon = false;
74                            }
75                            if sidx < prod.len() - 1 {
76                                match prod[sidx + 1] {
77                                    Symbol::Token(nxt_tidx) => {
78                                        if follows[usize::from(s_ridx)]
79                                            .set(usize::from(nxt_tidx), true)
80                                        {
81                                            changed = true;
82                                        }
83                                    }
84                                    Symbol::Rule(nxt_ridx) => {
85                                        if follows[usize::from(s_ridx)].or(firsts.firsts(nxt_ridx))
86                                        {
87                                            changed = true;
88                                        }
89                                    }
90                                }
91                            }
92                        }
93                    }
94                }
95            }
96            if !changed {
97                return YaccFollows {
98                    follows,
99                    phantom: PhantomData,
100                };
101            }
102        }
103    }
104
105    /// Return the Follows `Vob` for rule `ridx`.
106    pub fn follows(&self, ridx: RIdx<StorageT>) -> &Vob {
107        &self.follows[usize::from(ridx)]
108    }
109
110    /// Returns true if the token `tidx` is in the follow set for rule `ridx`.
111    pub fn is_set(&self, ridx: RIdx<StorageT>, tidx: TIdx<StorageT>) -> bool {
112        self.follows[usize::from(ridx)][usize::from(tidx)]
113    }
114}
115
116#[cfg(test)]
117mod test {
118    use super::{
119        super::{YaccGrammar, YaccKind, YaccOriginalActionKind},
120        YaccFollows,
121    };
122    use num_traits::{AsPrimitive, PrimInt, Unsigned};
123
124    fn has<StorageT: 'static + PrimInt + Unsigned>(
125        grm: &YaccGrammar<StorageT>,
126        follows: &YaccFollows<StorageT>,
127        rn: &str,
128        should_be: Vec<&str>,
129    ) where
130        usize: AsPrimitive<StorageT>,
131    {
132        let ridx = grm.rule_idx(rn).unwrap();
133        for tidx in grm.iter_tidxs() {
134            let n = if tidx == grm.eof_token_idx() {
135                "$"
136            } else {
137                grm.token_name(tidx).unwrap_or("<no name>")
138            };
139            if !should_be.iter().any(|x| x == &n) {
140                if follows.is_set(ridx, tidx) {
141                    panic!("{} is incorrectly set in {}", n, rn);
142                }
143            } else if !follows.is_set(ridx, tidx) {
144                panic!("{} is not set in {}", n, rn);
145            }
146        }
147    }
148
149    #[test]
150    fn test_follow() {
151        // Adapted from p2 of https://www.cs.uaf.edu/~cs331/notes/FirstFollow.pdf
152        let grm = YaccGrammar::new(
153            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
154            "
155                %start E
156                %%
157                E: T E2 ;
158                E2: '+' T E2 | ;
159                T: F T2 ;
160                T2: '*' F T2 | ;
161                F: '(' E ')' | 'ID' ;
162          ",
163        )
164        .unwrap();
165        let follows = grm.follows();
166        has(&grm, &follows, "E", vec![")", "$"]);
167        has(&grm, &follows, "E2", vec![")", "$"]);
168        has(&grm, &follows, "T", vec!["+", ")", "$"]);
169        has(&grm, &follows, "T2", vec!["+", ")", "$"]);
170        has(&grm, &follows, "F", vec!["+", "*", ")", "$"]);
171    }
172
173    #[test]
174    fn test_follow2() {
175        // Adapted from https://www.l2f.inesc-id.pt/~david/w/pt/Top-Down_Parsing/Exercise_5:_Test_2010/07/01
176        let grm = YaccGrammar::new(
177            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
178            "
179                %start A
180                %%
181                A : 't' B2 D | 'v' D2 ;
182                B : 't' B2 | ;
183                B2: 'w' B | 'u' 'w' B ;
184                D : 'v' D2 ;
185                D2: 'x' B D2 | ;
186          ",
187        )
188        .unwrap();
189        let follows = grm.follows();
190        has(&grm, &follows, "A", vec!["$"]);
191        has(&grm, &follows, "B", vec!["v", "x", "$"]);
192        has(&grm, &follows, "B2", vec!["v", "x", "$"]);
193        has(&grm, &follows, "D", vec!["$"]);
194        has(&grm, &follows, "D2", vec!["$"]);
195    }
196
197    #[test]
198    fn test_follow3() {
199        let grm = YaccGrammar::new(
200            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
201            "
202                %start S
203                %%
204                S: A 'b';
205                A: 'b' | ;
206          ",
207        )
208        .unwrap();
209        let follows = grm.follows();
210        has(&grm, &follows, "S", vec!["$"]);
211        has(&grm, &follows, "A", vec!["b"]);
212    }
213
214    #[test]
215    fn test_follow_corchuelo() {
216        let grm = YaccGrammar::new(
217            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
218            "
219                %start E
220                %%
221                E : 'N'
222                  | E '+' 'N'
223                  | '(' E ')'
224                  ;
225          ",
226        )
227        .unwrap();
228        let follows = grm.follows();
229        has(&grm, &follows, "E", vec!["+", ")", "$"]);
230    }
231}