lrpar/
parser.rs

#![allow(clippy::derive_partial_eq_without_eq)]
use std::{
    error::Error,
    fmt::{self, Debug, Display, Write as _},
    hash::Hash,
    marker::PhantomData,
    vec,
};

// web-time could also be used on non-wasm32 targets, but we use std::time there to avoid the
// extra dependency.
#[cfg(not(target_arch = "wasm32"))]
use std::time::{Duration, Instant};
#[cfg(target_arch = "wasm32")]
use web_time::{Duration, Instant};

use cactus::Cactus;
use cfgrammar::{header::Value, span::Location, yacc::YaccGrammar, RIdx, Span, TIdx};
use lrtable::{Action, StIdx, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::TokenStream;
use quote::quote;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::{cpctplus, LexError, Lexeme, LexerTypes, NonStreamingLexer};

#[cfg(test)]
const RECOVERY_TIME_BUDGET: u64 = 60_000; // milliseconds
#[cfg(not(test))]
const RECOVERY_TIME_BUDGET: u64 = 500; // milliseconds

/// A generic parse tree.
#[derive(Debug, Clone, PartialEq)]
pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
    /// Terminals store a single lexeme.
    Term { lexeme: LexemeT },
    /// Nonterminals reference a rule and have zero or more `Node`s as children.
    Nonterm {
        ridx: RIdx<StorageT>,
        nodes: Vec<Node<LexemeT, StorageT>>,
    },
}

impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Return a pretty-printed version of this node.
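    ///
    /// A hedged usage sketch (`grm`, `stable`, `lexer` and `input` are assumed to come from the
    /// surrounding program):
    ///
    /// ```ignore
    /// let (tree_opt, _errs) = RTParserBuilder::new(&grm, &stable).parse_generictree(&lexer);
    /// if let Some(tree) = tree_opt {
    ///     // pp produces one line per node, indented one space per tree level, e.g.:
    ///     //   Expr
    ///     //    INT 2
    ///     print!("{}", tree.pp(&grm, input));
    /// }
    /// ```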
    pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
        let mut st = vec![(0, self)]; // Stack of (indent level, node) pairs
        let mut s = String::new();
        while let Some((indent, e)) = st.pop() {
            for _ in 0..indent {
                s.push(' ');
            }
            match *e {
                Node::Term { lexeme } => {
                    let tidx = TIdx(lexeme.tok_id());
                    let tn = grm.token_name(tidx).unwrap();
                    let lt = &input[lexeme.span().start()..lexeme.span().end()];
                    writeln!(s, "{} {}", tn, lt).ok();
                }
                Node::Nonterm { ridx, ref nodes } => {
                    writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
                    for x in nodes.iter().rev() {
                        st.push((indent + 1, x));
                    }
                }
            }
        }
        s
    }
}

type PStack<StorageT> = Vec<StIdx<StorageT>>; // Parse stack
type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
    RIdx<StorageT>,
    &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    Span,
    vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
    ParamT,
) -> ActionT;

#[derive(Debug)]
pub enum AStackType<LexemeT, ActionT> {
    ActionType(ActionT),
    Lexeme(LexemeT),
}

pub(super) struct Parser<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT: 'a,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    rcvry_kind: RecoveryKind,
    pub(super) grm: &'a YaccGrammar<StorageT>,
    pub(super) token_cost: Box<TokenCostFn<'a, StorageT>>,
    pub(super) stable: &'a StateTable<StorageT>,
    lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    // In the long term, we should remove the `lexemes` field entirely, as the `NonStreamingLexer` API is
    // powerful enough to allow us to incrementally obtain lexemes and buffer them when necessary.
    pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
    actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
    param: ParamT,
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_generictree(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<
            ActionFn<
                'a,
                'b,
                'input,
                StorageT,
                LexerTypesT,
                Node<LexerTypesT::LexemeT, StorageT>,
                (),
            >,
        > = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &action_generictree);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }
}

/// The action which implements [`cfgrammar::yacc::YaccOriginalActionKind::GenericParseTree`].
/// Usually you should just use the action kind directly. But you can also call this from
/// within a custom action to return a generic parse tree with custom behavior.
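///
/// A hedged sketch of the latter: a custom action with the same signature that delegates to
/// `action_generictree` and then post-processes the resulting node (all surrounding names are
/// illustrative only):
///
/// ```ignore
/// fn my_action<StorageT, LexerTypesT: LexerTypes>(
///     ridx: RIdx<StorageT>,
///     lexer: &dyn NonStreamingLexer<LexerTypesT>,
///     span: Span,
///     astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
///     param: (),
/// ) -> Node<LexerTypesT::LexemeT, StorageT> {
///     let node = action_generictree(ridx, lexer, span, astack, param);
///     // ...inspect or rewrite `node` here before returning it...
///     node
/// }
/// ```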
pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
    ridx: RIdx<StorageT>,
    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
    _span: Span,
    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
    _param: (),
) -> Node<LexerTypesT::LexemeT, StorageT>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::LexemeT: Lexeme<StorageT>,
{
    let mut nodes = Vec::with_capacity(astack.len());
    for a in astack {
        nodes.push(match a {
            AStackType::ActionType(n) => n,
            AStackType::Lexeme(lexeme) => Node::Term { lexeme },
        });
    }
    Node::Nonterm { ridx, nodes }
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_noaction(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>> = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &Parser::noaction);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        errors
    }

    fn noaction(
        _ridx: RIdx<StorageT>,
        _lexer: &dyn NonStreamingLexer<LexerTypesT>,
        _span: Span,
        _astack: vec::Drain<AStackType<LexerTypesT::LexemeT, ()>>,
        _param: (),
    ) {
    }
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
        ActionT: 'a,
        ParamT: Clone,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_actions(
        rcvry_kind: RecoveryKind,
        grm: &'a YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &'a StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions,
            param,
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }

    /// Start parsing text at `laidx` and continue until the parse either reaches an accept state
    /// or fails in a way that cannot be recovered from. Errors encountered along the way are
    /// appended to `errors`; depending on the recovery kind, the parser attempts to repair each
    /// error and keep going.
    ///
    /// Return `Some(value)` if the parse reached an accept state (i.e. all the input was consumed,
    /// possibly after making repairs) or `None` (i.e. some of the input was not consumed, even
    /// after possibly making repairs) otherwise.
    fn lr(
        &self,
        mut laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
        spans: &mut Vec<Span>,
    ) -> Option<ActionT> {
        let mut recoverer = None;
        let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
        loop {
            debug_assert_eq!(astack.len(), spans.len());
            let stidx = *pstack.last().unwrap();
            let la_tidx = self.next_tidx(laidx);

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());

                    let span = if spans.is_empty() {
                        Span::new(0, 0)
                    } else if pop_idx - 1 < spans.len() {
                        Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
                    } else {
                        Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
                    };
                    spans.truncate(pop_idx - 1);
                    spans.push(span);

                    let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                        ridx,
                        self.lexer,
                        span,
                        astack.drain(pop_idx - 1..),
                        self.param.clone(),
                    ));
                    astack.push(v);
                }
                Action::Shift(state_id) => {
                    let la_lexeme = self.next_lexeme(laidx);
                    pstack.push(state_id);
                    astack.push(AStackType::Lexeme(la_lexeme));

                    spans.push(la_lexeme.span());
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    debug_assert_eq!(astack.len(), 1);
                    match astack.drain(..).next().unwrap() {
                        AStackType::ActionType(v) => return Some(v),
                        _ => unreachable!(),
                    }
                }
                Action::Error => {
                    if recoverer.is_none() {
                        recoverer = Some(match self.rcvry_kind {
                            RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
                            RecoveryKind::None => {
                                let la_lexeme = self.next_lexeme(laidx);
                                errors.push(
                                    ParseError {
                                        stidx,
                                        lexeme: la_lexeme,
                                        repairs: vec![],
                                    }
                                    .into(),
                                );
                                return None;
                            }
                        });
                    }

                    let before = Instant::now();
                    let finish_by = before + recovery_budget;
                    let (new_laidx, repairs) = recoverer
                        .as_ref()
                        .unwrap()
                        .as_ref()
                        .recover(finish_by, self, laidx, pstack, astack, spans);
                    let after = Instant::now();
                    recovery_budget = recovery_budget
                        .checked_sub(after - before)
                        .unwrap_or_else(|| Duration::new(0, 0));
                    let keep_going = !repairs.is_empty();
                    let la_lexeme = self.next_lexeme(laidx);
                    errors.push(
                        ParseError {
                            stidx,
                            lexeme: la_lexeme,
                            repairs,
                        }
                        .into(),
                    );
                    if !keep_going {
                        return None;
                    }
                    laidx = new_laidx;
                }
            }
        }
    }

    /// Parse from `laidx` up to (but excluding) `end_laidx`, mutating `pstack` as parsing occurs.
    /// Returns the index of the token it parsed up to (by definition <= `end_laidx`: this can be
    /// less if the input contains fewer than `end_laidx` lexemes, or if an error is encountered).
    /// Does not do any form of error recovery.
    pub(super) fn lr_upto(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
        spans: &mut Option<&mut Vec<Span>>,
    ) -> usize {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx && laidx <= self.lexemes.len() {
            let stidx = *pstack.last().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = *spans {
                            let span = if spans_uw.is_empty() {
                                Span::new(0, 0)
                            } else if pop_idx - 1 < spans_uw.len() {
                                Span::new(
                                    spans_uw[pop_idx - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            } else {
                                Span::new(
                                    spans_uw[spans_uw.len() - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            };
                            spans_uw.truncate(pop_idx - 1);
                            spans_uw.push(span);

                            let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                                ridx,
                                self.lexer,
                                span,
                                astack_uw.drain(pop_idx - 1..),
                                self.param.clone(),
                            ));
                            astack_uw.push(v);
                        } else {
                            unreachable!();
                        }
                    }

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = spans {
                            let la_lexeme = if let Some(l) = lexeme_prefix {
                                l
                            } else {
                                self.next_lexeme(laidx)
                            };
                            astack_uw.push(AStackType::Lexeme(la_lexeme));
                            spans_uw.push(la_lexeme.span());
                        }
                    }
                    pstack.push(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        laidx
    }

    /// Return a `Lexeme` for the next lexeme (if `laidx` == `self.lexemes.len()` this will be a
    /// lexeme constructed to look as if it contains the EOF token).
    pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
        let llen = self.lexemes.len();
        debug_assert!(laidx <= llen);
        if laidx < llen {
            self.lexemes[laidx]
        } else {
            // We have to artificially construct a Lexeme for the EOF lexeme.
            let last_la_end = if llen == 0 {
                0
            } else {
                debug_assert!(laidx > 0);
                let last_la = self.lexemes[laidx - 1];
                last_la.span().end()
            };

            Lexeme::new_faulty(
                StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
                last_la_end,
                0,
            )
        }
    }

    /// Return the `TIdx` of the next lexeme (if `laidx` == `self.lexemes.len()` this will be the
    /// EOF `TIdx`).
    pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
        let ll = self.lexemes.len();
        debug_assert!(laidx <= ll);
        if laidx < ll {
            TIdx(self.lexemes[laidx].tok_id())
        } else {
            self.grm.eof_token_idx()
        }
    }

    /// Start parsing text at `laidx` (using the lexeme in `lexeme_prefix`, if it is not `None`,
    /// as the first lexeme) up to (but excluding) `end_laidx`. If an error is encountered, parsing
    /// immediately terminates (without recovery).
    ///
    /// Note that if `lexeme_prefix` is specified, `laidx` will still be incremented, and thus
    /// `end_laidx` *must* be set to `laidx + 1` so that the parser doesn't skip the real lexeme
    /// at position `laidx`.
    pub(super) fn lr_cactus(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        mut pstack: Cactus<StIdx<StorageT>>,
        tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
    ) -> (usize, Cactus<StIdx<StorageT>>) {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx {
            let stidx = *pstack.val().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_num = self.grm.prod(pidx).len();
                    if let Some(ref mut tstack_uw) = *tstack {
                        let nodes = tstack_uw
                            .drain(pstack.len() - pop_num - 1..)
                            .collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
                        tstack_uw.push(Node::Nonterm { ridx, nodes });
                    }

                    for _ in 0..pop_num {
                        pstack = pstack.parent().unwrap();
                    }
                    let prior = *pstack.val().unwrap();
                    pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut tstack_uw) = *tstack {
                        let la_lexeme = if let Some(l) = lexeme_prefix {
                            l
                        } else {
                            self.next_lexeme(laidx)
                        };
                        tstack_uw.push(Node::Term { lexeme: la_lexeme });
                    }
                    pstack = pstack.child(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    if let Some(ref tstack_uw) = *tstack {
                        debug_assert_eq!(tstack_uw.len(), 1);
                    }
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        (laidx, pstack)
    }
}

pub(super) trait Recoverer<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    fn recover(
        &self,
        finish_by: Instant,
        parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
        in_laidx: usize,
        in_pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        spans: &mut Vec<Span>,
    ) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
}

/// What recovery algorithm should be used when a syntax error is encountered?
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub enum RecoveryKind {
    /// The CPCT+ algorithm from Diekmann/Tratt "Don't Panic! Better, Fewer, Syntax Errors for LR
    /// Parsers".
    CPCTPlus,
    /// Don't use error recovery: return as soon as the first syntax error is encountered.
    None,
}

impl TryFrom<RecoveryKind> for Value<Location> {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: RecoveryKind) -> Result<Value<Location>, Self::Error> {
        use cfgrammar::{
            header::{Namespaced, Setting},
            Location,
        };
        let from_loc = Location::Other("From<RecoveryKind>".to_string());
        Ok(match rk {
            RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("CPCTPlus".to_string(), from_loc.clone()),
            })),
            RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("None".to_string(), from_loc.clone()),
            })),
        })
    }
}

impl TryFrom<&Value<Location>> for RecoveryKind {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: &Value<Location>) -> Result<RecoveryKind, Self::Error> {
        use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting};

        match rk {
            Value::Flag(_, loc) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert boolean to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert number to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Unitary(Namespaced {
                namespace,
                member: (kind, kind_loc),
            })) => {
                match namespace {
                    Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => {
                        return Err(HeaderError {
                            kind: HeaderErrorKind::ConversionError(
                                "RecoveryKind",
                                "Unknown namespace",
                            ),
                            locations: vec![loc.clone()],
                        })
                    }
                    _ => {}
                }
                match kind.to_lowercase().as_ref() {
                    "cpctplus" => Ok(RecoveryKind::CPCTPlus),
                    "none" => Ok(RecoveryKind::None),
                    _ => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"),
                        locations: vec![kind_loc.clone()],
                    }),
                }
            }
            Value::Setting(Setting::Constructor {
                ctor: _,
                arg:
                    Namespaced {
                        namespace: _,
                        member: (_, arg_loc),
                    },
            }) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert constructor to RecoveryKind",
                ),
                locations: vec![arg_loc.clone()],
            }),
        }
    }
}

impl quote::ToTokens for RecoveryKind {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match *self {
            RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
            RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
        })
    }
}

/// A lexing or parsing error. Although the two are quite distinct in terms of what can be reported
/// to users, both can (at least conceptually) occur at any point of the intertwined lexing/parsing
/// process.
#[derive(Debug)]
pub enum LexParseError<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    LexError(LexerTypesT::LexErrorT),
    ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
}

impl<StorageT: Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
    LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// A pretty-printer of a lexer/parser error. This isn't suitable for all purposes, but it's
    /// often good enough. The output format is not guaranteed to be stable but is likely to be of
    /// the form:
    ///
    /// ```text
    /// Parsing error at line 3 column 8. Repair sequences found:
    ///   1: Insert ID
    ///   2: Delete +, Shift 3
    /// ```
    ///
    /// If you are using the compile-time parse system, your `grm_y` module exposes a suitable
    /// [`epp`](../ctbuilder/struct.CTParserBuilder.html#method.process_file) function; if you are
    /// using the run-time system,
    /// [`YaccGrammar`](../../cfgrammar/yacc/grammar/struct.YaccGrammar.html) exposes a suitable
    /// [`epp`](../../cfgrammar/yacc/grammar/struct.YaccGrammar.html#method.token_epp) function,
    /// though you will have to wrap it in a closure e.g. `&|t| grm.token_epp(t)`.
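    ///
    /// A hedged usage sketch with the run-time system (`grm`, `lexer` and `errs` are assumed to
    /// come from the surrounding program):
    ///
    /// ```ignore
    /// for e in &errs {
    ///     eprintln!("{}", e.pp(&lexer, &|t| grm.token_epp(t)));
    /// }
    /// ```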
    pub fn pp<'a>(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
        epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
    ) -> String {
        match self {
            LexParseError::LexError(e) => {
                let ((line, col), _) = lexer.line_col(e.span());
                format!("Lexing error at line {} column {}.", line, col)
            }
            LexParseError::ParseError(e) => {
                let ((line, col), _) = lexer.line_col(e.lexeme().span());
                let mut out = format!("Parsing error at line {} column {}.", line, col);
                let repairs_len = e.repairs().len();
                if repairs_len == 0 {
                    out.push_str(" No repair sequences found.");
                } else {
                    out.push_str(" Repair sequences found:");
                    for (i, rs) in e.repairs().iter().enumerate() {
                        let padding = ((repairs_len as f64).log10() as usize)
                            - (((i + 1) as f64).log10() as usize)
                            + 1;
                        write!(out, "\n  {}{}: ", " ".repeat(padding), i + 1).ok();
                        let mut rs_out = Vec::new();

                        // Merge together Deletes iff they are consecutive (if they are separated
                        // by even a single character, they will not be merged).
                        let mut i = 0;
                        while i < rs.len() {
                            match rs[i] {
                                ParseRepair::Delete(l) => {
                                    let mut j = i + 1;
                                    let mut last_end = l.span().end();
                                    while j < rs.len() {
                                        if let ParseRepair::Delete(next_l) = rs[j] {
                                            if next_l.span().start() == last_end {
                                                last_end = next_l.span().end();
                                                j += 1;
                                                continue;
                                            }
                                        }
                                        break;
                                    }
                                    let t = &lexer
                                        .span_str(Span::new(l.span().start(), last_end))
                                        .replace('\n', "\\n");
                                    rs_out.push(format!("Delete {}", t));
                                    i = j;
                                }
                                ParseRepair::Insert(tidx) => {
                                    rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
                                    i += 1;
                                }
                                ParseRepair::Shift(l) => {
                                    let t = &lexer.span_str(l.span()).replace('\n', "\\n");
                                    rs_out.push(format!("Shift {}", t));
                                    i += 1;
                                }
                            }
                        }

                        out.push_str(&rs_out.join(", "));
                    }
                }
                out
            }
        }
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > fmt::Display for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            LexParseError::LexError(ref e) => Display::fmt(e, f),
            LexParseError::ParseError(ref e) => Display::fmt(e, f),
        }
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Error for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
        T: LexError,
    > From<T> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::LexError(err)
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(
        err: ParseError<LexerTypesT::LexemeT, StorageT>,
    ) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::ParseError(err)
    }
}

/// A run-time parser builder.
pub struct RTParserBuilder<
    'a,
    StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    grm: &'a YaccGrammar<StorageT>,
    stable: &'a StateTable<StorageT>,
    recoverer: RecoveryKind,
    term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
    phantom: PhantomData<(StorageT, LexerTypesT)>,
}

impl<
        'a,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > RTParserBuilder<'a, StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Create a new run-time parser from a `YaccGrammar` and a `StateTable`.
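    ///
    /// A hedged construction sketch (assuming `grm` and `stable` were built by `cfgrammar` and
    /// `lrtable` respectively, and `lexer` implements `NonStreamingLexer`):
    ///
    /// ```ignore
    /// let pb = RTParserBuilder::new(&grm, &stable)
    ///     .recoverer(RecoveryKind::CPCTPlus)
    ///     .term_costs(&|_| 1);
    /// let (tree_opt, errs) = pb.parse_generictree(&lexer);
    /// ```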
    pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
        RTParserBuilder {
            grm,
            stable,
            recoverer: RecoveryKind::CPCTPlus,
            term_costs: &|_| 1,
            phantom: PhantomData,
        }
    }

    /// Set the recoverer for this parser to `rk`.
    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
        self.recoverer = rk;
        self
    }

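    /// Set the token cost function to `f`: `f(tidx)` weights the token `tidx` when repair
    /// sequences are constructed during error recovery. Every cost must be greater than 0 (the
    /// `parse_*` functions assert this).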
    pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
        self.term_costs = f;
        self
    }

    /// Parse input, and (if possible) return a generic parse tree. See the arguments for
    /// [`parse_actions`](#method.parse_actions) for more details about the return value.
    pub fn parse_generictree(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::<StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>::parse_generictree(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
        )
    }

    /// Parse input, returning any errors found. See the arguments for
    /// [`parse_actions`](#method.parse_actions) for more details about the return value.
    pub fn parse_noaction(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return vec![e.into()],
            }
        }
        Parser::<StorageT, LexerTypesT, (), ()>::parse_noaction(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
        )
    }

    /// Parse input, execute actions, and return the associated value (if possible) and/or any
    /// lexing/parsing errors encountered. Note that the two parts of the (value, errors) return
    /// pair are entirely independent: one can encounter errors without a value being produced
    /// (`None, [...]`), errors and a value (`Some(...), [...]`), as well as a value and no errors
    /// (`Some(...), []`). Errors are sorted by the position they were found in the input and can
    /// be a mix of lexing and parsing errors.
    pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
        &self,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::parse_actions(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
            actions,
            param,
        )
    }
}

/// After a parse error is encountered, the parser attempts to find a way of recovering. Each entry
/// in the sequence of repairs is represented by a `ParseRepair`.
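///
/// A hedged sketch of inspecting one repair sequence of a `ParseError` `e`:
///
/// ```ignore
/// if let Some(rs) = e.repairs().first() {
///     for r in rs {
///         match r {
///             ParseRepair::Insert(tidx) => { /* a token the repair would insert */ }
///             ParseRepair::Delete(l) => { /* an input lexeme the repair would delete */ }
///             ParseRepair::Shift(l) => { /* an input lexeme kept as-is */ }
///         }
///     }
/// }
/// ```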
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    /// Insert a `Symbol::Token`.
    Insert(TIdx<StorageT>),
    /// Delete a symbol.
    Delete(LexemeT),
    /// Shift a symbol.
    Shift(LexemeT),
}

/// Records a single parse error.
#[derive(Clone, Debug, PartialEq)]
pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    stidx: StIdx<StorageT>,
    lexeme: LexemeT,
    repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Parse error at lexeme {:?}", self.lexeme)
    }
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}

impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
    /// Return the state table index where this error was detected.
    pub fn stidx(&self) -> StIdx<StorageT> {
        self.stidx
    }

    /// Return the lexeme where this error was detected.
    pub fn lexeme(&self) -> &LexemeT {
        &self.lexeme
    }

    /// Return the repairs found that would fix this error. Note that there is an infinite number
    /// of possible repairs for any error, so this is by definition a (finite) subset.
    pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
        &self.repairs
    }
}

#[cfg(test)]
pub(crate) mod test {
    use std::collections::HashMap;

    use cfgrammar::{
        yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
        Span,
    };
    use lrtable::{from_yacc, Minimiser};
    use num_traits::ToPrimitive;
    use regex::Regex;

    use super::*;
    use crate::{
        test_utils::{TestLexError, TestLexeme, TestLexerTypes},
        Lexeme, Lexer,
    };

    pub(crate) fn do_parse<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
    }

    fn do_parse_with_costs<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
        costs: &HashMap<&str, u8>,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        let grm = YaccGrammar::<u16>::new_with_storaget(
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
            grms,
        )
        .unwrap();
        let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
            .collect();
        let lexer_rules = small_lexer(lexs, rule_ids);
        let lexemes = small_lex(lexer_rules, input);
        let lexer = SmallLexer { lexemes, s: input };
        let costs_tidx = costs
            .iter()
            .map(|(k, v)| (grm.token_idx(k).unwrap(), v))
            .collect::<HashMap<_, _>>();
        let (r, errs) = RTParserBuilder::new(&grm, &stable)
            .recoverer(rcvry_kind)
            .term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
            .parse_generictree(&lexer);
        if let Some(node) = r {
            if errs.is_empty() {
                (grm, lexer, Ok(node))
            } else {
                (grm, lexer, Err((Some(node), errs)))
            }
        } else {
            (grm, lexer, Err((None, errs)))
        }
    }

    fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
        let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
        assert_eq!(expected, pt.unwrap().pp(&grm, input));
    }

    // SmallLexer is our highly simplified version of lrlex (allowing us to avoid having lrlex as
    // a dependency of lrpar). The format is the same as lrlex *except*:
    //   * The initial "%%" isn't needed, and only "'" is valid as a rule name delimiter.
    //   * "Unnamed" rules aren't allowed (e.g. you can't have a rule which discards whitespace).
    pub(crate) struct SmallLexer<'input> {
        lexemes: Vec<TestLexeme>,
        s: &'input str,
    }

    impl Lexer<TestLexerTypes> for SmallLexer<'_> {
        fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
            Box::new(self.lexemes.iter().map(|x| Ok(*x)))
        }
    }

    impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
        fn span_str(&self, span: Span) -> &'input str {
            &self.s[span.start()..span.end()]
        }

        fn span_lines_str(&self, _: Span) -> &'input str {
            unreachable!();
        }

        fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
            unreachable!();
        }
    }

    fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
        let mut rules = Vec::new();
        for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
            assert!(l.rfind('\'') == Some(l.len() - 1));
            let i = l[..l.len() - 1].rfind('\'').unwrap();
            let name = &l[i + 1..l.len() - 1];
            let re = &l[..i - 1].trim();
            rules.push((
                ids_map[name],
                Regex::new(&format!("\\A(?:{})", re)).unwrap(),
            ));
        }
        rules
    }

    fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
        let mut lexemes = vec![];
        let mut i = 0;
        while i < input.len() {
            let mut longest = 0; // Length of the longest match
            let mut longest_tok_id = 0; // This is only valid iff longest != 0
            for (tok_id, r) in rules.iter() {
                if let Some(m) = r.find(&input[i..]) {
                    let len = m.end();
                    if len > longest {
                        longest = len;
                        longest_tok_id = *tok_id;
                    }
                }
            }
            assert!(longest > 0);
            lexemes.push(Lexeme::new(longest_tok_id, i, longest));
            i += longest;
        }
        lexemes
    }

    #[test]
    fn simple_parse() {
        // From p4 of https://www.cs.umd.edu/class/spring2014/cmsc430/lectures/lec07.pdf
        check_parse_output(
            "[a-zA-Z_] 'ID'
             \\+ '+'",
            "
%start E
%%
E: T '+' E
 | T;
T: 'ID';
",
            "a+b",
            "E
 T
  ID a
 + +
 E
  T
   ID b
",
        );
    }

    #[test]
    fn parse_empty_rules() {
        let lexs = "[a-zA-Z_] 'ID'";
        let grms = "%start S
%%
S: L;
L: 'ID'
 | ;
";
        check_parse_output(
            lexs, grms, "", "S
 L
",
        );

        check_parse_output(
            lexs,
            grms,
            "x",
            "S
 L
  ID x
",
        );
    }

    #[test]
    fn recursive_parse() {
        let lexs = "\\+ '+'
                    \\* '*'
                    [0-9]+ 'INT'";
        let grms = "%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";

        check_parse_output(
            lexs,
            grms,
            "2+3*4",
            "Expr
 Expr
  Term
   Factor
    INT 2
 + +
 Term
  Term
   Factor
    INT 3
  * *
  Factor
   INT 4
",
        );
        check_parse_output(
            lexs,
            grms,
            "2*3+4",
            "Expr
 Expr
  Term
   Term
    Factor
     INT 2
   * *
   Factor
    INT 3
 + +
 Term
  Factor
   INT 4
",
        );
    }

    #[test]
    fn parse_error() {
        let lexs = "\\( '('
                    \\) ')'
                    [a-zA-Z_][a-zA-Z_0-9]* 'ID'";
        let grms = "%start Call
%%
Call: 'ID' '(' ')';";

        check_parse_output(
            lexs,
            grms,
            "f()",
            "Call
 ID f
 ( (
 ) )
",
        );

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
                assert!(e.lexeme().faulty());
            }
            _ => unreachable!(),
        }

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
                assert!(!e.lexeme().faulty());
            }
            _ => unreachable!(),
        }
    }
}