// lrpar/parser.rs

#![allow(clippy::derive_partial_eq_without_eq)]
use std::{
    error::Error,
    fmt::{self, Debug, Display, Write as _},
    hash::Hash,
    marker::PhantomData,
    vec,
};

// `web_time` would also work on non-wasm32 targets, but we use `std::time` there to avoid the
// extra dependency.
#[cfg(not(target_arch = "wasm32"))]
use std::time::{Duration, Instant};
#[cfg(target_arch = "wasm32")]
use web_time::{Duration, Instant};

use cactus::Cactus;
use cfgrammar::{header::Value, span::Location, yacc::YaccGrammar, RIdx, Span, TIdx};
use lrtable::{Action, StIdx, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::TokenStream;
use quote::quote;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::{cpctplus, LexError, Lexeme, LexerTypes, NonStreamingLexer};

#[cfg(test)]
const RECOVERY_TIME_BUDGET: u64 = 60_000; // milliseconds
#[cfg(not(test))]
const RECOVERY_TIME_BUDGET: u64 = 500; // milliseconds

/// A generic parse tree.
#[derive(Debug, Clone, PartialEq)]
pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
    /// Terminals store a single lexeme.
    Term { lexeme: LexemeT },
    /// Nonterminals reference a rule and have zero or more `Node`s as children.
    Nonterm {
        ridx: RIdx<StorageT>,
        nodes: Vec<Node<LexemeT, StorageT>>,
    },
}

impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Return a pretty-printed version of this node.
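    ///
    /// # Example
    ///
    /// A minimal sketch, assuming `grm`, `stable`, `lexer`, and `input` from a run-time parse
    /// are in scope:
    ///
    /// ```rust,ignore
    /// let (tree, errs) = RTParserBuilder::new(&grm, &stable).parse_generictree(&lexer);
    /// if let Some(node) = tree {
    ///     // One line per node: nonterminals by rule name, terminals as "token-name lexeme-text".
    ///     println!("{}", node.pp(&grm, input));
    /// }
    /// ```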
    pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
        let mut st = vec![(0, self)]; // Stack of (indent level, node) pairs
        let mut s = String::new();
        while let Some((indent, e)) = st.pop() {
            for _ in 0..indent {
                s.push(' ');
            }
            match *e {
                Node::Term { lexeme } => {
                    let tidx = TIdx(lexeme.tok_id());
                    let tn = grm.token_name(tidx).unwrap();
                    let lt = &input[lexeme.span().start()..lexeme.span().end()];
                    writeln!(s, "{} {}", tn, lt).ok();
                }
                Node::Nonterm { ridx, ref nodes } => {
                    writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
                    for x in nodes.iter().rev() {
                        st.push((indent + 1, x));
                    }
                }
            }
        }
        s
    }
}

type PStack<StorageT> = Vec<StIdx<StorageT>>; // Parse stack
type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
    RIdx<StorageT>,
    &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    Span,
    vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
    ParamT,
) -> ActionT;

#[derive(Debug)]
pub enum AStackType<LexemeT, ActionT> {
    ActionType(ActionT),
    Lexeme(LexemeT),
}

pub(super) struct Parser<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT: 'a,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    rcvry_kind: RecoveryKind,
    pub(super) grm: &'a YaccGrammar<StorageT>,
    pub(super) token_cost: Box<TokenCostFn<'a, StorageT>>,
    pub(super) stable: &'a StateTable<StorageT>,
    lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    // In the long term, we should remove the `lexemes` field entirely, as the `NonStreamingLexer` API is
    // powerful enough to allow us to incrementally obtain lexemes and buffer them when necessary.
    pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
    actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
    param: ParamT,
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_generictree(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<
            ActionFn<
                'a,
                'b,
                'input,
                StorageT,
                LexerTypesT,
                Node<LexerTypesT::LexemeT, StorageT>,
                (),
            >,
        > = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &action_generictree);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }
}

/// The action which implements [`cfgrammar::yacc::YaccOriginalActionKind::GenericParseTree`].
/// Usually you should just use the action kind directly. But you can also call this from
/// within a custom action to return a generic parse tree with custom behavior.
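///
/// # Example
///
/// A minimal sketch of a custom action that wraps `action_generictree`; everything here simply
/// delegates, so the result is the ordinary generic parse tree:
///
/// ```rust,ignore
/// fn my_action<StorageT, LexerTypesT: LexerTypes>(
///     ridx: RIdx<StorageT>,
///     lexer: &dyn NonStreamingLexer<LexerTypesT>,
///     span: Span,
///     astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
///     param: (),
/// ) -> Node<LexerTypesT::LexemeT, StorageT>
/// where
///     usize: AsPrimitive<LexerTypesT::StorageT>,
///     LexerTypesT::LexemeT: Lexeme<StorageT>,
/// {
///     // Custom behavior (e.g. logging the reduction) would go here.
///     action_generictree(ridx, lexer, span, astack, param)
/// }
/// ```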
pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
    ridx: RIdx<StorageT>,
    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
    _span: Span,
    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
    _param: (),
) -> Node<LexerTypesT::LexemeT, StorageT>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::LexemeT: Lexeme<StorageT>,
{
    let mut nodes = Vec::with_capacity(astack.len());
    for a in astack {
        nodes.push(match a {
            AStackType::ActionType(n) => n,
            AStackType::Lexeme(lexeme) => Node::Term { lexeme },
        });
    }
    Node::Nonterm { ridx, nodes }
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_noaction(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>> = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &Parser::noaction);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        errors
    }

    fn noaction(
        _ridx: RIdx<StorageT>,
        _lexer: &dyn NonStreamingLexer<LexerTypesT>,
        _span: Span,
        _astack: vec::Drain<AStackType<LexerTypesT::LexemeT, ()>>,
        _param: (),
    ) {
    }
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
        ActionT: 'a,
        ParamT: Clone,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_actions(
        rcvry_kind: RecoveryKind,
        grm: &'a YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &'a StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions,
            param,
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }

    /// Start parsing text at `laidx`. Parsing continues for as long as possible: if an error is
    /// encountered, the recoverer selected by `rcvry_kind` is used to try to repair it so that
    /// parsing can carry on (unless recovery is disabled, or fails, in which case this function
    /// returns immediately).
    ///
    /// Return `Some(value)` if the parse reached an accept state (i.e. all the input was consumed,
    /// possibly after making repairs) or `None` (i.e. some of the input was not consumed, even
    /// after possibly making repairs) otherwise.
    fn lr(
        &self,
        mut laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
        spans: &mut Vec<Span>,
    ) -> Option<ActionT> {
        let mut recoverer = None;
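        // Error recovery for the whole parse shares a single time budget: each invocation of the
        // recoverer below is handed whatever remains of it, and the time that invocation uses is
        // deducted afterwards.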
        let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
        loop {
            debug_assert_eq!(astack.len(), spans.len());
            let stidx = *pstack.last().unwrap();
            let la_tidx = self.next_tidx(laidx);

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());

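                    // Compute a span covering every symbol popped by this reduction: an empty
                    // production reuses the span of the most recently pushed symbol (or (0, 0)
                    // if there is none yet).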
                    let span = if spans.is_empty() {
                        Span::new(0, 0)
                    } else if pop_idx - 1 < spans.len() {
                        Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
                    } else {
                        Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
                    };
                    spans.truncate(pop_idx - 1);
                    spans.push(span);

                    let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                        ridx,
                        self.lexer,
                        span,
                        astack.drain(pop_idx - 1..),
                        self.param.clone(),
                    ));
                    astack.push(v);
                }
                Action::Shift(state_id) => {
                    let la_lexeme = self.next_lexeme(laidx);
                    pstack.push(state_id);
                    astack.push(AStackType::Lexeme(la_lexeme));

                    spans.push(la_lexeme.span());
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    debug_assert_eq!(astack.len(), 1);
                    match astack.drain(..).next().unwrap() {
                        AStackType::ActionType(v) => return Some(v),
                        _ => unreachable!(),
                    }
                }
                Action::Error => {
                    if recoverer.is_none() {
                        recoverer = Some(match self.rcvry_kind {
                            RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
                            RecoveryKind::None => {
                                let la_lexeme = self.next_lexeme(laidx);
                                errors.push(
                                    ParseError {
                                        stidx,
                                        lexeme: la_lexeme,
                                        repairs: vec![],
                                    }
                                    .into(),
                                );
                                return None;
                            }
                        });
                    }

                    let before = Instant::now();
                    let finish_by = before + recovery_budget;
                    let (new_laidx, repairs) = recoverer
                        .as_ref()
                        .unwrap()
                        .as_ref()
                        .recover(finish_by, self, laidx, pstack, astack, spans);
                    let after = Instant::now();
                    recovery_budget = recovery_budget
                        .checked_sub(after - before)
                        .unwrap_or_else(|| Duration::new(0, 0));
                    let keep_going = !repairs.is_empty();
                    let la_lexeme = self.next_lexeme(laidx);
                    errors.push(
                        ParseError {
                            stidx,
                            lexeme: la_lexeme,
                            repairs,
                        }
                        .into(),
                    );
                    if !keep_going {
                        return None;
                    }
                    laidx = new_laidx;
                }
            }
        }
    }

    /// Parse from `laidx` up to (but excluding) `end_laidx`, mutating `pstack` as parsing occurs.
    /// Returns the index of the token it parsed up to (by definition `<= end_laidx`: it can be
    /// less if the input ends before `end_laidx`, or if an error is encountered). Does not do any
    /// form of error recovery.
    pub(super) fn lr_upto(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
        spans: &mut Option<&mut Vec<Span>>,
    ) -> usize {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
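        // `astack` and `spans` must either both be `Some` or both be `None`; the `unreachable!`
        // below relies on this invariant.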
        while laidx != end_laidx && laidx <= self.lexemes.len() {
            let stidx = *pstack.last().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = *spans {
                            let span = if spans_uw.is_empty() {
                                Span::new(0, 0)
                            } else if pop_idx - 1 < spans_uw.len() {
                                Span::new(
                                    spans_uw[pop_idx - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            } else {
                                Span::new(
                                    spans_uw[spans_uw.len() - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            };
                            spans_uw.truncate(pop_idx - 1);
                            spans_uw.push(span);

                            let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                                ridx,
                                self.lexer,
                                span,
                                astack_uw.drain(pop_idx - 1..),
                                self.param.clone(),
                            ));
                            astack_uw.push(v);
                        } else {
                            unreachable!();
                        }
                    }

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = spans {
                            let la_lexeme = if let Some(l) = lexeme_prefix {
                                l
                            } else {
                                self.next_lexeme(laidx)
                            };
                            astack_uw.push(AStackType::Lexeme(la_lexeme));
                            spans_uw.push(la_lexeme.span());
                        }
                    }
                    pstack.push(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        laidx
    }

    /// Return a `Lexeme` for the next lexeme (if `laidx` == `self.lexemes.len()` this will be a
    /// lexeme constructed to look as if it contains the EOF token).
    pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
        let llen = self.lexemes.len();
        debug_assert!(laidx <= llen);
        if laidx < llen {
            self.lexemes[laidx]
        } else {
            // We have to artificially construct a Lexeme for the EOF lexeme.
            let last_la_end = if llen == 0 {
                0
            } else {
                debug_assert!(laidx > 0);
                let last_la = self.lexemes[laidx - 1];
                last_la.span().end()
            };

            Lexeme::new_faulty(
                StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
                last_la_end,
                0,
            )
        }
    }

    /// Return the `TIdx` of the next lexeme (if `laidx` == `self.lexemes.len()` this will be the
    /// EOF `TIdx`).
    pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
        let ll = self.lexemes.len();
        debug_assert!(laidx <= ll);
        if laidx < ll {
            TIdx(self.lexemes[laidx].tok_id())
        } else {
            self.grm.eof_token_idx()
        }
    }

    /// Start parsing text at `laidx` (using the lexeme in `lexeme_prefix`, if it is not `None`,
    /// as the first lexeme) up to (but excluding) `end_laidx`. If an error is encountered, parsing
    /// immediately terminates (without recovery).
    ///
    /// Note that if `lexeme_prefix` is specified, `laidx` will still be incremented, and thus
    /// `end_laidx` *must* be set to `laidx + 1` in order that the parser doesn't skip the real
    /// lexeme at position `laidx`.
    pub(super) fn lr_cactus(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        mut pstack: Cactus<StIdx<StorageT>>,
        tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
    ) -> (usize, Cactus<StIdx<StorageT>>) {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
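        // `pstack` is a parent-pointer ("cactus") stack: `parent`/`child` stand in for
        // `pop`/`push`, leaving earlier stack states intact for other holders of the cactus.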
        while laidx != end_laidx {
            let stidx = *pstack.val().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_num = self.grm.prod(pidx).len();
                    if let Some(ref mut tstack_uw) = *tstack {
                        let nodes = tstack_uw
                            .drain(pstack.len() - pop_num - 1..)
                            .collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
                        tstack_uw.push(Node::Nonterm { ridx, nodes });
                    }

                    for _ in 0..pop_num {
                        pstack = pstack.parent().unwrap();
                    }
                    let prior = *pstack.val().unwrap();
                    pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut tstack_uw) = *tstack {
                        let la_lexeme = if let Some(l) = lexeme_prefix {
                            l
                        } else {
                            self.next_lexeme(laidx)
                        };
                        tstack_uw.push(Node::Term { lexeme: la_lexeme });
                    }
                    pstack = pstack.child(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    if let Some(ref tstack_uw) = *tstack {
                        debug_assert_eq!(tstack_uw.len(), 1);
                    }
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        (laidx, pstack)
    }
}

pub(super) trait Recoverer<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    fn recover(
        &self,
        finish_by: Instant,
        parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
        in_laidx: usize,
        in_pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        spans: &mut Vec<Span>,
    ) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
}

/// What recovery algorithm should be used when a syntax error is encountered?
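///
/// # Example
///
/// A minimal sketch of choosing an algorithm for a run-time parser (`grm` and `stable` are
/// assumed to be in scope):
///
/// ```rust,ignore
/// let pb = RTParserBuilder::new(&grm, &stable).recoverer(RecoveryKind::None);
/// ```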
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub enum RecoveryKind {
    /// The CPCT+ algorithm from Diekmann/Tratt "Don't Panic! Better, Fewer, Syntax Errors for LR
    /// Parsers".
    CPCTPlus,
    /// Don't use error recovery: return as soon as the first syntax error is encountered.
    None,
}

impl TryFrom<RecoveryKind> for Value<Location> {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: RecoveryKind) -> Result<Value<Location>, Self::Error> {
        use cfgrammar::{
            header::{Namespaced, Setting},
            Location,
        };
        let from_loc = Location::Other("From<RecoveryKind>".to_string());
        Ok(match rk {
            RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("CPCTPlus".to_string(), from_loc.clone()),
            })),
            RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("None".to_string(), from_loc.clone()),
            })),
        })
    }
}

impl TryFrom<&Value<Location>> for RecoveryKind {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: &Value<Location>) -> Result<RecoveryKind, Self::Error> {
        use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting};

        match rk {
            Value::Flag(_, loc) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert boolean to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert number to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert string to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Unitary(Namespaced {
                namespace,
                member: (kind, kind_loc),
            })) => {
                match namespace {
                    Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => {
                        return Err(HeaderError {
                            kind: HeaderErrorKind::ConversionError(
                                "RecoveryKind",
                                "Unknown namespace",
                            ),
                            locations: vec![loc.clone()],
                        })
                    }
                    _ => {}
                }
                match kind.to_lowercase().as_ref() {
                    "cpctplus" => Ok(RecoveryKind::CPCTPlus),
                    "none" => Ok(RecoveryKind::None),
                    _ => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"),
                        locations: vec![kind_loc.clone()],
                    }),
                }
            }
            Value::Setting(Setting::Constructor {
                ctor: _,
                arg:
                    Namespaced {
                        namespace: _,
                        member: (_, arg_loc),
                    },
            }) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert constructor to RecoveryKind",
                ),
                locations: vec![arg_loc.clone()],
            }),
        }
    }
}

impl quote::ToTokens for RecoveryKind {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match *self {
            RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
            RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
        })
    }
}

/// A lexing or parsing error. Although the two are quite distinct in terms of what can be reported
/// to users, both can (at least conceptually) occur at any point of the intertwined lexing/parsing
/// process.
#[derive(Debug)]
pub enum LexParseError<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    LexError(LexerTypesT::LexErrorT),
    ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
}

impl<StorageT: Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
    LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// A pretty-printer of a lexer/parser error. This isn't suitable for all purposes, but it's
    /// often good enough. The output format is not guaranteed to be stable but is likely to be of
    /// the form:
    ///
    /// ```text
    /// Parsing error at line 3 column 8. Repair sequences found:
    ///   1: Insert ID
    ///   2: Delete +, Shift 3
    /// ```
    ///
    /// If you are using the compile-time parse system, your `grm_y` module exposes a suitable
    /// [`epp`](../ctbuilder/struct.CTParserBuilder.html#method.process_file) function; if you are
    /// using the run-time system,
    /// [`YaccGrammar`](../../cfgrammar/yacc/grammar/struct.YaccGrammar.html) exposes a suitable
    /// [`epp`](../../cfgrammar/yacc/grammar/struct.YaccGrammar.html#method.token_epp) function,
    /// though you will have to wrap it in a closure e.g. `&|t| grm.token_epp(t)`.
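    ///
    /// # Example
    ///
    /// A minimal sketch using the run-time system, assuming `grm`, `lexer`, and a `Vec` of
    /// errors `errs` are in scope:
    ///
    /// ```rust,ignore
    /// for e in &errs {
    ///     eprintln!("{}", e.pp(lexer, &|t| grm.token_epp(t)));
    /// }
    /// ```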
    pub fn pp<'a>(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
        epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
    ) -> String {
        match self {
            LexParseError::LexError(e) => {
                let ((line, col), _) = lexer.line_col(e.span());
                format!("Lexing error at line {} column {}.", line, col)
            }
            LexParseError::ParseError(e) => {
                let ((line, col), _) = lexer.line_col(e.lexeme().span());
                let mut out = format!("Parsing error at line {} column {}.", line, col);
                let repairs_len = e.repairs().len();
                if repairs_len == 0 {
                    out.push_str(" No repair sequences found.");
                } else {
                    out.push_str(" Repair sequences found:");
                    for (i, rs) in e.repairs().iter().enumerate() {
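                        // Right-align the repair sequence numbers so that e.g. "9:" and "10:"
                        // line up when 10 or more repair sequences are reported.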
                        let padding = ((repairs_len as f64).log10() as usize)
                            - (((i + 1) as f64).log10() as usize)
                            + 1;
                        write!(out, "\n  {}{}: ", " ".repeat(padding), i + 1).ok();
                        let mut rs_out = Vec::new();

                        // Merge together Deletes iff they are consecutive (if they are separated
                        // by even a single character, they will not be merged).
                        let mut i = 0;
                        while i < rs.len() {
                            match rs[i] {
                                ParseRepair::Delete(l) => {
                                    let mut j = i + 1;
                                    let mut last_end = l.span().end();
                                    while j < rs.len() {
                                        if let ParseRepair::Delete(next_l) = rs[j] {
                                            if next_l.span().start() == last_end {
                                                last_end = next_l.span().end();
                                                j += 1;
                                                continue;
                                            }
                                        }
                                        break;
                                    }
                                    let t = &lexer
                                        .span_str(Span::new(l.span().start(), last_end))
                                        .replace('\n', "\\n");
                                    rs_out.push(format!("Delete {}", t));
                                    i = j;
                                }
                                ParseRepair::Insert(tidx) => {
                                    rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
                                    i += 1;
                                }
                                ParseRepair::Shift(l) => {
                                    let t = &lexer.span_str(l.span()).replace('\n', "\\n");
                                    rs_out.push(format!("Shift {}", t));
                                    i += 1;
                                }
                            }
                        }

                        out.push_str(&rs_out.join(", "));
                    }
                }
                out
            }
        }
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > fmt::Display for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            LexParseError::LexError(ref e) => Display::fmt(e, f),
            LexParseError::ParseError(ref e) => Display::fmt(e, f),
        }
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Error for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
        T: LexError,
    > From<T> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::LexError(err)
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(
        err: ParseError<LexerTypesT::LexemeT, StorageT>,
    ) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::ParseError(err)
    }
}

/// A run-time parser builder.
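///
/// # Example
///
/// A minimal sketch, mirroring the test module at the bottom of this file; `grammar_src` and a
/// `lexer` implementing `NonStreamingLexer` are assumed to be in scope:
///
/// ```rust,ignore
/// let grm = YaccGrammar::<u16>::new_with_storaget(
///     YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
///     grammar_src,
/// )
/// .unwrap();
/// let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
/// let (tree, errs) = RTParserBuilder::new(&grm, &stable).parse_generictree(&lexer);
/// ```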
pub struct RTParserBuilder<
    'a,
    StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    grm: &'a YaccGrammar<StorageT>,
    stable: &'a StateTable<StorageT>,
    recoverer: RecoveryKind,
    term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
    phantom: PhantomData<(StorageT, LexerTypesT)>,
}

impl<
        'a,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > RTParserBuilder<'a, StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Create a new run-time parser from a `YaccGrammar`, and a `StateTable`.
    pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
        RTParserBuilder {
            grm,
            stable,
            recoverer: RecoveryKind::CPCTPlus,
            term_costs: &|_| 1,
            phantom: PhantomData,
        }
    }

    /// Set the recoverer for this parser to `rk`.
    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
        self.recoverer = rk;
        self
    }

    pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
        self.term_costs = f;
        self
    }

    /// Parse input, and (if possible) return a generic parse tree. See the arguments for
    /// [`parse_actions`](#method.parse_actions) for more details about the return value.
    pub fn parse_generictree(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::<StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>::parse_generictree(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
        )
    }

    /// Parse input, returning any errors found. See the arguments for
    /// [`parse_actions`](#method.parse_actions) for more details about the return value.
    pub fn parse_noaction(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return vec![e.into()],
            }
        }
        Parser::<StorageT, LexerTypesT, (), ()>::parse_noaction(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
        )
    }

    /// Parse input, execute actions, and return the associated value (if possible) and/or any
    /// lexing/parsing errors encountered. Note that the two parts of the (value, errors) return
    /// pair are entirely independent: one can encounter errors without a value being produced
    /// (`None, [...]`), errors and a value (`Some(...), [...]`), as well as a value and no errors
    /// (`Some(...), []`). Errors are sorted by the position they were found in the input and can
    /// be a mix of lexing and parsing errors.
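    ///
    /// # Example
    ///
    /// A hedged sketch mirroring `parse_noaction`: every production is given the same action,
    /// which ignores its arguments and returns `()`. `TestLexerTypes`/`TestLexeme` (this crate's
    /// test utilities) stand in for a real `LexerTypes` implementation, and `pb`, `grm`, and
    /// `lexer` are assumed to be in scope:
    ///
    /// ```rust,ignore
    /// fn nop(
    ///     _ridx: RIdx<u16>,
    ///     _lexer: &dyn NonStreamingLexer<TestLexerTypes>,
    ///     _span: Span,
    ///     _astack: vec::Drain<AStackType<TestLexeme, ()>>,
    ///     _param: (),
    /// ) {
    /// }
    ///
    /// let mut actions: Vec<ActionFn<u16, TestLexerTypes, (), ()>> = Vec::new();
    /// actions.resize(usize::from(grm.prods_len()), &nop);
    /// let (r, errs) = pb.parse_actions(&lexer, actions.as_slice(), ());
    /// ```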
    pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
        &self,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::parse_actions(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
            actions,
            param,
        )
    }
}

/// After a parse error is encountered, the parser attempts to find a way of recovering. Each entry
/// in the sequence of repairs is represented by a `ParseRepair`.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    /// Insert a `Symbol::Token`.
    Insert(TIdx<StorageT>),
    /// Delete a symbol.
    Delete(LexemeT),
    /// Shift a symbol.
    Shift(LexemeT),
}

/// Records a single parse error.
#[derive(Clone, Debug, PartialEq)]
pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    stidx: StIdx<StorageT>,
    lexeme: LexemeT,
    repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Parse error at lexeme {:?}", self.lexeme)
    }
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}

impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
    /// Return the state table index where this error was detected.
    pub fn stidx(&self) -> StIdx<StorageT> {
        self.stidx
    }

    /// Return the lexeme where this error was detected.
    pub fn lexeme(&self) -> &LexemeT {
        &self.lexeme
    }

    /// Return the repairs found that would fix this error. Note that there is an infinite number
    /// of possible repairs for any error, so this is by definition a (finite) subset.
    pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
        &self.repairs
    }
}

#[cfg(test)]
pub(crate) mod test {
    use std::collections::HashMap;

    use cfgrammar::{
        yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
        Span,
    };
    use lrtable::{from_yacc, Minimiser};
    use num_traits::ToPrimitive;
    use regex::Regex;

    use super::*;
    use crate::{
        test_utils::{TestLexError, TestLexeme, TestLexerTypes},
        Lexeme, Lexer,
    };

    pub(crate) fn do_parse<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
    }

    fn do_parse_with_costs<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
        costs: &HashMap<&str, u8>,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        let grm = YaccGrammar::<u16>::new_with_storaget(
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
            grms,
        )
        .unwrap();
        let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
            .collect();
        let lexer_rules = small_lexer(lexs, rule_ids);
        let lexemes = small_lex(lexer_rules, input);
        let lexer = SmallLexer { lexemes, s: input };
        let costs_tidx = costs
            .iter()
            .map(|(k, v)| (grm.token_idx(k).unwrap(), v))
            .collect::<HashMap<_, _>>();
        let (r, errs) = RTParserBuilder::new(&grm, &stable)
            .recoverer(rcvry_kind)
            .term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
            .parse_generictree(&lexer);
        if let Some(node) = r {
            if errs.is_empty() {
                (grm, lexer, Ok(node))
            } else {
                (grm, lexer, Err((Some(node), errs)))
            }
        } else {
            (grm, lexer, Err((None, errs)))
        }
    }

    fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
        let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
        assert_eq!(expected, pt.unwrap().pp(&grm, input));
    }

    // SmallLexer is our highly simplified version of lrlex (allowing us to avoid making lrlex a
    // dependency of lrpar). The format is the same as lrlex *except*:
    //   * The initial "%%" isn't needed, and only "'" is valid as a rule name delimiter.
    //   * "Unnamed" rules aren't allowed (e.g. you can't have a rule which discards whitespaces).
    pub(crate) struct SmallLexer<'input> {
        lexemes: Vec<TestLexeme>,
        s: &'input str,
    }

    impl Lexer<TestLexerTypes> for SmallLexer<'_> {
        fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
            Box::new(self.lexemes.iter().map(|x| Ok(*x)))
        }
    }

    impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
        fn span_str(&self, span: Span) -> &'input str {
            &self.s[span.start()..span.end()]
        }

        fn span_lines_str(&self, _: Span) -> &'input str {
            unreachable!();
        }

        fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
            unreachable!();
        }
    }

    fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
        let mut rules = Vec::new();
        for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
            assert!(l.rfind('\'') == Some(l.len() - 1));
            let i = l[..l.len() - 1].rfind('\'').unwrap();
            let name = &l[i + 1..l.len() - 1];
            let re = &l[..i - 1].trim();
            rules.push((
                ids_map[name],
                Regex::new(&format!("\\A(?:{})", re)).unwrap(),
            ));
        }
        rules
    }

    fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
        let mut lexemes = vec![];
        let mut i = 0;
        while i < input.len() {
            let mut longest = 0; // Length of the longest match
            let mut longest_tok_id = 0; // This is only valid iff longest != 0
            for (tok_id, r) in rules.iter() {
                if let Some(m) = r.find(&input[i..]) {
                    let len = m.end();
                    if len > longest {
                        longest = len;
                        longest_tok_id = *tok_id;
                    }
                }
            }
            assert!(longest > 0);
            lexemes.push(Lexeme::new(longest_tok_id, i, longest));
            i += longest;
        }
        lexemes
    }

    #[test]
    fn simple_parse() {
        // From p4 of https://www.cs.umd.edu/class/spring2014/cmsc430/lectures/lec07.pdf
        check_parse_output(
            "[a-zA-Z_] 'ID'
             \\+ '+'",
            "
%start E
%%
E: T '+' E
 | T;
T: 'ID';
",
            "a+b",
            "E
 T
  ID a
 + +
 E
  T
   ID b
",
        );
    }

    #[test]
    fn parse_empty_rules() {
        let lexs = "[a-zA-Z_] 'ID'";
        let grms = "%start S
%%
S: L;
L: 'ID'
 | ;
";
        check_parse_output(
            lexs, grms, "", "S
 L
",
        );

        check_parse_output(
            lexs,
            grms,
            "x",
            "S
 L
  ID x
",
        );
    }

    #[test]
    fn recursive_parse() {
        let lexs = "\\+ '+'
                    \\* '*'
                    [0-9]+ 'INT'";
        let grms = "%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";

        check_parse_output(
            lexs,
            grms,
            "2+3*4",
            "Expr
 Expr
  Term
   Factor
    INT 2
 + +
 Term
  Term
   Factor
    INT 3
  * *
  Factor
   INT 4
",
        );
        check_parse_output(
            lexs,
            grms,
            "2*3+4",
            "Expr
 Expr
  Term
   Term
    Factor
     INT 2
   * *
   Factor
    INT 3
 + +
 Term
  Factor
   INT 4
",
        );
    }

    #[test]
    fn parse_error() {
        let lexs = "\\( '('
                    \\) ')'
                    [a-zA-Z_][a-zA-Z_0-9]* 'ID'";
        let grms = "%start Call
%%
Call: 'ID' '(' ')';";

        check_parse_output(
            lexs,
            grms,
            "f()",
            "Call
 ID f
 ( (
 ) )
",
        );

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
                assert!(e.lexeme().faulty());
            }
            _ => unreachable!(),
        }

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
                assert!(!e.lexeme().faulty());
            }
            _ => unreachable!(),
        }
    }
}
1364}