Skip to main content

lrpar/
parser.rs

1#![allow(clippy::derive_partial_eq_without_eq)]
2use std::{
3    error::Error,
4    fmt::{self, Debug, Display, Write as _},
5    hash::Hash,
6    marker::PhantomData,
7    vec,
8};
9
10// Can be used on non-wasm32 but to avoid the dependency.
11#[cfg(not(target_arch = "wasm32"))]
12use std::time::{Duration, Instant};
13#[cfg(target_arch = "wasm32")]
14use web_time::{Duration, Instant};
15
16use cactus::Cactus;
17use cfgrammar::{RIdx, Span, TIdx, header::Value, span::Location, yacc::YaccGrammar};
18use lrtable::{Action, StIdx, StateTable};
19use num_traits::{AsPrimitive, PrimInt, Unsigned};
20use proc_macro2::TokenStream;
21use quote::quote;
22#[cfg(feature = "serde")]
23use serde::{Deserialize, Serialize};
24
25use crate::{LexError, Lexeme, LexerTypes, NonStreamingLexer, cpctplus};
26
27#[cfg(test)]
28const RECOVERY_TIME_BUDGET: u64 = 60_000; // milliseconds
29#[cfg(not(test))]
30const RECOVERY_TIME_BUDGET: u64 = 500; // milliseconds
31
32#[deprecated(
33    since = "0.14.0",
34    note = "Use the version of `Node` exported from your `lrpar_mod!`"
35)]
36pub type Node<T, S> = _deprecated_moved_::Node<T, S>;
37
38#[doc(hidden)]
39pub mod _deprecated_moved_ {
40    use super::*;
41    /// A generic parse tree.
42    #[derive(Debug, Clone, PartialEq)]
43    pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
44        /// Terminals store a single lexeme.
45        Term { lexeme: LexemeT },
46        /// Nonterminals reference a rule and have zero or more `Node`s as children.
47        Nonterm {
48            ridx: RIdx<StorageT>,
49            nodes: Vec<Node<LexemeT, StorageT>>,
50        },
51    }
52}
53
54#[allow(deprecated)]
55impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
56where
57    usize: AsPrimitive<StorageT>,
58{
59    /// Return a pretty-printed version of this node.
60    pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
61        let mut st = vec![(0, self)]; // Stack of (indent level, node) pairs
62        let mut s = String::new();
63        while let Some((indent, e)) = st.pop() {
64            for _ in 0..indent {
65                s.push(' ');
66            }
67            match *e {
68                Node::Term { lexeme } => {
69                    let tidx = TIdx(lexeme.tok_id());
70                    let tn = grm.token_name(tidx).unwrap();
71                    let lt = &input[lexeme.span().start()..lexeme.span().end()];
72                    writeln!(s, "{} {}", tn, lt).ok();
73                }
74                Node::Nonterm { ridx, ref nodes } => {
75                    writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
76                    for x in nodes.iter().rev() {
77                        st.push((indent + 1, x));
78                    }
79                }
80            }
81        }
82        s
83    }
84}
85
86type PStack<StorageT> = Vec<StIdx<StorageT>>; // Parse stack
87type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
88type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
89    RIdx<StorageT>,
90    &'b dyn NonStreamingLexer<'input, LexerTypesT>,
91    Span,
92    vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
93    ParamT,
94) -> ActionT;
95
96#[derive(Debug)]
97pub enum AStackType<LexemeT, ActionT> {
98    ActionType(ActionT),
99    Lexeme(LexemeT),
100}
101
102pub(super) struct Parser<
103    'a,
104    'b: 'a,
105    'input: 'b,
106    StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
107    LexerTypesT: LexerTypes<StorageT = StorageT>,
108    ActionT: 'a,
109    ParamT: Clone,
110> where
111    usize: AsPrimitive<StorageT>,
112{
113    rcvry_kind: RecoveryKind,
114    pub(super) grm: &'a YaccGrammar<StorageT>,
115    pub(super) token_cost: TokenCostFn<'a, StorageT>,
116    pub(super) stable: &'a StateTable<StorageT>,
117    lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
118    // In the long term, we should remove the `lexemes` field entirely, as the `NonStreamingLexer` API is
119    // powerful enough to allow us to incrementally obtain lexemes and buffer them when necessary.
120    pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
121    actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
122    param: ParamT,
123}
124
125impl<
126    'a,
127    'b: 'a,
128    'input: 'b,
129    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
130    LexerTypesT: LexerTypes<StorageT = StorageT>,
131    Node,
132>
133    Parser<
134        'a,
135        'b,
136        'input,
137        StorageT,
138        LexerTypesT,
139        Node,
140        (
141            &dyn Fn(LexerTypesT::LexemeT) -> Node,
142            &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
143        ),
144    >
145where
146    usize: AsPrimitive<StorageT>,
147{
148    fn parse_map(
149        rcvry_kind: RecoveryKind,
150        grm: &YaccGrammar<StorageT>,
151        token_cost: TokenCostFn<'a, StorageT>,
152        stable: &StateTable<StorageT>,
153        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
154        lexemes: Vec<LexerTypesT::LexemeT>,
155        fterm: &'a dyn Fn(LexerTypesT::LexemeT) -> Node,
156        fnonterm: &'a dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
157    ) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
158        for tidx in grm.iter_tidxs() {
159            assert!(token_cost(tidx) > 0);
160        }
161        let mut actions: Vec<
162            ActionFn<
163                'a,
164                'b,
165                'input,
166                StorageT,
167                LexerTypesT,
168                Node,
169                (
170                    &'a dyn Fn(LexerTypesT::LexemeT) -> Node,
171                    &'a dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
172                ),
173            >,
174        > = Vec::new();
175        actions.resize(usize::from(grm.prods_len()), &action_map);
176        let psr = Parser {
177            rcvry_kind,
178            grm,
179            token_cost,
180            stable,
181            lexer,
182            lexemes,
183            actions: actions.as_slice(),
184            param: (fterm, fnonterm),
185        };
186        let mut pstack = vec![stable.start_state()];
187        let mut astack = Vec::new();
188        let mut errors = Vec::new();
189        let mut spans = Vec::new();
190        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
191        (accpt, errors)
192    }
193}
194
195fn action_map<StorageT, LexerTypesT: LexerTypes, Node>(
196    ridx: RIdx<StorageT>,
197    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
198    _span: Span,
199    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node>>,
200    param: (
201        &dyn Fn(LexerTypesT::LexemeT) -> Node,
202        &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
203    ),
204) -> Node
205where
206    usize: AsPrimitive<LexerTypesT::StorageT>,
207    LexerTypesT::LexemeT: Lexeme<StorageT>,
208{
209    let (fterm, fnonterm) = param;
210    let mut nodes = Vec::with_capacity(astack.len());
211    for a in astack {
212        nodes.push(match a {
213            AStackType::ActionType(n) => n,
214            AStackType::Lexeme(lexeme) => fterm(lexeme),
215        });
216    }
217    fnonterm(ridx, nodes)
218}
219
220#[deprecated(
221    since = "0.14.0",
222    note = "Deprecated with `parse_generictree` there is no direct replacement, besides a custom action"
223)]
224#[allow(deprecated)]
225/// The action which implements [`cfgrammar::yacc::YaccOriginalActionKind::GenericParseTree`].
226/// Usually you should just use the action kind directly. But you can also call this from
227/// within a custom action to return a generic parse tree with custom behavior.
228pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
229    ridx: RIdx<StorageT>,
230    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
231    _span: Span,
232    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
233    _param: (),
234) -> Node<LexerTypesT::LexemeT, StorageT>
235where
236    usize: AsPrimitive<LexerTypesT::StorageT>,
237    LexerTypesT::LexemeT: Lexeme<StorageT>,
238{
239    let mut nodes = Vec::with_capacity(astack.len());
240    for a in astack {
241        nodes.push(match a {
242            AStackType::ActionType(n) => n,
243            AStackType::Lexeme(lexeme) => Node::Term { lexeme },
244        });
245    }
246    Node::Nonterm { ridx, nodes }
247}
248
249impl<
250    'a,
251    'b: 'a,
252    'input: 'b,
253    StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
254    LexerTypesT: LexerTypes<StorageT = StorageT>,
255    ActionT: 'a,
256    ParamT: Clone,
257> Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
258where
259    usize: AsPrimitive<StorageT>,
260{
261    fn parse_actions(
262        rcvry_kind: RecoveryKind,
263        grm: &'a YaccGrammar<StorageT>,
264        token_cost: TokenCostFn<'a, StorageT>,
265        stable: &'a StateTable<StorageT>,
266        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
267        lexemes: Vec<LexerTypesT::LexemeT>,
268        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
269        param: ParamT,
270    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
271        for tidx in grm.iter_tidxs() {
272            assert!(token_cost(tidx) > 0);
273        }
274        let psr = Parser {
275            rcvry_kind,
276            grm,
277            token_cost,
278            stable,
279            lexer,
280            lexemes,
281            actions,
282            param,
283        };
284        let mut pstack = vec![stable.start_state()];
285        let mut astack = Vec::new();
286        let mut errors = Vec::new();
287        let mut spans = Vec::new();
288        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
289        (accpt, errors)
290    }
291
292    /// Start parsing text at `laidx` (using the lexeme in `lexeme_prefix`, if it is not `None`,
293    /// as the first lexeme) up to (but excluding) `end_laidx` (if it's specified). Parsing
294    /// continues as long as possible (assuming that any errors encountered can be recovered from)
295    /// unless `end_laidx` is `None`, at which point this function returns as soon as it
296    /// encounters an error.
297    ///
298    /// Note that if `lexeme_prefix` is specified, `laidx` will still be incremented, and thus
299    /// `end_laidx` *must* be set to `laidx + 1` in order that the parser doesn't skip the real
300    /// lexeme at position `laidx`.
301    ///
302    /// Return `Some(value)` if the parse reached an accept state (i.e. all the input was consumed,
303    /// possibly after making repairs) or `None` (i.e. some of the input was not consumed, even
304    /// after possibly making repairs) otherwise.
305    fn lr(
306        &self,
307        mut laidx: usize,
308        pstack: &mut PStack<StorageT>,
309        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
310        errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
311        spans: &mut Vec<Span>,
312    ) -> Option<ActionT> {
313        let mut recoverer = None;
314        let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
315        loop {
316            debug_assert_eq!(astack.len(), spans.len());
317            let stidx = *pstack.last().unwrap();
318            let la_tidx = self.next_tidx(laidx);
319
320            match self.stable.action(stidx, la_tidx) {
321                Action::Reduce(pidx) => {
322                    let ridx = self.grm.prod_to_rule(pidx);
323                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();
324
325                    pstack.drain(pop_idx..);
326                    let prior = *pstack.last().unwrap();
327                    pstack.push(self.stable.goto(prior, ridx).unwrap());
328
329                    let span = if spans.is_empty() {
330                        Span::new(0, 0)
331                    } else if pop_idx - 1 < spans.len() {
332                        Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
333                    } else {
334                        Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
335                    };
336                    spans.truncate(pop_idx - 1);
337                    spans.push(span);
338
339                    let v = AStackType::ActionType(self.actions[usize::from(pidx)](
340                        ridx,
341                        self.lexer,
342                        span,
343                        astack.drain(pop_idx - 1..),
344                        self.param.clone(),
345                    ));
346                    astack.push(v);
347                }
348                Action::Shift(state_id) => {
349                    let la_lexeme = self.next_lexeme(laidx);
350                    pstack.push(state_id);
351                    astack.push(AStackType::Lexeme(la_lexeme));
352
353                    spans.push(la_lexeme.span());
354                    laidx += 1;
355                }
356                Action::Accept => {
357                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
358                    debug_assert_eq!(astack.len(), 1);
359                    match astack.drain(..).next().unwrap() {
360                        AStackType::ActionType(v) => return Some(v),
361                        _ => unreachable!(),
362                    }
363                }
364                Action::Error => {
365                    if recoverer.is_none() {
366                        recoverer = Some(match self.rcvry_kind {
367                            RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
368                            RecoveryKind::None => {
369                                let la_lexeme = self.next_lexeme(laidx);
370                                errors.push(
371                                    ParseError {
372                                        stidx,
373                                        lexeme: la_lexeme,
374                                        repairs: vec![],
375                                    }
376                                    .into(),
377                                );
378                                return None;
379                            }
380                        });
381                    }
382
383                    let before = Instant::now();
384                    let finish_by = before + recovery_budget;
385                    let (new_laidx, repairs) = recoverer
386                        .as_ref()
387                        .unwrap()
388                        .as_ref()
389                        .recover(finish_by, self, laidx, pstack, astack, spans);
390                    let after = Instant::now();
391                    recovery_budget = recovery_budget
392                        .checked_sub(after - before)
393                        .unwrap_or_else(|| Duration::new(0, 0));
394                    let keep_going = !repairs.is_empty();
395                    let la_lexeme = self.next_lexeme(laidx);
396                    errors.push(
397                        ParseError {
398                            stidx,
399                            lexeme: la_lexeme,
400                            repairs,
401                        }
402                        .into(),
403                    );
404                    if !keep_going {
405                        return None;
406                    }
407                    laidx = new_laidx;
408                }
409            }
410        }
411    }
412
413    /// Parse from `laidx` up to (but excluding) `end_laidx` mutating `pstack` as parsing occurs.
414    /// Returns the index of the token it parsed up to (by definition <= end_laidx: can be less if
415    /// the input is < end_laidx, or if an error is encountered). Does not do any form of error
416    /// recovery.
417    pub(super) fn lr_upto(
418        &self,
419        lexeme_prefix: Option<LexerTypesT::LexemeT>,
420        mut laidx: usize,
421        end_laidx: usize,
422        pstack: &mut PStack<StorageT>,
423        astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
424        spans: &mut Option<&mut Vec<Span>>,
425    ) -> usize {
426        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
427        while laidx != end_laidx && laidx <= self.lexemes.len() {
428            let stidx = *pstack.last().unwrap();
429            let la_tidx = if let Some(l) = lexeme_prefix {
430                TIdx(l.tok_id())
431            } else {
432                self.next_tidx(laidx)
433            };
434
435            match self.stable.action(stidx, la_tidx) {
436                Action::Reduce(pidx) => {
437                    let ridx = self.grm.prod_to_rule(pidx);
438                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();
439                    if let Some(ref mut astack_uw) = *astack {
440                        if let Some(ref mut spans_uw) = *spans {
441                            let span = if spans_uw.is_empty() {
442                                Span::new(0, 0)
443                            } else if pop_idx - 1 < spans_uw.len() {
444                                Span::new(
445                                    spans_uw[pop_idx - 1].start(),
446                                    spans_uw[spans_uw.len() - 1].end(),
447                                )
448                            } else {
449                                Span::new(
450                                    spans_uw[spans_uw.len() - 1].start(),
451                                    spans_uw[spans_uw.len() - 1].end(),
452                                )
453                            };
454                            spans_uw.truncate(pop_idx - 1);
455                            spans_uw.push(span);
456
457                            let v = AStackType::ActionType(self.actions[usize::from(pidx)](
458                                ridx,
459                                self.lexer,
460                                span,
461                                astack_uw.drain(pop_idx - 1..),
462                                self.param.clone(),
463                            ));
464                            astack_uw.push(v);
465                        } else {
466                            unreachable!();
467                        }
468                    }
469
470                    pstack.drain(pop_idx..);
471                    let prior = *pstack.last().unwrap();
472                    pstack.push(self.stable.goto(prior, ridx).unwrap());
473                }
474                Action::Shift(state_id) => {
475                    if let Some(ref mut astack_uw) = *astack {
476                        if let Some(spans_uw) = spans {
477                            let la_lexeme = if let Some(l) = lexeme_prefix {
478                                l
479                            } else {
480                                self.next_lexeme(laidx)
481                            };
482                            astack_uw.push(AStackType::Lexeme(la_lexeme));
483                            spans_uw.push(la_lexeme.span());
484                        }
485                    }
486                    pstack.push(state_id);
487                    laidx += 1;
488                }
489                Action::Accept => {
490                    break;
491                }
492                Action::Error => {
493                    break;
494                }
495            }
496        }
497        laidx
498    }
499
500    /// Return a `Lexeme` for the next lemexe (if `laidx` == `self.lexemes.len()` this will be
501    /// a lexeme constructed to look as if contains the EOF token).
502    pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
503        let llen = self.lexemes.len();
504        debug_assert!(laidx <= llen);
505        if laidx < llen {
506            self.lexemes[laidx]
507        } else {
508            // We have to artificially construct a Lexeme for the EOF lexeme.
509            let last_la_end = if llen == 0 {
510                0
511            } else {
512                debug_assert!(laidx > 0);
513                let last_la = self.lexemes[laidx - 1];
514                last_la.span().end()
515            };
516
517            Lexeme::new_faulty(
518                StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
519                last_la_end,
520                0,
521            )
522        }
523    }
524
525    /// Return the `TIdx` of the next lexeme (if `laidx` == `self.lexemes.len()` this will be the
526    /// EOF `TIdx`).
527    pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
528        let ll = self.lexemes.len();
529        debug_assert!(laidx <= ll);
530        if laidx < ll {
531            TIdx(self.lexemes[laidx].tok_id())
532        } else {
533            self.grm.eof_token_idx()
534        }
535    }
536
537    /// Start parsing text at `laidx` (using the lexeme in `lexeme_prefix`, if it is not `None`,
538    /// as the first lexeme) up to (but excluding) `end_laidx`. If an error is encountered, parsing
539    /// immediately terminates (without recovery).
540    ///
541    /// Note that if `lexeme_prefix` is specified, `laidx` will still be incremented, and thus
542    /// `end_laidx` *must* be set to `laidx + 1` in order that the parser doesn't skip the real
543    /// lexeme at position `laidx`.
544    #[allow(deprecated)]
545    pub(super) fn lr_cactus(
546        &self,
547        lexeme_prefix: Option<LexerTypesT::LexemeT>,
548        mut laidx: usize,
549        end_laidx: usize,
550        mut pstack: Cactus<StIdx<StorageT>>,
551        tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
552    ) -> (usize, Cactus<StIdx<StorageT>>) {
553        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
554        while laidx != end_laidx {
555            let stidx = *pstack.val().unwrap();
556            let la_tidx = if let Some(l) = lexeme_prefix {
557                TIdx(l.tok_id())
558            } else {
559                self.next_tidx(laidx)
560            };
561
562            match self.stable.action(stidx, la_tidx) {
563                Action::Reduce(pidx) => {
564                    let ridx = self.grm.prod_to_rule(pidx);
565                    let pop_num = self.grm.prod(pidx).len();
566                    if let Some(ref mut tstack_uw) = *tstack {
567                        let nodes = tstack_uw
568                            .drain(pstack.len() - pop_num - 1..)
569                            .collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
570                        tstack_uw.push(Node::Nonterm { ridx, nodes });
571                    }
572
573                    for _ in 0..pop_num {
574                        pstack = pstack.parent().unwrap();
575                    }
576                    let prior = *pstack.val().unwrap();
577                    pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
578                }
579                Action::Shift(state_id) => {
580                    if let Some(ref mut tstack_uw) = *tstack {
581                        let la_lexeme = if let Some(l) = lexeme_prefix {
582                            l
583                        } else {
584                            self.next_lexeme(laidx)
585                        };
586                        tstack_uw.push(Node::Term { lexeme: la_lexeme });
587                    }
588                    pstack = pstack.child(state_id);
589                    laidx += 1;
590                }
591                Action::Accept => {
592                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
593                    if let Some(ref tstack_uw) = *tstack {
594                        debug_assert_eq!(tstack_uw.len(), 1);
595                    }
596                    break;
597                }
598                Action::Error => {
599                    break;
600                }
601            }
602        }
603        (laidx, pstack)
604    }
605}
606
607pub(super) trait Recoverer<
608    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
609    LexerTypesT: LexerTypes<StorageT = StorageT>,
610    ActionT,
611    ParamT: Clone,
612> where
613    usize: AsPrimitive<StorageT>,
614{
615    fn recover(
616        &self,
617        finish_by: Instant,
618        parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
619        in_laidx: usize,
620        in_pstack: &mut PStack<StorageT>,
621        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
622        spans: &mut Vec<Span>,
623    ) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
624}
625
626/// What recovery algorithm should be used when a syntax error is encountered?
627#[derive(Clone, Copy, Debug)]
628#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
629#[non_exhaustive]
630pub enum RecoveryKind {
631    /// The CPCT+ algorithm from Diekmann/Tratt "Don't Panic! Better, Fewer, Syntax Errors for LR
632    /// Parsers".
633    CPCTPlus,
634    /// Don't use error recovery: return as soon as the first syntax error is encountered.
635    None,
636}
637
638impl TryFrom<RecoveryKind> for Value<Location> {
639    type Error = cfgrammar::header::HeaderError<Location>;
640    fn try_from(rk: RecoveryKind) -> Result<Value<Location>, Self::Error> {
641        use cfgrammar::{
642            Location,
643            header::{Namespaced, Setting},
644        };
645        let from_loc = Location::Other("From<RecoveryKind>".to_string());
646        Ok(match rk {
647            RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced {
648                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
649                member: ("CPCTPlus".to_string(), from_loc.clone()),
650            })),
651            RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced {
652                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
653                member: ("None".to_string(), from_loc.clone()),
654            })),
655        })
656    }
657}
658
659impl TryFrom<&Value<Location>> for RecoveryKind {
660    type Error = cfgrammar::header::HeaderError<Location>;
661    fn try_from(rk: &Value<Location>) -> Result<RecoveryKind, Self::Error> {
662        use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting};
663
664        match rk {
665            Value::Flag(_, loc) => Err(HeaderError {
666                kind: HeaderErrorKind::ConversionError(
667                    "RecoveryKind",
668                    "Cannot convert boolean to RecoveryKind",
669                ),
670                locations: vec![loc.clone()],
671            }),
672            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
673                kind: HeaderErrorKind::ConversionError(
674                    "RecoveryKind",
675                    "Cannot convert number to RecoveryKind",
676                ),
677                locations: vec![loc.clone()],
678            }),
679            Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
680                kind: HeaderErrorKind::ConversionError(
681                    "RecoveryKind",
682                    "Cannot convert string to RecoveryKind",
683                ),
684                locations: vec![loc.clone()],
685            }),
686            Value::Setting(Setting::Unitary(Namespaced {
687                namespace,
688                member: (kind, kind_loc),
689            })) => {
690                match namespace {
691                    Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => {
692                        return Err(HeaderError {
693                            kind: HeaderErrorKind::ConversionError(
694                                "RecoveryKind",
695                                "Unknown namespace",
696                            ),
697                            locations: vec![loc.clone()],
698                        });
699                    }
700                    _ => {}
701                }
702                match kind.to_lowercase().as_ref() {
703                    "cpctplus" => Ok(RecoveryKind::CPCTPlus),
704                    "none" => Ok(RecoveryKind::None),
705                    _ => Err(HeaderError {
706                        kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"),
707                        locations: vec![kind_loc.clone()],
708                    }),
709                }
710            }
711            Value::Setting(Setting::Constructor {
712                ctor: _,
713                arg:
714                    Namespaced {
715                        namespace: _,
716                        member: (_, arg_loc),
717                    },
718            }) => Err(HeaderError {
719                kind: HeaderErrorKind::ConversionError(
720                    "RecoveryKind",
721                    "Cannot convert constructor to RecoveryKind",
722                ),
723                locations: vec![arg_loc.clone()],
724            }),
725            Value::Setting(Setting::Array(_, arr_loc, _)) => Err(HeaderError {
726                kind: HeaderErrorKind::ConversionError(
727                    "RecoveryKind",
728                    "Cannot convert array to RecoveryKind",
729                ),
730                locations: vec![arr_loc.clone()],
731            }),
732        }
733    }
734}
735
736impl quote::ToTokens for RecoveryKind {
737    fn to_tokens(&self, tokens: &mut TokenStream) {
738        tokens.extend(match *self {
739            RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
740            RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
741        })
742    }
743}
744
745/// A lexing or parsing error. Although the two are quite distinct in terms of what can be reported
746/// to users, both can (at least conceptually) occur at any point of the intertwined lexing/parsing
747/// process.
748#[derive(Debug)]
749pub enum LexParseError<
750    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
751    LexerTypesT: LexerTypes<StorageT = StorageT>,
752> where
753    usize: AsPrimitive<StorageT>,
754{
755    LexError(LexerTypesT::LexErrorT),
756    ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
757}
758
759impl<StorageT: Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
760    LexParseError<StorageT, LexerTypesT>
761where
762    usize: AsPrimitive<StorageT>,
763{
764    /// A pretty-printer of a lexer/parser error. This isn't suitable for all purposes, but it's
765    /// often good enough. The output format is not guaranteed to be stable but is likely to be of
766    /// the form:
767    ///
768    /// ```text
769    /// Parsing error at line 3 column 8. Repair sequences found:
770    ///   1: Insert ID
771    ///   2: Delete +, Shift 3
772    /// ```
773    ///
774    /// If you are using the compile-time parse system, your `grm_y` module exposes a suitable
775    /// [`epp`](../ctbuilder/struct.CTParserBuilder.html#method.process_file) function; if you are
776    /// using the run-time system,
777    /// [`YaccGrammar`](../../cfgrammar/yacc/grammar/struct.YaccGrammar.html) exposes a suitable
778    /// [`epp`](../../cfgrammar/yacc/grammar/struct.YaccGrammar.html#method.token_epp) function,
779    /// though you will have to wrap it in a closure e.g. `&|t| grm.token_epp(t)`.
780    pub fn pp<'a>(
781        &self,
782        lexer: &dyn NonStreamingLexer<LexerTypesT>,
783        epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
784    ) -> String {
785        match self {
786            LexParseError::LexError(e) => {
787                let ((line, col), _) = lexer.line_col(e.span());
788                format!("Lexing error at line {} column {}.", line, col)
789            }
790            LexParseError::ParseError(e) => {
791                let ((line, col), _) = lexer.line_col(e.lexeme().span());
792                let mut out = format!("Parsing error at line {} column {}.", line, col);
793                let repairs_len = e.repairs().len();
794                if repairs_len == 0 {
795                    out.push_str(" No repair sequences found.");
796                } else {
797                    out.push_str(" Repair sequences found:");
798                    for (i, rs) in e.repairs().iter().enumerate() {
799                        let padding = ((repairs_len as f64).log10() as usize)
800                            - (((i + 1) as f64).log10() as usize)
801                            + 1;
802                        write!(out, "\n  {}{}: ", " ".repeat(padding), i + 1).ok();
803                        let mut rs_out = Vec::new();
804
805                        // Merge together Deletes iff they are consecutive (if they are separated
806                        // by even a single character, they will not be merged).
807                        let mut i = 0;
808                        while i < rs.len() {
809                            match rs[i] {
810                                ParseRepair::Delete(l) => {
811                                    let mut j = i + 1;
812                                    let mut last_end = l.span().end();
813                                    while j < rs.len() {
814                                        if let ParseRepair::Delete(next_l) = rs[j] {
815                                            if next_l.span().start() == last_end {
816                                                last_end = next_l.span().end();
817                                                j += 1;
818                                                continue;
819                                            }
820                                        }
821                                        break;
822                                    }
823                                    let t = &lexer
824                                        .span_str(Span::new(l.span().start(), last_end))
825                                        .replace('\n', "\\n");
826                                    rs_out.push(format!("Delete {}", t));
827                                    i = j;
828                                }
829                                ParseRepair::Insert(tidx) => {
830                                    rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
831                                    i += 1;
832                                }
833                                ParseRepair::Shift(l) => {
834                                    let t = &lexer.span_str(l.span()).replace('\n', "\\n");
835                                    rs_out.push(format!("Shift {}", t));
836                                    i += 1;
837                                }
838                            }
839                        }
840
841                        out.push_str(&rs_out.join(", "));
842                    }
843                }
844                out
845            }
846        }
847    }
848}
849
850impl<
851    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
852    LexerTypesT: LexerTypes<StorageT = StorageT>,
853> fmt::Display for LexParseError<StorageT, LexerTypesT>
854where
855    usize: AsPrimitive<StorageT>,
856{
857    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
858        match *self {
859            LexParseError::LexError(ref e) => Display::fmt(e, f),
860            LexParseError::ParseError(ref e) => Display::fmt(e, f),
861        }
862    }
863}
864
865impl<
866    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
867    LexerTypesT: LexerTypes<StorageT = StorageT>,
868> Error for LexParseError<StorageT, LexerTypesT>
869where
870    usize: AsPrimitive<StorageT>,
871{
872}
873
874impl<
875    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
876    LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
877    T: LexError,
878> From<T> for LexParseError<StorageT, LexerTypesT>
879where
880    usize: AsPrimitive<StorageT>,
881{
882    fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
883        LexParseError::LexError(err)
884    }
885}
886
887impl<
888    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
889    LexerTypesT: LexerTypes<StorageT = StorageT>,
890> From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
891where
892    usize: AsPrimitive<StorageT>,
893{
894    fn from(
895        err: ParseError<LexerTypesT::LexemeT, StorageT>,
896    ) -> LexParseError<StorageT, LexerTypesT> {
897        LexParseError::ParseError(err)
898    }
899}
900
901/// A run-time parser builder.
902pub struct RTParserBuilder<
903    'a,
904    StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
905    LexerTypesT: LexerTypes<StorageT = StorageT>,
906> where
907    usize: AsPrimitive<StorageT>,
908{
909    grm: &'a YaccGrammar<StorageT>,
910    stable: &'a StateTable<StorageT>,
911    recoverer: RecoveryKind,
912    term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
913    phantom: PhantomData<(StorageT, LexerTypesT)>,
914}
915
916impl<
917    'a,
918    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
919    LexerTypesT: LexerTypes<StorageT = StorageT>,
920> RTParserBuilder<'a, StorageT, LexerTypesT>
921where
922    usize: AsPrimitive<StorageT>,
923{
924    /// Create a new run-time parser from a `YaccGrammar`, and a `StateTable`.
925    pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
926        RTParserBuilder {
927            grm,
928            stable,
929            recoverer: RecoveryKind::CPCTPlus,
930            term_costs: &|_| 1,
931            phantom: PhantomData,
932        }
933    }
934
935    /// Set the recoverer for this parser to `rk`.
936    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
937        self.recoverer = rk;
938        self
939    }
940
941    pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
942        self.term_costs = f;
943        self
944    }
945
946    #[deprecated(
947        since = "0.14.0",
948        note = "Use `parse_map` to return a `Node` from your `lrpar_mod!` instead"
949    )]
950    #[allow(deprecated)]
951    /// Parse input, and (if possible) return a generic parse tree. See the arguments for
952    /// [`parse_actions`](#method.parse_actions) for more details about the return value.
953    pub fn parse_generictree(
954        &self,
955        lexer: &dyn NonStreamingLexer<LexerTypesT>,
956    ) -> (
957        Option<Node<LexerTypesT::LexemeT, StorageT>>,
958        Vec<LexParseError<StorageT, LexerTypesT>>,
959    ) {
960        self.parse_map(lexer, &|lexeme| Node::Term { lexeme }, &|ridx, nodes| {
961            Node::Nonterm { ridx, nodes }
962        })
963    }
964
965    /// Parse input, and (if possible) return a generic parse tree. See the arguments for
966    /// [`parse_actions`](#method.parse_actions) for more details about the return value.
967    pub fn parse_map<Node>(
968        &self,
969        lexer: &dyn NonStreamingLexer<LexerTypesT>,
970        fterm: &dyn Fn(LexerTypesT::LexemeT) -> Node,
971        fnonterm: &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
972    ) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
973        let lexemes = match lexer.iter().collect() {
974            Ok(lexemes) => lexemes,
975            Err(e) => return (None, vec![e.into()]),
976        };
977        Parser::<
978            StorageT,
979            LexerTypesT,
980            Node,
981            (
982                &dyn Fn(LexerTypesT::LexemeT) -> Node,
983                &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
984            ),
985        >::parse_map(
986            self.recoverer,
987            self.grm,
988            self.term_costs,
989            self.stable,
990            lexer,
991            lexemes,
992            fterm,
993            fnonterm,
994        )
995    }
996
997    #[deprecated(since = "0.14.0", note = "Use `parse_map` instead")]
998    /// Parse input, returning any errors found. See the arguments for
999    /// [`parse_actions`](#method.parse_actions) for more details about the return value.
1000    pub fn parse_noaction(
1001        &self,
1002        lexer: &dyn NonStreamingLexer<LexerTypesT>,
1003    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
1004        self.parse_map(lexer, &|_| (), &|_, _| ()).1
1005    }
1006
1007    /// Parse input, execute actions, and return the associated value (if possible) and/or any
1008    /// lexing/parsing errors encountered. Note that the two parts of the (value, errors) return
1009    /// pair are entirely independent: one can encounter errors without a value being produced
1010    /// (`None, [...]`), errors and a value (`Some(...), [...]`), as well as a value and no errors
1011    /// (`Some(...), []`). Errors are sorted by the position they were found in the input and can
1012    /// be a mix of lexing and parsing errors.
1013    pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
1014        &self,
1015        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
1016        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
1017        param: ParamT,
1018    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
1019        let lexemes = match lexer.iter().collect() {
1020            Ok(lexemes) => lexemes,
1021            Err(e) => return (None, vec![e.into()]),
1022        };
1023        Parser::parse_actions(
1024            self.recoverer,
1025            self.grm,
1026            self.term_costs,
1027            self.stable,
1028            lexer,
1029            lexemes,
1030            actions,
1031            param,
1032        )
1033    }
1034
1035    pub fn grammar(&self) -> &YaccGrammar<StorageT> {
1036        self.grm
1037    }
1038}
1039
1040/// After a parse error is encountered, the parser attempts to find a way of recovering. Each entry
1041/// in the sequence of repairs is represented by a `ParseRepair`.
1042#[derive(Clone, Debug, Eq, Hash, PartialEq)]
1043pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
1044    /// Insert a `Symbol::Token`.
1045    Insert(TIdx<StorageT>),
1046    /// Delete a symbol.
1047    Delete(LexemeT),
1048    /// Shift a symbol.
1049    Shift(LexemeT),
1050}
1051
1052/// Records a single parse error.
1053#[derive(Clone, Debug, PartialEq)]
1054pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
1055    stidx: StIdx<StorageT>,
1056    lexeme: LexemeT,
1057    repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
1058}
1059
1060impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
1061    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1062        write!(f, "Parse error at lexeme {:?}", self.lexeme)
1063    }
1064}
1065
1066impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}
1067
1068impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
1069    /// Return the state table index where this error was detected.
1070    pub fn stidx(&self) -> StIdx<StorageT> {
1071        self.stidx
1072    }
1073
1074    /// Return the lexeme where this error was detected.
1075    pub fn lexeme(&self) -> &LexemeT {
1076        &self.lexeme
1077    }
1078
1079    /// Return the repairs found that would fix this error. Note that there are infinite number of
1080    /// possible repairs for any error, so this is by definition a (finite) subset.
1081    pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
1082        &self.repairs
1083    }
1084}
1085
1086#[cfg(test)]
1087pub(crate) mod test {
1088    use std::collections::HashMap;
1089
1090    use cfgrammar::{
1091        Span,
1092        yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
1093    };
1094    use lrtable::{Minimiser, from_yacc};
1095    use num_traits::ToPrimitive;
1096    use regex::Regex;
1097
1098    use super::*;
1099    use crate::{
1100        Lexeme, Lexer,
1101        test_utils::{TestLexError, TestLexeme, TestLexerTypes},
1102    };
1103
1104    #[allow(deprecated)]
1105    pub(crate) fn do_parse<'input>(
1106        rcvry_kind: RecoveryKind,
1107        lexs: &str,
1108        grms: &str,
1109        input: &'input str,
1110    ) -> (
1111        YaccGrammar<u16>,
1112        SmallLexer<'input>,
1113        Result<
1114            Node<TestLexeme, u16>,
1115            (
1116                Option<Node<TestLexeme, u16>>,
1117                Vec<LexParseError<u16, TestLexerTypes>>,
1118            ),
1119        >,
1120    ) {
1121        do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
1122    }
1123
1124    #[allow(deprecated)]
1125    fn do_parse_with_costs<'input>(
1126        rcvry_kind: RecoveryKind,
1127        lexs: &str,
1128        grms: &str,
1129        input: &'input str,
1130        costs: &HashMap<&str, u8>,
1131    ) -> (
1132        YaccGrammar<u16>,
1133        SmallLexer<'input>,
1134        Result<
1135            Node<TestLexeme, u16>,
1136            (
1137                Option<Node<TestLexeme, u16>>,
1138                Vec<LexParseError<u16, TestLexerTypes>>,
1139            ),
1140        >,
1141    ) {
1142        let grm = YaccGrammar::<u16>::new_with_storaget(
1143            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1144            grms,
1145        )
1146        .unwrap();
1147        let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
1148        let rule_ids = grm
1149            .tokens_map()
1150            .iter()
1151            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
1152            .collect();
1153        let lexer_rules = small_lexer(lexs, rule_ids);
1154        let lexemes = small_lex(lexer_rules, input);
1155        let lexer = SmallLexer { lexemes, s: input };
1156        let costs_tidx = costs
1157            .iter()
1158            .map(|(k, v)| (grm.token_idx(k).unwrap(), v))
1159            .collect::<HashMap<_, _>>();
1160        let (r, errs) = RTParserBuilder::new(&grm, &stable)
1161            .recoverer(rcvry_kind)
1162            .term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
1163            .parse_generictree(&lexer);
1164        if let Some(node) = r {
1165            if errs.is_empty() {
1166                (grm, lexer, Ok(node))
1167            } else {
1168                (grm, lexer, Err((Some(node), errs)))
1169            }
1170        } else {
1171            (grm, lexer, Err((None, errs)))
1172        }
1173    }
1174
1175    fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
1176        let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
1177        assert_eq!(expected, pt.unwrap().pp(&grm, input));
1178    }
1179
1180    // SmallLexer is our highly simplified version of lrlex (allowing us to avoid having to have
1181    // lrlex as a dependency of lrpar). The format is the same as lrlex *except*:
1182    //   * The initial "%%" isn't needed, and only "'" is valid as a rule name delimiter.
1183    //   * "Unnamed" rules aren't allowed (e.g. you can't have a rule which discards whitespaces).
1184    pub(crate) struct SmallLexer<'input> {
1185        lexemes: Vec<TestLexeme>,
1186        s: &'input str,
1187    }
1188
1189    impl Lexer<TestLexerTypes> for SmallLexer<'_> {
1190        fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
1191            Box::new(self.lexemes.iter().map(|x| Ok(*x)))
1192        }
1193    }
1194
1195    impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
1196        fn span_str(&self, span: Span) -> &'input str {
1197            &self.s[span.start()..span.end()]
1198        }
1199
1200        fn span_lines_str(&self, _: Span) -> &'input str {
1201            unreachable!();
1202        }
1203
1204        fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
1205            unreachable!();
1206        }
1207    }
1208
1209    fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
1210        let mut rules = Vec::new();
1211        for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
1212            assert!(l.rfind('\'') == Some(l.len() - 1));
1213            let i = l[..l.len() - 1].rfind('\'').unwrap();
1214            let name = &l[i + 1..l.len() - 1];
1215            let re = &l[..i - 1].trim();
1216            rules.push((
1217                ids_map[name],
1218                Regex::new(&format!("\\A(?:{})", re)).unwrap(),
1219            ));
1220        }
1221        rules
1222    }
1223
1224    fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
1225        let mut lexemes = vec![];
1226        let mut i = 0;
1227        while i < input.len() {
1228            let mut longest = 0; // Length of the longest match
1229            let mut longest_tok_id = 0; // This is only valid iff longest != 0
1230            for (tok_id, r) in rules.iter() {
1231                if let Some(m) = r.find(&input[i..]) {
1232                    let len = m.end();
1233                    if len > longest {
1234                        longest = len;
1235                        longest_tok_id = *tok_id;
1236                    }
1237                }
1238            }
1239            assert!(longest > 0);
1240            lexemes.push(Lexeme::new(longest_tok_id, i, longest));
1241            i += longest;
1242        }
1243        lexemes
1244    }
1245
1246    #[test]
1247    fn simple_parse() {
1248        // From p4 of https://www.cs.umd.edu/class/spring2014/cmsc430/lectures/lec07.pdf
1249        check_parse_output(
1250            "[a-zA-Z_] 'ID'
1251             \\+ '+'",
1252            "
1253%start E
1254%%
1255E: T '+' E
1256 | T;
1257T: 'ID';
1258",
1259            "a+b",
1260            "E
1261 T
1262  ID a
1263 + +
1264 E
1265  T
1266   ID b
1267",
1268        );
1269    }
1270
1271    #[test]
1272    fn parse_empty_rules() {
1273        let lexs = "[a-zA-Z_] 'ID'";
1274        let grms = "%start S
1275%%
1276S: L;
1277L: 'ID'
1278 | ;
1279";
1280        check_parse_output(
1281            lexs, grms, "", "S
1282 L
1283",
1284        );
1285
1286        check_parse_output(
1287            lexs,
1288            grms,
1289            "x",
1290            "S
1291 L
1292  ID x
1293",
1294        );
1295    }
1296
1297    #[test]
1298    fn recursive_parse() {
1299        let lexs = "\\+ '+'
1300                    \\* '*'
1301                    [0-9]+ 'INT'";
1302        let grms = "%start Expr
1303%%
1304Expr : Expr '+' Term | Term;
1305Term : Term '*' Factor | Factor;
1306Factor : 'INT';";
1307
1308        check_parse_output(
1309            lexs,
1310            grms,
1311            "2+3*4",
1312            "Expr
1313 Expr
1314  Term
1315   Factor
1316    INT 2
1317 + +
1318 Term
1319  Term
1320   Factor
1321    INT 3
1322  * *
1323  Factor
1324   INT 4
1325",
1326        );
1327        check_parse_output(
1328            lexs,
1329            grms,
1330            "2*3+4",
1331            "Expr
1332 Expr
1333  Term
1334   Term
1335    Factor
1336     INT 2
1337   * *
1338   Factor
1339    INT 3
1340 + +
1341 Term
1342  Factor
1343   INT 4
1344",
1345        );
1346    }
1347
1348    #[test]
1349    fn parse_error() {
1350        let lexs = "\\( '('
1351                    \\) ')'
1352                    [a-zA-Z_][a-zA-Z_0-9]* 'ID'";
1353        let grms = "%start Call
1354%%
1355Call: 'ID' '(' ')';";
1356
1357        check_parse_output(
1358            lexs,
1359            grms,
1360            "f()",
1361            "Call
1362 ID f
1363 ( (
1364 ) )
1365",
1366        );
1367
1368        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
1369        let (_, errs) = pr.unwrap_err();
1370        assert_eq!(errs.len(), 1);
1371        let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
1372        match &errs[0] {
1373            LexParseError::ParseError(e) => {
1374                assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
1375                assert!(e.lexeme().faulty());
1376            }
1377            _ => unreachable!(),
1378        }
1379
1380        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
1381        let (_, errs) = pr.unwrap_err();
1382        assert_eq!(errs.len(), 1);
1383        let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
1384        match &errs[0] {
1385            LexParseError::ParseError(e) => {
1386                assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
1387                assert!(!e.lexeme().faulty());
1388            }
1389            _ => unreachable!(),
1390        }
1391    }
1392
1393    #[test]
1394    fn test_parse_map() {
1395        #[derive(PartialEq, Debug)]
1396        enum TestParseMap<'a> {
1397            Term(&'a str, &'a str),
1398            NonTerm(&'a str, Vec<TestParseMap<'a>>),
1399        }
1400        let lex_src = r#"[0-9]+ 'INT'
1401\+ '+'
1402\* '*'
1403"#;
1404        let grammar_src = "
1405%grmtools{YaccKind: Original(NoAction)}
1406%start Expr
1407%%
1408Expr : Expr '+' Term | Term;
1409Term : Term '*' Factor | Factor;
1410Factor : 'INT';";
1411        let input_src = "2*3+4";
1412        let grm = str::parse::<YaccGrammar<u16>>(grammar_src).unwrap();
1413        let (_, stable) = lrtable::from_yacc(&grm, lrtable::Minimiser::Pager).unwrap();
1414        let rt_parser = RTParserBuilder::new(&grm, &stable);
1415        let rule_ids = grm
1416            .tokens_map()
1417            .iter()
1418            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
1419            .collect();
1420        let lexer_rules = small_lexer(lex_src, rule_ids);
1421        let lexemes = small_lex(lexer_rules, input_src);
1422        let lexer = SmallLexer {
1423            lexemes,
1424            s: input_src,
1425        };
1426
1427        let (parse_map, errs) = rt_parser.parse_map(
1428            &lexer,
1429            &|lexeme: TestLexeme| {
1430                let tidx = TIdx(lexeme.tok_id());
1431                let tn = &grm.token_name(tidx).unwrap();
1432                let lt = &input_src[lexeme.span().start()..lexeme.span().end()];
1433                TestParseMap::Term(tn, lt)
1434            },
1435            &|ridx, nodes| {
1436                let rule_name = &grm.rule_name_str(ridx);
1437                TestParseMap::NonTerm(rule_name, nodes)
1438            },
1439        );
1440        assert!(parse_map.is_some() && errs.is_empty());
1441        // Sanity check the `parse_generictree` output.
1442        check_parse_output(
1443            lex_src,
1444            grammar_src,
1445            input_src,
1446            "Expr
1447 Expr
1448  Term
1449   Term
1450    Factor
1451     INT 2
1452   * *
1453   Factor
1454    INT 3
1455 + +
1456 Term
1457  Factor
1458   INT 4
1459",
1460        );
1461
1462        let expected_parse_map = {
1463            use TestParseMap::*;
1464            NonTerm(
1465                "Expr",
1466                vec![
1467                    NonTerm(
1468                        "Expr",
1469                        vec![NonTerm(
1470                            "Term",
1471                            vec![
1472                                NonTerm("Term", vec![NonTerm("Factor", vec![Term("INT", "2")])]),
1473                                Term("*", "*"),
1474                                NonTerm("Factor", vec![Term("INT", "3")]),
1475                            ],
1476                        )],
1477                    ),
1478                    Term("+", "+"),
1479                    NonTerm("Term", vec![NonTerm("Factor", vec![Term("INT", "4")])]),
1480                ],
1481            )
1482        };
1483        assert_eq!(parse_map, Some(expected_parse_map));
1484    }
1485}