Skip to main content

cfgrammar/yacc/
parser.rs

// Note: this is the parser for all yacc kinds: YaccKind::Original(_), YaccKind::Grmtools and YaccKind::Eco.
2
3#[cfg(feature = "bincode")]
4use bincode::{Decode, Encode};
5use num_traits::PrimInt;
6use regex::Regex;
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9use std::{
10    collections::{HashMap, hash_map::Entry},
11    error::Error,
12    fmt,
13    str::FromStr,
14    sync::LazyLock,
15};
16
17use crate::{
18    Span, Spanned,
19    header::{GrmtoolsSectionParser, HeaderErrorKind},
20};
21
22pub type YaccGrammarResult<T> = Result<T, Vec<YaccGrammarError>>;
23
24use super::{
25    AssocKind, Precedence, YaccKind,
26    ast::{GrammarAST, Symbol},
27};
28
/// The various different possible Yacc parser errors.
///
/// The user-facing message for each variant is produced by this type's [fmt::Display]
/// implementation. Variants carrying a `String` embed that name in the message.
#[derive(Debug, PartialEq, Eq, Clone)]
#[non_exhaustive]
pub enum YaccGrammarErrorKind {
    /// Displayed as "Illegal integer".
    IllegalInteger,
    /// The input does not start with a legal name.
    IllegalName,
    /// Displayed as "Illegal string".
    IllegalString,
    /// The input ended before a rule's terminating `;` was found.
    IncompleteRule,
    /// Displayed as "Incomplete comment".
    IncompleteComment,
    /// Displayed as "Incomplete action".
    IncompleteAction,
    /// A `:` was expected (after a rule name, or after the name in `%parse-param`).
    MissingColon,
    /// A `->` was expected after a rule name (only checked for `YaccKind::Grmtools`).
    MissingRightArrow,
    /// Displayed as "Mismatched brace".
    MismatchedBrace,
    /// `%empty` was used in a production that already has symbols, or was not followed by
    /// something that looks like the end of the production.
    NonEmptyProduction,
    /// The input ended before the `%%` that introduces the rules section.
    PrematureEnd,
    /// An action was not followed by `|` or `;`.
    ProductionNotTerminated,
    /// Displayed as "Programs not currently supported".
    ProgramsNotSupported,
    /// The declarations section contains something that is not a recognised declaration.
    UnknownDeclaration,
    /// Displayed as "%prec not followed by token name".
    PrecNotFollowedByToken,
    /// A token was given a precedence more than once.
    DuplicatePrecedence,
    /// A token was listed more than once in `%avoid_insert` declarations.
    DuplicateAvoidInsertDeclaration,
    /// A token was listed more than once in `%implicit_tokens` declarations.
    DuplicateImplicitTokensDeclaration,
    /// More than one `%expect` declaration was found.
    DuplicateExpectDeclaration,
    /// More than one `%expect-rr` declaration was found.
    DuplicateExpectRRDeclaration,
    /// More than one `%start` declaration was found.
    DuplicateStartDeclaration,
    /// More than one `%actiontype` declaration was found.
    DuplicateActiontypeDeclaration,
    /// More than one `%epp` declaration was found for the same token.
    DuplicateEPP,
    /// Displayed as "Reached end of line without finding expected content".
    ReachedEOL,
    /// Displayed as "Invalid string".
    InvalidString,
    /// Displayed as "No start rule specified".
    NoStartRule,
    /// An entry in `%expect-unused` is neither a legal name nor a legal token.
    UnknownSymbol,
    /// The named start rule does not appear in the grammar.
    InvalidStartRule(String),
    /// A production references the named rule, which is unknown.
    UnknownRuleRef(String),
    /// The named token is unknown.
    UnknownToken(String),
    /// The named token is used in `%prec` but has no precedence attached.
    NoPrecForToken(String),
    /// The named token has an `%epp` declaration but is not referenced in the grammar.
    UnknownEPP(String),
    /// The given character was expected in the input.
    ExpectedInput(char),
    /// Displayed as "Invalid yacc kind".
    InvalidYaccKind,
    /// An error in the `%grmtools` section. The [SpansKind] says how the spans of the
    /// enclosing [YaccGrammarError] are to be interpreted.
    Header(HeaderErrorKind, SpansKind),
}
69
/// Any error from the Yacc parser returns an instance of this struct.
///
/// It pairs the [YaccGrammarErrorKind] describing what went wrong with the span(s) of the
/// input that the error refers to.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct YaccGrammarError {
    /// The kind of error; its `Display` output is the error message.
    pub(crate) kind: YaccGrammarErrorKind,
    /// Always contains at least 1 span.
    ///
    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
    /// for the meaning and interpretation of spans and their ordering.
    pub(crate) spans: Vec<Span>,
}
81
// `Debug` is derived and `Display` is implemented for `YaccGrammarError`, so the default
// `Error` methods are sufficient.
impl Error for YaccGrammarError {}
83
84impl fmt::Display for YaccGrammarError {
85    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
86        write!(f, "{}", self.kind)
87    }
88}
89
90impl fmt::Display for YaccGrammarErrorKind {
91    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
92        let s = match self {
93            YaccGrammarErrorKind::ExpectedInput(c) => &format!("Expected input '{c}'"),
94            YaccGrammarErrorKind::IllegalInteger => "Illegal integer",
95            YaccGrammarErrorKind::IllegalName => "Illegal name",
96            YaccGrammarErrorKind::IllegalString => "Illegal string",
97            YaccGrammarErrorKind::IncompleteRule => "Incomplete rule",
98            YaccGrammarErrorKind::IncompleteComment => "Incomplete comment",
99            YaccGrammarErrorKind::IncompleteAction => "Incomplete action",
100            YaccGrammarErrorKind::MissingColon => "Missing ':'",
101            YaccGrammarErrorKind::MissingRightArrow => "Missing '->'",
102            YaccGrammarErrorKind::MismatchedBrace => "Mismatched brace",
103            YaccGrammarErrorKind::NonEmptyProduction => "%empty used in non-empty production",
104            YaccGrammarErrorKind::PrematureEnd => "File ends prematurely",
105            YaccGrammarErrorKind::ProductionNotTerminated => "Production not terminated correctly",
106            YaccGrammarErrorKind::ProgramsNotSupported => "Programs not currently supported",
107            YaccGrammarErrorKind::UnknownDeclaration => "Unknown declaration",
108            YaccGrammarErrorKind::DuplicatePrecedence => "Token has multiple precedences specified",
109            YaccGrammarErrorKind::PrecNotFollowedByToken => "%prec not followed by token name",
110            YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration => {
111                "Duplicated %avoid_insert declaration"
112            }
113            YaccGrammarErrorKind::DuplicateExpectDeclaration => "Duplicated %expect declaration",
114            YaccGrammarErrorKind::DuplicateExpectRRDeclaration => {
115                "Duplicate %expect-rr declaration"
116            }
117            YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration => {
118                "Duplicated %implicit_tokens declaration"
119            }
120            YaccGrammarErrorKind::DuplicateStartDeclaration => "Duplicated %start declaration",
121            YaccGrammarErrorKind::DuplicateActiontypeDeclaration => {
122                "Duplicate %actiontype declaration"
123            }
124            YaccGrammarErrorKind::DuplicateEPP => "Duplicate %epp declaration for this token",
125            YaccGrammarErrorKind::ReachedEOL => {
126                "Reached end of line without finding expected content"
127            }
128            YaccGrammarErrorKind::InvalidString => "Invalid string",
129            YaccGrammarErrorKind::NoStartRule => return write!(f, "No start rule specified"),
130            YaccGrammarErrorKind::UnknownSymbol => "Unknown symbol, expected a rule or token",
131            YaccGrammarErrorKind::InvalidStartRule(name) => {
132                return write!(f, "Start rule '{}' does not appear in grammar", name);
133            }
134            YaccGrammarErrorKind::UnknownRuleRef(name) => {
135                return write!(f, "Unknown reference to rule '{}'", name);
136            }
137            YaccGrammarErrorKind::UnknownToken(name) => {
138                return write!(f, "Unknown token '{}'", name);
139            }
140            YaccGrammarErrorKind::NoPrecForToken(name) => {
141                return write!(
142                    f,
143                    "Token '{}' used in %prec has no precedence attached",
144                    name
145                );
146            }
147            YaccGrammarErrorKind::UnknownEPP(name) => {
148                return write!(
149                    f,
150                    "Token '{}' in %epp declaration is not referenced in the grammar",
151                    name
152                );
153            }
154            YaccGrammarErrorKind::InvalidYaccKind => "Invalid yacc kind",
155            YaccGrammarErrorKind::Header(hk, _) => &format!("Error in '%grmtools' {}", hk),
156        };
157        write!(f, "{}", s)
158    }
159}
160
/// The various different possible Yacc parser warnings.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "bincode", derive(Encode, Decode))]
#[non_exhaustive]
pub enum YaccGrammarWarningKind {
    /// Displayed as "Unused rule".
    UnusedRule,
    /// Displayed as "Unused token".
    UnusedToken,
}
170
/// Any warning from the Yacc parser returns an instance of this struct.
///
/// It pairs the [YaccGrammarWarningKind] with the span(s) of the input that the warning
/// refers to.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "bincode", derive(Encode, Decode))]
pub struct YaccGrammarWarning {
    /// The specific kind of warning.
    pub(crate) kind: YaccGrammarWarningKind,
    /// Always contains at least 1 span.
    ///
    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
    /// for the meaning and interpretation of spans and their ordering.
    pub(crate) spans: Vec<Span>,
}
184
185impl fmt::Display for YaccGrammarWarning {
186    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
187        write!(f, "{}", self.kind)
188    }
189}
190
191impl fmt::Display for YaccGrammarWarningKind {
192    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193        let s = match self {
194            YaccGrammarWarningKind::UnusedRule => "Unused rule",
195            YaccGrammarWarningKind::UnusedToken => "Unused token",
196        };
197        write!(f, "{}", s)
198    }
199}
200
201impl Spanned for YaccGrammarWarning {
202    /// Returns the spans associated with the error, always containing at least 1 span.
203    ///
204    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
205    /// for the meaning and interpretation of spans and their ordering.
206    fn spans(&self) -> &[Span] {
207        self.spans.as_slice()
208    }
209
210    /// Returns the [SpansKind] associated with this error.
211    fn spanskind(&self) -> SpansKind {
212        match self.kind {
213            YaccGrammarWarningKind::UnusedRule | YaccGrammarWarningKind::UnusedToken => {
214                SpansKind::Error
215            }
216        }
217    }
218}
219
/// Indicates how to interpret the spans of an error or warning.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
#[non_exhaustive]
pub enum SpansKind {
    /// The first span is the original occurrence; it is followed by one span for each
    /// subsequent (duplicate) occurrence.
    DuplicationError,
    /// Contains a single span at the site of the error.
    Error,
}
229
230impl Spanned for YaccGrammarError {
231    /// Returns the spans associated with the error, always containing at least 1 span.
232    ///
233    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
234    /// for the meaning and interpretation of spans and their ordering.
235    fn spans(&self) -> &[Span] {
236        self.spans.as_slice()
237    }
238
239    /// Returns the [SpansKind] associated with this error.
240    fn spanskind(&self) -> SpansKind {
241        match self.kind {
242            YaccGrammarErrorKind::IllegalInteger
243            | YaccGrammarErrorKind::IllegalName
244            | YaccGrammarErrorKind::IllegalString
245            | YaccGrammarErrorKind::IncompleteRule
246            | YaccGrammarErrorKind::IncompleteComment
247            | YaccGrammarErrorKind::IncompleteAction
248            | YaccGrammarErrorKind::MissingColon
249            | YaccGrammarErrorKind::MissingRightArrow
250            | YaccGrammarErrorKind::MismatchedBrace
251            | YaccGrammarErrorKind::NonEmptyProduction
252            | YaccGrammarErrorKind::PrematureEnd
253            | YaccGrammarErrorKind::ProductionNotTerminated
254            | YaccGrammarErrorKind::PrecNotFollowedByToken
255            | YaccGrammarErrorKind::ProgramsNotSupported
256            | YaccGrammarErrorKind::UnknownDeclaration
257            | YaccGrammarErrorKind::ReachedEOL
258            | YaccGrammarErrorKind::InvalidString
259            | YaccGrammarErrorKind::NoStartRule
260            | YaccGrammarErrorKind::UnknownSymbol
261            | YaccGrammarErrorKind::InvalidStartRule(_)
262            | YaccGrammarErrorKind::UnknownRuleRef(_)
263            | YaccGrammarErrorKind::UnknownToken(_)
264            | YaccGrammarErrorKind::NoPrecForToken(_)
265            | YaccGrammarErrorKind::InvalidYaccKind
266            | YaccGrammarErrorKind::ExpectedInput(_)
267            | YaccGrammarErrorKind::UnknownEPP(_) => SpansKind::Error,
268            YaccGrammarErrorKind::DuplicatePrecedence
269            | YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration
270            | YaccGrammarErrorKind::DuplicateExpectDeclaration
271            | YaccGrammarErrorKind::DuplicateExpectRRDeclaration
272            | YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration
273            | YaccGrammarErrorKind::DuplicateStartDeclaration
274            | YaccGrammarErrorKind::DuplicateActiontypeDeclaration
275            | YaccGrammarErrorKind::DuplicateEPP => SpansKind::DuplicationError,
276            YaccGrammarErrorKind::Header(_, spanskind) => spanskind,
277        }
278    }
279}
280
/// Parser state for a single Yacc grammar source.
///
/// Positions passed between the parsing methods are byte offsets into `src`.
pub(crate) struct YaccParser<'a> {
    /// Selects kind-specific syntax: `%actiontype` is only recognised for
    /// `YaccKind::Original`, `%implicit_tokens` only for `YaccKind::Eco`, and `-> <type>` after
    /// a rule name only for `YaccKind::Grmtools`.
    yacc_kind: YaccKind,
    /// The complete grammar text being parsed.
    src: &'a str,
    /// Newline counter, compared before and after whitespace skipping to detect that a
    /// line-oriented declaration has ended.
    // NOTE(review): presumably incremented by `parse_ws`, which is not visible here — confirm.
    num_newlines: usize,
    /// The AST being built up; handed to the caller by `build`.
    ast: GrammarAST,
    /// The type given by `%actiontype` (if any) and the span of that declaration. It is used
    /// as the action type of rules added for `YaccKind::Original` and `YaccKind::Eco`.
    global_actiontype: Option<(String, Span)>,
}
288
/// Matches a name anchored at the start of the input: a letter, `_` or `.`, followed by any
/// number of letters, digits, `_` or `.`.
static RE_NAME: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^[a-zA-Z_.][a-zA-Z0-9_.]*").unwrap());
/// Matches a token anchored at the start of the input: a double-quoted string, a
/// single-quoted string (each needs at least one character between the quotes and is matched
/// non-greedily), or a bare identifier (a letter or `_` followed by letters, digits or `_`).
static RE_TOKEN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("^(?:(\".+?\")|('.+?')|([a-zA-Z_][a-zA-Z_0-9]*))").unwrap());
293
294fn add_duplicate_occurrence(
295    errs: &mut Vec<YaccGrammarError>,
296    kind: YaccGrammarErrorKind,
297    orig_span: Span,
298    dup_span: Span,
299) {
300    if !errs.iter_mut().any(|e| {
301        if e.kind == kind && e.spans[0] == orig_span {
302            e.spans.push(dup_span);
303            true
304        } else {
305            false
306        }
307    }) {
308        errs.push(YaccGrammarError {
309            kind,
310            spans: vec![orig_span, dup_span],
311        });
312    }
313}
314
315/// The actual parser is intended to be entirely opaque from outside users.
316impl YaccParser<'_> {
317    pub(crate) fn new(yacc_kind: YaccKind, src: &str) -> YaccParser<'_> {
318        YaccParser {
319            yacc_kind,
320            src,
321            num_newlines: 0,
322            ast: GrammarAST::new(),
323            global_actiontype: None,
324        }
325    }
326
327    pub(crate) fn parse(&mut self) -> YaccGrammarResult<usize> {
328        let mut errs = Vec::new();
329        let (_, pos) = GrmtoolsSectionParser::new(self.src, false)
330            .parse()
331            .map_err(|mut errs| errs.drain(..).map(|e| e.into()).collect::<Vec<_>>())?;
332        // We pass around an index into the *bytes* of self.src. We guarantee that at all times
333        // this points to the beginning of a UTF-8 character (since multibyte characters exist, not
334        // every byte within the string is also a valid character).
335        let mut result = self.parse_declarations(pos, &mut errs);
336        result = self.parse_rules(match result {
337            Ok(i) => i,
338            Err(e) => {
339                errs.push(e);
340                return Err(errs);
341            }
342        });
343        result = self.parse_programs(
344            match result {
345                Ok(i) => i,
346                Err(e) => {
347                    errs.push(e);
348                    return Err(errs);
349                }
350            },
351            &mut errs,
352        );
353        match result {
354            Ok(i) if errs.is_empty() => Ok(i),
355            Err(e) => {
356                errs.push(e);
357                Err(errs)
358            }
359            _ => Err(errs),
360        }
361    }
362
363    pub(crate) fn build(self) -> GrammarAST {
364        self.ast
365    }
366
367    fn parse_declarations(
368        &mut self,
369        mut i: usize,
370        errs: &mut Vec<YaccGrammarError>,
371    ) -> Result<usize, YaccGrammarError> {
372        i = self.parse_ws(i, true)?;
373        let mut prec_level = 0;
374        while i < self.src.len() {
375            if self.lookahead_is("%%", i).is_some() {
376                return Ok(i);
377            }
378            if let Some(j) = self.lookahead_is("%token", i) {
379                i = self.parse_ws(j, false)?;
380                while i < self.src.len() && self.lookahead_is("%", i).is_none() {
381                    let (j, n, span, _) = self.parse_token(i)?;
382                    let (idx, new_tok) = self.ast.tokens.insert_full(n);
383                    if new_tok {
384                        self.ast.spans.push(span);
385                    }
386                    self.ast.token_directives.insert(idx);
387                    i = self.parse_ws(j, true)?;
388                }
389                continue;
390            }
391            if let YaccKind::Original(_) = self.yacc_kind {
392                if let Some(j) = self.lookahead_is("%actiontype", i) {
393                    i = self.parse_ws(j, false)?;
394                    let (j, n) = self.parse_to_eol(i)?;
395                    let span = Span::new(i, j);
396                    if let Some((_, orig_span)) = self.global_actiontype {
397                        add_duplicate_occurrence(
398                            errs,
399                            YaccGrammarErrorKind::DuplicateActiontypeDeclaration,
400                            orig_span,
401                            span,
402                        );
403                    } else {
404                        self.global_actiontype = Some((n, span));
405                    }
406                    i = self.parse_ws(j, true)?;
407                    continue;
408                }
409            }
410            if let Some(j) = self.lookahead_is("%start", i) {
411                i = self.parse_ws(j, false)?;
412                let (j, n) = self.parse_name(i)?;
413                let span = Span::new(i, j);
414                if let Some((_, orig_span)) = self.ast.start {
415                    add_duplicate_occurrence(
416                        errs,
417                        YaccGrammarErrorKind::DuplicateStartDeclaration,
418                        orig_span,
419                        span,
420                    );
421                } else {
422                    self.ast.start = Some((n, span));
423                }
424                i = self.parse_ws(j, true)?;
425                continue;
426            }
427            if let Some(j) = self.lookahead_is("%epp", i) {
428                i = self.parse_ws(j, false)?;
429                let (j, n, _, _) = self.parse_token(i)?;
430                let span = Span::new(i, j);
431                i = self.parse_ws(j, false)?;
432                let (j, v) = self.parse_string(i)?;
433                let vspan = Span::new(i, j);
434                match self.ast.epp.entry(n) {
435                    Entry::Occupied(orig) => {
436                        let (orig_span, _) = orig.get();
437                        add_duplicate_occurrence(
438                            errs,
439                            YaccGrammarErrorKind::DuplicateEPP,
440                            *orig_span,
441                            span,
442                        )
443                    }
444                    Entry::Vacant(epp) => {
445                        epp.insert((span, (v, vspan)));
446                    }
447                }
448                i = self.parse_ws(j, true)?;
449                continue;
450            }
451            if let Some(j) = self.lookahead_is("%expect-rr", i) {
452                i = self.parse_ws(j, false)?;
453                let (j, n) = self.parse_int(i)?;
454                let span = Span::new(i, j);
455                if let Some((_, orig_span)) = self.ast.expectrr {
456                    add_duplicate_occurrence(
457                        errs,
458                        YaccGrammarErrorKind::DuplicateExpectRRDeclaration,
459                        orig_span,
460                        span,
461                    );
462                } else {
463                    self.ast.expectrr = Some((n, span));
464                }
465                i = self.parse_ws(j, true)?;
466                continue;
467            }
468            if let Some(j) = self.lookahead_is("%expect-unused", i) {
469                i = self.parse_ws(j, false)?;
470                while i < self.src.len() && self.lookahead_is("%", i).is_none() {
471                    let j = match self.parse_name(i) {
472                        Ok((j, n)) => {
473                            self.ast
474                                .expect_unused
475                                .push(Symbol::Rule(n, Span::new(i, j)));
476                            j
477                        }
478                        Err(_) => match self.parse_token(i) {
479                            Ok((j, n, span, _)) => {
480                                self.ast.expect_unused.push(Symbol::Token(n, span));
481                                j
482                            }
483                            Err(_) => {
484                                return Err(self.mk_error(YaccGrammarErrorKind::UnknownSymbol, i));
485                            }
486                        },
487                    };
488                    i = self.parse_ws(j, true)?;
489                }
490                continue;
491            }
492            if let Some(j) = self.lookahead_is("%expect", i) {
493                i = self.parse_ws(j, false)?;
494                let (j, n) = self.parse_int(i)?;
495                let span = Span::new(i, j);
496                if let Some((_, orig_span)) = self.ast.expect {
497                    add_duplicate_occurrence(
498                        errs,
499                        YaccGrammarErrorKind::DuplicateExpectDeclaration,
500                        orig_span,
501                        span,
502                    );
503                } else {
504                    self.ast.expect = Some((n, span));
505                }
506                i = self.parse_ws(j, true)?;
507                continue;
508            }
509            if let Some(j) = self.lookahead_is("%avoid_insert", i) {
510                i = self.parse_ws(j, false)?;
511                let num_newlines = self.num_newlines;
512                if self.ast.avoid_insert.is_none() {
513                    self.ast.avoid_insert = Some(HashMap::new());
514                }
515                while j < self.src.len() && self.num_newlines == num_newlines {
516                    let (j, n, span, _) = self.parse_token(i)?;
517                    if self.ast.tokens.insert(n.clone()) {
518                        self.ast.spans.push(span);
519                    }
520
521                    match self.ast.avoid_insert.as_mut().unwrap().entry(n) {
522                        Entry::Occupied(occupied) => {
523                            add_duplicate_occurrence(
524                                errs,
525                                YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
526                                *occupied.get(),
527                                span,
528                            );
529                        }
530                        Entry::Vacant(vacant) => {
531                            vacant.insert(span);
532                        }
533                    }
534                    i = self.parse_ws(j, true)?;
535                }
536                continue;
537            }
538            if let Some(j) = self.lookahead_is("%parse-param", i) {
539                i = self.parse_ws(j, false)?;
540                let (j, name) = self.parse_to_single_colon(i)?;
541                match self.lookahead_is(":", j) {
542                    Some(j) => i = self.parse_ws(j, false)?,
543                    None => {
544                        return Err(self.mk_error(YaccGrammarErrorKind::MissingColon, j));
545                    }
546                }
547                let (j, ty) = self.parse_to_eol(i)?;
548                self.ast.parse_param = Some((name, ty));
549                i = self.parse_ws(j, true)?;
550                continue;
551            }
552            if let Some(j) = self.lookahead_is("%parse-generics", i) {
553                i = self.parse_ws(j, false)?;
554                let (j, ty) = self.parse_to_eol(i)?;
555                self.ast.parse_generics = Some(ty);
556                i = self.parse_ws(j, true)?;
557                continue;
558            }
559            if let YaccKind::Eco = self.yacc_kind {
560                if let Some(j) = self.lookahead_is("%implicit_tokens", i) {
561                    i = self.parse_ws(j, false)?;
562                    let num_newlines = self.num_newlines;
563                    if self.ast.implicit_tokens.is_none() {
564                        self.ast.implicit_tokens = Some(HashMap::new());
565                    }
566                    while j < self.src.len() && self.num_newlines == num_newlines {
567                        let (j, n, span, _) = self.parse_token(i)?;
568                        if self.ast.tokens.insert(n.clone()) {
569                            self.ast.spans.push(span);
570                        }
571                        match self.ast.implicit_tokens.as_mut().unwrap().entry(n) {
572                            Entry::Occupied(entry) => {
573                                let orig_span = *entry.get();
574                                add_duplicate_occurrence(
575                                    errs,
576                                    YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
577                                    orig_span,
578                                    span,
579                                );
580                            }
581                            Entry::Vacant(entry) => {
582                                entry.insert(span);
583                            }
584                        }
585                        i = self.parse_ws(j, true)?;
586                    }
587                    continue;
588                }
589            }
590            {
591                let k;
592                let kind;
593                if let Some(j) = self.lookahead_is("%left", i) {
594                    kind = AssocKind::Left;
595                    k = j;
596                } else if let Some(j) = self.lookahead_is("%right", i) {
597                    kind = AssocKind::Right;
598                    k = j;
599                } else if let Some(j) = self.lookahead_is("%nonassoc", i) {
600                    kind = AssocKind::Nonassoc;
601                    k = j;
602                } else {
603                    return Err(self.mk_error(YaccGrammarErrorKind::UnknownDeclaration, i));
604                }
605
606                i = self.parse_ws(k, false)?;
607                let num_newlines = self.num_newlines;
608                while i < self.src.len() && num_newlines == self.num_newlines {
609                    let (j, n, span, _) = self.parse_token(i)?;
610                    match self.ast.precs.entry(n) {
611                        Entry::Occupied(orig) => {
612                            let (_, orig_span) = orig.get();
613                            add_duplicate_occurrence(
614                                errs,
615                                YaccGrammarErrorKind::DuplicatePrecedence,
616                                *orig_span,
617                                span,
618                            );
619                        }
620                        Entry::Vacant(entry) => {
621                            let prec = Precedence {
622                                level: prec_level,
623                                kind,
624                            };
625                            entry.insert((prec, span));
626                        }
627                    }
628
629                    i = self.parse_ws(j, true)?;
630                }
631                prec_level += 1;
632            }
633        }
634        debug_assert!(i == self.src.len());
635        Err(self.mk_error(YaccGrammarErrorKind::PrematureEnd, i))
636    }
637
638    fn parse_rules(&mut self, mut i: usize) -> Result<usize, YaccGrammarError> {
639        // self.parse_declarations should have left the input at '%%'
640        i = self.lookahead_is("%%", i).unwrap();
641        i = self.parse_ws(i, true)?;
642        while i < self.src.len() && self.lookahead_is("%%", i).is_none() {
643            i = self.parse_rule(i)?;
644            i = self.parse_ws(i, true)?;
645        }
646        Ok(i)
647    }
648
649    fn parse_rule(&mut self, mut i: usize) -> Result<usize, YaccGrammarError> {
650        let (j, rn) = self.parse_name(i)?;
651        let span = Span::new(i, j);
652        if self.ast.start.is_none() {
653            self.ast.start = Some((rn.clone(), span));
654        }
655        match self.yacc_kind {
656            YaccKind::Original(_) | YaccKind::Eco => {
657                if self.ast.get_rule(&rn).is_none() {
658                    self.ast.add_rule(
659                        (rn.clone(), span),
660                        self.global_actiontype.clone().map(|(s, _)| s),
661                    );
662                }
663                i = j;
664            }
665            YaccKind::Grmtools => {
666                i = self.parse_ws(j, true)?;
667                if let Some(j) = self.lookahead_is("->", i) {
668                    i = j;
669                } else {
670                    return Err(self.mk_error(YaccGrammarErrorKind::MissingRightArrow, i));
671                }
672                i = self.parse_ws(i, true)?;
673                let (j, actiont) = self.parse_to_single_colon(i)?;
674                if self.ast.get_rule(&rn).is_none() {
675                    self.ast.add_rule((rn.clone(), span), Some(actiont));
676                }
677                i = j;
678            }
679        }
680        i = self.parse_ws(i, true)?;
681        match self.lookahead_is(":", i) {
682            Some(j) => i = j,
683            None => {
684                return Err(self.mk_error(YaccGrammarErrorKind::MissingColon, i));
685            }
686        }
687        let mut syms = Vec::new();
688        let mut prec = None;
689        let mut action = None;
690        i = self.parse_ws(i, true)?;
691        let mut pos_prod_start = i;
692        let mut pos_prod_end = None;
693        while i < self.src.len() {
694            if let Some(j) = self.lookahead_is("|", i) {
695                self.ast.add_prod(
696                    rn.clone(),
697                    syms,
698                    prec,
699                    action,
700                    Span::new(pos_prod_start, pos_prod_end.take().unwrap_or(i)),
701                );
702                syms = Vec::new();
703                prec = None;
704                action = None;
705                i = self.parse_ws(j, true)?;
706                pos_prod_start = i;
707                continue;
708            } else if let Some(j) = self.lookahead_is(";", i) {
709                self.ast.add_prod(
710                    rn,
711                    syms,
712                    prec,
713                    action,
714                    Span::new(pos_prod_start, pos_prod_end.take().unwrap_or(i)),
715                );
716                return Ok(j);
717            }
718
719            if self.lookahead_is("\"", i).is_some() || self.lookahead_is("'", i).is_some() {
720                let (j, sym, span, _) = self.parse_token(i)?;
721                pos_prod_end = Some(j);
722                i = self.parse_ws(j, true)?;
723                if self.ast.tokens.insert(sym.clone()) {
724                    self.ast.spans.push(span);
725                }
726                syms.push(Symbol::Token(sym, span));
727            } else if let Some(j) = self.lookahead_is("%prec", i) {
728                i = self.parse_ws(j, true)?;
729                let (k, sym, span, _) = self.parse_token(i)?;
730                if self.ast.tokens.insert(sym.clone()) {
731                    self.ast.spans.push(span);
732                }
733                prec = Some(sym);
734                pos_prod_end = Some(k);
735                i = k;
736            } else if self.lookahead_is("{", i).is_some() {
737                let pos_action_start = i + 1;
738                pos_prod_end = Some(i);
739                // With j the location of the right brace, i the location of the left brace.
740                let (j, a) = self.parse_action(i)?;
741                i = self.parse_ws(j, true)?;
742                let action_span = Span::new(pos_action_start, pos_action_start + a.len());
743                action = Some((a, action_span));
744
745                if !(self.lookahead_is("|", i).is_some() || self.lookahead_is(";", i).is_some()) {
746                    return Err(self.mk_error(YaccGrammarErrorKind::ProductionNotTerminated, i));
747                }
748            } else if let Some(j) = self.lookahead_is("%empty", i) {
749                let k = self.parse_ws(j, true)?;
750                // %empty could be followed by all sorts of weird syntax errors: all we try and do
751                // is say "does this production look like it's finished" and trust that the other
752                // errors will be caught by other parts of the parser.
753                if !syms.is_empty()
754                    | !(self.lookahead_is("|", k).is_some()
755                        || self.lookahead_is(";", k).is_some()
756                        || self.lookahead_is("{", k).is_some()
757                        || self.lookahead_is("%prec", k).is_some())
758                {
759                    return Err(self.mk_error(YaccGrammarErrorKind::NonEmptyProduction, i));
760                }
761                pos_prod_end = Some(j);
762                i = k;
763            } else {
764                let (j, sym, span, quoted) = self.parse_token(i)?;
765                pos_prod_end = Some(j);
766                if self
767                    .ast
768                    .tokens
769                    .get_index_of(&sym)
770                    .is_some_and(|idx| quoted || self.ast.token_directives.contains(&idx))
771                {
772                    syms.push(Symbol::Token(sym, span));
773                } else {
774                    syms.push(Symbol::Rule(sym, span));
775                }
776                i = j;
777            }
778            i = self.parse_ws(i, true)?;
779        }
780        Err(self.mk_error(YaccGrammarErrorKind::IncompleteRule, i))
781    }
782
783    fn parse_name(&self, i: usize) -> Result<(usize, String), YaccGrammarError> {
784        match RE_NAME.find(&self.src[i..]) {
785            Some(m) => {
786                assert_eq!(m.start(), 0);
787                Ok((i + m.end(), self.src[i..i + m.end()].to_string()))
788            }
789            None => Err(self.mk_error(YaccGrammarErrorKind::IllegalName, i)),
790        }
791    }
792
793    fn parse_token(&self, i: usize) -> Result<(usize, String, Span, bool), YaccGrammarError> {
794        match RE_TOKEN.find(&self.src[i..]) {
795            Some(m) => {
796                assert!(m.start() == 0 && m.end() > 0);
797                match self.src[i..].chars().next().unwrap() {
798                    '"' | '\'' => {
799                        debug_assert!('"'.len_utf8() == 1 && '\''.len_utf8() == 1);
800                        let start_cidx = i + 1;
801                        let end_cidx = i + m.end() - 1;
802                        Ok((
803                            i + m.end(),
804                            self.src[start_cidx..end_cidx].to_string(),
805                            Span::new(start_cidx, end_cidx),
806                            true,
807                        ))
808                    }
809                    _ => Ok((
810                        i + m.end(),
811                        self.src[i..i + m.end()].to_string(),
812                        Span::new(i, i + m.end()),
813                        false,
814                    )),
815                }
816            }
817            None => Err(self.mk_error(YaccGrammarErrorKind::IllegalString, i)),
818        }
819    }
820
821    fn parse_action(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
822        debug_assert!(self.lookahead_is("{", i).is_some());
823        let mut j = i;
824        let mut c = 0; // Count braces
825        while j < self.src.len() {
826            let ch = self.src[j..].chars().next().unwrap();
827            match ch {
828                '{' => c += 1,
829                '}' if c == 1 => {
830                    c = 0;
831                    break;
832                }
833                '}' => c -= 1,
834                '\n' | '\r' => {
835                    self.num_newlines += 1;
836                }
837                _ => (),
838            };
839            j += ch.len_utf8();
840        }
841        if c > 0 {
842            Err(self.mk_error(YaccGrammarErrorKind::IncompleteAction, i))
843        } else {
844            debug_assert!(self.lookahead_is("}", j).is_some());
845            let s = self.src[i + '{'.len_utf8()..j].trim().to_string();
846            Ok((j + '}'.len_utf8(), s))
847        }
848    }
849
850    fn parse_programs(
851        &mut self,
852        mut i: usize,
853        _: &mut Vec<YaccGrammarError>,
854    ) -> Result<usize, YaccGrammarError> {
855        if let Some(j) = self.lookahead_is("%%", i) {
856            i = self.parse_ws(j, true)?;
857            let prog = self.src[i..].to_string();
858            i += prog.len();
859            self.ast.set_programs(prog);
860        }
861        Ok(i)
862    }
863
864    /// Parse up to (but do not include) the end of line (or, if it comes sooner, the end of file).
865    fn parse_to_eol(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
866        let mut j = i;
867        while j < self.src.len() {
868            let c = self.src[j..].chars().next().unwrap();
869            match c {
870                '\n' | '\r' => break,
871                _ => j += c.len_utf8(),
872            }
873        }
874        Ok((j, self.src[i..j].to_string()))
875    }
876
877    /// Parse up to (but do not include) a single colon (double colons are allowed so that strings
878    /// like `a::b::c:` treat `a::b::c` as a single name. Errors if EOL encountered.
879    fn parse_to_single_colon(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
880        let mut j = i;
881        while j < self.src.len() {
882            let c = self.src[j..].chars().next().unwrap();
883            match c {
884                ':' => {
885                    let k = j + ':'.len_utf8();
886                    if k == self.src.len() || !self.src[k..].starts_with(':') {
887                        return Ok((j, self.src[i..j].trim().to_string()));
888                    }
889                    j += 2 * ':'.len_utf8();
890                }
891                '\n' | '\r' => {
892                    self.num_newlines += 1;
893                    j += c.len_utf8();
894                }
895                _ => j += c.len_utf8(),
896            }
897        }
898        Err(self.mk_error(YaccGrammarErrorKind::ReachedEOL, j))
899    }
900
901    /// Parse a quoted string, allowing escape characters.
902    fn parse_int<T: FromStr + PrimInt>(
903        &mut self,
904        i: usize,
905    ) -> Result<(usize, T), YaccGrammarError> {
906        let mut j = i;
907        while j < self.src.len() {
908            let c = self.src[j..].chars().next().unwrap();
909            match c {
910                '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => j += 1,
911                _ => break,
912            }
913        }
914        match self.src[i..j].parse::<T>() {
915            Ok(x) => Ok((j, x)),
916            Err(_) => Err(self.mk_error(YaccGrammarErrorKind::IllegalInteger, i)),
917        }
918    }
919
920    /// Parse a quoted string, allowing escape characters.
921    fn parse_string(&mut self, mut i: usize) -> Result<(usize, String), YaccGrammarError> {
922        let qc = if self.lookahead_is("'", i).is_some() {
923            '\''
924        } else if self.lookahead_is("\"", i).is_some() {
925            '"'
926        } else {
927            return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, i));
928        };
929
930        debug_assert!('"'.len_utf8() == 1 && '\''.len_utf8() == 1);
931        // Because we can encounter escape characters, we can't simply match text and slurp it into
932        // a String in one go (otherwise we'd include the escape characters). Conceptually we have
933        // to build the String up byte by byte, skipping escape characters, but that's slow.
934        // Instead we append chunks of the string up to (but excluding) escape characters.
935        let mut s = String::new();
936        i += 1;
937        let mut j = i;
938        while j < self.src.len() {
939            let c = self.src[j..].chars().next().unwrap();
940            match c {
941                '\n' | '\r' => {
942                    return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j));
943                }
944                x if x == qc => {
945                    s.push_str(&self.src[i..j]);
946                    return Ok((j + 1, s));
947                }
948                '\\' => {
949                    debug_assert!('\\'.len_utf8() == 1);
950                    match self.src[j + 1..].chars().next() {
951                        Some(c) if c == '\'' || c == '"' => {
952                            s.push_str(&self.src[i..j]);
953                            i = j + 1;
954                            j += 2;
955                        }
956                        _ => {
957                            return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j));
958                        }
959                    }
960                }
961                _ => j += c.len_utf8(),
962            }
963        }
964        Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j))
965    }
966
967    /// Skip whitespace from `i` onwards. If `inc_newlines` is `false`, will return `Err` if a
968    /// newline is encountered; otherwise newlines are consumed and skipped.
969    fn parse_ws(&mut self, mut i: usize, inc_newlines: bool) -> Result<usize, YaccGrammarError> {
970        while i < self.src.len() {
971            let c = self.src[i..].chars().next().unwrap();
972            match c {
973                ' ' | '\t' => i += c.len_utf8(),
974                '\n' | '\r' => {
975                    if !inc_newlines {
976                        return Err(self.mk_error(YaccGrammarErrorKind::ReachedEOL, i));
977                    }
978                    self.num_newlines += 1;
979                    i += c.len_utf8();
980                }
981                '/' => {
982                    if i + c.len_utf8() == self.src.len() {
983                        break;
984                    } else {
985                        let j = i + c.len_utf8();
986                        let c = self.src[j..].chars().next().unwrap();
987                        match c {
988                            '/' => {
989                                i = j + c.len_utf8();
990                                for c in self.src[i..].chars() {
991                                    i += c.len_utf8();
992                                    if c == '\n' || c == '\r' {
993                                        self.num_newlines += 1;
994                                        break;
995                                    }
996                                }
997                            }
998                            '*' => {
999                                // This is complicated by the fact that we need to deal with
1000                                // unclosed comments (i.e. '/*' without a corresponding '*/').
1001                                let mut k = j + c.len_utf8();
1002                                let mut found = false;
1003                                while k < self.src.len() {
1004                                    let c = self.src[k..].chars().next().unwrap();
1005                                    k += c.len_utf8();
1006                                    match c {
1007                                        '\n' | '\r' => {
1008                                            if !inc_newlines {
1009                                                return Err(self.mk_error(
1010                                                    YaccGrammarErrorKind::ReachedEOL,
1011                                                    i,
1012                                                ));
1013                                            }
1014                                            self.num_newlines += 1;
1015                                        }
1016                                        '*' => (),
1017                                        _ => continue,
1018                                    }
1019                                    if k < self.src.len() {
1020                                        let c = self.src[k..].chars().next().unwrap();
1021                                        if c == '/' {
1022                                            i = k + c.len_utf8();
1023                                            found = true;
1024                                            break;
1025                                        }
1026                                    }
1027                                }
1028                                if !found {
1029                                    return Err(
1030                                        self.mk_error(YaccGrammarErrorKind::IncompleteComment, i)
1031                                    );
1032                                }
1033                            }
1034                            _ => break,
1035                        }
1036                    }
1037                }
1038                _ => break,
1039            }
1040        }
1041        Ok(i)
1042    }
1043
1044    fn lookahead_is(&self, s: &'static str, i: usize) -> Option<usize> {
1045        if self.src[i..].starts_with(s) {
1046            Some(i + s.len())
1047        } else {
1048            None
1049        }
1050    }
1051
1052    fn mk_error(&self, k: YaccGrammarErrorKind, off: usize) -> YaccGrammarError {
1053        let span = Span::new(off, off);
1054        YaccGrammarError {
1055            kind: k,
1056            spans: vec![span],
1057        }
1058    }
1059}
1060
1061#[cfg(test)]
1062mod test {
1063    use super::{
1064        super::{
1065            AssocKind, Precedence, YaccKind, YaccOriginalActionKind,
1066            ast::{GrammarAST, Production, Symbol},
1067        },
1068        Span, Spanned, YaccGrammarError, YaccGrammarErrorKind, YaccParser,
1069    };
1070    use std::collections::HashSet;
1071
1072    fn parse(yacc_kind: YaccKind, s: &str) -> Result<GrammarAST, Vec<YaccGrammarError>> {
1073        let mut yp = YaccParser::new(yacc_kind, s);
1074        yp.parse()?;
1075        Ok(yp.build())
1076    }
1077
1078    fn rule(n: &str) -> Symbol {
1079        Symbol::Rule(n.to_string(), Span::new(0, 0))
1080    }
1081
1082    fn rule_span(n: &str, span: Span) -> Symbol {
1083        Symbol::Rule(n.to_string(), span)
1084    }
1085
1086    fn token(n: &str) -> Symbol {
1087        Symbol::Token(n.to_string(), Span::new(0, 0))
1088    }
1089    fn token_span(n: &str, span: Span) -> Symbol {
1090        Symbol::Token(n.to_string(), span)
1091    }
1092
/// Count the lines (as split by `str::lines`) in the part of `s` that precedes byte offset
/// `off`.
fn line_of_offset(s: &str, off: usize) -> usize {
    let (before, _) = s.split_at(off);
    before.lines().count()
}
1096
// Map the start of `$span` to the pair returned by
// `NewlineCache::byte_to_line_num_and_col_num` for the source text `$src`. Panics if the
// cache cannot resolve the offset.
macro_rules! line_col {
    ($src:ident, $span: expr) => {{
        let mut nl_cache = crate::newlinecache::NewlineCache::new();
        nl_cache.feed(&$src);
        let located = nl_cache.byte_to_line_num_and_col_num(&$src, $span.start());
        located.unwrap()
    }};
}
1106
/// Assertion helpers for parse results that are expected to have failed. Every method
/// consumes the result and panics if it is not the expected error(s).
trait ErrorsHelper {
    /// Expect exactly one error, of kind `kind`, carrying exactly one span whose start lies
    /// on line `line` of `src` (as computed by `line_of_offset`).
    fn expect_error_at_line(self, src: &str, kind: YaccGrammarErrorKind, line: usize);
    /// Expect exactly one error, of kind `kind`, carrying exactly one span that starts at
    /// (`line`, `col`) in `src`.
    fn expect_error_at_line_col(
        self,
        src: &str,
        kind: YaccGrammarErrorKind,
        line: usize,
        col: usize,
    );
    /// Expect exactly one error, of kind `kind`, whose spans start at the (line, column)
    /// pairs yielded by `lines_cols`, in the same order.
    fn expect_error_at_lines_cols(
        self,
        src: &str,
        kind: YaccGrammarErrorKind,
        lines_cols: &mut dyn Iterator<Item = (usize, usize)>,
    );
    /// Expect the errors to match `expected` one for one and in order, where each expected
    /// entry is an error kind plus the (line, column) pairs at which its spans start.
    fn expect_multiple_errors(
        self,
        src: &str,
        expected: &mut dyn Iterator<Item = (YaccGrammarErrorKind, Vec<(usize, usize)>)>,
    );
}
1128
1129    impl ErrorsHelper for Result<GrammarAST, Vec<YaccGrammarError>> {
1130        #[track_caller]
1131        fn expect_error_at_line(self, src: &str, kind: YaccGrammarErrorKind, line: usize) {
1132            let errs = self
1133                .as_ref()
1134                .map_err(Vec::as_slice)
1135                .expect_err("Parsed ok while expecting error");
1136            assert_eq!(errs.len(), 1);
1137            let e = &errs[0];
1138            assert_eq!(e.kind, kind);
1139            assert_eq!(line_of_offset(src, e.spans()[0].start()), line);
1140            assert_eq!(e.spans.len(), 1);
1141        }
1142
1143        #[track_caller]
1144        fn expect_error_at_line_col(
1145            self,
1146            src: &str,
1147            kind: YaccGrammarErrorKind,
1148            line: usize,
1149            col: usize,
1150        ) {
1151            self.expect_error_at_lines_cols(src, kind, &mut std::iter::once((line, col)))
1152        }
1153
1154        #[track_caller]
1155        fn expect_error_at_lines_cols(
1156            self,
1157            src: &str,
1158            kind: YaccGrammarErrorKind,
1159            lines_cols: &mut dyn Iterator<Item = (usize, usize)>,
1160        ) {
1161            let errs = self
1162                .as_ref()
1163                .map_err(Vec::as_slice)
1164                .expect_err("Parsed ok while expecting error");
1165            assert_eq!(errs.len(), 1);
1166            let e = &errs[0];
1167            assert_eq!(e.kind, kind);
1168            assert_eq!(
1169                e.spans()
1170                    .iter()
1171                    .map(|span| line_col!(src, span))
1172                    .collect::<Vec<(usize, usize)>>(),
1173                lines_cols.collect::<Vec<(usize, usize)>>()
1174            );
1175            // Check that it is valid to slice.
1176            for span in e.spans() {
1177                let _ = &src[span.start()..span.end()];
1178            }
1179        }
1180
1181        #[track_caller]
1182        fn expect_multiple_errors(
1183            self,
1184            src: &str,
1185            expected: &mut dyn Iterator<Item = (YaccGrammarErrorKind, Vec<(usize, usize)>)>,
1186        ) {
1187            let errs = self.expect_err("Parsed ok while expecting error");
1188            for e in &errs {
1189                // Check that it is valid to slice the source with the spans.
1190                for span in e.spans() {
1191                    let _ = &src[span.start()..span.end()];
1192                }
1193            }
1194
1195            assert_eq!(
1196                errs.iter()
1197                    .map(|e| {
1198                        (
1199                            e.kind.clone(),
1200                            e.spans()
1201                                .iter()
1202                                .map(|span| line_col!(src, span))
1203                                .collect::<Vec<_>>(),
1204                        )
1205                    })
1206                    .collect::<Vec<_>>(),
1207                expected.collect::<Vec<_>>()
1208            );
1209        }
1210    }
1211
#[test]
fn test_helper_fn() {
    // `token` must produce a `Symbol::Token` with the given name and a `0..0` span.
    let expected = Symbol::Token(String::from("A"), Span::new(0, 0));
    assert_eq!(expected, token("A"));
}
1216
#[test]
fn test_symbol_eq() {
    // Symbols are equal only when both their variant and their name agree.
    let rule_a = rule("A");
    assert_eq!(rule_a, rule("A"));
    assert_ne!(rule_a, rule("B"));
    assert_ne!(rule_a, token("A"));
}
1223
#[test]
fn test_rule() {
    // The whitespace is spelled out because the spans asserted below are byte offsets.
    let src = concat!(
        "\n",
        "            %%\n",
        "            A : 'a';\n",
        "        ",
    )
    .to_string();
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();

    let pidxs = &grm.get_rule("A").unwrap().pidxs;
    assert_eq!(*pidxs, vec![0]);

    let a_span = Span::new(33, 34);
    let expected = Production {
        symbols: vec![token_span("a", a_span)],
        precedence: None,
        action: None,
        prod_span: Span::new(32, 35),
    };
    assert_eq!(grm.prods[pidxs[0]], expected);
    assert_eq!(&src[a_span.start()..a_span.end()], "a");
}
1249
#[test]
fn test_rule_production_simple() {
    // Two separate definitions of `A` must end up as two productions of the same rule. The
    // whitespace is spelled out because the spans asserted below are byte offsets.
    let src = concat!(
        "\n",
        "            %%\n",
        "            A : 'a';\n",
        "            A : 'b';\n",
        "        ",
    )
    .to_string();
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();
    let pidxs = &grm.get_rule("A").unwrap().pidxs;

    let a_span = Span::new(33, 34);
    let first = Production {
        symbols: vec![token_span("a", a_span)],
        precedence: None,
        action: None,
        prod_span: Span::new(32, 35),
    };
    assert_eq!(grm.prods[pidxs[0]], first);
    assert_eq!(&src[a_span.start()..a_span.end()], "a");

    let b_span = Span::new(54, 55);
    let second = Production {
        symbols: vec![token_span("b", b_span)],
        precedence: None,
        action: None,
        prod_span: Span::new(53, 56),
    };
    assert_eq!(grm.prods[pidxs[1]], second);
    assert_eq!(&src[b_span.start()..b_span.end()], "b");
}
1286
#[test]
fn test_rule_empty() {
    /// Production number `n` of the rule called `rule`.
    fn prod<'a>(grm: &'a GrammarAST, rule: &str, n: usize) -> &'a Production {
        &grm.prods[grm.get_rule(rule).unwrap().pidxs[n]]
    }

    /// The production expected for an empty alternative located at `at`.
    fn empty_at(at: usize) -> Production {
        Production {
            symbols: vec![],
            precedence: None,
            action: None,
            prod_span: Span::new(at, at),
        }
    }

    // The whitespace is spelled out because the spans asserted below are byte offsets.
    let src = concat!(
        "\n",
        "            %%\n",
        "            A : ;\n",
        "            B : 'b' | ;\n",
        "            C : | 'c';\n",
        "        ",
    )
    .to_string();
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();

    assert_eq!(*prod(&grm, "A", 0), empty_at(32));

    let b_span = Span::new(51, 52);
    assert_eq!(
        *prod(&grm, "B", 0),
        Production {
            symbols: vec![token_span("b", b_span)],
            precedence: None,
            action: None,
            prod_span: Span::new(50, 53),
        }
    );
    assert_eq!(&src[b_span.start()..b_span.end()], "b");
    assert_eq!(*prod(&grm, "B", 1), empty_at(56));

    assert_eq!(*prod(&grm, "C", 0), empty_at(74));
    let c_span = Span::new(77, 78);
    assert_eq!(
        *prod(&grm, "C", 1),
        Production {
            symbols: vec![token_span("c", c_span)],
            precedence: None,
            action: None,
            prod_span: Span::new(76, 79),
        }
    );
    assert_eq!(&src[c_span.start()..c_span.end()], "c");
}
1354
#[test]
fn test_empty_program() {
    // A trailing `%%` with nothing after it must still parse.
    let src = String::from("%%\nA : 'a';\n%%");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    parse(kind, &src).unwrap();
}
1364
#[test]
fn test_multiple_symbols() {
    // A quoted symbol followed by a bare one: the first is a token, the second a rule.
    let src = String::from("%%\nA : 'a' B;");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();

    let a_span = Span::new(8, 9);
    let b_span = Span::new(11, 12);
    let expected = Production {
        symbols: vec![token_span("a", a_span), rule_span("B", b_span)],
        precedence: None,
        action: None,
        prod_span: Span::new(7, 12),
    };
    let pidx = grm.get_rule("A").unwrap().pidxs[0];
    assert_eq!(grm.prods[pidx], expected);
    assert_eq!(&src[a_span.start()..a_span.end()], "a");
    assert_eq!(&src[b_span.start()..b_span.end()], "B");
}
1387
#[test]
fn test_token_types() {
    // Single-quoted and double-quoted symbols are both tokens.
    let src = String::from("%%\nA : 'a' \"b\";");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();

    let a_span = Span::new(8, 9);
    let b_span = Span::new(12, 13);
    let expected = Production {
        symbols: vec![token_span("a", a_span), token_span("b", b_span)],
        precedence: None,
        action: None,
        prod_span: Span::new(7, 14),
    };
    let pidx = grm.get_rule("A").unwrap().pidxs[0];
    assert_eq!(grm.prods[pidx], expected);
    assert_eq!(&src[a_span.start()..a_span.end()], "a");
    assert_eq!(&src[b_span.start()..b_span.end()], "b");
}
1410
#[test]
fn test_declaration_start() {
    // `%start` records the rule name together with the span of that name.
    let src = String::from("%start   A\n%%\nA : a;");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();
    let start = grm.start.unwrap();
    let expected = (String::from("A"), Span::new(9, 10));
    assert_eq!(start, expected);
}
1421
#[test]
fn test_declaration_token() {
    // A bare name after `%token` is registered as a token.
    let src = String::from("%token   a\n%%\nA : a;");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();
    assert!(grm.has_token("a"));
}
1432
#[test]
fn test_declaration_token_literal() {
    // A quoted name after `%token` is registered without its quotes.
    let src = String::from("%token   'a'\n%%\nA : 'a';");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();
    assert!(grm.has_token("a"));
}
1443
#[test]
fn test_declaration_tokens() {
    // Every name on a `%token` line must be registered, whether bare or quoted. The quoted
    // `'d'` is checked too (under its unquoted name), so that the last entry on the line is
    // not silently dropped.
    let src = "%token   a b c 'd'\n%%\nA : a;".to_string();
    let grm = parse(
        YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
        &src,
    )
    .unwrap();
    for name in ["a", "b", "c", "d"] {
        assert!(grm.has_token(name), "token {name:?} was not registered");
    }
}
1456
#[test]
fn test_auto_add_tokens() {
    // A quoted symbol used in a production becomes a token without any `%token` line.
    let src = String::from("%%\nA : 'a';");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();
    assert!(grm.has_token("a"));
}
1467
#[test]
fn test_token_non_literal() {
    // A bare name declared with `%token` is a token, not a rule, when used in a production.
    let src = String::from("%token T %%\nA : T;");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();
    assert!(grm.has_token("T"));

    let t_span = Span::new(16, 17);
    let expected = Production {
        symbols: vec![token_span("T", t_span)],
        precedence: None,
        action: None,
        prod_span: t_span,
    };
    let pidx = grm.get_rule("A").unwrap().pidxs[0];
    assert_eq!(grm.prods[pidx], expected);
    assert_eq!(&src[t_span.start()..t_span.end() + 1], "T;");
}
1489
#[test]
fn test_token_unicode() {
    // Quoted tokens may contain multi-byte characters.
    let src = String::from("%token '❤' %%\nA : '❤';");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, &src).unwrap();
    assert!(grm.has_token("❤"));
}
1500
#[test]
fn test_unicode_err1() {
    // An unquoted multi-byte character in a `%token` declaration is rejected.
    let src = String::from("%token '❤' ❤;");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::IllegalString, 1, 12);
}
1510
#[test]
fn test_unicode_err2() {
    // An unquoted multi-byte character inside a production is rejected.
    let src = String::from("%token '❤'\n%%\nA : '❤' | ❤;");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::IllegalString, 3, 11);
}
1520
#[test]
fn test_missing_end_quote() {
    // The input ends right after a backslash inside an unterminated string.
    let src = String::from("%epp X \"f\\");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::InvalidString, 1, 10);
}
1530
#[test]
fn test_simple_decl_fail() {
    // `%fail` is not a known declaration.
    let src = String::from("%fail x\n%%\nA : a");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::UnknownDeclaration, 1, 1);
}
1540
#[test]
fn test_empty() {
    // Completely empty input ends prematurely at the very first position.
    let src = String::new();
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 1);
}
1550
#[test]
fn test_incomplete_rule1() {
    // The input stops immediately after the rule's colon.
    let src = String::from("%%A:");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 1, 5);
}
1560
#[test]
fn test_line_col_report1() {
    // The error must be reported on the second line, just past the colon.
    let src = String::from("%%\nA:");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 2, 3);
}
1572
#[test]
fn test_line_col_report2() {
    // With a trailing newline the error moves to the start of the (empty) third line.
    let src = String::from("%%\nA:\n");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 3, 1);
}
1585
#[test]
fn test_line_col_report3() {
    // Two blank lines, then eight spaces of indentation before the bad declaration.
    let src = String::from("\n\n        %woo");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::UnknownDeclaration, 3, 9);
}
1598
#[test]
fn test_missing_colon() {
    // A rule name must be followed by a colon.
    let src = String::from("%%A x;");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::MissingColon, 1, 5);
}
1608
#[test]
fn test_premature_end() {
    // Declarations with no `%%` after them end prematurely at the end of the input.
    let src = String::from("%token x");
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, &src);
    res.expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 9);
}
1618
1619    #[test]
1620    fn test_premature_end_multibyte() {
1621        let src = "%actiontype 🦀".to_string();
1622        parse(
1623            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1624            &src,
1625        )
1626        .expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 14);
1627        let src = "%parse-param c:🦀".to_string();
1628        parse(
1629            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1630            &src,
1631        )
1632        .expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 17);
1633        let src = "// 🦀".to_string();
1634        parse(
1635            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1636            &src,
1637        )
1638        .expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 5);
1639    }
1640
1641    #[test]
1642    fn test_same_line() {
1643        let src = "%token
1644x"
1645        .to_string();
1646        parse(
1647            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1648            &src,
1649        )
1650        .expect_error_at_line_col(&src, YaccGrammarErrorKind::ReachedEOL, 1, 7);
1651    }
1652
1653    #[test]
1654    fn test_unknown_declaration() {
1655        let src = "%woo".to_string();
1656        parse(
1657            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1658            &src,
1659        )
1660        .expect_error_at_line_col(&src, YaccGrammarErrorKind::UnknownDeclaration, 1, 1);
1661    }
1662
1663    #[test]
1664    fn test_grmtools_format() {
1665        let src = "
1666          %start A
1667          %%
1668          A -> T: 'b';
1669          B -> Result<(), T>: 'c';
1670          C -> ::std::result::Result<(), T>: 'd';
1671          "
1672        .to_string();
1673        let grm = parse(YaccKind::Grmtools, &src).unwrap();
1674        assert_eq!(grm.rules["A"].actiont, Some("T".to_string()));
1675        assert_eq!(grm.rules["B"].actiont, Some("Result<(), T>".to_string()));
1676        assert_eq!(
1677            grm.rules["C"].actiont,
1678            Some("::std::result::Result<(), T>".to_string())
1679        );
1680    }
1681
1682    #[test]
1683    #[rustfmt::skip]
1684    fn test_precs() {
1685        let src = "
1686          %left '+' '-'
1687          %left '*'
1688          %right '/'
1689          %right '^'
1690          %nonassoc '~'
1691          %%
1692          ".to_string();
1693        let grm = parse(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), &src).unwrap();
1694        assert_eq!(grm.precs.len(), 6);
1695        assert_eq!(grm.precs["+"], (Precedence{level: 0, kind: AssocKind::Left}, Span::new(18, 19)));
1696        assert_eq!(grm.precs["-"], (Precedence{level: 0, kind: AssocKind::Left}, Span::new(22, 23)));
1697        assert_eq!(grm.precs["*"], (Precedence{level: 1, kind: AssocKind::Left}, Span::new(42, 43)));
1698        assert_eq!(grm.precs["/"], (Precedence{level: 2, kind: AssocKind::Right}, Span::new(63, 64)));
1699        assert_eq!(grm.precs["^"], (Precedence{level: 3, kind: AssocKind::Right}, Span::new(84, 85)));
1700        assert_eq!(grm.precs["~"], (Precedence{level: 4, kind: AssocKind::Nonassoc}, Span::new(108, 109)));
1701    }
1702
1703    #[test]
1704    fn test_dup_precs() {
1705        #[rustfmt::skip]
1706        let srcs = vec![
1707            ("
1708          %left 'x'
1709          %left 'x'
1710          %%
1711          ", ((2, 18), (3, 18))),
1712            ("
1713          %left 'x'
1714          %right 'x'
1715          %%
1716          ", ((2, 18), (3, 19))),
1717            ("
1718          %right 'x'
1719          %right 'x'
1720          %%
1721          ", ((2, 19), (3, 19))),
1722            ("
1723          %nonassoc 'x'
1724          %nonassoc 'x'
1725          %%
1726          ", ((2, 22), (3, 22))),
1727            ("
1728          %left 'x'
1729          %nonassoc 'x'
1730          %%
1731          ", ((2, 18), (3, 22))),
1732            ("
1733          %right 'x'
1734          %nonassoc 'x'
1735          %%
1736          ", ((2, 19), (3, 22)))
1737        ];
1738        for (src, (expected_origin, expected_dup)) in srcs.iter() {
1739            parse(
1740                YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1741                src,
1742            )
1743            .expect_error_at_lines_cols(
1744                src,
1745                YaccGrammarErrorKind::DuplicatePrecedence,
1746                &mut [*expected_origin, *expected_dup].into_iter(),
1747            );
1748        }
1749    }
1750
1751    #[test]
1752    fn test_multiple_dup_precs() {
1753        let src = "
1754          %left 'x'
1755          %left 'x'
1756          %right 'x'
1757          %nonassoc 'x'
1758          %left 'y'
1759          %nonassoc 'y'
1760          %right 'y'
1761          %%";
1762
1763        parse(
1764            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1765            src,
1766        )
1767        .expect_multiple_errors(
1768            src,
1769            &mut [
1770                (
1771                    YaccGrammarErrorKind::DuplicatePrecedence,
1772                    vec![(2, 18), (3, 18), (4, 19), (5, 22)],
1773                ),
1774                (
1775                    YaccGrammarErrorKind::DuplicatePrecedence,
1776                    vec![(6, 18), (7, 22), (8, 19)],
1777                ),
1778            ]
1779            .into_iter(),
1780        );
1781    }
1782
1783    #[test]
1784    #[rustfmt::skip]
1785    fn test_prec_override() {
1786        // Taken from the Yacc manual
1787        let src = "
1788            %left '+' '-'
1789            %left '*' '/'
1790            %%
1791            expr : expr '+' expr
1792                 | expr '-' expr
1793                 | expr '*' expr
1794                 | expr '/' expr
1795                 | '-'  expr %prec '*'
1796                 | NAME ;
1797        ";
1798        let grm = parse(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), src).unwrap();
1799        assert_eq!(grm.precs.len(), 4);
1800        assert_eq!(grm.prods[grm.rules["expr"].pidxs[0]].precedence, None);
1801        assert_eq!(grm.prods[grm.rules["expr"].pidxs[3]].symbols.len(), 3);
1802        assert_eq!(grm.prods[grm.rules["expr"].pidxs[4]].symbols.len(), 2);
1803        assert_eq!(grm.prods[grm.rules["expr"].pidxs[4]].precedence, Some("*".to_string()));
1804    }
1805
1806    #[test]
1807    fn test_prec_empty() {
1808        let src = "
1809        %%
1810        expr : 'a'
1811             | %empty %prec 'a';
1812        ";
1813        let grm = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
1814        assert_eq!(
1815            grm.prods[grm.rules["expr"].pidxs[1]].precedence,
1816            Some("a".to_string())
1817        );
1818    }
1819
1820    #[test]
1821    fn test_bad_prec_overrides() {
1822        let src = "
1823        %%
1824        S: 'A' %prec ;
1825        ";
1826        parse(
1827            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1828            src,
1829        )
1830        .expect_error_at_line(src, YaccGrammarErrorKind::IllegalString, 3);
1831    }
1832
1833    #[test]
1834    fn test_parse_avoid_insert() {
1835        let ast = parse(
1836            YaccKind::Eco,
1837            "
1838          %avoid_insert ws1 ws2
1839          %start R
1840          %%
1841          R: 'a';
1842          ",
1843        )
1844        .unwrap();
1845        assert_eq!(
1846            ast.avoid_insert,
1847            Some(
1848                [
1849                    ("ws1".to_string(), Span::new(25, 28)),
1850                    ("ws2".to_string(), Span::new(29, 32))
1851                ]
1852                .iter()
1853                .cloned()
1854                .collect()
1855            )
1856        );
1857        assert!(ast.tokens.get("ws1").is_some());
1858        assert!(ast.tokens.get("ws2").is_some());
1859    }
1860
1861    #[test]
1862    fn test_multiple_avoid_insert() {
1863        let ast = parse(
1864            YaccKind::Eco,
1865            "
1866          %avoid_insert X
1867          %avoid_insert Y
1868          %%
1869          ",
1870        )
1871        .unwrap();
1872        assert_eq!(
1873            ast.avoid_insert,
1874            Some(
1875                [
1876                    ("X".to_string(), Span::new(25, 26)),
1877                    ("Y".to_string(), Span::new(51, 52))
1878                ]
1879                .iter()
1880                .cloned()
1881                .collect()
1882            )
1883        );
1884    }
1885
1886    #[test]
1887    fn test_duplicate_avoid_insert() {
1888        let src = "
1889          %avoid_insert X Y
1890          %avoid_insert Y
1891          %%
1892          ";
1893        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
1894            src,
1895            YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
1896            &mut [(2usize, 27usize), (3, 25)].into_iter(),
1897        );
1898    }
1899
1900    #[test]
1901    fn test_duplicate_avoid_insert2() {
1902        let src = "
1903        %avoid_insert X
1904        %avoid_insert Y Y
1905        %%
1906        ";
1907        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
1908            src,
1909            YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
1910            &mut [(3, 23), (3, 25)].into_iter(),
1911        );
1912    }
1913
1914    #[test]
1915    fn test_multiple_duplicate_avoid_insert() {
1916        let src = "
1917        %avoid_insert X
1918        %avoid_insert Y Y X
1919        %%
1920        ";
1921        parse(YaccKind::Eco, src).expect_multiple_errors(
1922            src,
1923            &mut [
1924                (
1925                    YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
1926                    vec![(3, 23), (3, 25)],
1927                ),
1928                (
1929                    YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
1930                    vec![(2, 23), (3, 27)],
1931                ),
1932            ]
1933            .into_iter(),
1934        );
1935    }
1936
1937    #[test]
1938    fn test_no_implicit_tokens_in_original_yacc() {
1939        let src = "
1940        %implicit_tokens X
1941        %%
1942        ";
1943        parse(
1944            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1945            src,
1946        )
1947        .expect_error_at_line(src, YaccGrammarErrorKind::UnknownDeclaration, 2);
1948    }
1949
1950    #[test]
1951    fn test_parse_implicit_tokens() {
1952        let ast = parse(
1953            YaccKind::Eco,
1954            "
1955          %implicit_tokens ws1 ws2
1956          %start R
1957          %%
1958          R: 'a';
1959          ",
1960        )
1961        .unwrap();
1962        assert_eq!(
1963            ast.implicit_tokens,
1964            Some(
1965                [
1966                    ("ws1".to_string(), Span::new(28, 31)),
1967                    ("ws2".to_string(), Span::new(32, 35))
1968                ]
1969                .iter()
1970                .cloned()
1971                .collect()
1972            )
1973        );
1974        assert!(ast.tokens.get("ws1").is_some());
1975        assert!(ast.tokens.get("ws2").is_some());
1976    }
1977
1978    #[test]
1979    fn test_multiple_implicit_tokens() {
1980        let ast = parse(
1981            YaccKind::Eco,
1982            "
1983          %implicit_tokens X
1984          %implicit_tokens Y
1985          %%
1986          ",
1987        )
1988        .unwrap();
1989        assert_eq!(
1990            ast.implicit_tokens,
1991            Some(
1992                [
1993                    ("X".to_string(), Span::new(28, 29)),
1994                    ("Y".to_string(), Span::new(57, 58))
1995                ]
1996                .iter()
1997                .cloned()
1998                .collect()
1999            )
2000        );
2001    }
2002
2003    #[test]
2004    fn test_duplicate_implicit_tokens() {
2005        let src = "
2006        %implicit_tokens X
2007        %implicit_tokens X Y
2008        %%
2009        ";
2010        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
2011            src,
2012            YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
2013            &mut [(2, 26), (3, 26)].into_iter(),
2014        );
2015    }
2016
2017    #[test]
2018    fn test_duplicate_implicit_tokens2() {
2019        let src = "
2020        %implicit_tokens X X
2021        %implicit_tokens Y
2022        %%
2023        ";
2024        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
2025            src,
2026            YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
2027            &mut [(2, 26), (2, 28)].into_iter(),
2028        );
2029    }
2030
2031    #[test]
2032    fn test_multiple_duplicate_implicit_tokens_and_invalid_rule() {
2033        let src = "
2034        %implicit_tokens X
2035        %implicit_tokens X Y
2036        %implicit_tokens Y
2037        %%
2038        IncompleteRule: ";
2039        parse(YaccKind::Eco, src).expect_multiple_errors(
2040            src,
2041            &mut [
2042                (
2043                    YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
2044                    vec![(2, 26), (3, 26)],
2045                ),
2046                (
2047                    YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
2048                    vec![(3, 28), (4, 26)],
2049                ),
2050                (YaccGrammarErrorKind::IncompleteRule, vec![(6, 25)]),
2051            ]
2052            .into_iter(),
2053        );
2054    }
2055
2056    #[test]
2057    #[rustfmt::skip]
2058    fn test_parse_epp() {
2059        let ast = parse(
2060            YaccKind::Eco,
2061            r#"
2062          %epp A "a"
2063          %epp B 'a'
2064          %epp C '"'
2065          %epp D "'"
2066          %epp E "\""
2067          %epp F '\''
2068          %epp G "a\"b"
2069          %%
2070          R: 'A';
2071          "#,
2072        )
2073        .unwrap();
2074        assert_eq!(ast.epp.len(), 7);
2075        assert_eq!(ast.epp["A"], (Span::new(16, 17),   ("a".to_string(),   Span::new(18, 21))));
2076        assert_eq!(ast.epp["B"], (Span::new(37, 38),   ("a".to_string(),   Span::new(39, 42))));
2077        assert_eq!(ast.epp["C"], (Span::new(58, 59),   ("\"".to_string(),  Span::new(60, 63))));
2078        assert_eq!(ast.epp["D"], (Span::new(79, 80),   ("'".to_string(),   Span::new(81, 84))));
2079        assert_eq!(ast.epp["E"], (Span::new(100, 101), ("\"".to_string(),  Span::new(102, 106))));
2080        assert_eq!(ast.epp["F"], (Span::new(122, 123), ("'".to_string(),   Span::new(124, 128))));
2081        assert_eq!(ast.epp["G"], (Span::new(144, 145), ("a\"b".to_string(),Span::new(146, 152))));
2082    }
2083
2084    #[test]
2085    fn test_duplicate_epp() {
2086        let src = "
2087        %epp A \"a\"
2088        %epp A \"a\"
2089        %epp A \"a\"
2090        %%
2091        ";
2092        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
2093            src,
2094            YaccGrammarErrorKind::DuplicateEPP,
2095            &mut [(2, 14), (3, 14), (4, 14)].into_iter(),
2096        );
2097    }
2098
2099    #[test]
2100    fn test_multiple_duplicate_epp() {
2101        let src = "
2102        %epp A \"a1\"
2103        %epp A \"a2\"
2104        %epp A \"a3\"
2105        %epp B \"b1\"
2106        %epp B \"b2\"
2107        %epp B \"b3\"
2108        %%
2109        ";
2110        parse(YaccKind::Eco, src).expect_multiple_errors(
2111            src,
2112            &mut [
2113                (
2114                    YaccGrammarErrorKind::DuplicateEPP,
2115                    vec![(2, 14), (3, 14), (4, 14)],
2116                ),
2117                (
2118                    YaccGrammarErrorKind::DuplicateEPP,
2119                    vec![(5, 14), (6, 14), (7, 14)],
2120                ),
2121            ]
2122            .into_iter(),
2123        );
2124    }
2125
2126    #[test]
2127    fn test_broken_string() {
2128        let src = "
2129          %epp A \"a
2130          %%
2131          ";
2132        parse(YaccKind::Eco, src).expect_error_at_line(src, YaccGrammarErrorKind::InvalidString, 2);
2133
2134        let src = "
2135        %epp A \"a";
2136        parse(YaccKind::Eco, src).expect_error_at_line(src, YaccGrammarErrorKind::InvalidString, 2);
2137    }
2138
2139    #[test]
2140    fn test_duplicate_start() {
2141        let src = "
2142          %start X
2143          %start X
2144          %%
2145          ";
2146        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
2147            src,
2148            YaccGrammarErrorKind::DuplicateStartDeclaration,
2149            &mut [(2, 18), (3, 18)].into_iter(),
2150        );
2151    }
2152
2153    #[test]
2154    fn test_duplicate_start_premature_end() {
2155        let src = "
2156          %start X
2157          %start X";
2158        parse(YaccKind::Eco, src).expect_multiple_errors(
2159            src,
2160            &mut [
2161                (
2162                    YaccGrammarErrorKind::DuplicateStartDeclaration,
2163                    vec![(2, 18), (3, 18)],
2164                ),
2165                (YaccGrammarErrorKind::PrematureEnd, vec![(3, 19)]),
2166            ]
2167            .into_iter(),
2168        );
2169    }
2170
2171    #[test]
2172    fn test_duplicate_expect() {
2173        let src = "
2174          %expect 1
2175          %expect 2
2176          %expect 3
2177          %%
2178          ";
2179        parse(
2180            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2181            src,
2182        )
2183        .expect_error_at_lines_cols(
2184            src,
2185            YaccGrammarErrorKind::DuplicateExpectDeclaration,
2186            &mut [(2, 19), (3, 19), (4, 19)].into_iter(),
2187        )
2188    }
2189
2190    #[test]
2191    fn test_duplicate_expect_and_missing_colon() {
2192        let src = "
2193          %expect 1
2194          %expect 2
2195          %expect 3
2196          %%
2197          A ;";
2198        parse(
2199            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2200            src,
2201        )
2202        .expect_multiple_errors(
2203            src,
2204            &mut [
2205                (
2206                    YaccGrammarErrorKind::DuplicateExpectDeclaration,
2207                    vec![(2, 19), (3, 19), (4, 19)],
2208                ),
2209                (YaccGrammarErrorKind::MissingColon, vec![(6, 13)]),
2210            ]
2211            .into_iter(),
2212        )
2213    }
2214
2215    #[test]
2216    fn test_duplicate_expectrr() {
2217        let src = "
2218          %expect-rr 1
2219          %expect-rr 2
2220          %expect-rr 3
2221          %%
2222          ";
2223        parse(
2224            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2225            src,
2226        )
2227        .expect_error_at_lines_cols(
2228            src,
2229            YaccGrammarErrorKind::DuplicateExpectRRDeclaration,
2230            &mut [(2, 22), (3, 22), (4, 22)].into_iter(),
2231        );
2232    }
2233
2234    #[test]
2235    fn test_duplicate_expectrr_illegal_name() {
2236        let src = "
2237          %expect-rr 1
2238          %expect-rr 2
2239          %expect-rr 3
2240          %%
2241          +IllegalRuleName+:;";
2242        parse(
2243            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2244            src,
2245        )
2246        .expect_multiple_errors(
2247            src,
2248            &mut [
2249                (
2250                    YaccGrammarErrorKind::DuplicateExpectRRDeclaration,
2251                    vec![(2, 22), (3, 22), (4, 22)],
2252                ),
2253                (YaccGrammarErrorKind::IllegalName, vec![(6, 11)]),
2254            ]
2255            .into_iter(),
2256        );
2257    }
2258
2259    #[test]
2260    fn test_implicit_start() {
2261        let ast = parse(
2262            YaccKind::Eco,
2263            "
2264          %%
2265          R: ;
2266          R2: ;
2267          R3: ;
2268          ",
2269        )
2270        .unwrap();
2271        assert_eq!(ast.start, Some(("R".to_string(), Span::new(24, 25))));
2272    }
2273
2274    #[test]
2275    fn test_action() {
2276        let grm = parse(
2277            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2278            "
2279          %%
2280          A: 'a' B { println!(\"test\"); }
2281           ;
2282          B: 'b' 'c' { add($1, $2); }
2283           | 'd'
2284           ;
2285          D: 'd' {}
2286           ;
2287          ",
2288        )
2289        .unwrap();
2290        let action_str = "println!(\"test\");".to_string();
2291        assert_eq!(
2292            grm.prods[grm.rules["A"].pidxs[0]].action,
2293            Some((action_str.clone(), Span::new(34, 34 + action_str.len())))
2294        );
2295        let action_str = "add($1, $2);".to_string();
2296        assert_eq!(
2297            grm.prods[grm.rules["B"].pidxs[0]].action,
2298            Some((action_str.clone(), Span::new(90, 90 + action_str.len())))
2299        );
2300        assert_eq!(grm.prods[grm.rules["B"].pidxs[1]].action, None);
2301    }
2302
2303    #[test]
2304    fn test_action_ends_in_multibyte() {
2305        let grm = parse(
2306            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2307            "%%A: '_' {(); // 🦀};",
2308        )
2309        .unwrap();
2310        let action_str = "(); // 🦀".to_string();
2311        assert_eq!(
2312            grm.prods[grm.rules["A"].pidxs[0]].action,
2313            Some((action_str.clone(), Span::new(10, 10 + action_str.len())))
2314        );
2315    }
2316
2317    #[test]
2318    fn test_programs() {
2319        let grm = parse(
2320            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2321            "
2322         %%
2323         A: 'a';
2324         %%
2325         fn foo() {}",
2326        )
2327        .unwrap();
2328        assert_eq!(grm.programs, Some("fn foo() {}".to_string()));
2329    }
2330
2331    #[test]
2332    fn test_actions_with_newlines() {
2333        let src = "
2334        %%
2335        A: 'a' { foo();
2336                 bar(); }
2337        ;
2338        B: b';";
2339        parse(
2340            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2341            src,
2342        )
2343        .expect_error_at_line(src, YaccGrammarErrorKind::IllegalString, 6);
2344    }
2345
2346    #[test]
2347    fn test_comments() {
2348        let src = "
2349            // A valid comment
2350            %token   a
2351            /* Another valid comment */
2352            %%\n
2353            A : a;";
2354        let grm = parse(
2355            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2356            src,
2357        )
2358        .unwrap();
2359        assert!(grm.has_token("a"));
2360
2361        let src = "
2362        /* An invalid comment * /
2363        %token   a
2364        %%\n
2365        A : a;";
2366        parse(
2367            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2368            src,
2369        )
2370        .expect_error_at_line(src, YaccGrammarErrorKind::IncompleteComment, 2);
2371
2372        let src = "
2373        %token   a
2374        %%
2375        /* A valid
2376         * multi-line comment
2377         */
2378        /* An invalid comment * /
2379        A : a;";
2380        parse(
2381            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2382            src,
2383        )
2384        .expect_error_at_line(src, YaccGrammarErrorKind::IncompleteComment, 7);
2385
2386        let src = "
2387        %token   a
2388        %%
2389        // Valid comment
2390        A : a";
2391        parse(
2392            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2393            src,
2394        )
2395        .expect_error_at_line(src, YaccGrammarErrorKind::IncompleteRule, 5);
2396    }
2397
2398    #[test]
2399    fn test_action_type() {
2400        let grm = parse(
2401            YaccKind::Original(YaccOriginalActionKind::UserAction),
2402            "
2403         %actiontype T
2404         %%
2405         A: 'a';
2406         %%
2407         fn foo() {}",
2408        )
2409        .unwrap();
2410        assert_eq!(grm.rules["A"].actiont, Some("T".to_string()));
2411    }
2412
2413    #[test]
2414    fn test_only_one_type() {
2415        let src = "
2416         %actiontype T1
2417         %actiontype T2
2418         %actiontype T3
2419         %%
2420         A: 'a';";
2421        parse(
2422            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2423            src,
2424        )
2425        .expect_error_at_lines_cols(
2426            src,
2427            YaccGrammarErrorKind::DuplicateActiontypeDeclaration,
2428            &mut [(2, 22), (3, 22), (4, 22)].into_iter(),
2429        );
2430    }
2431
2432    #[test]
2433    fn test_duplicate_actiontype_and_premature_end() {
2434        let src = "
2435         %actiontype T1
2436         %actiontype T2
2437         %actiontype T3";
2438        parse(
2439            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2440            src,
2441        )
2442        .expect_multiple_errors(
2443            src,
2444            &mut [
2445                (
2446                    YaccGrammarErrorKind::DuplicateActiontypeDeclaration,
2447                    vec![(2, 22), (3, 22), (4, 22)],
2448                ),
2449                (YaccGrammarErrorKind::PrematureEnd, vec![(4, 24)]),
2450            ]
2451            .into_iter(),
2452        )
2453    }
2454
2455    #[test]
2456    fn test_parse_param() {
2457        let src = "
2458          %parse-param a::b: (u64, u64)
2459          %%
2460          A: 'a';
2461         ";
2462        let grm = parse(YaccKind::Original(YaccOriginalActionKind::UserAction), src).unwrap();
2463
2464        assert_eq!(
2465            grm.parse_param,
2466            Some(("a::b".to_owned(), "(u64, u64)".to_owned()))
2467        );
2468    }
2469
2470    #[test]
2471    fn test_parse_generics() {
2472        let src = "
2473          %parse-generics 'a, K, V
2474          %%
2475          A: 'a';
2476         ";
2477        let grm = parse(YaccKind::Original(YaccOriginalActionKind::UserAction), src).unwrap();
2478
2479        assert_eq!(grm.parse_generics, Some("'a, K, V".to_owned()));
2480    }
2481
2482    #[test]
2483    fn test_duplicate_rule() {
2484        let ast = parse(
2485            YaccKind::Grmtools,
2486            "%token A B D
2487%%
2488Expr -> () : %empty | A;
2489Expr -> () : B | 'C';
2490Expr -> () : D;
2491",
2492        )
2493        .unwrap();
2494        let expr_rule = ast.get_rule("Expr").unwrap();
2495        let mut prod_names = HashSet::new();
2496        for pidx in &expr_rule.pidxs {
2497            for sym in &ast.prods[*pidx].symbols {
2498                let name = match sym {
2499                    Symbol::Token(name, _) | Symbol::Rule(name, _) => name.clone(),
2500                };
2501                prod_names.insert(name);
2502            }
2503        }
2504        assert_eq!(ast.prods.len(), 5);
2505        assert_eq!(
2506            prod_names,
2507            HashSet::from_iter(["A", "B", "C", "D"].map(|s| s.to_owned()))
2508        );
2509    }
2510
2511    #[test]
2512    fn test_duplicate_start_and_missing_arrow() {
2513        let src = "%start A
2514%start A
2515%start A
2516%%
2517A -> () : 'a1';
2518B";
2519        parse(YaccKind::Grmtools, src).expect_multiple_errors(
2520            src,
2521            &mut [
2522                (
2523                    YaccGrammarErrorKind::DuplicateStartDeclaration,
2524                    vec![(1, 8), (2, 8), (3, 8)],
2525                ),
2526                (YaccGrammarErrorKind::MissingRightArrow, vec![(6, 2)]),
2527            ]
2528            .into_iter(),
2529        )
2530    }
2531
2532    #[test]
2533    fn test_routines_multiple_errors() {
2534        let mut src = String::from(
2535            "
2536        %start A
2537        %start B
2538        %expect 1
2539        %expect 2
2540        %%
2541        A -> () : 'a';
2542        %%
2543        ",
2544        );
2545        let mut expected_errs = vec![
2546            (
2547                YaccGrammarErrorKind::DuplicateStartDeclaration,
2548                vec![(2, 16), (3, 16)],
2549            ),
2550            (
2551                YaccGrammarErrorKind::DuplicateExpectDeclaration,
2552                vec![(4, 17), (5, 17)],
2553            ),
2554        ];
2555        parse(YaccKind::Grmtools, &src)
2556            .expect_multiple_errors(&src, &mut expected_errs.clone().into_iter());
2557
2558        src.push_str(
2559            "
2560                /* Incomplete comment
2561        ",
2562        );
2563        expected_errs.push((YaccGrammarErrorKind::IncompleteComment, vec![(10, 17)]));
2564        parse(YaccKind::Grmtools, &src)
2565            .expect_multiple_errors(&src, &mut expected_errs.clone().into_iter());
2566    }
2567
2568    #[test]
2569    fn test_expect_unused() {
2570        let src = r#"
2571        %expect-unused A 'b' "c"
2572        %%
2573        A: ;
2574        "#;
2575        let grm = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2576        assert!(
2577            grm.expect_unused
2578                .contains(&Symbol::Rule("A".to_string(), Span::new(24, 25)))
2579        );
2580        assert!(
2581            grm.expect_unused
2582                .contains(&Symbol::Token("b".to_string(), Span::new(27, 28)))
2583        );
2584        assert!(
2585            grm.expect_unused
2586                .contains(&Symbol::Token("c".to_string(), Span::new(31, 32)))
2587        );
2588    }
2589
2590    #[test]
2591    fn test_bad_expect_unused() {
2592        let src = "
2593        %expect-unused %
2594        %%
2595        A: ;
2596        ";
2597        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2598            src,
2599            YaccGrammarErrorKind::UnknownDeclaration,
2600            2,
2601            24,
2602        );
2603    }
2604
2605    #[test]
2606    fn test_unused_symbols() {
2607        let ast = parse(
2608            YaccKind::Original(YaccOriginalActionKind::NoAction),
2609            "
2610        %expect-unused UnusedAllowed 'b'
2611        %token a b
2612        %start Start
2613        %%
2614        Unused: ;
2615        Start: ;
2616        UnusedAllowed: ;
2617        ",
2618        )
2619        .unwrap();
2620
2621        assert_eq!(
2622            ast.unused_symbols()
2623                .map(|sym_idx| { sym_idx.symbol(&ast) })
2624                .collect::<Vec<Symbol>>()
2625                .as_slice(),
2626            &[
2627                Symbol::Rule("Unused".to_string(), Span::new(101, 107)),
2628                Symbol::Token("a".to_string(), Span::new(57, 58))
2629            ]
2630        );
2631
2632        let ast = parse(
2633            YaccKind::Original(YaccOriginalActionKind::NoAction),
2634            "
2635        %start A
2636        %%
2637        A: ;
2638        Rec: Rec | ;
2639        ",
2640        )
2641        .unwrap();
2642        assert_eq!(
2643            ast.unused_symbols()
2644                .map(|sym_idx| sym_idx.symbol(&ast))
2645                .collect::<Vec<Symbol>>()
2646                .as_slice(),
2647            &[Symbol::Rule("Rec".to_string(), Span::new(50, 53))]
2648        );
2649
2650        let ast = parse(
2651            YaccKind::Original(YaccOriginalActionKind::NoAction),
2652            "
2653        %%
2654        A: 'a' | 'z' ;
2655        B: 'a' | 'c' ;
2656        ",
2657        )
2658        .unwrap();
2659        // Check that we warn on B and 'c' but not 'a'
2660        assert_eq!(
2661            ast.unused_symbols()
2662                .map(|sym_idx| sym_idx.symbol(&ast))
2663                .collect::<Vec<Symbol>>()
2664                .as_slice(),
2665            &[
2666                Symbol::Rule("B".to_string(), Span::new(43, 44)),
2667                Symbol::Token("c".to_string(), Span::new(53, 54))
2668            ]
2669        );
2670    }
2671
2672    #[test]
2673    fn test_percent_empty() {
2674        parse(
2675            YaccKind::Original(YaccOriginalActionKind::NoAction),
2676            r#"
2677        %token a
2678        %start A
2679        %%
2680        A: %empty | "a";
2681        "#,
2682        )
2683        .unwrap();
2684
2685        let src = r#"
2686        %token a b
2687        %start A
2688        %%
2689        A: "a" | %empty | "b";
2690        B: %empty | "b";
2691        "#;
2692        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2693
2694        let src = r#"
2695        %token a
2696        %start A
2697        %%
2698        A: %empty "a";
2699        "#;
2700        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2701            src,
2702            YaccGrammarErrorKind::NonEmptyProduction,
2703            5,
2704            12,
2705        );
2706
2707        let src = r#"
2708        %token a
2709        %start A
2710        %%
2711        A: "a" %empty;
2712        "#;
2713        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2714            src,
2715            YaccGrammarErrorKind::NonEmptyProduction,
2716            5,
2717            16,
2718        );
2719    }
2720
2721    #[test]
2722    fn test_action_successor() {
2723        let src = "
2724        %%
2725        A: B {} B;
2726        B: ;
2727        ";
2728        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2729            src,
2730            YaccGrammarErrorKind::ProductionNotTerminated,
2731            3,
2732            17,
2733        );
2734
2735        let src = "
2736        %%
2737        A: B B {};
2738        B: {} ;
2739        ";
2740        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2741    }
2742
2743    #[test]
2744    fn test_empty_production_spans_issue_473() {
2745        let empty_prod_conflicts = [
2746            (
2747                "%start Expr
2748%%
2749Expr: %empty | Factor;
2750Factor: ')' Expr ')';
2751",
2752                (0, Span::new(21, 27)),
2753            ),
2754            (
2755                "%start Expr
2756%%
2757Expr: | Factor;
2758Factor: ')' Expr ')';
2759",
2760                (0, Span::new(21, 21)),
2761            ),
2762            (
2763                "%start Expr
2764%%
2765Expr:| Factor;
2766Factor: ')' Expr ')';
2767",
2768                (0, Span::new(20, 20)),
2769            ),
2770            (
2771                "%start Expr
2772%%
2773Expr: Factor | %empty;
2774Factor: ')' Expr ')';
2775",
2776                (1, Span::new(30, 36)),
2777            ),
2778            (
2779                "%start Expr
2780%%
2781Expr: Factor | ;
2782Factor: ')' Expr ')';
2783",
2784                (1, Span::new(30, 30)),
2785            ),
2786            (
2787                "%start Expr
2788%%
2789Expr: Factor|;
2790Factor: ')' Expr ')';
2791",
2792                (1, Span::new(28, 28)),
2793            ),
2794        ];
2795
2796        for (i, (src, (empty_pidx, empty_span))) in empty_prod_conflicts.iter().enumerate() {
2797            eprintln!("{}", i);
2798            let ast = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2799            assert_eq!(
2800                ast.prods[ast.get_rule("Expr").unwrap().pidxs[*empty_pidx]],
2801                Production {
2802                    symbols: vec![],
2803                    precedence: None,
2804                    action: None,
2805                    prod_span: *empty_span,
2806                }
2807            );
2808        }
2809    }
2810}