cfgrammar/yacc/
parser.rs

1// Note: this is the parser for both YaccKind::Original(YaccOriginalActionKind::GenericParseTree) and YaccKind::Eco yacc kinds.
2
3#[cfg(feature = "bincode")]
4use bincode::{Decode, Encode};
5use num_traits::PrimInt;
6use regex::Regex;
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9use std::{
10    collections::{HashMap, hash_map::Entry},
11    error::Error,
12    fmt,
13    str::FromStr,
14    sync::LazyLock,
15};
16
17use crate::{
18    Span, Spanned,
19    header::{GrmtoolsSectionParser, HeaderErrorKind},
20};
21
/// Result type used by the Yacc grammar front-end: on failure it carries every
/// [YaccGrammarError] that was collected, rather than only the first one.
pub type YaccGrammarResult<T> = Result<T, Vec<YaccGrammarError>>;
23
24use super::{
25    AssocKind, Precedence, YaccKind,
26    ast::{GrammarAST, Symbol},
27};
28
/// The various different possible Yacc parser errors.
///
/// The user-facing message for each variant is produced by this type's `fmt::Display`
/// implementation; how the spans attached to an error of a given kind should be read is
/// reported by `YaccGrammarError::spanskind`.
#[derive(Debug, PartialEq, Eq, Clone)]
#[non_exhaustive]
pub enum YaccGrammarErrorKind {
    /// An integer was expected but could not be parsed.
    IllegalInteger,
    /// A rule/start name did not match the name syntax.
    IllegalName,
    /// A string was expected but was malformed.
    IllegalString,
    /// The input ended before a rule was terminated with `;`.
    IncompleteRule,
    /// A comment was not closed.
    IncompleteComment,
    /// An action was not closed.
    IncompleteAction,
    /// A `:` was expected (after a rule name or a `%parse-param` name) but not found.
    MissingColon,
    /// A `->` was expected after a rule name (Grmtools-style grammars) but not found.
    MissingRightArrow,
    /// Braces do not match up.
    MismatchedBrace,
    /// `%empty` was used in a production that is not empty.
    NonEmptyProduction,
    /// The file ended before the `%%` that terminates the declarations section.
    PrematureEnd,
    /// An action was not followed by `|` or `;`.
    ProductionNotTerminated,
    /// A programs section was found, which is not currently supported.
    ProgramsNotSupported,
    /// A `%`-declaration that the parser does not recognise.
    UnknownDeclaration,
    /// `%prec` was not followed by a token name.
    PrecNotFollowedByToken,
    /// A token was given more than one precedence.
    DuplicatePrecedence,
    /// A token was listed more than once in `%avoid_insert`.
    DuplicateAvoidInsertDeclaration,
    /// A token was listed more than once in `%implicit_tokens`.
    DuplicateImplicitTokensDeclaration,
    /// `%expect` was declared more than once.
    DuplicateExpectDeclaration,
    /// `%expect-rr` was declared more than once.
    DuplicateExpectRRDeclaration,
    /// `%start` was declared more than once.
    DuplicateStartDeclaration,
    /// `%actiontype` was declared more than once.
    DuplicateActiontypeDeclaration,
    /// More than one `%epp` declaration was given for the same token.
    DuplicateEPP,
    /// The end of a line was reached without finding expected content.
    ReachedEOL,
    /// A string was invalid.
    InvalidString,
    /// No start rule was specified.
    NoStartRule,
    /// Neither a rule name nor a token could be parsed where a symbol was expected.
    UnknownSymbol,
    /// The named start rule does not appear in the grammar.
    InvalidStartRule(String),
    /// A production references the named rule, which is unknown.
    UnknownRuleRef(String),
    /// The named token is unknown.
    UnknownToken(String),
    /// The named token was used in `%prec` but has no precedence attached.
    NoPrecForToken(String),
    /// The named token appears in an `%epp` declaration but is not referenced in the grammar.
    UnknownEPP(String),
    /// The given character was expected in the input.
    ExpectedInput(char),
    /// The yacc kind is invalid.
    InvalidYaccKind,
    /// An error in the `%grmtools` section; the [SpansKind] says how to interpret the
    /// error's spans.
    Header(HeaderErrorKind, SpansKind),
}
69
/// Any error from the Yacc parser returns an instance of this struct.
///
/// It pairs the kind of error with the source span(s) it relates to. Formatting it with
/// `{}` prints only the message for `kind`; spans are not included in the output.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct YaccGrammarError {
    /// Uniquely identifies each error.
    pub(crate) kind: YaccGrammarErrorKind,
    /// Always contains at least 1 span.
    ///
    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
    /// for the meaning and interpretation of spans and their ordering.
    pub(crate) spans: Vec<Span>,
}
81
// No methods are overridden: the `Debug` and `Display` impls of `YaccGrammarError` supply
// everything `std::error::Error` requires.
impl Error for YaccGrammarError {}
83
84impl fmt::Display for YaccGrammarError {
85    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
86        write!(f, "{}", self.kind)
87    }
88}
89
90impl fmt::Display for YaccGrammarErrorKind {
91    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
92        let s = match self {
93            YaccGrammarErrorKind::ExpectedInput(c) => &format!("Expected input '{c}'"),
94            YaccGrammarErrorKind::IllegalInteger => "Illegal integer",
95            YaccGrammarErrorKind::IllegalName => "Illegal name",
96            YaccGrammarErrorKind::IllegalString => "Illegal string",
97            YaccGrammarErrorKind::IncompleteRule => "Incomplete rule",
98            YaccGrammarErrorKind::IncompleteComment => "Incomplete comment",
99            YaccGrammarErrorKind::IncompleteAction => "Incomplete action",
100            YaccGrammarErrorKind::MissingColon => "Missing ':'",
101            YaccGrammarErrorKind::MissingRightArrow => "Missing '->'",
102            YaccGrammarErrorKind::MismatchedBrace => "Mismatched brace",
103            YaccGrammarErrorKind::NonEmptyProduction => "%empty used in non-empty production",
104            YaccGrammarErrorKind::PrematureEnd => "File ends prematurely",
105            YaccGrammarErrorKind::ProductionNotTerminated => "Production not terminated correctly",
106            YaccGrammarErrorKind::ProgramsNotSupported => "Programs not currently supported",
107            YaccGrammarErrorKind::UnknownDeclaration => "Unknown declaration",
108            YaccGrammarErrorKind::DuplicatePrecedence => "Token has multiple precedences specified",
109            YaccGrammarErrorKind::PrecNotFollowedByToken => "%prec not followed by token name",
110            YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration => {
111                "Duplicated %avoid_insert declaration"
112            }
113            YaccGrammarErrorKind::DuplicateExpectDeclaration => "Duplicated %expect declaration",
114            YaccGrammarErrorKind::DuplicateExpectRRDeclaration => {
115                "Duplicate %expect-rr declaration"
116            }
117            YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration => {
118                "Duplicated %implicit_tokens declaration"
119            }
120            YaccGrammarErrorKind::DuplicateStartDeclaration => "Duplicated %start declaration",
121            YaccGrammarErrorKind::DuplicateActiontypeDeclaration => {
122                "Duplicate %actiontype declaration"
123            }
124            YaccGrammarErrorKind::DuplicateEPP => "Duplicate %epp declaration for this token",
125            YaccGrammarErrorKind::ReachedEOL => {
126                "Reached end of line without finding expected content"
127            }
128            YaccGrammarErrorKind::InvalidString => "Invalid string",
129            YaccGrammarErrorKind::NoStartRule => return write!(f, "No start rule specified"),
130            YaccGrammarErrorKind::UnknownSymbol => "Unknown symbol, expected a rule or token",
131            YaccGrammarErrorKind::InvalidStartRule(name) => {
132                return write!(f, "Start rule '{}' does not appear in grammar", name);
133            }
134            YaccGrammarErrorKind::UnknownRuleRef(name) => {
135                return write!(f, "Unknown reference to rule '{}'", name);
136            }
137            YaccGrammarErrorKind::UnknownToken(name) => {
138                return write!(f, "Unknown token '{}'", name);
139            }
140            YaccGrammarErrorKind::NoPrecForToken(name) => {
141                return write!(
142                    f,
143                    "Token '{}' used in %prec has no precedence attached",
144                    name
145                );
146            }
147            YaccGrammarErrorKind::UnknownEPP(name) => {
148                return write!(
149                    f,
150                    "Token '{}' in %epp declaration is not referenced in the grammar",
151                    name
152                );
153            }
154            YaccGrammarErrorKind::InvalidYaccKind => "Invalid yacc kind",
155            YaccGrammarErrorKind::Header(hk, _) => &format!("Error in '%grmtools' {}", hk),
156        };
157        write!(f, "{}", s)
158    }
159}
160
/// The various different possible Yacc parser warnings.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "bincode", derive(Encode, Decode))]
#[non_exhaustive]
pub enum YaccGrammarWarningKind {
    /// A rule is unused (displayed as "Unused rule").
    UnusedRule,
    /// A token is unused (displayed as "Unused token").
    UnusedToken,
}
170
/// Any warning from the Yacc parser returns an instance of this struct.
///
/// Formatting it with `{}` prints only the message for `kind`.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "bincode", derive(Encode, Decode))]
pub struct YaccGrammarWarning {
    /// The specific kind of warning.
    pub(crate) kind: YaccGrammarWarningKind,
    /// Always contains at least 1 span.
    ///
    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
    /// for the meaning and interpretation of spans and their ordering.
    pub(crate) spans: Vec<Span>,
}
184
185impl fmt::Display for YaccGrammarWarning {
186    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
187        write!(f, "{}", self.kind)
188    }
189}
190
191impl fmt::Display for YaccGrammarWarningKind {
192    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193        let s = match self {
194            YaccGrammarWarningKind::UnusedRule => "Unused rule",
195            YaccGrammarWarningKind::UnusedToken => "Unused token",
196        };
197        write!(f, "{}", s)
198    }
199}
200
201impl Spanned for YaccGrammarWarning {
202    /// Returns the spans associated with the error, always containing at least 1 span.
203    ///
204    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
205    /// for the meaning and interpretation of spans and their ordering.
206    fn spans(&self) -> &[Span] {
207        self.spans.as_slice()
208    }
209
210    /// Returns the [SpansKind] associated with this error.
211    fn spanskind(&self) -> SpansKind {
212        match self.kind {
213            YaccGrammarWarningKind::UnusedRule | YaccGrammarWarningKind::UnusedToken => {
214                SpansKind::Error
215            }
216        }
217    }
218}
219
/// Indicates how to interpret the spans of an error or warning.
///
/// Returned by the `spanskind` method of the `Spanned` implementations for
/// [YaccGrammarError] and [YaccGrammarWarning].
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
#[non_exhaustive]
pub enum SpansKind {
    /// The first span is the first occurrence, and a span for each subsequent occurrence.
    DuplicationError,
    /// Contains a single span at the site of the error.
    Error,
}
229
230impl Spanned for YaccGrammarError {
231    /// Returns the spans associated with the error, always containing at least 1 span.
232    ///
233    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
234    /// for the meaning and interpretation of spans and their ordering.
235    fn spans(&self) -> &[Span] {
236        self.spans.as_slice()
237    }
238
239    /// Returns the [SpansKind] associated with this error.
240    fn spanskind(&self) -> SpansKind {
241        match self.kind {
242            YaccGrammarErrorKind::IllegalInteger
243            | YaccGrammarErrorKind::IllegalName
244            | YaccGrammarErrorKind::IllegalString
245            | YaccGrammarErrorKind::IncompleteRule
246            | YaccGrammarErrorKind::IncompleteComment
247            | YaccGrammarErrorKind::IncompleteAction
248            | YaccGrammarErrorKind::MissingColon
249            | YaccGrammarErrorKind::MissingRightArrow
250            | YaccGrammarErrorKind::MismatchedBrace
251            | YaccGrammarErrorKind::NonEmptyProduction
252            | YaccGrammarErrorKind::PrematureEnd
253            | YaccGrammarErrorKind::ProductionNotTerminated
254            | YaccGrammarErrorKind::PrecNotFollowedByToken
255            | YaccGrammarErrorKind::ProgramsNotSupported
256            | YaccGrammarErrorKind::UnknownDeclaration
257            | YaccGrammarErrorKind::ReachedEOL
258            | YaccGrammarErrorKind::InvalidString
259            | YaccGrammarErrorKind::NoStartRule
260            | YaccGrammarErrorKind::UnknownSymbol
261            | YaccGrammarErrorKind::InvalidStartRule(_)
262            | YaccGrammarErrorKind::UnknownRuleRef(_)
263            | YaccGrammarErrorKind::UnknownToken(_)
264            | YaccGrammarErrorKind::NoPrecForToken(_)
265            | YaccGrammarErrorKind::InvalidYaccKind
266            | YaccGrammarErrorKind::ExpectedInput(_)
267            | YaccGrammarErrorKind::UnknownEPP(_) => SpansKind::Error,
268            YaccGrammarErrorKind::DuplicatePrecedence
269            | YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration
270            | YaccGrammarErrorKind::DuplicateExpectDeclaration
271            | YaccGrammarErrorKind::DuplicateExpectRRDeclaration
272            | YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration
273            | YaccGrammarErrorKind::DuplicateStartDeclaration
274            | YaccGrammarErrorKind::DuplicateActiontypeDeclaration
275            | YaccGrammarErrorKind::DuplicateEPP => SpansKind::DuplicationError,
276            YaccGrammarErrorKind::Header(_, spanskind) => spanskind,
277        }
278    }
279}
280
/// Parser state for a single Yacc grammar source string.
///
/// Created with `YaccParser::new`, driven by `parse`, and consumed by `build`, which
/// hands back the accumulated [GrammarAST].
pub(crate) struct YaccParser<'a> {
    /// Which Yacc dialect is being parsed; some declarations and the rule header syntax
    /// depend on it.
    yacc_kind: YaccKind,
    /// The complete grammar source. Positions passed between parsing methods are byte
    /// offsets into this string.
    src: &'a str,
    /// Newline counter, initialised to 0. Declaration parsing snapshots it and compares it
    /// afterwards to detect the end of a line-delimited token list.
    // NOTE(review): presumably incremented by `parse_ws`, which is not visible here — confirm.
    num_newlines: usize,
    /// The AST being built up as parsing proceeds.
    ast: GrammarAST,
    /// The type given by `%actiontype` together with the span of the declaration, if one
    /// has been seen.
    global_actiontype: Option<(String, Span)>,
}
288
// Matches a name anchored at the start of the input: a letter, `_` or `.`, followed by any
// number of letters, digits, `_` or `.`.
static RE_NAME: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^[a-zA-Z_.][a-zA-Z0-9_.]*").unwrap());
// Matches a token anchored at the start of the input. One of:
//   * a double-quoted string with at least one character between the quotes (non-greedy),
//   * a single-quoted string with at least one character between the quotes (non-greedy),
//   * an identifier: a letter or `_` followed by any number of letters, digits or `_`.
static RE_TOKEN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("^(?:(\".+?\")|('.+?')|([a-zA-Z_][a-zA-Z_0-9]*))").unwrap());
293
294fn add_duplicate_occurrence(
295    errs: &mut Vec<YaccGrammarError>,
296    kind: YaccGrammarErrorKind,
297    orig_span: Span,
298    dup_span: Span,
299) {
300    if !errs.iter_mut().any(|e| {
301        if e.kind == kind && e.spans[0] == orig_span {
302            e.spans.push(dup_span);
303            true
304        } else {
305            false
306        }
307    }) {
308        errs.push(YaccGrammarError {
309            kind,
310            spans: vec![orig_span, dup_span],
311        });
312    }
313}
314
315/// The actual parser is intended to be entirely opaque from outside users.
316impl YaccParser<'_> {
317    pub(crate) fn new(yacc_kind: YaccKind, src: &str) -> YaccParser {
318        YaccParser {
319            yacc_kind,
320            src,
321            num_newlines: 0,
322            ast: GrammarAST::new(),
323            global_actiontype: None,
324        }
325    }
326
327    pub(crate) fn parse(&mut self) -> YaccGrammarResult<usize> {
328        let mut errs = Vec::new();
329        let (_, pos) = GrmtoolsSectionParser::new(self.src, false)
330            .parse()
331            .map_err(|mut errs| errs.drain(..).map(|e| e.into()).collect::<Vec<_>>())?;
332        // We pass around an index into the *bytes* of self.src. We guarantee that at all times
333        // this points to the beginning of a UTF-8 character (since multibyte characters exist, not
334        // every byte within the string is also a valid character).
335        let mut result = self.parse_declarations(pos, &mut errs);
336        result = self.parse_rules(match result {
337            Ok(i) => i,
338            Err(e) => {
339                errs.push(e);
340                return Err(errs);
341            }
342        });
343        result = self.parse_programs(
344            match result {
345                Ok(i) => i,
346                Err(e) => {
347                    errs.push(e);
348                    return Err(errs);
349                }
350            },
351            &mut errs,
352        );
353        match result {
354            Ok(i) if errs.is_empty() => Ok(i),
355            Err(e) => {
356                errs.push(e);
357                Err(errs)
358            }
359            _ => Err(errs),
360        }
361    }
362
363    pub(crate) fn build(self) -> GrammarAST {
364        self.ast
365    }
366
367    fn parse_declarations(
368        &mut self,
369        mut i: usize,
370        errs: &mut Vec<YaccGrammarError>,
371    ) -> Result<usize, YaccGrammarError> {
372        i = self.parse_ws(i, true)?;
373        let mut prec_level = 0;
374        while i < self.src.len() {
375            if self.lookahead_is("%%", i).is_some() {
376                return Ok(i);
377            }
378            if let Some(j) = self.lookahead_is("%token", i) {
379                i = self.parse_ws(j, false)?;
380                while i < self.src.len() && self.lookahead_is("%", i).is_none() {
381                    let (j, n, span, _) = self.parse_token(i)?;
382                    let (idx, new_tok) = self.ast.tokens.insert_full(n);
383                    if new_tok {
384                        self.ast.spans.push(span);
385                    }
386                    self.ast.token_directives.insert(idx);
387                    i = self.parse_ws(j, true)?;
388                }
389                continue;
390            }
391            if let YaccKind::Original(_) = self.yacc_kind {
392                if let Some(j) = self.lookahead_is("%actiontype", i) {
393                    i = self.parse_ws(j, false)?;
394                    let (j, n) = self.parse_to_eol(i)?;
395                    let span = Span::new(i, j);
396                    if let Some((_, orig_span)) = self.global_actiontype {
397                        add_duplicate_occurrence(
398                            errs,
399                            YaccGrammarErrorKind::DuplicateActiontypeDeclaration,
400                            orig_span,
401                            span,
402                        );
403                    } else {
404                        self.global_actiontype = Some((n, span));
405                    }
406                    i = self.parse_ws(j, true)?;
407                    continue;
408                }
409            }
410            if let Some(j) = self.lookahead_is("%start", i) {
411                i = self.parse_ws(j, false)?;
412                let (j, n) = self.parse_name(i)?;
413                let span = Span::new(i, j);
414                if let Some((_, orig_span)) = self.ast.start {
415                    add_duplicate_occurrence(
416                        errs,
417                        YaccGrammarErrorKind::DuplicateStartDeclaration,
418                        orig_span,
419                        span,
420                    );
421                } else {
422                    self.ast.start = Some((n, span));
423                }
424                i = self.parse_ws(j, true)?;
425                continue;
426            }
427            if let Some(j) = self.lookahead_is("%epp", i) {
428                i = self.parse_ws(j, false)?;
429                let (j, n, _, _) = self.parse_token(i)?;
430                let span = Span::new(i, j);
431                i = self.parse_ws(j, false)?;
432                let (j, v) = self.parse_string(i)?;
433                let vspan = Span::new(i, j);
434                match self.ast.epp.entry(n) {
435                    Entry::Occupied(orig) => {
436                        let (orig_span, _) = orig.get();
437                        add_duplicate_occurrence(
438                            errs,
439                            YaccGrammarErrorKind::DuplicateEPP,
440                            *orig_span,
441                            span,
442                        )
443                    }
444                    Entry::Vacant(epp) => {
445                        epp.insert((span, (v, vspan)));
446                    }
447                }
448                i = self.parse_ws(j, true)?;
449                continue;
450            }
451            if let Some(j) = self.lookahead_is("%expect-rr", i) {
452                i = self.parse_ws(j, false)?;
453                let (j, n) = self.parse_int(i)?;
454                let span = Span::new(i, j);
455                if let Some((_, orig_span)) = self.ast.expectrr {
456                    add_duplicate_occurrence(
457                        errs,
458                        YaccGrammarErrorKind::DuplicateExpectRRDeclaration,
459                        orig_span,
460                        span,
461                    );
462                } else {
463                    self.ast.expectrr = Some((n, span));
464                }
465                i = self.parse_ws(j, true)?;
466                continue;
467            }
468            if let Some(j) = self.lookahead_is("%expect-unused", i) {
469                i = self.parse_ws(j, false)?;
470                while i < self.src.len() && self.lookahead_is("%", i).is_none() {
471                    let j = match self.parse_name(i) {
472                        Ok((j, n)) => {
473                            self.ast
474                                .expect_unused
475                                .push(Symbol::Rule(n, Span::new(i, j)));
476                            j
477                        }
478                        Err(_) => match self.parse_token(i) {
479                            Ok((j, n, span, _)) => {
480                                self.ast.expect_unused.push(Symbol::Token(n, span));
481                                j
482                            }
483                            Err(_) => {
484                                return Err(self.mk_error(YaccGrammarErrorKind::UnknownSymbol, i));
485                            }
486                        },
487                    };
488                    i = self.parse_ws(j, true)?;
489                }
490                continue;
491            }
492            if let Some(j) = self.lookahead_is("%expect", i) {
493                i = self.parse_ws(j, false)?;
494                let (j, n) = self.parse_int(i)?;
495                let span = Span::new(i, j);
496                if let Some((_, orig_span)) = self.ast.expect {
497                    add_duplicate_occurrence(
498                        errs,
499                        YaccGrammarErrorKind::DuplicateExpectDeclaration,
500                        orig_span,
501                        span,
502                    );
503                } else {
504                    self.ast.expect = Some((n, span));
505                }
506                i = self.parse_ws(j, true)?;
507                continue;
508            }
509            if let Some(j) = self.lookahead_is("%avoid_insert", i) {
510                i = self.parse_ws(j, false)?;
511                let num_newlines = self.num_newlines;
512                if self.ast.avoid_insert.is_none() {
513                    self.ast.avoid_insert = Some(HashMap::new());
514                }
515                while j < self.src.len() && self.num_newlines == num_newlines {
516                    let (j, n, span, _) = self.parse_token(i)?;
517                    if self.ast.tokens.insert(n.clone()) {
518                        self.ast.spans.push(span);
519                    }
520
521                    match self.ast.avoid_insert.as_mut().unwrap().entry(n) {
522                        Entry::Occupied(occupied) => {
523                            add_duplicate_occurrence(
524                                errs,
525                                YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
526                                *occupied.get(),
527                                span,
528                            );
529                        }
530                        Entry::Vacant(vacant) => {
531                            vacant.insert(span);
532                        }
533                    }
534                    i = self.parse_ws(j, true)?;
535                }
536                continue;
537            }
538            if let Some(j) = self.lookahead_is("%parse-param", i) {
539                i = self.parse_ws(j, false)?;
540                let (j, name) = self.parse_to_single_colon(i)?;
541                match self.lookahead_is(":", j) {
542                    Some(j) => i = self.parse_ws(j, false)?,
543                    None => {
544                        return Err(self.mk_error(YaccGrammarErrorKind::MissingColon, j));
545                    }
546                }
547                let (j, ty) = self.parse_to_eol(i)?;
548                self.ast.parse_param = Some((name, ty));
549                i = self.parse_ws(j, true)?;
550                continue;
551            }
552            if let Some(j) = self.lookahead_is("%parse-generics", i) {
553                i = self.parse_ws(j, false)?;
554                let (j, ty) = self.parse_to_eol(i)?;
555                self.ast.parse_generics = Some(ty);
556                i = self.parse_ws(j, true)?;
557                continue;
558            }
559            if let YaccKind::Eco = self.yacc_kind {
560                if let Some(j) = self.lookahead_is("%implicit_tokens", i) {
561                    i = self.parse_ws(j, false)?;
562                    let num_newlines = self.num_newlines;
563                    if self.ast.implicit_tokens.is_none() {
564                        self.ast.implicit_tokens = Some(HashMap::new());
565                    }
566                    while j < self.src.len() && self.num_newlines == num_newlines {
567                        let (j, n, span, _) = self.parse_token(i)?;
568                        if self.ast.tokens.insert(n.clone()) {
569                            self.ast.spans.push(span);
570                        }
571                        match self.ast.implicit_tokens.as_mut().unwrap().entry(n) {
572                            Entry::Occupied(entry) => {
573                                let orig_span = *entry.get();
574                                add_duplicate_occurrence(
575                                    errs,
576                                    YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
577                                    orig_span,
578                                    span,
579                                );
580                            }
581                            Entry::Vacant(entry) => {
582                                entry.insert(span);
583                            }
584                        }
585                        i = self.parse_ws(j, true)?;
586                    }
587                    continue;
588                }
589            }
590            {
591                let k;
592                let kind;
593                if let Some(j) = self.lookahead_is("%left", i) {
594                    kind = AssocKind::Left;
595                    k = j;
596                } else if let Some(j) = self.lookahead_is("%right", i) {
597                    kind = AssocKind::Right;
598                    k = j;
599                } else if let Some(j) = self.lookahead_is("%nonassoc", i) {
600                    kind = AssocKind::Nonassoc;
601                    k = j;
602                } else {
603                    return Err(self.mk_error(YaccGrammarErrorKind::UnknownDeclaration, i));
604                }
605
606                i = self.parse_ws(k, false)?;
607                let num_newlines = self.num_newlines;
608                while i < self.src.len() && num_newlines == self.num_newlines {
609                    let (j, n, span, _) = self.parse_token(i)?;
610                    match self.ast.precs.entry(n) {
611                        Entry::Occupied(orig) => {
612                            let (_, orig_span) = orig.get();
613                            add_duplicate_occurrence(
614                                errs,
615                                YaccGrammarErrorKind::DuplicatePrecedence,
616                                *orig_span,
617                                span,
618                            );
619                        }
620                        Entry::Vacant(entry) => {
621                            let prec = Precedence {
622                                level: prec_level,
623                                kind,
624                            };
625                            entry.insert((prec, span));
626                        }
627                    }
628
629                    i = self.parse_ws(j, true)?;
630                }
631                prec_level += 1;
632            }
633        }
634        debug_assert!(i == self.src.len());
635        Err(self.mk_error(YaccGrammarErrorKind::PrematureEnd, i))
636    }
637
638    fn parse_rules(&mut self, mut i: usize) -> Result<usize, YaccGrammarError> {
639        // self.parse_declarations should have left the input at '%%'
640        i = self.lookahead_is("%%", i).unwrap();
641        i = self.parse_ws(i, true)?;
642        while i < self.src.len() && self.lookahead_is("%%", i).is_none() {
643            i = self.parse_rule(i)?;
644            i = self.parse_ws(i, true)?;
645        }
646        Ok(i)
647    }
648
649    fn parse_rule(&mut self, mut i: usize) -> Result<usize, YaccGrammarError> {
650        let (j, rn) = self.parse_name(i)?;
651        let span = Span::new(i, j);
652        if self.ast.start.is_none() {
653            self.ast.start = Some((rn.clone(), span));
654        }
655        match self.yacc_kind {
656            YaccKind::Original(_) | YaccKind::Eco => {
657                if self.ast.get_rule(&rn).is_none() {
658                    self.ast.add_rule(
659                        (rn.clone(), span),
660                        self.global_actiontype.clone().map(|(s, _)| s),
661                    );
662                }
663                i = j;
664            }
665            YaccKind::Grmtools => {
666                i = self.parse_ws(j, true)?;
667                if let Some(j) = self.lookahead_is("->", i) {
668                    i = j;
669                } else {
670                    return Err(self.mk_error(YaccGrammarErrorKind::MissingRightArrow, i));
671                }
672                i = self.parse_ws(i, true)?;
673                let (j, actiont) = self.parse_to_single_colon(i)?;
674                if self.ast.get_rule(&rn).is_none() {
675                    self.ast.add_rule((rn.clone(), span), Some(actiont));
676                }
677                i = j;
678            }
679        }
680        i = self.parse_ws(i, true)?;
681        match self.lookahead_is(":", i) {
682            Some(j) => i = j,
683            None => {
684                return Err(self.mk_error(YaccGrammarErrorKind::MissingColon, i));
685            }
686        }
687        let mut syms = Vec::new();
688        let mut prec = None;
689        let mut action = None;
690        i = self.parse_ws(i, true)?;
691        let mut pos_prod_start = i;
692        let mut pos_prod_end = None;
693        while i < self.src.len() {
694            if let Some(j) = self.lookahead_is("|", i) {
695                self.ast.add_prod(
696                    rn.clone(),
697                    syms,
698                    prec,
699                    action,
700                    Span::new(pos_prod_start, pos_prod_end.take().unwrap_or(i)),
701                );
702                syms = Vec::new();
703                prec = None;
704                action = None;
705                i = self.parse_ws(j, true)?;
706                pos_prod_start = i;
707                continue;
708            } else if let Some(j) = self.lookahead_is(";", i) {
709                self.ast.add_prod(
710                    rn,
711                    syms,
712                    prec,
713                    action,
714                    Span::new(pos_prod_start, pos_prod_end.take().unwrap_or(i)),
715                );
716                return Ok(j);
717            }
718
719            if self.lookahead_is("\"", i).is_some() || self.lookahead_is("'", i).is_some() {
720                let (j, sym, span, _) = self.parse_token(i)?;
721                pos_prod_end = Some(j);
722                i = self.parse_ws(j, true)?;
723                if self.ast.tokens.insert(sym.clone()) {
724                    self.ast.spans.push(span);
725                }
726                syms.push(Symbol::Token(sym, span));
727            } else if let Some(j) = self.lookahead_is("%prec", i) {
728                i = self.parse_ws(j, true)?;
729                let (k, sym, span, _) = self.parse_token(i)?;
730                if self.ast.tokens.insert(sym.clone()) {
731                    self.ast.spans.push(span);
732                }
733                prec = Some(sym);
734                pos_prod_end = Some(k);
735                i = k;
736            } else if self.lookahead_is("{", i).is_some() {
737                pos_prod_end = Some(i);
738                let (j, a) = self.parse_action(i)?;
739                i = self.parse_ws(j, true)?;
740                action = Some(a);
741
742                if !(self.lookahead_is("|", i).is_some() || self.lookahead_is(";", i).is_some()) {
743                    return Err(self.mk_error(YaccGrammarErrorKind::ProductionNotTerminated, i));
744                }
745            } else if let Some(j) = self.lookahead_is("%empty", i) {
746                let k = self.parse_ws(j, true)?;
747                // %empty could be followed by all sorts of weird syntax errors: all we try and do
748                // is say "does this production look like it's finished" and trust that the other
749                // errors will be caught by other parts of the parser.
750                if !syms.is_empty()
751                    | !(self.lookahead_is("|", k).is_some()
752                        || self.lookahead_is(";", k).is_some()
753                        || self.lookahead_is("{", k).is_some()
754                        || self.lookahead_is("%prec", k).is_some())
755                {
756                    return Err(self.mk_error(YaccGrammarErrorKind::NonEmptyProduction, i));
757                }
758                pos_prod_end = Some(j);
759                i = k;
760            } else {
761                let (j, sym, span, quoted) = self.parse_token(i)?;
762                pos_prod_end = Some(j);
763                if self
764                    .ast
765                    .tokens
766                    .get_index_of(&sym)
767                    .is_some_and(|idx| quoted || self.ast.token_directives.contains(&idx))
768                {
769                    syms.push(Symbol::Token(sym, span));
770                } else {
771                    syms.push(Symbol::Rule(sym, span));
772                }
773                i = j;
774            }
775            i = self.parse_ws(i, true)?;
776        }
777        Err(self.mk_error(YaccGrammarErrorKind::IncompleteRule, i))
778    }
779
780    fn parse_name(&self, i: usize) -> Result<(usize, String), YaccGrammarError> {
781        match RE_NAME.find(&self.src[i..]) {
782            Some(m) => {
783                assert_eq!(m.start(), 0);
784                Ok((i + m.end(), self.src[i..i + m.end()].to_string()))
785            }
786            None => Err(self.mk_error(YaccGrammarErrorKind::IllegalName, i)),
787        }
788    }
789
790    fn parse_token(&self, i: usize) -> Result<(usize, String, Span, bool), YaccGrammarError> {
791        match RE_TOKEN.find(&self.src[i..]) {
792            Some(m) => {
793                assert!(m.start() == 0 && m.end() > 0);
794                match self.src[i..].chars().next().unwrap() {
795                    '"' | '\'' => {
796                        debug_assert!('"'.len_utf8() == 1 && '\''.len_utf8() == 1);
797                        let start_cidx = i + 1;
798                        let end_cidx = i + m.end() - 1;
799                        Ok((
800                            i + m.end(),
801                            self.src[start_cidx..end_cidx].to_string(),
802                            Span::new(start_cidx, end_cidx),
803                            true,
804                        ))
805                    }
806                    _ => Ok((
807                        i + m.end(),
808                        self.src[i..i + m.end()].to_string(),
809                        Span::new(i, i + m.end()),
810                        false,
811                    )),
812                }
813            }
814            None => Err(self.mk_error(YaccGrammarErrorKind::IllegalString, i)),
815        }
816    }
817
818    fn parse_action(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
819        debug_assert!(self.lookahead_is("{", i).is_some());
820        let mut j = i;
821        let mut c = 0; // Count braces
822        while j < self.src.len() {
823            let ch = self.src[j..].chars().next().unwrap();
824            match ch {
825                '{' => c += 1,
826                '}' if c == 1 => {
827                    c = 0;
828                    break;
829                }
830                '}' => c -= 1,
831                '\n' | '\r' => {
832                    self.num_newlines += 1;
833                }
834                _ => (),
835            };
836            j += ch.len_utf8();
837        }
838        if c > 0 {
839            Err(self.mk_error(YaccGrammarErrorKind::IncompleteAction, i))
840        } else {
841            debug_assert!(self.lookahead_is("}", j).is_some());
842            let s = self.src[i + '{'.len_utf8()..j].trim().to_string();
843            Ok((j + '}'.len_utf8(), s))
844        }
845    }
846
847    fn parse_programs(
848        &mut self,
849        mut i: usize,
850        _: &mut Vec<YaccGrammarError>,
851    ) -> Result<usize, YaccGrammarError> {
852        if let Some(j) = self.lookahead_is("%%", i) {
853            i = self.parse_ws(j, true)?;
854            let prog = self.src[i..].to_string();
855            i += prog.len();
856            self.ast.set_programs(prog);
857        }
858        Ok(i)
859    }
860
861    /// Parse up to (but do not include) the end of line (or, if it comes sooner, the end of file).
862    fn parse_to_eol(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
863        let mut j = i;
864        while j < self.src.len() {
865            let c = self.src[j..].chars().next().unwrap();
866            match c {
867                '\n' | '\r' => break,
868                _ => j += c.len_utf8(),
869            }
870        }
871        Ok((j, self.src[i..j].to_string()))
872    }
873
874    /// Parse up to (but do not include) a single colon (double colons are allowed so that strings
875    /// like `a::b::c:` treat `a::b::c` as a single name. Errors if EOL encountered.
876    fn parse_to_single_colon(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
877        let mut j = i;
878        while j < self.src.len() {
879            let c = self.src[j..].chars().next().unwrap();
880            match c {
881                ':' => {
882                    let k = j + ':'.len_utf8();
883                    if k == self.src.len() || !self.src[k..].starts_with(':') {
884                        return Ok((j, self.src[i..j].trim().to_string()));
885                    }
886                    j += 2 * ':'.len_utf8();
887                }
888                '\n' | '\r' => {
889                    self.num_newlines += 1;
890                    j += c.len_utf8();
891                }
892                _ => j += c.len_utf8(),
893            }
894        }
895        Err(self.mk_error(YaccGrammarErrorKind::ReachedEOL, j))
896    }
897
898    /// Parse a quoted string, allowing escape characters.
899    fn parse_int<T: FromStr + PrimInt>(
900        &mut self,
901        i: usize,
902    ) -> Result<(usize, T), YaccGrammarError> {
903        let mut j = i;
904        while j < self.src.len() {
905            let c = self.src[j..].chars().next().unwrap();
906            match c {
907                '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => j += 1,
908                _ => break,
909            }
910        }
911        match self.src[i..j].parse::<T>() {
912            Ok(x) => Ok((j, x)),
913            Err(_) => Err(self.mk_error(YaccGrammarErrorKind::IllegalInteger, i)),
914        }
915    }
916
917    /// Parse a quoted string, allowing escape characters.
918    fn parse_string(&mut self, mut i: usize) -> Result<(usize, String), YaccGrammarError> {
919        let qc = if self.lookahead_is("'", i).is_some() {
920            '\''
921        } else if self.lookahead_is("\"", i).is_some() {
922            '"'
923        } else {
924            return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, i));
925        };
926
927        debug_assert!('"'.len_utf8() == 1 && '\''.len_utf8() == 1);
928        // Because we can encounter escape characters, we can't simply match text and slurp it into
929        // a String in one go (otherwise we'd include the escape characters). Conceptually we have
930        // to build the String up byte by byte, skipping escape characters, but that's slow.
931        // Instead we append chunks of the string up to (but excluding) escape characters.
932        let mut s = String::new();
933        i += 1;
934        let mut j = i;
935        while j < self.src.len() {
936            let c = self.src[j..].chars().next().unwrap();
937            match c {
938                '\n' | '\r' => {
939                    return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j));
940                }
941                x if x == qc => {
942                    s.push_str(&self.src[i..j]);
943                    return Ok((j + 1, s));
944                }
945                '\\' => {
946                    debug_assert!('\\'.len_utf8() == 1);
947                    match self.src[j + 1..].chars().next() {
948                        Some(c) if c == '\'' || c == '"' => {
949                            s.push_str(&self.src[i..j]);
950                            i = j + 1;
951                            j += 2;
952                        }
953                        _ => {
954                            return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j));
955                        }
956                    }
957                }
958                _ => j += c.len_utf8(),
959            }
960        }
961        Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j))
962    }
963
964    /// Skip whitespace from `i` onwards. If `inc_newlines` is `false`, will return `Err` if a
965    /// newline is encountered; otherwise newlines are consumed and skipped.
966    fn parse_ws(&mut self, mut i: usize, inc_newlines: bool) -> Result<usize, YaccGrammarError> {
967        while i < self.src.len() {
968            let c = self.src[i..].chars().next().unwrap();
969            match c {
970                ' ' | '\t' => i += c.len_utf8(),
971                '\n' | '\r' => {
972                    if !inc_newlines {
973                        return Err(self.mk_error(YaccGrammarErrorKind::ReachedEOL, i));
974                    }
975                    self.num_newlines += 1;
976                    i += c.len_utf8();
977                }
978                '/' => {
979                    if i + c.len_utf8() == self.src.len() {
980                        break;
981                    } else {
982                        let j = i + c.len_utf8();
983                        let c = self.src[j..].chars().next().unwrap();
984                        match c {
985                            '/' => {
986                                i = j + c.len_utf8();
987                                for c in self.src[i..].chars() {
988                                    i += c.len_utf8();
989                                    if c == '\n' || c == '\r' {
990                                        self.num_newlines += 1;
991                                        break;
992                                    }
993                                }
994                            }
995                            '*' => {
996                                // This is complicated by the fact that we need to deal with
997                                // unclosed comments (i.e. '/*' without a corresponding '*/').
998                                let mut k = j + c.len_utf8();
999                                let mut found = false;
1000                                while k < self.src.len() {
1001                                    let c = self.src[k..].chars().next().unwrap();
1002                                    k += c.len_utf8();
1003                                    match c {
1004                                        '\n' | '\r' => {
1005                                            if !inc_newlines {
1006                                                return Err(self.mk_error(
1007                                                    YaccGrammarErrorKind::ReachedEOL,
1008                                                    i,
1009                                                ));
1010                                            }
1011                                            self.num_newlines += 1;
1012                                        }
1013                                        '*' => (),
1014                                        _ => continue,
1015                                    }
1016                                    if k < self.src.len() {
1017                                        let c = self.src[k..].chars().next().unwrap();
1018                                        if c == '/' {
1019                                            i = k + c.len_utf8();
1020                                            found = true;
1021                                            break;
1022                                        }
1023                                    }
1024                                }
1025                                if !found {
1026                                    return Err(
1027                                        self.mk_error(YaccGrammarErrorKind::IncompleteComment, i)
1028                                    );
1029                                }
1030                            }
1031                            _ => break,
1032                        }
1033                    }
1034                }
1035                _ => break,
1036            }
1037        }
1038        Ok(i)
1039    }
1040
1041    fn lookahead_is(&self, s: &'static str, i: usize) -> Option<usize> {
1042        if self.src[i..].starts_with(s) {
1043            Some(i + s.len())
1044        } else {
1045            None
1046        }
1047    }
1048
1049    fn mk_error(&self, k: YaccGrammarErrorKind, off: usize) -> YaccGrammarError {
1050        let span = Span::new(off, off);
1051        YaccGrammarError {
1052            kind: k,
1053            spans: vec![span],
1054        }
1055    }
1056}
1057
1058#[cfg(test)]
1059mod test {
1060    use super::{
1061        super::{
1062            AssocKind, Precedence, YaccKind, YaccOriginalActionKind,
1063            ast::{GrammarAST, Production, Symbol},
1064        },
1065        Span, Spanned, YaccGrammarError, YaccGrammarErrorKind, YaccParser,
1066    };
1067    use std::collections::HashSet;
1068
1069    fn parse(yacc_kind: YaccKind, s: &str) -> Result<GrammarAST, Vec<YaccGrammarError>> {
1070        let mut yp = YaccParser::new(yacc_kind, s);
1071        yp.parse()?;
1072        Ok(yp.build())
1073    }
1074
1075    fn rule(n: &str) -> Symbol {
1076        Symbol::Rule(n.to_string(), Span::new(0, 0))
1077    }
1078
1079    fn rule_span(n: &str, span: Span) -> Symbol {
1080        Symbol::Rule(n.to_string(), span)
1081    }
1082
1083    fn token(n: &str) -> Symbol {
1084        Symbol::Token(n.to_string(), Span::new(0, 0))
1085    }
1086    fn token_span(n: &str, span: Span) -> Symbol {
1087        Symbol::Token(n.to_string(), span)
1088    }
1089
1090    fn line_of_offset(s: &str, off: usize) -> usize {
1091        s[..off].lines().count()
1092    }
1093
1094    macro_rules! line_col {
1095        ($src:ident, $span: expr) => {{
1096            let mut line_cache = crate::newlinecache::NewlineCache::new();
1097            line_cache.feed(&$src);
1098            line_cache
1099                .byte_to_line_num_and_col_num(&$src, $span.start())
1100                .unwrap()
1101        }};
1102    }
1103
    /// Assertion helpers for values that are expected to hold parse errors. Each method
    /// consumes `self` and takes the source text `src` that was parsed, so that error spans can
    /// be translated into line (and column) positions.
    trait ErrorsHelper {
        /// Expect a single error of kind `kind` located on line `line` of `src`.
        fn expect_error_at_line(self, src: &str, kind: YaccGrammarErrorKind, line: usize);
        /// Expect a single error of kind `kind` with a single span starting at (`line`, `col`)
        /// in `src`.
        fn expect_error_at_line_col(
            self,
            src: &str,
            kind: YaccGrammarErrorKind,
            line: usize,
            col: usize,
        );
        /// Expect a single error of kind `kind` whose spans start, in order, at the
        /// (line, column) positions yielded by `lines_cols`.
        fn expect_error_at_lines_cols(
            self,
            src: &str,
            kind: YaccGrammarErrorKind,
            lines_cols: &mut dyn Iterator<Item = (usize, usize)>,
        );
        /// Expect a sequence of errors: `expected` yields, in order, each error's kind together
        /// with the (line, column) start positions of its spans.
        fn expect_multiple_errors(
            self,
            src: &str,
            expected: &mut dyn Iterator<Item = (YaccGrammarErrorKind, Vec<(usize, usize)>)>,
        );
    }
1125
1126    impl ErrorsHelper for Result<GrammarAST, Vec<YaccGrammarError>> {
1127        #[track_caller]
1128        fn expect_error_at_line(self, src: &str, kind: YaccGrammarErrorKind, line: usize) {
1129            let errs = self
1130                .as_ref()
1131                .map_err(Vec::as_slice)
1132                .expect_err("Parsed ok while expecting error");
1133            assert_eq!(errs.len(), 1);
1134            let e = &errs[0];
1135            assert_eq!(e.kind, kind);
1136            assert_eq!(line_of_offset(src, e.spans()[0].start()), line);
1137            assert_eq!(e.spans.len(), 1);
1138        }
1139
1140        #[track_caller]
1141        fn expect_error_at_line_col(
1142            self,
1143            src: &str,
1144            kind: YaccGrammarErrorKind,
1145            line: usize,
1146            col: usize,
1147        ) {
1148            self.expect_error_at_lines_cols(src, kind, &mut std::iter::once((line, col)))
1149        }
1150
1151        #[track_caller]
1152        fn expect_error_at_lines_cols(
1153            self,
1154            src: &str,
1155            kind: YaccGrammarErrorKind,
1156            lines_cols: &mut dyn Iterator<Item = (usize, usize)>,
1157        ) {
1158            let errs = self
1159                .as_ref()
1160                .map_err(Vec::as_slice)
1161                .expect_err("Parsed ok while expecting error");
1162            assert_eq!(errs.len(), 1);
1163            let e = &errs[0];
1164            assert_eq!(e.kind, kind);
1165            assert_eq!(
1166                e.spans()
1167                    .iter()
1168                    .map(|span| line_col!(src, span))
1169                    .collect::<Vec<(usize, usize)>>(),
1170                lines_cols.collect::<Vec<(usize, usize)>>()
1171            );
1172            // Check that it is valid to slice.
1173            for span in e.spans() {
1174                let _ = &src[span.start()..span.end()];
1175            }
1176        }
1177
1178        #[track_caller]
1179        fn expect_multiple_errors(
1180            self,
1181            src: &str,
1182            expected: &mut dyn Iterator<Item = (YaccGrammarErrorKind, Vec<(usize, usize)>)>,
1183        ) {
1184            let errs = self.expect_err("Parsed ok while expecting error");
1185            for e in &errs {
1186                // Check that it is valid to slice the source with the spans.
1187                for span in e.spans() {
1188                    let _ = &src[span.start()..span.end()];
1189                }
1190            }
1191
1192            assert_eq!(
1193                errs.iter()
1194                    .map(|e| {
1195                        (
1196                            e.kind.clone(),
1197                            e.spans()
1198                                .iter()
1199                                .map(|span| line_col!(src, span))
1200                                .collect::<Vec<_>>(),
1201                        )
1202                    })
1203                    .collect::<Vec<_>>(),
1204                expected.collect::<Vec<_>>()
1205            );
1206        }
1207    }
1208
1209    #[test]
1210    fn test_helper_fn() {
1211        assert_eq!(Symbol::Token("A".to_string(), Span::new(0, 0)), token("A"));
1212    }
1213
1214    #[test]
1215    fn test_symbol_eq() {
1216        assert_eq!(rule("A"), rule("A"));
1217        assert_ne!(rule("A"), rule("B"));
1218        assert_ne!(rule("A"), token("A"));
1219    }
1220
1221    #[test]
1222    fn test_rule() {
1223        let src = "
1224            %%
1225            A : 'a';
1226        "
1227        .to_string();
1228        let grm = parse(
1229            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1230            &src,
1231        )
1232        .unwrap();
1233        assert_eq!(grm.get_rule("A").unwrap().pidxs, vec![0]);
1234        let a_span = Span::new(33, 34);
1235        assert_eq!(
1236            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1237            Production {
1238                symbols: vec![token_span("a", a_span)],
1239                precedence: None,
1240                action: None,
1241                prod_span: Span::new(32, 35),
1242            }
1243        );
1244        assert_eq!(&src[a_span.start()..a_span.end()], "a");
1245    }
1246
1247    #[test]
1248    fn test_rule_production_simple() {
1249        let src = "
1250            %%
1251            A : 'a';
1252            A : 'b';
1253        "
1254        .to_string();
1255        let grm = parse(
1256            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1257            &src,
1258        )
1259        .unwrap();
1260        let a_span = Span::new(33, 34);
1261        assert_eq!(
1262            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1263            Production {
1264                symbols: vec![token_span("a", a_span)],
1265                precedence: None,
1266                action: None,
1267                prod_span: Span::new(32, 35),
1268            }
1269        );
1270        assert_eq!(&src[a_span.start()..a_span.end()], "a");
1271        let b_span = Span::new(54, 55);
1272        assert_eq!(
1273            grm.prods[grm.get_rule("A").unwrap().pidxs[1]],
1274            Production {
1275                symbols: vec![token_span("b", Span::new(54, 55))],
1276                precedence: None,
1277                action: None,
1278                prod_span: Span::new(53, 56),
1279            }
1280        );
1281        assert_eq!(&src[b_span.start()..b_span.end()], "b");
1282    }
1283
1284    #[test]
1285    fn test_rule_empty() {
1286        let src = "
1287            %%
1288            A : ;
1289            B : 'b' | ;
1290            C : | 'c';
1291        "
1292        .to_string();
1293        let grm = parse(
1294            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1295            &src,
1296        )
1297        .unwrap();
1298
1299        assert_eq!(
1300            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1301            Production {
1302                symbols: vec![],
1303                precedence: None,
1304                action: None,
1305                prod_span: Span::new(32, 32),
1306            }
1307        );
1308
1309        let b_span = Span::new(51, 52);
1310        assert_eq!(
1311            grm.prods[grm.get_rule("B").unwrap().pidxs[0]],
1312            Production {
1313                symbols: vec![token_span("b", b_span)],
1314                precedence: None,
1315                action: None,
1316                prod_span: Span::new(50, 53),
1317            }
1318        );
1319        assert_eq!(&src[b_span.start()..b_span.end()], "b");
1320        assert_eq!(
1321            grm.prods[grm.get_rule("B").unwrap().pidxs[1]],
1322            Production {
1323                symbols: vec![],
1324                precedence: None,
1325                action: None,
1326                prod_span: Span::new(56, 56),
1327            }
1328        );
1329
1330        assert_eq!(
1331            grm.prods[grm.get_rule("C").unwrap().pidxs[0]],
1332            Production {
1333                symbols: vec![],
1334                precedence: None,
1335                action: None,
1336                prod_span: Span::new(74, 74),
1337            }
1338        );
1339        let c_span = Span::new(77, 78);
1340        assert_eq!(
1341            grm.prods[grm.get_rule("C").unwrap().pidxs[1]],
1342            Production {
1343                symbols: vec![token_span("c", c_span)],
1344                precedence: None,
1345                action: None,
1346                prod_span: Span::new(76, 79),
1347            }
1348        );
1349        assert_eq!(&src[c_span.start()..c_span.end()], "c");
1350    }
1351
1352    #[test]
1353    fn test_empty_program() {
1354        let src = "%%\nA : 'a';\n%%".to_string();
1355        parse(
1356            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1357            &src,
1358        )
1359        .unwrap();
1360    }
1361
1362    #[test]
1363    fn test_multiple_symbols() {
1364        let src = "%%\nA : 'a' B;".to_string();
1365        let grm = parse(
1366            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1367            &src,
1368        )
1369        .unwrap();
1370        let a_span = Span::new(8, 9);
1371        let b_span = Span::new(11, 12);
1372        assert_eq!(
1373            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1374            Production {
1375                symbols: vec![token_span("a", a_span), rule_span("B", b_span)],
1376                precedence: None,
1377                action: None,
1378                prod_span: Span::new(7, 12),
1379            }
1380        );
1381        assert_eq!(&src[a_span.start()..a_span.end()], "a");
1382        assert_eq!(&src[b_span.start()..b_span.end()], "B");
1383    }
1384
1385    #[test]
1386    fn test_token_types() {
1387        let src = "%%\nA : 'a' \"b\";".to_string();
1388        let grm = parse(
1389            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1390            &src,
1391        )
1392        .unwrap();
1393        let a_span = Span::new(8, 9);
1394        let b_span = Span::new(12, 13);
1395        assert_eq!(
1396            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1397            Production {
1398                symbols: vec![token_span("a", a_span), token_span("b", b_span)],
1399                precedence: None,
1400                action: None,
1401                prod_span: Span::new(7, 14),
1402            }
1403        );
1404        assert_eq!(&src[a_span.start()..a_span.end()], "a");
1405        assert_eq!(&src[b_span.start()..b_span.end()], "b");
1406    }
1407
1408    #[test]
1409    fn test_declaration_start() {
1410        let src = "%start   A\n%%\nA : a;".to_string();
1411        let grm = parse(
1412            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1413            &src,
1414        )
1415        .unwrap();
1416        assert_eq!(grm.start.unwrap(), ("A".to_string(), Span::new(9, 10)));
1417    }
1418
1419    #[test]
1420    fn test_declaration_token() {
1421        let src = "%token   a\n%%\nA : a;".to_string();
1422        let grm = parse(
1423            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1424            &src,
1425        )
1426        .unwrap();
1427        assert!(grm.has_token("a"));
1428    }
1429
1430    #[test]
1431    fn test_declaration_token_literal() {
1432        let src = "%token   'a'\n%%\nA : 'a';".to_string();
1433        let grm = parse(
1434            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1435            &src,
1436        )
1437        .unwrap();
1438        assert!(grm.has_token("a"));
1439    }
1440
1441    #[test]
1442    fn test_declaration_tokens() {
1443        let src = "%token   a b c 'd'\n%%\nA : a;".to_string();
1444        let grm = parse(
1445            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1446            &src,
1447        )
1448        .unwrap();
1449        assert!(grm.has_token("a"));
1450        assert!(grm.has_token("b"));
1451        assert!(grm.has_token("c"));
1452    }
1453
1454    #[test]
1455    fn test_auto_add_tokens() {
1456        let src = "%%\nA : 'a';".to_string();
1457        let grm = parse(
1458            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1459            &src,
1460        )
1461        .unwrap();
1462        assert!(grm.has_token("a"));
1463    }
1464
1465    #[test]
1466    fn test_token_non_literal() {
1467        let src = "%token T %%\nA : T;".to_string();
1468        let grm = parse(
1469            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1470            &src,
1471        )
1472        .unwrap();
1473        assert!(grm.has_token("T"));
1474        let t_span = Span::new(16, 17);
1475        assert_eq!(
1476            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1477            Production {
1478                symbols: vec![token_span("T", t_span)],
1479                precedence: None,
1480                action: None,
1481                prod_span: t_span,
1482            }
1483        );
1484        assert_eq!(&src[t_span.start()..t_span.end() + 1], "T;");
1485    }
1486
1487    #[test]
1488    fn test_token_unicode() {
1489        let src = "%token '❤' %%\nA : '❤';".to_string();
1490        let grm = parse(
1491            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1492            &src,
1493        )
1494        .unwrap();
1495        assert!(grm.has_token("❤"));
1496    }
1497
1498    #[test]
1499    fn test_unicode_err1() {
1500        let src = "%token '❤' ❤;".to_string();
1501        parse(
1502            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1503            &src,
1504        )
1505        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IllegalString, 1, 12);
1506    }
1507
1508    #[test]
1509    fn test_unicode_err2() {
1510        let src = "%token '❤'\n%%\nA : '❤' | ❤;".to_string();
1511        parse(
1512            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1513            &src,
1514        )
1515        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IllegalString, 3, 11);
1516    }
1517
1518    #[test]
1519    fn test_missing_end_quote() {
1520        let src = "%epp X \"f\\".to_string();
1521        parse(
1522            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1523            &src,
1524        )
1525        .expect_error_at_line_col(&src, YaccGrammarErrorKind::InvalidString, 1, 10);
1526    }
1527
1528    #[test]
1529    fn test_simple_decl_fail() {
1530        let src = "%fail x\n%%\nA : a".to_string();
1531        parse(
1532            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1533            &src,
1534        )
1535        .expect_error_at_line_col(&src, YaccGrammarErrorKind::UnknownDeclaration, 1, 1);
1536    }
1537
1538    #[test]
1539    fn test_empty() {
1540        let src = "".to_string();
1541        parse(
1542            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1543            &src,
1544        )
1545        .expect_error_at_line_col("", YaccGrammarErrorKind::PrematureEnd, 1, 1);
1546    }
1547
1548    #[test]
1549    fn test_incomplete_rule1() {
1550        let src = "%%A:".to_string();
1551        parse(
1552            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1553            &src,
1554        )
1555        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 1, 5);
1556    }
1557
1558    #[test]
1559    fn test_line_col_report1() {
1560        let src = "%%
1561A:"
1562        .to_string();
1563        parse(
1564            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1565            &src,
1566        )
1567        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 2, 3);
1568    }
1569
1570    #[test]
1571    fn test_line_col_report2() {
1572        let src = "%%
1573A:
1574"
1575        .to_string();
1576        parse(
1577            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1578            &src,
1579        )
1580        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 3, 1);
1581    }
1582
1583    #[test]
1584    fn test_line_col_report3() {
1585        let src = "
1586
1587        %woo"
1588            .to_string();
1589        parse(
1590            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1591            &src,
1592        )
1593        .expect_error_at_line_col(&src, YaccGrammarErrorKind::UnknownDeclaration, 3, 9);
1594    }
1595
1596    #[test]
1597    fn test_missing_colon() {
1598        let src = "%%A x;".to_string();
1599        parse(
1600            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1601            &src,
1602        )
1603        .expect_error_at_line_col(&src, YaccGrammarErrorKind::MissingColon, 1, 5);
1604    }
1605
1606    #[test]
1607    fn test_premature_end() {
1608        let src = "%token x".to_string();
1609        parse(
1610            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1611            &src,
1612        )
1613        .expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 9);
1614    }
1615
1616    #[test]
1617    fn test_premature_end_multibyte() {
1618        let src = "%actiontype 🦀".to_string();
1619        parse(
1620            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1621            &src,
1622        )
1623        .expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 14);
1624        let src = "%parse-param c:🦀".to_string();
1625        parse(
1626            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1627            &src,
1628        )
1629        .expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 17);
1630        let src = "// 🦀".to_string();
1631        parse(
1632            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1633            &src,
1634        )
1635        .expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 5);
1636    }
1637
1638    #[test]
1639    fn test_same_line() {
1640        let src = "%token
1641x"
1642        .to_string();
1643        parse(
1644            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1645            &src,
1646        )
1647        .expect_error_at_line_col(&src, YaccGrammarErrorKind::ReachedEOL, 1, 7);
1648    }
1649
1650    #[test]
1651    fn test_unknown_declaration() {
1652        let src = "%woo".to_string();
1653        parse(
1654            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1655            &src,
1656        )
1657        .expect_error_at_line_col(&src, YaccGrammarErrorKind::UnknownDeclaration, 1, 1);
1658    }
1659
1660    #[test]
1661    fn test_grmtools_format() {
1662        let src = "
1663          %start A
1664          %%
1665          A -> T: 'b';
1666          B -> Result<(), T>: 'c';
1667          C -> ::std::result::Result<(), T>: 'd';
1668          "
1669        .to_string();
1670        let grm = parse(YaccKind::Grmtools, &src).unwrap();
1671        assert_eq!(grm.rules["A"].actiont, Some("T".to_string()));
1672        assert_eq!(grm.rules["B"].actiont, Some("Result<(), T>".to_string()));
1673        assert_eq!(
1674            grm.rules["C"].actiont,
1675            Some("::std::result::Result<(), T>".to_string())
1676        );
1677    }
1678
1679    #[test]
1680    #[rustfmt::skip]
1681    fn test_precs() {
1682        let src = "
1683          %left '+' '-'
1684          %left '*'
1685          %right '/'
1686          %right '^'
1687          %nonassoc '~'
1688          %%
1689          ".to_string();
1690        let grm = parse(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), &src).unwrap();
1691        assert_eq!(grm.precs.len(), 6);
1692        assert_eq!(grm.precs["+"], (Precedence{level: 0, kind: AssocKind::Left}, Span::new(18, 19)));
1693        assert_eq!(grm.precs["-"], (Precedence{level: 0, kind: AssocKind::Left}, Span::new(22, 23)));
1694        assert_eq!(grm.precs["*"], (Precedence{level: 1, kind: AssocKind::Left}, Span::new(42, 43)));
1695        assert_eq!(grm.precs["/"], (Precedence{level: 2, kind: AssocKind::Right}, Span::new(63, 64)));
1696        assert_eq!(grm.precs["^"], (Precedence{level: 3, kind: AssocKind::Right}, Span::new(84, 85)));
1697        assert_eq!(grm.precs["~"], (Precedence{level: 4, kind: AssocKind::Nonassoc}, Span::new(108, 109)));
1698    }
1699
1700    #[test]
1701    fn test_dup_precs() {
1702        #[rustfmt::skip]
1703        let srcs = vec![
1704            ("
1705          %left 'x'
1706          %left 'x'
1707          %%
1708          ", ((2, 18), (3, 18))),
1709            ("
1710          %left 'x'
1711          %right 'x'
1712          %%
1713          ", ((2, 18), (3, 19))),
1714            ("
1715          %right 'x'
1716          %right 'x'
1717          %%
1718          ", ((2, 19), (3, 19))),
1719            ("
1720          %nonassoc 'x'
1721          %nonassoc 'x'
1722          %%
1723          ", ((2, 22), (3, 22))),
1724            ("
1725          %left 'x'
1726          %nonassoc 'x'
1727          %%
1728          ", ((2, 18), (3, 22))),
1729            ("
1730          %right 'x'
1731          %nonassoc 'x'
1732          %%
1733          ", ((2, 19), (3, 22)))
1734        ];
1735        for (src, (expected_origin, expected_dup)) in srcs.iter() {
1736            parse(
1737                YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1738                src,
1739            )
1740            .expect_error_at_lines_cols(
1741                src,
1742                YaccGrammarErrorKind::DuplicatePrecedence,
1743                &mut [*expected_origin, *expected_dup].into_iter(),
1744            );
1745        }
1746    }
1747
1748    #[test]
1749    fn test_multiple_dup_precs() {
1750        let src = "
1751          %left 'x'
1752          %left 'x'
1753          %right 'x'
1754          %nonassoc 'x'
1755          %left 'y'
1756          %nonassoc 'y'
1757          %right 'y'
1758          %%";
1759
1760        parse(
1761            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1762            src,
1763        )
1764        .expect_multiple_errors(
1765            src,
1766            &mut [
1767                (
1768                    YaccGrammarErrorKind::DuplicatePrecedence,
1769                    vec![(2, 18), (3, 18), (4, 19), (5, 22)],
1770                ),
1771                (
1772                    YaccGrammarErrorKind::DuplicatePrecedence,
1773                    vec![(6, 18), (7, 22), (8, 19)],
1774                ),
1775            ]
1776            .into_iter(),
1777        );
1778    }
1779
1780    #[test]
1781    #[rustfmt::skip]
1782    fn test_prec_override() {
1783        // Taken from the Yacc manual
1784        let src = "
1785            %left '+' '-'
1786            %left '*' '/'
1787            %%
1788            expr : expr '+' expr
1789                 | expr '-' expr
1790                 | expr '*' expr
1791                 | expr '/' expr
1792                 | '-'  expr %prec '*'
1793                 | NAME ;
1794        ";
1795        let grm = parse(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), src).unwrap();
1796        assert_eq!(grm.precs.len(), 4);
1797        assert_eq!(grm.prods[grm.rules["expr"].pidxs[0]].precedence, None);
1798        assert_eq!(grm.prods[grm.rules["expr"].pidxs[3]].symbols.len(), 3);
1799        assert_eq!(grm.prods[grm.rules["expr"].pidxs[4]].symbols.len(), 2);
1800        assert_eq!(grm.prods[grm.rules["expr"].pidxs[4]].precedence, Some("*".to_string()));
1801    }
1802
1803    #[test]
1804    fn test_prec_empty() {
1805        let src = "
1806        %%
1807        expr : 'a'
1808             | %empty %prec 'a';
1809        ";
1810        let grm = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
1811        assert_eq!(
1812            grm.prods[grm.rules["expr"].pidxs[1]].precedence,
1813            Some("a".to_string())
1814        );
1815    }
1816
1817    #[test]
1818    fn test_bad_prec_overrides() {
1819        let src = "
1820        %%
1821        S: 'A' %prec ;
1822        ";
1823        parse(
1824            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1825            src,
1826        )
1827        .expect_error_at_line(src, YaccGrammarErrorKind::IllegalString, 3);
1828    }
1829
1830    #[test]
1831    fn test_parse_avoid_insert() {
1832        let ast = parse(
1833            YaccKind::Eco,
1834            "
1835          %avoid_insert ws1 ws2
1836          %start R
1837          %%
1838          R: 'a';
1839          ",
1840        )
1841        .unwrap();
1842        assert_eq!(
1843            ast.avoid_insert,
1844            Some(
1845                [
1846                    ("ws1".to_string(), Span::new(25, 28)),
1847                    ("ws2".to_string(), Span::new(29, 32))
1848                ]
1849                .iter()
1850                .cloned()
1851                .collect()
1852            )
1853        );
1854        assert!(ast.tokens.get("ws1").is_some());
1855        assert!(ast.tokens.get("ws2").is_some());
1856    }
1857
1858    #[test]
1859    fn test_multiple_avoid_insert() {
1860        let ast = parse(
1861            YaccKind::Eco,
1862            "
1863          %avoid_insert X
1864          %avoid_insert Y
1865          %%
1866          ",
1867        )
1868        .unwrap();
1869        assert_eq!(
1870            ast.avoid_insert,
1871            Some(
1872                [
1873                    ("X".to_string(), Span::new(25, 26)),
1874                    ("Y".to_string(), Span::new(51, 52))
1875                ]
1876                .iter()
1877                .cloned()
1878                .collect()
1879            )
1880        );
1881    }
1882
1883    #[test]
1884    fn test_duplicate_avoid_insert() {
1885        let src = "
1886          %avoid_insert X Y
1887          %avoid_insert Y
1888          %%
1889          ";
1890        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
1891            src,
1892            YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
1893            &mut [(2usize, 27usize), (3, 25)].into_iter(),
1894        );
1895    }
1896
1897    #[test]
1898    fn test_duplicate_avoid_insert2() {
1899        let src = "
1900        %avoid_insert X
1901        %avoid_insert Y Y
1902        %%
1903        ";
1904        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
1905            src,
1906            YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
1907            &mut [(3, 23), (3, 25)].into_iter(),
1908        );
1909    }
1910
1911    #[test]
1912    fn test_multiple_duplicate_avoid_insert() {
1913        let src = "
1914        %avoid_insert X
1915        %avoid_insert Y Y X
1916        %%
1917        ";
1918        parse(YaccKind::Eco, src).expect_multiple_errors(
1919            src,
1920            &mut [
1921                (
1922                    YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
1923                    vec![(3, 23), (3, 25)],
1924                ),
1925                (
1926                    YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
1927                    vec![(2, 23), (3, 27)],
1928                ),
1929            ]
1930            .into_iter(),
1931        );
1932    }
1933
1934    #[test]
1935    fn test_no_implicit_tokens_in_original_yacc() {
1936        let src = "
1937        %implicit_tokens X
1938        %%
1939        ";
1940        parse(
1941            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1942            src,
1943        )
1944        .expect_error_at_line(src, YaccGrammarErrorKind::UnknownDeclaration, 2);
1945    }
1946
1947    #[test]
1948    fn test_parse_implicit_tokens() {
1949        let ast = parse(
1950            YaccKind::Eco,
1951            "
1952          %implicit_tokens ws1 ws2
1953          %start R
1954          %%
1955          R: 'a';
1956          ",
1957        )
1958        .unwrap();
1959        assert_eq!(
1960            ast.implicit_tokens,
1961            Some(
1962                [
1963                    ("ws1".to_string(), Span::new(28, 31)),
1964                    ("ws2".to_string(), Span::new(32, 35))
1965                ]
1966                .iter()
1967                .cloned()
1968                .collect()
1969            )
1970        );
1971        assert!(ast.tokens.get("ws1").is_some());
1972        assert!(ast.tokens.get("ws2").is_some());
1973    }
1974
1975    #[test]
1976    fn test_multiple_implicit_tokens() {
1977        let ast = parse(
1978            YaccKind::Eco,
1979            "
1980          %implicit_tokens X
1981          %implicit_tokens Y
1982          %%
1983          ",
1984        )
1985        .unwrap();
1986        assert_eq!(
1987            ast.implicit_tokens,
1988            Some(
1989                [
1990                    ("X".to_string(), Span::new(28, 29)),
1991                    ("Y".to_string(), Span::new(57, 58))
1992                ]
1993                .iter()
1994                .cloned()
1995                .collect()
1996            )
1997        );
1998    }
1999
2000    #[test]
2001    fn test_duplicate_implicit_tokens() {
2002        let src = "
2003        %implicit_tokens X
2004        %implicit_tokens X Y
2005        %%
2006        ";
2007        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
2008            src,
2009            YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
2010            &mut [(2, 26), (3, 26)].into_iter(),
2011        );
2012    }
2013
2014    #[test]
2015    fn test_duplicate_implicit_tokens2() {
2016        let src = "
2017        %implicit_tokens X X
2018        %implicit_tokens Y
2019        %%
2020        ";
2021        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
2022            src,
2023            YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
2024            &mut [(2, 26), (2, 28)].into_iter(),
2025        );
2026    }
2027
2028    #[test]
2029    fn test_multiple_duplicate_implicit_tokens_and_invalid_rule() {
2030        let src = "
2031        %implicit_tokens X
2032        %implicit_tokens X Y
2033        %implicit_tokens Y
2034        %%
2035        IncompleteRule: ";
2036        parse(YaccKind::Eco, src).expect_multiple_errors(
2037            src,
2038            &mut [
2039                (
2040                    YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
2041                    vec![(2, 26), (3, 26)],
2042                ),
2043                (
2044                    YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
2045                    vec![(3, 28), (4, 26)],
2046                ),
2047                (YaccGrammarErrorKind::IncompleteRule, vec![(6, 25)]),
2048            ]
2049            .into_iter(),
2050        );
2051    }
2052
2053    #[test]
2054    #[rustfmt::skip]
2055    fn test_parse_epp() {
2056        let ast = parse(
2057            YaccKind::Eco,
2058            r#"
2059          %epp A "a"
2060          %epp B 'a'
2061          %epp C '"'
2062          %epp D "'"
2063          %epp E "\""
2064          %epp F '\''
2065          %epp G "a\"b"
2066          %%
2067          R: 'A';
2068          "#,
2069        )
2070        .unwrap();
2071        assert_eq!(ast.epp.len(), 7);
2072        assert_eq!(ast.epp["A"], (Span::new(16, 17),   ("a".to_string(),   Span::new(18, 21))));
2073        assert_eq!(ast.epp["B"], (Span::new(37, 38),   ("a".to_string(),   Span::new(39, 42))));
2074        assert_eq!(ast.epp["C"], (Span::new(58, 59),   ("\"".to_string(),  Span::new(60, 63))));
2075        assert_eq!(ast.epp["D"], (Span::new(79, 80),   ("'".to_string(),   Span::new(81, 84))));
2076        assert_eq!(ast.epp["E"], (Span::new(100, 101), ("\"".to_string(),  Span::new(102, 106))));
2077        assert_eq!(ast.epp["F"], (Span::new(122, 123), ("'".to_string(),   Span::new(124, 128))));
2078        assert_eq!(ast.epp["G"], (Span::new(144, 145), ("a\"b".to_string(),Span::new(146, 152))));
2079    }
2080
2081    #[test]
2082    fn test_duplicate_epp() {
2083        let src = "
2084        %epp A \"a\"
2085        %epp A \"a\"
2086        %epp A \"a\"
2087        %%
2088        ";
2089        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
2090            src,
2091            YaccGrammarErrorKind::DuplicateEPP,
2092            &mut [(2, 14), (3, 14), (4, 14)].into_iter(),
2093        );
2094    }
2095
2096    #[test]
2097    fn test_multiple_duplicate_epp() {
2098        let src = "
2099        %epp A \"a1\"
2100        %epp A \"a2\"
2101        %epp A \"a3\"
2102        %epp B \"b1\"
2103        %epp B \"b2\"
2104        %epp B \"b3\"
2105        %%
2106        ";
2107        parse(YaccKind::Eco, src).expect_multiple_errors(
2108            src,
2109            &mut [
2110                (
2111                    YaccGrammarErrorKind::DuplicateEPP,
2112                    vec![(2, 14), (3, 14), (4, 14)],
2113                ),
2114                (
2115                    YaccGrammarErrorKind::DuplicateEPP,
2116                    vec![(5, 14), (6, 14), (7, 14)],
2117                ),
2118            ]
2119            .into_iter(),
2120        );
2121    }
2122
2123    #[test]
2124    fn test_broken_string() {
2125        let src = "
2126          %epp A \"a
2127          %%
2128          ";
2129        parse(YaccKind::Eco, src).expect_error_at_line(src, YaccGrammarErrorKind::InvalidString, 2);
2130
2131        let src = "
2132        %epp A \"a";
2133        parse(YaccKind::Eco, src).expect_error_at_line(src, YaccGrammarErrorKind::InvalidString, 2);
2134    }
2135
2136    #[test]
2137    fn test_duplicate_start() {
2138        let src = "
2139          %start X
2140          %start X
2141          %%
2142          ";
2143        parse(YaccKind::Eco, src).expect_error_at_lines_cols(
2144            src,
2145            YaccGrammarErrorKind::DuplicateStartDeclaration,
2146            &mut [(2, 18), (3, 18)].into_iter(),
2147        );
2148    }
2149
2150    #[test]
2151    fn test_duplicate_start_premature_end() {
2152        let src = "
2153          %start X
2154          %start X";
2155        parse(YaccKind::Eco, src).expect_multiple_errors(
2156            src,
2157            &mut [
2158                (
2159                    YaccGrammarErrorKind::DuplicateStartDeclaration,
2160                    vec![(2, 18), (3, 18)],
2161                ),
2162                (YaccGrammarErrorKind::PrematureEnd, vec![(3, 19)]),
2163            ]
2164            .into_iter(),
2165        );
2166    }
2167
2168    #[test]
2169    fn test_duplicate_expect() {
2170        let src = "
2171          %expect 1
2172          %expect 2
2173          %expect 3
2174          %%
2175          ";
2176        parse(
2177            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2178            src,
2179        )
2180        .expect_error_at_lines_cols(
2181            src,
2182            YaccGrammarErrorKind::DuplicateExpectDeclaration,
2183            &mut [(2, 19), (3, 19), (4, 19)].into_iter(),
2184        )
2185    }
2186
2187    #[test]
2188    fn test_duplicate_expect_and_missing_colon() {
2189        let src = "
2190          %expect 1
2191          %expect 2
2192          %expect 3
2193          %%
2194          A ;";
2195        parse(
2196            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2197            src,
2198        )
2199        .expect_multiple_errors(
2200            src,
2201            &mut [
2202                (
2203                    YaccGrammarErrorKind::DuplicateExpectDeclaration,
2204                    vec![(2, 19), (3, 19), (4, 19)],
2205                ),
2206                (YaccGrammarErrorKind::MissingColon, vec![(6, 13)]),
2207            ]
2208            .into_iter(),
2209        )
2210    }
2211
2212    #[test]
2213    fn test_duplicate_expectrr() {
2214        let src = "
2215          %expect-rr 1
2216          %expect-rr 2
2217          %expect-rr 3
2218          %%
2219          ";
2220        parse(
2221            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2222            src,
2223        )
2224        .expect_error_at_lines_cols(
2225            src,
2226            YaccGrammarErrorKind::DuplicateExpectRRDeclaration,
2227            &mut [(2, 22), (3, 22), (4, 22)].into_iter(),
2228        );
2229    }
2230
2231    #[test]
2232    fn test_duplicate_expectrr_illegal_name() {
2233        let src = "
2234          %expect-rr 1
2235          %expect-rr 2
2236          %expect-rr 3
2237          %%
2238          +IllegalRuleName+:;";
2239        parse(
2240            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2241            src,
2242        )
2243        .expect_multiple_errors(
2244            src,
2245            &mut [
2246                (
2247                    YaccGrammarErrorKind::DuplicateExpectRRDeclaration,
2248                    vec![(2, 22), (3, 22), (4, 22)],
2249                ),
2250                (YaccGrammarErrorKind::IllegalName, vec![(6, 11)]),
2251            ]
2252            .into_iter(),
2253        );
2254    }
2255
2256    #[test]
2257    fn test_implicit_start() {
2258        let ast = parse(
2259            YaccKind::Eco,
2260            "
2261          %%
2262          R: ;
2263          R2: ;
2264          R3: ;
2265          ",
2266        )
2267        .unwrap();
2268        assert_eq!(ast.start, Some(("R".to_string(), Span::new(24, 25))));
2269    }
2270
2271    #[test]
2272    fn test_action() {
2273        let grm = parse(
2274            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2275            "
2276          %%
2277          A: 'a' B { println!(\"test\"); }
2278           ;
2279          B: 'b' 'c' { add($1, $2); }
2280           | 'd'
2281           ;
2282          D: 'd' {}
2283           ;
2284          ",
2285        )
2286        .unwrap();
2287        assert_eq!(
2288            grm.prods[grm.rules["A"].pidxs[0]].action,
2289            Some("println!(\"test\");".to_string())
2290        );
2291        assert_eq!(
2292            grm.prods[grm.rules["B"].pidxs[0]].action,
2293            Some("add($1, $2);".to_string())
2294        );
2295        assert_eq!(grm.prods[grm.rules["B"].pidxs[1]].action, None);
2296    }
2297
2298    #[test]
2299    fn test_action_ends_in_multibyte() {
2300        let grm = parse(
2301            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2302            "%%A: '_' {(); // 🦀};",
2303        )
2304        .unwrap();
2305        assert_eq!(
2306            grm.prods[grm.rules["A"].pidxs[0]].action,
2307            Some("(); // 🦀".to_string())
2308        );
2309    }
2310
2311    #[test]
2312    fn test_programs() {
2313        let grm = parse(
2314            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2315            "
2316         %%
2317         A: 'a';
2318         %%
2319         fn foo() {}",
2320        )
2321        .unwrap();
2322        assert_eq!(grm.programs, Some("fn foo() {}".to_string()));
2323    }
2324
2325    #[test]
2326    fn test_actions_with_newlines() {
2327        let src = "
2328        %%
2329        A: 'a' { foo();
2330                 bar(); }
2331        ;
2332        B: b';";
2333        parse(
2334            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2335            src,
2336        )
2337        .expect_error_at_line(src, YaccGrammarErrorKind::IllegalString, 6);
2338    }
2339
2340    #[test]
2341    fn test_comments() {
2342        let src = "
2343            // A valid comment
2344            %token   a
2345            /* Another valid comment */
2346            %%\n
2347            A : a;";
2348        let grm = parse(
2349            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2350            src,
2351        )
2352        .unwrap();
2353        assert!(grm.has_token("a"));
2354
2355        let src = "
2356        /* An invalid comment * /
2357        %token   a
2358        %%\n
2359        A : a;";
2360        parse(
2361            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2362            src,
2363        )
2364        .expect_error_at_line(src, YaccGrammarErrorKind::IncompleteComment, 2);
2365
2366        let src = "
2367        %token   a
2368        %%
2369        /* A valid
2370         * multi-line comment
2371         */
2372        /* An invalid comment * /
2373        A : a;";
2374        parse(
2375            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2376            src,
2377        )
2378        .expect_error_at_line(src, YaccGrammarErrorKind::IncompleteComment, 7);
2379
2380        let src = "
2381        %token   a
2382        %%
2383        // Valid comment
2384        A : a";
2385        parse(
2386            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2387            src,
2388        )
2389        .expect_error_at_line(src, YaccGrammarErrorKind::IncompleteRule, 5);
2390    }
2391
2392    #[test]
2393    fn test_action_type() {
2394        let grm = parse(
2395            YaccKind::Original(YaccOriginalActionKind::UserAction),
2396            "
2397         %actiontype T
2398         %%
2399         A: 'a';
2400         %%
2401         fn foo() {}",
2402        )
2403        .unwrap();
2404        assert_eq!(grm.rules["A"].actiont, Some("T".to_string()));
2405    }
2406
2407    #[test]
2408    fn test_only_one_type() {
2409        let src = "
2410         %actiontype T1
2411         %actiontype T2
2412         %actiontype T3
2413         %%
2414         A: 'a';";
2415        parse(
2416            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2417            src,
2418        )
2419        .expect_error_at_lines_cols(
2420            src,
2421            YaccGrammarErrorKind::DuplicateActiontypeDeclaration,
2422            &mut [(2, 22), (3, 22), (4, 22)].into_iter(),
2423        );
2424    }
2425
2426    #[test]
2427    fn test_duplicate_actiontype_and_premature_end() {
2428        let src = "
2429         %actiontype T1
2430         %actiontype T2
2431         %actiontype T3";
2432        parse(
2433            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
2434            src,
2435        )
2436        .expect_multiple_errors(
2437            src,
2438            &mut [
2439                (
2440                    YaccGrammarErrorKind::DuplicateActiontypeDeclaration,
2441                    vec![(2, 22), (3, 22), (4, 22)],
2442                ),
2443                (YaccGrammarErrorKind::PrematureEnd, vec![(4, 24)]),
2444            ]
2445            .into_iter(),
2446        )
2447    }
2448
2449    #[test]
2450    fn test_parse_param() {
2451        let src = "
2452          %parse-param a::b: (u64, u64)
2453          %%
2454          A: 'a';
2455         ";
2456        let grm = parse(YaccKind::Original(YaccOriginalActionKind::UserAction), src).unwrap();
2457
2458        assert_eq!(
2459            grm.parse_param,
2460            Some(("a::b".to_owned(), "(u64, u64)".to_owned()))
2461        );
2462    }
2463
2464    #[test]
2465    fn test_parse_generics() {
2466        let src = "
2467          %parse-generics 'a, K, V
2468          %%
2469          A: 'a';
2470         ";
2471        let grm = parse(YaccKind::Original(YaccOriginalActionKind::UserAction), src).unwrap();
2472
2473        assert_eq!(grm.parse_generics, Some("'a, K, V".to_owned()));
2474    }
2475
2476    #[test]
2477    fn test_duplicate_rule() {
2478        let ast = parse(
2479            YaccKind::Grmtools,
2480            "%token A B D
2481%%
2482Expr -> () : %empty | A;
2483Expr -> () : B | 'C';
2484Expr -> () : D;
2485",
2486        )
2487        .unwrap();
2488        let expr_rule = ast.get_rule("Expr").unwrap();
2489        let mut prod_names = HashSet::new();
2490        for pidx in &expr_rule.pidxs {
2491            for sym in &ast.prods[*pidx].symbols {
2492                let name = match sym {
2493                    Symbol::Token(name, _) | Symbol::Rule(name, _) => name.clone(),
2494                };
2495                prod_names.insert(name);
2496            }
2497        }
2498        assert_eq!(ast.prods.len(), 5);
2499        assert_eq!(
2500            prod_names,
2501            HashSet::from_iter(["A", "B", "C", "D"].map(|s| s.to_owned()))
2502        );
2503    }
2504
2505    #[test]
2506    fn test_duplicate_start_and_missing_arrow() {
2507        let src = "%start A
2508%start A
2509%start A
2510%%
2511A -> () : 'a1';
2512B";
2513        parse(YaccKind::Grmtools, src).expect_multiple_errors(
2514            src,
2515            &mut [
2516                (
2517                    YaccGrammarErrorKind::DuplicateStartDeclaration,
2518                    vec![(1, 8), (2, 8), (3, 8)],
2519                ),
2520                (YaccGrammarErrorKind::MissingRightArrow, vec![(6, 2)]),
2521            ]
2522            .into_iter(),
2523        )
2524    }
2525
2526    #[test]
2527    fn test_routines_multiple_errors() {
2528        let mut src = String::from(
2529            "
2530        %start A
2531        %start B
2532        %expect 1
2533        %expect 2
2534        %%
2535        A -> () : 'a';
2536        %%
2537        ",
2538        );
2539        let mut expected_errs = vec![
2540            (
2541                YaccGrammarErrorKind::DuplicateStartDeclaration,
2542                vec![(2, 16), (3, 16)],
2543            ),
2544            (
2545                YaccGrammarErrorKind::DuplicateExpectDeclaration,
2546                vec![(4, 17), (5, 17)],
2547            ),
2548        ];
2549        parse(YaccKind::Grmtools, &src)
2550            .expect_multiple_errors(&src, &mut expected_errs.clone().into_iter());
2551
2552        src.push_str(
2553            "
2554                /* Incomplete comment
2555        ",
2556        );
2557        expected_errs.push((YaccGrammarErrorKind::IncompleteComment, vec![(10, 17)]));
2558        parse(YaccKind::Grmtools, &src)
2559            .expect_multiple_errors(&src, &mut expected_errs.clone().into_iter());
2560    }
2561
2562    #[test]
2563    fn test_expect_unused() {
2564        let src = r#"
2565        %expect-unused A 'b' "c"
2566        %%
2567        A: ;
2568        "#;
2569        let grm = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2570        assert!(
2571            grm.expect_unused
2572                .contains(&Symbol::Rule("A".to_string(), Span::new(24, 25)))
2573        );
2574        assert!(
2575            grm.expect_unused
2576                .contains(&Symbol::Token("b".to_string(), Span::new(27, 28)))
2577        );
2578        assert!(
2579            grm.expect_unused
2580                .contains(&Symbol::Token("c".to_string(), Span::new(31, 32)))
2581        );
2582    }
2583
2584    #[test]
2585    fn test_bad_expect_unused() {
2586        let src = "
2587        %expect-unused %
2588        %%
2589        A: ;
2590        ";
2591        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2592            src,
2593            YaccGrammarErrorKind::UnknownDeclaration,
2594            2,
2595            24,
2596        );
2597    }
2598
2599    #[test]
2600    fn test_unused_symbols() {
2601        let ast = parse(
2602            YaccKind::Original(YaccOriginalActionKind::NoAction),
2603            "
2604        %expect-unused UnusedAllowed 'b'
2605        %token a b
2606        %start Start
2607        %%
2608        Unused: ;
2609        Start: ;
2610        UnusedAllowed: ;
2611        ",
2612        )
2613        .unwrap();
2614
2615        assert_eq!(
2616            ast.unused_symbols()
2617                .map(|sym_idx| { sym_idx.symbol(&ast) })
2618                .collect::<Vec<Symbol>>()
2619                .as_slice(),
2620            &[
2621                Symbol::Rule("Unused".to_string(), Span::new(101, 107)),
2622                Symbol::Token("a".to_string(), Span::new(57, 58))
2623            ]
2624        );
2625
2626        let ast = parse(
2627            YaccKind::Original(YaccOriginalActionKind::NoAction),
2628            "
2629        %start A
2630        %%
2631        A: ;
2632        Rec: Rec | ;
2633        ",
2634        )
2635        .unwrap();
2636        assert_eq!(
2637            ast.unused_symbols()
2638                .map(|sym_idx| sym_idx.symbol(&ast))
2639                .collect::<Vec<Symbol>>()
2640                .as_slice(),
2641            &[Symbol::Rule("Rec".to_string(), Span::new(50, 53))]
2642        );
2643
2644        let ast = parse(
2645            YaccKind::Original(YaccOriginalActionKind::NoAction),
2646            "
2647        %%
2648        A: 'a' | 'z' ;
2649        B: 'a' | 'c' ;
2650        ",
2651        )
2652        .unwrap();
2653        // Check that we warn on B and 'c' but not 'a'
2654        assert_eq!(
2655            ast.unused_symbols()
2656                .map(|sym_idx| sym_idx.symbol(&ast))
2657                .collect::<Vec<Symbol>>()
2658                .as_slice(),
2659            &[
2660                Symbol::Rule("B".to_string(), Span::new(43, 44)),
2661                Symbol::Token("c".to_string(), Span::new(53, 54))
2662            ]
2663        );
2664    }
2665
2666    #[test]
2667    fn test_percent_empty() {
2668        parse(
2669            YaccKind::Original(YaccOriginalActionKind::NoAction),
2670            r#"
2671        %token a
2672        %start A
2673        %%
2674        A: %empty | "a";
2675        "#,
2676        )
2677        .unwrap();
2678
2679        let src = r#"
2680        %token a b
2681        %start A
2682        %%
2683        A: "a" | %empty | "b";
2684        B: %empty | "b";
2685        "#;
2686        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2687
2688        let src = r#"
2689        %token a
2690        %start A
2691        %%
2692        A: %empty "a";
2693        "#;
2694        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2695            src,
2696            YaccGrammarErrorKind::NonEmptyProduction,
2697            5,
2698            12,
2699        );
2700
2701        let src = r#"
2702        %token a
2703        %start A
2704        %%
2705        A: "a" %empty;
2706        "#;
2707        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2708            src,
2709            YaccGrammarErrorKind::NonEmptyProduction,
2710            5,
2711            16,
2712        );
2713    }
2714
2715    #[test]
2716    fn test_action_successor() {
2717        let src = "
2718        %%
2719        A: B {} B;
2720        B: ;
2721        ";
2722        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2723            src,
2724            YaccGrammarErrorKind::ProductionNotTerminated,
2725            3,
2726            17,
2727        );
2728
2729        let src = "
2730        %%
2731        A: B B {};
2732        B: {} ;
2733        ";
2734        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2735    }
2736
2737    #[test]
2738    fn test_empty_production_spans_issue_473() {
2739        let empty_prod_conflicts = [
2740            (
2741                "%start Expr
2742%%
2743Expr: %empty | Factor;
2744Factor: ')' Expr ')';
2745",
2746                (0, Span::new(21, 27)),
2747            ),
2748            (
2749                "%start Expr
2750%%
2751Expr: | Factor;
2752Factor: ')' Expr ')';
2753",
2754                (0, Span::new(21, 21)),
2755            ),
2756            (
2757                "%start Expr
2758%%
2759Expr:| Factor;
2760Factor: ')' Expr ')';
2761",
2762                (0, Span::new(20, 20)),
2763            ),
2764            (
2765                "%start Expr
2766%%
2767Expr: Factor | %empty;
2768Factor: ')' Expr ')';
2769",
2770                (1, Span::new(30, 36)),
2771            ),
2772            (
2773                "%start Expr
2774%%
2775Expr: Factor | ;
2776Factor: ')' Expr ')';
2777",
2778                (1, Span::new(30, 30)),
2779            ),
2780            (
2781                "%start Expr
2782%%
2783Expr: Factor|;
2784Factor: ')' Expr ')';
2785",
2786                (1, Span::new(28, 28)),
2787            ),
2788        ];
2789
2790        for (i, (src, (empty_pidx, empty_span))) in empty_prod_conflicts.iter().enumerate() {
2791            eprintln!("{}", i);
2792            let ast = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2793            assert_eq!(
2794                ast.prods[ast.get_rule("Expr").unwrap().pidxs[*empty_pidx]],
2795                Production {
2796                    symbols: vec![],
2797                    precedence: None,
2798                    action: None,
2799                    prod_span: *empty_span,
2800                }
2801            );
2802        }
2803    }
2804}