cfgrammar/yacc/
parser.rs

1// Note: this is the parser for both YaccKind::Original(YaccOriginalActionKind::GenericParseTree) and YaccKind::Eco yacc kinds.
2
3#[cfg(feature = "bincode")]
4use bincode::{Decode, Encode};
5use lazy_static::lazy_static;
6use num_traits::PrimInt;
7use regex::Regex;
8#[cfg(feature = "serde")]
9use serde::{Deserialize, Serialize};
10use std::{
11    collections::{hash_map::Entry, HashMap},
12    error::Error,
13    fmt,
14    str::FromStr,
15};
16
17use crate::{
18    header::{GrmtoolsSectionParser, HeaderErrorKind},
19    Span, Spanned,
20};
21
22pub type YaccGrammarResult<T> = Result<T, Vec<YaccGrammarError>>;
23
24use super::{
25    ast::{GrammarAST, Symbol},
26    AssocKind, Precedence, YaccKind,
27};
28
/// The various different possible Yacc parser errors.
///
/// The human-readable message for each variant is produced by this type's `Display`
/// implementation. Variants whose name starts with `Duplicate` are reported with
/// `SpansKind::DuplicationError`; `Header` carries its own `SpansKind`; all others are
/// reported with `SpansKind::Error`.
#[derive(Debug, PartialEq, Eq, Clone)]
#[non_exhaustive]
pub enum YaccGrammarErrorKind {
    IllegalInteger,
    IllegalName,
    IllegalString,
    IncompleteRule,
    IncompleteComment,
    IncompleteAction,
    MissingColon,
    MissingRightArrow,
    MismatchedBrace,
    /// `%empty` was used in a production that is not empty.
    NonEmptyProduction,
    /// The input ended before the grammar was complete.
    PrematureEnd,
    ProductionNotTerminated,
    ProgramsNotSupported,
    UnknownDeclaration,
    PrecNotFollowedByToken,
    /// A token has more than one precedence specified.
    DuplicatePrecedence,
    DuplicateAvoidInsertDeclaration,
    DuplicateImplicitTokensDeclaration,
    DuplicateExpectDeclaration,
    DuplicateExpectRRDeclaration,
    DuplicateStartDeclaration,
    DuplicateActiontypeDeclaration,
    /// More than one `%epp` declaration was given for the same token.
    DuplicateEPP,
    /// The end of a line was reached without finding expected content.
    ReachedEOL,
    InvalidString,
    NoStartRule,
    /// Something that is neither a rule nor a token was found where one was expected.
    UnknownSymbol,
    /// The named start rule does not appear in the grammar.
    InvalidStartRule(String),
    /// A reference to the named rule, which is unknown.
    UnknownRuleRef(String),
    /// The named token is unknown.
    UnknownToken(String),
    /// The named token was used in `%prec` but has no precedence attached.
    NoPrecForToken(String),
    /// The named token appears in an `%epp` declaration but is not referenced in the grammar.
    UnknownEPP(String),
    /// The given character was expected in the input.
    ExpectedInput(char),
    InvalidYaccKind,
    /// An error in the `%grmtools` section, together with how its spans should be interpreted.
    Header(HeaderErrorKind, SpansKind),
}
69
/// Any error from the Yacc parser returns an instance of this struct.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct YaccGrammarError {
    /// The specific kind of error.
    pub(crate) kind: YaccGrammarErrorKind,
    /// Always contains at least 1 span.
    ///
    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
    /// for the meaning and interpretation of spans and their ordering.
    pub(crate) spans: Vec<Span>,
}
81
// No methods are overridden: the trait's default implementations are used as-is.
impl Error for YaccGrammarError {}
83
84impl fmt::Display for YaccGrammarError {
85    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
86        write!(f, "{}", self.kind)
87    }
88}
89
90impl fmt::Display for YaccGrammarErrorKind {
91    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
92        let s = match self {
93            YaccGrammarErrorKind::ExpectedInput(c) => &format!("Expected input '{c}'"),
94            YaccGrammarErrorKind::IllegalInteger => "Illegal integer",
95            YaccGrammarErrorKind::IllegalName => "Illegal name",
96            YaccGrammarErrorKind::IllegalString => "Illegal string",
97            YaccGrammarErrorKind::IncompleteRule => "Incomplete rule",
98            YaccGrammarErrorKind::IncompleteComment => "Incomplete comment",
99            YaccGrammarErrorKind::IncompleteAction => "Incomplete action",
100            YaccGrammarErrorKind::MissingColon => "Missing ':'",
101            YaccGrammarErrorKind::MissingRightArrow => "Missing '->'",
102            YaccGrammarErrorKind::MismatchedBrace => "Mismatched brace",
103            YaccGrammarErrorKind::NonEmptyProduction => "%empty used in non-empty production",
104            YaccGrammarErrorKind::PrematureEnd => "File ends prematurely",
105            YaccGrammarErrorKind::ProductionNotTerminated => "Production not terminated correctly",
106            YaccGrammarErrorKind::ProgramsNotSupported => "Programs not currently supported",
107            YaccGrammarErrorKind::UnknownDeclaration => "Unknown declaration",
108            YaccGrammarErrorKind::DuplicatePrecedence => "Token has multiple precedences specified",
109            YaccGrammarErrorKind::PrecNotFollowedByToken => "%prec not followed by token name",
110            YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration => {
111                "Duplicated %avoid_insert declaration"
112            }
113            YaccGrammarErrorKind::DuplicateExpectDeclaration => "Duplicated %expect declaration",
114            YaccGrammarErrorKind::DuplicateExpectRRDeclaration => {
115                "Duplicate %expect-rr declaration"
116            }
117            YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration => {
118                "Duplicated %implicit_tokens declaration"
119            }
120            YaccGrammarErrorKind::DuplicateStartDeclaration => "Duplicated %start declaration",
121            YaccGrammarErrorKind::DuplicateActiontypeDeclaration => {
122                "Duplicate %actiontype declaration"
123            }
124            YaccGrammarErrorKind::DuplicateEPP => "Duplicate %epp declaration for this token",
125            YaccGrammarErrorKind::ReachedEOL => {
126                "Reached end of line without finding expected content"
127            }
128            YaccGrammarErrorKind::InvalidString => "Invalid string",
129            YaccGrammarErrorKind::NoStartRule => return write!(f, "No start rule specified"),
130            YaccGrammarErrorKind::UnknownSymbol => "Unknown symbol, expected a rule or token",
131            YaccGrammarErrorKind::InvalidStartRule(name) => {
132                return write!(f, "Start rule '{}' does not appear in grammar", name)
133            }
134            YaccGrammarErrorKind::UnknownRuleRef(name) => {
135                return write!(f, "Unknown reference to rule '{}'", name)
136            }
137            YaccGrammarErrorKind::UnknownToken(name) => {
138                return write!(f, "Unknown token '{}'", name)
139            }
140            YaccGrammarErrorKind::NoPrecForToken(name) => {
141                return write!(
142                    f,
143                    "Token '{}' used in %prec has no precedence attached",
144                    name
145                )
146            }
147            YaccGrammarErrorKind::UnknownEPP(name) => {
148                return write!(
149                    f,
150                    "Token '{}' in %epp declaration is not referenced in the grammar",
151                    name
152                )
153            }
154            YaccGrammarErrorKind::InvalidYaccKind => "Invalid yacc kind",
155            YaccGrammarErrorKind::Header(hk, _) => &format!("Error in '%grmtools' {}", hk),
156        };
157        write!(f, "{}", s)
158    }
159}
160
/// The various different possible Yacc parser warnings.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "bincode", derive(Encode, Decode))]
#[non_exhaustive]
pub enum YaccGrammarWarningKind {
    /// Displayed as "Unused rule".
    UnusedRule,
    /// Displayed as "Unused token".
    UnusedToken,
}
170
/// Any warning from the Yacc parser returns an instance of this struct.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "bincode", derive(Encode, Decode))]
pub struct YaccGrammarWarning {
    /// The specific kind of warning.
    pub(crate) kind: YaccGrammarWarningKind,
    /// Always contains at least 1 span.
    ///
    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
    /// for the meaning and interpretation of spans and their ordering.
    pub(crate) spans: Vec<Span>,
}
184
185impl fmt::Display for YaccGrammarWarning {
186    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
187        write!(f, "{}", self.kind)
188    }
189}
190
191impl fmt::Display for YaccGrammarWarningKind {
192    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193        let s = match self {
194            YaccGrammarWarningKind::UnusedRule => "Unused rule",
195            YaccGrammarWarningKind::UnusedToken => "Unused token",
196        };
197        write!(f, "{}", s)
198    }
199}
200
201impl Spanned for YaccGrammarWarning {
202    /// Returns the spans associated with the error, always containing at least 1 span.
203    ///
204    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
205    /// for the meaning and interpretation of spans and their ordering.
206    fn spans(&self) -> &[Span] {
207        self.spans.as_slice()
208    }
209
210    /// Returns the [SpansKind] associated with this error.
211    fn spanskind(&self) -> SpansKind {
212        match self.kind {
213            YaccGrammarWarningKind::UnusedRule | YaccGrammarWarningKind::UnusedToken => {
214                SpansKind::Error
215            }
216        }
217    }
218}
219
/// Indicates how to interpret the spans of an error or warning.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
#[non_exhaustive]
pub enum SpansKind {
    /// The first span is the first occurrence, followed by a span for each subsequent
    /// (duplicate) occurrence.
    DuplicationError,
    /// Contains a single span at the site of the error.
    Error,
}
229
230impl Spanned for YaccGrammarError {
231    /// Returns the spans associated with the error, always containing at least 1 span.
232    ///
233    /// Refer to [SpansKind] via [spanskind](Self::spanskind)
234    /// for the meaning and interpretation of spans and their ordering.
235    fn spans(&self) -> &[Span] {
236        self.spans.as_slice()
237    }
238
239    /// Returns the [SpansKind] associated with this error.
240    fn spanskind(&self) -> SpansKind {
241        match self.kind {
242            YaccGrammarErrorKind::IllegalInteger
243            | YaccGrammarErrorKind::IllegalName
244            | YaccGrammarErrorKind::IllegalString
245            | YaccGrammarErrorKind::IncompleteRule
246            | YaccGrammarErrorKind::IncompleteComment
247            | YaccGrammarErrorKind::IncompleteAction
248            | YaccGrammarErrorKind::MissingColon
249            | YaccGrammarErrorKind::MissingRightArrow
250            | YaccGrammarErrorKind::MismatchedBrace
251            | YaccGrammarErrorKind::NonEmptyProduction
252            | YaccGrammarErrorKind::PrematureEnd
253            | YaccGrammarErrorKind::ProductionNotTerminated
254            | YaccGrammarErrorKind::PrecNotFollowedByToken
255            | YaccGrammarErrorKind::ProgramsNotSupported
256            | YaccGrammarErrorKind::UnknownDeclaration
257            | YaccGrammarErrorKind::ReachedEOL
258            | YaccGrammarErrorKind::InvalidString
259            | YaccGrammarErrorKind::NoStartRule
260            | YaccGrammarErrorKind::UnknownSymbol
261            | YaccGrammarErrorKind::InvalidStartRule(_)
262            | YaccGrammarErrorKind::UnknownRuleRef(_)
263            | YaccGrammarErrorKind::UnknownToken(_)
264            | YaccGrammarErrorKind::NoPrecForToken(_)
265            | YaccGrammarErrorKind::InvalidYaccKind
266            | YaccGrammarErrorKind::ExpectedInput(_)
267            | YaccGrammarErrorKind::UnknownEPP(_) => SpansKind::Error,
268            YaccGrammarErrorKind::DuplicatePrecedence
269            | YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration
270            | YaccGrammarErrorKind::DuplicateExpectDeclaration
271            | YaccGrammarErrorKind::DuplicateExpectRRDeclaration
272            | YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration
273            | YaccGrammarErrorKind::DuplicateStartDeclaration
274            | YaccGrammarErrorKind::DuplicateActiontypeDeclaration
275            | YaccGrammarErrorKind::DuplicateEPP => SpansKind::DuplicationError,
276            YaccGrammarErrorKind::Header(_, spanskind) => spanskind,
277        }
278    }
279}
280
/// Parser state for a single Yacc grammar source string.
pub(crate) struct YaccParser<'a> {
    /// The Yacc dialect being parsed; some declarations and the rule header syntax depend on it.
    yacc_kind: YaccKind,
    /// The complete grammar source. Positions passed between parsing methods are byte offsets
    /// into this string.
    src: &'a str,
    /// Compared before and after skipping whitespace to detect that a line-delimited
    /// declaration has ended. Presumably incremented by `parse_ws` -- TODO confirm.
    num_newlines: usize,
    /// The AST being populated as parsing proceeds; handed out by `build`.
    ast: GrammarAST,
    /// The text and span of the `%actiontype` declaration's value, if one has been seen.
    global_actiontype: Option<(String, Span)>,
}
288
lazy_static! {
    // A name: an ASCII letter, `_` or `.`, followed by any number of ASCII letters, digits,
    // `_` or `.`. Anchored with `^`, so it only ever matches at the start of the haystack.
    static ref RE_NAME: Regex = Regex::new(r"^[a-zA-Z_.][a-zA-Z0-9_.]*").unwrap();
    // A token: a double-quoted string, a single-quoted string (both non-empty, matched lazily
    // up to the first closing quote), or a bare identifier made of an ASCII letter or `_`
    // followed by ASCII letters, digits or `_`. Also anchored at the start of the haystack.
    static ref RE_TOKEN: Regex =
        Regex::new("^(?:(\".+?\")|('.+?')|([a-zA-Z_][a-zA-Z_0-9]*))").unwrap();
}
294
295fn add_duplicate_occurrence(
296    errs: &mut Vec<YaccGrammarError>,
297    kind: YaccGrammarErrorKind,
298    orig_span: Span,
299    dup_span: Span,
300) {
301    if !errs.iter_mut().any(|e| {
302        if e.kind == kind && e.spans[0] == orig_span {
303            e.spans.push(dup_span);
304            true
305        } else {
306            false
307        }
308    }) {
309        errs.push(YaccGrammarError {
310            kind,
311            spans: vec![orig_span, dup_span],
312        });
313    }
314}
315
/// The actual parser is intended to be entirely opaque to outside users.
317impl YaccParser<'_> {
318    pub(crate) fn new(yacc_kind: YaccKind, src: &str) -> YaccParser {
319        YaccParser {
320            yacc_kind,
321            src,
322            num_newlines: 0,
323            ast: GrammarAST::new(),
324            global_actiontype: None,
325        }
326    }
327
328    pub(crate) fn parse(&mut self) -> YaccGrammarResult<usize> {
329        let mut errs = Vec::new();
330        let (_, pos) = GrmtoolsSectionParser::new(self.src, false)
331            .parse()
332            .map_err(|mut errs| errs.drain(..).map(|e| e.into()).collect::<Vec<_>>())?;
333        // We pass around an index into the *bytes* of self.src. We guarantee that at all times
334        // this points to the beginning of a UTF-8 character (since multibyte characters exist, not
335        // every byte within the string is also a valid character).
336        let mut result = self.parse_declarations(pos, &mut errs);
337        result = self.parse_rules(match result {
338            Ok(i) => i,
339            Err(e) => {
340                errs.push(e);
341                return Err(errs);
342            }
343        });
344        result = self.parse_programs(
345            match result {
346                Ok(i) => i,
347                Err(e) => {
348                    errs.push(e);
349                    return Err(errs);
350                }
351            },
352            &mut errs,
353        );
354        match result {
355            Ok(i) if errs.is_empty() => Ok(i),
356            Err(e) => {
357                errs.push(e);
358                Err(errs)
359            }
360            _ => Err(errs),
361        }
362    }
363
364    pub(crate) fn build(self) -> GrammarAST {
365        self.ast
366    }
367
368    fn parse_declarations(
369        &mut self,
370        mut i: usize,
371        errs: &mut Vec<YaccGrammarError>,
372    ) -> Result<usize, YaccGrammarError> {
373        i = self.parse_ws(i, true)?;
374        let mut prec_level = 0;
375        while i < self.src.len() {
376            if self.lookahead_is("%%", i).is_some() {
377                return Ok(i);
378            }
379            if let Some(j) = self.lookahead_is("%token", i) {
380                i = self.parse_ws(j, false)?;
381                while i < self.src.len() && self.lookahead_is("%", i).is_none() {
382                    let (j, n, span, _) = self.parse_token(i)?;
383                    let (idx, new_tok) = self.ast.tokens.insert_full(n);
384                    if new_tok {
385                        self.ast.spans.push(span);
386                    }
387                    self.ast.token_directives.insert(idx);
388                    i = self.parse_ws(j, true)?;
389                }
390                continue;
391            }
392            if let YaccKind::Original(_) = self.yacc_kind {
393                if let Some(j) = self.lookahead_is("%actiontype", i) {
394                    i = self.parse_ws(j, false)?;
395                    let (j, n) = self.parse_to_eol(i)?;
396                    let span = Span::new(i, j);
397                    if let Some((_, orig_span)) = self.global_actiontype {
398                        add_duplicate_occurrence(
399                            errs,
400                            YaccGrammarErrorKind::DuplicateActiontypeDeclaration,
401                            orig_span,
402                            span,
403                        );
404                    } else {
405                        self.global_actiontype = Some((n, span));
406                    }
407                    i = self.parse_ws(j, true)?;
408                    continue;
409                }
410            }
411            if let Some(j) = self.lookahead_is("%start", i) {
412                i = self.parse_ws(j, false)?;
413                let (j, n) = self.parse_name(i)?;
414                let span = Span::new(i, j);
415                if let Some((_, orig_span)) = self.ast.start {
416                    add_duplicate_occurrence(
417                        errs,
418                        YaccGrammarErrorKind::DuplicateStartDeclaration,
419                        orig_span,
420                        span,
421                    );
422                } else {
423                    self.ast.start = Some((n, span));
424                }
425                i = self.parse_ws(j, true)?;
426                continue;
427            }
428            if let Some(j) = self.lookahead_is("%epp", i) {
429                i = self.parse_ws(j, false)?;
430                let (j, n, _, _) = self.parse_token(i)?;
431                let span = Span::new(i, j);
432                i = self.parse_ws(j, false)?;
433                let (j, v) = self.parse_string(i)?;
434                let vspan = Span::new(i, j);
435                match self.ast.epp.entry(n) {
436                    Entry::Occupied(orig) => {
437                        let (orig_span, _) = orig.get();
438                        add_duplicate_occurrence(
439                            errs,
440                            YaccGrammarErrorKind::DuplicateEPP,
441                            *orig_span,
442                            span,
443                        )
444                    }
445                    Entry::Vacant(epp) => {
446                        epp.insert((span, (v, vspan)));
447                    }
448                }
449                i = self.parse_ws(j, true)?;
450                continue;
451            }
452            if let Some(j) = self.lookahead_is("%expect-rr", i) {
453                i = self.parse_ws(j, false)?;
454                let (j, n) = self.parse_int(i)?;
455                let span = Span::new(i, j);
456                if let Some((_, orig_span)) = self.ast.expectrr {
457                    add_duplicate_occurrence(
458                        errs,
459                        YaccGrammarErrorKind::DuplicateExpectRRDeclaration,
460                        orig_span,
461                        span,
462                    );
463                } else {
464                    self.ast.expectrr = Some((n, span));
465                }
466                i = self.parse_ws(j, true)?;
467                continue;
468            }
469            if let Some(j) = self.lookahead_is("%expect-unused", i) {
470                i = self.parse_ws(j, false)?;
471                while i < self.src.len() && self.lookahead_is("%", i).is_none() {
472                    let j = match self.parse_name(i) {
473                        Ok((j, n)) => {
474                            self.ast
475                                .expect_unused
476                                .push(Symbol::Rule(n, Span::new(i, j)));
477                            j
478                        }
479                        Err(_) => match self.parse_token(i) {
480                            Ok((j, n, span, _)) => {
481                                self.ast.expect_unused.push(Symbol::Token(n, span));
482                                j
483                            }
484                            Err(_) => {
485                                return Err(self.mk_error(YaccGrammarErrorKind::UnknownSymbol, i))
486                            }
487                        },
488                    };
489                    i = self.parse_ws(j, true)?;
490                }
491                continue;
492            }
493            if let Some(j) = self.lookahead_is("%expect", i) {
494                i = self.parse_ws(j, false)?;
495                let (j, n) = self.parse_int(i)?;
496                let span = Span::new(i, j);
497                if let Some((_, orig_span)) = self.ast.expect {
498                    add_duplicate_occurrence(
499                        errs,
500                        YaccGrammarErrorKind::DuplicateExpectDeclaration,
501                        orig_span,
502                        span,
503                    );
504                } else {
505                    self.ast.expect = Some((n, span));
506                }
507                i = self.parse_ws(j, true)?;
508                continue;
509            }
510            if let Some(j) = self.lookahead_is("%avoid_insert", i) {
511                i = self.parse_ws(j, false)?;
512                let num_newlines = self.num_newlines;
513                if self.ast.avoid_insert.is_none() {
514                    self.ast.avoid_insert = Some(HashMap::new());
515                }
516                while j < self.src.len() && self.num_newlines == num_newlines {
517                    let (j, n, span, _) = self.parse_token(i)?;
518                    if self.ast.tokens.insert(n.clone()) {
519                        self.ast.spans.push(span);
520                    }
521
522                    match self.ast.avoid_insert.as_mut().unwrap().entry(n) {
523                        Entry::Occupied(occupied) => {
524                            add_duplicate_occurrence(
525                                errs,
526                                YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
527                                *occupied.get(),
528                                span,
529                            );
530                        }
531                        Entry::Vacant(vacant) => {
532                            vacant.insert(span);
533                        }
534                    }
535                    i = self.parse_ws(j, true)?;
536                }
537                continue;
538            }
539            if let Some(j) = self.lookahead_is("%parse-param", i) {
540                i = self.parse_ws(j, false)?;
541                let (j, name) = self.parse_to_single_colon(i)?;
542                match self.lookahead_is(":", j) {
543                    Some(j) => i = self.parse_ws(j, false)?,
544                    None => {
545                        return Err(self.mk_error(YaccGrammarErrorKind::MissingColon, j));
546                    }
547                }
548                let (j, ty) = self.parse_to_eol(i)?;
549                self.ast.parse_param = Some((name, ty));
550                i = self.parse_ws(j, true)?;
551                continue;
552            }
553            if let YaccKind::Eco = self.yacc_kind {
554                if let Some(j) = self.lookahead_is("%implicit_tokens", i) {
555                    i = self.parse_ws(j, false)?;
556                    let num_newlines = self.num_newlines;
557                    if self.ast.implicit_tokens.is_none() {
558                        self.ast.implicit_tokens = Some(HashMap::new());
559                    }
560                    while j < self.src.len() && self.num_newlines == num_newlines {
561                        let (j, n, span, _) = self.parse_token(i)?;
562                        if self.ast.tokens.insert(n.clone()) {
563                            self.ast.spans.push(span);
564                        }
565                        match self.ast.implicit_tokens.as_mut().unwrap().entry(n) {
566                            Entry::Occupied(entry) => {
567                                let orig_span = *entry.get();
568                                add_duplicate_occurrence(
569                                    errs,
570                                    YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
571                                    orig_span,
572                                    span,
573                                );
574                            }
575                            Entry::Vacant(entry) => {
576                                entry.insert(span);
577                            }
578                        }
579                        i = self.parse_ws(j, true)?;
580                    }
581                    continue;
582                }
583            }
584            {
585                let k;
586                let kind;
587                if let Some(j) = self.lookahead_is("%left", i) {
588                    kind = AssocKind::Left;
589                    k = j;
590                } else if let Some(j) = self.lookahead_is("%right", i) {
591                    kind = AssocKind::Right;
592                    k = j;
593                } else if let Some(j) = self.lookahead_is("%nonassoc", i) {
594                    kind = AssocKind::Nonassoc;
595                    k = j;
596                } else {
597                    return Err(self.mk_error(YaccGrammarErrorKind::UnknownDeclaration, i));
598                }
599
600                i = self.parse_ws(k, false)?;
601                let num_newlines = self.num_newlines;
602                while i < self.src.len() && num_newlines == self.num_newlines {
603                    let (j, n, span, _) = self.parse_token(i)?;
604                    match self.ast.precs.entry(n) {
605                        Entry::Occupied(orig) => {
606                            let (_, orig_span) = orig.get();
607                            add_duplicate_occurrence(
608                                errs,
609                                YaccGrammarErrorKind::DuplicatePrecedence,
610                                *orig_span,
611                                span,
612                            );
613                        }
614                        Entry::Vacant(entry) => {
615                            let prec = Precedence {
616                                level: prec_level,
617                                kind,
618                            };
619                            entry.insert((prec, span));
620                        }
621                    }
622
623                    i = self.parse_ws(j, true)?;
624                }
625                prec_level += 1;
626            }
627        }
628        debug_assert!(i == self.src.len());
629        Err(self.mk_error(YaccGrammarErrorKind::PrematureEnd, i))
630    }
631
632    fn parse_rules(&mut self, mut i: usize) -> Result<usize, YaccGrammarError> {
633        // self.parse_declarations should have left the input at '%%'
634        i = self.lookahead_is("%%", i).unwrap();
635        i = self.parse_ws(i, true)?;
636        while i < self.src.len() && self.lookahead_is("%%", i).is_none() {
637            i = self.parse_rule(i)?;
638            i = self.parse_ws(i, true)?;
639        }
640        Ok(i)
641    }
642
643    fn parse_rule(&mut self, mut i: usize) -> Result<usize, YaccGrammarError> {
644        let (j, rn) = self.parse_name(i)?;
645        let span = Span::new(i, j);
646        if self.ast.start.is_none() {
647            self.ast.start = Some((rn.clone(), span));
648        }
649        match self.yacc_kind {
650            YaccKind::Original(_) | YaccKind::Eco => {
651                if self.ast.get_rule(&rn).is_none() {
652                    self.ast.add_rule(
653                        (rn.clone(), span),
654                        self.global_actiontype.clone().map(|(s, _)| s),
655                    );
656                }
657                i = j;
658            }
659            YaccKind::Grmtools => {
660                i = self.parse_ws(j, true)?;
661                if let Some(j) = self.lookahead_is("->", i) {
662                    i = j;
663                } else {
664                    return Err(self.mk_error(YaccGrammarErrorKind::MissingRightArrow, i));
665                }
666                i = self.parse_ws(i, true)?;
667                let (j, actiont) = self.parse_to_single_colon(i)?;
668                if self.ast.get_rule(&rn).is_none() {
669                    self.ast.add_rule((rn.clone(), span), Some(actiont));
670                }
671                i = j;
672            }
673        }
674        i = self.parse_ws(i, true)?;
675        match self.lookahead_is(":", i) {
676            Some(j) => i = j,
677            None => {
678                return Err(self.mk_error(YaccGrammarErrorKind::MissingColon, i));
679            }
680        }
681        let mut syms = Vec::new();
682        let mut prec = None;
683        let mut action = None;
684        i = self.parse_ws(i, true)?;
685        let mut pos_prod_start = i;
686        let mut pos_prod_end = None;
687        while i < self.src.len() {
688            if let Some(j) = self.lookahead_is("|", i) {
689                self.ast.add_prod(
690                    rn.clone(),
691                    syms,
692                    prec,
693                    action,
694                    Span::new(pos_prod_start, pos_prod_end.take().unwrap_or(i)),
695                );
696                syms = Vec::new();
697                prec = None;
698                action = None;
699                i = self.parse_ws(j, true)?;
700                pos_prod_start = i;
701                continue;
702            } else if let Some(j) = self.lookahead_is(";", i) {
703                self.ast.add_prod(
704                    rn,
705                    syms,
706                    prec,
707                    action,
708                    Span::new(pos_prod_start, pos_prod_end.take().unwrap_or(i)),
709                );
710                return Ok(j);
711            }
712
713            if self.lookahead_is("\"", i).is_some() || self.lookahead_is("'", i).is_some() {
714                let (j, sym, span, _) = self.parse_token(i)?;
715                pos_prod_end = Some(j);
716                i = self.parse_ws(j, true)?;
717                if self.ast.tokens.insert(sym.clone()) {
718                    self.ast.spans.push(span);
719                }
720                syms.push(Symbol::Token(sym, span));
721            } else if let Some(j) = self.lookahead_is("%prec", i) {
722                i = self.parse_ws(j, true)?;
723                let (k, sym, span, _) = self.parse_token(i)?;
724                if self.ast.tokens.insert(sym.clone()) {
725                    self.ast.spans.push(span);
726                }
727                prec = Some(sym);
728                pos_prod_end = Some(k);
729                i = k;
730            } else if self.lookahead_is("{", i).is_some() {
731                pos_prod_end = Some(i);
732                let (j, a) = self.parse_action(i)?;
733                i = self.parse_ws(j, true)?;
734                action = Some(a);
735
736                if !(self.lookahead_is("|", i).is_some() || self.lookahead_is(";", i).is_some()) {
737                    return Err(self.mk_error(YaccGrammarErrorKind::ProductionNotTerminated, i));
738                }
739            } else if let Some(j) = self.lookahead_is("%empty", i) {
740                let k = self.parse_ws(j, true)?;
741                // %empty could be followed by all sorts of weird syntax errors: all we try and do
742                // is say "does this production look like it's finished" and trust that the other
743                // errors will be caught by other parts of the parser.
744                if !syms.is_empty()
745                    | !(self.lookahead_is("|", k).is_some()
746                        || self.lookahead_is(";", k).is_some()
747                        || self.lookahead_is("{", k).is_some()
748                        || self.lookahead_is("%prec", k).is_some())
749                {
750                    return Err(self.mk_error(YaccGrammarErrorKind::NonEmptyProduction, i));
751                }
752                pos_prod_end = Some(j);
753                i = k;
754            } else {
755                let (j, sym, span, quoted) = self.parse_token(i)?;
756                pos_prod_end = Some(j);
757                if self
758                    .ast
759                    .tokens
760                    .get_index_of(&sym)
761                    .is_some_and(|idx| quoted || self.ast.token_directives.contains(&idx))
762                {
763                    syms.push(Symbol::Token(sym, span));
764                } else {
765                    syms.push(Symbol::Rule(sym, span));
766                }
767                i = j;
768            }
769            i = self.parse_ws(i, true)?;
770        }
771        Err(self.mk_error(YaccGrammarErrorKind::IncompleteRule, i))
772    }
773
774    fn parse_name(&self, i: usize) -> Result<(usize, String), YaccGrammarError> {
775        match RE_NAME.find(&self.src[i..]) {
776            Some(m) => {
777                assert_eq!(m.start(), 0);
778                Ok((i + m.end(), self.src[i..i + m.end()].to_string()))
779            }
780            None => Err(self.mk_error(YaccGrammarErrorKind::IllegalName, i)),
781        }
782    }
783
784    fn parse_token(&self, i: usize) -> Result<(usize, String, Span, bool), YaccGrammarError> {
785        match RE_TOKEN.find(&self.src[i..]) {
786            Some(m) => {
787                assert!(m.start() == 0 && m.end() > 0);
788                match self.src[i..].chars().next().unwrap() {
789                    '"' | '\'' => {
790                        debug_assert!('"'.len_utf8() == 1 && '\''.len_utf8() == 1);
791                        let start_cidx = i + 1;
792                        let end_cidx = i + m.end() - 1;
793                        Ok((
794                            i + m.end(),
795                            self.src[start_cidx..end_cidx].to_string(),
796                            Span::new(start_cidx, end_cidx),
797                            true,
798                        ))
799                    }
800                    _ => Ok((
801                        i + m.end(),
802                        self.src[i..i + m.end()].to_string(),
803                        Span::new(i, i + m.end()),
804                        false,
805                    )),
806                }
807            }
808            None => Err(self.mk_error(YaccGrammarErrorKind::IllegalString, i)),
809        }
810    }
811
812    fn parse_action(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
813        debug_assert!(self.lookahead_is("{", i).is_some());
814        let mut j = i;
815        let mut c = 0; // Count braces
816        while j < self.src.len() {
817            let ch = self.src[j..].chars().next().unwrap();
818            match ch {
819                '{' => c += 1,
820                '}' if c == 1 => {
821                    c = 0;
822                    break;
823                }
824                '}' => c -= 1,
825                '\n' | '\r' => {
826                    self.num_newlines += 1;
827                }
828                _ => (),
829            };
830            j += ch.len_utf8();
831        }
832        if c > 0 {
833            Err(self.mk_error(YaccGrammarErrorKind::IncompleteAction, i))
834        } else {
835            debug_assert!(self.lookahead_is("}", j).is_some());
836            let s = self.src[i + '{'.len_utf8()..j].trim().to_string();
837            Ok((j + '}'.len_utf8(), s))
838        }
839    }
840
841    fn parse_programs(
842        &mut self,
843        mut i: usize,
844        _: &mut Vec<YaccGrammarError>,
845    ) -> Result<usize, YaccGrammarError> {
846        if let Some(j) = self.lookahead_is("%%", i) {
847            i = self.parse_ws(j, true)?;
848            let prog = self.src[i..].to_string();
849            i += prog.len();
850            self.ast.set_programs(prog);
851        }
852        Ok(i)
853    }
854
855    /// Parse up to (but do not include) the end of line (or, if it comes sooner, the end of file).
856    fn parse_to_eol(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
857        let mut j = i;
858        while j < self.src.len() {
859            let c = self.src[j..].chars().next().unwrap();
860            match c {
861                '\n' | '\r' => break,
862                _ => j += c.len_utf8(),
863            }
864        }
865        Ok((j, self.src[i..j].to_string()))
866    }
867
868    /// Parse up to (but do not include) a single colon (double colons are allowed so that strings
869    /// like `a::b::c:` treat `a::b::c` as a single name. Errors if EOL encountered.
870    fn parse_to_single_colon(&mut self, i: usize) -> Result<(usize, String), YaccGrammarError> {
871        let mut j = i;
872        while j < self.src.len() {
873            let c = self.src[j..].chars().next().unwrap();
874            match c {
875                ':' => {
876                    let k = j + ':'.len_utf8();
877                    if k == self.src.len() || !self.src[k..].starts_with(':') {
878                        return Ok((j, self.src[i..j].trim().to_string()));
879                    }
880                    j += 2 * ':'.len_utf8();
881                }
882                '\n' | '\r' => {
883                    self.num_newlines += 1;
884                    j += c.len_utf8();
885                }
886                _ => j += c.len_utf8(),
887            }
888        }
889        Err(self.mk_error(YaccGrammarErrorKind::ReachedEOL, j))
890    }
891
892    /// Parse a quoted string, allowing escape characters.
893    fn parse_int<T: FromStr + PrimInt>(
894        &mut self,
895        i: usize,
896    ) -> Result<(usize, T), YaccGrammarError> {
897        let mut j = i;
898        while j < self.src.len() {
899            let c = self.src[j..].chars().next().unwrap();
900            match c {
901                '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => j += 1,
902                _ => break,
903            }
904        }
905        match self.src[i..j].parse::<T>() {
906            Ok(x) => Ok((j, x)),
907            Err(_) => Err(self.mk_error(YaccGrammarErrorKind::IllegalInteger, i)),
908        }
909    }
910
911    /// Parse a quoted string, allowing escape characters.
912    fn parse_string(&mut self, mut i: usize) -> Result<(usize, String), YaccGrammarError> {
913        let qc = if self.lookahead_is("'", i).is_some() {
914            '\''
915        } else if self.lookahead_is("\"", i).is_some() {
916            '"'
917        } else {
918            return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, i));
919        };
920
921        debug_assert!('"'.len_utf8() == 1 && '\''.len_utf8() == 1);
922        // Because we can encounter escape characters, we can't simply match text and slurp it into
923        // a String in one go (otherwise we'd include the escape characters). Conceptually we have
924        // to build the String up byte by byte, skipping escape characters, but that's slow.
925        // Instead we append chunks of the string up to (but excluding) escape characters.
926        let mut s = String::new();
927        i += 1;
928        let mut j = i;
929        while j < self.src.len() {
930            let c = self.src[j..].chars().next().unwrap();
931            match c {
932                '\n' | '\r' => {
933                    return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j));
934                }
935                x if x == qc => {
936                    s.push_str(&self.src[i..j]);
937                    return Ok((j + 1, s));
938                }
939                '\\' => {
940                    debug_assert!('\\'.len_utf8() == 1);
941                    match self.src[j + 1..].chars().next() {
942                        Some(c) if c == '\'' || c == '"' => {
943                            s.push_str(&self.src[i..j]);
944                            i = j + 1;
945                            j += 2;
946                        }
947                        _ => {
948                            return Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j));
949                        }
950                    }
951                }
952                _ => j += c.len_utf8(),
953            }
954        }
955        Err(self.mk_error(YaccGrammarErrorKind::InvalidString, j))
956    }
957
958    /// Skip whitespace from `i` onwards. If `inc_newlines` is `false`, will return `Err` if a
959    /// newline is encountered; otherwise newlines are consumed and skipped.
960    fn parse_ws(&mut self, mut i: usize, inc_newlines: bool) -> Result<usize, YaccGrammarError> {
961        while i < self.src.len() {
962            let c = self.src[i..].chars().next().unwrap();
963            match c {
964                ' ' | '\t' => i += c.len_utf8(),
965                '\n' | '\r' => {
966                    if !inc_newlines {
967                        return Err(self.mk_error(YaccGrammarErrorKind::ReachedEOL, i));
968                    }
969                    self.num_newlines += 1;
970                    i += c.len_utf8();
971                }
972                '/' => {
973                    if i + c.len_utf8() == self.src.len() {
974                        break;
975                    } else {
976                        let j = i + c.len_utf8();
977                        let c = self.src[j..].chars().next().unwrap();
978                        match c {
979                            '/' => {
980                                i = j + c.len_utf8();
981                                for c in self.src[i..].chars() {
982                                    i += c.len_utf8();
983                                    if c == '\n' || c == '\r' {
984                                        self.num_newlines += 1;
985                                        break;
986                                    }
987                                }
988                            }
989                            '*' => {
990                                // This is complicated by the fact that we need to deal with
991                                // unclosed comments (i.e. '/*' without a corresponding '*/').
992                                let mut k = j + c.len_utf8();
993                                let mut found = false;
994                                while k < self.src.len() {
995                                    let c = self.src[k..].chars().next().unwrap();
996                                    k += c.len_utf8();
997                                    match c {
998                                        '\n' | '\r' => {
999                                            if !inc_newlines {
1000                                                return Err(self.mk_error(
1001                                                    YaccGrammarErrorKind::ReachedEOL,
1002                                                    i,
1003                                                ));
1004                                            }
1005                                            self.num_newlines += 1;
1006                                        }
1007                                        '*' => (),
1008                                        _ => continue,
1009                                    }
1010                                    if k < self.src.len() {
1011                                        let c = self.src[k..].chars().next().unwrap();
1012                                        if c == '/' {
1013                                            i = k + c.len_utf8();
1014                                            found = true;
1015                                            break;
1016                                        }
1017                                    }
1018                                }
1019                                if !found {
1020                                    return Err(
1021                                        self.mk_error(YaccGrammarErrorKind::IncompleteComment, i)
1022                                    );
1023                                }
1024                            }
1025                            _ => break,
1026                        }
1027                    }
1028                }
1029                _ => break,
1030            }
1031        }
1032        Ok(i)
1033    }
1034
1035    fn lookahead_is(&self, s: &'static str, i: usize) -> Option<usize> {
1036        if self.src[i..].starts_with(s) {
1037            Some(i + s.len())
1038        } else {
1039            None
1040        }
1041    }
1042
1043    fn mk_error(&self, k: YaccGrammarErrorKind, off: usize) -> YaccGrammarError {
1044        let span = Span::new(off, off);
1045        YaccGrammarError {
1046            kind: k,
1047            spans: vec![span],
1048        }
1049    }
1050}
1051
1052#[cfg(test)]
1053mod test {
1054    use super::{
1055        super::{
1056            ast::{GrammarAST, Production, Symbol},
1057            AssocKind, Precedence, YaccKind, YaccOriginalActionKind,
1058        },
1059        Span, Spanned, YaccGrammarError, YaccGrammarErrorKind, YaccParser,
1060    };
1061    use std::collections::HashSet;
1062
1063    fn parse(yacc_kind: YaccKind, s: &str) -> Result<GrammarAST, Vec<YaccGrammarError>> {
1064        let mut yp = YaccParser::new(yacc_kind, s);
1065        yp.parse()?;
1066        Ok(yp.build())
1067    }
1068
1069    fn rule(n: &str) -> Symbol {
1070        Symbol::Rule(n.to_string(), Span::new(0, 0))
1071    }
1072
1073    fn rule_span(n: &str, span: Span) -> Symbol {
1074        Symbol::Rule(n.to_string(), span)
1075    }
1076
1077    fn token(n: &str) -> Symbol {
1078        Symbol::Token(n.to_string(), Span::new(0, 0))
1079    }
1080    fn token_span(n: &str, span: Span) -> Symbol {
1081        Symbol::Token(n.to_string(), span)
1082    }
1083
1084    fn line_of_offset(s: &str, off: usize) -> usize {
1085        s[..off].lines().count()
1086    }
1087
1088    macro_rules! line_col {
1089        ($src:ident, $span: expr) => {{
1090            let mut line_cache = crate::newlinecache::NewlineCache::new();
1091            line_cache.feed(&$src);
1092            line_cache
1093                .byte_to_line_num_and_col_num(&$src, $span.start())
1094                .unwrap()
1095        }};
1096    }
1097
1098    trait ErrorsHelper {
1099        fn expect_error_at_line(self, src: &str, kind: YaccGrammarErrorKind, line: usize);
1100        fn expect_error_at_line_col(
1101            self,
1102            src: &str,
1103            kind: YaccGrammarErrorKind,
1104            line: usize,
1105            col: usize,
1106        );
1107        fn expect_error_at_lines_cols(
1108            self,
1109            src: &str,
1110            kind: YaccGrammarErrorKind,
1111            lines_cols: &mut dyn Iterator<Item = (usize, usize)>,
1112        );
1113        fn expect_multiple_errors(
1114            self,
1115            src: &str,
1116            expected: &mut dyn Iterator<Item = (YaccGrammarErrorKind, Vec<(usize, usize)>)>,
1117        );
1118    }
1119
1120    impl ErrorsHelper for Result<GrammarAST, Vec<YaccGrammarError>> {
1121        #[track_caller]
1122        fn expect_error_at_line(self, src: &str, kind: YaccGrammarErrorKind, line: usize) {
1123            let errs = self
1124                .as_ref()
1125                .map_err(Vec::as_slice)
1126                .expect_err("Parsed ok while expecting error");
1127            assert_eq!(errs.len(), 1);
1128            let e = &errs[0];
1129            assert_eq!(e.kind, kind);
1130            assert_eq!(line_of_offset(src, e.spans()[0].start()), line);
1131            assert_eq!(e.spans.len(), 1);
1132        }
1133
1134        #[track_caller]
1135        fn expect_error_at_line_col(
1136            self,
1137            src: &str,
1138            kind: YaccGrammarErrorKind,
1139            line: usize,
1140            col: usize,
1141        ) {
1142            self.expect_error_at_lines_cols(src, kind, &mut std::iter::once((line, col)))
1143        }
1144
1145        #[track_caller]
1146        fn expect_error_at_lines_cols(
1147            self,
1148            src: &str,
1149            kind: YaccGrammarErrorKind,
1150            lines_cols: &mut dyn Iterator<Item = (usize, usize)>,
1151        ) {
1152            let errs = self
1153                .as_ref()
1154                .map_err(Vec::as_slice)
1155                .expect_err("Parsed ok while expecting error");
1156            assert_eq!(errs.len(), 1);
1157            let e = &errs[0];
1158            assert_eq!(e.kind, kind);
1159            assert_eq!(
1160                e.spans()
1161                    .iter()
1162                    .map(|span| line_col!(src, span))
1163                    .collect::<Vec<(usize, usize)>>(),
1164                lines_cols.collect::<Vec<(usize, usize)>>()
1165            );
1166            // Check that it is valid to slice.
1167            for span in e.spans() {
1168                let _ = &src[span.start()..span.end()];
1169            }
1170        }
1171
1172        #[track_caller]
1173        fn expect_multiple_errors(
1174            self,
1175            src: &str,
1176            expected: &mut dyn Iterator<Item = (YaccGrammarErrorKind, Vec<(usize, usize)>)>,
1177        ) {
1178            let errs = self.expect_err("Parsed ok while expecting error");
1179            for e in &errs {
1180                // Check that it is valid to slice the source with the spans.
1181                for span in e.spans() {
1182                    let _ = &src[span.start()..span.end()];
1183                }
1184            }
1185
1186            assert_eq!(
1187                errs.iter()
1188                    .map(|e| {
1189                        (
1190                            e.kind.clone(),
1191                            e.spans()
1192                                .iter()
1193                                .map(|span| line_col!(src, span))
1194                                .collect::<Vec<_>>(),
1195                        )
1196                    })
1197                    .collect::<Vec<_>>(),
1198                expected.collect::<Vec<_>>()
1199            );
1200        }
1201    }
1202
1203    #[test]
1204    fn test_helper_fn() {
1205        assert_eq!(Symbol::Token("A".to_string(), Span::new(0, 0)), token("A"));
1206    }
1207
1208    #[test]
1209    fn test_symbol_eq() {
1210        assert_eq!(rule("A"), rule("A"));
1211        assert_ne!(rule("A"), rule("B"));
1212        assert_ne!(rule("A"), token("A"));
1213    }
1214
1215    #[test]
1216    fn test_rule() {
1217        let src = "
1218            %%
1219            A : 'a';
1220        "
1221        .to_string();
1222        let grm = parse(
1223            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1224            &src,
1225        )
1226        .unwrap();
1227        assert_eq!(grm.get_rule("A").unwrap().pidxs, vec![0]);
1228        let a_span = Span::new(33, 34);
1229        assert_eq!(
1230            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1231            Production {
1232                symbols: vec![token_span("a", a_span)],
1233                precedence: None,
1234                action: None,
1235                prod_span: Span::new(32, 35),
1236            }
1237        );
1238        assert_eq!(&src[a_span.start()..a_span.end()], "a");
1239    }
1240
1241    #[test]
1242    fn test_rule_production_simple() {
1243        let src = "
1244            %%
1245            A : 'a';
1246            A : 'b';
1247        "
1248        .to_string();
1249        let grm = parse(
1250            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1251            &src,
1252        )
1253        .unwrap();
1254        let a_span = Span::new(33, 34);
1255        assert_eq!(
1256            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1257            Production {
1258                symbols: vec![token_span("a", a_span)],
1259                precedence: None,
1260                action: None,
1261                prod_span: Span::new(32, 35),
1262            }
1263        );
1264        assert_eq!(&src[a_span.start()..a_span.end()], "a");
1265        let b_span = Span::new(54, 55);
1266        assert_eq!(
1267            grm.prods[grm.get_rule("A").unwrap().pidxs[1]],
1268            Production {
1269                symbols: vec![token_span("b", Span::new(54, 55))],
1270                precedence: None,
1271                action: None,
1272                prod_span: Span::new(53, 56),
1273            }
1274        );
1275        assert_eq!(&src[b_span.start()..b_span.end()], "b");
1276    }
1277
1278    #[test]
1279    fn test_rule_empty() {
1280        let src = "
1281            %%
1282            A : ;
1283            B : 'b' | ;
1284            C : | 'c';
1285        "
1286        .to_string();
1287        let grm = parse(
1288            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1289            &src,
1290        )
1291        .unwrap();
1292
1293        assert_eq!(
1294            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1295            Production {
1296                symbols: vec![],
1297                precedence: None,
1298                action: None,
1299                prod_span: Span::new(32, 32),
1300            }
1301        );
1302
1303        let b_span = Span::new(51, 52);
1304        assert_eq!(
1305            grm.prods[grm.get_rule("B").unwrap().pidxs[0]],
1306            Production {
1307                symbols: vec![token_span("b", b_span)],
1308                precedence: None,
1309                action: None,
1310                prod_span: Span::new(50, 53),
1311            }
1312        );
1313        assert_eq!(&src[b_span.start()..b_span.end()], "b");
1314        assert_eq!(
1315            grm.prods[grm.get_rule("B").unwrap().pidxs[1]],
1316            Production {
1317                symbols: vec![],
1318                precedence: None,
1319                action: None,
1320                prod_span: Span::new(56, 56),
1321            }
1322        );
1323
1324        assert_eq!(
1325            grm.prods[grm.get_rule("C").unwrap().pidxs[0]],
1326            Production {
1327                symbols: vec![],
1328                precedence: None,
1329                action: None,
1330                prod_span: Span::new(74, 74),
1331            }
1332        );
1333        let c_span = Span::new(77, 78);
1334        assert_eq!(
1335            grm.prods[grm.get_rule("C").unwrap().pidxs[1]],
1336            Production {
1337                symbols: vec![token_span("c", c_span)],
1338                precedence: None,
1339                action: None,
1340                prod_span: Span::new(76, 79),
1341            }
1342        );
1343        assert_eq!(&src[c_span.start()..c_span.end()], "c");
1344    }
1345
1346    #[test]
1347    fn test_empty_program() {
1348        let src = "%%\nA : 'a';\n%%".to_string();
1349        parse(
1350            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1351            &src,
1352        )
1353        .unwrap();
1354    }
1355
1356    #[test]
1357    fn test_multiple_symbols() {
1358        let src = "%%\nA : 'a' B;".to_string();
1359        let grm = parse(
1360            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1361            &src,
1362        )
1363        .unwrap();
1364        let a_span = Span::new(8, 9);
1365        let b_span = Span::new(11, 12);
1366        assert_eq!(
1367            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1368            Production {
1369                symbols: vec![token_span("a", a_span), rule_span("B", b_span)],
1370                precedence: None,
1371                action: None,
1372                prod_span: Span::new(7, 12),
1373            }
1374        );
1375        assert_eq!(&src[a_span.start()..a_span.end()], "a");
1376        assert_eq!(&src[b_span.start()..b_span.end()], "B");
1377    }
1378
1379    #[test]
1380    fn test_token_types() {
1381        let src = "%%\nA : 'a' \"b\";".to_string();
1382        let grm = parse(
1383            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1384            &src,
1385        )
1386        .unwrap();
1387        let a_span = Span::new(8, 9);
1388        let b_span = Span::new(12, 13);
1389        assert_eq!(
1390            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1391            Production {
1392                symbols: vec![token_span("a", a_span), token_span("b", b_span)],
1393                precedence: None,
1394                action: None,
1395                prod_span: Span::new(7, 14),
1396            }
1397        );
1398        assert_eq!(&src[a_span.start()..a_span.end()], "a");
1399        assert_eq!(&src[b_span.start()..b_span.end()], "b");
1400    }
1401
1402    #[test]
1403    fn test_declaration_start() {
1404        let src = "%start   A\n%%\nA : a;".to_string();
1405        let grm = parse(
1406            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1407            &src,
1408        )
1409        .unwrap();
1410        assert_eq!(grm.start.unwrap(), ("A".to_string(), Span::new(9, 10)));
1411    }
1412
1413    #[test]
1414    fn test_declaration_token() {
1415        let src = "%token   a\n%%\nA : a;".to_string();
1416        let grm = parse(
1417            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1418            &src,
1419        )
1420        .unwrap();
1421        assert!(grm.has_token("a"));
1422    }
1423
1424    #[test]
1425    fn test_declaration_token_literal() {
1426        let src = "%token   'a'\n%%\nA : 'a';".to_string();
1427        let grm = parse(
1428            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1429            &src,
1430        )
1431        .unwrap();
1432        assert!(grm.has_token("a"));
1433    }
1434
1435    #[test]
1436    fn test_declaration_tokens() {
1437        let src = "%token   a b c 'd'\n%%\nA : a;".to_string();
1438        let grm = parse(
1439            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1440            &src,
1441        )
1442        .unwrap();
1443        assert!(grm.has_token("a"));
1444        assert!(grm.has_token("b"));
1445        assert!(grm.has_token("c"));
1446    }
1447
1448    #[test]
1449    fn test_auto_add_tokens() {
1450        let src = "%%\nA : 'a';".to_string();
1451        let grm = parse(
1452            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1453            &src,
1454        )
1455        .unwrap();
1456        assert!(grm.has_token("a"));
1457    }
1458
1459    #[test]
1460    fn test_token_non_literal() {
1461        let src = "%token T %%\nA : T;".to_string();
1462        let grm = parse(
1463            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1464            &src,
1465        )
1466        .unwrap();
1467        assert!(grm.has_token("T"));
1468        let t_span = Span::new(16, 17);
1469        assert_eq!(
1470            grm.prods[grm.get_rule("A").unwrap().pidxs[0]],
1471            Production {
1472                symbols: vec![token_span("T", t_span)],
1473                precedence: None,
1474                action: None,
1475                prod_span: t_span,
1476            }
1477        );
1478        assert_eq!(&src[t_span.start()..t_span.end() + 1], "T;");
1479    }
1480
1481    #[test]
1482    fn test_token_unicode() {
1483        let src = "%token '❤' %%\nA : '❤';".to_string();
1484        let grm = parse(
1485            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1486            &src,
1487        )
1488        .unwrap();
1489        assert!(grm.has_token("❤"));
1490    }
1491
1492    #[test]
1493    fn test_unicode_err1() {
1494        let src = "%token '❤' ❤;".to_string();
1495        parse(
1496            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1497            &src,
1498        )
1499        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IllegalString, 1, 12);
1500    }
1501
1502    #[test]
1503    fn test_unicode_err2() {
1504        let src = "%token '❤'\n%%\nA : '❤' | ❤;".to_string();
1505        parse(
1506            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1507            &src,
1508        )
1509        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IllegalString, 3, 11);
1510    }
1511
1512    #[test]
1513    fn test_missing_end_quote() {
1514        let src = "%epp X \"f\\".to_string();
1515        parse(
1516            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1517            &src,
1518        )
1519        .expect_error_at_line_col(&src, YaccGrammarErrorKind::InvalidString, 1, 10);
1520    }
1521
1522    #[test]
1523    fn test_simple_decl_fail() {
1524        let src = "%fail x\n%%\nA : a".to_string();
1525        parse(
1526            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1527            &src,
1528        )
1529        .expect_error_at_line_col(&src, YaccGrammarErrorKind::UnknownDeclaration, 1, 1);
1530    }
1531
1532    #[test]
1533    fn test_empty() {
1534        let src = "".to_string();
1535        parse(
1536            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1537            &src,
1538        )
1539        .expect_error_at_line_col("", YaccGrammarErrorKind::PrematureEnd, 1, 1);
1540    }
1541
1542    #[test]
1543    fn test_incomplete_rule1() {
1544        let src = "%%A:".to_string();
1545        parse(
1546            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1547            &src,
1548        )
1549        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 1, 5);
1550    }
1551
1552    #[test]
1553    fn test_line_col_report1() {
1554        let src = "%%
1555A:"
1556        .to_string();
1557        parse(
1558            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1559            &src,
1560        )
1561        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 2, 3);
1562    }
1563
1564    #[test]
1565    fn test_line_col_report2() {
1566        let src = "%%
1567A:
1568"
1569        .to_string();
1570        parse(
1571            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1572            &src,
1573        )
1574        .expect_error_at_line_col(&src, YaccGrammarErrorKind::IncompleteRule, 3, 1);
1575    }
1576
1577    #[test]
1578    fn test_line_col_report3() {
1579        let src = "
1580
1581        %woo"
1582            .to_string();
1583        parse(
1584            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1585            &src,
1586        )
1587        .expect_error_at_line_col(&src, YaccGrammarErrorKind::UnknownDeclaration, 3, 9);
1588    }
1589
1590    #[test]
1591    fn test_missing_colon() {
1592        let src = "%%A x;".to_string();
1593        parse(
1594            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1595            &src,
1596        )
1597        .expect_error_at_line_col(&src, YaccGrammarErrorKind::MissingColon, 1, 5);
1598    }
1599
1600    #[test]
1601    fn test_premature_end() {
1602        let src = "%token x".to_string();
1603        parse(
1604            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1605            &src,
1606        )
1607        .expect_error_at_line_col(&src, YaccGrammarErrorKind::PrematureEnd, 1, 9);
1608    }
1609
/// `PrematureEnd` must be reported correctly when the input ends in a
/// multibyte character. The expected columns below are one past the final
/// character when counting characters (not bytes).
#[test]
fn test_premature_end_multibyte() {
    // (source, expected column on line 1), checked in this order.
    let cases = [
        ("%actiontype 🦀", 14),
        ("%parse-param c:🦀", 17),
        ("// 🦀", 5),
    ];
    for (src, col) in cases {
        let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
        parse(kind, src).expect_error_at_line_col(src, YaccGrammarErrorKind::PrematureEnd, 1, col);
    }
}
1631
/// A `%token` declaration whose name only appears on the following line must
/// produce `ReachedEOL` at line 1, column 7.
#[test]
fn test_same_line() {
    let src = "%token
x";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, src);
    res.expect_error_at_line_col(src, YaccGrammarErrorKind::ReachedEOL, 1, 7);
}
1643
/// An unrecognised `%` declaration at the very start of the input must
/// produce `UnknownDeclaration` at line 1, column 1.
#[test]
fn test_unknown_declaration() {
    let src = "%woo";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, src);
    res.expect_error_at_line_col(src, YaccGrammarErrorKind::UnknownDeclaration, 1, 1);
}
1653
/// In the `Grmtools` format each rule carries its own action type after
/// `->`; the recorded type text must match what was written, including
/// generic and fully qualified paths.
#[test]
fn test_grmtools_format() {
    let src = "
          %start A
          %%
          A -> T: 'b';
          B -> Result<(), T>: 'c';
          C -> ::std::result::Result<(), T>: 'd';
          ";
    let grm = parse(YaccKind::Grmtools, src).unwrap();
    // (rule name, expected action type), checked in declaration order.
    let expected = [
        ("A", "T"),
        ("B", "Result<(), T>"),
        ("C", "::std::result::Result<(), T>"),
    ];
    for (rule, actiont) in expected {
        assert_eq!(grm.rules[rule].actiont, Some(actiont.to_string()));
    }
}
1672
/// Precedence declarations must record, per token, the level (which increases
/// with each declaration line), the associativity kind, and the token's span.
#[test]
#[rustfmt::skip]
fn test_precs() {
    let src = "
          %left '+' '-'
          %left '*'
          %right '/'
          %right '^'
          %nonassoc '~'
          %%
          ";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, src).unwrap();
    assert_eq!(grm.precs.len(), 6);
    // (token, level, associativity, (span start, span end))
    let expected = [
        ("+", 0, AssocKind::Left,     (18, 19)),
        ("-", 0, AssocKind::Left,     (22, 23)),
        ("*", 1, AssocKind::Left,     (42, 43)),
        ("/", 2, AssocKind::Right,    (63, 64)),
        ("^", 3, AssocKind::Right,    (84, 85)),
        ("~", 4, AssocKind::Nonassoc, (108, 109)),
    ];
    for (tok, level, kind, (start, end)) in expected {
        assert_eq!(grm.precs[tok], (Precedence { level, kind }, Span::new(start, end)));
    }
}
1693
/// Declaring a precedence for the same token twice, with any combination of
/// associativity kinds, must produce `DuplicatePrecedence` pointing at both
/// the original and the duplicate token.
#[test]
fn test_dup_precs() {
    // Each case: (source, ((origin line, col), (duplicate line, col))).
    #[rustfmt::skip]
    let cases = [
        ("
          %left 'x'
          %left 'x'
          %%
          ", ((2, 18), (3, 18))),
        ("
          %left 'x'
          %right 'x'
          %%
          ", ((2, 18), (3, 19))),
        ("
          %right 'x'
          %right 'x'
          %%
          ", ((2, 19), (3, 19))),
        ("
          %nonassoc 'x'
          %nonassoc 'x'
          %%
          ", ((2, 22), (3, 22))),
        ("
          %left 'x'
          %nonassoc 'x'
          %%
          ", ((2, 18), (3, 22))),
        ("
          %right 'x'
          %nonassoc 'x'
          %%
          ", ((2, 19), (3, 22)))
    ];
    for &(src, (origin, dup)) in &cases {
        let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
        parse(kind, src).expect_error_at_lines_cols(
            src,
            YaccGrammarErrorKind::DuplicatePrecedence,
            &mut [origin, dup].into_iter(),
        );
    }
}
1741
/// Duplicate precedences for two different tokens must yield two separate
/// `DuplicatePrecedence` errors, each listing every occurrence of its token.
#[test]
fn test_multiple_dup_precs() {
    let src = "
          %left 'x'
          %left 'x'
          %right 'x'
          %nonassoc 'x'
          %left 'y'
          %nonassoc 'y'
          %right 'y'
          %%";

    // One entry per expected error: all positions of 'x', then all of 'y'.
    let x_positions = vec![(2, 18), (3, 18), (4, 19), (5, 22)];
    let y_positions = vec![(6, 18), (7, 22), (8, 19)];
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    parse(kind, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicatePrecedence, x_positions),
            (YaccGrammarErrorKind::DuplicatePrecedence, y_positions),
        ]
        .into_iter(),
    );
}
1773
/// A `%prec` annotation on a production must be stored as that production's
/// precedence and must not be counted among its symbols.
#[test]
#[rustfmt::skip]
fn test_prec_override() {
    // Taken from the Yacc manual
    let src = "
            %left '+' '-'
            %left '*' '/'
            %%
            expr : expr '+' expr
                 | expr '-' expr
                 | expr '*' expr
                 | expr '/' expr
                 | '-'  expr %prec '*'
                 | NAME ;
        ";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, src).unwrap();
    assert_eq!(grm.precs.len(), 4);

    let expr = &grm.rules["expr"];
    // Productions without `%prec` carry no explicit precedence.
    assert_eq!(grm.prods[expr.pidxs[0]].precedence, None);
    assert_eq!(grm.prods[expr.pidxs[3]].symbols.len(), 3);
    // The unary-minus production has two symbols plus the override.
    assert_eq!(grm.prods[expr.pidxs[4]].symbols.len(), 2);
    assert_eq!(grm.prods[expr.pidxs[4]].precedence, Some("*".to_string()));
}
1796
/// `%prec` may follow `%empty`; the override must be stored on the empty
/// production.
#[test]
fn test_prec_empty() {
    let src = "
        %%
        expr : 'a'
             | %empty %prec 'a';
        ";
    let kind = YaccKind::Original(YaccOriginalActionKind::NoAction);
    let grm = parse(kind, src).unwrap();
    let empty_pidx = grm.rules["expr"].pidxs[1];
    assert_eq!(grm.prods[empty_pidx].precedence, Some("a".to_string()));
}
1810
/// `%prec` with no token after it must be rejected with `IllegalString` on
/// the line of the offending production.
#[test]
fn test_bad_prec_overrides() {
    let src = "
        %%
        S: 'A' %prec ;
        ";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, src);
    res.expect_error_at_line(src, YaccGrammarErrorKind::IllegalString, 3);
}
1823
/// `%avoid_insert` must record each listed name with its span and also
/// register the names as tokens.
#[test]
fn test_parse_avoid_insert() {
    let src = "
          %avoid_insert ws1 ws2
          %start R
          %%
          R: 'a';
          ";
    let ast = parse(YaccKind::Eco, src).unwrap();
    let expected = [
        ("ws1".to_string(), Span::new(25, 28)),
        ("ws2".to_string(), Span::new(29, 32)),
    ];
    assert_eq!(ast.avoid_insert, Some(expected.iter().cloned().collect()));
    for name in ["ws1", "ws2"] {
        assert!(ast.tokens.get(name).is_some());
    }
}
1851
/// Several `%avoid_insert` declarations naming different tokens must be
/// merged into a single collection.
#[test]
fn test_multiple_avoid_insert() {
    let src = "
          %avoid_insert X
          %avoid_insert Y
          %%
          ";
    let ast = parse(YaccKind::Eco, src).unwrap();
    let expected = [
        ("X".to_string(), Span::new(25, 26)),
        ("Y".to_string(), Span::new(51, 52)),
    ];
    assert_eq!(ast.avoid_insert, Some(expected.iter().cloned().collect()));
}
1876
/// Naming the same token in two `%avoid_insert` declarations must produce
/// `DuplicateAvoidInsertDeclaration` pointing at both occurrences.
#[test]
fn test_duplicate_avoid_insert() {
    let src = "
          %avoid_insert X Y
          %avoid_insert Y
          %%
          ";
    let res = parse(YaccKind::Eco, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
        &mut [(2usize, 27usize), (3, 25)].into_iter(),
    );
}
1890
/// Repeating a token within a single `%avoid_insert` declaration must
/// produce `DuplicateAvoidInsertDeclaration` with both positions on that line.
#[test]
fn test_duplicate_avoid_insert2() {
    let src = "
        %avoid_insert X
        %avoid_insert Y Y
        %%
        ";
    let res = parse(YaccKind::Eco, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration,
        &mut [(3, 23), (3, 25)].into_iter(),
    );
}
1904
/// Two different tokens duplicated across `%avoid_insert` declarations must
/// yield two separate `DuplicateAvoidInsertDeclaration` errors.
#[test]
fn test_multiple_duplicate_avoid_insert() {
    let src = "
        %avoid_insert X
        %avoid_insert Y Y X
        %%
        ";
    // The `Y` duplicate is expected first, then the `X` duplicate.
    let y_positions = vec![(3, 23), (3, 25)];
    let x_positions = vec![(2, 23), (3, 27)];
    parse(YaccKind::Eco, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration, y_positions),
            (YaccGrammarErrorKind::DuplicateAvoidInsertDeclaration, x_positions),
        ]
        .into_iter(),
    );
}
1927
/// `%implicit_tokens` is not accepted for the original Yacc kind and must be
/// reported as `UnknownDeclaration` on its line.
#[test]
fn test_no_implicit_tokens_in_original_yacc() {
    let src = "
        %implicit_tokens X
        %%
        ";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, src);
    res.expect_error_at_line(src, YaccGrammarErrorKind::UnknownDeclaration, 2);
}
1940
/// With the Eco kind, `%implicit_tokens` must record each listed name with
/// its span and also register the names as tokens.
#[test]
fn test_parse_implicit_tokens() {
    let src = "
          %implicit_tokens ws1 ws2
          %start R
          %%
          R: 'a';
          ";
    let ast = parse(YaccKind::Eco, src).unwrap();
    let expected = [
        ("ws1".to_string(), Span::new(28, 31)),
        ("ws2".to_string(), Span::new(32, 35)),
    ];
    assert_eq!(ast.implicit_tokens, Some(expected.iter().cloned().collect()));
    for name in ["ws1", "ws2"] {
        assert!(ast.tokens.get(name).is_some());
    }
}
1968
/// Several `%implicit_tokens` declarations naming different tokens must be
/// merged into a single collection.
#[test]
fn test_multiple_implicit_tokens() {
    let src = "
          %implicit_tokens X
          %implicit_tokens Y
          %%
          ";
    let ast = parse(YaccKind::Eco, src).unwrap();
    let expected = [
        ("X".to_string(), Span::new(28, 29)),
        ("Y".to_string(), Span::new(57, 58)),
    ];
    assert_eq!(ast.implicit_tokens, Some(expected.iter().cloned().collect()));
}
1993
/// Naming the same token in two `%implicit_tokens` declarations must produce
/// `DuplicateImplicitTokensDeclaration` pointing at both occurrences.
#[test]
fn test_duplicate_implicit_tokens() {
    let src = "
        %implicit_tokens X
        %implicit_tokens X Y
        %%
        ";
    let res = parse(YaccKind::Eco, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
        &mut [(2, 26), (3, 26)].into_iter(),
    );
}
2007
/// Repeating a token within a single `%implicit_tokens` declaration must
/// produce `DuplicateImplicitTokensDeclaration` with both positions on that
/// line.
#[test]
fn test_duplicate_implicit_tokens2() {
    let src = "
        %implicit_tokens X X
        %implicit_tokens Y
        %%
        ";
    let res = parse(YaccKind::Eco, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration,
        &mut [(2, 26), (2, 28)].into_iter(),
    );
}
2021
/// Duplicate `%implicit_tokens` entries and a later incomplete rule must all
/// be reported together: two duplicate-declaration errors followed by
/// `IncompleteRule`.
#[test]
fn test_multiple_duplicate_implicit_tokens_and_invalid_rule() {
    let src = "
        %implicit_tokens X
        %implicit_tokens X Y
        %implicit_tokens Y
        %%
        IncompleteRule: ";
    let x_positions = vec![(2, 26), (3, 26)];
    let y_positions = vec![(3, 28), (4, 26)];
    parse(YaccKind::Eco, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration, x_positions),
            (YaccGrammarErrorKind::DuplicateImplicitTokensDeclaration, y_positions),
            (YaccGrammarErrorKind::IncompleteRule, vec![(6, 25)]),
        ]
        .into_iter(),
    );
}
2046
/// `%epp` declarations must record the name span, the unquoted/unescaped
/// string value, and the span of the quoted string, for both quote styles and
/// for escaped quotes.
#[test]
#[rustfmt::skip]
fn test_parse_epp() {
    let src = r#"
          %epp A "a"
          %epp B 'a'
          %epp C '"'
          %epp D "'"
          %epp E "\""
          %epp F '\''
          %epp G "a\"b"
          %%
          R: 'A';
          "#;
    let ast = parse(YaccKind::Eco, src).unwrap();
    assert_eq!(ast.epp.len(), 7);
    // (name, name span, expected value, span of the quoted string)
    let expected = [
        ("A", (16, 17),   "a",    (18, 21)),
        ("B", (37, 38),   "a",    (39, 42)),
        ("C", (58, 59),   "\"",   (60, 63)),
        ("D", (79, 80),   "'",    (81, 84)),
        ("E", (100, 101), "\"",   (102, 106)),
        ("F", (122, 123), "'",    (124, 128)),
        ("G", (144, 145), "a\"b", (146, 152)),
    ];
    for (name, (ns, ne), value, (vs, ve)) in expected {
        assert_eq!(ast.epp[name], (Span::new(ns, ne), (value.to_string(), Span::new(vs, ve))));
    }
}
2074
/// Declaring `%epp` for the same name three times must produce a single
/// `DuplicateEPP` error listing all three positions.
#[test]
fn test_duplicate_epp() {
    let src = "
        %epp A \"a\"
        %epp A \"a\"
        %epp A \"a\"
        %%
        ";
    let res = parse(YaccKind::Eco, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateEPP,
        &mut [(2, 14), (3, 14), (4, 14)].into_iter(),
    );
}
2089
/// Duplicate `%epp` declarations for two different names must yield two
/// separate `DuplicateEPP` errors, each listing all positions of its name.
#[test]
fn test_multiple_duplicate_epp() {
    let src = "
        %epp A \"a1\"
        %epp A \"a2\"
        %epp A \"a3\"
        %epp B \"b1\"
        %epp B \"b2\"
        %epp B \"b3\"
        %%
        ";
    let a_positions = vec![(2, 14), (3, 14), (4, 14)];
    let b_positions = vec![(5, 14), (6, 14), (7, 14)];
    parse(YaccKind::Eco, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicateEPP, a_positions),
            (YaccGrammarErrorKind::DuplicateEPP, b_positions),
        ]
        .into_iter(),
    );
}
2116
/// An unterminated `%epp` string must produce `InvalidString` on its line,
/// both when more input follows and when the input ends there.
#[test]
fn test_broken_string() {
    let followed_by_more_input = "
          %epp A \"a
          %%
          ";
    let at_end_of_input = "
        %epp A \"a";
    for src in [followed_by_more_input, at_end_of_input] {
        parse(YaccKind::Eco, src).expect_error_at_line(src, YaccGrammarErrorKind::InvalidString, 2);
    }
}
2129
/// Two `%start` declarations must produce `DuplicateStartDeclaration`
/// pointing at both rule names.
#[test]
fn test_duplicate_start() {
    let src = "
          %start X
          %start X
          %%
          ";
    let res = parse(YaccKind::Eco, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateStartDeclaration,
        &mut [(2, 18), (3, 18)].into_iter(),
    );
}
2143
/// A duplicate `%start` followed directly by the end of input must report
/// both `DuplicateStartDeclaration` and `PrematureEnd`.
#[test]
fn test_duplicate_start_premature_end() {
    let src = "
          %start X
          %start X";
    let start_positions = vec![(2, 18), (3, 18)];
    parse(YaccKind::Eco, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicateStartDeclaration, start_positions),
            (YaccGrammarErrorKind::PrematureEnd, vec![(3, 19)]),
        ]
        .into_iter(),
    );
}
2161
/// Repeated `%expect` declarations must produce a single
/// `DuplicateExpectDeclaration` error listing every occurrence.
#[test]
fn test_duplicate_expect() {
    let src = "
          %expect 1
          %expect 2
          %expect 3
          %%
          ";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateExpectDeclaration,
        &mut [(2, 19), (3, 19), (4, 19)].into_iter(),
    );
}
2180
/// Duplicate `%expect` declarations and a rule lacking its `:` must both be
/// reported: `DuplicateExpectDeclaration` first, then `MissingColon`.
#[test]
fn test_duplicate_expect_and_missing_colon() {
    let src = "
          %expect 1
          %expect 2
          %expect 3
          %%
          A ;";
    let expect_positions = vec![(2, 19), (3, 19), (4, 19)];
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    parse(kind, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicateExpectDeclaration, expect_positions),
            (YaccGrammarErrorKind::MissingColon, vec![(6, 13)]),
        ]
        .into_iter(),
    );
}
2205
/// Repeated `%expect-rr` declarations must produce a single
/// `DuplicateExpectRRDeclaration` error listing every occurrence.
#[test]
fn test_duplicate_expectrr() {
    let src = "
          %expect-rr 1
          %expect-rr 2
          %expect-rr 3
          %%
          ";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateExpectRRDeclaration,
        &mut [(2, 22), (3, 22), (4, 22)].into_iter(),
    );
}
2224
/// Duplicate `%expect-rr` declarations and an illegal rule name must both be
/// reported: `DuplicateExpectRRDeclaration` first, then `IllegalName`.
#[test]
fn test_duplicate_expectrr_illegal_name() {
    let src = "
          %expect-rr 1
          %expect-rr 2
          %expect-rr 3
          %%
          +IllegalRuleName+:;";
    let expectrr_positions = vec![(2, 22), (3, 22), (4, 22)];
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    parse(kind, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicateExpectRRDeclaration, expectrr_positions),
            (YaccGrammarErrorKind::IllegalName, vec![(6, 11)]),
        ]
        .into_iter(),
    );
}
2249
/// Without a `%start` declaration, the first rule in the grammar becomes the
/// start rule, recorded with the span of its name.
#[test]
fn test_implicit_start() {
    let src = "
          %%
          R: ;
          R2: ;
          R3: ;
          ";
    let ast = parse(YaccKind::Eco, src).unwrap();
    let expected_start = ("R".to_string(), Span::new(24, 25));
    assert_eq!(ast.start, Some(expected_start));
}
2264
/// Action bodies must be stored without their enclosing braces and
/// surrounding whitespace; productions without an action store `None`.
#[test]
fn test_action() {
    let src = "
          %%
          A: 'a' B { println!(\"test\"); }
           ;
          B: 'b' 'c' { add($1, $2); }
           | 'd'
           ;
          D: 'd' {}
           ;
          ";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, src).unwrap();

    let a_first = grm.rules["A"].pidxs[0];
    assert_eq!(grm.prods[a_first].action, Some("println!(\"test\");".to_string()));

    let b_pidxs = &grm.rules["B"].pidxs;
    assert_eq!(grm.prods[b_pidxs[0]].action, Some("add($1, $2);".to_string()));
    // The second alternative of B has no action block.
    assert_eq!(grm.prods[b_pidxs[1]].action, None);
}
2291
/// An action whose final character before the closing brace is multibyte
/// must be extracted intact.
#[test]
fn test_action_ends_in_multibyte() {
    let src = "%%A: '_' {(); // 🦀};";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, src).unwrap();
    let pidx = grm.rules["A"].pidxs[0];
    assert_eq!(grm.prods[pidx].action, Some("(); // 🦀".to_string()));
}
2304
/// Text after the second `%%` must be stored as the programs section, with
/// leading whitespace removed.
#[test]
fn test_programs() {
    let src = "
         %%
         A: 'a';
         %%
         fn foo() {}";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let grm = parse(kind, src).unwrap();
    assert_eq!(grm.programs, Some("fn foo() {}".to_string()));
}
2318
/// Line counting must stay correct across a multi-line action: the malformed
/// string that follows it must be reported on line 6.
#[test]
fn test_actions_with_newlines() {
    let src = "
        %%
        A: 'a' { foo();
                 bar(); }
        ;
        B: b';";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, src);
    res.expect_error_at_line(src, YaccGrammarErrorKind::IllegalString, 6);
}
2333
/// Line (`//`) and block (`/* */`) comments must be skipped in both the
/// declarations and rules sections; an unterminated block comment must be
/// reported as `IncompleteComment` on the line where it starts.
#[test]
fn test_comments() {
    // Builds a fresh kind value for each parse call.
    let kind = || YaccKind::Original(YaccOriginalActionKind::GenericParseTree);

    // Valid comments of both styles in the declarations section.
    let src = "
            // A valid comment
            %token   a
            /* Another valid comment */
            %%\n
            A : a;";
    let grm = parse(kind(), src).unwrap();
    assert!(grm.has_token("a"));

    // `* /` does not close the comment, so it runs to the end of input.
    let src = "
        /* An invalid comment * /
        %token   a
        %%\n
        A : a;";
    parse(kind(), src).expect_error_at_line(src, YaccGrammarErrorKind::IncompleteComment, 2);

    // A valid multi-line comment followed by an unterminated one.
    let src = "
        %token   a
        %%
        /* A valid
         * multi-line comment
         */
        /* An invalid comment * /
        A : a;";
    parse(kind(), src).expect_error_at_line(src, YaccGrammarErrorKind::IncompleteComment, 7);

    // The comment is fine; the rule after it is missing its terminator.
    let src = "
        %token   a
        %%
        // Valid comment
        A : a";
    parse(kind(), src).expect_error_at_line(src, YaccGrammarErrorKind::IncompleteRule, 5);
}
2385
/// A global `%actiontype` declaration must be applied as the action type of
/// the grammar's rules.
#[test]
fn test_action_type() {
    let src = "
         %actiontype T
         %%
         A: 'a';
         %%
         fn foo() {}";
    let kind = YaccKind::Original(YaccOriginalActionKind::UserAction);
    let grm = parse(kind, src).unwrap();
    assert_eq!(grm.rules["A"].actiont, Some("T".to_string()));
}
2400
/// Repeated `%actiontype` declarations must produce a single
/// `DuplicateActiontypeDeclaration` error listing every occurrence.
#[test]
fn test_only_one_type() {
    let src = "
         %actiontype T1
         %actiontype T2
         %actiontype T3
         %%
         A: 'a';";
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    let res = parse(kind, src);
    res.expect_error_at_lines_cols(
        src,
        YaccGrammarErrorKind::DuplicateActiontypeDeclaration,
        &mut [(2, 22), (3, 22), (4, 22)].into_iter(),
    );
}
2419
/// Duplicate `%actiontype` declarations followed directly by the end of
/// input must report both `DuplicateActiontypeDeclaration` and `PrematureEnd`.
#[test]
fn test_duplicate_actiontype_and_premature_end() {
    let src = "
         %actiontype T1
         %actiontype T2
         %actiontype T3";
    let actiontype_positions = vec![(2, 22), (3, 22), (4, 22)];
    let kind = YaccKind::Original(YaccOriginalActionKind::GenericParseTree);
    parse(kind, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicateActiontypeDeclaration, actiontype_positions),
            (YaccGrammarErrorKind::PrematureEnd, vec![(4, 24)]),
        ]
        .into_iter(),
    );
}
2442
/// `%parse-param name: type` must be split into the name and the type text.
#[test]
fn test_parse_param() {
    let src = "
          %parse-param a::b: (u64, u64)
          %%
          A: 'a';
         ";
    let kind = YaccKind::Original(YaccOriginalActionKind::UserAction);
    let grm = parse(kind, src).unwrap();

    let expected = ("a::b".to_owned(), "(u64, u64)".to_owned());
    assert_eq!(grm.parse_param, Some(expected));
}
2457
/// Defining the same rule several times must accumulate all of its
/// productions under that one rule rather than replacing earlier ones.
#[test]
fn test_duplicate_rule() {
    let src = "%token A B D
%%
Expr -> () : %empty | A;
Expr -> () : B | 'C';
Expr -> () : D;
";
    let ast = parse(YaccKind::Grmtools, src).unwrap();
    let expr_rule = ast.get_rule("Expr").unwrap();

    // Gather the name of every symbol used in any production of `Expr`.
    let prod_names: HashSet<String> = expr_rule
        .pidxs
        .iter()
        .flat_map(|pidx| ast.prods[*pidx].symbols.iter())
        .map(|sym| match sym {
            Symbol::Token(name, _) | Symbol::Rule(name, _) => name.clone(),
        })
        .collect();

    assert_eq!(ast.prods.len(), 5);
    let expected: HashSet<String> = ["A", "B", "C", "D"].into_iter().map(String::from).collect();
    assert_eq!(prod_names, expected);
}
2486
/// In the `Grmtools` format, duplicate `%start` declarations and a rule
/// lacking `->` must both be reported: `DuplicateStartDeclaration` first,
/// then `MissingRightArrow`.
#[test]
fn test_duplicate_start_and_missing_arrow() {
    let src = "%start A
%start A
%start A
%%
A -> () : 'a1';
B";
    let start_positions = vec![(1, 8), (2, 8), (3, 8)];
    parse(YaccKind::Grmtools, src).expect_multiple_errors(
        src,
        &mut [
            (YaccGrammarErrorKind::DuplicateStartDeclaration, start_positions),
            (YaccGrammarErrorKind::MissingRightArrow, vec![(6, 2)]),
        ]
        .into_iter(),
    );
}
2507
/// Errors found in the declarations must still be reported when a programs
/// section follows, and an error inside the programs section must be
/// reported in addition to them.
#[test]
fn test_routines_multiple_errors() {
    let mut src = String::from(
        "
        %start A
        %start B
        %expect 1
        %expect 2
        %%
        A -> () : 'a';
        %%
        ",
    );
    let duplicate_start = (
        YaccGrammarErrorKind::DuplicateStartDeclaration,
        vec![(2, 16), (3, 16)],
    );
    let duplicate_expect = (
        YaccGrammarErrorKind::DuplicateExpectDeclaration,
        vec![(4, 17), (5, 17)],
    );
    let mut expected_errs = vec![duplicate_start, duplicate_expect];

    // First pass: an empty programs section, so only the declaration errors.
    let res = parse(YaccKind::Grmtools, &src);
    res.expect_multiple_errors(&src, &mut expected_errs.clone().into_iter());

    // Second pass: the programs section now holds an unterminated comment.
    src.push_str(
        "
                /* Incomplete comment
        ",
    );
    expected_errs.push((YaccGrammarErrorKind::IncompleteComment, vec![(10, 17)]));
    let res = parse(YaccKind::Grmtools, &src);
    res.expect_multiple_errors(&src, &mut expected_errs.clone().into_iter());
}
2543
/// `%expect-unused` accepts rule names and quoted tokens (either quote
/// style); each must be recorded as the matching `Symbol` with its span.
#[test]
fn test_expect_unused() {
    let src = r#"
        %expect-unused A 'b' "c"
        %%
        A: ;
        "#;
    let kind = YaccKind::Original(YaccOriginalActionKind::NoAction);
    let grm = parse(kind, src).unwrap();
    let expected = [
        Symbol::Rule("A".to_string(), Span::new(24, 25)),
        Symbol::Token("b".to_string(), Span::new(27, 28)),
        Symbol::Token("c".to_string(), Span::new(31, 32)),
    ];
    for sym in expected {
        assert!(grm.expect_unused.contains(&sym));
    }
}
2562
/// A stray `%` as the argument of `%expect-unused` must be reported as
/// `UnknownDeclaration` at the position of that `%`.
#[test]
fn test_bad_expect_unused() {
    let src = "
        %expect-unused %
        %%
        A: ;
        ";
    let kind = YaccKind::Original(YaccOriginalActionKind::NoAction);
    let res = parse(kind, src);
    res.expect_error_at_line_col(src, YaccGrammarErrorKind::UnknownDeclaration, 2, 24);
}
2577
/// `unused_symbols` must list the rules and tokens that are not used,
/// excluding anything named in `%expect-unused`.
#[test]
fn test_unused_symbols() {
    // `UnusedAllowed` and 'b' are exempted; `Unused` and `a` must be listed.
    let src = "
        %expect-unused UnusedAllowed 'b'
        %token a b
        %start Start
        %%
        Unused: ;
        Start: ;
        UnusedAllowed: ;
        ";
    let ast = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
    let unused: Vec<Symbol> = ast.unused_symbols().map(|idx| idx.symbol(&ast)).collect();
    assert_eq!(
        unused,
        vec![
            Symbol::Rule("Unused".to_string(), Span::new(101, 107)),
            Symbol::Token("a".to_string(), Span::new(57, 58)),
        ]
    );

    // A rule that is only referenced by itself is still listed.
    let src = "
        %start A
        %%
        A: ;
        Rec: Rec | ;
        ";
    let ast = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
    let unused: Vec<Symbol> = ast.unused_symbols().map(|idx| idx.symbol(&ast)).collect();
    assert_eq!(
        unused,
        vec![Symbol::Rule("Rec".to_string(), Span::new(50, 53))]
    );

    let src = "
        %%
        A: 'a' | 'z' ;
        B: 'a' | 'c' ;
        ";
    let ast = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
    // Check that we warn on B and 'c' but not 'a'
    let unused: Vec<Symbol> = ast.unused_symbols().map(|idx| idx.symbol(&ast)).collect();
    assert_eq!(
        unused,
        vec![
            Symbol::Rule("B".to_string(), Span::new(43, 44)),
            Symbol::Token("c".to_string(), Span::new(53, 54)),
        ]
    );
}
2644
/// `%empty` is accepted as a whole alternative in any position, but mixing
/// it with other symbols in one production is `NonEmptyProduction`.
#[test]
fn test_percent_empty() {
    // Builds a fresh kind value for each parse call.
    let kind = || YaccKind::Original(YaccOriginalActionKind::NoAction);

    // `%empty` as the first alternative.
    let src = r#"
        %token a
        %start A
        %%
        A: %empty | "a";
        "#;
    parse(kind(), src).unwrap();

    // `%empty` as a middle alternative, and in a second rule.
    let src = r#"
        %token a b
        %start A
        %%
        A: "a" | %empty | "b";
        B: %empty | "b";
        "#;
    parse(kind(), src).unwrap();

    // A symbol after `%empty`: the error points at `%empty`.
    let src = r#"
        %token a
        %start A
        %%
        A: %empty "a";
        "#;
    let res = parse(kind(), src);
    res.expect_error_at_line_col(src, YaccGrammarErrorKind::NonEmptyProduction, 5, 12);

    // `%empty` after a symbol: the error again points at `%empty`.
    let src = r#"
        %token a
        %start A
        %%
        A: "a" %empty;
        "#;
    let res = parse(kind(), src);
    res.expect_error_at_line_col(src, YaccGrammarErrorKind::NonEmptyProduction, 5, 16);
}
2693
2694    #[test]
2695    fn test_action_successor() {
2696        let src = "
2697        %%
2698        A: B {} B;
2699        B: ;
2700        ";
2701        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).expect_error_at_line_col(
2702            src,
2703            YaccGrammarErrorKind::ProductionNotTerminated,
2704            3,
2705            17,
2706        );
2707
2708        let src = "
2709        %%
2710        A: B B {};
2711        B: {} ;
2712        ";
2713        parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2714    }
2715
2716    #[test]
2717    fn test_empty_production_spans_issue_473() {
2718        let empty_prod_conflicts = [
2719            (
2720                "%start Expr
2721%%
2722Expr: %empty | Factor;
2723Factor: ')' Expr ')';
2724",
2725                (0, Span::new(21, 27)),
2726            ),
2727            (
2728                "%start Expr
2729%%
2730Expr: | Factor;
2731Factor: ')' Expr ')';
2732",
2733                (0, Span::new(21, 21)),
2734            ),
2735            (
2736                "%start Expr
2737%%
2738Expr:| Factor;
2739Factor: ')' Expr ')';
2740",
2741                (0, Span::new(20, 20)),
2742            ),
2743            (
2744                "%start Expr
2745%%
2746Expr: Factor | %empty;
2747Factor: ')' Expr ')';
2748",
2749                (1, Span::new(30, 36)),
2750            ),
2751            (
2752                "%start Expr
2753%%
2754Expr: Factor | ;
2755Factor: ')' Expr ')';
2756",
2757                (1, Span::new(30, 30)),
2758            ),
2759            (
2760                "%start Expr
2761%%
2762Expr: Factor|;
2763Factor: ')' Expr ')';
2764",
2765                (1, Span::new(28, 28)),
2766            ),
2767        ];
2768
2769        for (i, (src, (empty_pidx, empty_span))) in empty_prod_conflicts.iter().enumerate() {
2770            eprintln!("{}", i);
2771            let ast = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
2772            assert_eq!(
2773                ast.prods[ast.get_rule("Expr").unwrap().pidxs[*empty_pidx]],
2774                Production {
2775                    symbols: vec![],
2776                    precedence: None,
2777                    action: None,
2778                    prod_span: *empty_span,
2779                }
2780            );
2781        }
2782    }
2783}