// lrpar/ctbuilder.rs
1//! Build grammars at compile-time so that they can be statically included into a binary.
2
3use std::{
4    any::type_name,
5    collections::{HashMap, HashSet},
6    env::{current_dir, var},
7    error::Error,
8    fmt::{self, Debug, Write as fmtWrite},
9    fs::{self, File, create_dir_all, read_to_string},
10    hash::Hash,
11    io::Write,
12    marker::PhantomData,
13    path::{Path, PathBuf},
14    sync::{LazyLock, Mutex},
15};
16
17use crate::{
18    LexerTypes, RTParserBuilder, RecoveryKind,
19    diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter},
20};
21
22#[cfg(feature = "_unstable_api")]
23use crate::unstable_api::UnstableApi;
24
25use bincode::{Decode, Encode, decode_from_slice, encode_to_vec};
26use cfgrammar::{
27    Location, RIdx, Span, Symbol,
28    header::{GrmtoolsSectionParser, Header, HeaderValue, Value},
29    markmap::{Entry, MergeBehavior},
30    yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind, ast::ASTWithValidityInfo},
31};
32use filetime::FileTime;
33use lrtable::{Minimiser, StateGraph, StateTable, from_yacc, statetable::Conflicts};
34use num_traits::{AsPrimitive, PrimInt, Unsigned};
35use proc_macro2::{Literal, TokenStream};
36use quote::{ToTokens, TokenStreamExt, format_ident, quote};
37use syn::{Generics, parse_quote};
38
// Prefixes for generated identifiers — presumably chosen to avoid clashing
// with names in user-written action code (TODO confirm against codegen below).
const ACTION_PREFIX: &str = "__gt_";
const GLOBAL_PREFIX: &str = "__GT_";
const ACTIONS_KIND: &str = "__GtActionsKind";
const ACTIONS_KIND_PREFIX: &str = "Ak";
const ACTIONS_KIND_HIDDEN: &str = "__GtActionsKindHidden";

// File extension used for the generated Rust module.
const RUST_FILE_EXT: &str = "rs";

// Labels prepended to user-facing diagnostics.
const WARNING: &str = "[Warning]";
const ERROR: &str = "[Error]";

// Output paths already claimed by a `CTParserBuilder` in this process; used by
// `build` to reject two parsers being generated to the same path.
static GENERATED_PATHS: LazyLock<Mutex<HashSet<PathBuf>>> =
    LazyLock::new(|| Mutex::new(HashSet::new()));
52
/// Error produced by `build` when a grammar has Shift/Reduce or Reduce/Reduce
/// conflicts not covered by its `%expect`/`%expect-rr` declarations.
struct CTConflictsError<StorageT: Eq + Hash> {
    // Pre-rendered, human-readable description of the conflicts; both the
    // `Display` and `Debug` impls print this verbatim.
    conflicts_diagnostic: String,
    // Retained so tests can inspect the state table behind the conflicts.
    // (`#[cfg(test)]` + `#[cfg_attr(test, ...)]` was redundant: inside a
    // `#[cfg(test)]` item the `cfg_attr` condition is always true.)
    #[cfg(test)]
    #[allow(dead_code)]
    stable: StateTable<StorageT>,
    phantom: PhantomData<StorageT>,
}
60
/// The quote impl of `ToTokens` for `Option` prints an empty string for `None`
/// and the inner value for `Some(inner_value)`.
///
/// This wrapper instead emits both `Some` and `None` variants, so the
/// generated code contains a real `Option` expression.
/// See: [quote #20](https://github.com/dtolnay/quote/issues/20)
struct QuoteOption<T>(Option<T>);
67
68impl<T: ToTokens> ToTokens for QuoteOption<T> {
69    fn to_tokens(&self, tokens: &mut TokenStream) {
70        tokens.append_all(match self.0 {
71            Some(ref t) => quote! { ::std::option::Option::Some(#t) },
72            None => quote! { ::std::option::Option::None },
73        });
74    }
75}
76
/// The quote impl of `ToTokens` for `usize` prints literal values
/// including a type suffix, for example `0usize`.
///
/// This wrapper omits the type suffix, emitting `0` instead.
struct UnsuffixedUsize(usize);
82
83impl ToTokens for UnsuffixedUsize {
84    fn to_tokens(&self, tokens: &mut TokenStream) {
85        tokens.append(Literal::usize_unsuffixed(self.0))
86    }
87}
88
/// This wrapper adds a missing impl of `ToTokens` for tuples.
/// For a tuple `(a, b)` it emits `(a.to_tokens(), b.to_tokens())`.
struct QuoteTuple<T>(T);
92
93impl<A: ToTokens, B: ToTokens> ToTokens for QuoteTuple<(A, B)> {
94    fn to_tokens(&self, tokens: &mut TokenStream) {
95        let (a, b) = &self.0;
96        tokens.append_all(quote!((#a, #b)));
97    }
98}
99
/// The wrapped `&str` value will be emitted with a call to `to_string()`,
/// i.e. as an owned `String` expression in the generated code.
struct QuoteToString<'a>(&'a str);
102
103impl ToTokens for QuoteToString<'_> {
104    fn to_tokens(&self, tokens: &mut TokenStream) {
105        let x = &self.0;
106        tokens.append_all(quote! { #x.to_string() });
107    }
108}
109
110impl<StorageT> fmt::Display for CTConflictsError<StorageT>
111where
112    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
113    usize: AsPrimitive<StorageT>,
114{
115    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
116        write!(f, "{}", self.conflicts_diagnostic)
117    }
118}
119
120impl<StorageT> fmt::Debug for CTConflictsError<StorageT>
121where
122    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
123    usize: AsPrimitive<StorageT>,
124{
125    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126        write!(f, "{}", self.conflicts_diagnostic)
127    }
128}
129
// Marker impl: `Error` only requires `Display` + `Debug`, both provided above.
impl<StorageT> Error for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
}
136
/// A string which uses `Display` for its `Debug` impl.
///
/// Useful for returning pre-formatted, multi-line diagnostics as a
/// `Box<dyn Error>` without `Debug`'s escaping obscuring them.
struct ErrorString(String);

impl fmt::Display for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.0)
    }
}

impl fmt::Debug for ErrorString {
    // Delegate to `Display` so the message is shown verbatim (previously the
    // body was duplicated; delegating keeps the two impls in sync).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl Error for ErrorString {}
152
/// Specify the visibility of the module generated by `CTBuilder`.
#[derive(Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum Visibility {
    /// Module-level visibility only.
    Private,
    /// `pub`
    Public,
    /// `pub(super)`
    PublicSuper,
    /// `pub(self)`
    PublicSelf,
    /// `pub(crate)`
    PublicCrate,
    /// `pub(in {arg})` — the argument is emitted verbatim as a Rust path.
    PublicIn(String),
}
170
/// Specifies the [Rust Edition] that will be emitted during code generation.
///
/// [Rust Edition]: https://doc.rust-lang.org/edition-guide/rust-2021/index.html
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum RustEdition {
    Rust2015,
    Rust2018,
    Rust2021,
}
181
182impl RustEdition {
183    fn to_variant_tokens(self) -> TokenStream {
184        match self {
185            RustEdition::Rust2015 => quote!(::lrpar::RustEdition::Rust2015),
186            RustEdition::Rust2018 => quote!(::lrpar::RustEdition::Rust2018),
187            RustEdition::Rust2021 => quote!(::lrpar::RustEdition::Rust2021),
188        }
189    }
190}
191
192impl ToTokens for Visibility {
193    fn to_tokens(&self, tokens: &mut TokenStream) {
194        tokens.extend(match self {
195            Visibility::Private => quote!(),
196            Visibility::Public => quote! {pub},
197            Visibility::PublicSuper => quote! {pub(super)},
198            Visibility::PublicSelf => quote! {pub(self)},
199            Visibility::PublicCrate => quote! {pub(crate)},
200            Visibility::PublicIn(data) => {
201                let other = str::parse::<TokenStream>(data).unwrap();
202                quote! {pub(in #other)}
203            }
204        })
205    }
206}
207
208impl Visibility {
209    fn to_variant_tokens(&self) -> TokenStream {
210        match self {
211            Visibility::Private => quote!(::lrpar::Visibility::Private),
212            Visibility::Public => quote!(::lrpar::Visibility::Public),
213            Visibility::PublicSuper => quote!(::lrpar::Visibility::PublicSuper),
214            Visibility::PublicSelf => quote!(::lrpar::Visibility::PublicSelf),
215            Visibility::PublicCrate => quote!(::lrpar::Visibility::PublicCrate),
216            Visibility::PublicIn(data) => {
217                let data = QuoteToString(data);
218                quote!(::lrpar::Visibility::PublicIn(#data))
219            }
220        }
221    }
222}
223
/// A `CTParserBuilder` allows one to specify the criteria for building a statically generated
/// parser.
pub struct CTParserBuilder<'a, LexerTypesT: LexerTypes>
where
    LexerTypesT::StorageT: Eq + Hash,
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    // Anything stored in here (except `output_path`, `conflicts`, and `error_on_conflict`) almost
    // certainly needs to be included as part of the rebuild_cache function below so that, if it's
    // changed, the grammar is rebuilt.
    grammar_path: Option<PathBuf>,
    // If specified rather than reading source from `grammar_path`, use this string directly
    grammar_src: Option<String>,
    // If specified along with `grammar_src`, use this rather than building an ast from `grammar_src`.
    from_ast: Option<ASTWithValidityInfo>,
    // Where the generated Rust module is written.
    output_path: Option<PathBuf>,
    // Explicit module name; if `None`, `build` derives one from the grammar filename.
    mod_name: Option<&'a str>,
    // Error-recovery strategy; `build` defaults this to `RecoveryKind::CPCTPlus` if unset.
    recoverer: Option<RecoveryKind>,
    // May also be supplied by the grammar's `%grmtools` header section.
    yacckind: Option<YaccKind>,
    error_on_conflicts: bool,
    warnings_are_errors: bool,
    show_warnings: bool,
    visibility: Visibility,
    rust_edition: RustEdition,
    // Hidden hook: called by `build` with the merged header, a run-time parser
    // builder, the token map, and the grammar path, just before code generation.
    inspect_rt: Option<
        Box<
            dyn for<'b> FnMut(
                &'b mut Header<Location>,
                RTParserBuilder<LexerTypesT::StorageT, LexerTypesT>,
                &'b HashMap<String, LexerTypesT::StorageT>,
                &PathBuf,
            ) -> Result<(), Box<dyn Error>>,
        >,
    >,
    // test function for inspecting private state
    #[cfg(test)]
    inspect_callback: Option<Box<dyn Fn(RecoveryKind) -> Result<(), Box<dyn Error>>>>,
    phantom: PhantomData<LexerTypesT>,
}
263
264impl<
265    'a,
266    StorageT: 'static + Debug + Hash + PrimInt + Encode + Unsigned,
267    LexerTypesT: LexerTypes<StorageT = StorageT>,
268> CTParserBuilder<'a, LexerTypesT>
269where
270    usize: AsPrimitive<StorageT>,
271{
272    /// Create a new `CTParserBuilder`.
273    ///
274    /// `StorageT` must be an unsigned integer type (e.g. `u8`, `u16`) which is:
275    ///   * big enough to index (separately) all the tokens, rules, productions in the grammar,
276    ///   * big enough to index the state table created from the grammar,
277    ///   * less than or equal in size to `u32`.
278    ///
279    /// In other words, if you have a grammar with 256 tokens, 256 rules, and 256 productions,
280    /// which creates a state table of 256 states you can safely specify `u8` here; but if any of
281    /// those counts becomes 257 or greater you will need to specify `u16`. If you are parsing
282    /// large files, the additional storage requirements of larger integer types can be noticeable,
283    /// and in such cases it can be worth specifying a smaller type. `StorageT` defaults to `u32`
284    /// if unspecified.
285    ///
286    /// # Examples
287    ///
288    /// ```text
289    /// CTParserBuilder::<DefaultLexerTypes<u8>>::new()
290    ///     .grammar_in_src_dir("grm.y")?
291    ///     .build()?;
292    /// ```
293    pub fn new() -> Self {
294        CTParserBuilder {
295            grammar_path: None,
296            grammar_src: None,
297            from_ast: None,
298            output_path: None,
299            mod_name: None,
300            recoverer: None,
301            yacckind: None,
302            error_on_conflicts: true,
303            warnings_are_errors: true,
304            show_warnings: true,
305            visibility: Visibility::Private,
306            rust_edition: RustEdition::Rust2021,
307            inspect_rt: None,
308            #[cfg(test)]
309            inspect_callback: None,
310            phantom: PhantomData,
311        }
312    }
313
314    /// Set the input grammar path to a file relative to this project's `src` directory. This will
315    /// also set the output path (i.e. you do not need to call [CTParserBuilder::output_path]).
316    ///
317    /// For example if `a/b.y` is passed as `inp` then [CTParserBuilder::build] will:
318    ///   * use `src/a/b.y` as the input file.
319    ///   * write output to a file which can then be imported by calling `lrpar_mod!("a/b.y")`.
320    ///   * create a module in that output file named `b_y`.
321    ///
322    /// You can override the output path and/or module name by calling [CTParserBuilder::output_path]
323    /// and/or [CTParserBuilder::mod_name], respectively, after calling this function.
324    ///
325    /// This is a convenience function that makes it easier to compile grammar files stored in a
326    /// project's `src/` directory: please see [CTParserBuilder::build] for additional constraints
327    /// and information about the generated files. Note also that each `.y` file can only be
328    /// processed once using this function: if you want to generate multiple grammars from a single
329    /// `.y` file, you will need to use [CTParserBuilder::output_path].
330    pub fn grammar_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
331    where
332        P: AsRef<Path>,
333    {
334        if !srcp.as_ref().is_relative() {
335            return Err(format!(
336                "Grammar path '{}' must be a relative path.",
337                srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
338            )
339            .into());
340        }
341
342        let mut grmp = current_dir()?;
343        grmp.push("src");
344        grmp.push(srcp.as_ref());
345        self.grammar_path = Some(grmp);
346
347        let mut outp = PathBuf::new();
348        outp.push(var("OUT_DIR").unwrap());
349        outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
350        create_dir_all(&outp)?;
351        let mut leaf = srcp
352            .as_ref()
353            .file_name()
354            .unwrap()
355            .to_str()
356            .unwrap()
357            .to_owned();
358        write!(leaf, ".{}", RUST_FILE_EXT).ok();
359        outp.push(leaf);
360        Ok(self.output_path(outp))
361    }
362
    /// If set, specifies that this grammar should be built from a pre-validated AST
    /// instead of a `.y` file. When this is specified, `grammar_path` will not be read.
    #[cfg(feature = "_unstable_api")]
    pub fn grammar_ast(mut self, valid_ast: ASTWithValidityInfo, _api_key: UnstableApi) -> Self {
        self.from_ast = Some(valid_ast);
        self
    }
370
    /// Set the input grammar path to `inp`. If specified, you must also call
    /// [CTParserBuilder::output_path]. In general it is easier to use
    /// [CTParserBuilder::grammar_in_src_dir].
    pub fn grammar_path<P>(mut self, inp: P) -> Self
    where
        P: AsRef<Path>,
    {
        self.grammar_path = Some(inp.as_ref().to_owned());
        self
    }
381
    /// Use `src` directly as the grammar source, instead of reading it from
    /// `grammar_path`.
    #[cfg(feature = "_unstable_api")]
    pub fn with_grammar_src(mut self, src: String, _api_key: UnstableApi) -> Self {
        self.grammar_src = Some(src);
        self
    }
387
    /// Set the output grammar path to `outp`. Note that there are no requirements on `outp`: the
    /// file can exist anywhere you can create a valid [Path] to. However, if you wish to use
    /// [crate::lrpar_mod!] you will need to make sure that `outp` is in
    /// [std::env::var]`("OUT_DIR")` or one of its subdirectories.
    pub fn output_path<P>(mut self, outp: P) -> Self
    where
        P: AsRef<Path>,
    {
        self.output_path = Some(outp.as_ref().to_owned());
        self
    }
399
    /// Set the generated module name to `mod_name`. If no module name is specified,
    /// [CTParserBuilder::build] will attempt to create a sensible default based on the grammar
    /// filename.
    pub fn mod_name(mut self, mod_name: &'a str) -> Self {
        self.mod_name = Some(mod_name);
        self
    }
407
    /// Set the visibility of the generated module to `vis`. Defaults to `Visibility::Private`.
    pub fn visibility(mut self, vis: Visibility) -> Self {
        self.visibility = vis;
        self
    }
413
    /// Set the recoverer for this parser to `rk`. Defaults to `RecoveryKind::CPCTPlus`.
    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
        self.recoverer = Some(rk);
        self
    }
419
    /// Set the `YaccKind` for this parser to `yk`.
    pub fn yacckind(mut self, yk: YaccKind) -> Self {
        self.yacckind = Some(yk);
        self
    }
425
    /// If set to true, [CTParserBuilder::build] will return an error if the given grammar contains
    /// any Shift/Reduce or Reduce/Reduce conflicts. Defaults to `true`.
    pub fn error_on_conflicts(mut self, b: bool) -> Self {
        self.error_on_conflicts = b;
        self
    }
432
    /// If set to true, [CTParserBuilder::build] will return an error if the given grammar contains
    /// any warnings. Defaults to `true`.
    pub fn warnings_are_errors(mut self, b: bool) -> Self {
        self.warnings_are_errors = b;
        self
    }
439
    /// If set to true, [CTParserBuilder::build] will print warnings to stderr, or via cargo when
    /// running under cargo. Defaults to `true`.
    pub fn show_warnings(mut self, b: bool) -> Self {
        self.show_warnings = b;
        self
    }
446
    /// Sets the rust edition to be used for generated code. Defaults to the latest edition of
    /// rust supported by grmtools.
    pub fn rust_edition(mut self, edition: RustEdition) -> Self {
        self.rust_edition = edition;
        self
    }
453
    /// Test-only hook: `build` invokes `cb` with the resolved `RecoveryKind`
    /// (after applying header values and defaults), so tests can inspect it.
    #[cfg(test)]
    pub fn inspect_recoverer(
        mut self,
        cb: Box<dyn for<'h, 'y> Fn(RecoveryKind) -> Result<(), Box<dyn Error>>>,
    ) -> Self {
        self.inspect_callback = Some(cb);
        self
    }
462
    /// Hidden hook: `build` invokes `cb` with the merged header, a run-time
    /// parser builder (with the recoverer applied, if any), the token map, and
    /// the grammar path, just before generating output.
    #[doc(hidden)]
    pub fn inspect_rt(
        mut self,
        cb: Box<
            dyn for<'b, 'y> FnMut(
                &'b mut Header<Location>,
                RTParserBuilder<'y, StorageT, LexerTypesT>,
                &'b HashMap<String, StorageT>,
                &PathBuf,
            ) -> Result<(), Box<dyn Error>>,
        >,
    ) -> Self {
        self.inspect_rt = Some(cb);
        self
    }
478
    /// Statically compile the Yacc file specified by [CTParserBuilder::grammar_path()] into Rust,
    /// placing the output into the file spec [CTParserBuilder::output_path()]. Note that
    /// additional files will be created with the same name as specified in [self.output_path] but
    /// with the extensions `grm` and `stable`, overwriting any existing files with those names.
    ///
    /// If `%parse-param` is not specified, the generated module follows the form:
    ///
    /// ```text
    ///   mod <modname> {
    ///     pub fn parse<'lexer, 'input: 'lexer>(lexer: &'lexer dyn NonStreamingLexer<...>)
    ///       -> (Option<ActionT>, Vec<LexParseError<...>> { ... }
    ///
    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
    ///       ...
    ///     }
    ///
    ///     ...
    ///   }
    /// ```
    ///
    /// If `%parse-param x: t` is specified, the generated module follows the form:
    ///
    /// ```text
    ///   mod <modname> {
    ///     pub fn parse<'lexer, 'input: 'lexer>(lexer: &'lexer dyn NonStreamingLexer<...>, x: t)
    ///       -> (Option<ActionT>, Vec<LexParseError<...>> { ... }
    ///
    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
    ///       ...
    ///     }
    ///
    ///     ...
    ///   }
    /// ```
    ///
    /// where:
    ///  * `modname` is either:
    ///    * the module name specified by [CTParserBuilder::mod_name()];
    ///    * or, if no module name was explicitly specified, then for the file `/a/b/c.y` the
    ///      module name is `c_y` (i.e. the file's leaf name, minus its extension, with a prefix of
    ///      `_y`).
    ///  * `ActionT` is either:
    ///    * if the `yacckind` was set to `YaccKind::GrmTools` or
    ///      `YaccKind::Original(YaccOriginalActionKind::UserAction)`, it is
    ///      the return type of the `%start` rule;
    ///    * or, if the `yacckind` was set to
    ///      `YaccKind::Original(YaccOriginalActionKind::GenericParseTree)`, it
    ///      is `Node<StorageT>` where the `Node` type is defined within your `lrpar_mod!`.
    ///
    /// # Panics
    ///
    /// If `StorageT` is not big enough to index the grammar's tokens, rules, or productions.
    pub fn build(mut self) -> Result<CTParser<StorageT>, Box<dyn Error>> {
        let grmp = self
            .grammar_path
            .as_ref()
            .expect("grammar_path must be specified before processing.");
        let outp = self
            .output_path
            .as_ref()
            .expect("output_path must be specified before processing.");
        let mut header = Header::new();

        // Seed the header with the builder-supplied `yacckind` (if any).
        // `MergeBehavior::Ours` means a `%grmtools` section value cannot
        // override a programmatically-set one; if none was set, the grammar's
        // header must supply it (`mark_required`).
        match header.entry("yacckind".to_string()) {
            Entry::Occupied(_) => unreachable!(),
            Entry::Vacant(mut v) => match self.yacckind {
                Some(YaccKind::Eco) => panic!("Eco compile-time grammar generation not supported."),
                Some(yk) => {
                    let yk_value = Value::try_from(yk)?;
                    let mut o = v.insert_entry(HeaderValue(
                        Location::Other("CTParserBuilder".to_string()),
                        yk_value,
                    ));
                    o.set_merge_behavior(MergeBehavior::Ours);
                }
                None => {
                    v.mark_required();
                }
            },
        }
        // Same scheme for a programmatically-set recoverer.
        if let Some(recoverer) = self.recoverer {
            match header.entry("recoverer".to_string()) {
                Entry::Occupied(_) => unreachable!(),
                Entry::Vacant(v) => {
                    let rk_value: Value<Location> = Value::try_from(recoverer)?;
                    let mut o = v.insert_entry(HeaderValue(
                        Location::Other("CTParserBuilder".to_string()),
                        rk_value,
                    ));
                    o.set_merge_behavior(MergeBehavior::Ours);
                }
            }
        }

        // Refuse to generate two parsers to the same output path within this
        // process (see `GENERATED_PATHS`).
        {
            let mut lk = GENERATED_PATHS.lock().unwrap();
            if lk.contains(outp.as_path()) {
                return Err(format!("Generating two parsers to the same path ('{}') is not allowed: use CTParserBuilder::output_path (and, optionally, CTParserBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
            }
            lk.insert(outp.clone());
        }

        // Grammar source: either supplied directly or read from `grammar_path`.
        let inc = if let Some(grammar_src) = &self.grammar_src {
            grammar_src.clone()
        } else {
            read_to_string(grmp).map_err(|e| format!("When reading '{}': {e}", grmp.display()))?
        };

        // Parse the grammar's `%grmtools` header section and merge it with the
        // builder-supplied values seeded above.
        let yacc_diag = SpannedDiagnosticFormatter::new(&inc, grmp);
        let parsed_header = GrmtoolsSectionParser::new(&inc, false).parse();
        if let Err(errs) = parsed_header {
            let mut out = String::new();
            out.push_str(&format!(
                "\n{ERROR}{}\n",
                yacc_diag.file_location_msg(" parsing the `%grmtools` section", None)
            ));
            for e in errs {
                out.push_str(&indent("     ", &yacc_diag.format_error(e).to_string()));
            }
            return Err(ErrorString(out))?;
        }
        let (parsed_header, _) = parsed_header.unwrap();
        header.merge_from(parsed_header)?;
        self.yacckind = header
            .get("yacckind")
            .map(|HeaderValue(_, val)| val)
            .map(YaccKind::try_from)
            .transpose()?;
        header.mark_used(&"yacckind".to_string());
        let ast_validation = if let Some(ast) = &self.from_ast {
            ast.clone()
        } else if let Some(yk) = self.yacckind {
            ASTWithValidityInfo::new(yk, &inc)
        } else {
            Err("Missing 'yacckind'".to_string())?
        };

        header.mark_used(&"recoverer".to_string());
        let rk_val = header.get("recoverer").map(|HeaderValue(_, rk_val)| rk_val);

        if let Some(rk_val) = rk_val {
            self.recoverer = Some(RecoveryKind::try_from(rk_val)?);
        } else {
            // Fallback to the default recoverykind.
            self.recoverer = Some(RecoveryKind::CPCTPlus);
        }
        self.yacckind = Some(ast_validation.yacc_kind());
        let warnings = ast_validation.ast().warnings();
        let res = YaccGrammar::<StorageT>::new_from_ast_with_validity_info(&ast_validation);
        let grm = match res {
            // Grammar built, but warnings are fatal and some were produced.
            Ok(_) if self.warnings_are_errors && !warnings.is_empty() => {
                let mut out = String::new();
                out.push_str(&format!(
                    "\n{ERROR}{}\n",
                    yacc_diag.file_location_msg("", None)
                ));
                for e in warnings {
                    out.push_str(&format!(
                        "{}\n",
                        indent("     ", &yacc_diag.format_warning(e).to_string())
                    ));
                }
                return Err(ErrorString(out))?;
            }
            // Grammar built; report (non-fatal) warnings if requested.
            Ok(grm) => {
                if !warnings.is_empty() {
                    for w in warnings {
                        let ws_loc = yacc_diag.file_location_msg("", None);
                        let ws = indent("     ", &yacc_diag.format_warning(w).to_string());
                        // Assume if this variable is set we are running under cargo.
                        if std::env::var("OUT_DIR").is_ok() && self.show_warnings {
                            for line in ws_loc.lines().chain(ws.lines()) {
                                println!("cargo:warning={}", line);
                            }
                        } else if self.show_warnings {
                            eprintln!("{}", ws_loc);
                            eprintln!("{WARNING} {}", ws);
                        }
                    }
                }
                grm
            }
            // Grammar construction failed outright.
            Err(errs) => {
                let mut out = String::new();
                out.push_str(&format!(
                    "\n{ERROR}{}\n",
                    yacc_diag.file_location_msg("", None)
                ));
                for e in errs {
                    out.push_str(&indent("     ", &yacc_diag.format_error(e).to_string()));
                    out.push('\n');
                }

                return Err(ErrorString(out))?;
            }
        };

        #[cfg(test)]
        if let Some(cb) = &self.inspect_callback {
            cb(self.recoverer.expect("has a default value"))?;
        }

        // Map from token name to its StorageT id, exposed to the lexer side.
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), i.as_storaget()))
            .collect::<HashMap<_, _>>();

        let derived_mod_name = match self.mod_name {
            Some(s) => s.to_owned(),
            None => {
                // The user hasn't specified a module name, so we create one automatically: what we
                // do is strip off all the filename extensions (note that it's likely that inp ends
                // with `y.rs`, so we potentially have to strip off more than one extension) and
                // then add `_y` to the end.
                let mut stem = grmp.to_str().unwrap();
                loop {
                    let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
                    if stem == new_stem {
                        break;
                    }
                    stem = new_stem;
                }
                format!("{}_y", stem)
            }
        };

        let cache = self.rebuild_cache(&derived_mod_name, &grm);

        // We don't need to go through the full rigmarole of generating an output file if all of
        // the following are true: the output file exists; it is newer than the input file; and the
        // cache hasn't changed. The last of these might be surprising, but it's vital: we don't
        // know, for example, what the IDs map might be from one run to the next, and it might
        // change for reasons beyond lrpar's control. If it does change, that means that the lexer
        // and lrpar would get out of sync, so we have to play it safe and regenerate in such
        // cases.
        if let Ok(ref inmd) = fs::metadata(grmp) {
            if let Ok(ref out_rs_md) = fs::metadata(outp) {
                if FileTime::from_last_modification_time(out_rs_md)
                    > FileTime::from_last_modification_time(inmd)
                {
                    if let Ok(outc) = read_to_string(outp) {
                        if outc.contains(&cache.to_string()) {
                            return Ok(CTParser {
                                regenerated: false,
                                rule_ids,
                                yacc_grammar: grm,
                                grammar_src: inc,
                                grammar_path: self.grammar_path.unwrap(),
                                conflicts: None,
                            });
                        } else {
                            #[cfg(grmtools_extra_checks)]
                            if std::env::var("CACHE_EXPECTED").is_ok() {
                                eprintln!("outc: {}", outc);
                                eprintln!("using cache: {}", cache,);
                                // Primarily for use in the testsuite.
                                panic!("The cache regenerated however, it was expected to match");
                            }
                        }
                    }
                }
            }
        }

        // At this point, we know we're going to generate fresh output; however, if something goes
        // wrong in the process between now and us writing /out/blah.rs, rustc thinks that
        // everything's gone swimmingly (even if build.rs errored!), and tries to carry on
        // compilation, leading to weird errors. We therefore delete /out/blah.rs at this point,
        // which means, at worse, the user gets a "file not found" error from rustc (which is less
        // confusing than the alternatives).
        fs::remove_file(outp).ok();

        let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?;
        if self.error_on_conflicts {
            if let Some(c) = stable.conflicts() {
                // Conflicts are tolerated only if they exactly match the
                // grammar's `%expect`/`%expect-rr` declarations.
                match (grm.expect(), grm.expectrr()) {
                    (Some(i), Some(j)) if i == c.sr_len() && j == c.rr_len() => (),
                    (Some(i), None) if i == c.sr_len() && 0 == c.rr_len() => (),
                    (None, Some(j)) if 0 == c.sr_len() && j == c.rr_len() => (),
                    (None, None) if 0 == c.rr_len() && 0 == c.sr_len() => (),
                    _ => {
                        let conflicts_diagnostic = yacc_diag.format_conflicts::<LexerTypesT>(
                            &grm,
                            ast_validation.ast(),
                            c,
                            &sgraph,
                            &stable,
                        );
                        return Err(Box::new(CTConflictsError {
                            conflicts_diagnostic,
                            phantom: PhantomData,
                            #[cfg(test)]
                            stable,
                        }));
                    }
                }
            }
        }

        if let Some(ref mut inspector_rt) = self.inspect_rt {
            let rt: RTParserBuilder<'_, StorageT, LexerTypesT> =
                RTParserBuilder::new(&grm, &stable);
            let rt = if let Some(rk) = self.recoverer {
                rt.recoverer(rk)
            } else {
                rt
            };
            inspector_rt(&mut header, rt, &rule_ids, grmp)?
        }

        // Header keys that were never consumed indicate a typo or unsupported
        // setting: error rather than silently ignore them. Likewise for keys
        // marked required but never supplied.
        let unused_keys = header.unused();
        if !unused_keys.is_empty() {
            return Err(format!("Unused keys in header: {}", unused_keys.join(", ")).into());
        }
        let missing_keys = header
            .missing()
            .iter()
            .map(|s| s.as_str())
            .collect::<Vec<_>>();
        if !missing_keys.is_empty() {
            return Err(format!(
                "Required values were missing from the header: {}",
                missing_keys.join(", ")
            )
            .into());
        }

        self.output_file(
            &grm,
            &stable,
            &derived_mod_name,
            outp,
            &format!("/* CACHE INFORMATION {} */\n", cache),
            &yacc_diag,
        )?;
        let conflicts = if stable.conflicts().is_some() {
            Some((sgraph, stable))
        } else {
            None
        };
        Ok(CTParser {
            regenerated: true,
            rule_ids,
            yacc_grammar: grm,
            grammar_src: inc,
            grammar_path: self.grammar_path.unwrap(),
            conflicts,
        })
    }
829
830    /// Given the filename `a/b.y` as input, statically compile the grammar `src/a/b.y` into a Rust
831    /// module which can then be imported using `lrpar_mod!("a/b.y")`. This is a convenience
832    /// function around [`process_file`](#method.process_file) which makes it easier to compile
833    /// grammar files stored in a project's `src/` directory: please see
834    /// [`process_file`](#method.process_file) for additional constraints and information about the
835    /// generated files.
836    #[deprecated(
837        since = "0.11.0",
838        note = "Please use grammar_in_src_dir(), build(), and token_map() instead"
839    )]
840    #[allow(deprecated)]
841    pub fn process_file_in_src(
842        &mut self,
843        srcp: &str,
844    ) -> Result<HashMap<String, StorageT>, Box<dyn Error>> {
845        let mut inp = current_dir()?;
846        inp.push("src");
847        inp.push(srcp);
848        let mut outp = PathBuf::new();
849        outp.push(var("OUT_DIR").unwrap());
850        outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
851        create_dir_all(&outp)?;
852        let mut leaf = Path::new(srcp)
853            .file_name()
854            .unwrap()
855            .to_str()
856            .unwrap()
857            .to_owned();
858        write!(leaf, ".{}", RUST_FILE_EXT).ok();
859        outp.push(leaf);
860        self.process_file(inp, outp)
861    }
862
863    /// Statically compile the Yacc file `inp` into Rust, placing the output into the file `outp`.
864    /// Note that three additional files will be created with the same name as `outp` but with the
865    /// extensions `grm`, and `stable`, overwriting any existing files with those names.
866    ///
867    /// `outp` defines a module as follows:
868    ///
869    /// ```text
870    ///   mod modname {
871    ///     pub fn parse(lexemes: &::std::vec::Vec<::lrpar::Lexeme<StorageT>>) { ... }
872    ///         -> (::std::option::Option<ActionT>,
873    ///             ::std::vec::Vec<::lrpar::LexParseError<StorageT>>)> { ...}
874    ///
875    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
876    ///       ...
877    ///     }
878    ///
879    ///     ...
880    ///   }
881    /// ```
882    ///
883    /// where:
884    ///  * `modname` is either:
885    ///    * the module name specified [`mod_name`](#method.mod_name)
886    ///    * or, if no module name was explicitly specified, then for the file `/a/b/c.y` the
887    ///      module name is `c_y` (i.e. the file's leaf name, minus its extension, with a prefix of
888    ///      `_y`).
889    ///  * `ActionT` is either:
890    ///    * the `%actiontype` value given to the grammar
891    ///    * or, if the `yacckind` was set YaccKind::Original(YaccOriginalActionKind::UserAction),
892    ///      it is [`Node<StorageT>`](../parser/enum.Node.html)
893    ///
894    /// # Panics
895    ///
896    /// If `StorageT` is not big enough to index the grammar's tokens, rules, or
897    /// productions.
898    #[deprecated(
899        since = "0.11.0",
900        note = "Please use grammar_path(), output_path(), build(), and token_map() instead"
901    )]
902    pub fn process_file<P, Q>(
903        &mut self,
904        inp: P,
905        outp: Q,
906    ) -> Result<HashMap<String, StorageT>, Box<dyn Error>>
907    where
908        P: AsRef<Path>,
909        Q: AsRef<Path>,
910    {
911        self.grammar_path = Some(inp.as_ref().to_owned());
912        self.output_path = Some(outp.as_ref().to_owned());
913        let cl: CTParserBuilder<LexerTypesT> = CTParserBuilder {
914            grammar_path: self.grammar_path.clone(),
915            grammar_src: None,
916            from_ast: None,
917            output_path: self.output_path.clone(),
918            mod_name: self.mod_name,
919            recoverer: self.recoverer,
920            yacckind: self.yacckind,
921            error_on_conflicts: self.error_on_conflicts,
922            warnings_are_errors: self.warnings_are_errors,
923            show_warnings: self.show_warnings,
924            visibility: self.visibility.clone(),
925            rust_edition: self.rust_edition,
926            inspect_rt: None,
927            #[cfg(test)]
928            inspect_callback: None,
929            phantom: PhantomData,
930        };
931        Ok(cl.build()?.rule_ids)
932    }
933
934    fn output_file<P: AsRef<Path>>(
935        &self,
936        grm: &YaccGrammar<StorageT>,
937        stable: &StateTable<StorageT>,
938        mod_name: &str,
939        outp_rs: P,
940        cache: &str,
941        diag: &SpannedDiagnosticFormatter,
942    ) -> Result<(), Box<dyn Error>> {
943        let visibility = self.visibility.clone();
944        let user_actions = if let Some(
945            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools,
946        ) = self.yacckind
947        {
948            Some(self.gen_user_actions(grm, diag)?)
949        } else {
950            None
951        };
952        let rule_consts = self.gen_rule_consts(grm)?;
953        let token_epp = self.gen_token_epp(grm)?;
954        let parse_function = self.gen_parse_function(grm, stable)?;
955        let action_wrappers = match self.yacckind.unwrap() {
956            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
957                Some(self.gen_wrappers(grm)?)
958            }
959            YaccKind::Original(YaccOriginalActionKind::NoAction)
960            | YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => None,
961            _ => unreachable!(),
962        };
963
964        let additional_decls =
965            if let Some(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)) =
966                self.yacckind
967            {
968                // `lrpar::Node`` is deprecated within the lrpar crate, but not from within this module,
969                // Once it is removed from `lrpar`, we should move the declaration here entirely.
970                Some(quote! {
971                            #[allow(unused_imports)]
972                            pub use ::lrpar::parser::_deprecated_moved_::Node;
973                })
974            } else {
975                None
976            };
977
978        let mod_name =
979            match syn::parse_str::<proc_macro2::Ident>(mod_name) {
980                Ok(s) => s,
981                Err(e) => return Err(format!(
982                    "CTParserBuilder::mod_name(\"{}\") is not a valid rust identifier due to '{}'",
983                    mod_name, e
984                )
985                .into()),
986            };
987        let out_tokens = quote! {
988            #visibility mod #mod_name {
989                // At the top so that `user_actions` may contain #![inner_attribute]
990                #user_actions
991                mod _parser_ {
992                    #![allow(clippy::type_complexity)]
993                    #![allow(clippy::unnecessary_wraps)]
994                    #![deny(unsafe_code)]
995                    #[allow(unused_imports)]
996                    use super::*;
997                    #additional_decls
998                    #parse_function
999                    #rule_consts
1000                    #token_epp
1001                    #action_wrappers
1002                } // End of `mod _parser_`
1003                #[allow(unused_imports)]
1004                pub use _parser_::*;
1005                #[allow(unused_imports)]
1006                use ::lrpar::Lexeme;
1007            } // End of `mod #mod_name`
1008        };
1009        // Try and run a code formatter on the generated code.
1010        let unformatted = out_tokens.to_string();
1011        let outs = syn::parse_str(&unformatted)
1012            .map(|syntax_tree| prettyplease::unparse(&syntax_tree))
1013            .unwrap_or(unformatted);
1014        let mut f = File::create(outp_rs)?;
1015        f.write_all(outs.as_bytes())?;
1016        f.write_all(cache.as_bytes())?;
1017        Ok(())
1018    }
1019
    /// Generate the cache, which determines if anything's changed enough that we need to
    /// regenerate outputs and force rustc to recompile.
    ///
    /// The returned token stream is a single string literal containing a `KEY = value`
    /// listing of every build-relevant setting; it is embedded verbatim in the output
    /// file and compared against on subsequent builds.
    fn rebuild_cache(&self, derived_mod_name: &'_ str, grm: &YaccGrammar<StorageT>) -> TokenStream {
        // We don't need to be particularly clever here: we just need to record the various things
        // that could change between builds.
        //
        // Record the time that this version of lrpar was built. If the source code changes and
        // rustc forces a recompile, this will change this value, causing anything which depends on
        // this build of lrpar to be recompiled too.
        let Self {
            // Destructure `self` exhaustively so that adding a new field forces a decision
            // about whether it belongs in the cache. Fields matched with `_` below
            // (`grammar_src`, `from_ast`, `output_path`, `inspect_rt`, `inspect_callback`
            // and `phantom`) are deliberately excluded from the cache.
            grammar_path,
            // I struggle to imagine the correct thing for `grammar_src`.
            grammar_src: _,
            // I struggle to imagine the correct thing for `from_ast`.
            from_ast: _,
            mod_name,
            recoverer,
            yacckind,
            output_path: _,
            error_on_conflicts,
            warnings_are_errors,
            show_warnings,
            visibility,
            rust_edition,
            inspect_rt: _,
            #[cfg(test)]
                inspect_callback: _,
            phantom: _,
        } = self;
        let build_time = env!("VERGEN_BUILD_TIMESTAMP");
        let grammar_path = grammar_path.as_ref().unwrap().to_string_lossy();
        let mod_name = QuoteOption(mod_name.as_deref());
        let visibility = visibility.to_variant_tokens();
        let rust_edition = rust_edition.to_variant_tokens();
        let yacckind = yacckind.expect("is_some() by this point");
        // NOTE(review): despite the `RULE_IDS_MAP` name below, this iterates token
        // indices (`iter_tidxs`) and records token names — presumably the historical
        // name for the token map; confirm before renaming the cache key.
        let rule_map = grm
            .iter_tidxs()
            .map(|tidx| {
                QuoteTuple((
                    usize::from(tidx),
                    grm.token_name(tidx).unwrap_or("<unknown>"),
                ))
            })
            .collect::<Vec<_>>();
        let cache_info = quote! {
            BUILD_TIME = #build_time
            DERIVED_MOD_NAME = #derived_mod_name
            GRAMMAR_PATH = #grammar_path
            MOD_NAME = #mod_name
            RECOVERER = #recoverer
            YACC_KIND = #yacckind
            ERROR_ON_CONFLICTS = #error_on_conflicts
            SHOW_WARNINGS = #show_warnings
            WARNINGS_ARE_ERRORS = #warnings_are_errors
            RUST_EDITION = #rust_edition
            RULE_IDS_MAP = [#(#rule_map,)*]
            VISIBILITY = #visibility
        };
        // Collapse the listing into one string literal so the cache is a single token.
        let cache_info_str = cache_info.to_string();
        quote!(#cache_info_str)
    }
1083
    /// Generate the main parse() function for the output file.
    ///
    /// The generated function embeds the bincode-serialised grammar and state table
    /// as byte arrays, lazily reconstitutes them on first use, and dispatches to a
    /// runtime parser whose shape depends on `self.yacckind`.
    fn gen_parse_function(
        &self,
        grm: &YaccGrammar<StorageT>,
        stable: &StateTable<StorageT>,
    ) -> Result<TokenStream, Box<dyn Error>> {
        let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
        let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
        let recoverer = self.recoverer;
        // The body of the generated `parse()` differs per yacc kind.
        let run_parser = match self.yacckind.unwrap() {
            // Build a generic parse tree of `Node` values, returning it with any errors.
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
                quote! {
                    ::lrpar::RTParserBuilder::new(grm, stable)
                        .recoverer(#recoverer)
                        .parse_map(
                            lexer,
                            &|lexeme| Node::Term{lexeme},
                            &|ridx, nodes| Node::Nonterm{ridx, nodes}
                        )
                }
            }
            // Discard all values: `.1` keeps only the error vector from parse_map's tuple.
            YaccKind::Original(YaccOriginalActionKind::NoAction) => {
                quote! {
                    ::lrpar::RTParserBuilder::new(grm, stable)
                        .recoverer(#recoverer)
                        .parse_map(lexer, &|_| (), &|_, _| ()).1
                }
            }
            // User actions: build a table of per-production wrapper functions and
            // unwrap the start rule's variant of the generated actions enum.
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                let actionskind = str::parse::<TokenStream>(ACTIONS_KIND)?;
                let parsed_parse_generics = make_generics(grm.parse_generics().as_deref())?;
                let (_, type_generics, _) = parsed_parse_generics.split_for_impl();
                // actions always have a parse_param argument, and when the `parse` function lacks one
                // that parameter will be unit.
                let (action_fn_parse_param, action_fn_parse_param_ty) = match grm.parse_param() {
                    Some((name, ty)) => {
                        let name = str::parse::<TokenStream>(name)?;
                        let ty = str::parse::<TokenStream>(ty)?;
                        (quote!(#name), quote!(#ty))
                    }
                    None => (quote!(()), quote!(())),
                };
                // One wrapper function identifier per production, in pidx order, so
                // the parser can index the table by production index.
                let wrappers = grm.iter_pidxs().map(|pidx| {
                    let pidx = usize::from(pidx);
                    format_ident!("{}wrapper_{}", ACTION_PREFIX, pidx)
                });
                // Editions after 2015 require an explicit '_ lifetime in this position.
                let edition_lifetime = if self.rust_edition != RustEdition::Rust2015 {
                    quote!('_,)
                } else {
                    quote!()
                };
                let ridx = usize::from(self.user_start_ridx(grm));
                let action_ident = format_ident!("{}{}", ACTIONS_KIND_PREFIX, ridx);

                quote! {
                    let actions: ::std::vec::Vec<
                            &dyn Fn(
                                    ::cfgrammar::RIdx<#storaget>,
                                    &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                                    ::cfgrammar::Span,
                                    ::std::vec::Drain<#edition_lifetime ::lrpar::parser::AStackType<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #actionskind #type_generics>>,
                                    #action_fn_parse_param_ty
                            ) -> #actionskind #type_generics
                        > = ::std::vec![#(&#wrappers,)*];
                    match ::lrpar::RTParserBuilder::new(grm, stable)
                        .recoverer(#recoverer)
                        .parse_actions(lexer, &actions, #action_fn_parse_param) {
                            (Some(#actionskind::#action_ident(x)), y) => (Some(x), y),
                            (None, y) => (None, y),
                            _ => unreachable!()
                    }
                }
            }
            kind => panic!("YaccKind {:?} not supported", kind),
        };

        // Only user-action kinds may declare `%parse-generics`; all others get the
        // default generics (from `make_generics(None)`).
        let parsed_parse_generics: Generics = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                make_generics(grm.parse_generics().as_deref())?
            }
            _ => make_generics(None)?,
        };
        let (generics, _, where_clause) = parsed_parse_generics.split_for_impl();

        // `parse()` may or may not have an argument for `%parseparam`.
        let parse_fn_parse_param = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                if let Some((name, tyname)) = grm.parse_param() {
                    let name = str::parse::<TokenStream>(name)?;
                    let tyname = str::parse::<TokenStream>(tyname)?;
                    Some(quote! {#name: #tyname})
                } else {
                    None
                }
            }
            _ => None,
        };
        // The return type mirrors `run_parser` above: (value, errors) for action
        // kinds, (tree, errors) for generic parse trees, errors only for NoAction.
        let parse_fn_return_ty = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                let actiont = grm
                    .actiontype(self.user_start_ridx(grm))
                    .as_ref()
                    .map(|at| str::parse::<TokenStream>(at))
                    .transpose()?;
                quote! {
                    (::std::option::Option<#actiont>, ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>)
                }
            }
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => quote! {
                (::std::option::Option<Node<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #storaget>>,
                    ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>)
            },
            YaccKind::Original(YaccOriginalActionKind::NoAction) => quote! {
                ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>
            },
            _ => unreachable!(),
        };

        // Serialise the grammar and state table with bincode; the generated module
        // embeds the bytes and reconstitutes them lazily via a `OnceLock`.
        let grm_data = encode_to_vec(grm, bincode::config::standard())?;
        let stable_data = encode_to_vec(stable, bincode::config::standard())?;
        Ok(quote! {
            const __GRM_DATA: &[u8] = &[#(#grm_data,)*];
            const __STABLE_DATA: &[u8] = &[#(#stable_data,)*];

            fn __lrpar_parser_data() -> &'static ::lrpar::ParserData<#storaget> {
                static DATA: ::std::sync::OnceLock<::lrpar::ParserData<#storaget>>
                    = ::std::sync::OnceLock::new();
                DATA.get_or_init(
                    || ::lrpar::ctbuilder::_reconstitute(__GRM_DATA, __STABLE_DATA)
                )
            }

            #[allow(dead_code)]
            pub fn parse #generics (
                 lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                 #parse_fn_parse_param
            ) -> #parse_fn_return_ty
            #where_clause
            {
                let __data = __lrpar_parser_data();
                let grm = __data.grm();
                let stable = __data.stable();
                #run_parser
            }
        })
    }
1230
1231    fn gen_rule_consts(
1232        &self,
1233        grm: &YaccGrammar<StorageT>,
1234    ) -> Result<TokenStream, proc_macro2::LexError> {
1235        let mut toks = TokenStream::new();
1236        for ridx in grm.iter_rules() {
1237            if !grm.rule_to_prods(ridx).contains(&grm.start_prod()) {
1238                let r_const = format_ident!("R_{}", grm.rule_name_str(ridx).to_ascii_uppercase());
1239                let storage_ty = str::parse::<TokenStream>(type_name::<StorageT>())?;
1240                let ridx = UnsuffixedUsize(usize::from(ridx));
1241                toks.extend(quote! {
1242                    #[allow(dead_code)]
1243                    pub const #r_const: #storage_ty = #ridx;
1244                });
1245            }
1246        }
1247        Ok(toks)
1248    }
1249
1250    fn gen_token_epp(
1251        &self,
1252        grm: &YaccGrammar<StorageT>,
1253    ) -> Result<TokenStream, proc_macro2::LexError> {
1254        let mut tidxs = Vec::new();
1255        for tidx in grm.iter_tidxs() {
1256            tidxs.push(QuoteOption(grm.token_epp(tidx)));
1257        }
1258        let const_epp_ident = format_ident!("{}EPP", GLOBAL_PREFIX);
1259        let storage_ty = str::parse::<TokenStream>(type_name::<StorageT>())?;
1260        Ok(quote! {
1261            const #const_epp_ident: &[::std::option::Option<&str>] = &[
1262                #(#tidxs,)*
1263            ];
1264
1265            /// Return the %epp entry for token `tidx` (where `None` indicates \"the token has no
1266            /// pretty-printed value\"). Panics if `tidx` doesn't exist.
1267            #[allow(dead_code)]
1268            pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<#storage_ty>) -> ::std::option::Option<&'a str> {
1269                #const_epp_ident[usize::from(tidx)]
1270            }
1271        })
1272    }
1273
1274    /// Generate the wrappers that call user actions
1275    fn gen_wrappers(&self, grm: &YaccGrammar<StorageT>) -> Result<TokenStream, Box<dyn Error>> {
1276        let parsed_parse_generics = make_generics(grm.parse_generics().as_deref())?;
1277        let (generics, type_generics, where_clause) = parsed_parse_generics.split_for_impl();
1278
1279        let (parse_paramname, parse_paramdef);
1280        match grm.parse_param() {
1281            Some((name, tyname)) => {
1282                parse_paramname = str::parse::<TokenStream>(name)?;
1283                let ty = str::parse::<TokenStream>(tyname)?;
1284                parse_paramdef = quote!(#parse_paramname: #ty);
1285            }
1286            None => {
1287                parse_paramname = quote!(());
1288                parse_paramdef = quote! {_: ()};
1289            }
1290        };
1291
1292        let mut wrappers = TokenStream::new();
1293        for pidx in grm.iter_pidxs() {
1294            let ridx = grm.prod_to_rule(pidx);
1295
1296            // Iterate over all $-arguments and replace them with their respective
1297            // element from the argument vector (e.g. $1 is replaced by args[0]). At
1298            // the same time extract &str from tokens and actiontype from nonterminals.
1299            let wrapper_fn = format_ident!("{}wrapper_{}", ACTION_PREFIX, usize::from(pidx));
1300            let ridx_var = format_ident!("{}ridx", ACTION_PREFIX);
1301            let lexer_var = format_ident!("{}lexer", ACTION_PREFIX);
1302            let span_var = format_ident!("{}span", ACTION_PREFIX);
1303            let args_var = format_ident!("{}args", ACTION_PREFIX);
1304            let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
1305            let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
1306            let actionskind = str::parse::<TokenStream>(ACTIONS_KIND)?;
1307            let edition_lifetime = if self.rust_edition != RustEdition::Rust2015 {
1308                Some(quote!('_,))
1309            } else {
1310                None
1311            };
1312            let mut wrapper_fn_body = TokenStream::new();
1313            if grm.action(pidx).is_some() {
1314                // Unpack the arguments passed to us by the drain
1315                for i in 0..grm.prod(pidx).len() {
1316                    let arg = format_ident!("{}arg_{}", ACTION_PREFIX, i + 1);
1317                    wrapper_fn_body.extend(match grm.prod(pidx)[i] {
1318                        Symbol::Rule(ref_ridx) => {
1319                            let ref_ridx = usize::from(ref_ridx);
1320                            let actionvariant = format_ident!("{}{}", ACTIONS_KIND_PREFIX, ref_ridx);
1321                            quote! {
1322                                #[allow(clippy::let_unit_value)]
1323                                let #arg = match #args_var.next().unwrap() {
1324                                    ::lrpar::parser::AStackType::ActionType(#actionskind::#type_generics::#actionvariant(x)) => x,
1325                                    _ => unreachable!()
1326                                };
1327                            }
1328                        }
1329                        Symbol::Token(_) => {
1330                            quote! {
1331                                let #arg = match #args_var.next().unwrap() {
1332                                    ::lrpar::parser::AStackType::Lexeme(l) => {
1333                                        if l.faulty() {
1334                                            Err(l)
1335                                        } else {
1336                                            Ok(l)
1337                                        }
1338                                    },
1339                                    ::lrpar::parser::AStackType::ActionType(_) => unreachable!()
1340                                };
1341                            }
1342                        }
1343                    })
1344                }
1345
1346                // Call the user code
1347                let args = (0..grm.prod(pidx).len())
1348                    .map(|i| format_ident!("{}arg_{}", ACTION_PREFIX, i + 1))
1349                    .collect::<Vec<_>>();
1350                let action_fn = format_ident!("{}action_{}", ACTION_PREFIX, usize::from(pidx));
1351                let actionsvariant = format_ident!("{}{}", ACTIONS_KIND_PREFIX, usize::from(ridx));
1352
1353                wrapper_fn_body.extend(match grm.actiontype(ridx) {
1354                    Some(s) if s == "()" => {
1355                        // If the rule `r` that we're calling has the unit type then Clippy will warn that
1356                        // `enum::A(wrapper_r())` is pointless. We thus have to split it into two:
1357                        // `wrapper_r(); enum::A(())`.
1358                        quote! {
1359                            #action_fn(#ridx_var, #lexer_var, #span_var, #parse_paramname, #(#args,)*);
1360                            #actionskind::#type_generics::#actionsvariant(())
1361                        }
1362                    }
1363                    _ => {
1364                        quote! {
1365                            #actionskind::#type_generics::#actionsvariant(#action_fn(#ridx_var, #lexer_var, #span_var, #parse_paramname, #(#args,)*))
1366                        }
1367                    }
1368                })
1369            } else if pidx == grm.start_prod() {
1370                wrapper_fn_body.extend(quote!(unreachable!()));
1371            } else {
1372                unreachable!(
1373                    "Production in rule '{}' must have an action body, which should have been handled by gen_user_actions.",
1374                    grm.rule_name_str(grm.prod_to_rule(pidx))
1375                );
1376            };
1377
1378            let attrib = if pidx == grm.start_prod() {
1379                // The start prod has an unreachable body so it doesn't use it's variables.
1380                Some(quote!(#[allow(unused_variables)]))
1381            } else {
1382                None
1383            };
1384            wrappers.extend(quote! {
1385                #attrib
1386                fn #wrapper_fn #generics (
1387                    #ridx_var: ::cfgrammar::RIdx<#storaget>,
1388                    #lexer_var: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
1389                    #span_var: ::cfgrammar::Span,
1390                    mut #args_var: ::std::vec::Drain<#edition_lifetime ::lrpar::parser::AStackType<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #actionskind #type_generics>>,
1391                    #parse_paramdef
1392                ) -> #actionskind #type_generics
1393                #where_clause
1394                {
1395                    #wrapper_fn_body
1396                }
1397             })
1398        }
1399        let mut actionskindvariants = Vec::new();
1400        let actionskindhidden = format_ident!("_{}", ACTIONS_KIND_HIDDEN);
1401        let actionskind = str::parse::<TokenStream>(ACTIONS_KIND).unwrap();
1402        let mut phantom_data_type = Vec::new();
1403        for ridx in grm.iter_rules() {
1404            if let Some(actiont) = grm.actiontype(ridx) {
1405                let actionskindvariant =
1406                    format_ident!("{}{}", ACTIONS_KIND_PREFIX, usize::from(ridx));
1407                let actiont = str::parse::<TokenStream>(actiont).unwrap();
1408                actionskindvariants.push(quote! {
1409                    #actionskindvariant(#actiont)
1410                })
1411            }
1412        }
1413        for lifetime in parsed_parse_generics.lifetimes() {
1414            let lifetime = &lifetime.lifetime;
1415            phantom_data_type.push(quote! { &#lifetime () });
1416        }
1417        for type_param in parsed_parse_generics.type_params() {
1418            let ident = &type_param.ident;
1419            phantom_data_type.push(quote! { #ident });
1420        }
1421        actionskindvariants.push(quote! {
1422            #actionskindhidden(::std::marker::PhantomData<(#(#phantom_data_type,)*)>)
1423        });
1424        wrappers.extend(quote! {
1425            #[allow(dead_code)]
1426            enum #actionskind #generics #where_clause {
1427                #(#actionskindvariants,)*
1428            }
1429        });
1430        Ok(wrappers)
1431    }
1432
    /// Generate the user action functions (if any).
    ///
    /// Each production (except the synthetic start production) is turned into a free
    /// function named `{ACTION_PREFIX}action_<pidx>` whose parameters mirror the
    /// production's symbols. Within the user's action text, `$1`..`$n`, `$lexer`, `$span`
    /// and the escape `$$` are rewritten to the generated names. The grammar's programs
    /// section (if any) is emitted verbatim ahead of the action functions.
    ///
    /// Returns the combined [`TokenStream`] or a formatted diagnostic error (missing
    /// action type / missing action code / unknown `$` sequence).
    fn gen_user_actions(
        &self,
        grm: &YaccGrammar<StorageT>,
        diag: &SpannedDiagnosticFormatter,
    ) -> Result<TokenStream, Box<dyn Error>> {
        // The user's programs section is passed through unchanged; parsing it here means
        // invalid Rust is reported now rather than in the generated file.
        let programs = grm
            .programs()
            .as_ref()
            .map(|s| str::parse::<TokenStream>(s))
            .transpose()?;
        let mut action_fns = TokenStream::new();
        // Convert actions to functions
        let parsed_parse_generics = make_generics(grm.parse_generics().as_deref())?;
        let (generics, _, where_clause) = parsed_parse_generics.split_for_impl();
        // Work out how the `%parse-param` (if any) is threaded through every action
        // function. `parse_param_unit` records whether its type is the unit type, in
        // which case no `let _ = ...;` binding is emitted for it.
        let (parse_paramname, parse_paramdef, parse_param_unit);
        match grm.parse_param() {
            Some((name, tyname)) => {
                parse_param_unit = tyname.trim() == "()";
                parse_paramname = str::parse::<TokenStream>(name)?;
                let ty = str::parse::<TokenStream>(tyname)?;
                parse_paramdef = quote!(#parse_paramname: #ty);
            }
            None => {
                parse_param_unit = true;
                parse_paramname = quote!(());
                parse_paramdef = quote! {_: ()};
            }
        };
        for pidx in grm.iter_pidxs() {
            // The synthetic start production has no user-written action.
            if pidx == grm.start_prod() {
                continue;
            }

            // Work out the right type for each argument
            let mut args = Vec::with_capacity(grm.prod(pidx).len());
            for i in 0..grm.prod(pidx).len() {
                let argt = match grm.prod(pidx)[i] {
                    Symbol::Rule(ref_ridx) => {
                        // A rule reference's argument type is the referenced rule's
                        // action type; it is a user-facing error for it to be missing.
                        if let Some(action_type) = grm.actiontype(ref_ridx).as_ref() {
                            str::parse::<TokenStream>(action_type)?
                        } else {
                            let mut s = String::from("\n");
                            let rule_span = grm.rule_name_span(ref_ridx);
                            s.push_str(&diag.file_location_msg("Error", Some(rule_span)));
                            s.push_str("\n");
                            s.push_str(&diag.underline_span_with_text(
                                rule_span,
                                "Rule missing action type".to_string(),
                                '^',
                            ));
                            return Err(ErrorString(s).into());
                        }
                    }
                    Symbol::Token(_) => {
                        // Tokens arrive as `Result<LexemeT, LexemeT>`. NOTE(review): the
                        // `Err` case presumably distinguishes lexemes affected by error
                        // recovery — confirm against lrpar's parser documentation.
                        let lexemet =
                            str::parse::<TokenStream>(type_name::<LexerTypesT::LexemeT>())?;
                        quote!(::std::result::Result<#lexemet, #lexemet>)
                    }
                };
                // Arguments are 1-indexed to match the `$1`..`$n` syntax.
                let arg = format_ident!("{}arg_{}", ACTION_PREFIX, i + 1);
                args.push(quote!(mut #arg: #argt));
            }

            // If this rule's `actiont` is `()` then Clippy will warn that the return type `-> ()`
            // is pointless (which is true). We therefore avoid outputting a return type if actiont
            // is the unit type.
            let returnt = {
                let actiont = grm.actiontype(grm.prod_to_rule(pidx)).as_ref().unwrap();
                if actiont == "()" {
                    None
                } else {
                    let actiont = str::parse::<TokenStream>(actiont)?;
                    Some(quote!( -> #actiont))
                }
            };
            let action_fn = format_ident!("{}action_{}", ACTION_PREFIX, usize::from(pidx));
            let lexer_var = format_ident!("{}lexer", ACTION_PREFIX);
            let span_var = format_ident!("{}span", ACTION_PREFIX);
            let ridx_var = format_ident!("{}ridx", ACTION_PREFIX);
            let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
            let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
            // For a non-unit parse param, emit `let _ = <param>;` so actions that don't
            // reference it don't trigger "unused variable" warnings.
            let bind_parse_param = if !parse_param_unit {
                Some(quote! {let _ = #parse_paramname;})
            } else {
                None
            };

            // Iterate over all $-arguments and replace them with their respective
            // element from the argument vector (e.g. $1 is replaced by args[0]).
            let pre_action = grm.action(pidx).as_ref().ok_or_else(|| {
                let mut s = String::from("\n");
                let span = grm.prod_span(pidx);
                s.push_str(&diag.file_location_msg("Error", Some(span)));
                s.push_str("\n");
                s.push_str(&diag.underline_span_with_text(
                    span,
                    "Production is missing action code".to_string(),
                    '^',
                ));
                ErrorString(s)
            })?;
            // `last` is the index of the first byte of `pre_action` not yet copied to `outs`.
            let mut last = 0;
            let mut outs = String::new();
            loop {
                match pre_action[last..].find('$') {
                    Some(off) => {
                        if pre_action[last + off..].starts_with("$$") {
                            // `$$` escapes a literal `$`: copy up to and including the
                            // first `$`, then skip past both.
                            outs.push_str(&pre_action[last..last + off + "$".len()]);
                            last = last + off + "$$".len();
                        } else if pre_action[last + off..].starts_with("$lexer") {
                            // `$lexer` -> the generated `{ACTION_PREFIX}lexer` parameter.
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}lexer", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$lexer".len();
                        } else if pre_action[last + off..].starts_with("$span") {
                            // `$span` -> the generated `{ACTION_PREFIX}span` parameter.
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}span", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$span".len();
                        } else if last + off + 1 < pre_action.len()
                            && pre_action[last + off + 1..].starts_with(|c: char| c.is_numeric())
                        {
                            // `$<n>` -> `{ACTION_PREFIX}arg_<n>`: emit the prefix here;
                            // the digits are copied verbatim on the next iteration.
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}arg_", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$".len();
                        } else {
                            // Any other `$...` sequence is a user error; underline the
                            // offending position within the action's span.
                            let span = grm.action_span(pidx).unwrap();
                            let inner_span =
                                Span::new(span.start() + last + off + "$".len(), span.end());
                            let mut s = String::from("\n");
                            s.push_str(&diag.file_location_msg("Error", Some(inner_span)));
                            s.push_str("\n");
                            s.push_str(&diag.underline_span_with_text(
                                inner_span,
                                "Unknown text following '$'".to_string(),
                                '^',
                            ));
                            return Err(ErrorString(s).into());
                        }
                    }
                    None => {
                        // No `$` remains: copy the rest of the action verbatim and stop.
                        outs.push_str(&pre_action[last..]);
                        break;
                    }
                }
            }

            let action_body = str::parse::<TokenStream>(&outs)?;
            action_fns.extend(quote! {
                #[allow(clippy::too_many_arguments)]
                fn #action_fn #generics (
                    #ridx_var: ::cfgrammar::RIdx<#storaget>,
                    #lexer_var: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                    #span_var: ::cfgrammar::Span,
                    #parse_paramdef,
                    #(#args,)*
                ) #returnt
                #where_clause
                {
                    #bind_parse_param
                    #action_body
                }
            })
        }
        Ok(quote! {
            #programs
            #action_fns
        })
    }
1601
1602    /// Return the `RIdx` of the %start rule in the grammar (which will not be the same as
1603    /// grm.start_rule_idx because the latter has an additional rule insert by cfgrammar
1604    /// which then calls the user's %start rule).
1605    fn user_start_ridx(&self, grm: &YaccGrammar<StorageT>) -> RIdx<StorageT> {
1606        debug_assert_eq!(grm.prod(grm.start_prod()).len(), 1);
1607        match grm.prod(grm.start_prod())[0] {
1608            Symbol::Rule(ridx) => ridx,
1609            _ => unreachable!(),
1610        }
1611    }
1612}
1613
/// Bundles `YaccGrammar` + `StateTable` so that generated parsers can hold
/// them in a `OnceLock` without naming `lrtable` directly.
#[doc(hidden)]
pub struct ParserData<StorageT: Eq + Hash> {
    // The grammar the parser was built from; exposed read-only via `grm()`.
    grm: YaccGrammar<StorageT>,
    // The LR state table for `grm`; exposed read-only via `stable()`.
    stable: StateTable<StorageT>,
}
1621
impl<StorageT: Eq + Hash> ParserData<StorageT> {
    /// Returns a reference to the bundled grammar.
    pub fn grm(&self) -> &YaccGrammar<StorageT> {
        &self.grm
    }

    /// Returns a reference to the bundled LR state table.
    pub fn stable(&self) -> &StateTable<StorageT> {
        &self.stable
    }
}
1631
1632/// This function is called by generated files; it exists so that generated files don't require a
1633/// direct dependency on bincode.
1634#[doc(hidden)]
1635pub fn _reconstitute<StorageT: Decode<()> + Eq + Hash + PrimInt + Unsigned + 'static>(
1636    grm_buf: &[u8],
1637    stable_buf: &[u8],
1638) -> ParserData<StorageT> {
1639    let (grm, _) = decode_from_slice(grm_buf, bincode::config::standard()).unwrap();
1640    let (stable, _) = decode_from_slice(stable_buf, bincode::config::standard()).unwrap();
1641    ParserData { grm, stable }
1642}
1643
/// An interface to the result of [CTParserBuilder::build()].
pub struct CTParser<StorageT = u32>
where
    StorageT: Eq + Hash,
{
    // Whether the parser was regenerated by this build; see `regenerated()`.
    regenerated: bool,
    // Maps lexeme names to numeric IDs; see `token_map()`.
    rule_ids: HashMap<String, StorageT>,
    // The grammar this parser was built from.
    yacc_grammar: YaccGrammar<StorageT>,
    // The textual source the grammar was parsed from.
    grammar_src: String,
    // The filesystem path the grammar was read from.
    grammar_path: PathBuf,
    // `Some` only when the grammar was regenerated and has conflicts; see `conflicts()`.
    conflicts: Option<(StateGraph<StorageT>, StateTable<StorageT>)>,
}
1656
1657impl<StorageT> CTParser<StorageT>
1658where
1659    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
1660    usize: AsPrimitive<StorageT>,
1661{
1662    /// Returns `true` if this compile-time parser was regenerated or `false` if it was not.
1663    pub fn regenerated(&self) -> bool {
1664        self.regenerated
1665    }
1666
1667    /// Returns a [HashMap] from lexeme string types to numeric types (e.g. `INT: 2`), suitable for
1668    /// handing to a lexer to coordinate the IDs of lexer and parser.
1669    pub fn token_map(&self) -> &HashMap<String, StorageT> {
1670        &self.rule_ids
1671    }
1672
1673    /// If there are any conflicts in the grammar, return a tuple which allows users to inspect and
1674    /// pretty print them; otherwise returns `None`. If the grammar was not regenerated, this will
1675    /// always return `None`, even if the grammar actually has conflicts.
1676    ///
1677    /// **Note: The conflicts feature is currently unstable and may change in the future.**
1678    #[allow(private_interfaces)]
1679    pub fn conflicts(
1680        &self,
1681        _: crate::unstable::UnstableApi,
1682    ) -> Option<(
1683        &YaccGrammar<StorageT>,
1684        &StateGraph<StorageT>,
1685        &StateTable<StorageT>,
1686        &Conflicts<StorageT>,
1687    )> {
1688        if let Some((sgraph, stable)) = &self.conflicts {
1689            return Some((
1690                &self.yacc_grammar,
1691                sgraph,
1692                stable,
1693                stable.conflicts().unwrap(),
1694            ));
1695        }
1696        None
1697    }
1698
1699    #[doc(hidden)]
1700    pub fn yacc_grammar(&self) -> &YaccGrammar<StorageT> {
1701        &self.yacc_grammar
1702    }
1703    #[doc(hidden)]
1704    pub fn grammar_src(&self) -> &str {
1705        &self.grammar_src
1706    }
1707    #[doc(hidden)]
1708    pub fn grammar_path(&self) -> &Path {
1709        self.grammar_path.as_path()
1710    }
1711}
1712
1713/// Indents a multi-line string and trims any trailing newline.
1714/// This currently assumes that indentation on blank lines does not matter.
1715///
1716/// The algorithm used by this function is:
1717/// 1. Prefix `s` with the indentation, indenting the first line.
1718/// 2. Trim any trailing newlines.
1719/// 3. Replace all newlines with `\n{indent}`` to indent all lines after the first.
1720///
1721/// It is plausible that we should a step 4, but currently do not:
1722/// 4. Replace all `\n{indent}\n` with `\n\n`
1723fn indent(indent: &str, s: &str) -> String {
1724    format!("{indent}{}\n", s.trim_end_matches('\n')).replace('\n', &format!("\n{}", indent))
1725}
1726
1727fn make_generics(parse_generics: Option<&str>) -> Result<Generics, Box<dyn Error>> {
1728    if let Some(parse_generics) = parse_generics {
1729        let tokens = str::parse::<TokenStream>(parse_generics)?;
1730        match syn::parse2(quote!(<'lexer, 'input: 'lexer, #tokens>)) {
1731            Ok(res) => Ok(res),
1732            Err(err) => Err(format!("unable to parse %parse-generics: {}", err).into()),
1733        }
1734    } else {
1735        Ok(parse_quote!(<'lexer, 'input: 'lexer>))
1736    }
1737}
1738
// Tests dealing with the filesystem not supported under wasm32
#[cfg(all(not(target_arch = "wasm32"), test))]
mod test {
    use std::{fs::File, io::Write, path::PathBuf};

    use super::{CTConflictsError, CTParserBuilder};
    use crate::test_utils::TestLexerTypes;
    use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind};
    use tempfile::TempDir;

    #[test]
    fn test_conflicts() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        // This grammar contains 1 shift/reduce and 1 reduce/reduce conflict.
        f.write_all(
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
                .as_bytes(),
        )
        .unwrap();

        // With `error_on_conflicts(false)` the build succeeds and the conflicts are
        // inspectable via the (unstable) `conflicts` API.
        match CTParserBuilder::<TestLexerTypes>::new()
            .error_on_conflicts(false)
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
            .unwrap()
            .conflicts(crate::unstable::UnstableApi)
        {
            Some((_, _, _, conflicts)) => {
                assert_eq!(conflicts.sr_len(), 1);
                assert_eq!(conflicts.rr_len(), 1);
            }
            None => panic!("Expected error data"),
        }
    }

    #[test]
    fn test_conflicts_error() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        // Same conflicting grammar as `test_conflicts`, but without
        // `error_on_conflicts(false)` the build must fail with a `CTConflictsError`.
        f.write_all(
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
                .as_bytes(),
        )
        .unwrap();

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>();
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 1);
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
            }
        }
    }

    #[test]
    fn test_expect_error() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        // `%expect 2` over-claims: the grammar only has 1 shift/reduce conflict, so the
        // build must fail.
        f.write_all(
            "%start A
%expect 2
%%
A: 'a' 'b' | B 'b';
B: 'a';"
                .as_bytes(),
        )
        .unwrap();

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>();
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 0);
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
            }
        }
    }

    #[test]
    fn test_expectrr_error() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        // `%expect-rr 2` over-claims: the grammar only has 1 reduce/reduce conflict, so
        // the build must fail.
        f.write_all(
            "%start A
%expect 1
%expect-rr 2
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
                .as_bytes(),
        )
        .unwrap();

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>();
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 1);
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
            }
        }
    }

    #[test]
    /// Tests a yacc .y filename containing a dash character leading to an invalid rust identifier
    /// when that dash is subsequently used as the default `CTParserBuilder::mod_name`.
    fn test_invalid_identifier_in_derived_mod_name() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("contains-a-dash.y");
        let mut f = File::create(&file_path).unwrap();
        f.write_all(
            "%start A
%%
A : 'a';"
                .as_bytes(),
        )
        .unwrap();
        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let err_string = e.to_string();
                assert_eq!(
                    err_string,
                    "CTParserBuilder::mod_name(\"contains-a-dash_y\") is not a valid rust identifier due to 'unexpected token'"
                );
            }
        }
    }

    #[test]
    fn test_recoverer_header() -> Result<(), Box<dyn std::error::Error>> {
        use crate::RecoveryKind as RK;
        // A builder-level setting always wins over the grammar header's `recoverer`
        // entry; the header is only consulted when the builder leaves the recoverer
        // unset; if neither specifies one, the default is `CPCTPlus`.
        #[rustfmt::skip]
        let recovery_kinds = [
            //  Builder,          Header setting,     Expected result.
            // -----------       ------------------  -------------------
            (Some(RK::None),      Some(RK::None),     Some(RK::None)),
            (Some(RK::None),      Some(RK::CPCTPlus), Some(RK::None)),
            (Some(RK::CPCTPlus),  Some(RK::CPCTPlus), Some(RK::CPCTPlus)),
            (Some(RK::CPCTPlus),  Some(RK::None),     Some(RK::CPCTPlus)),
            (None,                Some(RK::CPCTPlus), Some(RK::CPCTPlus)),
            (None,                Some(RK::None),     Some(RK::None)),
            (None,                None,               Some(RK::CPCTPlus)),
            (Some(RK::None),      None,               Some(RK::None)),
            (Some(RK::CPCTPlus),  None,               Some(RK::CPCTPlus)),
        ];

        for (i, (builder_arg, header_arg, expected_rk)) in
            recovery_kinds.iter().cloned().enumerate()
        {
            let y_src = if let Some(header_arg) = header_arg {
                format!(
                    "\
                    %grmtools{{yacckind: Original(NoAction), recoverer: {}}} \
                    %% \
                    start: ; \
                    ",
                    match header_arg {
                        RK::None => "RecoveryKind::None",
                        RK::CPCTPlus => "RecoveryKind::CPCTPlus",
                    }
                )
            } else {
                r#"
                    %grmtools{yacckind: Original(NoAction)}
                    %%
                    Start: ;
                    "#
                .to_string()
            };
            let out_dir = std::env::var("OUT_DIR").unwrap();
            let y_path = format!("{out_dir}/recoverykind_test_{i}.y");
            let y_out_path = format!("{y_path}.rs");
            // `fs::write` creates (or truncates) the file itself, so no separate
            // `File::create` call is needed.
            std::fs::write(&y_path, y_src).unwrap();
            let mut cp_builder = CTParserBuilder::<TestLexerTypes>::new();
            cp_builder = cp_builder.output_path(y_out_path).grammar_path(y_path);
            cp_builder = if let Some(builder_arg) = builder_arg {
                cp_builder.recoverer(builder_arg)
            } else {
                cp_builder
            }
            .inspect_recoverer(Box::new(move |rk| {
                if matches!(
                    (rk, expected_rk),
                    (RK::None, Some(RK::None)) | (RK::CPCTPlus, Some(RK::CPCTPlus))
                ) {
                    Ok(())
                } else {
                    panic!("Unexpected recovery kind")
                }
            }));
            cp_builder.build()?;
        }
        Ok(())
    }
}