// lrpar/ctbuilder.rs

1//! Build grammars at compile-time so that they can be statically included into a binary.
2
3use std::{
4    any::type_name,
5    collections::{HashMap, HashSet},
6    env::{current_dir, var},
7    error::Error,
8    fmt::{self, Debug, Write as fmtWrite},
9    fs::{self, create_dir_all, read_to_string, File},
10    hash::Hash,
11    io::Write,
12    marker::PhantomData,
13    path::{Path, PathBuf},
14    sync::Mutex,
15};
16
17use crate::{LexerTypes, RTParserBuilder, RecoveryKind};
18use bincode::{decode_from_slice, encode_to_vec, Decode, Encode};
19use cfgrammar::{
20    header::{GrmtoolsSectionParser, Header, HeaderValue, Value},
21    markmap::{Entry, MergeBehavior},
22    newlinecache::NewlineCache,
23    yacc::{ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccOriginalActionKind},
24    Location, RIdx, Spanned, Symbol,
25};
26use filetime::FileTime;
27use lazy_static::lazy_static;
28use lrtable::{from_yacc, statetable::Conflicts, Minimiser, StateGraph, StateTable};
29use num_traits::{AsPrimitive, PrimInt, Unsigned};
30use proc_macro2::{Literal, TokenStream};
31use quote::{format_ident, quote, ToTokens, TokenStreamExt};
32use regex::Regex;
33
// Prefixes for generated identifiers so they cannot collide with names in
// user-written action code. NOTE(review): the actual use sites are in the code
// generation later in this file — confirm there.
const ACTION_PREFIX: &str = "__gt_";
const GLOBAL_PREFIX: &str = "__GT_";
const ACTIONS_KIND: &str = "__GtActionsKind";
const ACTIONS_KIND_PREFIX: &str = "Ak";
const ACTIONS_KIND_HIDDEN: &str = "__GtActionsKindHidden";

// Extension given to the generated Rust output file (see `grammar_in_src_dir`).
const RUST_FILE_EXT: &str = "rs";
41
lazy_static! {
    // Matches positional parameters (`$1`, `$23`, ...) in user-supplied action code.
    static ref RE_DOL_NUM: Regex = Regex::new(r"\$([0-9]+)").unwrap();
    // Output paths already written by a `CTParserBuilder` in this build process.
    // `build` consults this to reject generating two parsers to the same path.
    static ref GENERATED_PATHS: Mutex<HashSet<PathBuf>> = Mutex::new(HashSet::new());
}
46
/// Error returned (boxed) by [`CTParserBuilder::build`] when the grammar has more
/// Shift/Reduce or Reduce/Reduce conflicts than `%expect`/`%expect-rr` allow.
struct CTConflictsError<StorageT: Eq + Hash> {
    // The state table whose conflicts are reported. The `Display`/`Debug` impls
    // call `conflicts().unwrap()`, so this must only be constructed for tables
    // which actually contain conflicts.
    stable: StateTable<StorageT>,
}
50
51/// The quote impl of `ToTokens` for `Option` prints an empty string for `None`
52/// and the inner value for `Some(inner_value)`.
53///
54/// This wrapper instead emits both `Some` and `None` variants.
55/// See: [quote #20](https://github.com/dtolnay/quote/issues/20)
56struct QuoteOption<T>(Option<T>);
57
58impl<T: ToTokens> ToTokens for QuoteOption<T> {
59    fn to_tokens(&self, tokens: &mut TokenStream) {
60        tokens.append_all(match self.0 {
61            Some(ref t) => quote! { ::std::option::Option::Some(#t) },
62            None => quote! { ::std::option::Option::None },
63        });
64    }
65}
66
67/// The quote impl of `ToTokens` for `usize` prints literal values
68/// including a type suffix for example `0usize`.
69///
70/// This wrapper omits the type suffix emitting `0` instead.
71struct UnsuffixedUsize(usize);
72
73impl ToTokens for UnsuffixedUsize {
74    fn to_tokens(&self, tokens: &mut TokenStream) {
75        tokens.append(Literal::usize_unsuffixed(self.0))
76    }
77}
78
79/// This wrapper adds a missing impl of `ToTokens` for tuples.
80/// For a tuple `(a, b)` emits `(a.to_tokens(), b.to_tokens())`
81struct QuoteTuple<T>(T);
82
83impl<A: ToTokens, B: ToTokens> ToTokens for QuoteTuple<(A, B)> {
84    fn to_tokens(&self, tokens: &mut TokenStream) {
85        let (a, b) = &self.0;
86        tokens.append_all(quote!((#a, #b)));
87    }
88}
89
90/// The wrapped `&str` value will be emitted with a call to `to_string()`
91struct QuoteToString<'a>(&'a str);
92
93impl ToTokens for QuoteToString<'_> {
94    fn to_tokens(&self, tokens: &mut TokenStream) {
95        let x = &self.0;
96        tokens.append_all(quote! { #x.to_string() });
97    }
98}
99
100impl<StorageT> fmt::Display for CTConflictsError<StorageT>
101where
102    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
103    usize: AsPrimitive<StorageT>,
104{
105    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
106        let conflicts = self.stable.conflicts().unwrap();
107        write!(
108            f,
109            "CTConflictsError{{{} Reduce/Reduce, {} Shift/Reduce}}",
110            conflicts.rr_len(),
111            conflicts.sr_len()
112        )
113    }
114}
115
116impl<StorageT> fmt::Debug for CTConflictsError<StorageT>
117where
118    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
119    usize: AsPrimitive<StorageT>,
120{
121    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
122        let conflicts = self.stable.conflicts().unwrap();
123        write!(
124            f,
125            "CTConflictsError{{{} Reduce/Reduce, {} Shift/Reduce}}",
126            conflicts.rr_len(),
127            conflicts.sr_len()
128        )
129    }
130}
131
// Marker impl so `CTConflictsError` can be boxed into `Box<dyn Error>` and
// returned from `build`; the default `Error` methods suffice since the
// `Display`/`Debug` impls above carry the message.
impl<StorageT> Error for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
}
138
/// A string which uses `Display` for its `Debug` impl.
struct ErrorString(String);
impl fmt::Display for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let ErrorString(s) = self;
        write!(f, "{}", s)
    }
}
impl fmt::Debug for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Delegate so `Debug` can never drift out of sync with `Display`.
        fmt::Display::fmt(self, f)
    }
}
impl Error for ErrorString {}
154
/// Specify the visibility of the module generated by `CTBuilder`.
///
/// The `ToTokens` impl below emits the visibility as Rust source tokens (e.g.
/// `pub(crate)`), while `to_variant_tokens` emits a path naming the variant
/// itself for embedding in generated code.
#[derive(Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum Visibility {
    /// Module-level visibility only.
    Private,
    /// `pub`
    Public,
    /// `pub(super)`
    PublicSuper,
    /// `pub(self)`
    PublicSelf,
    /// `pub(crate)`
    PublicCrate,
    /// `pub(in {arg})`
    PublicIn(String),
}
172
/// Specifies the [Rust Edition] that will be emitted during code generation.
///
/// [Rust Edition]: https://doc.rust-lang.org/edition-guide/rust-2021/index.html
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum RustEdition {
    /// The 2015 edition of Rust.
    Rust2015,
    /// The 2018 edition of Rust.
    Rust2018,
    /// The 2021 edition of Rust.
    Rust2021,
}
182
183impl RustEdition {
184    fn to_variant_tokens(self) -> TokenStream {
185        match self {
186            RustEdition::Rust2015 => quote!(::lrpar::RustEdition::Rust2015),
187            RustEdition::Rust2018 => quote!(::lrpar::RustEdition::Rust2018),
188            RustEdition::Rust2021 => quote!(::lrpar::RustEdition::Rust2021),
189        }
190    }
191}
192
193impl ToTokens for Visibility {
194    fn to_tokens(&self, tokens: &mut TokenStream) {
195        tokens.extend(match self {
196            Visibility::Private => quote!(),
197            Visibility::Public => quote! {pub},
198            Visibility::PublicSuper => quote! {pub(super)},
199            Visibility::PublicSelf => quote! {pub(self)},
200            Visibility::PublicCrate => quote! {pub(crate)},
201            Visibility::PublicIn(data) => {
202                let other = str::parse::<TokenStream>(data).unwrap();
203                quote! {pub(in #other)}
204            }
205        })
206    }
207}
208
209impl Visibility {
210    fn to_variant_tokens(&self) -> TokenStream {
211        match self {
212            Visibility::Private => quote!(::lrpar::Visibility::Private),
213            Visibility::Public => quote!(::lrpar::Visibility::Public),
214            Visibility::PublicSuper => quote!(::lrpar::Visibility::PublicSuper),
215            Visibility::PublicSelf => quote!(::lrpar::Visibility::PublicSelf),
216            Visibility::PublicCrate => quote!(::lrpar::Visibility::PublicCrate),
217            Visibility::PublicIn(data) => {
218                let data = QuoteToString(data);
219                quote!(::lrpar::Visibility::PublicIn(#data))
220            }
221        }
222    }
223}
224
/// A `CTParserBuilder` allows one to specify the criteria for building a statically generated
/// parser.
pub struct CTParserBuilder<'a, LexerTypesT: LexerTypes>
where
    LexerTypesT::StorageT: Eq + Hash,
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    // Anything stored in here (except `output_path`, `conflicts`, and `error_on_conflict`) almost
    // certainly needs to be included as part of the rebuild_cache function below so that, if it's
    // changed, the grammar is rebuilt.
    //
    // Path to the input `.y` grammar file; must be set before `build` is called.
    grammar_path: Option<PathBuf>,
    // Path the generated Rust module is written to; must be set before `build`.
    output_path: Option<PathBuf>,
    // Explicit module name; if `None`, `build` derives one from the grammar filename.
    mod_name: Option<&'a str>,
    // Error-recovery strategy; `build` falls back to `RecoveryKind::CPCTPlus` if unset.
    recoverer: Option<RecoveryKind>,
    // Yacc variant; may also come from the grammar's `%grmtools` header.
    yacckind: Option<YaccKind>,
    // Treat unexpected Shift/Reduce / Reduce/Reduce conflicts as build errors.
    error_on_conflicts: bool,
    // Treat grammar warnings as build errors.
    warnings_are_errors: bool,
    // Print warnings (to stderr, or via `cargo:warning=` under cargo).
    show_warnings: bool,
    // Visibility of the generated module.
    visibility: Visibility,
    // Rust edition targeted by the generated code.
    rust_edition: RustEdition,
    // Hook invoked by `build` with the merged header, a runtime parser builder,
    // the token map, and the grammar path; used internally (see `inspect_rt`).
    inspect_rt: Option<
        Box<
            dyn for<'b> FnMut(
                &'b mut Header<Location>,
                RTParserBuilder<LexerTypesT::StorageT, LexerTypesT>,
                &'b HashMap<String, LexerTypesT::StorageT>,
                &PathBuf,
            ) -> Result<(), Box<dyn Error>>,
        >,
    >,
    // test function for inspecting private state
    #[cfg(test)]
    inspect_callback: Option<Box<dyn Fn(RecoveryKind) -> Result<(), Box<dyn Error>>>>,
    phantom: PhantomData<LexerTypesT>,
}
260
261impl<
262        'a,
263        StorageT: 'static + Debug + Hash + PrimInt + Encode + Unsigned,
264        LexerTypesT: LexerTypes<StorageT = StorageT>,
265    > CTParserBuilder<'a, LexerTypesT>
266where
267    usize: AsPrimitive<StorageT>,
268{
269    /// Create a new `CTParserBuilder`.
270    ///
271    /// `StorageT` must be an unsigned integer type (e.g. `u8`, `u16`) which is:
272    ///   * big enough to index (separately) all the tokens, rules, productions in the grammar,
273    ///   * big enough to index the state table created from the grammar,
274    ///   * less than or equal in size to `u32`.
275    ///
276    /// In other words, if you have a grammar with 256 tokens, 256 rules, and 256 productions,
277    /// which creates a state table of 256 states you can safely specify `u8` here; but if any of
278    /// those counts becomes 257 or greater you will need to specify `u16`. If you are parsing
279    /// large files, the additional storage requirements of larger integer types can be noticeable,
280    /// and in such cases it can be worth specifying a smaller type. `StorageT` defaults to `u32`
281    /// if unspecified.
282    ///
283    /// # Examples
284    ///
285    /// ```text
286    /// CTParserBuilder::<DefaultLexerTypes<u8>>::new()
287    ///     .grammar_in_src_dir("grm.y")?
288    ///     .build()?;
289    /// ```
290    pub fn new() -> Self {
291        CTParserBuilder {
292            grammar_path: None,
293            output_path: None,
294            mod_name: None,
295            recoverer: None,
296            yacckind: None,
297            error_on_conflicts: true,
298            warnings_are_errors: true,
299            show_warnings: true,
300            visibility: Visibility::Private,
301            rust_edition: RustEdition::Rust2021,
302            inspect_rt: None,
303            #[cfg(test)]
304            inspect_callback: None,
305            phantom: PhantomData,
306        }
307    }
308
309    /// Set the input grammar path to a file relative to this project's `src` directory. This will
310    /// also set the output path (i.e. you do not need to call [CTParserBuilder::output_path]).
311    ///
312    /// For example if `a/b.y` is passed as `inp` then [CTParserBuilder::build] will:
313    ///   * use `src/a/b.y` as the input file.
314    ///   * write output to a file which can then be imported by calling `lrpar_mod!("a/b.y")`.
315    ///   * create a module in that output file named `b_y`.
316    ///
317    /// You can override the output path and/or module name by calling [CTParserBuilder::output_path]
318    /// and/or [CTParserBuilder::mod_name], respectively, after calling this function.
319    ///
320    /// This is a convenience function that makes it easier to compile grammar files stored in a
321    /// project's `src/` directory: please see [CTParserBuilder::build] for additional constraints
322    /// and information about the generated files. Note also that each `.y` file can only be
323    /// processed once using this function: if you want to generate multiple grammars from a single
324    /// `.y` file, you will need to use [CTParserBuilder::output_path].
325    pub fn grammar_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
326    where
327        P: AsRef<Path>,
328    {
329        if !srcp.as_ref().is_relative() {
330            return Err(format!(
331                "Grammar path '{}' must be a relative path.",
332                srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
333            )
334            .into());
335        }
336
337        let mut grmp = current_dir()?;
338        grmp.push("src");
339        grmp.push(srcp.as_ref());
340        self.grammar_path = Some(grmp);
341
342        let mut outp = PathBuf::new();
343        outp.push(var("OUT_DIR").unwrap());
344        outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
345        create_dir_all(&outp)?;
346        let mut leaf = srcp
347            .as_ref()
348            .file_name()
349            .unwrap()
350            .to_str()
351            .unwrap()
352            .to_owned();
353        write!(leaf, ".{}", RUST_FILE_EXT).ok();
354        outp.push(leaf);
355        Ok(self.output_path(outp))
356    }
357
358    /// Set the input grammar path to `inp`. If specified, you must also call
359    /// [CTParserBuilder::output_path]. In general it is easier to use
360    /// [CTParserBuilder::grammar_in_src_dir].
361    pub fn grammar_path<P>(mut self, inp: P) -> Self
362    where
363        P: AsRef<Path>,
364    {
365        self.grammar_path = Some(inp.as_ref().to_owned());
366        self
367    }
368
369    /// Set the output grammar path to `outp`. Note that there are no requirements on `outp`: the
370    /// file can exist anywhere you can create a valid [Path] to. However, if you wish to use
371    /// [crate::lrpar_mod!] you will need to make sure that `outp` is in
372    /// [std::env::var]`("OUT_DIR")` or one of its subdirectories.
373    pub fn output_path<P>(mut self, outp: P) -> Self
374    where
375        P: AsRef<Path>,
376    {
377        self.output_path = Some(outp.as_ref().to_owned());
378        self
379    }
380
381    /// Set the generated module name to `mod_name`. If no module name is specified,
382    /// [CTParserBuilder::build] will attempt to create a sensible default based on the grammar
383    /// filename.
384    pub fn mod_name(mut self, mod_name: &'a str) -> Self {
385        self.mod_name = Some(mod_name);
386        self
387    }
388
389    /// Set the visibility of the generated module to `vis`. Defaults to `Visibility::Private`.
390    pub fn visibility(mut self, vis: Visibility) -> Self {
391        self.visibility = vis;
392        self
393    }
394
395    /// Set the recoverer for this parser to `rk`. Defaults to `RecoveryKind::CPCTPlus`.
396    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
397        self.recoverer = Some(rk);
398        self
399    }
400
401    /// Set the `YaccKind` for this parser to `ak`.
402    pub fn yacckind(mut self, yk: YaccKind) -> Self {
403        self.yacckind = Some(yk);
404        self
405    }
406
407    /// If set to true, [CTParserBuilder::build] will return an error if the given grammar contains
408    /// any Shift/Reduce or Reduce/Reduce conflicts. Defaults to `true`.
409    pub fn error_on_conflicts(mut self, b: bool) -> Self {
410        self.error_on_conflicts = b;
411        self
412    }
413
414    /// If set to true, [CTParserBuilder::build] will return an error if the given grammar contains
415    /// any warnings. Defaults to `true`.
416    pub fn warnings_are_errors(mut self, b: bool) -> Self {
417        self.warnings_are_errors = b;
418        self
419    }
420
421    /// If set to true, [CTParserBuilder::build] will print warnings to stderr, or via cargo when
422    /// running under cargo. Defaults to `true`.
423    pub fn show_warnings(mut self, b: bool) -> Self {
424        self.show_warnings = b;
425        self
426    }
427
428    /// Sets the rust edition to be used for generated code. Defaults to the latest edition of
429    /// rust supported by grmtools.
430    pub fn rust_edition(mut self, edition: RustEdition) -> Self {
431        self.rust_edition = edition;
432        self
433    }
434
435    #[cfg(test)]
436    pub fn inspect_recoverer(
437        mut self,
438        cb: Box<dyn for<'h, 'y> Fn(RecoveryKind) -> Result<(), Box<dyn Error>>>,
439    ) -> Self {
440        self.inspect_callback = Some(cb);
441        self
442    }
443
444    #[doc(hidden)]
445    pub fn inspect_rt(
446        mut self,
447        cb: Box<
448            dyn for<'b, 'y> FnMut(
449                &'b mut Header<Location>,
450                RTParserBuilder<'y, StorageT, LexerTypesT>,
451                &'b HashMap<String, StorageT>,
452                &PathBuf,
453            ) -> Result<(), Box<dyn Error>>,
454        >,
455    ) -> Self {
456        self.inspect_rt = Some(cb);
457        self
458    }
459
    /// Statically compile the Yacc file specified by [CTParserBuilder::grammar_path()] into Rust,
    /// placing the output into the file specified by [CTParserBuilder::output_path()]. Note that
    /// additional files will be created with the same name as specified in [self.output_path] but
    /// with the extensions `grm` and `stable`, overwriting any existing files with those names.
    ///
    /// If `%parse-param` is not specified, the generated module follows the form:
    ///
    /// ```text
    ///   mod <modname> {
    ///     pub fn parse<'lexer, 'input: 'lexer>(lexer: &'lexer dyn NonStreamingLexer<...>)
    ///       -> (Option<ActionT>, Vec<LexParseError<...>> { ... }
    ///
    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
    ///       ...
    ///     }
    ///
    ///     ...
    ///   }
    /// ```
    ///
    /// If `%parse-param x: t` is specified, the generated module follows the form:
    ///
    /// ```text
    ///   mod <modname> {
    ///     pub fn parse<'lexer, 'input: 'lexer>(lexer: &'lexer dyn NonStreamingLexer<...>, x: t)
    ///       -> (Option<ActionT>, Vec<LexParseError<...>> { ... }
    ///
    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
    ///       ...
    ///     }
    ///
    ///     ...
    ///   }
    /// ```
    ///
    /// where:
    ///  * `modname` is either:
    ///    * the module name specified by [CTParserBuilder::mod_name()];
    ///    * or, if no module name was explicitly specified, then for the file `/a/b/c.y` the
    ///      module name is `c_y` (i.e. the file's leaf name, minus its extension, with a prefix of
    ///      `_y`).
    ///  * `ActionT` is either:
    ///    * if the `yacckind` was set to `YaccKind::GrmTools` or
    ///      `YaccKind::Original(YaccOriginalActionKind::UserAction)`, it is
    ///      the return type of the `%start` rule;
    ///    * or, if the `yacckind` was set to
    ///      `YaccKind::Original(YaccOriginalActionKind::GenericParseTree)`, it
    ///      is [`crate::Node<StorageT>`].
    ///
    /// # Panics
    ///
    /// If `StorageT` is not big enough to index the grammar's tokens, rules, or productions.
    pub fn build(mut self) -> Result<CTParser<StorageT>, Box<dyn Error>> {
        let grmp = self
            .grammar_path
            .as_ref()
            .expect("grammar_path must be specified before processing.");
        let outp = self
            .output_path
            .as_ref()
            .expect("output_path must be specified before processing.");
        let mut header = Header::new();

        // Seed the header with the builder-supplied `yacckind` (if any), marked
        // `MergeBehavior::Ours` so it takes precedence over the grammar's own
        // `%grmtools` section; if unset, mark the key required so a missing
        // value is reported below.
        match header.entry("yacckind".to_string()) {
            Entry::Occupied(_) => unreachable!(),
            Entry::Vacant(v) => match self.yacckind {
                Some(YaccKind::Eco) => panic!("Eco compile-time grammar generation not supported."),
                Some(yk) => {
                    let yk_value = Value::try_from(yk)?;
                    let mut o = v.insert_entry(HeaderValue(
                        Location::Other("CTParserBuilder".to_string()),
                        yk_value,
                    ));
                    o.set_merge_behavior(MergeBehavior::Ours);
                }
                None => {
                    v.occupied_entry().mark_required();
                }
            },
        }
        // Likewise seed the recoverer chosen via the builder, if any.
        if let Some(recoverer) = self.recoverer {
            match header.entry("recoverer".to_string()) {
                Entry::Occupied(_) => unreachable!(),
                Entry::Vacant(v) => {
                    let rk_value: Value<Location> = Value::try_from(recoverer)?;
                    let mut o = v.insert_entry(HeaderValue(
                        Location::Other("CTParserBuilder".to_string()),
                        rk_value,
                    ));
                    o.set_merge_behavior(MergeBehavior::Ours);
                }
            }
        }

        // Refuse to generate two parsers to the same output path within one
        // build process (the second would silently clobber the first).
        {
            let mut lk = GENERATED_PATHS.lock().unwrap();
            if lk.contains(outp.as_path()) {
                return Err(format!("Generating two parsers to the same path ('{}') is not allowed: use CTParserBuilder::output_path (and, optionally, CTParserBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
            }
            lk.insert(outp.clone());
        }

        // Read the grammar and merge its `%grmtools` header into ours.
        let inc =
            read_to_string(grmp).map_err(|e| format!("When reading '{}': {e}", grmp.display()))?;
        let parsed_header = GrmtoolsSectionParser::new(&inc, false).parse();
        if let Err(errs) = parsed_header {
            return Err(format!(
                "Error parsing `%grmtools` section:\n{}",
                errs.iter()
                    .map(|e| e.to_string())
                    .collect::<Vec<_>>()
                    .join("\n")
            ))?;
        }
        let (parsed_header, _) = parsed_header.unwrap();
        header.merge_from(parsed_header)?;
        // The merged header is now authoritative for `yacckind`/`recoverer`.
        self.yacckind = header
            .get("yacckind")
            .map(|HeaderValue(_, val)| val)
            .map(YaccKind::try_from)
            .transpose()?;
        header.mark_used(&"yacckind".to_string());
        let ast_validation = if let Some(yk) = self.yacckind {
            ASTWithValidityInfo::new(yk, &inc)
        } else {
            Err("Missing 'yacckind'".to_string())?
        };

        header.mark_used(&"recoverer".to_string());
        let rk_val = header.get("recoverer").map(|HeaderValue(_, rk_val)| rk_val);

        if let Some(rk_val) = rk_val {
            self.recoverer = Some(RecoveryKind::try_from(rk_val)?);
        } else {
            // Fallback to the default recoverykind.
            self.recoverer = Some(RecoveryKind::CPCTPlus);
        }
        self.yacckind = Some(ast_validation.yacc_kind());
        let warnings = ast_validation.ast().warnings();
        // Format a message with "at line L column C" / provenance appended,
        // depending on what kind of location is attached to it.
        let loc_fmt = |err_str, loc, inc: &str, line_cache: &NewlineCache| match loc {
            Location::Span(span) => {
                if let Some((line, column)) =
                    line_cache.byte_to_line_num_and_col_num(inc, span.start())
                {
                    format!("{} at line {line} column {column}", err_str)
                } else {
                    err_str
                }
            }
            Location::CommandLine => {
                format!("{} from the command-line.", err_str)
            }
            Location::Other(s) => {
                format!("{} from '{}'", err_str, s)
            }
        };
        let spanned_fmt = |x: &dyn Spanned, inc: &str, line_cache: &NewlineCache| {
            loc_fmt(x.to_string(), Location::Span(x.spans()[0]), inc, line_cache)
        };

        let res = YaccGrammar::<StorageT>::new_from_ast_with_validity_info(&ast_validation);
        let grm = match res {
            // Grammar is valid but warnings are fatal: report them all as one error.
            Ok(_) if self.warnings_are_errors && !warnings.is_empty() => {
                let mut line_cache = NewlineCache::new();
                line_cache.feed(&inc);
                return Err(ErrorString(if warnings.len() > 1 {
                    // Indent under the "Error:" prefix.
                    format!(
                        "\n\t{}",
                        warnings
                            .iter()
                            .map(|w| spanned_fmt(w, &inc, &line_cache))
                            .collect::<Vec<_>>()
                            .join("\n\t")
                    )
                } else {
                    spanned_fmt(warnings.first().unwrap(), &inc, &line_cache)
                }))?;
            }
            // Grammar is valid; emit any (non-fatal) warnings and continue.
            Ok(grm) => {
                if !warnings.is_empty() {
                    let mut line_cache = NewlineCache::new();
                    line_cache.feed(&inc);
                    for w in warnings {
                        // Assume if this variable is set we are running under cargo.
                        if std::env::var("OUT_DIR").is_ok() && self.show_warnings {
                            println!("cargo:warning={}", spanned_fmt(&w, &inc, &line_cache));
                        } else if self.show_warnings {
                            eprintln!("{}", spanned_fmt(&w, &inc, &line_cache));
                        }
                    }
                }
                grm
            }
            // Grammar is invalid: report all errors (and warnings) as one error.
            Err(errs) => {
                let mut line_cache = NewlineCache::new();
                line_cache.feed(&inc);
                return Err(ErrorString(if errs.len() + warnings.len() > 1 {
                    // Indent under the "Error:" prefix.
                    format!(
                        "\n\t{}",
                        errs.iter()
                            .map(|e| spanned_fmt(e, &inc, &line_cache))
                            .chain(warnings.iter().map(|w| spanned_fmt(w, &inc, &line_cache)))
                            .collect::<Vec<_>>()
                            .join("\n\t")
                    )
                } else {
                    spanned_fmt(errs.first().unwrap(), &inc, &line_cache)
                }))?;
            }
        };

        #[cfg(test)]
        if let Some(cb) = &self.inspect_callback {
            cb(self.recoverer.expect("has a default value"))?;
        }

        // Map token names to their storage IDs; also returned to the caller so
        // a lexer builder can synchronise its token IDs with the grammar's.
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), i.as_storaget()))
            .collect::<HashMap<_, _>>();

        let derived_mod_name = match self.mod_name {
            Some(s) => s.to_owned(),
            None => {
                // The user hasn't specified a module name, so we create one automatically: what we
                // do is strip off all the filename extensions (note that it's likely that inp ends
                // with `y.rs`, so we potentially have to strip off more than one extension) and
                // then add `_y` to the end.
                let mut stem = grmp.to_str().unwrap();
                loop {
                    let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
                    if stem == new_stem {
                        break;
                    }
                    stem = new_stem;
                }
                format!("{}_y", stem)
            }
        };

        let cache = self.rebuild_cache(&derived_mod_name, &grm);

        // We don't need to go through the full rigmarole of generating an output file if all of
        // the following are true: the output file exists; it is newer than the input file; and the
        // cache hasn't changed. The last of these might be surprising, but it's vital: we don't
        // know, for example, what the IDs map might be from one run to the next, and it might
        // change for reasons beyond lrpar's control. If it does change, that means that the lexer
        // and lrpar would get out of sync, so we have to play it safe and regenerate in such
        // cases.
        if let Ok(ref inmd) = fs::metadata(grmp) {
            if let Ok(ref out_rs_md) = fs::metadata(outp) {
                if FileTime::from_last_modification_time(out_rs_md)
                    > FileTime::from_last_modification_time(inmd)
                {
                    if let Ok(outc) = read_to_string(outp) {
                        if outc.contains(&cache.to_string()) {
                            return Ok(CTParser {
                                regenerated: false,
                                rule_ids,
                                conflicts: None,
                            });
                        } else {
                            #[cfg(grmtools_extra_checks)]
                            if std::env::var("CACHE_EXPECTED").is_ok() {
                                eprintln!("outc: {}", outc);
                                eprintln!("using cache: {}", cache,);
                                // Primarily for use in the testsuite.
                                panic!("The cache regenerated however, it was expected to match");
                            }
                        }
                    }
                }
            }
        }

        // At this point, we know we're going to generate fresh output; however, if something goes
        // wrong in the process between now and us writing /out/blah.rs, rustc thinks that
        // everything's gone swimmingly (even if build.rs errored!), and tries to carry on
        // compilation, leading to weird errors. We therefore delete /out/blah.rs at this point,
        // which means, at worse, the user gets a "file not found" error from rustc (which is less
        // confusing than the alternatives).
        fs::remove_file(outp).ok();

        let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?;
        if self.error_on_conflicts {
            if let Some(c) = stable.conflicts() {
                // Conflicts are only an error if they don't exactly match the
                // counts the grammar declared via `%expect` / `%expect-rr`.
                match (grm.expect(), grm.expectrr()) {
                    (Some(i), Some(j)) if i == c.sr_len() && j == c.rr_len() => (),
                    (Some(i), None) if i == c.sr_len() && 0 == c.rr_len() => (),
                    (None, Some(j)) if 0 == c.sr_len() && j == c.rr_len() => (),
                    (None, None) if 0 == c.rr_len() && 0 == c.sr_len() => (),
                    _ => return Err(Box::new(CTConflictsError { stable })),
                }
            }
        }

        // Give the (internal) inspection hook a chance to see the header and a
        // runtime parser builder before the output file is written.
        if let Some(ref mut inspector_rt) = self.inspect_rt {
            let rt: RTParserBuilder<'_, StorageT, LexerTypesT> =
                RTParserBuilder::new(&grm, &stable);
            let rt = if let Some(rk) = self.recoverer {
                rt.recoverer(rk)
            } else {
                rt
            };
            inspector_rt(&mut header, rt, &rule_ids, grmp)?
        }

        // Any header keys that were never consumed, or required-but-absent,
        // indicate a mistake in the grammar's `%grmtools` section.
        let unused_keys = header.unused();
        if !unused_keys.is_empty() {
            return Err(format!("Unused keys in header: {}", unused_keys.join(", ")).into());
        }
        let missing_keys = header
            .missing()
            .iter()
            .map(|s| s.as_str())
            .collect::<Vec<_>>();
        if !missing_keys.is_empty() {
            return Err(format!(
                "Required values were missing from the header: {}",
                missing_keys.join(", ")
            )
            .into());
        }

        // Write the generated module, embedding the cache string so the
        // freshness check above can detect staleness on the next run.
        self.output_file(
            &grm,
            &stable,
            &derived_mod_name,
            outp,
            &format!("/* CACHE INFORMATION {} */\n", cache),
        )?;
        let conflicts = if stable.conflicts().is_some() {
            Some((grm, sgraph, stable))
        } else {
            None
        };
        Ok(CTParser {
            regenerated: true,
            rule_ids,
            conflicts,
        })
    }
805
806    /// Given the filename `a/b.y` as input, statically compile the grammar `src/a/b.y` into a Rust
807    /// module which can then be imported using `lrpar_mod!("a/b.y")`. This is a convenience
808    /// function around [`process_file`](#method.process_file) which makes it easier to compile
809    /// grammar files stored in a project's `src/` directory: please see
810    /// [`process_file`](#method.process_file) for additional constraints and information about the
811    /// generated files.
812    #[deprecated(
813        since = "0.11.0",
814        note = "Please use grammar_in_src_dir(), build(), and token_map() instead"
815    )]
816    #[allow(deprecated)]
817    pub fn process_file_in_src(
818        &mut self,
819        srcp: &str,
820    ) -> Result<HashMap<String, StorageT>, Box<dyn Error>> {
821        let mut inp = current_dir()?;
822        inp.push("src");
823        inp.push(srcp);
824        let mut outp = PathBuf::new();
825        outp.push(var("OUT_DIR").unwrap());
826        outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
827        create_dir_all(&outp)?;
828        let mut leaf = Path::new(srcp)
829            .file_name()
830            .unwrap()
831            .to_str()
832            .unwrap()
833            .to_owned();
834        write!(leaf, ".{}", RUST_FILE_EXT).ok();
835        outp.push(leaf);
836        self.process_file(inp, outp)
837    }
838
839    /// Statically compile the Yacc file `inp` into Rust, placing the output into the file `outp`.
840    /// Note that three additional files will be created with the same name as `outp` but with the
841    /// extensions `grm`, and `stable`, overwriting any existing files with those names.
842    ///
843    /// `outp` defines a module as follows:
844    ///
845    /// ```text
846    ///   mod modname {
847    ///     pub fn parse(lexemes: &::std::vec::Vec<::lrpar::Lexeme<StorageT>>) { ... }
848    ///         -> (::std::option::Option<ActionT>,
849    ///             ::std::vec::Vec<::lrpar::LexParseError<StorageT>>)> { ...}
850    ///
851    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
852    ///       ...
853    ///     }
854    ///
855    ///     ...
856    ///   }
857    /// ```
858    ///
859    /// where:
860    ///  * `modname` is either:
861    ///    * the module name specified [`mod_name`](#method.mod_name)
862    ///    * or, if no module name was explicitly specified, then for the file `/a/b/c.y` the
863    ///      module name is `c_y` (i.e. the file's leaf name, minus its extension, with a prefix of
864    ///      `_y`).
865    ///  * `ActionT` is either:
866    ///    * the `%actiontype` value given to the grammar
867    ///    * or, if the `yacckind` was set YaccKind::Original(YaccOriginalActionKind::UserAction),
868    ///      it is [`Node<StorageT>`](../parser/enum.Node.html)
869    ///
870    /// # Panics
871    ///
872    /// If `StorageT` is not big enough to index the grammar's tokens, rules, or
873    /// productions.
874    #[deprecated(
875        since = "0.11.0",
876        note = "Please use grammar_path(), output_path(), build(), and token_map() instead"
877    )]
878    #[allow(deprecated)]
879    pub fn process_file<P, Q>(
880        &mut self,
881        inp: P,
882        outp: Q,
883    ) -> Result<HashMap<String, StorageT>, Box<dyn Error>>
884    where
885        P: AsRef<Path>,
886        Q: AsRef<Path>,
887    {
888        self.grammar_path = Some(inp.as_ref().to_owned());
889        self.output_path = Some(outp.as_ref().to_owned());
890        let cl: CTParserBuilder<LexerTypesT> = CTParserBuilder {
891            grammar_path: self.grammar_path.clone(),
892            output_path: self.output_path.clone(),
893            mod_name: self.mod_name,
894            recoverer: self.recoverer,
895            yacckind: self.yacckind,
896            error_on_conflicts: self.error_on_conflicts,
897            warnings_are_errors: self.warnings_are_errors,
898            show_warnings: self.show_warnings,
899            visibility: self.visibility.clone(),
900            rust_edition: self.rust_edition,
901            inspect_rt: None,
902            #[cfg(test)]
903            inspect_callback: None,
904            phantom: PhantomData,
905        };
906        Ok(cl.build()?.rule_ids)
907    }
908
    /// Write a complete generated Rust module for `grm`/`stable` into the file `outp_rs`,
    /// appending `cache` (a comment recording the build settings) at the very end of the file.
    fn output_file<P: AsRef<Path>>(
        &self,
        grm: &YaccGrammar<StorageT>,
        stable: &StateTable<StorageT>,
        mod_name: &str,
        outp_rs: P,
        cache: &str,
    ) -> Result<(), Box<dyn Error>> {
        let visibility = self.visibility.clone();
        // User action functions are only generated for the action-running YaccKinds.
        let user_actions = if let Some(
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools,
        ) = self.yacckind
        {
            Some(self.gen_user_actions(grm)?)
        } else {
            None
        };
        let rule_consts = self.gen_rule_consts(grm)?;
        let token_epp = self.gen_token_epp(grm)?;
        let parse_function = self.gen_parse_function(grm, stable)?;
        // `yacckind` must have been set by the time this method runs, hence the unwrap. The
        // wrappers mirror the `user_actions` condition above.
        let action_wrappers = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                Some(self.gen_wrappers(grm)?)
            }
            YaccKind::Original(YaccOriginalActionKind::NoAction)
            | YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => None,
            _ => unreachable!(),
        };
        let mod_name = format_ident!("{}", mod_name);
        let out_tokens = quote! {
            #visibility mod #mod_name {
                // At the top so that `user_actions` may contain #![inner_attribute]
                #user_actions
                mod _parser_ {
                    #![allow(clippy::type_complexity)]
                    #![allow(clippy::unnecessary_wraps)]
                    #![deny(unsafe_code)]
                    #[allow(unused_imports)]
                    use super::*;
                    #parse_function
                    #rule_consts
                    #token_epp
                    #action_wrappers
                } // End of `mod _parser_`
                #[allow(unused_imports)]
                pub use _parser_::*;
                #[allow(unused_imports)]
                use ::lrpar::Lexeme;
            } // End of `mod #mod_name`
        };
        // Try and run a code formatter on the generated code.
        let unformatted = out_tokens.to_string();
        // If syn can't re-parse the token string, fall back to the unformatted form rather
        // than erroring: pretty-printing is purely cosmetic.
        let outs = syn::parse_str(&unformatted)
            .map(|syntax_tree| prettyplease::unparse(&syntax_tree))
            .unwrap_or(unformatted);
        let mut f = File::create(outp_rs)?;
        f.write_all(outs.as_bytes())?;
        f.write_all(cache.as_bytes())?;
        Ok(())
    }
969
    /// Generate the cache, which determines if anything's changed enough that we need to
    /// regenerate outputs and force rustc to recompile.
    fn rebuild_cache(&self, derived_mod_name: &'_ str, grm: &YaccGrammar<StorageT>) -> TokenStream {
        // We don't need to be particularly clever here: we just need to record the various things
        // that could change between builds.
        //
        // Record the time that this version of lrpar was built. If the source code changes and
        // rustc forces a recompile, this will change this value, causing anything which depends on
        // this build of lrpar to be recompiled too.
        //
        // Destructuring `self` (rather than reading fields ad hoc) means the compiler errors here
        // if a field is added, forcing a decision about whether it belongs in the cache.
        let Self {
            // All variables except for `output_path`, `inspect_callback` and `phantom` should
            // be written into the cache.
            grammar_path,
            mod_name,
            recoverer,
            yacckind,
            output_path: _,
            error_on_conflicts,
            warnings_are_errors,
            show_warnings,
            visibility,
            rust_edition,
            inspect_rt: _,
            #[cfg(test)]
                inspect_callback: _,
            phantom: _,
        } = self;
        let build_time = env!("VERGEN_BUILD_TIMESTAMP");
        let grammar_path = grammar_path.as_ref().unwrap().to_string_lossy();
        // `QuoteOption` emits both `Some(..)` and `None` explicitly (quote's own `Option` impl
        // emits nothing at all for `None`, which would make the cache ambiguous).
        let mod_name = QuoteOption(mod_name.as_deref());
        let visibility = visibility.to_variant_tokens();
        let rust_edition = rust_edition.to_variant_tokens();
        let yacckind = yacckind.expect("is_some() by this point");
        // Record the token index -> token name mapping so that renaming a token invalidates the
        // cache even when nothing else has changed.
        let rule_map = grm
            .iter_tidxs()
            .map(|tidx| {
                QuoteTuple((
                    usize::from(tidx),
                    grm.token_name(tidx).unwrap_or("<unknown>"),
                ))
            })
            .collect::<Vec<_>>();
        let cache_info = quote! {
            BUILD_TIME = #build_time
            DERIVED_MOD_NAME = #derived_mod_name
            GRAMMAR_PATH = #grammar_path
            MOD_NAME = #mod_name
            RECOVERER = #recoverer
            YACC_KIND = #yacckind
            ERROR_ON_CONFLICTS = #error_on_conflicts
            SHOW_WARNINGS = #show_warnings
            WARNINGS_ARE_ERRORS = #warnings_are_errors
            RUST_EDITION = #rust_edition
            RULE_IDS_MAP = [#(#rule_map,)*]
            VISIBILITY = #visibility
        };
        // The cache is embedded in the generated file as a single string literal (see the
        // `/* CACHE INFORMATION ... */` comment written by the caller).
        let cache_info_str = cache_info.to_string();
        quote!(#cache_info_str)
    }
1029
    /// Generate the main parse() function for the output file.
    ///
    /// The generated `parse` embeds the bincode-serialised grammar and state table as byte
    /// arrays, reconstitutes them at runtime via `::lrpar::ctbuilder::_reconstitute`, and then
    /// dispatches to the `RTParserBuilder` entry point matching the configured `YaccKind`.
    fn gen_parse_function(
        &self,
        grm: &YaccGrammar<StorageT>,
        stable: &StateTable<StorageT>,
    ) -> Result<TokenStream, Box<dyn Error>> {
        let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
        let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
        let recoverer = self.recoverer;
        // The body of the generated `parse` function differs per YaccKind: the tree/no-action
        // kinds call the relevant RTParserBuilder method directly, while the action-running
        // kinds also build the table of per-production action wrapper functions.
        let run_parser = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
                quote! {
                    ::lrpar::RTParserBuilder::new(&grm, &stable)
                        .recoverer(#recoverer)
                        .parse_generictree(lexer)
                }
            }
            YaccKind::Original(YaccOriginalActionKind::NoAction) => {
                quote! {
                    ::lrpar::RTParserBuilder::new(&grm, &stable)
                        .recoverer(#recoverer)
                        .parse_noaction(lexer)
                }
            }
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                let actionskind = str::parse::<TokenStream>(ACTIONS_KIND)?;
                // actions always have a parse_param argument, and when the `parse` function lacks one
                // that parameter will be unit.
                let (action_fn_parse_param, action_fn_parse_param_ty) = match grm.parse_param() {
                    Some((name, ty)) => {
                        let name = str::parse::<TokenStream>(name)?;
                        let ty = str::parse::<TokenStream>(ty)?;
                        (quote!(#name), quote!(#ty))
                    }
                    None => (quote!(()), quote!(())),
                };
                // One wrapper function identifier per production, in production order.
                let wrappers = grm.iter_pidxs().map(|pidx| {
                    let pidx = usize::from(pidx);
                    format_ident!("{}wrapper_{}", ACTION_PREFIX, pidx)
                });
                // The anonymous `'_` lifetime isn't accepted by Rust 2015, so it is only
                // emitted for later editions.
                let edition_lifetime = if self.rust_edition != RustEdition::Rust2015 {
                    quote!('_,)
                } else {
                    quote!()
                };
                // The top-level result comes wrapped in the ActionsKind variant belonging to
                // the user's %start rule; the generated match unwraps it for the caller.
                let ridx = usize::from(self.user_start_ridx(grm));
                let action_ident = format_ident!("{}{}", ACTIONS_KIND_PREFIX, ridx);

                quote! {
                    let actions: ::std::vec::Vec<
                            &dyn Fn(
                                    ::cfgrammar::RIdx<#storaget>,
                                    &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                                    ::cfgrammar::Span,
                                    ::std::vec::Drain<#edition_lifetime ::lrpar::parser::AStackType<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #actionskind<'input>>>,
                                    #action_fn_parse_param_ty
                            ) -> #actionskind<'input>
                        > = ::std::vec![#(&#wrappers,)*];
                    match ::lrpar::RTParserBuilder::new(&grm, &stable)
                        .recoverer(#recoverer)
                        .parse_actions(lexer, &actions, #action_fn_parse_param) {
                            (Some(#actionskind::#action_ident(x)), y) => (Some(x), y),
                            (None, y) => (None, y),
                            _ => unreachable!()
                    }
                }
            }
            kind => panic!("YaccKind {:?} not supported", kind),
        };

        // `parse()` may or may not have an argument for `%parseparam`.
        let parse_fn_parse_param = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                if let Some((name, tyname)) = grm.parse_param() {
                    let name = str::parse::<TokenStream>(name)?;
                    let tyname = str::parse::<TokenStream>(tyname)?;
                    Some(quote! {#name: #tyname})
                } else {
                    None
                }
            }
            _ => None,
        };
        // The return type of `parse()` likewise depends on the YaccKind: user actions return
        // the %start rule's action type, generic parse trees return a `Node`, and the
        // no-action kind returns only the error vector.
        let parse_fn_return_ty = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                let actiont = grm
                    .actiontype(self.user_start_ridx(grm))
                    .as_ref()
                    .map(|at| str::parse::<TokenStream>(at))
                    .transpose()?;
                quote! {
                    (::std::option::Option<#actiont>, ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>)
                }
            }
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => quote! {
                (::std::option::Option<::lrpar::Node<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #storaget>>,
                    ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>)
            },
            YaccKind::Original(YaccOriginalActionKind::NoAction) => quote! {
                ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>
            },
            _ => unreachable!(),
        };

        // Serialise the grammar and state table so they can be embedded in the generated
        // source as plain byte arrays.
        let grm_data = encode_to_vec(grm, bincode::config::standard())?;
        let stable_data = encode_to_vec(stable, bincode::config::standard())?;
        Ok(quote! {
            const __GRM_DATA: &[u8] = &[#(#grm_data,)*];
            const __STABLE_DATA: &[u8] = &[#(#stable_data,)*];

            #[allow(dead_code)]
            pub fn parse<'lexer, 'input: 'lexer>(
                 lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                 #parse_fn_parse_param
            ) -> #parse_fn_return_ty {
                let (grm, stable) = ::lrpar::ctbuilder::_reconstitute(__GRM_DATA, __STABLE_DATA);
                #run_parser
            }
        })
    }
1150
1151    fn gen_rule_consts(
1152        &self,
1153        grm: &YaccGrammar<StorageT>,
1154    ) -> Result<TokenStream, proc_macro2::LexError> {
1155        let mut toks = TokenStream::new();
1156        for ridx in grm.iter_rules() {
1157            if !grm.rule_to_prods(ridx).contains(&grm.start_prod()) {
1158                let r_const = format_ident!("R_{}", grm.rule_name_str(ridx).to_ascii_uppercase());
1159                let storage_ty = str::parse::<TokenStream>(type_name::<StorageT>())?;
1160                let ridx = UnsuffixedUsize(usize::from(ridx));
1161                toks.extend(quote! {
1162                    #[allow(dead_code)]
1163                    pub const #r_const: #storage_ty = #ridx;
1164                });
1165            }
1166        }
1167        Ok(toks)
1168    }
1169
1170    fn gen_token_epp(
1171        &self,
1172        grm: &YaccGrammar<StorageT>,
1173    ) -> Result<TokenStream, proc_macro2::LexError> {
1174        let mut tidxs = Vec::new();
1175        for tidx in grm.iter_tidxs() {
1176            tidxs.push(QuoteOption(grm.token_epp(tidx)));
1177        }
1178        let const_epp_ident = format_ident!("{}EPP", GLOBAL_PREFIX);
1179        let storage_ty = str::parse::<TokenStream>(type_name::<StorageT>())?;
1180        Ok(quote! {
1181            const #const_epp_ident: &[::std::option::Option<&str>] = &[
1182                #(#tidxs,)*
1183            ];
1184
1185            /// Return the %epp entry for token `tidx` (where `None` indicates \"the token has no
1186            /// pretty-printed value\"). Panics if `tidx` doesn't exist.
1187            #[allow(dead_code)]
1188            pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<#storage_ty>) -> ::std::option::Option<&'a str> {
1189                #const_epp_ident[usize::from(tidx)]
1190            }
1191        })
1192    }
1193
1194    /// Generate the wrappers that call user actions
1195    fn gen_wrappers(
1196        &self,
1197        grm: &YaccGrammar<StorageT>,
1198    ) -> Result<TokenStream, proc_macro2::LexError> {
1199        let (parse_paramname, parse_paramdef);
1200        match grm.parse_param() {
1201            Some((name, tyname)) => {
1202                parse_paramname = str::parse::<TokenStream>(name)?;
1203                let ty = str::parse::<TokenStream>(tyname)?;
1204                parse_paramdef = quote!(#parse_paramname: #ty);
1205            }
1206            None => {
1207                parse_paramname = quote!(());
1208                parse_paramdef = quote! {_: ()};
1209            }
1210        };
1211
1212        let mut wrappers = TokenStream::new();
1213        for pidx in grm.iter_pidxs() {
1214            let ridx = grm.prod_to_rule(pidx);
1215
1216            // Iterate over all $-arguments and replace them with their respective
1217            // element from the argument vector (e.g. $1 is replaced by args[0]). At
1218            // the same time extract &str from tokens and actiontype from nonterminals.
1219            let wrapper_fn = format_ident!("{}wrapper_{}", ACTION_PREFIX, usize::from(pidx));
1220            let ridx_var = format_ident!("{}ridx", ACTION_PREFIX);
1221            let lexer_var = format_ident!("{}lexer", ACTION_PREFIX);
1222            let span_var = format_ident!("{}span", ACTION_PREFIX);
1223            let args_var = format_ident!("{}args", ACTION_PREFIX);
1224            let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
1225            let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
1226            let actionskind = str::parse::<TokenStream>(ACTIONS_KIND)?;
1227            let edition_lifetime = if self.rust_edition != RustEdition::Rust2015 {
1228                Some(quote!('_,))
1229            } else {
1230                None
1231            };
1232            let mut wrapper_fn_body = TokenStream::new();
1233            if grm.action(pidx).is_some() {
1234                // Unpack the arguments passed to us by the drain
1235                for i in 0..grm.prod(pidx).len() {
1236                    let arg = format_ident!("{}arg_{}", ACTION_PREFIX, i + 1);
1237                    wrapper_fn_body.extend(match grm.prod(pidx)[i] {
1238                        Symbol::Rule(ref_ridx) => {
1239                            let ref_ridx = usize::from(ref_ridx);
1240                            let actionvariant = format_ident!("{}{}", ACTIONS_KIND_PREFIX, ref_ridx);
1241                            quote!{
1242                                #[allow(clippy::let_unit_value)]
1243                                let #arg = match #args_var.next().unwrap() {
1244                                    ::lrpar::parser::AStackType::ActionType(#actionskind::#actionvariant(x)) => x,
1245                                    _ => unreachable!()
1246                                };
1247                            }
1248                        }
1249                        Symbol::Token(_) => {
1250                            quote!{
1251                                let #arg = match #args_var.next().unwrap() {
1252                                    ::lrpar::parser::AStackType::Lexeme(l) => {
1253                                        if l.faulty() {
1254                                            Err(l)
1255                                        } else {
1256                                            Ok(l)
1257                                        }
1258                                    },
1259                                    ::lrpar::parser::AStackType::ActionType(_) => unreachable!()
1260                                };
1261                            }
1262                        }
1263                    })
1264                }
1265
1266                // Call the user code
1267                let args = (0..grm.prod(pidx).len())
1268                    .map(|i| format_ident!("{}arg_{}", ACTION_PREFIX, i + 1))
1269                    .collect::<Vec<_>>();
1270                let action_fn = format_ident!("{}action_{}", ACTION_PREFIX, usize::from(pidx));
1271                let actionsvariant = format_ident!("{}{}", ACTIONS_KIND_PREFIX, usize::from(ridx));
1272
1273                wrapper_fn_body.extend(match grm.actiontype(ridx) {
1274                    Some(s) if s == "()" => {
1275                        // If the rule `r` that we're calling has the unit type then Clippy will warn that
1276                        // `enum::A(wrapper_r())` is pointless. We thus have to split it into two:
1277                        // `wrapper_r(); enum::A(())`.
1278                        quote!{
1279                            #action_fn(#ridx_var, #lexer_var, #span_var, #parse_paramname, #(#args,)*);
1280                            #actionskind::#actionsvariant(())
1281                        }
1282                    }
1283                    _ => {
1284                        quote!{
1285                            #actionskind::#actionsvariant(#action_fn(#ridx_var, #lexer_var, #span_var, #parse_paramname, #(#args,)*))
1286                        }
1287                    }
1288                })
1289            } else if pidx == grm.start_prod() {
1290                wrapper_fn_body.extend(quote!(unreachable!()));
1291            } else {
1292                panic!(
1293                    "Production in rule '{}' must have an action body.",
1294                    grm.rule_name_str(grm.prod_to_rule(pidx))
1295                );
1296            };
1297
1298            let attrib = if pidx == grm.start_prod() {
1299                // The start prod has an unreachable body so it doesn't use it's variables.
1300                Some(quote!(#[allow(unused_variables)]))
1301            } else {
1302                None
1303            };
1304            wrappers.extend(quote!{
1305                #attrib
1306                fn #wrapper_fn<'lexer, 'input: 'lexer>(
1307                    #ridx_var: ::cfgrammar::RIdx<#storaget>,
1308                    #lexer_var: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
1309                    #span_var: ::cfgrammar::Span,
1310                    mut #args_var: ::std::vec::Drain<#edition_lifetime ::lrpar::parser::AStackType<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #actionskind<'input>>>,
1311                    #parse_paramdef
1312                ) -> #actionskind<'input> {
1313                    #wrapper_fn_body
1314                }
1315             })
1316        }
1317        let mut actionskindvariants = Vec::new();
1318        let actionskindhidden = format_ident!("_{}", ACTIONS_KIND_HIDDEN);
1319        let actionskind = str::parse::<TokenStream>(ACTIONS_KIND).unwrap();
1320        for ridx in grm.iter_rules() {
1321            if let Some(actiont) = grm.actiontype(ridx) {
1322                let actionskindvariant =
1323                    format_ident!("{}{}", ACTIONS_KIND_PREFIX, usize::from(ridx));
1324                let actiont = str::parse::<TokenStream>(actiont).unwrap();
1325                actionskindvariants.push(quote! {
1326                    #actionskindvariant(#actiont)
1327                })
1328            }
1329        }
1330        actionskindvariants
1331            .push(quote!(#actionskindhidden(::std::marker::PhantomData<&'input ()>)));
1332        wrappers.extend(quote! {
1333            #[allow(dead_code)]
1334            enum #actionskind<'input> {
1335                #(#actionskindvariants,)*
1336            }
1337        });
1338        Ok(wrappers)
1339    }
1340
    /// Generate the user action functions (if any).
    ///
    /// Each production's action code becomes its own function; `$`-escapes inside the code are
    /// rewritten first: `$<n>` to the nth argument, `$lexer`/`$span` to the wrapper-supplied
    /// variables, and `$$` to a literal `$`. Any grammar "programs" section is emitted verbatim.
    fn gen_user_actions(&self, grm: &YaccGrammar<StorageT>) -> Result<TokenStream, Box<dyn Error>> {
        let programs = grm
            .programs()
            .as_ref()
            .map(|s| str::parse::<TokenStream>(s))
            .transpose()?;
        let mut action_fns = TokenStream::new();
        // Convert actions to functions
        let (parse_paramname, parse_paramdef, parse_param_unit);
        match grm.parse_param() {
            Some((name, tyname)) => {
                parse_param_unit = tyname.trim() == "()";
                parse_paramname = str::parse::<TokenStream>(name)?;
                let ty = str::parse::<TokenStream>(tyname)?;
                parse_paramdef = quote!(#parse_paramname: #ty);
            }
            None => {
                parse_param_unit = true;
                parse_paramname = quote!(());
                parse_paramdef = quote! {_: ()};
            }
        };
        for pidx in grm.iter_pidxs() {
            // The synthetic start production has no user action.
            if pidx == grm.start_prod() {
                continue;
            }

            // Work out the right type for each argument
            let mut args = Vec::with_capacity(grm.prod(pidx).len());
            for i in 0..grm.prod(pidx).len() {
                let argt = match grm.prod(pidx)[i] {
                    Symbol::Rule(ref_ridx) => {
                        str::parse::<TokenStream>(grm.actiontype(ref_ridx).as_ref().unwrap())?
                    }
                    Symbol::Token(_) => {
                        // Tokens arrive as `Result<LexemeT, LexemeT>`; `Err` carries lexemes
                        // marked faulty (see the wrapper generation).
                        let lexemet =
                            str::parse::<TokenStream>(type_name::<LexerTypesT::LexemeT>())?;
                        quote!(::std::result::Result<#lexemet, #lexemet>)
                    }
                };
                let arg = format_ident!("{}arg_{}", ACTION_PREFIX, i + 1);
                args.push(quote!(mut #arg: #argt));
            }

            // If this rule's `actiont` is `()` then Clippy will warn that the return type `-> ()`
            // is pointless (which is true). We therefore avoid outputting a return type if actiont
            // is the unit type.
            let returnt = {
                let actiont = grm.actiontype(grm.prod_to_rule(pidx)).as_ref().unwrap();
                if actiont == "()" {
                    None
                } else {
                    let actiont = str::parse::<TokenStream>(actiont)?;
                    Some(quote!( -> #actiont))
                }
            };
            let action_fn = format_ident!("{}action_{}", ACTION_PREFIX, usize::from(pidx));
            let lexer_var = format_ident!("{}lexer", ACTION_PREFIX);
            let span_var = format_ident!("{}span", ACTION_PREFIX);
            let ridx_var = format_ident!("{}ridx", ACTION_PREFIX);
            let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
            let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
            // For a non-unit %parse-param, emit `let _ = <param>;` so that actions which don't
            // use the parameter don't trigger an unused-variable warning.
            let bind_parse_param = if !parse_param_unit {
                Some(quote! {let _ = #parse_paramname;})
            } else {
                None
            };

            // Iterate over all $-arguments and replace them with their respective
            // element from the argument vector (e.g. $1 is replaced by args[0]).
            let pre_action = grm.action(pidx).as_ref().ok_or_else(|| {
                format!(
                    "Rule {} has a production which is missing action code",
                    grm.rule_name_str(grm.prod_to_rule(pidx))
                )
            })?;
            // Scan `pre_action` for `$`-escapes, copying surrounding text through to `outs`
            // untouched; `last` is the index of the first not-yet-copied byte.
            let mut last = 0;
            let mut outs = String::new();
            loop {
                match pre_action[last..].find('$') {
                    Some(off) => {
                        if pre_action[last + off..].starts_with("$$") {
                            // `$$` escapes a literal `$`: copy one `$`, skip both.
                            outs.push_str(&pre_action[last..last + off + "$".len()]);
                            last = last + off + "$$".len();
                        } else if pre_action[last + off..].starts_with("$lexer") {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}lexer", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$lexer".len();
                        } else if pre_action[last + off..].starts_with("$span") {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}span", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$span".len();
                        } else if last + off + 1 < pre_action.len()
                            && pre_action[last + off + 1..].starts_with(|c: char| c.is_numeric())
                        {
                            // `$<digits>`: keep the digits and replace only the `$` with the
                            // argument prefix, so e.g. `$1` becomes `<prefix>arg_1`.
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}arg_", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$".len();
                        } else {
                            panic!(
                                "Unknown text following '$' operator: {}",
                                &pre_action[last + off..]
                            );
                        }
                    }
                    None => {
                        // No more `$`s: copy the remainder and stop.
                        outs.push_str(&pre_action[last..]);
                        break;
                    }
                }
            }

            let action_body = str::parse::<TokenStream>(&outs)?;
            action_fns.extend(quote!{
                    #[allow(clippy::too_many_arguments)]
                    fn #action_fn<'lexer, 'input: 'lexer>(#ridx_var: ::cfgrammar::RIdx<#storaget>,
                                    #lexer_var: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                                    #span_var: ::cfgrammar::Span,
                                    #parse_paramdef,
                                    #(#args,)*)#returnt {
                        #bind_parse_param
                        #action_body
                    }

            })
        }
        Ok(quote! {
            #programs
            #action_fns
        })
    }
1473
1474    /// Return the `RIdx` of the %start rule in the grammar (which will not be the same as
1475    /// grm.start_rule_idx because the latter has an additional rule insert by cfgrammar
1476    /// which then calls the user's %start rule).
1477    fn user_start_ridx(&self, grm: &YaccGrammar<StorageT>) -> RIdx<StorageT> {
1478        debug_assert_eq!(grm.prod(grm.start_prod()).len(), 1);
1479        match grm.prod(grm.start_prod())[0] {
1480            Symbol::Rule(ridx) => ridx,
1481            _ => unreachable!(),
1482        }
1483    }
1484}
1485
1486/// This function is called by generated files; it exists so that generated files don't require a
1487/// direct dependency on bincode.
1488#[doc(hidden)]
1489pub fn _reconstitute<StorageT: Decode<()> + Hash + PrimInt + Unsigned + 'static>(
1490    grm_buf: &[u8],
1491    stable_buf: &[u8],
1492) -> (YaccGrammar<StorageT>, StateTable<StorageT>) {
1493    let (grm, _) = decode_from_slice(grm_buf, bincode::config::standard()).unwrap();
1494    let (stable, _) = decode_from_slice(stable_buf, bincode::config::standard()).unwrap();
1495    (grm, stable)
1496}
1497
/// An interface to the result of [CTParserBuilder::build()].
pub struct CTParser<StorageT = u32>
where
    StorageT: Eq + Hash,
{
    // Whether the parser tables were regenerated by this build (`false` if cached output was
    // reused); exposed via `regenerated()`.
    regenerated: bool,
    // Maps token (lexeme) names to their numeric IDs; exposed via `token_map()`.
    rule_ids: HashMap<String, StorageT>,
    // `Some` only when the grammar was regenerated and contains conflicts; exposed via
    // `conflicts()`.
    conflicts: Option<(
        YaccGrammar<StorageT>,
        StateGraph<StorageT>,
        StateTable<StorageT>,
    )>,
}
1511
1512impl<StorageT> CTParser<StorageT>
1513where
1514    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
1515    usize: AsPrimitive<StorageT>,
1516{
1517    /// Returns `true` if this compile-time parser was regenerated or `false` if it was not.
1518    pub fn regenerated(&self) -> bool {
1519        self.regenerated
1520    }
1521
1522    /// Returns a [HashMap] from lexeme string types to numeric types (e.g. `INT: 2`), suitable for
1523    /// handing to a lexer to coordinate the IDs of lexer and parser.
1524    pub fn token_map(&self) -> &HashMap<String, StorageT> {
1525        &self.rule_ids
1526    }
1527
1528    /// If there are any conflicts in the grammar, return a tuple which allows users to inspect and
1529    /// pretty print them; otherwise returns `None`. If the grammar was not regenerated, this will
1530    /// always return `None`, even if the grammar actually has conflicts.
1531    ///
1532    /// **Note: The conflicts feature is currently unstable and may change in the future.**
1533    #[allow(private_interfaces)]
1534    pub fn conflicts(
1535        &self,
1536        _: crate::unstable::UnstableApi,
1537    ) -> Option<(
1538        &YaccGrammar<StorageT>,
1539        &StateGraph<StorageT>,
1540        &StateTable<StorageT>,
1541        &Conflicts<StorageT>,
1542    )> {
1543        if let Some((grm, sgraph, stable)) = &self.conflicts {
1544            return Some((grm, sgraph, stable, stable.conflicts().unwrap()));
1545        }
1546        None
1547    }
1548}
1549
// Tests dealing with the filesystem not supported under wasm32
#[cfg(all(not(target_arch = "wasm32"), test))]
mod test {
    use std::{
        fs::File,
        io::Write,
        path::{Path, PathBuf},
    };

    use super::{CTConflictsError, CTParserBuilder};
    use crate::test_utils::TestLexerTypes;
    use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind};
    use tempfile::TempDir;

    /// Write `grm` to a file named `grm.y` inside `temp`, returning the file's path.
    fn write_grammar(temp: &TempDir, grm: &str) -> PathBuf {
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        // Unlike the previous `let _ = ...`, failing to write the grammar should fail the test
        // immediately rather than produce a confusing downstream parse error.
        f.write_all(grm.as_bytes()).unwrap();
        file_path
    }

    /// Build the grammar at `file_path` with the default (error-on-conflicts) settings and
    /// assert that the build fails with a `CTConflictsError` reporting exactly `rr`
    /// reduce/reduce and `sr` shift/reduce conflicts.
    fn expect_conflicts_error(file_path: &Path, rr: usize, sr: usize) {
        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e
                    .downcast_ref::<CTConflictsError<u16>>()
                    .expect("Expected a CTConflictsError");
                assert_eq!(cs.stable.conflicts().unwrap().rr_len(), rr);
                assert_eq!(cs.stable.conflicts().unwrap().sr_len(), sr);
            }
        }
    }

    #[test]
    fn test_conflicts() {
        let temp = TempDir::new().unwrap();
        let file_path = write_grammar(
            &temp,
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .error_on_conflicts(false)
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
            .unwrap()
            .conflicts(crate::unstable::UnstableApi)
        {
            Some((_, _, _, conflicts)) => {
                assert_eq!(conflicts.sr_len(), 1);
                assert_eq!(conflicts.rr_len(), 1);
            }
            None => panic!("Expected error data"),
        }
    }

    #[test]
    fn test_conflicts_error() {
        let temp = TempDir::new().unwrap();
        let file_path = write_grammar(
            &temp,
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );
        // One shift/reduce and one reduce/reduce conflict, neither `%expect`ed.
        expect_conflicts_error(&file_path, 1, 1);
    }

    #[test]
    fn test_expect_error() {
        let temp = TempDir::new().unwrap();
        let file_path = write_grammar(
            &temp,
            "%start A
%expect 2
%%
A: 'a' 'b' | B 'b';
B: 'a';",
        );
        // `%expect 2` does not match the single shift/reduce conflict, so building errors.
        expect_conflicts_error(&file_path, 0, 1);
    }

    #[test]
    fn test_expectrr_error() {
        let temp = TempDir::new().unwrap();
        let file_path = write_grammar(
            &temp,
            "%start A
%expect 1
%expect-rr 2
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );
        // `%expect-rr 2` does not match the single reduce/reduce conflict, so building errors.
        expect_conflicts_error(&file_path, 1, 1);
    }

    #[test]
    fn test_recoverer_header() -> Result<(), Box<dyn std::error::Error>> {
        use crate::RecoveryKind as RK;
        #[rustfmt::skip]
            let recovery_kinds = [
                //  Builder,          Header setting,     Expected result.
                // -----------       ------------------  -------------------
                (Some(RK::None),      Some(RK::None),     Some(RK::None)),
                (Some(RK::None),      Some(RK::CPCTPlus), Some(RK::None)),
                (Some(RK::CPCTPlus),  Some(RK::CPCTPlus), Some(RK::CPCTPlus)),
                (Some(RK::CPCTPlus),  Some(RK::None),     Some(RK::CPCTPlus)),
                (None,                Some(RK::CPCTPlus), Some(RK::CPCTPlus)),
                (None,                Some(RK::None),     Some(RK::None)),
                (None,                None,               Some(RK::CPCTPlus)),
                (Some(RK::None),      None,               Some(RK::None)),
                (Some(RK::CPCTPlus),  None,               Some(RK::CPCTPlus)),
            ];

        for (i, (builder_arg, header_arg, expected_rk)) in
            recovery_kinds.iter().cloned().enumerate()
        {
            let y_src = if let Some(header_arg) = header_arg {
                format!(
                    "\
                    %grmtools{{yacckind: Original(NoAction), recoverer: {}}} \
                    %% \
                    start: ; \
                    ",
                    match header_arg {
                        RK::None => "RecoveryKind::None",
                        RK::CPCTPlus => "RecoveryKind::CPCTPlus",
                    }
                )
            } else {
                r#"
                    %grmtools{yacckind: Original(NoAction)}
                    %%
                    Start: ;
                    "#
                .to_string()
            };
            let out_dir = std::env::var("OUT_DIR").unwrap();
            let y_path = format!("{out_dir}/recoverykind_test_{i}.y");
            let y_out_path = format!("{y_path}.rs");
            // `fs::write` creates (or truncates) the file itself, so the previous separate
            // `File::create` call was redundant.
            std::fs::write(&y_path, y_src).unwrap();
            let mut cp_builder = CTParserBuilder::<TestLexerTypes>::new();
            cp_builder = cp_builder.output_path(y_out_path).grammar_path(y_path);
            cp_builder = if let Some(builder_arg) = builder_arg {
                cp_builder.recoverer(builder_arg)
            } else {
                cp_builder
            }
            .inspect_recoverer(Box::new(move |rk| {
                if matches!(
                    (rk, expected_rk),
                    (RK::None, Some(RK::None)) | (RK::CPCTPlus, Some(RK::CPCTPlus))
                ) {
                    Ok(())
                } else {
                    panic!("Unexpected recovery kind")
                }
            }));
            cp_builder.build()?;
        }
        Ok(())
    }
}