lrlex/ctbuilder.rs

//! Build lexers at compile-time.
2
3use bincode::Encode;
4use cfgrammar::{
5    header::{
6        GrmtoolsSectionParser, Header, HeaderError, HeaderErrorKind, HeaderValue, Namespaced,
7        Setting, Value,
8    },
9    markmap::MergeBehavior,
10    span::{Location, Span},
11};
12use glob::glob;
13use lrpar::{
14    CTParserBuilder, LexerTypes,
15    diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter},
16};
17use num_traits::{AsPrimitive, PrimInt, Unsigned};
18use proc_macro2::{Ident, TokenStream};
19use quote::{ToTokens, TokenStreamExt, format_ident, quote};
20use regex::Regex;
21use std::marker::PhantomData;
22use std::{
23    any::type_name,
24    borrow::Borrow,
25    collections::{HashMap, HashSet},
26    env::{current_dir, var},
27    error::Error,
28    fmt::{self, Debug, Display, Write as _},
29    fs::{self, File, create_dir_all, read_to_string},
30    hash::Hash,
31    io::Write,
32    path::{Path, PathBuf},
33    sync::{LazyLock, Mutex},
34};
35
36use crate::{DefaultLexerTypes, LRNonStreamingLexer, LRNonStreamingLexerDef, LexFlags, LexerDef};
37
38const RUST_FILE_EXT: &str = "rs";
39
40const ERROR: &str = "[Error]";
41const WARNING: &str = "[Warning]";
42
43static RE_TOKEN_ID: LazyLock<Regex> =
44    LazyLock::new(|| Regex::new(r"^[a-zA-Z_][a-zA-Z_0-9]*$").unwrap());
45
46static GENERATED_PATHS: LazyLock<Mutex<HashSet<PathBuf>>> =
47    LazyLock::new(|| Mutex::new(HashSet::new()));
48
49#[non_exhaustive]
50pub enum LexerKind {
51    LRNonStreamingLexer,
52}
53
54impl<T: Clone> TryFrom<&Value<T>> for LexerKind {
55    type Error = cfgrammar::header::HeaderError<T>;
56    fn try_from(it: &Value<T>) -> Result<LexerKind, Self::Error> {
57        match it {
58            Value::Flag(_, loc) => Err(HeaderError {
59                kind: HeaderErrorKind::ConversionError(
60                    "LexerKind",
61                    "Expected `LexerKind` found bool",
62                ),
63                locations: vec![loc.clone()],
64            }),
65            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
66                kind: HeaderErrorKind::ConversionError(
67                    "LexerKind",
68                    "Expected `LexerKind` found numeric",
69                ),
70                locations: vec![loc.clone()],
71            }),
72            Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
73                kind: HeaderErrorKind::ConversionError(
74                    "LexerKind",
75                    "Expected `LexerKind` found string",
76                ),
77                locations: vec![loc.clone()],
78            }),
79            Value::Setting(Setting::Constructor {
80                ctor:
81                    Namespaced {
82                        namespace: _,
83                        member: (_, loc),
84                    },
85                arg: _,
86            }) => Err(HeaderError {
87                kind: HeaderErrorKind::ConversionError(
88                    "LexerKind",
89                    "Expected `LexerKind` found constructor",
90                ),
91                locations: vec![loc.clone()],
92            }),
93            Value::Setting(Setting::Unitary(Namespaced {
94                namespace,
95                member: (member, member_loc),
96            })) => {
97                if let Some((ns, loc)) = namespace {
98                    if ns.to_lowercase() != "lexerkind" {
99                        return Err(HeaderError {
100                            kind: HeaderErrorKind::ConversionError(
101                                "LexerKind",
102                                "Expected namespace `LexerKind`",
103                            ),
104                            locations: vec![loc.clone()],
105                        });
106                    }
107                }
108                if member.to_lowercase() != "lrnonstreaminglexer" {
109                    return Err(HeaderError {
110                        kind: HeaderErrorKind::ConversionError(
111                            "LexerKind",
112                            "Unknown `LexerKind` Variant",
113                        ),
114                        locations: vec![member_loc.clone()],
115                    });
116                }
117
118                Ok(LexerKind::LRNonStreamingLexer)
119            }
120        }
121    }
122}
123
124/// Specify the visibility of the module generated by [CTLexerBuilder].
125#[derive(Clone, PartialEq, Eq, Debug)]
126#[non_exhaustive]
127pub enum Visibility {
128    /// Module-level visibility only.
129    Private,
130    /// `pub`
131    Public,
132    /// `pub(super)`
133    PublicSuper,
134    /// `pub(self)`
135    PublicSelf,
136    /// `pub(crate)`
137    PublicCrate,
138    /// `pub(in {arg})`
139    PublicIn(String),
140}
141
142impl ToTokens for Visibility {
143    fn to_tokens(&self, tokens: &mut TokenStream) {
144        tokens.extend(match self {
145            Visibility::Private => quote!(),
146            Visibility::Public => quote! {pub},
147            Visibility::PublicSuper => quote! {pub(super)},
148            Visibility::PublicSelf => quote! {pub(self)},
149            Visibility::PublicCrate => quote! {pub(crate)},
150            Visibility::PublicIn(data) => {
151                let other = str::parse::<TokenStream>(data).unwrap();
152                quote! {pub(in #other)}
153            }
154        })
155    }
156}
157
158/// Specifies the [Rust Edition] that will be emitted during code generation.
159///
160/// [Rust Edition]: https://doc.rust-lang.org/edition-guide/rust-2021/index.html
161#[derive(Clone, Copy, PartialEq, Eq, Debug)]
162#[non_exhaustive]
163pub enum RustEdition {
164    Rust2015,
165    Rust2018,
166    Rust2021,
167}
168
169/// The quote impl of `ToTokens` for `Option` prints an empty string for `None`
170/// and the inner value for `Some(inner_value)`.
171///
172/// This wrapper instead emits both `Some` and `None` variants.
173/// See: [quote #20](https://github.com/dtolnay/quote/issues/20)
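///
/// A minimal sketch of the emitted tokens (the values below are illustrative):
///
/// ```text
/// QuoteOption(Some(3u8))   // emits: ::std::option::Option::Some(3u8)
/// QuoteOption::<u8>(None)  // emits: ::std::option::Option::None
/// ```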
174struct QuoteOption<T>(Option<T>);
175
176impl<T: ToTokens> ToTokens for QuoteOption<T> {
177    fn to_tokens(&self, tokens: &mut TokenStream) {
178        tokens.append_all(match self.0 {
179            Some(ref t) => quote! { ::std::option::Option::Some(#t) },
180            None => quote! { ::std::option::Option::None },
181        });
182    }
183}
184
185/// This wrapper adds a missing impl of `ToTokens` for tuples.
/// For a tuple `(a, b)` it emits `(a.to_tokens(), b.to_tokens())`.
187struct QuoteTuple<T>(T);
188
189impl<A: ToTokens, B: ToTokens> ToTokens for QuoteTuple<(A, B)> {
190    fn to_tokens(&self, tokens: &mut TokenStream) {
191        let (a, b) = &self.0;
192        tokens.append_all(quote!((#a, #b)));
193    }
194}
195
196/// The wrapped `&str` value will be emitted with a call to `to_string()`
197struct QuoteToString<'a>(&'a str);
198
199impl ToTokens for QuoteToString<'_> {
200    fn to_tokens(&self, tokens: &mut TokenStream) {
201        let x = &self.0;
202        tokens.append_all(quote! { #x.to_string() });
203    }
204}
205
/// A string which uses `Display` for its `Debug` impl.
207struct ErrorString(String);
208impl fmt::Display for ErrorString {
209    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
210        let ErrorString(s) = self;
211        write!(f, "{}", s)
212    }
213}
214impl fmt::Debug for ErrorString {
215    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
216        let ErrorString(s) = self;
217        write!(f, "{}", s)
218    }
219}
220impl Error for ErrorString {}
221
222/// A `CTLexerBuilder` allows one to specify the criteria for building a statically generated
223/// lexer.
224pub struct CTLexerBuilder<'a, LexerTypesT: LexerTypes = DefaultLexerTypes<u32>>
225where
226    LexerTypesT::StorageT: Debug + Eq + Hash + ToTokens,
227    usize: num_traits::AsPrimitive<LexerTypesT::StorageT>,
228{
229    lrpar_config: Option<Box<dyn Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT>>>,
230    lexer_path: Option<PathBuf>,
231    output_path: Option<PathBuf>,
232    lexerkind: Option<LexerKind>,
233    mod_name: Option<&'a str>,
234    visibility: Visibility,
235    rust_edition: RustEdition,
236    rule_ids_map: Option<HashMap<String, LexerTypesT::StorageT>>,
237    allow_missing_terms_in_lexer: bool,
238    allow_missing_tokens_in_parser: bool,
239    warnings_are_errors: bool,
240    show_warnings: bool,
241    header: Header<Location>,
242    #[cfg(test)]
243    inspect_lexerkind_cb: Option<Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>>,
244}
245
246impl CTLexerBuilder<'_, DefaultLexerTypes<u32>> {
247    /// Create a new [CTLexerBuilder].
248    pub fn new() -> Self {
249        CTLexerBuilder::<DefaultLexerTypes<u32>>::new_with_lexemet()
250    }
251}
252
253impl<'a, LexerTypesT: LexerTypes<LexErrorT = crate::LRLexError> + 'static>
254    CTLexerBuilder<'a, LexerTypesT>
255where
256    LexerTypesT::StorageT:
257        'static + Debug + Eq + Hash + PrimInt + Encode + TryFrom<usize> + Unsigned + ToTokens,
258    usize: AsPrimitive<LexerTypesT::StorageT>,
259{
260    /// Create a new [CTLexerBuilder].
261    ///
262    /// `LexerTypesT::StorageT` must be an unsigned integer type (e.g. `u8`, `u16`) which is big enough
263    /// to index all the tokens, rules, and productions in the lexer and less than or equal in size
264    /// to `usize` (e.g. on a 64-bit machine `u128` would be too big). If you are lexing large
265    /// files, the additional storage requirements of larger integer types can be noticeable, and
266    /// in such cases it can be worth specifying a smaller type. `StorageT` defaults to `u32` if
267    /// unspecified.
268    ///
269    /// # Examples
270    ///
271    /// ```text
272    /// CTLexerBuilder::<DefaultLexerTypes<u8>>::new_with_lexemet()
273    ///     .lexer_in_src_dir("grm.l", None)?
274    ///     .build()?;
275    /// ```
276    pub fn new_with_lexemet() -> Self {
277        let mut header = Header::new();
278        header.set_default_merge_behavior(MergeBehavior::Ours);
279        CTLexerBuilder {
280            lrpar_config: None,
281            lexer_path: None,
282            output_path: None,
283            lexerkind: None,
284            mod_name: None,
285            visibility: Visibility::Private,
286            rust_edition: RustEdition::Rust2021,
287            rule_ids_map: None,
288            allow_missing_terms_in_lexer: false,
289            allow_missing_tokens_in_parser: false,
290            warnings_are_errors: false,
291            show_warnings: true,
292            header,
293            #[cfg(test)]
294            inspect_lexerkind_cb: None,
295        }
296    }
297
298    /// An optional convenience function to make it easier to create an (lrlex) lexer and (lrpar)
299    /// parser in one shot. The closure passed to this function will be called during
300    /// [CTLexerBuilder::build]: it will be passed an lrpar `CTParserBuilder` instance upon which
301    /// it can set whatever lrpar options are desired. [`CTLexerBuilder`] will then create both the
    /// parser and lexer and link them together as required.
303    ///
304    /// # Examples
305    ///
306    /// ```text
    /// CTLexerBuilder::new()
308    ///     .lrpar_config(|ctp| {
309    ///         ctp.yacckind(YaccKind::Grmtools)
310    ///             .grammar_in_src_dir("calc.y")
311    ///             .unwrap()
312    ///     })
313    ///     .lexer_in_src_dir("calc.l")?
314    ///     .build()?;
315    /// ```
316    pub fn lrpar_config<F>(mut self, config_func: F) -> Self
317    where
318        F: 'static + Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT>,
319    {
320        self.lrpar_config = Some(Box::new(config_func));
321        self
322    }
323
324    /// Set the input lexer path to a file relative to this project's `src` directory. This will
325    /// also set the output path (i.e. you do not need to call [CTLexerBuilder::output_path]).
326    ///
327    /// For example if `a/b.l` is passed as `inp` then [CTLexerBuilder::build] will:
328    ///   * use `src/a/b.l` as the input file.
329    ///   * write output to a file which can then be imported by calling `lrlex_mod!("a/b.l")`.
330    ///   * create a module in that output file named `b_l`.
331    ///
332    /// You can override the output path and/or module name by calling
333    /// [CTLexerBuilder::output_path] and/or [CTLexerBuilder::mod_name], respectively, after
334    /// calling this function.
335    ///
336    /// This is a convenience function that makes it easier to compile lexer files stored in a
337    /// project's `src/` directory: please see [CTLexerBuilder::build] for additional constraints
338    /// and information about the generated files. Note also that each `.l` file can only be
339    /// processed once using this function: if you want to generate multiple lexers from a single
340    /// `.l` file, you will need to use [CTLexerBuilder::output_path].
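    ///
    /// As a sketch, a `build.rs` using this method might look as follows (the file name `a/b.l`
    /// is illustrative):
    ///
    /// ```text
    /// CTLexerBuilder::new()
    ///     .lexer_in_src_dir("a/b.l")?
    ///     .build()?;
    /// ```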
341    pub fn lexer_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
342    where
343        P: AsRef<Path>,
344    {
345        if !srcp.as_ref().is_relative() {
346            return Err(format!(
347                "Lexer path '{}' must be a relative path.",
348                srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
349            )
350            .into());
351        }
352
353        let mut lexp = current_dir()?;
354        lexp.push("src");
355        lexp.push(srcp.as_ref());
356        self.lexer_path = Some(lexp);
357
358        let mut outp = PathBuf::new();
359        outp.push(var("OUT_DIR").unwrap());
360        outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
361        create_dir_all(&outp)?;
362        let mut leaf = srcp
363            .as_ref()
364            .file_name()
365            .unwrap()
366            .to_str()
367            .unwrap()
368            .to_owned();
369        write!(leaf, ".{}", RUST_FILE_EXT).ok();
370        outp.push(leaf);
371        Ok(self.output_path(outp))
372    }
373
374    /// Set the input lexer path to `inp`. If specified, you must also call
375    /// [CTLexerBuilder::output_path]. In general it is easier to use
376    /// [CTLexerBuilder::lexer_in_src_dir].
377    pub fn lexer_path<P>(mut self, inp: P) -> Self
378    where
379        P: AsRef<Path>,
380    {
381        self.lexer_path = Some(inp.as_ref().to_owned());
382        self
383    }
384
385    /// Set the output lexer path to `outp`. Note that there are no requirements on `outp`: the
386    /// file can exist anywhere you can create a valid [Path] to. However, if you wish to use
387    /// [crate::lrlex_mod!] you will need to make sure that `outp` is in
388    /// [std::env::var]`("OUT_DIR")` or one of its subdirectories.
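    ///
    /// As a sketch, an output path under `OUT_DIR` might be constructed as follows (the file
    /// names are illustrative):
    ///
    /// ```text
    /// let mut outp = PathBuf::from(env::var("OUT_DIR")?);
    /// outp.push("my_lexer.l.rs");
    /// CTLexerBuilder::new()
    ///     .lexer_path("lexers/my_lexer.l")
    ///     .output_path(outp)
    ///     .build()?;
    /// ```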
389    pub fn output_path<P>(mut self, outp: P) -> Self
390    where
391        P: AsRef<Path>,
392    {
393        self.output_path = Some(outp.as_ref().to_owned());
394        self
395    }
396
397    /// Set the type of lexer to be generated to `lexerkind`.
398    pub fn lexerkind(mut self, lexerkind: LexerKind) -> Self {
399        self.lexerkind = Some(lexerkind);
400        self
401    }
402
403    /// Set the generated module name to `mod_name`. If no module name is specified,
    /// [CTLexerBuilder::build] will attempt to create a sensible default based on
405    /// the input filename.
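    ///
    /// For example (the names below are illustrative), the following sketch generates a module
    /// named `calc_lexer` instead of the default `calc_l`:
    ///
    /// ```text
    /// CTLexerBuilder::new()
    ///     .lexer_in_src_dir("calc.l")?
    ///     .mod_name("calc_lexer")
    ///     .build()?;
    /// ```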
406    pub fn mod_name(mut self, mod_name: &'a str) -> Self {
407        self.mod_name = Some(mod_name);
408        self
409    }
410
411    /// Set the visibility of the generated module to `vis`. Defaults to `Visibility::Private`.
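    ///
    /// For example, with `Visibility::PublicCrate` the generated module is emitted as
    /// `pub(crate) mod ...`; a sketch (the file name is illustrative):
    ///
    /// ```text
    /// CTLexerBuilder::new()
    ///     .lexer_in_src_dir("calc.l")?
    ///     .visibility(Visibility::PublicCrate)
    ///     .build()?;
    /// ```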
412    pub fn visibility(mut self, vis: Visibility) -> Self {
413        self.visibility = vis;
414        self
415    }
416
    /// Sets the Rust edition to be used for generated code. Defaults to the latest edition of
    /// Rust supported by grmtools.
419    pub fn rust_edition(mut self, edition: RustEdition) -> Self {
420        self.rust_edition = edition;
421        self
422    }
423
424    /// Set this lexer builder's map of rule IDs to `rule_ids_map`. By default, lexing rules have
425    /// arbitrary, but distinct, IDs. Setting the map of rule IDs (from rule names to `StorageT`)
426    /// allows users to synchronise a lexer and parser and to check that all rules are used by both
    /// parts.
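    ///
    /// A sketch of synchronising with a separately built parser (the file names are
    /// illustrative):
    ///
    /// ```text
    /// let ctp = CTParserBuilder::new()
    ///     .grammar_in_src_dir("calc.y")?
    ///     .build()?;
    /// CTLexerBuilder::new()
    ///     .rule_ids_map(ctp.token_map())
    ///     .lexer_in_src_dir("calc.l")?
    ///     .build()?;
    /// ```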
428    pub fn rule_ids_map<T: std::borrow::Borrow<HashMap<String, LexerTypesT::StorageT>> + Clone>(
429        mut self,
430        rule_ids_map: T,
431    ) -> Self {
432        self.rule_ids_map = Some(rule_ids_map.borrow().to_owned());
433        self
434    }
435
436    /// Statically compile the `.l` file specified by [CTLexerBuilder::lexer_path()] into Rust,
437    /// placing the output into the file specified by [CTLexerBuilder::output_path()].
438    ///
439    /// The generated module follows the form:
440    ///
441    /// ```text
442    ///    mod modname {
443    ///      pub fn lexerdef() -> LexerDef<LexerTypesT> { ... }
444    ///
445    ///      ...
446    ///    }
447    /// ```
448    ///
449    /// where:
450    ///  * `modname` is either:
451    ///    * the module name specified by [CTLexerBuilder::mod_name()]
452    ///    * or, if no module name was explicitly specified, then for the file `/a/b/c.l` the
    ///      module name is `c_l` (i.e. the file's leaf name, minus its extension, with a suffix of
    ///      `_l`).
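    ///
    /// As a sketch of the overall workflow (the file and module names are illustrative), the
    /// generated module can then be imported and used at run-time:
    ///
    /// ```text
    /// // build.rs
    /// CTLexerBuilder::new().lexer_in_src_dir("calc.l")?.build()?;
    ///
    /// // src/main.rs
    /// lrlex_mod!("calc.l");
    /// let lexerdef = calc_l::lexerdef();
    /// let lexer = lexerdef.lexer("1 + 2");
    /// ```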
455    pub fn build(mut self) -> Result<CTLexer, Box<dyn Error>> {
456        let lexerp = self
457            .lexer_path
458            .as_ref()
459            .expect("lexer_path must be specified before processing.");
460        let outp = self
461            .output_path
462            .as_ref()
463            .expect("output_path must be specified before processing.");
464
465        {
466            let mut lk = GENERATED_PATHS.lock().unwrap();
467            if lk.contains(outp.as_path()) {
468                return Err(format!("Generating two lexers to the same path ('{}') is not allowed: use CTLexerBuilder::output_path (and, optionally, CTLexerBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
469            }
470            lk.insert(outp.clone());
471        }
472        let lex_src = read_to_string(lexerp)
473            .map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
474        let lex_diag = SpannedDiagnosticFormatter::new(&lex_src, lexerp);
475        let mut header = self.header;
476        let (parsed_header, _) = GrmtoolsSectionParser::new(&lex_src, false)
477            .parse()
478            .map_err(|es| {
479                let mut out = String::new();
480                out.push_str(&format!(
481                    "\n{ERROR}{}\n",
482                    lex_diag.file_location_msg(" parsing the `%grmtools` section", None)
483                ));
484                for e in es {
485                    out.push_str(&indent("     ", &lex_diag.format_error(e).to_string()));
486                    out.push('\n');
487                }
488                ErrorString(out)
489            })?;
490        header.merge_from(parsed_header)?;
491        header.mark_used(&"lexerkind".to_string());
492        let lexerkind = match self.lexerkind {
493            Some(lexerkind) => lexerkind,
494            None => {
495                if let Some(HeaderValue(_, lk_val)) = header.get("lexerkind") {
496                    LexerKind::try_from(lk_val)?
497                } else {
498                    LexerKind::LRNonStreamingLexer
499                }
500            }
501        };
502        #[cfg(test)]
503        if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb {
504            inspect_lexerkind_cb(lexerkind)?
505        }
506        let (lexerdef, lex_flags): (LRNonStreamingLexerDef<LexerTypesT>, LexFlags) =
507            match lexerkind {
508                LexerKind::LRNonStreamingLexer => {
509                    let lex_flags = LexFlags::try_from(&mut header)?;
510                    let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
511                        &lex_src, lex_flags,
512                    )
513                    .map_err(|errs| {
514                        let mut out = String::new();
515                        out.push_str(&format!(
516                            "\n{ERROR}{}\n",
517                            lex_diag.file_location_msg("", None)
518                        ));
519                        for e in errs {
520                            out.push_str(&indent("     ", &lex_diag.format_error(e).to_string()));
521                            out.push('\n');
522                        }
523                        ErrorString(out)
524                    })?;
525                    let lex_flags = lexerdef.lex_flags().cloned();
526                    (lexerdef, lex_flags.unwrap())
527                }
528            };
529
530        let ct_parser = if let Some(ref lrcfg) = self.lrpar_config {
531            let mut closure_lexerdef = lexerdef.clone();
532            let mut ctp = CTParserBuilder::<LexerTypesT>::new().inspect_rt(Box::new(
533                move |yacc_header, rtpb, rule_ids_map, grm_path| {
534                    let owned_map = rule_ids_map
535                        .iter()
536                        .map(|(x, y)| (&**x, *y))
537                        .collect::<HashMap<_, _>>();
538                    closure_lexerdef.set_rule_ids(&owned_map);
539                    yacc_header.mark_used(&"test_files".to_string());
540                    let test_glob = yacc_header.get("test_files");
541                    match test_glob {
542                        Some(HeaderValue(_, Value::Setting(Setting::String(test_files, _)))) => {
543                            let path_joined = grm_path.parent().unwrap().join(test_files);
544                            for path in
545                                glob(&path_joined.to_string_lossy()).map_err(|e| e.to_string())?
546                            {
547                                let path = path?;
548                                if let Some(ext) = path.extension() {
549                                    if let Some(ext) = ext.to_str() {
550                                        if ext.starts_with("grm") {
551                                            Err(ErrorString("test_files extensions beginning with `grm` are reserved.".into()))?
552                                        }
553                                    }
554                                }
555                                let input = fs::read_to_string(&path)?;
556                                let l: LRNonStreamingLexer<LexerTypesT> =
557                                    closure_lexerdef.lexer(&input);
558                                for e in rtpb.parse_map(&l, &|_| (), &|_, _| ()).1 {
559                                    Err(format!("parsing {}: {}", path.display(), e))?
560                                }
561                            }
562                            Ok(())
563                        }
564                        Some(_) => Err("Invalid value for setting 'test_files'".into()),
565                        None => Ok(()),
566                    }
567                },
568            ));
569            ctp = lrcfg(ctp);
570            let ct_parser = ctp.build()?;
571            self.rule_ids_map = Some(ct_parser.token_map().to_owned());
572            Some(ct_parser)
573        } else {
574            None
575        };
576
577        let mut lexerdef = Box::new(lexerdef);
578        let unused_header_values = header.unused();
579        if !unused_header_values.is_empty() {
580            return Err(
581                format!("Unused header values: {}", unused_header_values.join(", ")).into(),
582            );
583        }
584
585        let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map {
586            Some(ref rim) => {
587                // Convert from HashMap<String, _> to HashMap<&str, _>
588                let owned_map = rim
589                    .iter()
590                    .map(|(x, y)| (&**x, *y))
591                    .collect::<HashMap<_, _>>();
592                let (x, y) = lexerdef.set_rule_ids_spanned(&owned_map);
593                (
594                    x.map(|a| a.iter().map(|&b| b.to_string()).collect::<HashSet<_>>()),
595                    y.map(|a| {
596                        a.iter()
597                            .map(|(b, span)| (b.to_string(), *span))
598                            .collect::<HashSet<_>>()
599                    }),
600                )
601            }
602            None => (None, None),
603        };
604
605        let mut has_unallowed_missing = false;
606        let err_indent = " ".repeat(ERROR.len());
607        if !self.allow_missing_terms_in_lexer {
608            if let Some(ref mfl) = missing_from_lexer {
609                if let Some(ct_parser) = &ct_parser {
610                    let grm = ct_parser.yacc_grammar();
611                    let token_spans = mfl
612                        .iter()
613                        .map(|name| {
614                            ct_parser
615                                .yacc_grammar()
616                                .token_span(*grm.tokens_map().get(name.as_str()).unwrap())
617                                .expect("Given token should have a span")
618                        })
619                        .collect::<Vec<_>>();
620
621                    let yacc_diag = SpannedDiagnosticFormatter::new(
622                        ct_parser.grammar_src(),
623                        ct_parser.grammar_path(),
624                    );
625
626                    eprintln!(
627                        "{ERROR} these tokens are not referenced in the lexer but defined as follows"
628                    );
629                    eprintln!(
630                        "{err_indent} {}",
631                        yacc_diag.file_location_msg("in the grammar", None)
632                    );
633                    for span in token_spans {
634                        eprintln!(
635                            "{}",
636                            yacc_diag.underline_span_with_text(
637                                span,
638                                "Missing from lexer".to_string(),
639                                '^'
640                            )
641                        );
642                    }
643                    eprintln!();
644                } else {
645                    eprintln!(
646                        "{ERROR} the following tokens are used in the grammar but are not defined in the lexer:"
647                    );
648                    for n in mfl {
649                        eprintln!("    {}", n);
650                    }
651                }
652                has_unallowed_missing = true;
653            }
654        }
655        if !self.allow_missing_tokens_in_parser && self.show_warnings {
656            if let Some(ref mfp) = missing_from_parser {
657                let error_prefix = if self.warnings_are_errors {
658                    ERROR
659                } else {
660                    WARNING
661                };
662                let err_indent = " ".repeat(error_prefix.len());
663                let mut outs = Vec::new();
664                outs.push(format!("{error_prefix} these tokens are not referenced in the grammar but defined as follows"));
665                outs.push(format!(
666                    "{err_indent} {}",
667                    lex_diag.file_location_msg("in the lexer", None)
668                ));
669                for (_, span) in mfp {
670                    let error_contents = lex_diag.underline_span_with_text(
671                        *span,
672                        "Missing from parser".to_string(),
673                        '^',
674                    );
675                    outs.extend(error_contents.lines().map(|s| s.to_string()));
676                }
677
678                for s in outs {
679                    if !self.warnings_are_errors && std::env::var("OUT_DIR").is_ok() {
680                        println!("cargo:warning={}", s)
681                    } else {
682                        eprintln!("{}", s);
683                    }
684                }
685
686                has_unallowed_missing |= self.warnings_are_errors;
687            }
688        }
689        if has_unallowed_missing {
690            fs::remove_file(outp).ok();
691            panic!();
692        }
693
694        let mod_name = match self.mod_name {
695            Some(s) => s.to_owned(),
696            None => {
697                // The user hasn't specified a module name, so we create one automatically: what we
                // do is strip off all the filename extensions (note that the file name may contain
                // more than one extension, so we potentially have to strip off more than one) and
700                // then add `_l` to the end.
701                let mut stem = lexerp.to_str().unwrap();
702                loop {
703                    let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
704                    if stem == new_stem {
705                        break;
706                    }
707                    stem = new_stem;
708                }
709                format!("{}_l", stem)
710            }
711        };
712        let mod_name = format_ident!("{}", mod_name);
713        let mut lexerdef_func_impl = {
714            let LexFlags {
715                allow_wholeline_comments,
716                dot_matches_new_line,
717                multi_line,
718                octal,
719                posix_escapes,
720                case_insensitive,
721                unicode,
722                swap_greed,
723                ignore_whitespace,
724                size_limit,
725                dfa_size_limit,
726                nest_limit,
727            } = lex_flags;
728            let allow_wholeline_comments = QuoteOption(allow_wholeline_comments);
729            let dot_matches_new_line = QuoteOption(dot_matches_new_line);
730            let multi_line = QuoteOption(multi_line);
731            let octal = QuoteOption(octal);
732            let posix_escapes = QuoteOption(posix_escapes);
733            let case_insensitive = QuoteOption(case_insensitive);
734            let unicode = QuoteOption(unicode);
735            let swap_greed = QuoteOption(swap_greed);
736            let ignore_whitespace = QuoteOption(ignore_whitespace);
737            let size_limit = QuoteOption(size_limit);
738            let dfa_size_limit = QuoteOption(dfa_size_limit);
739            let nest_limit = QuoteOption(nest_limit);
740
741            // Code gen for the lexerdef() `lex_flags` variable.
742            quote! {
743                let mut lex_flags = ::lrlex::DEFAULT_LEX_FLAGS;
744                lex_flags.allow_wholeline_comments = #allow_wholeline_comments.or(::lrlex::DEFAULT_LEX_FLAGS.allow_wholeline_comments);
745                lex_flags.dot_matches_new_line = #dot_matches_new_line.or(::lrlex::DEFAULT_LEX_FLAGS.dot_matches_new_line);
746                lex_flags.multi_line = #multi_line.or(::lrlex::DEFAULT_LEX_FLAGS.multi_line);
747                lex_flags.octal = #octal.or(::lrlex::DEFAULT_LEX_FLAGS.octal);
748                lex_flags.posix_escapes = #posix_escapes.or(::lrlex::DEFAULT_LEX_FLAGS.posix_escapes);
749                lex_flags.case_insensitive = #case_insensitive.or(::lrlex::DEFAULT_LEX_FLAGS.case_insensitive);
750                lex_flags.unicode = #unicode.or(::lrlex::DEFAULT_LEX_FLAGS.unicode);
751                lex_flags.swap_greed = #swap_greed.or(::lrlex::DEFAULT_LEX_FLAGS.swap_greed);
752                lex_flags.ignore_whitespace = #ignore_whitespace.or(::lrlex::DEFAULT_LEX_FLAGS.ignore_whitespace);
753                lex_flags.size_limit = #size_limit.or(::lrlex::DEFAULT_LEX_FLAGS.size_limit);
754                lex_flags.dfa_size_limit = #dfa_size_limit.or(::lrlex::DEFAULT_LEX_FLAGS.dfa_size_limit);
755                lex_flags.nest_limit = #nest_limit.or(::lrlex::DEFAULT_LEX_FLAGS.nest_limit);
756                let lex_flags = lex_flags;
757            }
758        };
759        {
760            let start_states = lexerdef.iter_start_states();
761            let rules = lexerdef.iter_rules().map(|r| {
762                    let tok_id = QuoteOption(r.tok_id);
763                    let n = QuoteOption(r.name().map(QuoteToString));
764                    let target_state =
765                        QuoteOption(r.target_state().map(|(x, y)| QuoteTuple((x, y))));
766                    let n_span = r.name_span();
767                    let regex = QuoteToString(&r.re_str);
768                    let start_states = r.start_states();
769                    // Code gen to construct a rule.
770                    //
                    // We cannot `impl ToTokens for Rule` because `Rule` never stores `lex_flags`;
                    // thus we reference the local `lex_flags` variable bound earlier.
773                    quote! {
774                        Rule::new(::lrlex::unstable_api::InternalPublicApi, #tok_id, #n, #n_span, #regex.to_string(),
775                                vec![#(#start_states),*], #target_state, &lex_flags).unwrap()
776                    }
777                });
778            // Code gen for `lexerdef()`s rules and the stack of `start_states`.
779            lexerdef_func_impl.append_all(quote! {
780                let start_states: Vec<StartState> = vec![#(#start_states),*];
781                let rules = vec![#(#rules),*];
782            });
783        }
784        let lexerdef_ty = match lexerkind {
785            LexerKind::LRNonStreamingLexer => {
786                quote!(::lrlex::LRNonStreamingLexerDef)
787            }
788        };
789        // Code gen for the lexerdef() return value referencing variables bound earlier.
790        lexerdef_func_impl.append_all(quote! {
791            #lexerdef_ty::from_rules(start_states, rules)
792        });
793
794        let mut token_consts = TokenStream::new();
795        if let Some(rim) = self.rule_ids_map {
796            let mut rim_sorted = Vec::from_iter(rim.iter());
797            rim_sorted.sort_by_key(|(k, _)| *k);
798            for (name, id) in rim_sorted {
799                if RE_TOKEN_ID.is_match(name) {
800                    let tok_ident = format_ident!("N_{}", name.to_ascii_uppercase());
801                    let storaget =
802                        str::parse::<TokenStream>(type_name::<LexerTypesT::StorageT>()).unwrap();
803                    // Code gen for the constant token values.
804                    let tok_const = quote! {
805                        #[allow(dead_code)]
806                        pub const #tok_ident: #storaget = #id;
807                    };
808                    token_consts.extend(tok_const)
809                }
810            }
811        }
812        let token_consts = token_consts.into_iter();
813        let out_tokens = {
814            let lexerdef_param = str::parse::<TokenStream>(type_name::<LexerTypesT>()).unwrap();
815            let mod_vis = self.visibility;
816            // Code gen for the generated module.
817            quote! {
818                #mod_vis mod #mod_name {
819                    use ::lrlex::{LexerDef, Rule, StartState};
820                    #[allow(dead_code)]
821                    pub fn lexerdef() -> #lexerdef_ty<#lexerdef_param> {
822                        #lexerdef_func_impl
823                    }
824
825                    #(#token_consts)*
826                }
827            }
828        };
829        // Try and run a code formatter on the generated code.
830        let unformatted = out_tokens.to_string();
831        let mut outs = String::new();
832        // Record the time that this version of lrlex was built. If the source code changes and rustc
833        // forces a recompile, this will change this value, causing anything which depends on this
834        // build of lrlex to be recompiled too.
835        let timestamp = env!("VERGEN_BUILD_TIMESTAMP");
836        write!(outs, "// lrlex build time: {}\n\n", quote!(#timestamp),).ok();
837        outs.push_str(
838            &syn::parse_str(&unformatted)
839                .map(|syntax_tree| prettyplease::unparse(&syntax_tree))
840                .unwrap_or(unformatted),
841        );
842        // If the file we're about to write out already exists with the same contents, then we
843        // don't overwrite it (since that will force a recompile of the file, and relinking of the
844        // binary etc).
845        if let Ok(curs) = read_to_string(outp) {
846            if curs == outs {
847                return Ok(CTLexer {
848                    missing_from_lexer,
849                    missing_from_parser,
850                });
851            }
852        }
853        let mut f = File::create(outp)?;
854        f.write_all(outs.as_bytes())?;
855        Ok(CTLexer {
856            missing_from_lexer,
857            missing_from_parser,
858        })
859    }
860
861    /// Given the filename `a/b.l` as input, statically compile the file `src/a/b.l` into a Rust
862    /// module which can then be imported using `lrlex_mod!("a/b.l")`. This is a convenience
863    /// function around [`process_file`](struct.CTLexerBuilder.html#method.process_file) which makes
864    /// it easier to compile `.l` files stored in a project's `src/` directory: please see
865    /// [`process_file`](#method.process_file) for additional constraints and information about the
866    /// generated files.
867    #[deprecated(
868        since = "0.11.0",
869        note = "Please use lexer_in_src_dir() and build() instead"
870    )]
871    #[allow(deprecated)]
872    pub fn process_file_in_src(
873        self,
874        srcp: &str,
875    ) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>> {
876        let mut inp = current_dir()?;
877        inp.push("src");
878        inp.push(srcp);
879        let mut outp = PathBuf::new();
880        outp.push(var("OUT_DIR").unwrap());
881        outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
882        create_dir_all(&outp)?;
883        let mut leaf = Path::new(srcp)
884            .file_name()
885            .unwrap()
886            .to_str()
887            .unwrap()
888            .to_owned();
889        write!(leaf, ".{}", RUST_FILE_EXT).ok();
890        outp.push(leaf);
891        self.process_file(inp, outp)
892    }
893
894    /// Statically compile the `.l` file `inp` into Rust, placing the output into the file `outp`.
895    /// The latter defines a module as follows:
896    ///
897    /// ```text
898    ///    mod modname {
899    ///      pub fn lexerdef() -> LexerDef<LexerTypesT::StorageT> { ... }
900    ///
901    ///      ...
902    ///    }
903    /// ```
904    ///
905    /// where:
906    ///  * `modname` is either:
    ///    * the module name specified by [`mod_name`](#method.mod_name)
908    ///    * or, if no module name was explicitly specified, then for the file `/a/b/c.l` the
    ///      module name is `c_l` (i.e. the file's leaf name, minus its extension, with a suffix of
    ///      `_l`).
911    #[deprecated(
912        since = "0.11.0",
913        note = "Please use lexer_in_src_dir() and build() instead"
914    )]
915    pub fn process_file<P, Q>(
916        mut self,
917        inp: P,
918        outp: Q,
919    ) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>>
920    where
921        P: AsRef<Path>,
922        Q: AsRef<Path>,
923    {
924        self.lexer_path = Some(inp.as_ref().to_owned());
925        self.output_path = Some(outp.as_ref().to_owned());
926        let cl = self.build()?;
927        Ok((
928            cl.missing_from_lexer().map(|x| x.to_owned()),
929            cl.missing_from_parser()
930                .map(|x| x.iter().map(|(n, _)| n.to_owned()).collect::<HashSet<_>>()),
931        ))
932    }
933
934    /// If passed false, tokens used in the grammar but not defined in the lexer will cause a
935    /// panic at lexer generation time. Defaults to false.
936    pub fn allow_missing_terms_in_lexer(mut self, allow: bool) -> Self {
937        self.allow_missing_terms_in_lexer = allow;
938        self
939    }
940
941    /// If passed false, tokens defined in the lexer but not used in the grammar will cause a
942    /// warning at lexer generation time. Defaults to false (since lexers sometimes define tokens such
943    /// as reserved words, which are intentionally not in the grammar).
944    pub fn allow_missing_tokens_in_parser(mut self, allow: bool) -> Self {
945        self.allow_missing_tokens_in_parser = allow;
946        self
947    }
948
949    /// If set to true, [CTLexerBuilder::build] will return an error if the given lexer contains
    /// any warnings. Defaults to `false`.
951    pub fn warnings_are_errors(mut self, flag: bool) -> Self {
952        self.warnings_are_errors = flag;
953        self
954    }
955
    /// If set to true, [CTLexerBuilder::build] will print warnings to stderr, or via cargo when
957    /// running under cargo. Defaults to `true`.
958    pub fn show_warnings(mut self, flag: bool) -> Self {
959        self.show_warnings = flag;
960        self
961    }
962
    /// Enables `// comment` style parsing according to `flag`.
    /// When enabled, comments can appear at the beginning of a line,
    /// and regular expressions containing the `/` character must escape it as `\/`.
966    ///
967    /// The default value is `false`.
968    ///
969    /// Setting this flag will override the same flag within a `%grmtools` section.
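    ///
    /// A sketch of enabling the flag via the builder (the file name is illustrative):
    ///
    /// ```text
    /// CTLexerBuilder::new()
    ///     .allow_wholeline_comments(true)
    ///     .lexer_in_src_dir("c.l")?
    ///     .build()?;
    ///
    /// // In c.l, whole-line comments such as this are then permitted, and a literal
    /// // `/` in a regex must be written as `\/`.
    /// ```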
970    pub fn allow_wholeline_comments(mut self, flag: bool) -> Self {
971        let key = "allow_wholeline_comments".to_string();
972        self.header.insert(
973            key,
974            HeaderValue(
975                Location::Other("CTLexerBuilder".to_string()),
976                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
977            ),
978        );
979        self
980    }
981
982    /// Sets the `regex::RegexBuilder` option of the same name.
983    /// The default value is `true`.
984    ///
985    /// Setting this flag will override the same flag within a `%grmtools` section.
986    pub fn dot_matches_new_line(mut self, flag: bool) -> Self {
987        let key = "dot_matches_new_line".to_string();
988        self.header.insert(
989            key,
990            HeaderValue(
991                Location::Other("CTLexerBuilder".to_string()),
992                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
993            ),
994        );
995        self
996    }
997
998    /// Sets the `regex::RegexBuilder` option of the same name.
999    /// The default value is `true`.
1000    ///
1001    /// Setting this flag will override the same flag within a `%grmtools` section.
1002    pub fn multi_line(mut self, flag: bool) -> Self {
1003        let key = "multi_line".to_string();
1004        self.header.insert(
1005            key,
1006            HeaderValue(
1007                Location::Other("CTLexerBuilder".to_string()),
1008                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
1009            ),
1010        );
1011        self
1012    }
1013
    /// Enables POSIX lex compatible escape sequences according to `flag`.
1015    /// The default value is `false`.
1016    ///
1017    /// Setting this flag will override the same flag within a `%grmtools` section.
1018    pub fn posix_escapes(mut self, flag: bool) -> Self {
1019        let key = "posix_escapes".to_string();
1020        self.header.insert(
1021            key,
1022            HeaderValue(
1023                Location::Other("CTLexerBuilder".to_string()),
1024                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
1025            ),
1026        );
1027        self
1028    }
1029
1030    /// Sets the `regex::RegexBuilder` option of the same name.
1031    /// The default value is `true`.
1032    ///
1033    /// Setting this flag will override the same flag within a `%grmtools` section.
1034    pub fn octal(mut self, flag: bool) -> Self {
1035        let key = "octal".to_string();
1036        self.header.insert(
1037            key,
1038            HeaderValue(
1039                Location::Other("CTLexerBuilder".to_string()),
1040                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
1041            ),
1042        );
1043        self
1044    }
1045
1046    /// Sets the `regex::RegexBuilder` option of the same name.
1047    /// Default value is specified by regex.
1048    ///
1049    /// Setting this flag will override the same flag within a `%grmtools` section.
1050    pub fn swap_greed(mut self, flag: bool) -> Self {
1051        let key = "swap_greed".to_string();
1052        self.header.insert(
1053            key,
1054            HeaderValue(
1055                Location::Other("CTLexerBuilder".to_string()),
1056                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
1057            ),
1058        );
1059        self
1060    }
1061
1062    /// Sets the `regex::RegexBuilder` option of the same name.
1063    /// Default value is specified by regex.
1064    ///
1065    /// Setting this flag will override the same flag within a `%grmtools` section.
1066    pub fn ignore_whitespace(mut self, flag: bool) -> Self {
1067        let key = "ignore_whitespace".to_string();
1068        self.header.insert(
1069            key,
1070            HeaderValue(
1071                Location::Other("CTLexerBuilder".to_string()),
1072                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
1073            ),
1074        );
1075        self
1076    }
1077
1078    /// Sets the `regex::RegexBuilder` option of the same name.
1079    /// Default value is specified by regex.
1080    ///
1081    /// Setting this flag will override the same flag within a `%grmtools` section.
1082    pub fn unicode(mut self, flag: bool) -> Self {
1083        let key = "unicode".to_string();
1084        self.header.insert(
1085            key,
1086            HeaderValue(
1087                Location::Other("CTLexerBuilder".to_string()),
1088                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
1089            ),
1090        );
1091        self
1092    }
1093
1094    /// Sets the `regex::RegexBuilder` option of the same name.
1095    /// Default value is specified by regex.
1096    ///
1097    /// Setting this flag will override the same flag within a `%grmtools` section.
1098    pub fn case_insensitive(mut self, flag: bool) -> Self {
1099        let key = "case_insensitive".to_string();
1100        self.header.insert(
1101            key,
1102            HeaderValue(
1103                Location::Other("CTLexerBuilder".to_string()),
1104                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
1105            ),
1106        );
1107        self
1108    }
1109
1110    /// Sets the `regex::RegexBuilder` option of the same name.
1111    /// Default value is specified by regex.
1112    ///
1113    /// Setting this flag will override the same flag within a `%grmtools` section.
1114    pub fn size_limit(mut self, sz: usize) -> Self {
1115        let key = "size_limit".to_string();
1116        self.header.insert(
1117            key,
1118            HeaderValue(
1119                Location::Other("CTLexerBuilder".to_string()),
1120                Value::Setting(Setting::Num(
1121                    sz as u64,
1122                    Location::Other("CTLexerBuilder".to_string()),
1123                )),
1124            ),
1125        );
1126        self
1127    }
1128
1129    /// Sets the `regex::RegexBuilder` option of the same name.
1130    /// Default value is specified by regex.
1131    ///
1132    /// Setting this flag will override the same flag within a `%grmtools` section.
1133    pub fn dfa_size_limit(mut self, sz: usize) -> Self {
1134        let key = "dfa_size_limit".to_string();
1135        self.header.insert(
1136            key,
1137            HeaderValue(
1138                Location::Other("CTLexerBuilder".to_string()),
1139                Value::Setting(Setting::Num(
1140                    sz as u64,
1141                    Location::Other("CTLexerBuilder".to_string()),
1142                )),
1143            ),
1144        );
1145        self
1146    }
1147
1148    /// Sets the `regex::RegexBuilder` option of the same name.
1149    /// Default value is specified by regex.
1150    ///
1151    /// Setting this flag will override the same flag within a `%grmtools` section.
1152    pub fn nest_limit(mut self, lim: u32) -> Self {
1153        let key = "nest_limit".to_string();
1154        self.header.insert(
1155            key,
1156            HeaderValue(
1157                Location::Other("CTLexerBuilder".to_string()),
1158                Value::Setting(Setting::Num(
1159                    lim as u64,
1160                    Location::Other("CTLexerBuilder".to_string()),
1161                )),
1162            ),
1163        );
1164        self
1165    }
1166
1167    #[cfg(test)]
1168    pub fn inspect_lexerkind(
1169        mut self,
1170        cb: Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>,
1171    ) -> Self {
1172        self.inspect_lexerkind_cb = Some(cb);
1173        self
1174    }
1175}
1176
1177/// An interface to the result of [CTLexerBuilder::build()].
1178pub struct CTLexer {
1179    missing_from_lexer: Option<HashSet<String>>,
1180    missing_from_parser: Option<HashSet<(String, Span)>>,
1181}
1182
1183impl CTLexer {
1184    fn missing_from_lexer(&self) -> Option<&HashSet<String>> {
1185        self.missing_from_lexer.as_ref()
1186    }
1187
1188    fn missing_from_parser(&self) -> Option<&HashSet<(String, Span)>> {
1189        self.missing_from_parser.as_ref()
1190    }
1191}
1192
1193/// Exports all token IDs used by a parser as a separate Rust module.
1194///
1195/// This builder will create a Rust module named `mod_name`
1196/// that can be imported with [`lrlex_mod!(mod_name)`](crate::lrlex_mod).
/// The module will contain one `const` of type `StorageT` per token in `token_map`,
/// with the token name prefixed by `T_`. In addition, it will contain
/// `TOK_IDS`, an array of all token IDs.
1200///
1201/// For example, if `StorageT` is `u8`, `mod_name` is `x`, and `token_map` is
1202/// `HashMap{"ID": 0, "INT": 1}` the generated module will look roughly as follows:
1203///
1204/// ```rust,ignore
1205/// mod x {
1206///   pub const T_ID: u8 = 0;
1207///   pub const T_INT: u8 = 1;
1208///   pub const TOK_IDS: &[u8] = &[T_ID, T_INT];
1209/// }
1210/// ```
1211///
1212/// See the [custom lexer example] for more usage details.
1213///
1214/// [custom lexer example]: https://github.com/softdevteam/grmtools/tree/master/lrlex/examples/calc_manual_lex
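///
/// A sketch of driving the builder from a `build.rs` (the module name and token IDs are
/// illustrative):
///
/// ```rust,ignore
/// let token_map: HashMap<String, u8> =
///     [("ID".to_owned(), 0), ("INT".to_owned(), 1)].into_iter().collect();
/// CTTokenMapBuilder::new("x", &token_map).build()?;
/// ```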
1215#[derive(Debug, Clone)]
1216pub struct CTTokenMapBuilder<StorageT: Display + ToTokens> {
1217    mod_name: String,
1218    token_map: Vec<(String, TokenStream)>,
1219    rename_map: Option<HashMap<String, String>>,
1220    allow_dead_code: bool,
1221    _marker: PhantomData<StorageT>,
1222}
1223
1224impl<StorageT: Display + ToTokens> CTTokenMapBuilder<StorageT> {
1225    /// Create a new token map builder.
1226    ///
1227    /// See the [builder documentation] for more info.
1228    ///
1229    /// [builder documentation]: CTTokenMapBuilder
1230    pub fn new(
1231        mod_name: impl Into<String>,
1232        token_map: impl Borrow<HashMap<String, StorageT>>,
1233    ) -> Self {
1234        Self {
1235            mod_name: mod_name.into(),
1236            token_map: token_map
1237                .borrow()
1238                .iter()
1239                .map(|(tok_name, tok_value)| (tok_name.clone(), tok_value.to_token_stream()))
1240                .collect(),
1241            rename_map: None,
1242            allow_dead_code: false,
1243            _marker: PhantomData,
1244        }
1245    }
1246
1247    /// Set a token rename map.
1248    ///
1249    /// Rename map is used to specify identifier names for tokens whose names
1250    /// are not valid Rust identifiers. For example, if `token_map`
1251    /// is `HashMap{"+": 0, "ID": 1}` and `rename_map` is `HashMap{"+": "PLUS"}`
1252    /// then the generated module will look roughly as follows:
1253    ///
1254    /// ```rust,ignore
1255    /// mod x {
1256    ///   pub const T_PLUS: u8 = 0;
1257    ///   pub const T_ID: u8 = 1;
1258    /// }
1259    /// ```
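    ///
    /// A sketch of how the map might be supplied (the keys and values are illustrative):
    ///
    /// ```rust,ignore
    /// CTTokenMapBuilder::new("x", &token_map)
    ///     .rename_map(Some([("+", "PLUS")]))
    ///     .build()?;
    /// ```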
1260    pub fn rename_map<M, I, K, V>(mut self, rename_map: Option<M>) -> Self
1261    where
1262        M: IntoIterator<Item = I>,
1263        I: Borrow<(K, V)>,
1264        K: AsRef<str>,
1265        V: AsRef<str>,
1266    {
1267        self.rename_map = rename_map.map(|rename_map| {
1268            rename_map
1269                .into_iter()
1270                .map(|it| {
1271                    let (k, v) = it.borrow();
1272                    let k = k.as_ref().into();
1273                    let v = v.as_ref().into();
1274                    (k, v)
1275                })
1276                .collect()
1277        });
1278        self
1279    }
1280
1281    /// Control whether the builder will add `#[allow(dead_code)]`
1282    /// to the generated module.
1283    ///
    /// By default, the generated constants are subject to the `dead_code` lint, meaning that
    /// you'll get a warning if your custom lexer doesn't use all of them.
1286    /// This function can be used to disable this behavior.
1287    pub fn allow_dead_code(mut self, allow_dead_code: bool) -> Self {
1288        self.allow_dead_code = allow_dead_code;
1289        self
1290    }
1291
1292    /// Build the token map module.
1293    pub fn build(&self) -> Result<(), Box<dyn Error>> {
1294        // Record the time that this version of lrlex was built. If the source code changes and rustc
1295        // forces a recompile, this will change this value, causing anything which depends on this
1296        // build of lrlex to be recompiled too.
1297        let mut outs = String::new();
1298        let timestamp = env!("VERGEN_BUILD_TIMESTAMP");
1299        let mod_ident = format_ident!("{}", self.mod_name);
1300        write!(outs, "// lrlex build time: {}\n\n", quote!(#timestamp),).ok();
1301        let storaget = str::parse::<TokenStream>(type_name::<StorageT>()).unwrap();
1302        // Sort the tokens so that they're always in the same order.
1303        // This will prevent unneeded rebuilds.
1304        let mut token_map_sorted = self.token_map.clone();
1305        token_map_sorted.sort_by(|(l, _), (r, _)| l.cmp(r));
1306        let (token_array, tokens) = token_map_sorted
1307            .iter()
1308            .map(|(k, id)| {
1309                let name = match &self.rename_map {
1310                    Some(rmap) => rmap.get(k).unwrap_or(k),
1311                    _ => k,
1312                };
1313                let tok_ident: Ident = syn::parse_str(&format!("T_{}", name.to_ascii_uppercase()))
1314                    .map_err(|e| {
1315                        format!(
1316                            "token name {:?} is not a valid Rust identifier: {}; \
1317                            consider renaming it via `CTTokenMapBuilder::rename_map`.",
1318                            name, e
1319                        )
1320                    })?;
1321                Ok((
1322                    // Note: the array of all tokens can't use `tok_ident` because
1323                    // it will confuse the dead code checker. For this reason,
1324                    // we use `id` here.
1325                    quote! {
1326                        #id,
1327                    },
1328                    quote! {
1329                        pub const #tok_ident: #storaget = #id;
1330                    },
1331                ))
1332            })
1333            .collect::<Result<(TokenStream, TokenStream), Box<dyn Error>>>()?;
1334        let unused_annotation;
1335        if self.allow_dead_code {
1336            unused_annotation = quote! {#[allow(dead_code)]};
1337        } else {
1338            unused_annotation = quote! {};
1339        };
        // Since the formatter doesn't preserve comments and we don't want to lose the build time
        // comment, just format the module contents.
1342        let unformatted = quote! {
1343            #unused_annotation
1344            mod #mod_ident {
1345                #tokens
1346                #[allow(dead_code)]
1347                pub const TOK_IDS: &[#storaget] = &[#token_array];
1348            }
1349        }
1350        .to_string();
1351        let out_mod = syn::parse_str(&unformatted)
1352            .map(|syntax_tree| prettyplease::unparse(&syntax_tree))
1353            .unwrap_or(unformatted);
1354        outs.push_str(&out_mod);
1355        let mut outp = PathBuf::from(var("OUT_DIR")?);
1356        outp.push(&self.mod_name);
1357        outp.set_extension("rs");
1358
1359        // If the file we're about to write out already exists with the same contents, then we
1360        // don't overwrite it (since that will force a recompile of the file, and relinking of the
1361        // binary etc).
1362        if let Ok(curs) = read_to_string(&outp) {
1363            if curs == outs {
1364                return Ok(());
1365            }
1366        }
1367
1368        let mut f = File::create(outp)?;
1369        f.write_all(outs.as_bytes())?;
1370        Ok(())
1371    }
1372}
1373
1374/// Create a Rust module named `mod_name` that can be imported with
1375/// [`lrlex_mod!(mod_name)`](crate::lrlex_mod).
1376///
1377/// This function is deprecated in favour of [`CTTokenMapBuilder`].
1378#[deprecated(since = "0.14.0", note = "use `lrlex::CTTokenMapBuilder` instead")]
1379pub fn ct_token_map<StorageT: Display + ToTokens>(
1380    mod_name: &str,
1381    token_map: impl Borrow<HashMap<String, StorageT>>,
1382    rename_map: Option<&HashMap<&str, &str>>,
1383) -> Result<(), Box<dyn Error>> {
1384    CTTokenMapBuilder::new(mod_name, token_map)
1385        .rename_map(rename_map)
1386        .allow_dead_code(true)
1387        .build()
1388}
1389
1390/// Indents a multi-line string and trims any trailing newline.
1391/// This currently assumes that indentation on blank lines does not matter.
1392///
1393/// The algorithm used by this function is:
1394/// 1. Prefix `s` with the indentation, indenting the first line.
1395/// 2. Trim any trailing newlines.
/// 3. Replace all newlines with `\n{indent}` to indent all lines after the first.
1397///
/// It is plausible that we should add a step 4, but currently do not:
1399/// 4. Replace all `\n{indent}\n` with `\n\n`
1400fn indent(indent: &str, s: &str) -> String {
1401    format!("{indent}{}\n", s.trim_end_matches('\n')).replace('\n', &format!("\n{}", indent))
1402}
1403
1404#[cfg(test)]
1405mod test {
1406    use std::fs::File;
1407    use std::io::Write;
1408
1409    use super::{CTLexerBuilder, LexerKind};
1410    #[test]
1411    fn test_grmtools_section_lexerkind() {
1412        let lexerkinds = [
1413            "LRNonStreamingLexer",
1414            "lrnonstreaminglexer",
1415            "LexerKind::lrnonstreaminglexer",
1416            "lexerkind::LRNonStreamingLexer",
1417        ];
1418        for (i, kind) in lexerkinds.iter().enumerate() {
1419            let lex_src = format!(
1420                "
1421%grmtools{{lexerkind: {}}}
1422%%
1423. ;
1424",
1425                kind
1426            );
1427            let lex_path = format!(
1428                "{}/test_grmtools_section_lexerkind_{}.l",
1429                env!("OUT_DIR"),
1430                i
1431            );
1432            let mut l_file = File::create(lex_path.clone()).unwrap();
1433            l_file.write_all(lex_src.as_bytes()).unwrap();
1434            CTLexerBuilder::new()
1435                .output_path(format!("{}.rs", lex_path.clone()))
1436                .lexer_path(lex_path.clone())
1437                .inspect_lexerkind(Box::new(move |lexerkind| {
1438                    assert!(matches!(lexerkind, LexerKind::LRNonStreamingLexer));
1439                    Ok(())
1440                }))
1441                .build()
1442                .unwrap();
1443        }
1444    }
1445}