use std::{
    any::type_name,
    collections::{HashMap, HashSet},
    env::{current_dir, var},
    error::Error,
    fmt::{Debug, Display, Write as _},
    fs::{self, create_dir_all, read_to_string, File},
    hash::Hash,
    io::Write,
    path::{Path, PathBuf},
    str::FromStr,
    sync::Mutex,
};

use bincode::Encode;
use cfgrammar::{
    header::{
        GrmtoolsSectionParser, Header, HeaderError, HeaderErrorKind, HeaderValue, Namespaced,
        Setting, Value,
    },
    markmap::MergeBehavior,
    newlinecache::NewlineCache,
    span::Location,
    Spanned,
};
use glob::glob;
use lazy_static::lazy_static;
use lrpar::{CTParserBuilder, LexerTypes};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::TokenStream;
use quote::{format_ident, quote, ToTokens, TokenStreamExt};
use regex::Regex;

use crate::{DefaultLexerTypes, LRNonStreamingLexer, LRNonStreamingLexerDef, LexFlags, LexerDef};

const RUST_FILE_EXT: &str = "rs";

lazy_static! {
    static ref RE_TOKEN_ID: Regex = Regex::new(r"^[a-zA-Z_][a-zA-Z_0-9]*$").unwrap();
    static ref GENERATED_PATHS: Mutex<HashSet<PathBuf>> = Mutex::new(HashSet::new());
}

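/// The kind of lexer that will be generated. The enum is `non_exhaustive` so
/// that new kinds of lexer can be added without a breaking change; currently
/// [LexerKind::LRNonStreamingLexer] is the only variant.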
#[non_exhaustive]
pub enum LexerKind {
    LRNonStreamingLexer,
}

impl<T: Clone> TryFrom<&Value<T>> for LexerKind {
    type Error = cfgrammar::header::HeaderError<T>;
    fn try_from(it: &Value<T>) -> Result<LexerKind, Self::Error> {
        match it {
            Value::Flag(_, loc) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "LexerKind",
                    "Expected `LexerKind`, found bool",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "LexerKind",
                    "Expected `LexerKind`, found numeric",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "LexerKind",
                    "Expected `LexerKind`, found string",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Constructor {
                ctor:
                    Namespaced {
                        namespace: _,
                        member: (_, loc),
                    },
                arg: _,
            }) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "LexerKind",
                    "Expected `LexerKind`, found constructor",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Unitary(Namespaced {
                namespace,
                member: (member, member_loc),
            })) => {
                // Accept an optional, case-insensitive `LexerKind::` namespace.
                if let Some((ns, loc)) = namespace {
                    if ns.to_lowercase() != "lexerkind" {
                        return Err(HeaderError {
                            kind: HeaderErrorKind::ConversionError(
                                "LexerKind",
                                "Expected namespace `LexerKind`",
                            ),
                            locations: vec![loc.clone()],
                        });
                    }
                }
                if member.to_lowercase() != "lrnonstreaminglexer" {
                    return Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError(
                            "LexerKind",
                            "Unknown `LexerKind` variant",
                        ),
                        locations: vec![member_loc.clone()],
                    });
                }

                Ok(LexerKind::LRNonStreamingLexer)
            }
        }
    }
}

/// The Rust visibility of the generated module.
#[derive(Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum Visibility {
    /// Private visibility (i.e. no visibility modifier).
    Private,
    /// `pub`.
    Public,
    /// `pub(super)`.
    PublicSuper,
    /// `pub(self)`.
    PublicSelf,
    /// `pub(crate)`.
    PublicCrate,
    /// `pub(in <path>)`.
    PublicIn(String),
}

impl ToTokens for Visibility {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match self {
            Visibility::Private => quote!(),
            Visibility::Public => quote! {pub},
            Visibility::PublicSuper => quote! {pub(super)},
            Visibility::PublicSelf => quote! {pub(self)},
            Visibility::PublicCrate => quote! {pub(crate)},
            Visibility::PublicIn(data) => {
                let other = str::parse::<TokenStream>(data).unwrap();
                quote! {pub(in #other)}
            }
        })
    }
}

/// The Rust edition that generated code should target.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum RustEdition {
    Rust2015,
    Rust2018,
    Rust2021,
}

/// Wraps an `Option<T>` so that it quotes to the fully qualified
/// `::std::option::Option::{Some, None}` paths, which generated code can rely
/// on regardless of what is in scope.
struct QuoteOption<T>(Option<T>);

impl<T: ToTokens> ToTokens for QuoteOption<T> {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.append_all(match self.0 {
            Some(ref t) => quote! { ::std::option::Option::Some(#t) },
            None => quote! { ::std::option::Option::None },
        });
    }
}

/// Wraps a two-element tuple so that it quotes to `(a, b)`.
struct QuoteTuple<T>(T);

impl<A: ToTokens, B: ToTokens> ToTokens for QuoteTuple<(A, B)> {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let (a, b) = &self.0;
        tokens.append_all(quote!((#a, #b)));
    }
}

/// Wraps a `&str` so that it quotes to `"...".to_string()`.
struct QuoteToString<'a>(&'a str);

impl ToTokens for QuoteToString<'_> {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let x = &self.0;
        tokens.append_all(quote! { #x.to_string() });
    }
}

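/// A `CTLexerBuilder` allows one to specify the criteria for building a
/// statically generated lexer, typically from a `build.rs` script. A minimal
/// sketch (the `src/calc.l` lexer path is an assumption for illustration):
///
/// ```ignore
/// // build.rs
/// use lrlex::CTLexerBuilder;
///
/// fn main() -> Result<(), Box<dyn std::error::Error>> {
///     CTLexerBuilder::new()
///         .lexer_in_src_dir("calc.l")?
///         .build()?;
///     Ok(())
/// }
/// ```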
pub struct CTLexerBuilder<'a, LexerTypesT: LexerTypes = DefaultLexerTypes<u32>>
where
    LexerTypesT::StorageT: Debug + Eq + Hash + ToTokens,
    usize: num_traits::AsPrimitive<LexerTypesT::StorageT>,
{
    lrpar_config: Option<Box<dyn Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT>>>,
    lexer_path: Option<PathBuf>,
    output_path: Option<PathBuf>,
    lexerkind: Option<LexerKind>,
    mod_name: Option<&'a str>,
    visibility: Visibility,
    rust_edition: RustEdition,
    rule_ids_map: Option<HashMap<String, LexerTypesT::StorageT>>,
    allow_missing_terms_in_lexer: bool,
    allow_missing_tokens_in_parser: bool,
    header: Header<Location>,
    #[cfg(test)]
    inspect_lexerkind_cb: Option<Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>>,
}

impl CTLexerBuilder<'_, DefaultLexerTypes<u32>> {
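    /// Create a new [CTLexerBuilder] using the default storage type (`u32`)
    /// for token IDs.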
    pub fn new() -> Self {
        CTLexerBuilder::<DefaultLexerTypes<u32>>::new_with_lexemet()
    }
}

impl<'a, LexerTypesT: LexerTypes<LexErrorT = crate::LRLexError> + 'static>
    CTLexerBuilder<'a, LexerTypesT>
where
    LexerTypesT::StorageT:
        'static + Debug + Eq + Hash + PrimInt + Encode + TryFrom<usize> + Unsigned + ToTokens,
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    /// Create a new [CTLexerBuilder] with a user-defined [LexerTypes], and
    /// hence a user-chosen token storage type (e.g. `u8` if the grammar has
    /// few tokens).
    pub fn new_with_lexemet() -> Self {
        let mut header = Header::new();
        header.set_default_merge_behavior(MergeBehavior::Ours);
        CTLexerBuilder {
            lrpar_config: None,
            lexer_path: None,
            output_path: None,
            lexerkind: None,
            mod_name: None,
            visibility: Visibility::Private,
            rust_edition: RustEdition::Rust2021,
            rule_ids_map: None,
            allow_missing_terms_in_lexer: false,
            allow_missing_tokens_in_parser: true,
            header,
            #[cfg(test)]
            inspect_lexerkind_cb: None,
        }
    }

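    /// Set a callback which will be used to configure the [CTParserBuilder]
    /// for the companion `lrpar` parser, so that the lexer and parser builds
    /// can be driven together and share a token map. A minimal sketch (the
    /// `calc.y` grammar path is an assumption for illustration):
    ///
    /// ```ignore
    /// CTLexerBuilder::new()
    ///     .lrpar_config(|ctp| ctp.grammar_in_src_dir("calc.y").unwrap())
    ///     .lexer_in_src_dir("calc.l")?
    ///     .build()?;
    /// ```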
    pub fn lrpar_config<F>(mut self, config_func: F) -> Self
    where
        F: 'static + Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT>,
    {
        self.lrpar_config = Some(Box::new(config_func));
        self
    }

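    /// Set the input lexer path to a file relative to this project's `src`
    /// directory. This also sets the output path, so
    /// [CTLexerBuilder::output_path] need not be called: a lexer at
    /// `src/a/b/c.l` will be written to `$OUT_DIR/a/b/c.l.rs`.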
    pub fn lexer_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
    where
        P: AsRef<Path>,
    {
        if !srcp.as_ref().is_relative() {
            return Err(format!(
                "Lexer path '{}' must be a relative path.",
                srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
            )
            .into());
        }

        let mut lexp = current_dir()?;
        lexp.push("src");
        lexp.push(srcp.as_ref());
        self.lexer_path = Some(lexp);

        let mut outp = PathBuf::new();
        outp.push(var("OUT_DIR").unwrap());
        outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
        create_dir_all(&outp)?;
        let mut leaf = srcp
            .as_ref()
            .file_name()
            .unwrap()
            .to_str()
            .unwrap()
            .to_owned();
        write!(leaf, ".{}", RUST_FILE_EXT).ok();
        outp.push(leaf);
        Ok(self.output_path(outp))
    }

    /// Set the input lexer path to `inp`. Note that this does *not* set the
    /// output path: you must call [CTLexerBuilder::output_path] yourself.
    pub fn lexer_path<P>(mut self, inp: P) -> Self
    where
        P: AsRef<Path>,
    {
        self.lexer_path = Some(inp.as_ref().to_owned());
        self
    }

    /// Set the output lexer path to `outp`. Note that there are no
    /// requirements on `outp`: the file can be anywhere a valid [Path] can
    /// point to.
    pub fn output_path<P>(mut self, outp: P) -> Self
    where
        P: AsRef<Path>,
    {
        self.output_path = Some(outp.as_ref().to_owned());
        self
    }

    /// Set the kind of lexer to be generated to `lexerkind`. If unset, the
    /// `.l` file's `%grmtools` section is consulted, falling back to a
    /// default kind.
    pub fn lexerkind(mut self, lexerkind: LexerKind) -> Self {
        self.lexerkind = Some(lexerkind);
        self
    }

    /// Set the generated module name to `mod_name`. If no name is specified,
    /// one is derived from the lexer filename (e.g. `calc.l` becomes
    /// `calc_l`).
    pub fn mod_name(mut self, mod_name: &'a str) -> Self {
        self.mod_name = Some(mod_name);
        self
    }

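    /// Set the visibility of the generated module to `vis`. Defaults to
    /// [Visibility::Private].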
    pub fn visibility(mut self, vis: Visibility) -> Self {
        self.visibility = vis;
        self
    }

    /// Set the Rust edition the generated code targets to `edition`. Defaults
    /// to [RustEdition::Rust2021].
    pub fn rust_edition(mut self, edition: RustEdition) -> Self {
        self.rust_edition = edition;
        self
    }

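    /// Set this lexer builder's map of rule IDs to `rule_ids_map`. By
    /// default, lexing rules have arbitrary (but distinct) IDs; setting a map
    /// from token names to IDs lets the lexer share IDs with a parser. Note
    /// that [CTLexerBuilder::lrpar_config] sets this map automatically from
    /// the parser build, so calling this by hand is only needed otherwise.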
    pub fn rule_ids_map<T: std::borrow::Borrow<HashMap<String, LexerTypesT::StorageT>> + Clone>(
        mut self,
        rule_ids_map: T,
    ) -> Self {
        self.rule_ids_map = Some(rule_ids_map.borrow().to_owned());
        self
    }

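    /// Statically compile the `.l` file into Rust, placing the output into
    /// the file specified by [CTLexerBuilder::output_path]. The generated
    /// module exposes a `lexerdef()` function and, if a rule IDs map was
    /// supplied, one `N_<TOKEN>` constant per token name that is a valid Rust
    /// identifier, roughly:
    ///
    /// ```text
    /// mod <mod_name> {
    ///     pub fn lexerdef() -> LRNonStreamingLexerDef<LexerTypesT> { ... }
    ///
    ///     #[allow(dead_code)]
    ///     pub const N_<TOKEN>: StorageT = ...;
    /// }
    /// ```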
    pub fn build(mut self) -> Result<CTLexer, Box<dyn Error>> {
        let lexerp = self
            .lexer_path
            .as_ref()
            .expect("lexer_path must be specified before processing.");
        let outp = self
            .output_path
            .as_ref()
            .expect("output_path must be specified before processing.");

        {
            // Generating two lexers to the same path would silently clobber
            // one of them, so refuse it up front.
            let mut lk = GENERATED_PATHS.lock().unwrap();
            if lk.contains(outp.as_path()) {
                return Err(format!("Generating two lexers to the same path ('{}') is not allowed: use CTLexerBuilder::output_path (and, optionally, CTLexerBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
            }
            lk.insert(outp.clone());
        }
        let lex_src = read_to_string(lexerp)
            .map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
        let mut header = self.header;
        let (parsed_header, _) = GrmtoolsSectionParser::new(&lex_src, false)
            .parse()
            .map_err(|es| {
                es.iter()
                    .map(|e| e.to_string())
                    .collect::<Vec<_>>()
                    .join("\n")
            })?;
        header.merge_from(parsed_header)?;
        header.mark_used(&"lexerkind".to_string());
        // An explicit `lexerkind()` call wins; otherwise fall back to the
        // `%grmtools` header, and finally to the default lexer kind.
        let lexerkind = match self.lexerkind {
            Some(lexerkind) => lexerkind,
            None => {
                if let Some(HeaderValue(_, lk_val)) = header.get("lexerkind") {
                    LexerKind::try_from(lk_val)?
                } else {
                    LexerKind::LRNonStreamingLexer
                }
            }
        };
        let line_cache = NewlineCache::from_str(&lex_src).unwrap();
        #[cfg(test)]
        if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb {
            inspect_lexerkind_cb(lexerkind)?
        }
        let (lexerdef, lex_flags): (LRNonStreamingLexerDef<LexerTypesT>, LexFlags) = match lexerkind
        {
            LexerKind::LRNonStreamingLexer => {
                let lex_flags = LexFlags::try_from(&mut header)?;
                let lexerdef =
                    LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(&lex_src, lex_flags)
                        .map_err(|errs| {
                            errs.iter()
                                .map(|e| {
                                    if let Some((line, column)) = line_cache
                                        .byte_to_line_num_and_col_num(
                                            &lex_src,
                                            e.spans().first().unwrap().start(),
                                        )
                                    {
                                        format!("{} at line {line} column {column}", e)
                                    } else {
                                        format!("{}", e)
                                    }
                                })
                                .collect::<Vec<_>>()
                                .join("\n")
                        })?;
                let lex_flags = lexerdef.lex_flags().cloned();
                (lexerdef, lex_flags.unwrap())
            }
        };

        if let Some(ref lrcfg) = self.lrpar_config {
            // Build the companion parser first: its token map becomes this
            // lexer's rule IDs map, and any `test_files` named in the
            // grammar's header are lexed and parsed as a sanity check.
            let mut lexerdef = lexerdef.clone();
            let mut ctp = CTParserBuilder::<LexerTypesT>::new().inspect_rt(Box::new(
                move |yacc_header, rtpb, rule_ids_map, grm_path| {
                    let owned_map = rule_ids_map
                        .iter()
                        .map(|(x, y)| (&**x, *y))
                        .collect::<HashMap<_, _>>();
                    lexerdef.set_rule_ids(&owned_map);
                    yacc_header.mark_used(&"test_files".to_string());
                    let test_glob = yacc_header.get("test_files");
                    match test_glob {
                        Some(HeaderValue(_, Value::Setting(Setting::String(test_files, _)))) => {
                            let path_joined = grm_path.parent().unwrap().join(test_files);
                            for path in
                                glob(&path_joined.to_string_lossy()).map_err(|e| e.to_string())?
                            {
                                let path = path?;
                                let input = fs::read_to_string(&path)?;
                                let l: LRNonStreamingLexer<LexerTypesT> = lexerdef.lexer(&input);
                                for e in rtpb.parse_noaction(&l) {
                                    Err(format!("parsing {}: {}", path.display(), e))?
                                }
                            }
                            Ok(())
                        }
                        Some(_) => Err("Invalid value for setting 'test_files'".into()),
                        None => Ok(()),
                    }
                },
            ));
            ctp = lrcfg(ctp);
            let map = ctp.build()?;
            self.rule_ids_map = Some(map.token_map().to_owned());
        }

        let mut lexerdef = Box::new(lexerdef);
        let unused_header_values = header.unused();
        if !unused_header_values.is_empty() {
            return Err(
                format!("Unused header values: {}", unused_header_values.join(", ")).into(),
            );
        }

        let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map {
            Some(ref rim) => {
                // `set_rule_ids` wants `&str` keys, so reborrow the owned map.
                let owned_map = rim
                    .iter()
                    .map(|(x, y)| (&**x, *y))
                    .collect::<HashMap<_, _>>();
                let (x, y) = lexerdef.set_rule_ids(&owned_map);
                (
                    x.map(|a| a.iter().map(|&b| b.to_string()).collect::<HashSet<_>>()),
                    y.map(|a| a.iter().map(|&b| b.to_string()).collect::<HashSet<_>>()),
                )
            }
            None => (None, None),
        };

        let mut has_unallowed_missing = false;
        if !self.allow_missing_terms_in_lexer {
            if let Some(ref mfl) = missing_from_lexer {
                eprintln!("Error: the following tokens are used in the grammar but are not defined in the lexer:");
                for n in mfl {
                    eprintln!(" {}", n);
                }
                has_unallowed_missing = true;
            }
        }
        if !self.allow_missing_tokens_in_parser {
            if let Some(ref mfp) = missing_from_parser {
                eprintln!("Error: the following tokens are defined in the lexer but not used in the grammar:");
                for n in mfp {
                    eprintln!(" {}", n);
                }
                has_unallowed_missing = true;
            }
        }
        if has_unallowed_missing {
            fs::remove_file(outp).ok();
            panic!();
        }

        let mod_name = match self.mod_name {
            Some(s) => s.to_owned(),
            None => {
                // No module name was specified, so derive one from the lexer
                // filename, stripping every extension (e.g. `calc.l` becomes
                // `calc_l`).
                let mut stem = lexerp.to_str().unwrap();
                loop {
                    let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
                    if stem == new_stem {
                        break;
                    }
                    stem = new_stem;
                }
                format!("{}_l", stem)
            }
        };
        let mod_name = format_ident!("{}", mod_name);
        let mut lexerdef_func_impl = {
            let LexFlags {
                allow_wholeline_comments,
                dot_matches_new_line,
                multi_line,
                octal,
                posix_escapes,
                case_insensitive,
                unicode,
                swap_greed,
                ignore_whitespace,
                size_limit,
                dfa_size_limit,
                nest_limit,
            } = lex_flags;
            let allow_wholeline_comments = QuoteOption(allow_wholeline_comments);
            let dot_matches_new_line = QuoteOption(dot_matches_new_line);
            let multi_line = QuoteOption(multi_line);
            let octal = QuoteOption(octal);
            let posix_escapes = QuoteOption(posix_escapes);
            let case_insensitive = QuoteOption(case_insensitive);
            let unicode = QuoteOption(unicode);
            let swap_greed = QuoteOption(swap_greed);
            let ignore_whitespace = QuoteOption(ignore_whitespace);
            let size_limit = QuoteOption(size_limit);
            let dfa_size_limit = QuoteOption(dfa_size_limit);
            let nest_limit = QuoteOption(nest_limit);

            quote! {
                let mut lex_flags = ::lrlex::DEFAULT_LEX_FLAGS;
                lex_flags.allow_wholeline_comments = #allow_wholeline_comments.or(::lrlex::DEFAULT_LEX_FLAGS.allow_wholeline_comments);
                lex_flags.dot_matches_new_line = #dot_matches_new_line.or(::lrlex::DEFAULT_LEX_FLAGS.dot_matches_new_line);
                lex_flags.multi_line = #multi_line.or(::lrlex::DEFAULT_LEX_FLAGS.multi_line);
                lex_flags.octal = #octal.or(::lrlex::DEFAULT_LEX_FLAGS.octal);
                lex_flags.posix_escapes = #posix_escapes.or(::lrlex::DEFAULT_LEX_FLAGS.posix_escapes);
                lex_flags.case_insensitive = #case_insensitive.or(::lrlex::DEFAULT_LEX_FLAGS.case_insensitive);
                lex_flags.unicode = #unicode.or(::lrlex::DEFAULT_LEX_FLAGS.unicode);
                lex_flags.swap_greed = #swap_greed.or(::lrlex::DEFAULT_LEX_FLAGS.swap_greed);
                lex_flags.ignore_whitespace = #ignore_whitespace.or(::lrlex::DEFAULT_LEX_FLAGS.ignore_whitespace);
                lex_flags.size_limit = #size_limit.or(::lrlex::DEFAULT_LEX_FLAGS.size_limit);
                lex_flags.dfa_size_limit = #dfa_size_limit.or(::lrlex::DEFAULT_LEX_FLAGS.dfa_size_limit);
                lex_flags.nest_limit = #nest_limit.or(::lrlex::DEFAULT_LEX_FLAGS.nest_limit);
                let lex_flags = lex_flags;
            }
        };
        {
            let start_states = lexerdef.iter_start_states();
            let rules = lexerdef.iter_rules().map(|r| {
                let tok_id = QuoteOption(r.tok_id);
                let n = QuoteOption(r.name().map(QuoteToString));
                let target_state =
                    QuoteOption(r.target_state().map(|(x, y)| QuoteTuple((x, y))));
                let n_span = r.name_span();
                let regex = QuoteToString(&r.re_str);
                let start_states = r.start_states();
                quote! {
                    Rule::new(::lrlex::unstable_api::InternalPublicApi, #tok_id, #n, #n_span, #regex.to_string(),
                        vec![#(#start_states),*], #target_state, &lex_flags).unwrap()
                }
            });
            lexerdef_func_impl.append_all(quote! {
                let start_states: Vec<StartState> = vec![#(#start_states),*];
                let rules = vec![#(#rules),*];
            });
        }
        let lexerdef_ty = match lexerkind {
            LexerKind::LRNonStreamingLexer => {
                quote!(::lrlex::LRNonStreamingLexerDef)
            }
        };
        lexerdef_func_impl.append_all(quote! {
            #lexerdef_ty::from_rules(start_states, rules)
        });

        let mut token_consts = TokenStream::new();
        if let Some(rim) = self.rule_ids_map {
            // Only token names which are valid Rust identifiers can become
            // `N_*` constants.
            for (name, id) in rim {
                if RE_TOKEN_ID.is_match(&name) {
                    let tok_ident = format_ident!("N_{}", name.to_ascii_uppercase());
                    let storaget =
                        str::parse::<TokenStream>(type_name::<LexerTypesT::StorageT>()).unwrap();
                    let tok_const = quote! {
                        #[allow(dead_code)]
                        pub const #tok_ident: #storaget = #id;
                    };
                    token_consts.extend(tok_const)
                }
            }
        }
        let token_consts = token_consts.into_iter();
        let out_tokens = {
            let lexerdef_param = str::parse::<TokenStream>(type_name::<LexerTypesT>()).unwrap();
            let mod_vis = self.visibility;
            quote! {
                #mod_vis mod #mod_name {
                    use ::lrlex::{LexerDef, Rule, StartState};
                    #[allow(dead_code)]
                    pub fn lexerdef() -> #lexerdef_ty<#lexerdef_param> {
                        #lexerdef_func_impl
                    }

                    #(#token_consts)*
                }
            }
        };
        // Pretty-print the output if possible, falling back to the raw token
        // stream if it cannot be parsed.
        let unformatted = out_tokens.to_string();
        let outs = syn::parse_str(&unformatted)
            .map(|syntax_tree| prettyplease::unparse(&syntax_tree))
            .unwrap_or(unformatted);
        // If the output file already exists with identical contents, leave it
        // untouched so as not to trigger unnecessary rebuilds.
        if let Ok(curs) = read_to_string(outp) {
            if curs == outs {
                return Ok(CTLexer {
                    missing_from_lexer,
                    missing_from_parser,
                });
            }
        }
        let mut f = File::create(outp)?;
        f.write_all(outs.as_bytes())?;
        Ok(CTLexer {
            missing_from_lexer,
            missing_from_parser,
        })
    }

    /// Given the filename `a/b.l` as input, statically compile the file
    /// `src/a/b.l` into a Rust module which can then be imported using
    /// `lrlex_mod!("a/b.l")`.
    #[deprecated(
        since = "0.11.0",
        note = "Please use lexer_in_src_dir() and build() instead"
    )]
    #[allow(deprecated)]
    pub fn process_file_in_src(
        self,
        srcp: &str,
    ) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>> {
        let mut inp = current_dir()?;
        inp.push("src");
        inp.push(srcp);
        let mut outp = PathBuf::new();
        outp.push(var("OUT_DIR").unwrap());
        outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
        create_dir_all(&outp)?;
        let mut leaf = Path::new(srcp)
            .file_name()
            .unwrap()
            .to_str()
            .unwrap()
            .to_owned();
        write!(leaf, ".{}", RUST_FILE_EXT).ok();
        outp.push(leaf);
        self.process_file(inp, outp)
    }

    /// Statically compile the `.l` file `inp` into Rust, placing the output
    /// into `outp`, and return the sets of token names missing from the lexer
    /// and the parser respectively (see [CTLexerBuilder::build]).
    #[deprecated(
        since = "0.11.0",
        note = "Please use lexer_in_src_dir() and build() instead"
    )]
    pub fn process_file<P, Q>(
        mut self,
        inp: P,
        outp: Q,
    ) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>>
    where
        P: AsRef<Path>,
        Q: AsRef<Path>,
    {
        self.lexer_path = Some(inp.as_ref().to_owned());
        self.output_path = Some(outp.as_ref().to_owned());
        let cl = self.build()?;
        Ok((
            cl.missing_from_lexer().map(|x| x.to_owned()),
            cl.missing_from_parser().map(|x| x.to_owned()),
        ))
    }

    /// If set to false, tokens used in the grammar but not defined in the
    /// lexer will cause [CTLexerBuilder::build] to panic. Defaults to false.
    pub fn allow_missing_terms_in_lexer(mut self, allow: bool) -> Self {
        self.allow_missing_terms_in_lexer = allow;
        self
    }

    /// If set to false, tokens defined in the lexer but not used in the
    /// grammar will cause [CTLexerBuilder::build] to panic. Defaults to true,
    /// since lexers sometimes define tokens only so that they can be skipped
    /// or reported as errors.
    pub fn allow_missing_tokens_in_parser(mut self, allow: bool) -> Self {
        self.allow_missing_tokens_in_parser = allow;
        self
    }

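    /// Enable or disable whole-line comments in the `.l` file. Like the other
    /// lex flag setters below, this records the value in the builder's
    /// header, which is then merged with the `.l` file's own `%grmtools`
    /// section (with [MergeBehavior::Ours], the builder's explicit value is
    /// intended to take precedence). A minimal usage sketch (the `calc.l`
    /// path is an assumption for illustration):
    ///
    /// ```ignore
    /// CTLexerBuilder::new()
    ///     .allow_wholeline_comments(true)
    ///     .dot_matches_new_line(true)
    ///     .lexer_in_src_dir("calc.l")?
    ///     .build()?;
    /// ```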
    pub fn allow_wholeline_comments(mut self, flag: bool) -> Self {
        let key = "allow_wholeline_comments".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set whether `.` matches newline characters in the lexer's regular
    /// expressions.
    pub fn dot_matches_new_line(mut self, flag: bool) -> Self {
        let key = "dot_matches_new_line".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set whether `^` and `$` match at line boundaries (multi-line mode).
    pub fn multi_line(mut self, flag: bool) -> Self {
        let key = "multi_line".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set whether POSIX lex-compatible escape sequences are enabled.
    pub fn posix_escapes(mut self, flag: bool) -> Self {
        let key = "posix_escapes".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set whether octal escape sequences (e.g. `\123`) are enabled.
    pub fn octal(mut self, flag: bool) -> Self {
        let key = "octal".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set whether the meanings of greedy and lazy repetition operators are
    /// swapped.
    pub fn swap_greed(mut self, flag: bool) -> Self {
        let key = "swap_greed".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set whether whitespace in regular expressions is ignored.
    pub fn ignore_whitespace(mut self, flag: bool) -> Self {
        let key = "ignore_whitespace".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set whether Unicode mode is enabled in the lexer's regular
    /// expressions.
    pub fn unicode(mut self, flag: bool) -> Self {
        let key = "unicode".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set whether matching is case-insensitive.
    pub fn case_insensitive(mut self, flag: bool) -> Self {
        let key = "case_insensitive".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
            ),
        );
        self
    }

    /// Set the approximate size limit, in bytes, of a compiled regular
    /// expression.
    pub fn size_limit(mut self, sz: usize) -> Self {
        let key = "size_limit".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Setting(Setting::Num(
                    sz as u64,
                    Location::Other("CTLexerBuilder".to_string()),
                )),
            ),
        );
        self
    }

    /// Set the approximate size limit, in bytes, of the cache used by the
    /// regular expressions' DFA.
    pub fn dfa_size_limit(mut self, sz: usize) -> Self {
        let key = "dfa_size_limit".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Setting(Setting::Num(
                    sz as u64,
                    Location::Other("CTLexerBuilder".to_string()),
                )),
            ),
        );
        self
    }

    /// Set the nesting limit used when parsing regular expressions.
    pub fn nest_limit(mut self, lim: u32) -> Self {
        let key = "nest_limit".to_string();
        self.header.insert(
            key,
            HeaderValue(
                Location::Other("CTLexerBuilder".to_string()),
                Value::Setting(Setting::Num(
                    lim as u64,
                    Location::Other("CTLexerBuilder".to_string()),
                )),
            ),
        );
        self
    }

    /// Test-only: observe which [LexerKind] [CTLexerBuilder::build] resolved
    /// to.
    #[cfg(test)]
    pub fn inspect_lexerkind(
        mut self,
        cb: Box<dyn Fn(LexerKind) -> Result<(), Box<dyn Error>>>,
    ) -> Self {
        self.inspect_lexerkind_cb = Some(cb);
        self
    }
}

/// An interface to the result of [CTLexerBuilder::build].
pub struct CTLexer {
    missing_from_lexer: Option<HashSet<String>>,
    missing_from_parser: Option<HashSet<String>>,
}

impl CTLexer {
    fn missing_from_lexer(&self) -> Option<&HashSet<String>> {
        self.missing_from_lexer.as_ref()
    }

    fn missing_from_parser(&self) -> Option<&HashSet<String>> {
        self.missing_from_parser.as_ref()
    }
}

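/// Generate a module `mod_name` containing one `T_<TOKEN>` constant of type
/// `StorageT` per entry in `token_map`, for users who want a compile-time
/// token map without generating a full lexer (e.g. when writing a lexer by
/// hand). Token names which are not valid Rust identifiers can be renamed via
/// `rename_map`. A minimal sketch (the `grammar.y` path and the `+`/`PLUS`
/// renaming are assumptions for illustration):
///
/// ```ignore
/// use std::collections::HashMap;
/// use lrlex::{ct_token_map, DefaultLexerTypes};
/// use lrpar::CTParserBuilder;
///
/// let cp = CTParserBuilder::<DefaultLexerTypes<u8>>::new()
///     .grammar_in_src_dir("grammar.y")?
///     .build()?;
/// ct_token_map::<u8>("token_map", cp.token_map(), Some(&HashMap::from([("+", "PLUS")])))?;
/// ```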
pub fn ct_token_map<StorageT: Display>(
    mod_name: &str,
    token_map: &HashMap<String, StorageT>,
    rename_map: Option<&HashMap<&str, &str>>,
) -> Result<(), Box<dyn Error>> {
    // Record the lrlex build time in the output so that anything which
    // depends on this build of lrlex is recompiled when lrlex itself changes.
    let mut outs = String::new();
    let timestamp = env!("VERGEN_BUILD_TIMESTAMP");
    write!(
        outs,
        "// lrlex build time: {}\n\nmod {} {{\n",
        quote!(#timestamp),
        mod_name
    )
    .ok();
    outs.push_str(
        &token_map
            .iter()
            .map(|(k, v)| {
                let k = match rename_map {
                    Some(rmap) => *rmap.get(k.as_str()).unwrap_or(&k.as_str()),
                    _ => k,
                };
                format!(
                    " #[allow(dead_code)] pub const T_{}: {} = {};",
                    k,
                    type_name::<StorageT>(),
                    v
                )
            })
            .collect::<Vec<_>>()
            .join("\n"),
    );
    outs.push_str("\n}");

    let mut outp = PathBuf::from(var("OUT_DIR")?);
    outp.push(mod_name);
    outp.set_extension("rs");

    // As in `CTLexerBuilder::build`, avoid rewriting an identical file so
    // that dependent crates aren't needlessly rebuilt.
    if let Ok(curs) = read_to_string(&outp) {
        if curs == outs {
            return Ok(());
        }
    }

    let mut f = File::create(outp)?;
    f.write_all(outs.as_bytes())?;
    Ok(())
}

#[cfg(test)]
mod test {
    use std::fs::File;
    use std::io::Write;

    use super::{CTLexerBuilder, LexerKind};
    #[test]
    fn test_grmtools_section_lexerkind() {
        // All of these spellings should resolve to `LRNonStreamingLexer`:
        // both the member name and the optional `LexerKind::` namespace are
        // matched case-insensitively.
        let lexerkinds = [
            "LRNonStreamingLexer",
            "lrnonstreaminglexer",
            "LexerKind::lrnonstreaminglexer",
            "lexerkind::LRNonStreamingLexer",
        ];
        for (i, kind) in lexerkinds.iter().enumerate() {
            let lex_src = format!(
                "
%grmtools{{lexerkind: {}}}
%%
. ;
",
                kind
            );
            let lex_path = format!(
                "{}/test_grmtools_section_lexerkind_{}.l",
                env!("OUT_DIR"),
                i
            );
            let mut l_file = File::create(lex_path.clone()).unwrap();
            l_file.write_all(lex_src.as_bytes()).unwrap();
            CTLexerBuilder::new()
                .output_path(format!("{}.rs", lex_path.clone()))
                .lexer_path(lex_path.clone())
                .inspect_lexerkind(Box::new(move |lexerkind| {
                    assert!(matches!(lexerkind, LexerKind::LRNonStreamingLexer));
                    Ok(())
                }))
                .build()
                .unwrap();
        }
    }
}