1use std::{
4 any::type_name,
5 collections::{HashMap, HashSet},
6 env::{current_dir, var},
7 error::Error,
8 fmt::{self, Debug, Write as fmtWrite},
9 fs::{self, File, create_dir_all, read_to_string},
10 hash::Hash,
11 io::Write,
12 marker::PhantomData,
13 path::{Path, PathBuf},
14 sync::{LazyLock, Mutex},
15};
16
17use crate::{
18 LexerTypes, RTParserBuilder, RecoveryKind,
19 diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter},
20};
21
22#[cfg(feature = "_unstable_api")]
23use crate::unstable_api::UnstableApi;
24
25use bincode::{Decode, Encode, decode_from_slice, encode_to_vec};
26use cfgrammar::{
27 Location, RIdx, Span, Symbol,
28 header::{GrmtoolsSectionParser, Header, HeaderValue, Value},
29 markmap::{Entry, MergeBehavior},
30 yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind, ast::ASTWithValidityInfo},
31};
32use filetime::FileTime;
33use lrtable::{Minimiser, StateGraph, StateTable, from_yacc, statetable::Conflicts};
34use num_traits::{AsPrimitive, PrimInt, Unsigned};
35use proc_macro2::{Literal, TokenStream};
36use quote::{ToTokens, TokenStreamExt, format_ident, quote};
37use syn::{Generics, parse_quote};
38
// Prefixes used to namespace generated identifiers so that they cannot
// collide with user-written action code.
const ACTION_PREFIX: &str = "__gt_";
const GLOBAL_PREFIX: &str = "__GT_";
// Name of the generated enum carrying per-rule action result values, and the
// prefix of its variants (formatted as e.g. `Ak<ridx>` elsewhere in this file).
const ACTIONS_KIND: &str = "__GtActionsKind";
const ACTIONS_KIND_PREFIX: &str = "Ak";
const ACTIONS_KIND_HIDDEN: &str = "__GtActionsKindHidden";

// File extension of the generated Rust output.
const RUST_FILE_EXT: &str = "rs";

// Prefixes for user-facing diagnostics.
const WARNING: &str = "[Warning]";
const ERROR: &str = "[Error]";

// Output paths already generated by this process. `build` consults this to
// refuse writing two parsers to the same path.
static GENERATED_PATHS: LazyLock<Mutex<HashSet<PathBuf>>> =
    LazyLock::new(|| Mutex::new(HashSet::new()));
52
/// Error returned (boxed) from `CTParserBuilder::build` when the grammar's
/// conflicts do not match its `%expect`/`%expect-rr` declarations.
struct CTConflictsError<StorageT: Eq + Hash> {
    // Pre-rendered, human-readable conflict report; both the `Display` and
    // `Debug` impls print this string verbatim.
    conflicts_diagnostic: String,
    // Kept in tests so the state table is available for inspection.
    #[cfg(test)]
    #[cfg_attr(test, allow(dead_code))]
    stable: StateTable<StorageT>,
    phantom: PhantomData<StorageT>,
}
60
61struct QuoteOption<T>(Option<T>);
67
68impl<T: ToTokens> ToTokens for QuoteOption<T> {
69 fn to_tokens(&self, tokens: &mut TokenStream) {
70 tokens.append_all(match self.0 {
71 Some(ref t) => quote! { ::std::option::Option::Some(#t) },
72 None => quote! { ::std::option::Option::None },
73 });
74 }
75}
76
77struct UnsuffixedUsize(usize);
82
83impl ToTokens for UnsuffixedUsize {
84 fn to_tokens(&self, tokens: &mut TokenStream) {
85 tokens.append(Literal::usize_unsuffixed(self.0))
86 }
87}
88
89struct QuoteTuple<T>(T);
92
93impl<A: ToTokens, B: ToTokens> ToTokens for QuoteTuple<(A, B)> {
94 fn to_tokens(&self, tokens: &mut TokenStream) {
95 let (a, b) = &self.0;
96 tokens.append_all(quote!((#a, #b)));
97 }
98}
99
100struct QuoteToString<'a>(&'a str);
102
103impl ToTokens for QuoteToString<'_> {
104 fn to_tokens(&self, tokens: &mut TokenStream) {
105 let x = &self.0;
106 tokens.append_all(quote! { #x.to_string() });
107 }
108}
109
110impl<StorageT> fmt::Display for CTConflictsError<StorageT>
111where
112 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
113 usize: AsPrimitive<StorageT>,
114{
115 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
116 write!(f, "{}", self.conflicts_diagnostic)
117 }
118}
119
120impl<StorageT> fmt::Debug for CTConflictsError<StorageT>
121where
122 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
123 usize: AsPrimitive<StorageT>,
124{
125 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126 write!(f, "{}", self.conflicts_diagnostic)
127 }
128}
129
// Marker impl: `CTConflictsError` relies on the default `Error` methods; its
// message comes from its `Display`/`Debug` impls.
impl<StorageT> Error for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
}
136
/// Minimal `Error` wrapper around an already-formatted `String`, used to
/// return free-form diagnostics as a `Box<dyn Error>`.
struct ErrorString(String);

impl fmt::Display for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.0)
    }
}

impl fmt::Debug for ErrorString {
    /// Debug output is intentionally identical to `Display`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl Error for ErrorString {}
152
/// The Rust visibility with which the generated module will be declared.
#[derive(Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum Visibility {
    /// No visibility modifier (module-private).
    Private,
    /// `pub`
    Public,
    /// `pub(super)`
    PublicSuper,
    /// `pub(self)`
    PublicSelf,
    /// `pub(crate)`
    PublicCrate,
    /// `pub(in <path>)`, where the payload is the path.
    PublicIn(String),
}
170
/// The Rust edition that the generated code should target.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum RustEdition {
    Rust2015,
    Rust2018,
    Rust2021,
}
181
182impl RustEdition {
183 fn to_variant_tokens(self) -> TokenStream {
184 match self {
185 RustEdition::Rust2015 => quote!(::lrpar::RustEdition::Rust2015),
186 RustEdition::Rust2018 => quote!(::lrpar::RustEdition::Rust2018),
187 RustEdition::Rust2021 => quote!(::lrpar::RustEdition::Rust2021),
188 }
189 }
190}
191
192impl ToTokens for Visibility {
193 fn to_tokens(&self, tokens: &mut TokenStream) {
194 tokens.extend(match self {
195 Visibility::Private => quote!(),
196 Visibility::Public => quote! {pub},
197 Visibility::PublicSuper => quote! {pub(super)},
198 Visibility::PublicSelf => quote! {pub(self)},
199 Visibility::PublicCrate => quote! {pub(crate)},
200 Visibility::PublicIn(data) => {
201 let other = str::parse::<TokenStream>(data).unwrap();
202 quote! {pub(in #other)}
203 }
204 })
205 }
206}
207
208impl Visibility {
209 fn to_variant_tokens(&self) -> TokenStream {
210 match self {
211 Visibility::Private => quote!(::lrpar::Visibility::Private),
212 Visibility::Public => quote!(::lrpar::Visibility::Public),
213 Visibility::PublicSuper => quote!(::lrpar::Visibility::PublicSuper),
214 Visibility::PublicSelf => quote!(::lrpar::Visibility::PublicSelf),
215 Visibility::PublicCrate => quote!(::lrpar::Visibility::PublicCrate),
216 Visibility::PublicIn(data) => {
217 let data = QuoteToString(data);
218 quote!(::lrpar::Visibility::PublicIn(#data))
219 }
220 }
221 }
222}
223
/// A builder which generates a parser module from a Yacc grammar at
/// compile time.
pub struct CTParserBuilder<'a, LexerTypesT: LexerTypes>
where
    LexerTypesT::StorageT: Eq + Hash,
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    // Where the grammar comes from: a file path, a raw source string, or an
    // already-validated AST (the latter two set via feature-gated methods).
    grammar_path: Option<PathBuf>,
    grammar_src: Option<String>,
    from_ast: Option<ASTWithValidityInfo>,
    // Where the generated `.rs` file will be written.
    output_path: Option<PathBuf>,
    // Explicit module name; if `None`, one is derived from the grammar path.
    mod_name: Option<&'a str>,
    recoverer: Option<RecoveryKind>,
    yacckind: Option<YaccKind>,
    // Diagnostic policy knobs; see the corresponding setter methods.
    error_on_conflicts: bool,
    warnings_are_errors: bool,
    show_warnings: bool,
    visibility: Visibility,
    rust_edition: RustEdition,
    // Debug hook invoked by `build` with the merged header, a runtime parser
    // builder, the token map, and the grammar path (see `inspect_rt`).
    inspect_rt: Option<
        Box<
            dyn for<'b> FnMut(
                &'b mut Header<Location>,
                RTParserBuilder<LexerTypesT::StorageT, LexerTypesT>,
                &'b HashMap<String, LexerTypesT::StorageT>,
                &PathBuf,
            ) -> Result<(), Box<dyn Error>>,
        >,
    >,
    // Test-only hook invoked with the finally-selected recovery kind.
    #[cfg(test)]
    inspect_callback: Option<Box<dyn Fn(RecoveryKind) -> Result<(), Box<dyn Error>>>>,
    phantom: PhantomData<LexerTypesT>,
}
263
264impl<
265 'a,
266 StorageT: 'static + Debug + Hash + PrimInt + Encode + Unsigned,
267 LexerTypesT: LexerTypes<StorageT = StorageT>,
268> CTParserBuilder<'a, LexerTypesT>
269where
270 usize: AsPrimitive<StorageT>,
271{
    /// Create a new `CTParserBuilder` with default settings: no grammar or
    /// output path configured; conflicts and warnings treated as errors;
    /// warnings shown; a private module; and the Rust 2021 edition.
    pub fn new() -> Self {
        CTParserBuilder {
            grammar_path: None,
            grammar_src: None,
            from_ast: None,
            output_path: None,
            mod_name: None,
            recoverer: None,
            yacckind: None,
            // Strict-by-default diagnostics.
            error_on_conflicts: true,
            warnings_are_errors: true,
            show_warnings: true,
            visibility: Visibility::Private,
            rust_edition: RustEdition::Rust2021,
            inspect_rt: None,
            #[cfg(test)]
            inspect_callback: None,
            phantom: PhantomData,
        }
    }
313
314 pub fn grammar_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
331 where
332 P: AsRef<Path>,
333 {
334 if !srcp.as_ref().is_relative() {
335 return Err(format!(
336 "Grammar path '{}' must be a relative path.",
337 srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
338 )
339 .into());
340 }
341
342 let mut grmp = current_dir()?;
343 grmp.push("src");
344 grmp.push(srcp.as_ref());
345 self.grammar_path = Some(grmp);
346
347 let mut outp = PathBuf::new();
348 outp.push(var("OUT_DIR").unwrap());
349 outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
350 create_dir_all(&outp)?;
351 let mut leaf = srcp
352 .as_ref()
353 .file_name()
354 .unwrap()
355 .to_str()
356 .unwrap()
357 .to_owned();
358 write!(leaf, ".{}", RUST_FILE_EXT).ok();
359 outp.push(leaf);
360 Ok(self.output_path(outp))
361 }
362
363 #[cfg(feature = "_unstable_api")]
366 pub fn grammar_ast(mut self, valid_ast: ASTWithValidityInfo, _api_key: UnstableApi) -> Self {
367 self.from_ast = Some(valid_ast);
368 self
369 }
370
371 pub fn grammar_path<P>(mut self, inp: P) -> Self
375 where
376 P: AsRef<Path>,
377 {
378 self.grammar_path = Some(inp.as_ref().to_owned());
379 self
380 }
381
382 #[cfg(feature = "_unstable_api")]
383 pub fn with_grammar_src(mut self, src: String, _api_key: UnstableApi) -> Self {
384 self.grammar_src = Some(src);
385 self
386 }
387
388 pub fn output_path<P>(mut self, outp: P) -> Self
393 where
394 P: AsRef<Path>,
395 {
396 self.output_path = Some(outp.as_ref().to_owned());
397 self
398 }
399
400 pub fn mod_name(mut self, mod_name: &'a str) -> Self {
404 self.mod_name = Some(mod_name);
405 self
406 }
407
408 pub fn visibility(mut self, vis: Visibility) -> Self {
410 self.visibility = vis;
411 self
412 }
413
414 pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
416 self.recoverer = Some(rk);
417 self
418 }
419
420 pub fn yacckind(mut self, yk: YaccKind) -> Self {
422 self.yacckind = Some(yk);
423 self
424 }
425
426 pub fn error_on_conflicts(mut self, b: bool) -> Self {
429 self.error_on_conflicts = b;
430 self
431 }
432
433 pub fn warnings_are_errors(mut self, b: bool) -> Self {
436 self.warnings_are_errors = b;
437 self
438 }
439
440 pub fn show_warnings(mut self, b: bool) -> Self {
443 self.show_warnings = b;
444 self
445 }
446
447 pub fn rust_edition(mut self, edition: RustEdition) -> Self {
450 self.rust_edition = edition;
451 self
452 }
453
    /// Test-only hook: register a callback which `build` invokes with the
    /// recovery kind finally selected (after header merging and defaulting).
    #[cfg(test)]
    pub fn inspect_recoverer(
        mut self,
        cb: Box<dyn for<'h, 'y> Fn(RecoveryKind) -> Result<(), Box<dyn Error>>>,
    ) -> Self {
        self.inspect_callback = Some(cb);
        self
    }
462
    /// Debug hook: register a callback which `build` invokes with the merged
    /// header, a runtime parser builder configured from this builder, the
    /// token map, and the grammar path, before code generation completes.
    #[doc(hidden)]
    pub fn inspect_rt(
        mut self,
        cb: Box<
            dyn for<'b, 'y> FnMut(
                &'b mut Header<Location>,
                RTParserBuilder<'y, StorageT, LexerTypesT>,
                &'b HashMap<String, StorageT>,
                &PathBuf,
            ) -> Result<(), Box<dyn Error>>,
        >,
    ) -> Self {
        self.inspect_rt = Some(cb);
        self
    }
478
    /// Build the parser: parse and validate the grammar, construct the LR
    /// state tables, and — unless an up-to-date cached output already exists
    /// — write the generated module to the configured output path.
    ///
    /// # Panics
    ///
    /// Panics if `grammar_path`/`output_path` were not set, or if the
    /// yacckind is `YaccKind::Eco`.
    pub fn build(mut self) -> Result<CTParser<StorageT>, Box<dyn Error>> {
        let grmp = self
            .grammar_path
            .as_ref()
            .expect("grammar_path must be specified before processing.");
        let outp = self
            .output_path
            .as_ref()
            .expect("output_path must be specified before processing.");
        let mut header = Header::new();

        // Seed the header with the builder's settings. `MergeBehavior::Ours`
        // makes these win over values from the grammar's `%grmtools` section.
        match header.entry("yacckind".to_string()) {
            Entry::Occupied(_) => unreachable!(),
            Entry::Vacant(mut v) => match self.yacckind {
                Some(YaccKind::Eco) => panic!("Eco compile-time grammar generation not supported."),
                Some(yk) => {
                    let yk_value = Value::try_from(yk)?;
                    let mut o = v.insert_entry(HeaderValue(
                        Location::Other("CTParserBuilder".to_string()),
                        yk_value,
                    ));
                    o.set_merge_behavior(MergeBehavior::Ours);
                }
                None => {
                    // No yacckind set on the builder: the grammar's header
                    // must supply one (checked via `missing()` below).
                    v.mark_required();
                }
            },
        }
        if let Some(recoverer) = self.recoverer {
            match header.entry("recoverer".to_string()) {
                Entry::Occupied(_) => unreachable!(),
                Entry::Vacant(v) => {
                    let rk_value: Value<Location> = Value::try_from(recoverer)?;
                    let mut o = v.insert_entry(HeaderValue(
                        Location::Other("CTParserBuilder".to_string()),
                        rk_value,
                    ));
                    o.set_merge_behavior(MergeBehavior::Ours);
                }
            }
        }

        {
            // Refuse to generate two parsers to the same output path within
            // one process. Lock scope is kept minimal.
            let mut lk = GENERATED_PATHS.lock().unwrap();
            if lk.contains(outp.as_path()) {
                return Err(format!("Generating two parsers to the same path ('{}') is not allowed: use CTParserBuilder::output_path (and, optionally, CTParserBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
            }
            lk.insert(outp.clone());
        }

        // Grammar source: either the in-memory string or the file contents.
        let inc = if let Some(grammar_src) = &self.grammar_src {
            grammar_src.clone()
        } else {
            read_to_string(grmp).map_err(|e| format!("When reading '{}': {e}", grmp.display()))?
        };

        // Parse the `%grmtools` section and merge it with the seeded header,
        // rendering any parse errors with source spans.
        let yacc_diag = SpannedDiagnosticFormatter::new(&inc, grmp);
        let parsed_header = GrmtoolsSectionParser::new(&inc, false).parse();
        if let Err(errs) = parsed_header {
            let mut out = String::new();
            out.push_str(&format!(
                "\n{ERROR}{}\n",
                yacc_diag.file_location_msg(" parsing the `%grmtools` section", None)
            ));
            for e in errs {
                out.push_str(&indent(" ", &yacc_diag.format_error(e).to_string()));
            }
            return Err(ErrorString(out))?;
        }
        let (parsed_header, _) = parsed_header.unwrap();
        header.merge_from(parsed_header)?;
        // After merging, the header's yacckind (builder or grammar supplied)
        // is authoritative.
        self.yacckind = header
            .get("yacckind")
            .map(|HeaderValue(_, val)| val)
            .map(YaccKind::try_from)
            .transpose()?;
        header.mark_used(&"yacckind".to_string());
        let ast_validation = if let Some(ast) = &self.from_ast {
            ast.clone()
        } else if let Some(yk) = self.yacckind {
            ASTWithValidityInfo::new(yk, &inc)
        } else {
            Err("Missing 'yacckind'".to_string())?
        };

        header.mark_used(&"recoverer".to_string());
        let rk_val = header.get("recoverer").map(|HeaderValue(_, rk_val)| rk_val);

        if let Some(rk_val) = rk_val {
            self.recoverer = Some(RecoveryKind::try_from(rk_val)?);
        } else {
            // Default recovery kind when neither builder nor header set one.
            self.recoverer = Some(RecoveryKind::CPCTPlus);
        }
        self.yacckind = Some(ast_validation.yacc_kind());
        let warnings = ast_validation.ast().warnings();
        let res = YaccGrammar::<StorageT>::new_from_ast_with_validity_info(&ast_validation);
        let grm = match res {
            Ok(_) if self.warnings_are_errors && !warnings.is_empty() => {
                // Grammar is valid but warnings are fatal: render them as an
                // error report.
                let mut out = String::new();
                out.push_str(&format!(
                    "\n{ERROR}{}\n",
                    yacc_diag.file_location_msg("", None)
                ));
                for e in warnings {
                    out.push_str(&format!(
                        "{}\n",
                        indent(" ", &yacc_diag.format_warning(e).to_string())
                    ));
                }
                return Err(ErrorString(out))?;
            }
            Ok(grm) => {
                if !warnings.is_empty() {
                    for w in warnings {
                        let ws_loc = yacc_diag.file_location_msg("", None);
                        let ws = indent(" ", &yacc_diag.format_warning(w).to_string());
                        // Inside a Cargo build script (OUT_DIR set) warnings
                        // go through the `cargo:warning` protocol; otherwise
                        // they go to stderr.
                        if std::env::var("OUT_DIR").is_ok() && self.show_warnings {
                            for line in ws_loc.lines().chain(ws.lines()) {
                                println!("cargo:warning={}", line);
                            }
                        } else if self.show_warnings {
                            eprintln!("{}", ws_loc);
                            eprintln!("{WARNING} {}", ws);
                        }
                    }
                }
                grm
            }
            Err(errs) => {
                let mut out = String::new();
                out.push_str(&format!(
                    "\n{ERROR}{}\n",
                    yacc_diag.file_location_msg("", None)
                ));
                for e in errs {
                    out.push_str(&indent(" ", &yacc_diag.format_error(e).to_string()));
                    out.push('\n');
                }

                return Err(ErrorString(out))?;
            }
        };

        #[cfg(test)]
        if let Some(cb) = &self.inspect_callback {
            cb(self.recoverer.expect("has a default value"))?;
        }

        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), i.as_storaget()))
            .collect::<HashMap<_, _>>();

        // Derive a module name if none was given: strip every extension from
        // the grammar file name, then append `_y`.
        let derived_mod_name = match self.mod_name {
            Some(s) => s.to_owned(),
            None => {
                let mut stem = grmp.to_str().unwrap();
                loop {
                    let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
                    if stem == new_stem {
                        break;
                    }
                    stem = new_stem;
                }
                format!("{}_y", stem)
            }
        };

        let cache = self.rebuild_cache(&derived_mod_name, &grm);

        // Fast path: if the output file is newer than the grammar and embeds
        // an identical cache string, skip regeneration entirely.
        if let Ok(ref inmd) = fs::metadata(grmp) {
            if let Ok(ref out_rs_md) = fs::metadata(outp) {
                if FileTime::from_last_modification_time(out_rs_md)
                    > FileTime::from_last_modification_time(inmd)
                {
                    if let Ok(outc) = read_to_string(outp) {
                        if outc.contains(&cache.to_string()) {
                            return Ok(CTParser {
                                regenerated: false,
                                rule_ids,
                                yacc_grammar: grm,
                                grammar_src: inc,
                                grammar_path: self.grammar_path.unwrap(),
                                conflicts: None,
                            });
                        } else {
                            #[cfg(grmtools_extra_checks)]
                            if std::env::var("CACHE_EXPECTED").is_ok() {
                                eprintln!("outc: {}", outc);
                                eprintln!("using cache: {}", cache,);
                                panic!("The cache regenerated however, it was expected to match");
                            }
                        }
                    }
                }
            }
        }

        // Stale or missing output: remove it (best-effort) and regenerate.
        fs::remove_file(outp).ok();

        let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?;
        if self.error_on_conflicts {
            if let Some(c) = stable.conflicts() {
                // Conflicts are tolerated only when they exactly match the
                // grammar's %expect / %expect-rr declarations.
                match (grm.expect(), grm.expectrr()) {
                    (Some(i), Some(j)) if i == c.sr_len() && j == c.rr_len() => (),
                    (Some(i), None) if i == c.sr_len() && 0 == c.rr_len() => (),
                    (None, Some(j)) if 0 == c.sr_len() && j == c.rr_len() => (),
                    (None, None) if 0 == c.rr_len() && 0 == c.sr_len() => (),
                    _ => {
                        let conflicts_diagnostic = yacc_diag.format_conflicts::<LexerTypesT>(
                            &grm,
                            ast_validation.ast(),
                            c,
                            &sgraph,
                            &stable,
                        );
                        return Err(Box::new(CTConflictsError {
                            conflicts_diagnostic,
                            phantom: PhantomData,
                            #[cfg(test)]
                            stable,
                        }));
                    }
                }
            }
        }

        if let Some(ref mut inspector_rt) = self.inspect_rt {
            let rt: RTParserBuilder<'_, StorageT, LexerTypesT> =
                RTParserBuilder::new(&grm, &stable);
            let rt = if let Some(rk) = self.recoverer {
                rt.recoverer(rk)
            } else {
                rt
            };
            inspector_rt(&mut header, rt, &rule_ids, grmp)?
        }

        // Reject unknown header keys and report required-but-missing ones.
        let unused_keys = header.unused();
        if !unused_keys.is_empty() {
            return Err(format!("Unused keys in header: {}", unused_keys.join(", ")).into());
        }
        let missing_keys = header
            .missing()
            .iter()
            .map(|s| s.as_str())
            .collect::<Vec<_>>();
        if !missing_keys.is_empty() {
            return Err(format!(
                "Required values were missing from the header: {}",
                missing_keys.join(", ")
            )
            .into());
        }

        self.output_file(
            &grm,
            &stable,
            &derived_mod_name,
            outp,
            &format!("/* CACHE INFORMATION {} */\n", cache),
            &yacc_diag,
        )?;
        let conflicts = if stable.conflicts().is_some() {
            Some((sgraph, stable))
        } else {
            None
        };
        Ok(CTParser {
            regenerated: true,
            rule_ids,
            yacc_grammar: grm,
            grammar_src: inc,
            grammar_path: self.grammar_path.unwrap(),
            conflicts,
        })
    }
829
830 #[deprecated(
837 since = "0.11.0",
838 note = "Please use grammar_in_src_dir(), build(), and token_map() instead"
839 )]
840 #[allow(deprecated)]
841 pub fn process_file_in_src(
842 &mut self,
843 srcp: &str,
844 ) -> Result<HashMap<String, StorageT>, Box<dyn Error>> {
845 let mut inp = current_dir()?;
846 inp.push("src");
847 inp.push(srcp);
848 let mut outp = PathBuf::new();
849 outp.push(var("OUT_DIR").unwrap());
850 outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
851 create_dir_all(&outp)?;
852 let mut leaf = Path::new(srcp)
853 .file_name()
854 .unwrap()
855 .to_str()
856 .unwrap()
857 .to_owned();
858 write!(leaf, ".{}", RUST_FILE_EXT).ok();
859 outp.push(leaf);
860 self.process_file(inp, outp)
861 }
862
863 #[deprecated(
899 since = "0.11.0",
900 note = "Please use grammar_path(), output_path(), build(), and token_map() instead"
901 )]
902 pub fn process_file<P, Q>(
903 &mut self,
904 inp: P,
905 outp: Q,
906 ) -> Result<HashMap<String, StorageT>, Box<dyn Error>>
907 where
908 P: AsRef<Path>,
909 Q: AsRef<Path>,
910 {
911 self.grammar_path = Some(inp.as_ref().to_owned());
912 self.output_path = Some(outp.as_ref().to_owned());
913 let cl: CTParserBuilder<LexerTypesT> = CTParserBuilder {
914 grammar_path: self.grammar_path.clone(),
915 grammar_src: None,
916 from_ast: None,
917 output_path: self.output_path.clone(),
918 mod_name: self.mod_name,
919 recoverer: self.recoverer,
920 yacckind: self.yacckind,
921 error_on_conflicts: self.error_on_conflicts,
922 warnings_are_errors: self.warnings_are_errors,
923 show_warnings: self.show_warnings,
924 visibility: self.visibility.clone(),
925 rust_edition: self.rust_edition,
926 inspect_rt: None,
927 #[cfg(test)]
928 inspect_callback: None,
929 phantom: PhantomData,
930 };
931 Ok(cl.build()?.rule_ids)
932 }
933
    /// Render the generated parser module for `grm`/`stable` and write it —
    /// pretty-printed when possible — to `outp_rs`, appending `cache` so a
    /// later `build` can detect that the output is up to date.
    fn output_file<P: AsRef<Path>>(
        &self,
        grm: &YaccGrammar<StorageT>,
        stable: &StateTable<StorageT>,
        mod_name: &str,
        outp_rs: P,
        cache: &str,
        diag: &SpannedDiagnosticFormatter,
    ) -> Result<(), Box<dyn Error>> {
        let visibility = self.visibility.clone();
        // User-written action code and the action wrappers are only emitted
        // for action-bearing yacc kinds.
        let user_actions = if let Some(
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools,
        ) = self.yacckind
        {
            Some(self.gen_user_actions(grm, diag)?)
        } else {
            None
        };
        let rule_consts = self.gen_rule_consts(grm)?;
        let token_epp = self.gen_token_epp(grm)?;
        let parse_function = self.gen_parse_function(grm, stable)?;
        let action_wrappers = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                Some(self.gen_wrappers(grm)?)
            }
            YaccKind::Original(YaccOriginalActionKind::NoAction)
            | YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => None,
            _ => unreachable!(),
        };

        // For generic-parse-tree grammars, re-export `Node` from its
        // relocated path so older user code keeps compiling.
        let additional_decls =
            if let Some(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)) =
                self.yacckind
            {
                Some(quote! {
                    #[allow(unused_imports)]
                    pub use ::lrpar::parser::_deprecated_moved_::Node;
                })
            } else {
                None
            };

        let mod_name =
            match syn::parse_str::<proc_macro2::Ident>(mod_name) {
                Ok(s) => s,
                Err(e) => return Err(format!(
                    "CTParserBuilder::mod_name(\"{}\") is not a valid rust identifier due to '{}'",
                    mod_name, e
                )
                .into()),
            };
        // The public surface lives in an inner `_parser_` module that is
        // glob-re-exported, so its lint settings stay scoped.
        let out_tokens = quote! {
            #visibility mod #mod_name {
                #user_actions
                mod _parser_ {
                    #![allow(clippy::type_complexity)]
                    #![allow(clippy::unnecessary_wraps)]
                    #![deny(unsafe_code)]
                    #[allow(unused_imports)]
                    use super::*;
                    #additional_decls
                    #parse_function
                    #rule_consts
                    #token_epp
                    #action_wrappers
                } #[allow(unused_imports)]
                pub use _parser_::*;
                #[allow(unused_imports)]
                use ::lrpar::Lexeme;
            } };
        // Best-effort pretty-printing: fall back to the raw token stream if
        // it cannot be re-parsed.
        let unformatted = out_tokens.to_string();
        let outs = syn::parse_str(&unformatted)
            .map(|syntax_tree| prettyplease::unparse(&syntax_tree))
            .unwrap_or(unformatted);
        let mut f = File::create(outp_rs)?;
        f.write_all(outs.as_bytes())?;
        f.write_all(cache.as_bytes())?;
        Ok(())
    }
1019
    /// Build the cache string that gets embedded in the output file: a token
    /// stream (quoted as a string literal) capturing every builder setting
    /// that affects code generation, so `build` can detect stale outputs.
    fn rebuild_cache(&self, derived_mod_name: &'_ str, grm: &YaccGrammar<StorageT>) -> TokenStream {
        // Destructure exhaustively so adding a field to the builder forces a
        // decision about whether it belongs in the cache.
        let Self {
            grammar_path,
            grammar_src: _,
            from_ast: _,
            mod_name,
            recoverer,
            yacckind,
            output_path: _,
            error_on_conflicts,
            warnings_are_errors,
            show_warnings,
            visibility,
            rust_edition,
            inspect_rt: _,
            #[cfg(test)]
            inspect_callback: _,
            phantom: _,
        } = self;
        // Build timestamp of this crate: a different lrpar build invalidates
        // every cache.
        let build_time = env!("VERGEN_BUILD_TIMESTAMP");
        let grammar_path = grammar_path.as_ref().unwrap().to_string_lossy();
        let mod_name = QuoteOption(mod_name.as_deref());
        let visibility = visibility.to_variant_tokens();
        let rust_edition = rust_edition.to_variant_tokens();
        let yacckind = yacckind.expect("is_some() by this point");
        // Token index -> name pairs; these affect the generated tables.
        let rule_map = grm
            .iter_tidxs()
            .map(|tidx| {
                QuoteTuple((
                    usize::from(tidx),
                    grm.token_name(tidx).unwrap_or("<unknown>"),
                ))
            })
            .collect::<Vec<_>>();
        let cache_info = quote! {
            BUILD_TIME = #build_time
            DERIVED_MOD_NAME = #derived_mod_name
            GRAMMAR_PATH = #grammar_path
            MOD_NAME = #mod_name
            RECOVERER = #recoverer
            YACC_KIND = #yacckind
            ERROR_ON_CONFLICTS = #error_on_conflicts
            SHOW_WARNINGS = #show_warnings
            WARNINGS_ARE_ERRORS = #warnings_are_errors
            RUST_EDITION = #rust_edition
            RULE_IDS_MAP = [#(#rule_map,)*]
            VISIBILITY = #visibility
        };
        // Quote the whole thing as a single string literal so it survives as
        // an opaque blob inside the generated file's cache comment.
        let cache_info_str = cache_info.to_string();
        quote!(#cache_info_str)
    }
1083
    /// Generate the public `parse` function of the output module: it
    /// reconstitutes the grammar and state table from embedded binary data
    /// and drives an `::lrpar::RTParserBuilder` in the mode dictated by the
    /// yacc kind.
    fn gen_parse_function(
        &self,
        grm: &YaccGrammar<StorageT>,
        stable: &StateTable<StorageT>,
    ) -> Result<TokenStream, Box<dyn Error>> {
        let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
        let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
        let recoverer = self.recoverer;
        // The body of `parse` differs per action kind.
        let run_parser = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
                quote! {
                    ::lrpar::RTParserBuilder::new(grm, stable)
                        .recoverer(#recoverer)
                        .parse_map(
                            lexer,
                            &|lexeme| Node::Term{lexeme},
                            &|ridx, nodes| Node::Nonterm{ridx, nodes}
                        )
                }
            }
            YaccKind::Original(YaccOriginalActionKind::NoAction) => {
                // Only the error vector (`.1`) is of interest here.
                quote! {
                    ::lrpar::RTParserBuilder::new(grm, stable)
                        .recoverer(#recoverer)
                        .parse_map(lexer, &|_| (), &|_, _| ()).1
                }
            }
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                let actionskind = str::parse::<TokenStream>(ACTIONS_KIND)?;
                let parsed_parse_generics = make_generics(grm.parse_generics().as_deref())?;
                let (_, type_generics, _) = parsed_parse_generics.split_for_impl();
                // Optional extra `%parse-param` threaded through to every
                // action function; defaults to unit.
                let (action_fn_parse_param, action_fn_parse_param_ty) = match grm.parse_param() {
                    Some((name, ty)) => {
                        let name = str::parse::<TokenStream>(name)?;
                        let ty = str::parse::<TokenStream>(ty)?;
                        (quote!(#name), quote!(#ty))
                    }
                    None => (quote!(()), quote!(())),
                };
                // One wrapper function per production.
                let wrappers = grm.iter_pidxs().map(|pidx| {
                    let pidx = usize::from(pidx);
                    format_ident!("{}wrapper_{}", ACTION_PREFIX, pidx)
                });
                // Rust 2015 does not accept `'_` in this position.
                let edition_lifetime = if self.rust_edition != RustEdition::Rust2015 {
                    quote!('_,)
                } else {
                    quote!()
                };
                let ridx = usize::from(self.user_start_ridx(grm));
                let action_ident = format_ident!("{}{}", ACTIONS_KIND_PREFIX, ridx);

                // Unwrap the start rule's variant of the actions-kind enum
                // to produce the user-visible result value.
                quote! {
                    let actions: ::std::vec::Vec<
                        &dyn Fn(
                            ::cfgrammar::RIdx<#storaget>,
                            &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                            ::cfgrammar::Span,
                            ::std::vec::Drain<#edition_lifetime ::lrpar::parser::AStackType<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #actionskind #type_generics>>,
                            #action_fn_parse_param_ty
                        ) -> #actionskind #type_generics
                    > = ::std::vec![#(&#wrappers,)*];
                    match ::lrpar::RTParserBuilder::new(grm, stable)
                        .recoverer(#recoverer)
                        .parse_actions(lexer, &actions, #action_fn_parse_param) {
                        (Some(#actionskind::#action_ident(x)), y) => (Some(x), y),
                        (None, y) => (None, y),
                        _ => unreachable!()
                    }
                }
            }
            kind => panic!("YaccKind {:?} not supported", kind),
        };

        // Generics for the generated `parse` fn (user-specified for
        // action-bearing kinds, defaults otherwise).
        let parsed_parse_generics: Generics = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                make_generics(grm.parse_generics().as_deref())?
            }
            _ => make_generics(None)?,
        };
        let (generics, _, where_clause) = parsed_parse_generics.split_for_impl();

        // Signature of the optional `%parse-param` argument of `parse`.
        let parse_fn_parse_param = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                if let Some((name, tyname)) = grm.parse_param() {
                    let name = str::parse::<TokenStream>(name)?;
                    let tyname = str::parse::<TokenStream>(tyname)?;
                    Some(quote! {#name: #tyname})
                } else {
                    None
                }
            }
            _ => None,
        };
        // Return type of `parse`, again per action kind.
        let parse_fn_return_ty = match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                let actiont = grm
                    .actiontype(self.user_start_ridx(grm))
                    .as_ref()
                    .map(|at| str::parse::<TokenStream>(at))
                    .transpose()?;
                quote! {
                    (::std::option::Option<#actiont>, ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>)
                }
            }
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => quote! {
                (::std::option::Option<Node<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #storaget>>,
                ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>)
            },
            YaccKind::Original(YaccOriginalActionKind::NoAction) => quote! {
                ::std::vec::Vec<::lrpar::LexParseError<#storaget, #lexertypest>>
            },
            _ => unreachable!(),
        };

        // Serialise the grammar and state table; the generated module lazily
        // reconstitutes them on first use.
        let grm_data = encode_to_vec(grm, bincode::config::standard())?;
        let stable_data = encode_to_vec(stable, bincode::config::standard())?;
        Ok(quote! {
            const __GRM_DATA: &[u8] = &[#(#grm_data,)*];
            const __STABLE_DATA: &[u8] = &[#(#stable_data,)*];

            fn __lrpar_parser_data() -> &'static ::lrpar::ParserData<#storaget> {
                static DATA: ::std::sync::OnceLock<::lrpar::ParserData<#storaget>>
                    = ::std::sync::OnceLock::new();
                DATA.get_or_init(
                    || ::lrpar::ctbuilder::_reconstitute(__GRM_DATA, __STABLE_DATA)
                )
            }

            #[allow(dead_code)]
            pub fn parse #generics (
                lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                #parse_fn_parse_param
            ) -> #parse_fn_return_ty
            #where_clause
            {
                let __data = __lrpar_parser_data();
                let grm = __data.grm();
                let stable = __data.stable();
                #run_parser
            }
        })
    }
1230
1231 fn gen_rule_consts(
1232 &self,
1233 grm: &YaccGrammar<StorageT>,
1234 ) -> Result<TokenStream, proc_macro2::LexError> {
1235 let mut toks = TokenStream::new();
1236 for ridx in grm.iter_rules() {
1237 if !grm.rule_to_prods(ridx).contains(&grm.start_prod()) {
1238 let r_const = format_ident!("R_{}", grm.rule_name_str(ridx).to_ascii_uppercase());
1239 let storage_ty = str::parse::<TokenStream>(type_name::<StorageT>())?;
1240 let ridx = UnsuffixedUsize(usize::from(ridx));
1241 toks.extend(quote! {
1242 #[allow(dead_code)]
1243 pub const #r_const: #storage_ty = #ridx;
1244 });
1245 }
1246 }
1247 Ok(toks)
1248 }
1249
1250 fn gen_token_epp(
1251 &self,
1252 grm: &YaccGrammar<StorageT>,
1253 ) -> Result<TokenStream, proc_macro2::LexError> {
1254 let mut tidxs = Vec::new();
1255 for tidx in grm.iter_tidxs() {
1256 tidxs.push(QuoteOption(grm.token_epp(tidx)));
1257 }
1258 let const_epp_ident = format_ident!("{}EPP", GLOBAL_PREFIX);
1259 let storage_ty = str::parse::<TokenStream>(type_name::<StorageT>())?;
1260 Ok(quote! {
1261 const #const_epp_ident: &[::std::option::Option<&str>] = &[
1262 #(#tidxs,)*
1263 ];
1264
1265 #[allow(dead_code)]
1268 pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<#storage_ty>) -> ::std::option::Option<&'a str> {
1269 #const_epp_ident[usize::from(tidx)]
1270 }
1271 })
1272 }
1273
    /// Generates, as a `TokenStream`, one wrapper function per production plus the
    /// `__GtActionsKind` enum that unifies the action types of all rules.
    ///
    /// At parse time each wrapper drains this production's symbols off the parse
    /// stack, unwraps each into the type the user's action function expects
    /// (rule results are extracted from their `__GtActionsKind` variant; lexemes
    /// become `Ok(l)`/`Err(l)` depending on `l.faulty()`), calls the matching
    /// `__gt_action_<pidx>` function, and re-wraps its result in the
    /// `__GtActionsKind` variant for the production's rule.
    fn gen_wrappers(&self, grm: &YaccGrammar<StorageT>) -> Result<TokenStream, Box<dyn Error>> {
        let parsed_parse_generics = make_generics(grm.parse_generics().as_deref())?;
        let (generics, type_generics, where_clause) = parsed_parse_generics.split_for_impl();

        // `%parse-param`, if given, becomes an extra parameter on every wrapper;
        // otherwise a unit placeholder keeps the signatures uniform.
        let (parse_paramname, parse_paramdef);
        match grm.parse_param() {
            Some((name, tyname)) => {
                parse_paramname = str::parse::<TokenStream>(name)?;
                let ty = str::parse::<TokenStream>(tyname)?;
                parse_paramdef = quote!(#parse_paramname: #ty);
            }
            None => {
                parse_paramname = quote!(());
                parse_paramdef = quote! {_: ()};
            }
        };

        let mut wrappers = TokenStream::new();
        for pidx in grm.iter_pidxs() {
            let ridx = grm.prod_to_rule(pidx);

            // All generated identifiers carry ACTION_PREFIX ("__gt_") to avoid
            // clashing with names in user-supplied action code.
            let wrapper_fn = format_ident!("{}wrapper_{}", ACTION_PREFIX, usize::from(pidx));
            let ridx_var = format_ident!("{}ridx", ACTION_PREFIX);
            let lexer_var = format_ident!("{}lexer", ACTION_PREFIX);
            let span_var = format_ident!("{}span", ACTION_PREFIX);
            let args_var = format_ident!("{}args", ACTION_PREFIX);
            let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
            let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
            let actionskind = str::parse::<TokenStream>(ACTIONS_KIND)?;
            // Rust 2015 does not accept the anonymous `'_` lifetime used in the
            // `Drain` parameter below, so it is only emitted for later editions.
            let edition_lifetime = if self.rust_edition != RustEdition::Rust2015 {
                Some(quote!('_,))
            } else {
                None
            };
            let mut wrapper_fn_body = TokenStream::new();
            if grm.action(pidx).is_some() {
                // Pop one stack entry per symbol in the production, binding each to
                // `__gt_arg_<i>` (1-based, matching `$1`, `$2`, ... in action code).
                for i in 0..grm.prod(pidx).len() {
                    let arg = format_ident!("{}arg_{}", ACTION_PREFIX, i + 1);
                    wrapper_fn_body.extend(match grm.prod(pidx)[i] {
                        Symbol::Rule(ref_ridx) => {
                            let ref_ridx = usize::from(ref_ridx);
                            let actionvariant = format_ident!("{}{}", ACTIONS_KIND_PREFIX, ref_ridx);
                            // A rule result arrives wrapped in its ActionsKind
                            // variant; anything else would be a parser bug.
                            quote! {
                                #[allow(clippy::let_unit_value)]
                                let #arg = match #args_var.next().unwrap() {
                                    ::lrpar::parser::AStackType::ActionType(#actionskind::#type_generics::#actionvariant(x)) => x,
                                    _ => unreachable!()
                                };
                            }
                        }
                        Symbol::Token(_) => {
                            // Lexemes are surfaced to actions as a Result so that
                            // error-recovery-inserted ("faulty") lexemes are visible.
                            quote! {
                                let #arg = match #args_var.next().unwrap() {
                                    ::lrpar::parser::AStackType::Lexeme(l) => {
                                        if l.faulty() {
                                            Err(l)
                                        } else {
                                            Ok(l)
                                        }
                                    },
                                    ::lrpar::parser::AStackType::ActionType(_) => unreachable!()
                                };
                            }
                        }
                    })
                }

                let args = (0..grm.prod(pidx).len())
                    .map(|i| format_ident!("{}arg_{}", ACTION_PREFIX, i + 1))
                    .collect::<Vec<_>>();
                let action_fn = format_ident!("{}action_{}", ACTION_PREFIX, usize::from(pidx));
                let actionsvariant = format_ident!("{}{}", ACTIONS_KIND_PREFIX, usize::from(ridx));

                wrapper_fn_body.extend(match grm.actiontype(ridx) {
                    // A `()` action type is special-cased: call the action as a
                    // statement, then wrap a literal unit (avoids binding a unit
                    // value to the variant expression directly).
                    Some(s) if s == "()" => {
                        quote! {
                            #action_fn(#ridx_var, #lexer_var, #span_var, #parse_paramname, #(#args,)*);
                            #actionskind::#type_generics::#actionsvariant(())
                        }
                    }
                    _ => {
                        quote! {
                            #actionskind::#type_generics::#actionsvariant(#action_fn(#ridx_var, #lexer_var, #span_var, #parse_paramname, #(#args,)*))
                        }
                    }
                })
            } else if pidx == grm.start_prod() {
                // The synthetic start production's wrapper is never invoked.
                wrapper_fn_body.extend(quote!(unreachable!()));
            } else {
                unreachable!(
                    "Production in rule '{}' must have an action body, which should have been handled by gen_user_actions.",
                    grm.rule_name_str(grm.prod_to_rule(pidx))
                );
            };

            // The start production's wrapper ignores all of its parameters.
            let attrib = if pidx == grm.start_prod() {
                Some(quote!(#[allow(unused_variables)]))
            } else {
                None
            };
            wrappers.extend(quote! {
                #attrib
                fn #wrapper_fn #generics (
                    #ridx_var: ::cfgrammar::RIdx<#storaget>,
                    #lexer_var: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                    #span_var: ::cfgrammar::Span,
                    mut #args_var: ::std::vec::Drain<#edition_lifetime ::lrpar::parser::AStackType<<#lexertypest as ::lrpar::LexerTypes>::LexemeT, #actionskind #type_generics>>,
                    #parse_paramdef
                ) -> #actionskind #type_generics
                #where_clause
                {
                    #wrapper_fn_body
                }
            })
        }
        // Build the ActionsKind enum: one `Ak<ridx>(<action type>)` variant per
        // rule that has an action type.
        let mut actionskindvariants = Vec::new();
        let actionskindhidden = format_ident!("_{}", ACTIONS_KIND_HIDDEN);
        let actionskind = str::parse::<TokenStream>(ACTIONS_KIND).unwrap();
        let mut phantom_data_type = Vec::new();
        for ridx in grm.iter_rules() {
            if let Some(actiont) = grm.actiontype(ridx) {
                let actionskindvariant =
                    format_ident!("{}{}", ACTIONS_KIND_PREFIX, usize::from(ridx));
                let actiont = str::parse::<TokenStream>(actiont).unwrap();
                actionskindvariants.push(quote! {
                    #actionskindvariant(#actiont)
                })
            }
        }
        // Collect every generic lifetime and type parameter into a PhantomData
        // tuple so the enum uses all of its declared generics even when no
        // variant mentions them.
        for lifetime in parsed_parse_generics.lifetimes() {
            let lifetime = &lifetime.lifetime;
            phantom_data_type.push(quote! { &#lifetime () });
        }
        for type_param in parsed_parse_generics.type_params() {
            let ident = &type_param.ident;
            phantom_data_type.push(quote! { #ident });
        }
        actionskindvariants.push(quote! {
            #actionskindhidden(::std::marker::PhantomData<(#(#phantom_data_type,)*)>)
        });
        wrappers.extend(quote! {
            #[allow(dead_code)]
            enum #actionskind #generics #where_clause {
                #(#actionskindvariants,)*
            }
        });
        Ok(wrappers)
    }
1432
    /// Generates, as a `TokenStream`, the `%%`-section program code (if any) and
    /// one `__gt_action_<pidx>` function per production containing the user's
    /// action code.
    ///
    /// Inside action code the following `$`-substitutions are performed:
    ///   * `$$`      -> a literal `$`
    ///   * `$lexer`  -> the generated lexer variable (`__gt_lexer`)
    ///   * `$span`   -> the generated span variable (`__gt_span`)
    ///   * `$<n>`    -> the n'th production symbol (`__gt_arg_<n>`)
    /// Any other text after `$` is reported as an error via `diag`.
    fn gen_user_actions(
        &self,
        grm: &YaccGrammar<StorageT>,
        diag: &SpannedDiagnosticFormatter,
    ) -> Result<TokenStream, Box<dyn Error>> {
        // The `%%`-terminated "programs" section is passed through verbatim.
        let programs = grm
            .programs()
            .as_ref()
            .map(|s| str::parse::<TokenStream>(s))
            .transpose()?;
        let mut action_fns = TokenStream::new();
        let parsed_parse_generics = make_generics(grm.parse_generics().as_deref())?;
        let (generics, _, where_clause) = parsed_parse_generics.split_for_impl();
        let (parse_paramname, parse_paramdef, parse_param_unit);
        match grm.parse_param() {
            Some((name, tyname)) => {
                parse_param_unit = tyname.trim() == "()";
                parse_paramname = str::parse::<TokenStream>(name)?;
                let ty = str::parse::<TokenStream>(tyname)?;
                parse_paramdef = quote!(#parse_paramname: #ty);
            }
            None => {
                parse_param_unit = true;
                parse_paramname = quote!(());
                parse_paramdef = quote! {_: ()};
            }
        };
        for pidx in grm.iter_pidxs() {
            // The synthetic start production has no user action.
            if pidx == grm.start_prod() {
                continue;
            }

            // Work out the argument type of each symbol in the production: a
            // rule contributes its action type; a token contributes
            // `Result<LexemeT, LexemeT>` (Err for recovery-inserted lexemes).
            let mut args = Vec::with_capacity(grm.prod(pidx).len());
            for i in 0..grm.prod(pidx).len() {
                let argt = match grm.prod(pidx)[i] {
                    Symbol::Rule(ref_ridx) => {
                        if let Some(action_type) = grm.actiontype(ref_ridx).as_ref() {
                            str::parse::<TokenStream>(action_type)?
                        } else {
                            // Referenced rule has no `%type`-style action type:
                            // report a spanned error pointing at the rule name.
                            let mut s = String::from("\n");
                            let rule_span = grm.rule_name_span(ref_ridx);
                            s.push_str(&diag.file_location_msg("Error", Some(rule_span)));
                            s.push_str("\n");
                            s.push_str(&diag.underline_span_with_text(
                                rule_span,
                                "Rule missing action type".to_string(),
                                '^',
                            ));
                            return Err(ErrorString(s).into());
                        }
                    }
                    Symbol::Token(_) => {
                        let lexemet =
                            str::parse::<TokenStream>(type_name::<LexerTypesT::LexemeT>())?;
                        quote!(::std::result::Result<#lexemet, #lexemet>)
                    }
                };
                let arg = format_ident!("{}arg_{}", ACTION_PREFIX, i + 1);
                args.push(quote!(mut #arg: #argt));
            }

            // A `()` action type generates no `-> ...` return annotation.
            let returnt = {
                let actiont = grm.actiontype(grm.prod_to_rule(pidx)).as_ref().unwrap();
                if actiont == "()" {
                    None
                } else {
                    let actiont = str::parse::<TokenStream>(actiont)?;
                    Some(quote!( -> #actiont))
                }
            };
            let action_fn = format_ident!("{}action_{}", ACTION_PREFIX, usize::from(pidx));
            let lexer_var = format_ident!("{}lexer", ACTION_PREFIX);
            let span_var = format_ident!("{}span", ACTION_PREFIX);
            let ridx_var = format_ident!("{}ridx", ACTION_PREFIX);
            let storaget = str::parse::<TokenStream>(type_name::<StorageT>())?;
            let lexertypest = str::parse::<TokenStream>(type_name::<LexerTypesT>())?;
            // For a non-unit parse param, emit `let _ = <param>;` so actions
            // that never mention the param don't produce unused warnings.
            let bind_parse_param = if !parse_param_unit {
                Some(quote! {let _ = #parse_paramname;})
            } else {
                None
            };

            // Every (non-start) production must have action code by this point.
            let pre_action = grm.action(pidx).as_ref().ok_or_else(|| {
                let mut s = String::from("\n");
                let span = grm.prod_span(pidx);
                s.push_str(&diag.file_location_msg("Error", Some(span)));
                s.push_str("\n");
                s.push_str(&diag.underline_span_with_text(
                    span,
                    "Production is missing action code".to_string(),
                    '^',
                ));
                ErrorString(s)
            })?;
            // Scan the action code, copying text verbatim and rewriting the
            // `$...` forms documented above. `last` is the index of the first
            // not-yet-copied byte of `pre_action`.
            let mut last = 0;
            let mut outs = String::new();
            loop {
                match pre_action[last..].find('$') {
                    Some(off) => {
                        if pre_action[last + off..].starts_with("$$") {
                            // `$$`: copy a single `$` and skip both characters.
                            outs.push_str(&pre_action[last..last + off + "$".len()]);
                            last = last + off + "$$".len();
                        } else if pre_action[last + off..].starts_with("$lexer") {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}lexer", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$lexer".len();
                        } else if pre_action[last + off..].starts_with("$span") {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}span", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$span".len();
                        } else if last + off + 1 < pre_action.len()
                            && pre_action[last + off + 1..].starts_with(|c: char| c.is_numeric())
                        {
                            // `$<digits>`: emit the `__gt_arg_` prefix; the digits
                            // themselves are copied verbatim on a later iteration
                            // since `last` only advances past the `$`.
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}arg_", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$".len();
                        } else {
                            // Anything else after `$` is an error, underlined
                            // from just past the `$` to the end of the action.
                            let span = grm.action_span(pidx).unwrap();
                            let inner_span =
                                Span::new(span.start() + last + off + "$".len(), span.end());
                            let mut s = String::from("\n");
                            s.push_str(&diag.file_location_msg("Error", Some(inner_span)));
                            s.push_str("\n");
                            s.push_str(&diag.underline_span_with_text(
                                inner_span,
                                "Unknown text following '$'".to_string(),
                                '^',
                            ));
                            return Err(ErrorString(s).into());
                        }
                    }
                    None => {
                        // No more `$`s: copy the remainder and stop.
                        outs.push_str(&pre_action[last..]);
                        break;
                    }
                }
            }

            let action_body = str::parse::<TokenStream>(&outs)?;
            action_fns.extend(quote! {
                #[allow(clippy::too_many_arguments)]
                fn #action_fn #generics (
                    #ridx_var: ::cfgrammar::RIdx<#storaget>,
                    #lexer_var: &'lexer dyn ::lrpar::NonStreamingLexer<'input, #lexertypest>,
                    #span_var: ::cfgrammar::Span,
                    #parse_paramdef,
                    #(#args,)*
                ) #returnt
                #where_clause
                {
                    #bind_parse_param
                    #action_body
                }
            })
        }
        Ok(quote! {
            #programs
            #action_fns
        })
    }
1601
1602 fn user_start_ridx(&self, grm: &YaccGrammar<StorageT>) -> RIdx<StorageT> {
1606 debug_assert_eq!(grm.prod(grm.start_prod()).len(), 1);
1607 match grm.prod(grm.start_prod())[0] {
1608 Symbol::Rule(ridx) => ridx,
1609 _ => unreachable!(),
1610 }
1611 }
1612}
1613
#[doc(hidden)]
// The grammar and LR state table pair reconstituted from the buffers that a
// generated parser embeds at build time (see `_reconstitute`).
pub struct ParserData<StorageT: Eq + Hash> {
    // The deserialised grammar.
    grm: YaccGrammar<StorageT>,
    // The deserialised LR state table.
    stable: StateTable<StorageT>,
}
1621
1622impl<StorageT: Eq + Hash> ParserData<StorageT> {
1623 pub fn grm(&self) -> &YaccGrammar<StorageT> {
1624 &self.grm
1625 }
1626
1627 pub fn stable(&self) -> &StateTable<StorageT> {
1628 &self.stable
1629 }
1630}
1631
1632#[doc(hidden)]
1635pub fn _reconstitute<StorageT: Decode<()> + Eq + Hash + PrimInt + Unsigned + 'static>(
1636 grm_buf: &[u8],
1637 stable_buf: &[u8],
1638) -> ParserData<StorageT> {
1639 let (grm, _) = decode_from_slice(grm_buf, bincode::config::standard()).unwrap();
1640 let (stable, _) = decode_from_slice(stable_buf, bincode::config::standard()).unwrap();
1641 ParserData { grm, stable }
1642}
1643
/// The result of building a compile-time parser, giving access to the grammar,
/// its token map, and any conflict information.
pub struct CTParser<StorageT = u32>
where
    StorageT: Eq + Hash,
{
    // Whether the output file was regenerated by this build (as opposed to an
    // up-to-date cached output being reused).
    regenerated: bool,
    // Maps token names to their `StorageT` ids (exposed via `token_map`; the
    // field name is historical).
    rule_ids: HashMap<String, StorageT>,
    // The grammar the parser was built from.
    yacc_grammar: YaccGrammar<StorageT>,
    // The grammar's source text.
    grammar_src: String,
    // The path the grammar was read from.
    grammar_path: PathBuf,
    // Populated only if the grammar has conflicts (see `conflicts`).
    conflicts: Option<(StateGraph<StorageT>, StateTable<StorageT>)>,
}
1656
1657impl<StorageT> CTParser<StorageT>
1658where
1659 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
1660 usize: AsPrimitive<StorageT>,
1661{
1662 pub fn regenerated(&self) -> bool {
1664 self.regenerated
1665 }
1666
1667 pub fn token_map(&self) -> &HashMap<String, StorageT> {
1670 &self.rule_ids
1671 }
1672
1673 #[allow(private_interfaces)]
1679 pub fn conflicts(
1680 &self,
1681 _: crate::unstable::UnstableApi,
1682 ) -> Option<(
1683 &YaccGrammar<StorageT>,
1684 &StateGraph<StorageT>,
1685 &StateTable<StorageT>,
1686 &Conflicts<StorageT>,
1687 )> {
1688 if let Some((sgraph, stable)) = &self.conflicts {
1689 return Some((
1690 &self.yacc_grammar,
1691 sgraph,
1692 stable,
1693 stable.conflicts().unwrap(),
1694 ));
1695 }
1696 None
1697 }
1698
1699 #[doc(hidden)]
1700 pub fn yacc_grammar(&self) -> &YaccGrammar<StorageT> {
1701 &self.yacc_grammar
1702 }
1703 #[doc(hidden)]
1704 pub fn grammar_src(&self) -> &str {
1705 &self.grammar_src
1706 }
1707 #[doc(hidden)]
1708 pub fn grammar_path(&self) -> &Path {
1709 self.grammar_path.as_path()
1710 }
1711}
1712
/// Prefixes every line of `s` with `indent`, normalising trailing newlines to
/// exactly one. Note that the indent is also emitted after the final newline
/// (i.e. the result ends with `"\n" + indent`), matching the historical
/// behaviour of this helper.
fn indent(indent: &str, s: &str) -> String {
    let body = s.trim_end_matches('\n');
    let mut out = String::with_capacity(body.len() + 2 * indent.len());
    out.push_str(indent);
    for (i, line) in body.split('\n').enumerate() {
        if i > 0 {
            out.push('\n');
            out.push_str(indent);
        }
        out.push_str(line);
    }
    out.push('\n');
    out.push_str(indent);
    out
}
1726
1727fn make_generics(parse_generics: Option<&str>) -> Result<Generics, Box<dyn Error>> {
1728 if let Some(parse_generics) = parse_generics {
1729 let tokens = str::parse::<TokenStream>(parse_generics)?;
1730 match syn::parse2(quote!(<'lexer, 'input: 'lexer, #tokens>)) {
1731 Ok(res) => Ok(res),
1732 Err(err) => Err(format!("unable to parse %parse-generics: {}", err).into()),
1733 }
1734 } else {
1735 Ok(parse_quote!(<'lexer, 'input: 'lexer>))
1736 }
1737}
1738
#[cfg(all(not(target_arch = "wasm32"), test))]
mod test {
    //! Tests for the compile-time parser builder: conflict detection and
    //! reporting, `%expect`/`%expect-rr` handling, derived module name
    //! validation, and precedence of builder vs. grammar-header recoverer
    //! settings.

    use std::{fs::File, io::Write, path::PathBuf};

    use super::{CTConflictsError, CTParserBuilder};
    use crate::test_utils::TestLexerTypes;
    use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind};
    use tempfile::TempDir;

    // With `error_on_conflicts(false)`, building a conflicting grammar succeeds
    // and its single shift/reduce and single reduce/reduce conflict are exposed
    // via `conflicts()`.
    #[test]
    fn test_conflicts() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        let _ = f.write_all(
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
                .as_bytes(),
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .error_on_conflicts(false)
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
            .unwrap()
            .conflicts(crate::unstable::UnstableApi)
        {
            Some((_, _, _, conflicts)) => {
                assert_eq!(conflicts.sr_len(), 1);
                assert_eq!(conflicts.rr_len(), 1);
            }
            None => panic!("Expected error data"),
        }
    }

    // By default conflicts are an error; the returned error downcasts to
    // `CTConflictsError`, which carries the state table with the conflicts.
    #[test]
    fn test_conflicts_error() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        let _ = f.write_all(
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
                .as_bytes(),
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>();
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 1);
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
            }
        }
    }

    // A `%expect` count that does not match the grammar's actual shift/reduce
    // conflict count (here: expected 2, actual 1) causes the build to fail.
    #[test]
    fn test_expect_error() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        let _ = f.write_all(
            "%start A
%expect 2
%%
A: 'a' 'b' | B 'b';
B: 'a';"
                .as_bytes(),
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>();
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 0);
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
            }
        }
    }

    // As above but for `%expect-rr`: a reduce/reduce expectation mismatch
    // (expected 2, actual 1) also fails the build.
    #[test]
    fn test_expectrr_error() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        let _ = f.write_all(
            "%start A
%expect 1
%expect-rr 2
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
                .as_bytes(),
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>();
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 1);
                assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
            }
        }
    }

    // A grammar filename containing a dash produces a derived module name
    // ("contains-a-dash_y") that is not a valid Rust identifier, which must be
    // reported as an error rather than generating broken code.
    #[test]
    fn test_invalid_identifier_in_derived_mod_name() {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("contains-a-dash.y");
        let mut f = File::create(&file_path).unwrap();
        let _ = f.write_all(
            "%start A
%%
A : 'a';"
                .as_bytes(),
        );
        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let err_string = e.to_string();
                assert_eq!(
                    err_string,
                    "CTParserBuilder::mod_name(\"contains-a-dash_y\") is not a valid rust identifier due to 'unexpected token'"
                );
            }
        }
    }

    // Checks how the recoverer chosen via the builder interacts with one set in
    // the grammar's `%grmtools` header. Per the table below: an explicit
    // builder setting wins over the header; otherwise the header value is used;
    // if neither is given the default is CPCTPlus.
    #[cfg(test)]
    #[test]
    fn test_recoverer_header() -> Result<(), Box<dyn std::error::Error>> {
        use crate::RecoveryKind as RK;
        // Each entry is (builder argument, header argument, expected recoverer).
        #[rustfmt::skip]
        let recovery_kinds = [
            (Some(RK::None), Some(RK::None), Some(RK::None)),
            (Some(RK::None), Some(RK::CPCTPlus), Some(RK::None)),
            (Some(RK::CPCTPlus), Some(RK::CPCTPlus), Some(RK::CPCTPlus)),
            (Some(RK::CPCTPlus), Some(RK::None), Some(RK::CPCTPlus)),
            (None, Some(RK::CPCTPlus), Some(RK::CPCTPlus)),
            (None, Some(RK::None), Some(RK::None)),
            (None, None, Some(RK::CPCTPlus)),
            (Some(RK::None), None, Some(RK::None)),
            (Some(RK::CPCTPlus), None, Some(RK::CPCTPlus)),
        ];

        for (i, (builder_arg, header_arg, expected_rk)) in
            recovery_kinds.iter().cloned().enumerate()
        {
            // Either a grammar whose header names a recoverer, or one that
            // leaves it unspecified.
            let y_src = if let Some(header_arg) = header_arg {
                format!(
                    "\
                    %grmtools{{yacckind: Original(NoAction), recoverer: {}}} \
                    %% \
                    start: ; \
                    ",
                    match header_arg {
                        RK::None => "RecoveryKind::None",
                        RK::CPCTPlus => "RecoveryKind::CPCTPlus",
                    }
                )
            } else {
                r#"
                %grmtools{yacckind: Original(NoAction)}
                %%
                Start: ;
                "#
                .to_string()
            };
            let out_dir = std::env::var("OUT_DIR").unwrap();
            let y_path = format!("{out_dir}/recoverykind_test_{i}.y");
            let y_out_path = format!("{y_path}.rs");
            std::fs::File::create(y_path.clone()).unwrap();
            std::fs::write(y_path.clone(), y_src).unwrap();
            let mut cp_builder = CTParserBuilder::<TestLexerTypes>::new();
            cp_builder = cp_builder
                .output_path(y_out_path.clone())
                .grammar_path(y_path.clone());
            // The inspect callback observes the recoverer the builder actually
            // settled on, and panics if it differs from the expectation.
            cp_builder = if let Some(builder_arg) = builder_arg {
                cp_builder.recoverer(builder_arg)
            } else {
                cp_builder
            }
            .inspect_recoverer(Box::new(move |rk| {
                if matches!(
                    (rk, expected_rk),
                    (RK::None, Some(RK::None)) | (RK::CPCTPlus, Some(RK::CPCTPlus))
                ) {
                    Ok(())
                } else {
                    panic!("Unexpected recovery kind")
                }
            }));
            cp_builder.build()?;
        }
        Ok(())
    }
}