1use std::{
2 collections::{HashMap, HashSet},
3 fmt,
4 str::FromStr,
5};
6
7use indexmap::{IndexMap, IndexSet};
8
9use super::{
10 parser::YaccParser, Precedence, YaccGrammarError, YaccGrammarErrorKind, YaccGrammarWarning,
11 YaccGrammarWarningKind, YaccKind,
12};
13
14use crate::{
15 header::{GrmtoolsSectionParser, HeaderError, HeaderErrorKind, HeaderValue},
16 Span,
17};
18pub struct ASTWithValidityInfo {
21 yacc_kind: YaccKind,
22 ast: GrammarAST,
23 errs: Vec<YaccGrammarError>,
24}
25
26impl ASTWithValidityInfo {
27 pub fn new(yacc_kind: YaccKind, s: &str) -> Self {
35 let mut errs = Vec::new();
36 let ast = {
37 let mut yp = YaccParser::new(yacc_kind, s);
38 yp.parse().map_err(|e| errs.extend(e)).ok();
39 let mut ast = yp.build();
40 ast.complete_and_validate().map_err(|e| errs.push(e)).ok();
41 ast
42 };
43 ASTWithValidityInfo {
44 ast,
45 errs,
46 yacc_kind,
47 }
48 }
49
50 pub fn ast(&self) -> &GrammarAST {
55 &self.ast
56 }
57
58 pub fn is_valid(&self) -> bool {
61 self.errors().is_empty()
62 }
63
64 pub fn yacc_kind(&self) -> YaccKind {
66 self.yacc_kind
67 }
68
69 pub fn errors(&self) -> &[YaccGrammarError] {
71 self.errs.as_slice()
72 }
73}
74
75impl FromStr for ASTWithValidityInfo {
76 type Err = Vec<YaccGrammarError>;
77 fn from_str(src: &str) -> Result<Self, Vec<YaccGrammarError>> {
79 let mut errs = Vec::new();
80 let (header, _) = GrmtoolsSectionParser::new(src, true)
81 .parse()
82 .map_err(|mut errs| errs.drain(..).map(|e| e.into()).collect::<Vec<_>>())?;
83 if let Some(HeaderValue(_, yk_val)) = header.get("yacckind") {
84 let yacc_kind = YaccKind::try_from(yk_val).map_err(|e| vec![e.into()])?;
85 let ast = {
86 let mut yp = YaccParser::new(yacc_kind, src);
88 yp.parse().map_err(|e| errs.extend(e)).ok();
89 let mut ast = yp.build();
90 ast.complete_and_validate().map_err(|e| errs.push(e)).ok();
91 ast
92 };
93 Ok(ASTWithValidityInfo {
94 ast,
95 errs,
96 yacc_kind,
97 })
98 } else {
99 Err(vec![HeaderError {
100 kind: HeaderErrorKind::InvalidEntry("yacckind"),
101 locations: vec![Span::new(0, 0)],
102 }
103 .into()])
104 }
105 }
106}
107
108#[derive(Debug)]
112pub struct GrammarAST {
113 pub start: Option<(String, Span)>,
114 pub rules: IndexMap<String, Rule>,
116 pub prods: Vec<Production>,
117 pub token_directives: HashSet<usize>,
122 pub tokens: IndexSet<String>,
123 pub spans: Vec<Span>,
124 pub precs: HashMap<String, (Precedence, Span)>,
125 pub avoid_insert: Option<HashMap<String, Span>>,
126 pub implicit_tokens: Option<HashMap<String, Span>>,
127 pub epp: HashMap<String, (Span, (String, Span))>,
131 pub expect: Option<(usize, Span)>,
132 pub expectrr: Option<(usize, Span)>,
133 pub parse_param: Option<(String, String)>,
134 pub programs: Option<String>,
135 pub expect_unused: Vec<Symbol>,
138}
139
140#[derive(Debug)]
141pub struct Rule {
142 pub name: (String, Span),
143 pub pidxs: Vec<usize>, pub actiont: Option<String>,
145}
146
147#[derive(Debug)]
148#[cfg_attr(test, derive(Eq, PartialEq))]
149pub struct Production {
150 pub symbols: Vec<Symbol>,
151 pub precedence: Option<String>,
152 pub action: Option<String>,
153 pub prod_span: Span,
154}
155
156#[derive(Clone, Debug)]
157#[cfg_attr(test, derive(Eq, PartialEq))]
158pub enum Symbol {
159 Rule(String, Span),
160 Token(String, Span),
161}
162
163#[derive(Eq, PartialEq, Debug, Copy, Clone)]
166pub(crate) enum SymbolIdx {
167 Rule(usize),
168 Token(usize),
169}
170
171impl SymbolIdx {
172 pub(crate) fn symbol(self, ast: &GrammarAST) -> Symbol {
173 match self {
174 SymbolIdx::Rule(idx) => {
175 let (rule_name, rule_span) = &ast.rules[idx].name;
176 Symbol::Rule(rule_name.clone(), *rule_span)
177 }
178 SymbolIdx::Token(idx) => {
179 let tok = &ast.tokens[idx];
180 Symbol::Token(tok.clone(), ast.spans[idx])
181 }
182 }
183 }
184}
185impl fmt::Display for Symbol {
186 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
187 match *self {
188 Symbol::Rule(ref s, _) => write!(f, "{}", s),
189 Symbol::Token(ref s, _) => write!(f, "{}", s),
190 }
191 }
192}
193
194impl GrammarAST {
195 pub fn new() -> GrammarAST {
196 GrammarAST {
197 start: None,
198 rules: IndexMap::new(), prods: Vec::new(),
201 spans: Vec::new(),
202 tokens: IndexSet::new(),
203 token_directives: HashSet::new(),
204 precs: HashMap::new(),
205 avoid_insert: None,
206 implicit_tokens: None,
207 epp: HashMap::new(),
208 expect: None,
209 expectrr: None,
210 parse_param: None,
211 programs: None,
212 expect_unused: Vec::new(),
213 }
214 }
215
216 pub fn add_rule(&mut self, (name, name_span): (String, Span), actiont: Option<String>) {
217 self.rules.insert(
218 name.clone(),
219 Rule {
220 name: (name, name_span),
221 pidxs: Vec::new(),
222 actiont,
223 },
224 );
225 }
226
227 pub fn add_prod(
228 &mut self,
229 rule_name: String,
230 symbols: Vec<Symbol>,
231 precedence: Option<String>,
232 action: Option<String>,
233 prod_span: Span,
234 ) {
235 self.rules[&rule_name].pidxs.push(self.prods.len());
236 self.prods.push(Production {
237 symbols,
238 precedence,
239 action,
240 prod_span,
241 });
242 }
243
244 #[deprecated(since = "0.10.2", note = "Please use set_programs instead")]
245 pub fn add_programs(&mut self, s: String) {
246 self.set_programs(s);
247 }
248
249 pub fn set_programs(&mut self, s: String) {
250 self.programs = Some(s)
251 }
252
253 pub fn get_rule(&self, key: &str) -> Option<&Rule> {
254 self.rules.get(key)
255 }
256
257 pub fn has_token(&self, s: &str) -> bool {
258 self.tokens.contains(s)
259 }
260
261 pub(crate) fn complete_and_validate(&mut self) -> Result<(), YaccGrammarError> {
271 match self.start {
272 None => {
273 return Err(YaccGrammarError {
274 kind: YaccGrammarErrorKind::NoStartRule,
275 spans: vec![Span::new(0, 0)],
276 });
277 }
278 Some((ref s, span)) => {
279 if !self.rules.contains_key(s) {
280 return Err(YaccGrammarError {
281 kind: YaccGrammarErrorKind::InvalidStartRule(s.clone()),
282 spans: vec![span],
283 });
284 }
285 }
286 }
287 for rule in self.rules.values() {
288 for &pidx in &rule.pidxs {
289 let prod = &self.prods[pidx];
290 if let Some(ref n) = prod.precedence {
291 if !self.tokens.contains(n) {
292 return Err(YaccGrammarError {
293 kind: YaccGrammarErrorKind::UnknownToken(n.clone()),
294 spans: vec![Span::new(0, 0)],
295 });
296 }
297 if !self.precs.contains_key(n) {
298 return Err(YaccGrammarError {
299 kind: YaccGrammarErrorKind::NoPrecForToken(n.clone()),
300 spans: vec![Span::new(0, 0)],
301 });
302 }
303 }
304 for sym in &prod.symbols {
305 match *sym {
306 Symbol::Rule(ref name, span) => {
307 if !self.rules.contains_key(name) {
308 return Err(YaccGrammarError {
309 kind: YaccGrammarErrorKind::UnknownRuleRef(name.clone()),
310 spans: vec![span],
311 });
312 }
313 }
314 Symbol::Token(ref name, span) => {
315 if !self.tokens.contains(name) {
316 return Err(YaccGrammarError {
317 kind: YaccGrammarErrorKind::UnknownToken(name.clone()),
318 spans: vec![span],
319 });
320 }
321 }
322 }
323 }
324 }
325 }
326
327 for (k, (sp, _)) in self.epp.iter() {
328 if self.tokens.contains(k) {
329 continue;
330 }
331 if let Some(ref it) = self.implicit_tokens {
332 if it.contains_key(k) {
333 continue;
334 }
335 }
336 return Err(YaccGrammarError {
337 kind: YaccGrammarErrorKind::UnknownEPP(k.clone()),
338 spans: vec![*sp],
339 });
340 }
341
342 for sym in &self.expect_unused {
343 match sym {
344 Symbol::Rule(sym_name, sym_span) => {
345 if self.get_rule(sym_name).is_none() {
346 return Err(YaccGrammarError {
347 kind: YaccGrammarErrorKind::UnknownRuleRef(sym_name.clone()),
348 spans: vec![*sym_span],
349 });
350 }
351 }
352 Symbol::Token(sym_name, sym_span) => {
353 if !self.has_token(sym_name) {
354 return Err(YaccGrammarError {
355 kind: YaccGrammarErrorKind::UnknownToken(sym_name.clone()),
356 spans: vec![*sym_span],
357 });
358 }
359 }
360 }
361 }
362 Ok(())
363 }
364
365 pub fn warnings(&self) -> Vec<YaccGrammarWarning> {
366 self.unused_symbols()
367 .map(|symidx| {
368 let (kind, span) = match symidx.symbol(self) {
369 Symbol::Rule(_, span) => (YaccGrammarWarningKind::UnusedRule, span),
370 Symbol::Token(_, span) => (YaccGrammarWarningKind::UnusedToken, span),
371 };
372 YaccGrammarWarning {
373 kind,
374 spans: vec![span],
375 }
376 })
377 .collect()
378 }
379
380 pub(crate) fn unused_symbols(&self) -> impl Iterator<Item = SymbolIdx> + '_ {
383 let start_rule_name = self.start.as_ref().map(|(name, _)| name.clone());
384 let start_rule = self
385 .rules
386 .iter()
387 .find(|(rule_name, _)| start_rule_name.as_ref() == Some(rule_name));
388 let mut seen_rules = HashSet::new();
389 let mut seen_tokens = HashSet::new();
390 let mut expected_unused_tokens = HashSet::new();
391 let mut expected_unused_rules = HashSet::new();
392 for sym in &self.expect_unused {
393 match sym {
394 Symbol::Rule(sym_name, _) => {
395 expected_unused_rules.insert(sym_name);
396 }
397 Symbol::Token(sym_name, _) => {
398 expected_unused_tokens.insert(sym_name);
399 }
400 }
401 }
402 if let Some(implicit_tokens) = self.implicit_tokens.as_ref() {
403 expected_unused_tokens.extend(implicit_tokens.keys())
404 }
405 if let Some((start_name, start_rule)) = start_rule {
406 let mut todo = Vec::new();
407 todo.extend(start_rule.pidxs.iter().copied());
408 seen_rules.insert(start_name);
409
410 while let Some(pidx) = todo.pop() {
411 let prod = &self.prods[pidx];
412 for sym in &prod.symbols {
413 match sym {
414 Symbol::Rule(name, _) => {
415 if seen_rules.insert(name) {
416 if let Some(rule) = self.rules.get(name) {
417 todo.extend(&rule.pidxs);
418 }
419 }
420 }
421 Symbol::Token(name, _) => {
422 seen_tokens.insert(name);
423 }
424 };
425 }
426 }
427 }
428 self.rules
429 .iter()
430 .enumerate()
431 .filter_map(move |(rule_id, (rule_name, _))| {
432 if expected_unused_rules.contains(rule_name) || seen_rules.contains(rule_name) {
433 None
434 } else {
435 Some(SymbolIdx::Rule(rule_id))
436 }
437 })
438 .chain(
439 self.tokens
440 .iter()
441 .enumerate()
442 .filter_map(move |(tok_idx, tok)| {
443 if expected_unused_tokens.contains(tok) || seen_tokens.contains(tok) {
444 None
445 } else {
446 Some(SymbolIdx::Token(tok_idx))
447 }
448 }),
449 )
450 }
451}
452
453#[cfg(test)]
454mod test {
455 use super::{
456 super::{AssocKind, Precedence},
457 GrammarAST, Span, Symbol, YaccGrammarError, YaccGrammarErrorKind,
458 };
459
460 fn rule(n: &str) -> Symbol {
461 Symbol::Rule(n.to_string(), Span::new(0, 0))
462 }
463
464 fn token(n: &str) -> Symbol {
465 Symbol::Token(n.to_string(), Span::new(0, 0))
466 }
467
468 #[test]
469 fn test_empty_grammar() {
470 let mut grm = GrammarAST::new();
471 match grm.complete_and_validate() {
472 Err(YaccGrammarError {
473 kind: YaccGrammarErrorKind::NoStartRule,
474 ..
475 }) => (),
476 _ => panic!("Validation error"),
477 }
478 }
479
480 #[test]
481 fn test_invalid_start_rule() {
482 let mut grm = GrammarAST::new();
483 let empty_span = Span::new(0, 0);
484 grm.start = Some(("A".to_string(), empty_span));
485 grm.add_rule(("B".to_string(), empty_span), None);
486 grm.add_prod("B".to_string(), vec![], None, None, empty_span);
487 match grm.complete_and_validate() {
488 Err(YaccGrammarError {
489 kind: YaccGrammarErrorKind::InvalidStartRule(_),
490 ..
491 }) => (),
492 _ => panic!("Validation error"),
493 }
494 }
495
496 #[test]
497 fn test_valid_start_rule() {
498 let mut grm = GrammarAST::new();
499 let empty_span = Span::new(0, 0);
500 grm.start = Some(("A".to_string(), empty_span));
501 grm.add_rule(("A".to_string(), empty_span), None);
502 grm.add_prod("A".to_string(), vec![], None, None, empty_span);
503 assert!(grm.complete_and_validate().is_ok());
504 }
505
506 #[test]
507 fn test_valid_rule_ref() {
508 let mut grm = GrammarAST::new();
509 let empty_span = Span::new(0, 0);
510 grm.start = Some(("A".to_string(), empty_span));
511 grm.add_rule(("A".to_string(), empty_span), None);
512 grm.add_rule(("B".to_string(), empty_span), None);
513 grm.add_prod("A".to_string(), vec![rule("B")], None, None, empty_span);
514 grm.add_prod("B".to_string(), vec![], None, None, empty_span);
515 assert!(grm.complete_and_validate().is_ok());
516 }
517
518 #[test]
519 fn test_invalid_rule_ref() {
520 let mut grm = GrammarAST::new();
521 let empty_span = Span::new(0, 0);
522 grm.start = Some(("A".to_string(), empty_span));
523 grm.add_rule(("A".to_string(), empty_span), None);
524 grm.add_prod("A".to_string(), vec![rule("B")], None, None, empty_span);
525 match grm.complete_and_validate() {
526 Err(YaccGrammarError {
527 kind: YaccGrammarErrorKind::UnknownRuleRef(_),
528 ..
529 }) => (),
530 _ => panic!("Validation error"),
531 }
532 }
533
534 #[test]
535 fn test_valid_token_ref() {
536 let mut grm = GrammarAST::new();
537 let empty_span = Span::new(0, 0);
538 grm.tokens.insert("b".to_string());
539 grm.start = Some(("A".to_string(), empty_span));
540 grm.add_rule(("A".to_string(), empty_span), None);
541 grm.add_prod("A".to_string(), vec![token("b")], None, None, empty_span);
542 assert!(grm.complete_and_validate().is_ok());
543 }
544
545 #[test]
546 fn test_redefine_rules_as_tokens() {
547 let mut grm = GrammarAST::new();
550 let empty_span = Span::new(0, 0);
551 grm.tokens.insert("b".to_string());
552 grm.start = Some(("A".to_string(), empty_span));
553 grm.add_rule(("A".to_string(), empty_span), None);
554 grm.add_prod("A".to_string(), vec![rule("b")], None, None, empty_span);
555 assert!(grm.complete_and_validate().is_err());
556 }
557
558 #[test]
559 fn test_invalid_token_ref() {
560 let mut grm = GrammarAST::new();
561 let empty_span = Span::new(0, 0);
562 grm.start = Some(("A".to_string(), empty_span));
563 grm.add_rule(("A".to_string(), empty_span), None);
564 grm.add_prod("A".to_string(), vec![token("b")], None, None, empty_span);
565 match grm.complete_and_validate() {
566 Err(YaccGrammarError {
567 kind: YaccGrammarErrorKind::UnknownToken(_),
568 ..
569 }) => (),
570 _ => panic!("Validation error"),
571 }
572 }
573
574 #[test]
575 fn test_invalid_rule_forgotten_token() {
576 let mut grm = GrammarAST::new();
577 let empty_span = Span::new(0, 0);
578 grm.start = Some(("A".to_string(), empty_span));
579 grm.add_rule(("A".to_string(), empty_span), None);
580 grm.add_prod(
581 "A".to_string(),
582 vec![rule("b"), token("b")],
583 None,
584 None,
585 Span::new(0, 2),
586 );
587 match grm.complete_and_validate() {
588 Err(YaccGrammarError {
589 kind: YaccGrammarErrorKind::UnknownRuleRef(_),
590 ..
591 }) => (),
592 _ => panic!("Validation error"),
593 }
594 }
595
596 #[test]
597 fn test_invalid_epp() {
598 let mut grm = GrammarAST::new();
599 let empty_span = Span::new(2, 3);
600 grm.start = Some(("A".to_string(), empty_span));
601 grm.add_rule(("A".to_string(), empty_span), None);
602 grm.add_prod("A".to_string(), vec![], None, None, empty_span);
603 grm.epp
604 .insert("k".to_owned(), (empty_span, ("v".to_owned(), empty_span)));
605 match grm.complete_and_validate() {
606 Err(YaccGrammarError {
607 kind: YaccGrammarErrorKind::UnknownEPP(_),
608 spans,
609 }) if spans.len() == 1 && spans[0] == Span::new(2, 3) => (),
610 _ => panic!("Validation error"),
611 }
612 }
613
614 #[test]
615 fn test_precedence_override() {
616 let mut grm = GrammarAST::new();
617 let empty_span = Span::new(0, 0);
618 grm.precs.insert(
619 "b".to_string(),
620 (
621 Precedence {
622 level: 1,
623 kind: AssocKind::Left,
624 },
625 Span::new(0, 0),
626 ),
627 );
628 grm.start = Some(("A".to_string(), empty_span));
629 grm.tokens.insert("b".to_string());
630 grm.add_rule(("A".to_string(), empty_span), None);
631 grm.add_prod(
632 "A".to_string(),
633 vec![token("b")],
634 Some("b".to_string()),
635 None,
636 empty_span,
637 );
638 assert!(grm.complete_and_validate().is_ok());
639 }
640
641 #[test]
642 fn test_invalid_precedence_override() {
643 let mut grm = GrammarAST::new();
644 let empty_span = Span::new(0, 0);
645 grm.start = Some(("A".to_string(), empty_span));
646 grm.add_rule(("A".to_string(), empty_span), None);
647 grm.add_prod(
648 "A".to_string(),
649 vec![token("b")],
650 Some("b".to_string()),
651 None,
652 empty_span,
653 );
654 match grm.complete_and_validate() {
655 Err(YaccGrammarError {
656 kind: YaccGrammarErrorKind::UnknownToken(_),
657 ..
658 }) => (),
659 _ => panic!("Validation error"),
660 }
661 grm.tokens.insert("b".to_string());
662 match grm.complete_and_validate() {
663 Err(YaccGrammarError {
664 kind: YaccGrammarErrorKind::NoPrecForToken(_),
665 ..
666 }) => (),
667 _ => panic!("Validation error"),
668 }
669 }
670
671 #[test]
672 fn test_ast_unused_symbols() {
673 let mut grm = GrammarAST::new();
674 let empty_span = Span::new(0, 0);
675 grm.start = Some(("A".to_string(), empty_span));
676 grm.add_rule(("A".to_string(), empty_span), None);
677 grm.add_prod("A".to_string(), vec![], None, None, empty_span);
678 grm.tokens.insert("b".to_string());
679 grm.spans.push(Span::new(4, 5));
680 grm.add_rule(("B".to_string(), Span::new(1, 2)), None);
681 grm.add_prod("B".to_string(), vec![token("b")], None, None, empty_span);
682
683 assert_eq!(
684 grm.unused_symbols()
685 .map(|sym_idx| sym_idx.symbol(&grm))
686 .collect::<Vec<Symbol>>()
687 .as_slice(),
688 &[
689 Symbol::Rule("B".to_string(), Span::new(1, 2)),
690 Symbol::Token("b".to_string(), Span::new(4, 5))
691 ]
692 )
693 }
694
695 #[test]
696 fn token_rule_confusion_issue_557() {
697 use super::*;
698 use crate::yacc::*;
699 let ast_validity = ASTWithValidityInfo::new(
700 YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
701 r#"
702 %start start
703 %%
704 start: "a" a;
705 a: "c";"#,
706 );
707 assert!(ast_validity.ast().prods[0]
708 .symbols
709 .contains(&Symbol::Rule("a".to_string(), Span::new(64, 65))));
710 let ast_validity = ASTWithValidityInfo::new(
711 YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
712 r#"
713 %start start
714 %%
715 start: "a" x;
716 x: "c";"#,
717 );
718 assert!(ast_validity.ast().prods[0]
719 .symbols
720 .contains(&Symbol::Rule("x".to_string(), Span::new(64, 65))));
721 let ast_validity = ASTWithValidityInfo::new(
722 YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
723 r#"
724 %start start
725 %token a
726 %%
727 start: "a" a;
728 "#,
729 );
730 assert_eq!(
731 ast_validity.ast().prods[0].symbols,
732 [
733 Symbol::Token("a".to_string(), Span::new(66, 67)),
734 Symbol::Token("a".to_string(), Span::new(69, 70))
735 ]
736 );
737 }
738
739 #[test]
740 fn test_token_directives() {
741 use super::*;
742 use crate::yacc::*;
743
744 let ast_validity = ASTWithValidityInfo::new(
746 YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
747 r#"
748 %left "a"
749 %token a
750 %start start
751 %%
752 start: "a" a "b";
753 "#,
754 );
755 assert!(ast_validity
756 .ast()
757 .token_directives
758 .contains(&ast_validity.ast().tokens.get_index_of("a").unwrap()));
759 assert!(!ast_validity
760 .ast()
761 .token_directives
762 .contains(&ast_validity.ast().tokens.get_index_of("b").unwrap()));
763 }
764}