1use crate::{
2 Location, Span, Spanned,
3 markmap::{Entry, MarkMap},
4 yacc::{
5 YaccGrammarError, YaccGrammarErrorKind, YaccKind, YaccOriginalActionKind, parser::SpansKind,
6 },
7};
8use regex::{Regex, RegexBuilder};
9use std::{error::Error, fmt, sync::LazyLock};
10
/// An error produced while parsing or interpreting a `%grmtools` header section.
///
/// `T` is the type used to describe where in the input the error occurred; within this file it
/// is instantiated with both `Span` and `Location`.
#[derive(Debug, Clone)]
#[doc(hidden)]
pub struct HeaderError<T> {
    /// What went wrong.
    pub kind: HeaderErrorKind,
    /// The locations this error refers to. For `HeaderErrorKind::DuplicateEntry` errors built by
    /// `add_duplicate_occurrence`, the first element is the original occurrence and the
    /// remaining elements are the duplicates.
    pub locations: Vec<T>,
}
23
// `Error` requires `Debug + Display`; the derived `Debug` impl only exists when `T: Debug`, hence
// the bound. All `Error` methods keep their default implementations.
impl<T: fmt::Debug> Error for HeaderError<T> {}
25impl<T> fmt::Display for HeaderError<T> {
26 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
27 write!(f, "{}", self.kind)
28 }
29}
30
31impl From<HeaderError<Span>> for YaccGrammarError {
32 fn from(e: HeaderError<Span>) -> YaccGrammarError {
33 YaccGrammarError {
34 kind: YaccGrammarErrorKind::Header(e.kind, e.spanskind()),
35 spans: e.locations,
36 }
37 }
38}
39
40impl Spanned for HeaderError<Span> {
41 fn spans(&self) -> &[Span] {
42 self.locations.as_slice()
43 }
44 fn spanskind(&self) -> SpansKind {
45 self.spanskind()
46 }
47}
48
/// A value stored in a `Header`, paired with a location.
///
/// Field `0` is the location of the entry's key (this is what `GrmtoolsSectionParser::parse`
/// stores there); field `1` is the value itself.
#[derive(Debug, PartialEq)]
#[doc(hidden)]
pub struct HeaderValue<T>(pub T, pub Value<T>);
54
55impl From<HeaderValue<Span>> for HeaderValue<Location> {
56 fn from(hv: HeaderValue<Span>) -> HeaderValue<Location> {
57 HeaderValue(hv.0.into(), hv.1.into())
58 }
59}
60
/// The different kinds of error that can occur while handling a `%grmtools` header.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
#[non_exhaustive]
#[doc(hidden)]
pub enum HeaderErrorKind {
    /// The `%grmtools` section was required but not found.
    MissingGrmtoolsSection,
    /// A name was expected but the input does not start with a valid one.
    IllegalName,
    /// The given token was expected but not found.
    ExpectedToken(char),
    /// The given token was found where it is not allowed; the string is a hint for the user.
    UnexpectedToken(char, &'static str),
    /// The same key occurs more than once.
    DuplicateEntry,
    /// The named entry has a value which is not valid for it.
    InvalidEntry(&'static str),
    /// A header value could not be converted: `(target type, explanation)`.
    ConversionError(&'static str, &'static str),
}

impl fmt::Display for HeaderErrorKind {
    /// Writes the human-readable message for this kind of error.
    ///
    /// Each arm writes straight to the formatter rather than building an intermediate `String`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            HeaderErrorKind::MissingGrmtoolsSection => f.write_str("Missing %grmtools section"),
            HeaderErrorKind::IllegalName => f.write_str("Illegal name"),
            HeaderErrorKind::ExpectedToken(c) => write!(f, "Expected token: '{}'", c),
            HeaderErrorKind::UnexpectedToken(c, hint) => {
                // Previously misspelt as "Unxpected" and followed by a stray trailing space.
                write!(f, "Unexpected token: '{}', {}", c, hint)
            }
            HeaderErrorKind::InvalidEntry(s) => write!(f, "Invalid entry: '{}'", s),
            HeaderErrorKind::DuplicateEntry => f.write_str("Duplicate Entry"),
            HeaderErrorKind::ConversionError(t, err_str) => {
                write!(f, "Converting header value to type '{}': {}", t, err_str)
            }
        }
    }
}
92
93impl<T> HeaderError<T> {
94 pub fn spanskind(&self) -> SpansKind {
96 match self.kind {
97 HeaderErrorKind::DuplicateEntry => SpansKind::DuplicationError,
98 _ => SpansKind::Error,
99 }
100 }
101}
102
/// A name with an optional namespace, written `member` or `namespace::member` in the header.
///
/// Each name is stored together with a location of type `T`.
#[derive(Debug, Eq, PartialEq)]
#[doc(hidden)]
pub struct Namespaced<T> {
    /// The part before `::`, if there was one.
    pub namespace: Option<(String, T)>,
    /// The (only, or final) name.
    pub member: (String, T),
}
125
/// The value given after the `:` of a `key: value` header entry.
#[derive(Debug, Eq, PartialEq)]
#[doc(hidden)]
pub enum Setting<T> {
    /// A single, optionally namespaced, name such as `foo` or `ns::foo`.
    Unitary(Namespaced<T>),
    /// A constructor applied to a single argument, such as `ctor(arg)`; both parts may be
    /// namespaced.
    Constructor {
        /// The name before the opening parenthesis.
        ctor: Namespaced<T>,
        /// The name between the parentheses.
        arg: Namespaced<T>,
    },
    /// An unsigned decimal number.
    Num(u64, T),
    /// A double-quoted string. The parser stores the text between the quotes exactly as
    /// written, i.e. without processing backslash escapes.
    String(String, T),
}
141
/// A parser for the `%grmtools { ... }` section at the start of an input.
#[doc(hidden)]
pub struct GrmtoolsSectionParser<'input> {
    /// The complete input being parsed.
    src: &'input str,
    /// Whether `parse` reports an error when the input has no `%grmtools` section.
    required: bool,
}
148
/// The value of a header entry.
#[derive(Debug, Eq, PartialEq)]
#[doc(hidden)]
pub enum Value<T> {
    /// A boolean flag: the parser produces `true` for a bare `name` and `false` for `!name`.
    Flag(bool, T),
    /// A `name: value` entry.
    Setting(Setting<T>),
}
159
160impl From<Value<Span>> for Value<Location> {
161 fn from(v: Value<Span>) -> Value<Location> {
162 match v {
163 Value::Flag(flag, u) => Value::Flag(flag, u.into()),
164 Value::Setting(s) => Value::Setting(match s {
165 Setting::Unitary(Namespaced {
166 namespace,
167 member: (m, ml),
168 }) => Setting::Unitary(Namespaced {
169 namespace: namespace.map(|(n, nl)| (n, nl.into())),
170 member: (m, ml.into()),
171 }),
172 Setting::Constructor {
173 ctor:
174 Namespaced {
175 namespace: ctor_ns,
176 member: (ctor_m, ctor_ml),
177 },
178 arg:
179 Namespaced {
180 namespace: arg_ns,
181 member: (arg_m, arg_ml),
182 },
183 } => Setting::Constructor {
184 ctor: Namespaced {
185 namespace: ctor_ns.map(|(ns, ns_l)| (ns, ns_l.into())),
186 member: (ctor_m, ctor_ml.into()),
187 },
188 arg: Namespaced {
189 namespace: arg_ns.map(|(ns, ns_l)| (ns, ns_l.into())),
190 member: (arg_m, arg_ml.into()),
191 },
192 },
193 Setting::Num(num, num_loc) => Setting::Num(num, num_loc.into()),
194 Setting::String(s, str_loc) => Setting::String(s, str_loc.into()),
195 }),
196 }
197 }
198}
199
200impl<T> Value<T> {
201 pub fn expect_string_with_context(&self, ctxt: &str) -> Result<&str, Box<dyn Error>> {
202 let found = match self {
203 Value::Flag(_, _) => "bool".to_string(),
204 Value::Setting(Setting::String(s, _)) => {
205 return Ok(s);
206 }
207 Value::Setting(Setting::Num(_, _)) => "numeric".to_string(),
208 Value::Setting(Setting::Unitary(Namespaced {
209 namespace,
210 member: (member, _),
211 })) => {
212 if let Some((ns, _)) = namespace {
213 format!("'{ns}::{member}'")
214 } else {
215 format!("'{member}'")
216 }
217 }
218 Value::Setting(Setting::Constructor {
219 ctor:
220 Namespaced {
221 namespace: ctor_ns,
222 member: (ctor_memb, _),
223 },
224 arg:
225 Namespaced {
226 namespace: arg_ns,
227 member: (arg_memb, _),
228 },
229 }) => {
230 format!(
231 "'{}({})'",
232 if let Some((ns, _)) = ctor_ns {
233 format!("{ns}::{ctor_memb}")
234 } else {
235 arg_memb.to_string()
236 },
237 if let Some((ns, _)) = arg_ns {
238 format!("{ns}::{arg_memb}")
239 } else {
240 arg_memb.to_string()
241 }
242 )
243 }
244 };
245 Err(format!("Expected 'String' value, found {}, at {ctxt}", found).into())
246 }
247}
248
/// Matches the (possibly empty) run of `Pattern_White_Space` characters at the start of the
/// input.
static RE_LEADING_WS: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^[\p{Pattern_White_Space}]*").unwrap());
/// Matches a name at the start of the input: case-insensitively, a character in `A-Z` followed
/// by any number of characters in `A-Z` or underscores. Digits are not part of a name.
static RE_NAME: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"^[A-Z][A-Z_]*")
        .case_insensitive(true)
        .build()
        .unwrap()
});
/// Matches one or more ASCII digits at the start of the input.
static RE_DIGITS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[0-9]+").unwrap());
/// Matches a double-quoted string at the start of the input. Inside the quotes a backslash
/// followed by any character is accepted as a unit, so `\"` does not end the string.
static RE_STRING: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^\"(\\.|[^"\\])*\""#).unwrap());

/// The keyword which introduces the header section.
const MAGIC: &str = "%grmtools";
261
262fn add_duplicate_occurrence<T: Eq + PartialEq + Clone>(
263 errs: &mut Vec<HeaderError<T>>,
264 kind: HeaderErrorKind,
265 orig_loc: T,
266 dup_loc: T,
267) {
268 if !errs.iter_mut().any(|e| {
269 if e.kind == kind && e.locations[0] == orig_loc {
270 e.locations.push(dup_loc.clone());
271 true
272 } else {
273 false
274 }
275 }) {
276 errs.push(HeaderError {
277 kind,
278 locations: vec![orig_loc, dup_loc],
279 });
280 }
281}
282
283impl<'input> GrmtoolsSectionParser<'input> {
284 pub fn parse_value(
285 &'_ self,
286 mut i: usize,
287 ) -> Result<(String, Span, Value<Span>, usize), HeaderError<Span>> {
288 if let Some(j) = self.lookahead_is("!", i) {
289 let (flag_name, k) = self.parse_name(j)?;
290 Ok((
291 flag_name,
292 Span::new(j, k),
293 Value::Flag(false, Span::new(i, k)),
294 self.parse_ws(k),
295 ))
296 } else {
297 let (key_name, j) = self.parse_name(i)?;
298 let key_span = Span::new(i, j);
299 i = self.parse_ws(j);
300 if let Some(j) = self.lookahead_is(":", i) {
301 i = self.parse_ws(j);
302 match RE_DIGITS.find(&self.src[i..]) {
303 Some(m) => {
304 let num_span = Span::new(i + m.start(), i + m.end());
305 let num_str = &self.src[num_span.start()..num_span.end()];
306 let num = str::parse::<u64>(num_str).unwrap();
308 let val = Setting::Num(num, num_span);
309 i = self.parse_ws(num_span.end());
310 Ok((key_name, key_span, Value::Setting(val), i))
311 }
312 None => match RE_STRING.find(&self.src[i..]) {
313 Some(m) => {
314 let end = i + m.end();
315 let str_span = Span::new(i + m.start() + 1, end - 1);
317 let str = &self.src[str_span.start()..str_span.end()];
318 let setting = Setting::String(str.to_string(), str_span);
319 i = self.parse_ws(end);
321 Ok((key_name, key_span, Value::Setting(setting), i))
322 }
323 None => {
324 let (path_val, j) = self.parse_namespaced(i)?;
325 i = self.parse_ws(j);
326 if let Some(j) = self.lookahead_is("(", i) {
327 let (arg, j) = self.parse_namespaced(j)?;
328 i = self.parse_ws(j);
329 if let Some(j) = self.lookahead_is(")", i) {
330 i = self.parse_ws(j);
331 Ok((
332 key_name,
333 key_span,
334 Value::Setting(Setting::Constructor {
335 ctor: path_val,
336 arg,
337 }),
338 i,
339 ))
340 } else {
341 Err(HeaderError {
342 kind: HeaderErrorKind::ExpectedToken(')'),
343 locations: vec![Span::new(i, i)],
344 })
345 }
346 } else {
347 Ok((
348 key_name,
349 key_span,
350 Value::Setting(Setting::Unitary(path_val)),
351 i,
352 ))
353 }
354 }
355 },
356 }
357 } else {
358 Ok((key_name, key_span, Value::Flag(true, key_span), i))
359 }
360 }
361 }
362
363 fn parse_namespaced(
364 &self,
365 mut i: usize,
366 ) -> Result<(Namespaced<Span>, usize), HeaderError<Span>> {
367 let (name, j) = self.parse_name(i)?;
369 let name_span = Span::new(i, j);
370 i = self.parse_ws(j);
371 if let Some(j) = self.lookahead_is("::", i) {
372 i = self.parse_ws(j);
373 let (member_val, j) = self.parse_name(i)?;
374 let member_val_span = Span::new(i, j);
375 i = self.parse_ws(j);
376 Ok((
377 Namespaced {
378 namespace: Some((name, name_span)),
379 member: (member_val, member_val_span),
380 },
381 i,
382 ))
383 } else {
384 Ok((
385 Namespaced {
386 namespace: None,
387 member: (name, name_span),
388 },
389 i,
390 ))
391 }
392 }
393
394 pub fn new(src: &'input str, required: bool) -> Self {
404 Self { src, required }
405 }
406
407 #[allow(clippy::type_complexity)]
408 pub fn parse(&'_ self) -> Result<(Header<Span>, usize), Vec<HeaderError<Span>>> {
409 let mut errs = Vec::new();
410 if let Some(mut i) = self.lookahead_is(MAGIC, self.parse_ws(0)) {
411 let mut ret = Header::new();
412 i = self.parse_ws(i);
413 let section_start_pos = i;
414 if let Some(j) = self.lookahead_is("{", i) {
415 i = self.parse_ws(j);
416 while self.lookahead_is("}", i).is_none() && i < self.src.len() {
417 let (key, key_loc, val, j) = match self.parse_value(i) {
418 Ok((key, key_loc, val, pos)) => (key, key_loc, val, pos),
419 Err(e) => {
420 errs.push(e);
421 return Err(errs);
422 }
423 };
424 match ret.entry(key) {
425 Entry::Occupied(orig) => {
426 let HeaderValue(orig_loc, _): &HeaderValue<Span> = orig.get();
427 add_duplicate_occurrence(
428 &mut errs,
429 HeaderErrorKind::DuplicateEntry,
430 *orig_loc,
431 key_loc,
432 )
433 }
434 Entry::Vacant(entry) => {
435 entry.insert(HeaderValue(key_loc, val));
436 }
437 }
438 if let Some(j) = self.lookahead_is(",", j) {
439 i = self.parse_ws(j);
440 continue;
441 } else {
442 i = j;
443 break;
444 }
445 }
446 if let Some(j) = self.lookahead_is("*", i) {
447 errs.push(HeaderError {
448 kind: HeaderErrorKind::UnexpectedToken(
449 '*',
450 "perhaps this is a glob, in which case it requires string quoting.",
451 ),
452 locations: vec![Span::new(i, j)],
453 });
454 Err(errs)
455 } else if let Some(i) = self.lookahead_is("}", i) {
456 if errs.is_empty() {
457 Ok((ret, i))
458 } else {
459 Err(errs)
460 }
461 } else {
462 errs.push(HeaderError {
463 kind: HeaderErrorKind::ExpectedToken('}'),
464 locations: vec![Span::new(section_start_pos, i)],
465 });
466 Err(errs)
467 }
468 } else {
469 errs.push(HeaderError {
470 kind: HeaderErrorKind::ExpectedToken('{'),
471 locations: vec![Span::new(i, i)],
472 });
473 Err(errs)
474 }
475 } else if self.required {
476 errs.push(HeaderError {
477 kind: HeaderErrorKind::MissingGrmtoolsSection,
478 locations: vec![Span::new(0, 0)],
479 });
480 Err(errs)
481 } else {
482 Ok((Header::new(), 0))
483 }
484 }
485
486 fn parse_name(&self, i: usize) -> Result<(String, usize), HeaderError<Span>> {
487 match RE_NAME.find(&self.src[i..]) {
488 Some(m) => {
489 assert_eq!(m.start(), 0);
490 Ok((
491 self.src[i..i + m.end()].to_string().to_lowercase(),
492 i + m.end(),
493 ))
494 }
495 None => {
496 if self.src[i..].starts_with("*") {
497 Err(HeaderError {
498 kind: HeaderErrorKind::UnexpectedToken(
499 '*',
500 "perhaps this is a glob, in which case it requires string quoting.",
501 ),
502 locations: vec![Span::new(i, i)],
503 })
504 } else {
505 Err(HeaderError {
506 kind: HeaderErrorKind::IllegalName,
507 locations: vec![Span::new(i, i)],
508 })
509 }
510 }
511 }
512 }
513
514 fn lookahead_is(&self, s: &'static str, i: usize) -> Option<usize> {
515 if self.src[i..].starts_with(s) {
516 Some(i + s.len())
517 } else {
518 None
519 }
520 }
521
522 fn parse_ws(&self, i: usize) -> usize {
523 RE_LEADING_WS
524 .find(&self.src[i..])
525 .map(|m| m.end() + i)
526 .unwrap_or(i)
527 }
528}
529
/// The contents of a `%grmtools` section: a map from key name to `HeaderValue`. Keys produced by
/// `GrmtoolsSectionParser::parse` are lowercased.
#[doc(hidden)]
pub type Header<T> = MarkMap<String, HeaderValue<T>>;
533
534impl TryFrom<YaccKind> for Value<Location> {
535 type Error = HeaderError<Location>;
536 fn try_from(kind: YaccKind) -> Result<Value<Location>, HeaderError<Location>> {
537 let from_loc = Location::Other("From<YaccKind>".to_string());
538 Ok(match kind {
539 YaccKind::Grmtools => Value::Setting(Setting::Unitary(Namespaced {
540 namespace: Some(("yacckind".to_string(), from_loc.clone())),
541 member: ("grmtools".to_string(), from_loc),
542 })),
543 YaccKind::Eco => Value::Setting(Setting::Unitary(Namespaced {
544 namespace: Some(("yacckind".to_string(), from_loc.clone())),
545 member: ("eco".to_string(), from_loc),
546 })),
547 YaccKind::Original(action_kind) => Value::Setting(Setting::Constructor {
548 ctor: Namespaced {
549 namespace: Some(("yacckind".to_string(), from_loc.clone())),
550 member: ("original".to_string(), from_loc.clone()),
551 },
552 arg: match action_kind {
553 YaccOriginalActionKind::NoAction => Namespaced {
554 namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
555 member: ("noaction".to_string(), from_loc),
556 },
557 YaccOriginalActionKind::UserAction => Namespaced {
558 namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
559 member: ("useraction".to_string(), from_loc),
560 },
561 YaccOriginalActionKind::GenericParseTree => Namespaced {
562 namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
563 member: ("genericparsetree".to_string(), from_loc),
564 },
565 },
566 }),
567 })
568 }
569}
570
571impl<T: Clone> TryFrom<&Value<T>> for YaccKind {
572 type Error = HeaderError<T>;
573 fn try_from(value: &Value<T>) -> Result<YaccKind, HeaderError<T>> {
574 let mut err_locs = Vec::new();
575 match value {
576 Value::Flag(_, loc) => Err(HeaderError {
577 kind: HeaderErrorKind::ConversionError(
578 "From<YaccKind>",
579 "Cannot convert boolean to YaccKind",
580 ),
581 locations: vec![loc.clone()],
582 }),
583 Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
584 kind: HeaderErrorKind::ConversionError(
585 "From<YaccKind>",
586 "Cannot convert number to YaccKind",
587 ),
588 locations: vec![loc.clone()],
589 }),
590 Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
591 kind: HeaderErrorKind::ConversionError(
592 "From<YaccKind>",
593 "Cannot convert string to YaccKind",
594 ),
595 locations: vec![loc.clone()],
596 }),
597
598 Value::Setting(Setting::Unitary(Namespaced {
599 namespace,
600 member: (yk_value, yk_value_loc),
601 })) => {
602 if let Some((ns, ns_loc)) = namespace {
603 if ns != "yacckind" {
604 err_locs.push(ns_loc.clone());
605 }
606 }
607 let yacckinds = [
608 ("grmtools".to_string(), YaccKind::Grmtools),
609 ("eco".to_string(), YaccKind::Eco),
610 ];
611 let yk_found = yacckinds
612 .iter()
613 .find_map(|(yk_str, yk)| (yk_str == yk_value).then_some(yk));
614 if let Some(yk) = yk_found {
615 if err_locs.is_empty() {
616 Ok(*yk)
617 } else {
618 Err(HeaderError {
619 kind: HeaderErrorKind::InvalidEntry("yacckind"),
620 locations: err_locs,
621 })
622 }
623 } else {
624 err_locs.push(yk_value_loc.clone());
625 Err(HeaderError {
626 kind: HeaderErrorKind::InvalidEntry("yacckind"),
627 locations: err_locs,
628 })
629 }
630 }
631 Value::Setting(Setting::Constructor {
632 ctor:
633 Namespaced {
634 namespace: yk_namespace,
635 member: (yk_str, yk_loc),
636 },
637 arg:
638 Namespaced {
639 namespace: ak_namespace,
640 member: (ak_str, ak_loc),
641 },
642 }) => {
643 if let Some((yk_ns, yk_ns_loc)) = yk_namespace {
644 if yk_ns != "yacckind" {
645 err_locs.push(yk_ns_loc.clone());
646 }
647 }
648
649 if yk_str != "original" {
650 err_locs.push(yk_loc.clone());
651 }
652
653 if let Some((ak_ns, ak_ns_loc)) = ak_namespace {
654 if ak_ns != "yaccoriginalactionkind" {
655 err_locs.push(ak_ns_loc.clone());
656 }
657 }
658 let actionkinds = [
659 ("noaction", YaccOriginalActionKind::NoAction),
660 ("useraction", YaccOriginalActionKind::UserAction),
661 ("genericparsetree", YaccOriginalActionKind::GenericParseTree),
662 ];
663 let yk_found = actionkinds.iter().find_map(|(actionkind_str, actionkind)| {
664 (ak_str == actionkind_str).then_some(YaccKind::Original(*actionkind))
665 });
666
667 if let Some(yk) = yk_found {
668 if err_locs.is_empty() {
669 Ok(yk)
670 } else {
671 Err(HeaderError {
672 kind: HeaderErrorKind::InvalidEntry("yacckind"),
673 locations: err_locs,
674 })
675 }
676 } else {
677 err_locs.push(ak_loc.clone());
678 Err(HeaderError {
679 kind: HeaderErrorKind::InvalidEntry("yacckind"),
680 locations: err_locs,
681 })
682 }
683 }
684 }
685 }
686}
687
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that parsing `src` fails whether or not the section is required.
    fn assert_rejected(src: &str) {
        for required in [true, false] {
            let res = GrmtoolsSectionParser::new(src, required).parse();
            assert!(res.is_err());
        }
    }

    /// Sections which end before their closing `}` must be rejected, whatever they contain.
    #[test]
    fn test_header_missing_curly_bracket() {
        let srcs = [
            "%grmtools { a",
            "%grmtools { a, b",
            "%grmtools { a, b,",
            "%grmtools { yacckind",
            "%grmtools { yacckind:",
            "%grmtools { yacckind: GrmTools",
            "%grmtools { yacckind: GrmTools,",
            r#"%grmtools { test_files: ""#,
            r#"%grmtools { test_files: "test"#,
            r#"%grmtools { test_files: "test""#,
            r#"%grmtools { test_files: "test","#,
            "%grmtools { !flag",
            "%grmtools { !flag,",
        ];
        srcs.into_iter().for_each(assert_rejected);
    }

    /// An opening `{` with nothing after it must be rejected.
    #[test]
    fn test_header_missing_curly_bracket_empty() {
        assert_rejected("%grmtools {");
    }

    /// An opening `{` followed by characters which cannot start an entry must be rejected.
    #[test]
    fn test_header_missing_curly_bracket_invalid() {
        assert_rejected("%grmtools {####");
    }

    /// Repeating a key, in any of its forms, gives one error listing every occurrence.
    #[test]
    fn test_header_duplicates() {
        let src = "%grmtools {dupe, !dupe, dupe: test}";
        for required in [true, false] {
            let errs = GrmtoolsSectionParser::new(src, required)
                .parse()
                .unwrap_err();
            assert_eq!(errs.len(), 1);
            let only = &errs[0];
            assert_eq!(only.kind, HeaderErrorKind::DuplicateEntry);
            assert_eq!(only.locations.len(), 3);
        }
    }

    /// An unquoted glob is reported with the dedicated `*` error rather than a generic one.
    #[test]
    fn test_unquoted_globs() {
        let srcs = [
            "%grmtools {test_files: *.test,}",
            "%grmtools {test_files: foo*.test,}",
        ];
        for src in srcs {
            let errs = GrmtoolsSectionParser::new(src, true).parse().unwrap_err();
            assert_eq!(errs.len(), 1);
            if !matches!(errs[0].kind, HeaderErrorKind::UnexpectedToken('*', _)) {
                panic!("Expected glob specific error");
            }
        }
    }
}