1use crate::{
2 Location, Span, Spanned,
3 markmap::{Entry, MarkMap},
4 yacc::{
5 YaccGrammarError, YaccGrammarErrorKind, YaccKind, YaccOriginalActionKind, parser::SpansKind,
6 },
7};
8use regex::{Regex, RegexBuilder};
9use std::{error::Error, fmt, sync::LazyLock};
10
11#[derive(Debug, Clone)]
18#[doc(hidden)]
19pub struct HeaderError<T> {
20 pub kind: HeaderErrorKind,
21 pub locations: Vec<T>,
22}
23
24impl<T: fmt::Debug> Error for HeaderError<T> {}
25impl<T> fmt::Display for HeaderError<T> {
26 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
27 write!(f, "{}", self.kind)
28 }
29}
30
31impl From<HeaderError<Span>> for YaccGrammarError {
32 fn from(e: HeaderError<Span>) -> YaccGrammarError {
33 YaccGrammarError {
34 kind: YaccGrammarErrorKind::Header(e.kind, e.spanskind()),
35 spans: e.locations,
36 }
37 }
38}
39
40impl Spanned for HeaderError<Span> {
41 fn spans(&self) -> &[Span] {
42 self.locations.as_slice()
43 }
44 fn spanskind(&self) -> SpansKind {
45 self.spanskind()
46 }
47}
48
49#[derive(Debug, PartialEq)]
52#[doc(hidden)]
53pub struct HeaderValue<T>(pub T, pub Value<T>);
54
55impl From<HeaderValue<Span>> for HeaderValue<Location> {
56 fn from(hv: HeaderValue<Span>) -> HeaderValue<Location> {
57 HeaderValue(hv.0.into(), hv.1.into())
58 }
59}
60
/// The different ways in which reading a `%grmtools` header can fail.
#[doc(hidden)]
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HeaderErrorKind {
    /// The `%grmtools` section was required but not present.
    MissingGrmtoolsSection,
    /// A name was expected but the input did not start with one.
    IllegalName,
    /// The given token was expected at this position.
    ExpectedToken(char),
    /// The given token was found where it is not allowed; the string is a hint
    /// for the user.
    UnexpectedToken(char, &'static str),
    /// The same key occurred more than once.
    DuplicateEntry,
    /// The named entry has an invalid value.
    InvalidEntry(&'static str),
    /// A header value could not be converted; holds the target type name and
    /// an explanation.
    ConversionError(&'static str, &'static str),
}
73
74impl fmt::Display for HeaderErrorKind {
75 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
76 let s = match self {
77 HeaderErrorKind::MissingGrmtoolsSection => "Missing %grmtools section",
78 HeaderErrorKind::IllegalName => "Illegal name",
79 HeaderErrorKind::ExpectedToken(c) => &format!("Expected token: '{}'", c),
80 HeaderErrorKind::UnexpectedToken(c, hint) => {
81 &format!("Unxpected token: '{}', {} ", c, hint)
82 }
83 HeaderErrorKind::InvalidEntry(s) => &format!("Invalid entry: '{}'", s),
84 HeaderErrorKind::DuplicateEntry => "Duplicate Entry",
85 HeaderErrorKind::ConversionError(t, err_str) => {
86 &format!("Converting header value to type '{}': {}", t, err_str)
87 }
88 };
89 write!(f, "{}", s)
90 }
91}
92
93impl<T> HeaderError<T> {
94 pub fn spanskind(&self) -> SpansKind {
96 match self.kind {
97 HeaderErrorKind::DuplicateEntry => SpansKind::DuplicationError,
98 _ => SpansKind::Error,
99 }
100 }
101}
102
/// A name with an optional namespace in front of it, i.e. `member` or
/// `namespace::member`. Each part is stored together with its location.
#[doc(hidden)]
#[derive(PartialEq, Eq, Debug)]
pub struct Namespaced<T> {
    /// The part before `::`, if there is one.
    pub namespace: Option<(String, T)>,
    /// The (last) name itself.
    pub member: (String, T),
}
125
126#[derive(Debug, Eq, PartialEq)]
127#[doc(hidden)]
128pub enum Setting<T> {
129 Unitary(Namespaced<T>),
131 Constructor {
135 ctor: Namespaced<T>,
136 arg: Namespaced<T>,
137 },
138 Num(u64, T),
139 String(String, T),
140 Array(Vec<Setting<T>>, T, T),
142}
143
/// Parser for the `%grmtools { ... }` section at the start of an input string.
#[doc(hidden)]
pub struct GrmtoolsSectionParser<'input> {
    /// The text being parsed.
    src: &'input str,
    /// Whether a missing `%grmtools` section is reported as an error.
    required: bool,
}
150
151#[derive(Debug, Eq, PartialEq)]
156#[doc(hidden)]
157pub enum Value<T> {
158 Flag(bool, T),
159 Setting(Setting<T>),
160}
161
162impl From<Setting<Span>> for Setting<Location> {
163 fn from(s: Setting<Span>) -> Setting<Location> {
164 match s {
165 Setting::Unitary(Namespaced {
166 namespace,
167 member: (m, ml),
168 }) => Setting::Unitary(Namespaced {
169 namespace: namespace.map(|(n, nl)| (n, nl.into())),
170 member: (m, ml.into()),
171 }),
172 Setting::Constructor {
173 ctor:
174 Namespaced {
175 namespace: ctor_ns,
176 member: (ctor_m, ctor_ml),
177 },
178 arg:
179 Namespaced {
180 namespace: arg_ns,
181 member: (arg_m, arg_ml),
182 },
183 } => Setting::Constructor {
184 ctor: Namespaced {
185 namespace: ctor_ns.map(|(ns, ns_l)| (ns, ns_l.into())),
186 member: (ctor_m, ctor_ml.into()),
187 },
188 arg: Namespaced {
189 namespace: arg_ns.map(|(ns, ns_l)| (ns, ns_l.into())),
190 member: (arg_m, arg_ml.into()),
191 },
192 },
193 Setting::Num(num, num_loc) => Setting::Num(num, num_loc.into()),
194 Setting::String(s, str_loc) => Setting::String(s, str_loc.into()),
195 Setting::Array(mut xs, arr_open_loc, arr_close_loc) => Setting::Array(
196 xs.drain(..).map(|x| x.into()).collect(),
197 arr_open_loc.into(),
198 arr_close_loc.into(),
199 ),
200 }
201 }
202}
203
204impl From<Value<Span>> for Value<Location> {
205 fn from(v: Value<Span>) -> Value<Location> {
206 match v {
207 Value::Flag(flag, u) => Value::Flag(flag, u.into()),
208 Value::Setting(s) => Value::Setting(s.into()),
209 }
210 }
211}
212
213static RE_LEADING_WS: LazyLock<Regex> =
214 LazyLock::new(|| Regex::new(r"^[\p{Pattern_White_Space}]*").unwrap());
215static RE_NAME: LazyLock<Regex> = LazyLock::new(|| {
216 RegexBuilder::new(r"^[A-Z][A-Z_]*")
217 .case_insensitive(true)
218 .build()
219 .unwrap()
220});
221static RE_DIGITS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[0-9]+").unwrap());
222static RE_STRING: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^\"(\\.|[^"\\])*\""#).unwrap());
223
224const MAGIC: &str = "%grmtools";
225
226fn add_duplicate_occurrence<T: Eq + PartialEq + Clone>(
227 errs: &mut Vec<HeaderError<T>>,
228 kind: HeaderErrorKind,
229 orig_loc: T,
230 dup_loc: T,
231) {
232 if !errs.iter_mut().any(|e| {
233 if e.kind == kind && e.locations[0] == orig_loc {
234 e.locations.push(dup_loc.clone());
235 true
236 } else {
237 false
238 }
239 }) {
240 errs.push(HeaderError {
241 kind,
242 locations: vec![orig_loc, dup_loc],
243 });
244 }
245}
246
247impl<'input> GrmtoolsSectionParser<'input> {
248 fn parse_setting(&'_ self, mut i: usize) -> Result<(Setting<Span>, usize), HeaderError<Span>> {
249 i = self.parse_ws(i);
250 match RE_DIGITS.find(&self.src[i..]) {
251 Some(m) => {
252 let num_span = Span::new(i + m.start(), i + m.end());
253 let num_str = &self.src[num_span.start()..num_span.end()];
254 let num = str::parse::<u64>(num_str).unwrap();
256 let val = Setting::Num(num, num_span);
257 i = self.parse_ws(num_span.end());
258 Ok((val, i))
259 }
260 None => match RE_STRING.find(&self.src[i..]) {
261 Some(m) => {
262 let end = i + m.end();
263 let str_span = Span::new(i + m.start() + 1, end - 1);
265 let str = &self.src[str_span.start()..str_span.end()];
266 let setting = Setting::String(str.to_string(), str_span);
267 i = self.parse_ws(end);
269 Ok((setting, i))
270 }
271 None => {
272 if let Some(mut j) = self.lookahead_is("[", i) {
273 let mut vals = Vec::new();
274 let open_pos = j;
275
276 loop {
277 j = self.parse_ws(j);
278 if let Some(end_pos) = self.lookahead_is("]", j) {
279 return Ok((
280 Setting::Array(
281 vals,
282 Span::new(i, open_pos),
283 Span::new(j, end_pos),
284 ),
285 end_pos,
286 ));
287 }
288 if let Ok((val, k)) = self.parse_setting(j) {
289 vals.push(val);
290 j = self.parse_ws(k);
291 }
292 if let Some(k) = self.lookahead_is(",", j) {
293 j = k
294 }
295 }
296 } else {
297 let (path_val, j) = self.parse_namespaced(i)?;
298 i = self.parse_ws(j);
299 if let Some(j) = self.lookahead_is("(", i) {
300 let (arg, j) = self.parse_namespaced(j)?;
301 i = self.parse_ws(j);
302 if let Some(j) = self.lookahead_is(")", i) {
303 i = self.parse_ws(j);
304 Ok((
305 Setting::Constructor {
306 ctor: path_val,
307 arg,
308 },
309 i,
310 ))
311 } else {
312 Err(HeaderError {
313 kind: HeaderErrorKind::ExpectedToken(')'),
314 locations: vec![Span::new(i, i)],
315 })
316 }
317 } else {
318 Ok((Setting::Unitary(path_val), i))
319 }
320 }
321 }
322 },
323 }
324 }
325
326 pub fn parse_key_value(
327 &'_ self,
328 mut i: usize,
329 ) -> Result<(String, Span, Value<Span>, usize), HeaderError<Span>> {
330 if let Some(j) = self.lookahead_is("!", i) {
331 let (flag_name, k) = self.parse_name(j)?;
332 Ok((
333 flag_name,
334 Span::new(j, k),
335 Value::Flag(false, Span::new(i, k)),
336 self.parse_ws(k),
337 ))
338 } else {
339 let (key_name, j) = self.parse_name(i)?;
340 let key_span = Span::new(i, j);
341 i = self.parse_ws(j);
342 if let Some(j) = self.lookahead_is(":", i) {
343 let (val, j) = self.parse_setting(j)?;
344 Ok((key_name, key_span, Value::Setting(val), j))
345 } else {
346 Ok((key_name, key_span, Value::Flag(true, key_span), i))
347 }
348 }
349 }
350
351 fn parse_namespaced(
352 &self,
353 mut i: usize,
354 ) -> Result<(Namespaced<Span>, usize), HeaderError<Span>> {
355 let (name, j) = self.parse_name(i)?;
357 let name_span = Span::new(i, j);
358 i = self.parse_ws(j);
359 if let Some(j) = self.lookahead_is("::", i) {
360 i = self.parse_ws(j);
361 let (member_val, j) = self.parse_name(i)?;
362 let member_val_span = Span::new(i, j);
363 i = self.parse_ws(j);
364 Ok((
365 Namespaced {
366 namespace: Some((name, name_span)),
367 member: (member_val, member_val_span),
368 },
369 i,
370 ))
371 } else {
372 Ok((
373 Namespaced {
374 namespace: None,
375 member: (name, name_span),
376 },
377 i,
378 ))
379 }
380 }
381
382 pub fn new(src: &'input str, required: bool) -> Self {
392 Self { src, required }
393 }
394
395 #[allow(clippy::type_complexity)]
396 pub fn parse(&'_ self) -> Result<(Header<Span>, usize), Vec<HeaderError<Span>>> {
397 let mut errs = Vec::new();
398 if let Some(mut i) = self.lookahead_is(MAGIC, self.parse_ws(0)) {
399 let mut ret = Header::new();
400 i = self.parse_ws(i);
401 let section_start_pos = i;
402 if let Some(j) = self.lookahead_is("{", i) {
403 i = self.parse_ws(j);
404 while self.lookahead_is("}", i).is_none() && i < self.src.len() {
405 let (key, key_loc, val, j) = match self.parse_key_value(i) {
406 Ok((key, key_loc, val, pos)) => (key, key_loc, val, pos),
407 Err(e) => {
408 errs.push(e);
409 return Err(errs);
410 }
411 };
412 match ret.entry(key) {
413 Entry::Occupied(orig) => {
414 let HeaderValue(orig_loc, _): &HeaderValue<Span> = orig.get();
415 add_duplicate_occurrence(
416 &mut errs,
417 HeaderErrorKind::DuplicateEntry,
418 *orig_loc,
419 key_loc,
420 )
421 }
422 Entry::Vacant(entry) => {
423 entry.insert(HeaderValue(key_loc, val));
424 }
425 }
426 if let Some(j) = self.lookahead_is(",", j) {
427 i = self.parse_ws(j);
428 continue;
429 } else {
430 i = self.parse_ws(j);
431 break;
432 }
433 }
434 if let Some(j) = self.lookahead_is("*", i) {
435 errs.push(HeaderError {
436 kind: HeaderErrorKind::UnexpectedToken(
437 '*',
438 "perhaps this is a glob, in which case it requires string quoting.",
439 ),
440 locations: vec![Span::new(i, j)],
441 });
442 Err(errs)
443 } else if let Some(i) = self.lookahead_is("}", i) {
444 if errs.is_empty() {
445 Ok((ret, i))
446 } else {
447 Err(errs)
448 }
449 } else {
450 errs.push(HeaderError {
451 kind: HeaderErrorKind::ExpectedToken('}'),
452 locations: vec![Span::new(section_start_pos, i)],
453 });
454 Err(errs)
455 }
456 } else {
457 errs.push(HeaderError {
458 kind: HeaderErrorKind::ExpectedToken('{'),
459 locations: vec![Span::new(i, i)],
460 });
461 Err(errs)
462 }
463 } else if self.required {
464 errs.push(HeaderError {
465 kind: HeaderErrorKind::MissingGrmtoolsSection,
466 locations: vec![Span::new(0, 0)],
467 });
468 Err(errs)
469 } else {
470 Ok((Header::new(), 0))
471 }
472 }
473
474 fn parse_name(&self, i: usize) -> Result<(String, usize), HeaderError<Span>> {
475 match RE_NAME.find(&self.src[i..]) {
476 Some(m) => {
477 assert_eq!(m.start(), 0);
478 Ok((
479 self.src[i..i + m.end()].to_string().to_lowercase(),
480 i + m.end(),
481 ))
482 }
483 None => {
484 if self.src[i..].starts_with("*") {
485 Err(HeaderError {
486 kind: HeaderErrorKind::UnexpectedToken(
487 '*',
488 "perhaps this is a glob, in which case it requires string quoting.",
489 ),
490 locations: vec![Span::new(i, i)],
491 })
492 } else {
493 Err(HeaderError {
494 kind: HeaderErrorKind::IllegalName,
495 locations: vec![Span::new(i, i)],
496 })
497 }
498 }
499 }
500 }
501
502 fn lookahead_is(&self, s: &'static str, i: usize) -> Option<usize> {
503 if self.src[i..].starts_with(s) {
504 Some(i + s.len())
505 } else {
506 None
507 }
508 }
509
510 fn parse_ws(&self, i: usize) -> usize {
511 RE_LEADING_WS
512 .find(&self.src[i..])
513 .map(|m| m.end() + i)
514 .unwrap_or(i)
515 }
516}
517
518#[doc(hidden)]
520pub type Header<T> = MarkMap<String, HeaderValue<T>>;
521
522impl TryFrom<YaccKind> for Value<Location> {
523 type Error = HeaderError<Location>;
524 fn try_from(kind: YaccKind) -> Result<Value<Location>, HeaderError<Location>> {
525 let from_loc = Location::Other("From<YaccKind>".to_string());
526 Ok(match kind {
527 YaccKind::Grmtools => Value::Setting(Setting::Unitary(Namespaced {
528 namespace: Some(("yacckind".to_string(), from_loc.clone())),
529 member: ("grmtools".to_string(), from_loc),
530 })),
531 YaccKind::Eco => Value::Setting(Setting::Unitary(Namespaced {
532 namespace: Some(("yacckind".to_string(), from_loc.clone())),
533 member: ("eco".to_string(), from_loc),
534 })),
535 YaccKind::Original(action_kind) => Value::Setting(Setting::Constructor {
536 ctor: Namespaced {
537 namespace: Some(("yacckind".to_string(), from_loc.clone())),
538 member: ("original".to_string(), from_loc.clone()),
539 },
540 arg: match action_kind {
541 YaccOriginalActionKind::NoAction => Namespaced {
542 namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
543 member: ("noaction".to_string(), from_loc),
544 },
545 YaccOriginalActionKind::UserAction => Namespaced {
546 namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
547 member: ("useraction".to_string(), from_loc),
548 },
549 YaccOriginalActionKind::GenericParseTree => Namespaced {
550 namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
551 member: ("genericparsetree".to_string(), from_loc),
552 },
553 },
554 }),
555 })
556 }
557}
558
559impl<T: Clone> TryFrom<&Value<T>> for YaccKind {
560 type Error = HeaderError<T>;
561 fn try_from(value: &Value<T>) -> Result<YaccKind, HeaderError<T>> {
562 let mut err_locs = Vec::new();
563 match value {
564 Value::Flag(_, loc) => Err(HeaderError {
565 kind: HeaderErrorKind::ConversionError(
566 "From<YaccKind>",
567 "Cannot convert boolean to YaccKind",
568 ),
569 locations: vec![loc.clone()],
570 }),
571 Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
572 kind: HeaderErrorKind::ConversionError(
573 "From<YaccKind>",
574 "Cannot convert number to YaccKind",
575 ),
576 locations: vec![loc.clone()],
577 }),
578 Value::Setting(Setting::Array(_, loc, _)) => Err(HeaderError {
579 kind: HeaderErrorKind::ConversionError(
580 "From<YaccKind>",
581 "Cannot convert array to YaccKind",
582 ),
583 locations: vec![loc.clone()],
584 }),
585 Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
586 kind: HeaderErrorKind::ConversionError(
587 "From<YaccKind>",
588 "Cannot convert string to YaccKind",
589 ),
590 locations: vec![loc.clone()],
591 }),
592
593 Value::Setting(Setting::Unitary(Namespaced {
594 namespace,
595 member: (yk_value, yk_value_loc),
596 })) => {
597 if let Some((ns, ns_loc)) = namespace {
598 if ns != "yacckind" {
599 err_locs.push(ns_loc.clone());
600 }
601 }
602 let yacckinds = [
603 ("grmtools".to_string(), YaccKind::Grmtools),
604 ("eco".to_string(), YaccKind::Eco),
605 ];
606 let yk_found = yacckinds
607 .iter()
608 .find_map(|(yk_str, yk)| (yk_str == yk_value).then_some(yk));
609 if let Some(yk) = yk_found {
610 if err_locs.is_empty() {
611 Ok(*yk)
612 } else {
613 Err(HeaderError {
614 kind: HeaderErrorKind::InvalidEntry("yacckind"),
615 locations: err_locs,
616 })
617 }
618 } else {
619 err_locs.push(yk_value_loc.clone());
620 Err(HeaderError {
621 kind: HeaderErrorKind::InvalidEntry("yacckind"),
622 locations: err_locs,
623 })
624 }
625 }
626 Value::Setting(Setting::Constructor {
627 ctor:
628 Namespaced {
629 namespace: yk_namespace,
630 member: (yk_str, yk_loc),
631 },
632 arg:
633 Namespaced {
634 namespace: ak_namespace,
635 member: (ak_str, ak_loc),
636 },
637 }) => {
638 if let Some((yk_ns, yk_ns_loc)) = yk_namespace {
639 if yk_ns != "yacckind" {
640 err_locs.push(yk_ns_loc.clone());
641 }
642 }
643
644 if yk_str != "original" {
645 err_locs.push(yk_loc.clone());
646 }
647
648 if let Some((ak_ns, ak_ns_loc)) = ak_namespace {
649 if ak_ns != "yaccoriginalactionkind" {
650 err_locs.push(ak_ns_loc.clone());
651 }
652 }
653 let actionkinds = [
654 ("noaction", YaccOriginalActionKind::NoAction),
655 ("useraction", YaccOriginalActionKind::UserAction),
656 ("genericparsetree", YaccOriginalActionKind::GenericParseTree),
657 ];
658 let yk_found = actionkinds.iter().find_map(|(actionkind_str, actionkind)| {
659 (ak_str == actionkind_str).then_some(YaccKind::Original(*actionkind))
660 });
661
662 if let Some(yk) = yk_found {
663 if err_locs.is_empty() {
664 Ok(yk)
665 } else {
666 Err(HeaderError {
667 kind: HeaderErrorKind::InvalidEntry("yacckind"),
668 locations: err_locs,
669 })
670 }
671 } else {
672 err_locs.push(ak_loc.clone());
673 Err(HeaderError {
674 kind: HeaderErrorKind::InvalidEntry("yacckind"),
675 locations: err_locs,
676 })
677 }
678 }
679 }
680 }
681}
682
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that parsing `src` fails regardless of whether the section is
    /// marked as required.
    fn assert_parse_fails(src: &str) {
        for required in [true, false] {
            let res = GrmtoolsSectionParser::new(src, required).parse();
            assert!(res.is_err());
        }
    }

    /// Sections that are never closed with `}` must be rejected.
    #[test]
    fn test_header_missing_curly_bracket() {
        let srcs = [
            "%grmtools { a",
            "%grmtools { a, b",
            "%grmtools { a, b,",
            "%grmtools { yacckind",
            "%grmtools { yacckind:",
            "%grmtools { yacckind: GrmTools",
            "%grmtools { yacckind: GrmTools,",
            r#"%grmtools { test_files: ""#,
            r#"%grmtools { test_files: "test"#,
            r#"%grmtools { test_files: "test""#,
            r#"%grmtools { test_files: "test","#,
            "%grmtools { !flag",
            "%grmtools { !flag,",
        ];
        srcs.into_iter().for_each(assert_parse_fails);
    }

    /// An opening brace with nothing after it is an error.
    #[test]
    fn test_header_missing_curly_bracket_empty() {
        assert_parse_fails("%grmtools {");
    }

    /// Junk after the opening brace is an error.
    #[test]
    fn test_header_missing_curly_bracket_invalid() {
        assert_parse_fails("%grmtools {####");
    }

    /// All repetitions of a key are folded into one error with one location
    /// per occurrence.
    #[test]
    fn test_header_duplicates() {
        let src = "%grmtools {dupe, !dupe, dupe: test}";
        for required in [true, false] {
            let errs = GrmtoolsSectionParser::new(src, required)
                .parse()
                .unwrap_err();
            assert_eq!(errs.len(), 1);
            assert_eq!(errs[0].kind, HeaderErrorKind::DuplicateEntry);
            assert_eq!(errs[0].locations.len(), 3);
        }
    }

    /// Unquoted globs produce the dedicated `*` error.
    #[test]
    fn test_unquoted_globs() {
        let srcs = [
            "%grmtools {test_files: *.test,}",
            "%grmtools {test_files: foo*.test,}",
        ];
        for src in srcs {
            let errs = GrmtoolsSectionParser::new(src, true).parse().unwrap_err();
            assert_eq!(errs.len(), 1);
            assert!(
                matches!(errs[0].kind, HeaderErrorKind::UnexpectedToken('*', _)),
                "Expected glob specific error"
            );
        }
    }
}