cfgrammar/
header.rs

1use crate::{
2    markmap::{Entry, MarkMap},
3    yacc::{ParserError, YaccKind, YaccOriginalActionKind},
4    Location, Span,
5};
6use lazy_static::lazy_static;
7use regex::{Regex, RegexBuilder};
8use std::{error::Error, fmt};
9
/// An error regarding the `%grmtools` header section.
///
/// It is one of:
///
/// * An error encountered while parsing the section.
/// * An error caused by an entry in the section having an invalid value.
#[derive(Debug, Clone)]
pub struct HeaderError {
    /// What went wrong. This is also the text shown by the `Display` impl.
    pub kind: HeaderErrorKind,
    /// The source locations the error refers to. A duplicate-entry error carries the
    /// location of the original entry first, followed by each duplicate.
    pub locations: Vec<Location>,
}
21
// No methods are overridden: `HeaderError` relies on the `Error` trait's defaults.
impl Error for HeaderError {}
23impl fmt::Display for HeaderError {
24    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
25        write!(f, "{}", self.kind)
26    }
27}
28
29impl From<HeaderError> for ParserError {
30    fn from(e: HeaderError) -> ParserError {
31        ParserError::HeaderError(e)
32    }
33}
34
/// The category of a [`HeaderError`].
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
#[non_exhaustive]
#[doc(hidden)]
pub enum HeaderErrorKind {
    /// A `%grmtools` section was required but the source does not start with one.
    MissingGrmtoolsSection,
    /// A name was expected but the text at that position is not a valid name.
    IllegalName,
    /// The given token was expected at the reported location but was not found.
    ExpectedToken(char),
    /// The same key occurs more than once in the section.
    DuplicateEntry,
    /// The named entry holds a value that is not valid for it.
    InvalidEntry(&'static str),
    /// A header value could not be converted: the first field names the target type,
    /// the second is a description of the failure.
    ConversionError(&'static str, &'static str),
}
46
47impl fmt::Display for HeaderErrorKind {
48    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
49        let s = match self {
50            HeaderErrorKind::MissingGrmtoolsSection => "Missing %grmtools section",
51            HeaderErrorKind::IllegalName => "Illegal name",
52            HeaderErrorKind::ExpectedToken(c) => &format!("Expected token: '{}", c),
53            HeaderErrorKind::InvalidEntry(s) => &format!("Invalid entry: '{}'", s),
54            HeaderErrorKind::DuplicateEntry => "Duplicate Entry",
55            HeaderErrorKind::ConversionError(t, err_str) => {
56                &format!("Converting header value to type '{}': {}", t, err_str)
57            }
58        };
59        write!(f, "{}", s)
60    }
61}
62
/// A name with an optional namespace prefix.
///
/// `Foo::Bar`, where the namespace `Foo` is given, is represented as:
/// ```rust,ignore
/// Namespaced{
///     namespace: Some(("Foo", ...)),
///     member: ("Bar", ...)
/// }
/// ```
///
/// `Bar` on its own, without a namespace, is represented as:
/// ```rust,ignore
/// Namespaced{
///     namespace: None,
///     member: ("Bar", ...)
/// }
/// ```
///
/// Names produced by `GrmtoolsSectionParser` are stored lowercased.
#[derive(Debug, Eq, PartialEq)]
#[doc(hidden)]
pub struct Namespaced {
    /// The namespace name and its location, if a namespace was written.
    pub namespace: Option<(String, Location)>,
    /// The member name and its location.
    pub member: (String, Location),
}
85
/// The value part of a `key: value` header entry.
#[derive(Debug, Eq, PartialEq)]
#[doc(hidden)]
pub enum Setting {
    /// A value like `YaccKind::Grmtools`
    Unitary(Namespaced),
    /// A value like `YaccKind::Original(UserActions)`.
    /// In that example the field `ctor` would be:
    /// `Namespaced { namespace: "YaccKind", member: "Original" }`,
    /// and the field `arg` would be `Namespaced { namespace: None, member: "UserActions" }`.
    Constructor {
        ctor: Namespaced,
        arg: Namespaced,
    },
    /// An unsigned decimal number, together with the location it was written at.
    Num(u64, Location),
}
100
/// Parser for the `%grmtools` section
#[doc(hidden)]
pub struct GrmtoolsSectionParser<'input> {
    /// The complete input text; every offset used by the parser is a byte index into it.
    src: &'input str,
    /// Whether `parse` reports an error when the input has no `%grmtools` section.
    required: bool,
}
107
/// The value contained within a `Header`
///
/// To be useful across diverse crates, this type's fields are limited to types derived from
/// `core::` types, like booleans, numeric types, and string values.
#[derive(Debug, Eq, PartialEq)]
#[doc(hidden)]
pub enum Value {
    /// A boolean entry: `name` parses as `true` and `!name` as `false`.
    Flag(bool, Location),
    /// A `name: value` entry.
    Setting(Setting),
}
118
// Every pattern is anchored with `^` because the parser always matches against the
// remainder of the input starting at its current offset.
lazy_static! {
    // Zero or more `Pattern_White_Space` characters (this can match the empty string).
    static ref RE_LEADING_WS: Regex = Regex::new(r"^[\p{Pattern_White_Space}]*").unwrap();
    // A letter followed by any number of letters or underscores, matched
    // case-insensitively. Digits are not part of a name.
    static ref RE_NAME: Regex = RegexBuilder::new(r"^[A-Z][A-Z_]*")
        .case_insensitive(true)
        .build()
        .unwrap();
    // One or more ASCII decimal digits.
    static ref RE_DIGITS: Regex = Regex::new(r"^[0-9]+").unwrap();
}
127
/// The keyword that introduces the header section.
const MAGIC: &str = "%grmtools";
129
130fn add_duplicate_occurrence(
131    errs: &mut Vec<HeaderError>,
132    kind: HeaderErrorKind,
133    orig_loc: Location,
134    dup_loc: Location,
135) {
136    if !errs.iter_mut().any(|e| {
137        if e.kind == kind && e.locations[0] == orig_loc {
138            e.locations.push(dup_loc.clone());
139            true
140        } else {
141            false
142        }
143    }) {
144        errs.push(HeaderError {
145            kind,
146            locations: vec![orig_loc, dup_loc],
147        });
148    }
149}
150
151impl<'input> GrmtoolsSectionParser<'input> {
152    pub fn parse_value(
153        &'_ self,
154        mut i: usize,
155    ) -> Result<(String, Location, Value, usize), HeaderError> {
156        if let Some(j) = self.lookahead_is("!", i) {
157            let (flag_name, k) = self.parse_name(j)?;
158            Ok((
159                flag_name,
160                Location::Span(Span::new(j, k)),
161                Value::Flag(false, Location::Span(Span::new(i, k))),
162                self.parse_ws(k),
163            ))
164        } else {
165            let (key_name, j) = self.parse_name(i)?;
166            let key_span = Location::Span(Span::new(i, j));
167            i = self.parse_ws(j);
168            if let Some(j) = self.lookahead_is(":", i) {
169                i = self.parse_ws(j);
170                match RE_DIGITS.find(&self.src[i..]) {
171                    Some(m) => {
172                        let num_span = Span::new(i + m.start(), i + m.end());
173                        let num_str = &self.src[num_span.start()..num_span.end()];
174                        // If the above regex matches we expect this to succeed.
175                        let num = str::parse::<u64>(num_str).unwrap();
176                        let val = Setting::Num(num, Location::Span(num_span));
177                        i = self.parse_ws(num_span.end());
178                        Ok((key_name, key_span, Value::Setting(val), i))
179                    }
180                    None => {
181                        let (path_val, j) = self.parse_namespaced(i)?;
182                        i = self.parse_ws(j);
183                        if let Some(j) = self.lookahead_is("(", i) {
184                            let (arg, j) = self.parse_namespaced(j)?;
185                            i = self.parse_ws(j);
186                            if let Some(j) = self.lookahead_is(")", i) {
187                                i = self.parse_ws(j);
188                                Ok((
189                                    key_name,
190                                    key_span,
191                                    Value::Setting(Setting::Constructor {
192                                        ctor: path_val,
193                                        arg,
194                                    }),
195                                    i,
196                                ))
197                            } else {
198                                Err(HeaderError {
199                                    kind: HeaderErrorKind::ExpectedToken(')'),
200                                    locations: vec![Location::Span(Span::new(i, i))],
201                                })
202                            }
203                        } else {
204                            Ok((
205                                key_name,
206                                key_span,
207                                Value::Setting(Setting::Unitary(path_val)),
208                                i,
209                            ))
210                        }
211                    }
212                }
213            } else {
214                Ok((key_name, key_span.clone(), Value::Flag(true, key_span), i))
215            }
216        }
217    }
218
219    fn parse_namespaced(&self, mut i: usize) -> Result<(Namespaced, usize), HeaderError> {
220        // Either a name alone, or a namespace which will be followed by a member.
221        let (name, j) = self.parse_name(i)?;
222        let name_span = Location::Span(Span::new(i, j));
223        i = self.parse_ws(j);
224        if let Some(j) = self.lookahead_is("::", i) {
225            i = self.parse_ws(j);
226            let (member_val, j) = self.parse_name(i)?;
227            let member_val_span = Location::Span(Span::new(i, j));
228            i = self.parse_ws(j);
229            Ok((
230                Namespaced {
231                    namespace: Some((name, name_span)),
232                    member: (member_val, member_val_span),
233                },
234                i,
235            ))
236        } else {
237            Ok((
238                Namespaced {
239                    namespace: None,
240                    member: (name, name_span),
241                },
242                i,
243            ))
244        }
245    }
246
    /// Creates a parser for a `%grmtools` section at the beginning of `src`.
    ///
    /// If `required` is true, [`parse`](Self::parse) returns an error when the
    /// `%grmtools` section is missing.
    ///
    /// If `required` is set and the section is present but empty, no error is
    /// produced. A caller that needs a particular value should itself
    /// produce an error that names the required value.
    pub fn new(src: &'input str, required: bool) -> Self {
        Self { src, required }
    }
259
260    #[allow(clippy::type_complexity)]
261    pub fn parse(&'_ self) -> Result<(Header, usize), Vec<HeaderError>> {
262        let mut errs = Vec::new();
263        if let Some(mut i) = self.lookahead_is(MAGIC, self.parse_ws(0)) {
264            let mut ret = Header::new();
265            i = self.parse_ws(i);
266            let section_start_pos = i;
267            if let Some(j) = self.lookahead_is("{", i) {
268                i = self.parse_ws(j);
269                while self.lookahead_is("}", i).is_none() && i < self.src.len() {
270                    let (key, key_loc, val, j) = match self.parse_value(i) {
271                        Ok((key, key_loc, val, pos)) => (key, key_loc, val, pos),
272                        Err(e) => {
273                            errs.push(e);
274                            return Err(errs);
275                        }
276                    };
277                    match ret.entry(key) {
278                        Entry::Occupied(orig) => {
279                            let (orig_loc, _): &(Location, Value) = orig.get();
280                            add_duplicate_occurrence(
281                                &mut errs,
282                                HeaderErrorKind::DuplicateEntry,
283                                orig_loc.clone(),
284                                key_loc,
285                            )
286                        }
287                        Entry::Vacant(entry) => {
288                            entry.insert((key_loc, val));
289                        }
290                    }
291                    if let Some(j) = self.lookahead_is(",", j) {
292                        i = self.parse_ws(j);
293                        continue;
294                    } else {
295                        i = j;
296                        break;
297                    }
298                }
299                if let Some(i) = self.lookahead_is("}", i) {
300                    if errs.is_empty() {
301                        Ok((ret, i))
302                    } else {
303                        Err(errs)
304                    }
305                } else {
306                    errs.push(HeaderError {
307                        kind: HeaderErrorKind::ExpectedToken('}'),
308                        locations: vec![Location::Span(Span::new(
309                            section_start_pos,
310                            self.src.len(),
311                        ))],
312                    });
313                    Err(errs)
314                }
315            } else {
316                errs.push(HeaderError {
317                    kind: HeaderErrorKind::ExpectedToken('{'),
318                    locations: vec![Location::Span(Span::new(i, i))],
319                });
320                Err(errs)
321            }
322        } else if self.required {
323            errs.push(HeaderError {
324                kind: HeaderErrorKind::MissingGrmtoolsSection,
325                locations: vec![Location::Span(Span::new(0, 0))],
326            });
327            Err(errs)
328        } else {
329            Ok((Header::new(), 0))
330        }
331    }
332
333    fn parse_name(&self, i: usize) -> Result<(String, usize), HeaderError> {
334        match RE_NAME.find(&self.src[i..]) {
335            Some(m) => {
336                assert_eq!(m.start(), 0);
337                Ok((
338                    self.src[i..i + m.end()].to_string().to_lowercase(),
339                    i + m.end(),
340                ))
341            }
342            None => Err(HeaderError {
343                kind: HeaderErrorKind::IllegalName,
344                locations: vec![Location::Span(Span::new(i, i))],
345            }),
346        }
347    }
348
349    fn lookahead_is(&self, s: &'static str, i: usize) -> Option<usize> {
350        if self.src[i..].starts_with(s) {
351            Some(i + s.len())
352        } else {
353            None
354        }
355    }
356
357    fn parse_ws(&self, i: usize) -> usize {
358        RE_LEADING_WS
359            .find(&self.src[i..])
360            .map(|m| m.end() + i)
361            .unwrap_or(i)
362    }
363}
364
/// A data structure representation of the `%grmtools` section.
///
/// Each key maps to the location of the key together with its parsed [`Value`].
pub type Header = MarkMap<String, (Location, Value)>;
367
368impl TryFrom<YaccKind> for Value {
369    type Error = HeaderError;
370    fn try_from(kind: YaccKind) -> Result<Value, HeaderError> {
371        let from_loc = Location::Other("From<YaccKind>".to_string());
372        Ok(match kind {
373            YaccKind::Grmtools => Value::Setting(Setting::Unitary(Namespaced {
374                namespace: Some(("yacckind".to_string(), from_loc.clone())),
375                member: ("grmtools".to_string(), from_loc),
376            })),
377            YaccKind::Eco => Value::Setting(Setting::Unitary(Namespaced {
378                namespace: Some(("yacckind".to_string(), from_loc.clone())),
379                member: ("eco".to_string(), from_loc),
380            })),
381            YaccKind::Original(action_kind) => Value::Setting(Setting::Constructor {
382                ctor: Namespaced {
383                    namespace: Some(("yacckind".to_string(), from_loc.clone())),
384                    member: ("original".to_string(), from_loc.clone()),
385                },
386                arg: match action_kind {
387                    YaccOriginalActionKind::NoAction => Namespaced {
388                        namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
389                        member: ("noaction".to_string(), from_loc),
390                    },
391                    YaccOriginalActionKind::UserAction => Namespaced {
392                        namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
393                        member: ("useraction".to_string(), from_loc),
394                    },
395                    YaccOriginalActionKind::GenericParseTree => Namespaced {
396                        namespace: Some(("yaccoriginalactionkind".to_string(), from_loc.clone())),
397                        member: ("genericparsetree".to_string(), from_loc),
398                    },
399                },
400            }),
401        })
402    }
403}
404
405impl TryFrom<&Value> for YaccKind {
406    type Error = HeaderError;
407    fn try_from(value: &Value) -> Result<YaccKind, HeaderError> {
408        let mut err_locs = Vec::new();
409        match value {
410            Value::Flag(_, loc) => Err(HeaderError {
411                kind: HeaderErrorKind::ConversionError(
412                    "From<YaccKind>",
413                    "Cannot convert boolean to YaccKind",
414                ),
415                locations: vec![loc.clone()],
416            }),
417            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
418                kind: HeaderErrorKind::ConversionError(
419                    "From<YaccKind>",
420                    "Cannot convert number to YaccKind",
421                ),
422                locations: vec![loc.clone()],
423            }),
424            Value::Setting(Setting::Unitary(Namespaced {
425                namespace,
426                member: (yk_value, yk_value_loc),
427            })) => {
428                if let Some((ns, ns_loc)) = namespace {
429                    if ns != "yacckind" {
430                        err_locs.push(ns_loc.clone());
431                    }
432                }
433                let yacckinds = [
434                    ("grmtools".to_string(), YaccKind::Grmtools),
435                    ("eco".to_string(), YaccKind::Eco),
436                ];
437                let yk_found = yacckinds
438                    .iter()
439                    .find_map(|(yk_str, yk)| (yk_str == yk_value).then_some(yk));
440                if let Some(yk) = yk_found {
441                    if err_locs.is_empty() {
442                        Ok(*yk)
443                    } else {
444                        Err(HeaderError {
445                            kind: HeaderErrorKind::InvalidEntry("yacckind"),
446                            locations: err_locs,
447                        })
448                    }
449                } else {
450                    err_locs.push(yk_value_loc.clone());
451                    Err(HeaderError {
452                        kind: HeaderErrorKind::InvalidEntry("yacckind"),
453                        locations: err_locs,
454                    })
455                }
456            }
457            Value::Setting(Setting::Constructor {
458                ctor:
459                    Namespaced {
460                        namespace: yk_namespace,
461                        member: (yk_str, yk_loc),
462                    },
463                arg:
464                    Namespaced {
465                        namespace: ak_namespace,
466                        member: (ak_str, ak_loc),
467                    },
468            }) => {
469                if let Some((yk_ns, yk_ns_loc)) = yk_namespace {
470                    if yk_ns != "yacckind" {
471                        err_locs.push(yk_ns_loc.clone());
472                    }
473                }
474
475                if yk_str != "original" {
476                    err_locs.push(yk_loc.clone());
477                }
478
479                if let Some((ak_ns, ak_ns_loc)) = ak_namespace {
480                    if ak_ns != "yaccoriginalactionkind" {
481                        err_locs.push(ak_ns_loc.clone());
482                    }
483                }
484                let actionkinds = [
485                    ("noaction", YaccOriginalActionKind::NoAction),
486                    ("useraction", YaccOriginalActionKind::UserAction),
487                    ("genericparsetree", YaccOriginalActionKind::GenericParseTree),
488                ];
489                let yk_found = actionkinds.iter().find_map(|(actionkind_str, actionkind)| {
490                    (ak_str == actionkind_str).then_some(YaccKind::Original(*actionkind))
491                });
492
493                if let Some(yk) = yk_found {
494                    if err_locs.is_empty() {
495                        Ok(yk)
496                    } else {
497                        Err(HeaderError {
498                            kind: HeaderErrorKind::InvalidEntry("yacckind"),
499                            locations: err_locs,
500                        })
501                    }
502                } else {
503                    err_locs.push(ak_loc.clone());
504                    Err(HeaderError {
505                        kind: HeaderErrorKind::InvalidEntry("yacckind"),
506                        locations: err_locs,
507                    })
508                }
509            }
510        }
511    }
512}
513
#[cfg(test)]
mod test {
    use super::*;

    /// Parses `src` once with the section required and once with it optional, passing
    /// each result to `check`.
    fn for_each_required_mode(
        src: &str,
        check: impl Fn(Result<(Header, usize), Vec<HeaderError>>),
    ) {
        for required in [true, false] {
            check(GrmtoolsSectionParser::new(src, required).parse());
        }
    }

    #[test]
    fn test_header_missing_curly_bracket() {
        for_each_required_mode("%grmtools { a, b", |res| assert!(res.is_err()));
    }

    #[test]
    fn test_header_missing_curly_bracket_empty() {
        for_each_required_mode("%grmtools {", |res| assert!(res.is_err()));
    }

    #[test]
    fn test_header_missing_curly_bracket_invalid() {
        for_each_required_mode("%grmtools {####", |res| assert!(res.is_err()));
    }

    #[test]
    fn test_header_duplicates() {
        // All three spellings use the same key, so they are reported as a single error
        // that lists each occurrence.
        for_each_required_mode("%grmtools {dupe, !dupe, dupe: test}", |res| {
            let errs = res.unwrap_err();
            assert_eq!(errs.len(), 1);
            assert_eq!(errs[0].kind, HeaderErrorKind::DuplicateEntry);
            assert_eq!(errs[0].locations.len(), 3);
        });
    }
}