// lrlex/lexer.rs

use std::{
    collections::{HashMap, HashSet},
    fmt::Debug,
    hash::Hash,
    marker::PhantomData,
    slice::Iter,
    str::FromStr,
};

use cfgrammar::{
    header::{GrmtoolsSectionParser, Header, HeaderError, HeaderErrorKind, HeaderValue, Value},
    span::Location,
    NewlineCache, Span,
};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use regex::{Regex, RegexBuilder};

use lrpar::{Lexeme, Lexer, LexerTypes, NonStreamingLexer};

use crate::{
    parser::{LexParser, StartState, StartStateOperation},
    LRLexError, LexBuildError, LexBuildResult, StartStateId,
};

#[doc(hidden)]
/// Corresponds to the options for `regex::RegexBuilder`.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct LexFlags {
    // For the following values, when `None`, grmtools supplies the defaults given in
    // `DEFAULT_LEX_FLAGS`.
    pub dot_matches_new_line: Option<bool>,
    pub multi_line: Option<bool>,
    pub octal: Option<bool>,
    pub posix_escapes: Option<bool>,
    pub allow_wholeline_comments: Option<bool>,

    // The following values, when `None`, default to the `regex` crate's default values.
    pub case_insensitive: Option<bool>,
    pub swap_greed: Option<bool>,
    pub ignore_whitespace: Option<bool>,
    pub unicode: Option<bool>,
    pub size_limit: Option<usize>,
    pub dfa_size_limit: Option<usize>,
    pub nest_limit: Option<u32>,
}

impl<T: Clone> TryFrom<&mut Header<T>> for LexFlags {
    type Error = HeaderError<T>;
    fn try_from(header: &mut Header<T>) -> Result<LexFlags, HeaderError<T>> {
        use cfgrammar::header::Setting;
        let mut lex_flags = UNSPECIFIED_LEX_FLAGS;
        let LexFlags {
            dot_matches_new_line,
            multi_line,
            octal,
            posix_escapes,
            allow_wholeline_comments,
            case_insensitive,
            swap_greed,
            ignore_whitespace,
            unicode,
            size_limit,
            dfa_size_limit,
            nest_limit,
        } = &mut lex_flags;
        macro_rules! cvt_flag {
            ($it:ident) => {
                header.mark_used(&stringify!($it).to_string());
                *$it = match header.get(stringify!($it)) {
                    Some(HeaderValue(_, Value::Flag(flag, _))) => Some(*flag),
                    Some(HeaderValue(loc, _)) => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("LexFlags", "Expected boolean"),
                        locations: vec![loc.clone()],
                    })?,
                    None => None,
                }
            };
        }
        cvt_flag!(dot_matches_new_line);
        cvt_flag!(multi_line);
        cvt_flag!(octal);
        cvt_flag!(posix_escapes);
        cvt_flag!(allow_wholeline_comments);
        cvt_flag!(case_insensitive);
        cvt_flag!(swap_greed);
        cvt_flag!(ignore_whitespace);
        cvt_flag!(unicode);
        macro_rules! cvt_num {
            ($it:ident, $num_ty: ty) => {
                header.mark_used(&stringify!($it).to_string());
                *$it = match header.get(stringify!($it)) {
                    Some(HeaderValue(_, Value::Setting(Setting::Num(n, _)))) => Some(*n as $num_ty),
                    Some(HeaderValue(loc, _)) => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("LexFlags", "Expected numeric"),
                        locations: vec![loc.clone()],
                    })?,
                    None => None,
                }
            };
        }
        cvt_num!(size_limit, usize);
        cvt_num!(dfa_size_limit, usize);
        cvt_num!(nest_limit, u32);
        Ok(lex_flags)
    }
}

impl From<&LexFlags> for Header<Location> {
    fn from(flags: &LexFlags) -> Header<Location> {
        let mut header = Header::new();
        let LexFlags {
            dot_matches_new_line,
            multi_line,
            octal,
            posix_escapes,
            allow_wholeline_comments,
            case_insensitive,
            swap_greed,
            ignore_whitespace,
            unicode,
            size_limit,
            dfa_size_limit,
            nest_limit,
        } = flags;
        macro_rules! cvt_flag {
            ($it: ident) => {
                $it.map(|x| {
                    header.insert(
                        stringify!($it).to_string(),
                        HeaderValue(
                            Location::Other("From<&LexFlags>".to_string()),
                            Value::Flag(x, Location::Other("From<&LexFlags>".to_string())),
                        ),
                    )
                });
            };
        }
        cvt_flag!(dot_matches_new_line);
        cvt_flag!(multi_line);
        cvt_flag!(octal);
        cvt_flag!(posix_escapes);
        cvt_flag!(allow_wholeline_comments);
        cvt_flag!(case_insensitive);
        cvt_flag!(swap_greed);
        cvt_flag!(ignore_whitespace);
        cvt_flag!(unicode);

        macro_rules! cvt_num {
            ($it: ident) => {
                $it.map(|x| {
                    use cfgrammar::header::Setting;
                    header.insert(
                        stringify!($it).to_string(),
                        HeaderValue(
                            Location::Other("From<&LexFlags>".to_string()),
                            Value::Setting(Setting::Num(
                                x as u64,
                                Location::Other("From<&LexFlags>".to_string()),
                            )),
                        ),
                    )
                });
            };
        }
        cvt_num!(size_limit);
        cvt_num!(dfa_size_limit);
        cvt_num!(nest_limit);

        header
    }
}

/// LexFlags with flags set to default values.
pub const DEFAULT_LEX_FLAGS: LexFlags = LexFlags {
    allow_wholeline_comments: Some(false),
    dot_matches_new_line: Some(true),
    multi_line: Some(true),
    octal: Some(true),
    posix_escapes: Some(false),
    case_insensitive: None,
    ignore_whitespace: None,
    swap_greed: None,
    unicode: None,
    size_limit: None,
    dfa_size_limit: None,
    nest_limit: None,
};

/// LexFlags with all of the values `None`.
pub const UNSPECIFIED_LEX_FLAGS: LexFlags = LexFlags {
    allow_wholeline_comments: None,
    dot_matches_new_line: None,
    multi_line: None,
    octal: None,
    posix_escapes: None,
    case_insensitive: None,
    ignore_whitespace: None,
    swap_greed: None,
    unicode: None,
    size_limit: None,
    dfa_size_limit: None,
    nest_limit: None,
};

#[derive(Debug)]
#[doc(hidden)]
pub struct Rule<StorageT> {
    /// If `Some`, this specifies the ID that lexemes resulting from this rule will have. Note that
    /// lrlex gives rules a guaranteed unique value by default, though users can later override
    /// that, potentially undermining uniqueness if they're not careful.
    ///
    /// If `None`, then this rule specifies lexemes which should not appear in the user's input.
    pub(super) tok_id: Option<StorageT>,
    /// This rule's name. If `None`, then text which matches this rule will be skipped (i.e. will
    /// not create a lexeme).
    #[deprecated(note = "Use the name() function")]
    pub name: Option<String>,
    #[deprecated(note = "Use the name_span() function")]
    pub name_span: Span,
    pub(super) re_str: String,
    re: Regex,
    /// Id(s) of permitted start conditions for the lexer to match this rule.
    #[deprecated(note = "Use the start_states() function")]
    pub start_states: Vec<usize>,
    /// If `Some(_)`, successful matching of this rule will cause the current stack of start
    /// conditions in the lexer to be updated with the enclosed value, using the designated
    /// operation.
    /// If `None`, successful matching causes no change to the current start condition.
    #[deprecated(note = "Use the target_state() function")]
    pub target_state: Option<(usize, StartStateOperation)>,
}

impl<StorageT: PrimInt> Rule<StorageT> {
    /// Create a new `Rule`. This interface is unstable and should only be used by code generated
    /// by lrlex itself.
    #[doc(hidden)]
    #[allow(private_interfaces)]
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        _: crate::unstable_api::InternalPublicApi,
        tok_id: Option<StorageT>,
        name: Option<String>,
        name_span: Span,
        re_str: String,
        start_states: Vec<usize>,
        target_state: Option<(usize, StartStateOperation)>,
        lex_flags: &LexFlags,
    ) -> Result<Rule<StorageT>, regex::Error> {
        let mut re = RegexBuilder::new(&format!("\\A(?:{})", re_str));
        let mut re = re
            .octal(lex_flags.octal.unwrap())
            .multi_line(lex_flags.multi_line.unwrap())
            .dot_matches_new_line(lex_flags.dot_matches_new_line.unwrap());

        if let Some(flag) = lex_flags.ignore_whitespace {
            re = re.ignore_whitespace(flag)
        }
        if let Some(flag) = lex_flags.unicode {
            re = re.unicode(flag)
        }
        if let Some(flag) = lex_flags.case_insensitive {
            re = re.case_insensitive(flag)
        }
        if let Some(flag) = lex_flags.swap_greed {
            re = re.swap_greed(flag)
        }
        if let Some(sz) = lex_flags.size_limit {
            re = re.size_limit(sz)
        }
        if let Some(sz) = lex_flags.dfa_size_limit {
            re = re.dfa_size_limit(sz)
        }
        if let Some(lim) = lex_flags.nest_limit {
            re = re.nest_limit(lim)
        }

        let re = re.build()?;
        #[allow(deprecated)]
        Ok(Rule {
            tok_id,
            name,
            name_span,
            re_str,
            re,
            start_states,
            target_state,
        })
    }

    /// Return this rule's token ID, if any.
    ///
    /// If `Some`, this specifies the ID that lexemes resulting from this rule will have. If
    /// `None`, then this rule specifies lexemes which should not appear in the user's input.
    pub fn tok_id(&self) -> Option<StorageT> {
        self.tok_id
    }

    /// Return this rule's name. If `None`, then text which matches this rule will be skipped (i.e.
    /// it will not result in the creation of a [Lexeme]).
    pub fn name(&self) -> Option<&str> {
        #[allow(deprecated)]
        self.name.as_deref()
    }

    /// Return the [Span] of this rule's name.
    pub fn name_span(&self) -> Span {
        #[allow(deprecated)]
        self.name_span
    }

    /// Return the original regular expression specified by the user for this [Rule].
    pub fn re_str(&self) -> &str {
        &self.re_str
    }

    /// Return the IDs of the permitted start conditions for the lexer to match this rule.
    pub fn start_states(&self) -> &[usize] {
        #[allow(deprecated)]
        self.start_states.as_slice()
    }

    /// Return the target start condition ID and stack operation, if any, applied when this rule
    /// matches.
    pub fn target_state(&self) -> Option<(usize, StartStateOperation)> {
        #[allow(deprecated)]
        self.target_state.clone()
    }
}

/// Methods which all lexer definitions must implement.
pub trait LexerDef<LexerTypesT: LexerTypes>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    #[doc(hidden)]
    /// Instantiate a lexer from a set of `Rule`s. This is only intended to be used by compiled
    /// lexers (see `ctbuilder.rs`).
    fn from_rules(start_states: Vec<StartState>, rules: Vec<Rule<LexerTypesT::StorageT>>) -> Self
    where
        Self: Sized;

    /// Instantiate a lexer from a string (e.g. representing a `.l` file).
    fn from_str(s: &str) -> LexBuildResult<Self>
    where
        Self: Sized;

    /// Get the `Rule` at index `idx`.
    fn get_rule(&self, idx: usize) -> Option<&Rule<LexerTypesT::StorageT>>;

    /// Get the `Rule` instance associated with a particular lexeme ID. Panics if no such rule
    /// exists.
    fn get_rule_by_id(&self, tok_id: LexerTypesT::StorageT) -> &Rule<LexerTypesT::StorageT>;

    /// Get the `Rule` instance associated with a particular name.
    fn get_rule_by_name(&self, n: &str) -> Option<&Rule<LexerTypesT::StorageT>>;

    /// Set the id attribute on rules to the corresponding value in `map`. This is typically used
    /// to synchronise a parser's notion of lexeme IDs with the lexer's. While doing this, it
    /// keeps track of which lexemes:
    ///   1) are defined in the lexer but not referenced by the parser;
    ///   2) are referenced by the parser but not defined in the lexer
    ///
    /// and returns them as a tuple `(Option<HashSet<&str>>, Option<HashSet<&str>>)` in the order
    /// (*referenced_in_parser_missing_from_lexer*, *defined_in_lexer_missing_from_parser*). Since
    /// in most cases both sets are expected to be empty, `None` is returned to avoid a `HashSet`
    /// allocation.
    ///
    /// Lexing and parsing can continue if either set is non-empty, so it is up to the caller
    /// what action to take if either returned set is non-empty. A non-empty set #1 is often
    /// benign: some lexers deliberately define tokens which are not used (e.g. reserving future
    /// keywords). A non-empty set #2 is more likely to be an error since there are parts of the
    /// grammar where nothing the user can input will be parseable.
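    ///
    /// A minimal sketch of typical usage (not compiled here; the `.l` source and token names are
    /// illustrative, mirroring the tests at the bottom of this file):
    ///
    /// ```ignore
    /// let mut lexerdef =
    ///     LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str("%%\n[a-z]+ 'ID'").unwrap();
    /// let mut map = HashMap::new();
    /// map.insert("ID", 0u8);
    /// // Lexer and parser agree on every token name:
    /// assert_eq!(lexerdef.set_rule_ids(&map), (None, None));
    ///
    /// let mut map = HashMap::new();
    /// map.insert("INT", 0u8);
    /// // "INT" is referenced by the parser but missing from the lexer; "ID" is defined in the
    /// // lexer but missing from the parser:
    /// let (missing_from_lexer, missing_from_parser) = lexerdef.set_rule_ids(&map);
    /// ```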
    fn set_rule_ids<'a>(
        &'a mut self,
        rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>,
    ) -> (Option<HashSet<&'a str>>, Option<HashSet<&'a str>>);
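
    /// As `set_rule_ids`, but the *defined_in_lexer_missing_from_parser* set also carries the
    /// [Span] of each rule's name.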
    fn set_rule_ids_spanned<'a>(
        &'a mut self,
        rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>,
    ) -> (Option<HashSet<&'a str>>, Option<HashSet<(&'a str, Span)>>);

    /// Returns an iterator over all rules in this AST.
    fn iter_rules(&self) -> Iter<Rule<LexerTypesT::StorageT>>;

    /// Returns an iterator over all start states in this AST.
    fn iter_start_states(&self) -> Iter<StartState>;
}

/// This struct represents, in essence, a .l file in memory. From it one can produce an
/// [LRNonStreamingLexer] which actually lexes inputs.
#[derive(Debug)]
pub struct LRNonStreamingLexerDef<LexerTypesT: LexerTypes>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    rules: Vec<Rule<LexerTypesT::StorageT>>,
    start_states: Vec<StartState>,
    lex_flags: LexFlags,
    phantom: PhantomData<LexerTypesT>,
}

impl<LexerTypesT: LexerTypes> LexerDef<LexerTypesT> for LRNonStreamingLexerDef<LexerTypesT>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::StorageT: TryFrom<usize>,
{
    fn from_rules(
        start_states: Vec<StartState>,
        rules: Vec<Rule<LexerTypesT::StorageT>>,
    ) -> LRNonStreamingLexerDef<LexerTypesT> {
        LRNonStreamingLexerDef {
            rules,
            start_states,
            lex_flags: DEFAULT_LEX_FLAGS,
            phantom: PhantomData,
        }
    }

    /// Given a `.l` file in an `&str`, returns an `LRNonStreamingLexerDef` after merging the
    /// `%grmtools` section with the default set of `LexFlags`.
    fn from_str(s: &str) -> LexBuildResult<LRNonStreamingLexerDef<LexerTypesT>> {
        let (mut header, pos) = GrmtoolsSectionParser::new(s, false)
            .parse()
            .map_err(|mut errs| errs.drain(..).map(LexBuildError::from).collect::<Vec<_>>())?;
        let flags = LexFlags::try_from(&mut header).map_err(|e| vec![e.into()])?;
        LexParser::<LexerTypesT>::new_with_lex_flags(s[pos..].to_string(), flags.clone()).map(|p| {
            LRNonStreamingLexerDef {
                rules: p.rules,
                start_states: p.start_states,
                lex_flags: flags,
                phantom: PhantomData,
            }
        })
    }

    fn get_rule(&self, idx: usize) -> Option<&Rule<LexerTypesT::StorageT>> {
        self.rules.get(idx)
    }

    fn get_rule_by_id(&self, tok_id: LexerTypesT::StorageT) -> &Rule<LexerTypesT::StorageT> {
        self.rules
            .iter()
            .find(|r| r.tok_id == Some(tok_id))
            .unwrap()
    }

    fn get_rule_by_name(&self, n: &str) -> Option<&Rule<LexerTypesT::StorageT>> {
        self.rules.iter().find(|r| r.name() == Some(n))
    }

    fn set_rule_ids<'a>(
        &'a mut self,
        rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>,
    ) -> (Option<HashSet<&'a str>>, Option<HashSet<&'a str>>) {
        let (missing_from_lexer, missing_from_parser) = self.set_rule_ids_spanned(rule_ids_map);
        let missing_from_parser =
            missing_from_parser.map(|missing| missing.iter().map(|(name, _)| *name).collect());
        (missing_from_lexer, missing_from_parser)
    }

    fn set_rule_ids_spanned<'a>(
        &'a mut self,
        rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>,
    ) -> (Option<HashSet<&'a str>>, Option<HashSet<(&'a str, Span)>>) {
        // Because we have to iter_mut over self.rules, we can't easily store a reference to the
        // rule's name at the same time. Instead, we store the index of each such rule and
        // recover the names later. This has the unfortunate consequence of extending the mutable
        // borrow for the rest of the 'a lifetime. To avoid that we could return indexes here,
        // but the original `set_rule_ids` invalidates indexes. In the spirit of keeping that
        // behavior consistent, this also returns the span.
        let mut missing_from_parser_idxs = Vec::new();
        let mut rules_with_names = 0;
        for (i, r) in self.rules.iter_mut().enumerate() {
            if let Some(n) = r.name() {
                match rule_ids_map.get(n) {
                    Some(tok_id) => r.tok_id = Some(*tok_id),
                    None => {
                        r.tok_id = None;
                        missing_from_parser_idxs.push(i);
                    }
                }
                rules_with_names += 1;
            }
        }

        let missing_from_parser = if missing_from_parser_idxs.is_empty() {
            None
        } else {
            let mut mfp = HashSet::with_capacity(missing_from_parser_idxs.len());
            for i in &missing_from_parser_idxs {
                mfp.insert((self.rules[*i].name().unwrap(), self.rules[*i].name_span()));
            }
            Some(mfp)
        };

        let missing_from_lexer =
            if rules_with_names - missing_from_parser_idxs.len() == rule_ids_map.len() {
                None
            } else {
                Some(
                    rule_ids_map
                        .keys()
                        .cloned()
                        .collect::<HashSet<&str>>()
                        .difference(
                            &self
                                .rules
                                .iter()
                                .filter_map(|x| x.name())
                                .collect::<HashSet<&str>>(),
                        )
                        .cloned()
                        .collect::<HashSet<&str>>(),
                )
            };

        (missing_from_lexer, missing_from_parser)
    }

    fn iter_rules(&self) -> Iter<Rule<LexerTypesT::StorageT>> {
        self.rules.iter()
    }

    fn iter_start_states(&self) -> Iter<StartState> {
        self.start_states.iter()
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > LRNonStreamingLexerDef<LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
    LexerTypesT::StorageT: TryFrom<usize>,
{
    /// Uses the `lex_flags` passed in, ignoring any settings in the `%grmtools` section.
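    ///
    /// A sketch of overriding a single flag, mirroring the tests at the bottom of this file (not
    /// compiled here; `src` is a `.l` source string):
    ///
    /// ```ignore
    /// let mut flags = DEFAULT_LEX_FLAGS;
    /// flags.posix_escapes = Some(true);
    /// let lexerdef =
    ///     LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::new_with_options(&src, flags).unwrap();
    /// ```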
    pub fn new_with_options(
        s: &str,
        lex_flags: LexFlags,
    ) -> LexBuildResult<LRNonStreamingLexerDef<LexerTypesT>> {
        let (_, pos) = GrmtoolsSectionParser::new(s, false).parse().unwrap();
        LexParser::<LexerTypesT>::new_with_lex_flags(s[pos..].to_string(), lex_flags.clone()).map(
            |p| LRNonStreamingLexerDef {
                rules: p.rules,
                start_states: p.start_states,
                lex_flags,
                phantom: PhantomData,
            },
        )
    }

    /// Return an [LRNonStreamingLexer] for the string `s` that will lex relative to this
    /// [LRNonStreamingLexerDef].
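    ///
    /// A sketch of lexing an input and inspecting the resulting lexemes (not compiled here;
    /// assumes `lexerdef` was built and had its rule IDs set as shown above):
    ///
    /// ```ignore
    /// let lexer = lexerdef.lexer("abc 123");
    /// for lexeme in lexer.iter().map(|l| l.unwrap()) {
    ///     println!("{:?} {:?}", lexeme.tok_id(), lexeme.span());
    /// }
    /// ```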
    pub fn lexer<'lexer, 'input: 'lexer>(
        &'lexer self,
        s: &'input str,
    ) -> LRNonStreamingLexer<'lexer, 'input, LexerTypesT> {
        let mut lexemes = vec![];
        let mut i = 0;
        let mut state_stack: Vec<(usize, &StartState)> = Vec::new();
        let initial_state = match self.get_start_state_by_id(0) {
            None => {
                lexemes.push(Err(LRLexError::new(Span::new(i, i))));
                return LRNonStreamingLexer::new(s, lexemes, NewlineCache::from_str(s).unwrap());
            }
            Some(state) => state,
        };
        state_stack.push((1, initial_state));

        while i < s.len() {
            let old_i = i;
            let mut longest = 0; // Length of the longest match
            let mut longest_ridx = 0; // This is only valid iff longest != 0
            let current_state = match state_stack.last() {
                None => {
                    lexemes.push(Err(LRLexError::new(Span::new(i, i))));
                    return LRNonStreamingLexer::new(
                        s,
                        lexemes,
                        NewlineCache::from_str(s).unwrap(),
                    );
                }
                Some((_, s)) => s,
            };
            for (ridx, r) in self.iter_rules().enumerate() {
                if !Self::state_matches(current_state, r.start_states()) {
                    continue;
                }
                if let Some(m) = r.re.find(&s[old_i..]) {
                    let len = m.end();
                    // Note that by using ">", we implicitly prefer an earlier over a later rule, if
                    // both match an input of the same length.
                    if len > longest {
                        longest = len;
                        longest_ridx = ridx;
                    }
                }
            }
            if longest > 0 {
                let r = self.get_rule(longest_ridx).unwrap();
                if r.name().is_some() {
                    match r.tok_id {
                        Some(tok_id) => {
                            lexemes.push(Ok(Lexeme::new(tok_id, old_i, longest)));
                        }
                        None => {
                            lexemes.push(Err(LRLexError::new(Span::new(old_i, old_i))));
                            break;
                        }
                    }
                }
                if let Some((target_state_id, op)) = &r.target_state() {
                    let state = match self.get_start_state_by_id(*target_state_id) {
                        None => {
                            // TODO: I can see an argument for lexing state to be either `None` or `Some(target_state_id)` here
                            lexemes.push(Err(LRLexError::new(Span::new(old_i, old_i))));
                            break;
                        }
                        Some(state) => state,
                    };
                    let head = state_stack.last_mut();
                    match op {
                        StartStateOperation::ReplaceStack => {
                            state_stack.clear();
                            state_stack.push((1, state));
                        }
                        StartStateOperation::Push => match head {
                            Some((count, s)) if s.id == state.id => *count += 1,
                            _ => state_stack.push((1, state)),
                        },
                        StartStateOperation::Pop => match head {
                            Some((count, _s)) if *count > 1 => {
                                *count -= 1;
                            }
                            Some(_) => {
                                state_stack.pop();
                                if state_stack.is_empty() {
                                    state_stack.push((1, initial_state));
                                }
                            }
                            None => {
                                lexemes.push(Err(LRLexError::new(Span::new(old_i, old_i))));
                                break;
                            }
                        },
                    }
                }
                i += longest;
            } else {
                lexemes.push(Err(LRLexError::new_with_lexing_state(
                    Span::new(old_i, old_i),
                    StartStateId::new(current_state.id),
                )));
                break;
            }
        }
        LRNonStreamingLexer::new(s, lexemes, NewlineCache::from_str(s).unwrap())
    }

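    /// Whether `state` permits a rule restricted to `rule_states` to match: a rule that names no
    /// start conditions matches in any non-exclusive state; otherwise it matches only in the
    /// states it explicitly names.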
    fn state_matches(state: &StartState, rule_states: &[usize]) -> bool {
        if rule_states.is_empty() {
            !state.exclusive
        } else {
            rule_states.contains(&state.id)
        }
    }

    fn get_start_state_by_id(&self, id: usize) -> Option<&StartState> {
        self.start_states.iter().find(|state| state.id == id)
    }

    /// Returns the final `LexFlags` used for this lex source after all forced and default flags
    /// have been resolved.
    pub fn lex_flags(&self) -> Option<&LexFlags> {
        Some(&self.lex_flags)
    }
}

/// An `LRNonStreamingLexer` holds a reference to a string and can lex it into [lrpar::Lexeme]s.
/// Although the struct is tied to a single string, no guarantees are made about whether the
/// lexemes are cached or not.
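///
/// A sketch of querying an input via spans (not compiled here; `lexerdef` is an
/// [LRNonStreamingLexerDef] and the assertions mirror the tests at the bottom of this file):
///
/// ```ignore
/// let lexer = lexerdef.lexer("a b c");
/// let lexeme = lexer.iter().next().unwrap().unwrap();
/// assert_eq!(lexer.span_str(lexeme.span()), "a");
/// assert_eq!(lexer.line_col(lexeme.span()), ((1, 1), (1, 2)));
/// ```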
pub struct LRNonStreamingLexer<'lexer, 'input: 'lexer, LexerTypesT: LexerTypes>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::StorageT: 'static + Debug + PrimInt,
{
    s: &'input str,
    lexemes: Vec<Result<LexerTypesT::LexemeT, LRLexError>>,
    newlines: NewlineCache,
    phantom: PhantomData<(&'lexer (), LexerTypesT::StorageT)>,
}

impl<
        'lexer,
        'input: 'lexer,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > LRNonStreamingLexer<'lexer, 'input, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Create a new `LRNonStreamingLexer` from the input `s` and the `lexemes` and `newlines`
    /// derived from it.
    ///
    /// Note that if one or more lexemes or newlines were not created from `s`, subsequent calls
    /// to the `LRNonStreamingLexer` may cause `panic`s.
    pub fn new(
        s: &'input str,
        lexemes: Vec<Result<LexerTypesT::LexemeT, LRLexError>>,
        newlines: NewlineCache,
    ) -> LRNonStreamingLexer<'lexer, 'input, LexerTypesT> {
        LRNonStreamingLexer {
            s,
            lexemes,
            newlines,
            phantom: PhantomData,
        }
    }
}

impl<
        'lexer,
        'input: 'lexer,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = LRLexError>,
    > Lexer<LexerTypesT> for LRNonStreamingLexer<'lexer, 'input, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn iter<'a>(
        &'a self,
    ) -> Box<dyn Iterator<Item = Result<LexerTypesT::LexemeT, LexerTypesT::LexErrorT>> + 'a> {
        Box::new(self.lexemes.iter().cloned())
    }
}

impl<'lexer, 'input: 'lexer, LexerTypesT: LexerTypes<LexErrorT = LRLexError>>
    NonStreamingLexer<'input, LexerTypesT> for LRNonStreamingLexer<'lexer, 'input, LexerTypesT>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    fn span_str(&self, span: Span) -> &'input str {
        if span.end() > self.s.len() {
            panic!(
                "Span {:?} exceeds known input length {}",
                span,
                self.s.len()
            );
        }
        &self.s[span.start()..span.end()]
    }

    fn span_lines_str(&self, span: Span) -> &'input str {
        debug_assert!(span.end() >= span.start());
        if span.end() > self.s.len() {
            panic!(
                "Span {:?} exceeds known input length {}",
                span,
                self.s.len()
            );
        }

        let (st, en) = self.newlines.span_line_bytes(span);
        &self.s[st..en]
    }

    fn line_col(&self, span: Span) -> ((usize, usize), (usize, usize)) {
        debug_assert!(span.end() >= span.start());
        if span.end() > self.s.len() {
            panic!(
                "Span {:?} exceeds known input length {}",
                span,
                self.s.len()
            );
        }

        (
            self.newlines
                .byte_to_line_num_and_col_num(self.s, span.start())
                .unwrap(),
            self.newlines
                .byte_to_line_num_and_col_num(self.s, span.end())
                .unwrap(),
        )
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::{DefaultLexeme, DefaultLexerTypes};
    use lrpar::LexError;
    use std::collections::HashMap;

    #[test]
    fn test_basic() {
        let src = r"
%%
[0-9]+ 'int'
[a-zA-Z]+ 'id'
[ \t] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("int", 0);
        map.insert("id", 1);
        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));

        let lexemes = lexerdef
            .lexer("abc 123")
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(lexemes.len(), 2);
        let lex1 = lexemes[0];
        assert_eq!(lex1.tok_id(), 1u8);
        assert_eq!(lex1.span().start(), 0);
        assert_eq!(lex1.span().len(), 3);
        let lex2 = lexemes[1];
        assert_eq!(lex2.tok_id(), 0);
        assert_eq!(lex2.span().start(), 4);
        assert_eq!(lex2.span().len(), 3);
    }

    #[test]
    fn test_posix_escapes() {
        let src = r#"%%
\\ 'slash'
\a 'alert'
\b 'backspace'
\f 'feed'
\n 'newline'
\r 'return'
\t 'tab'
\v 'vtab'
\q 'normal_char'
"#
        .to_string();
        let mut options = DEFAULT_LEX_FLAGS;
        options.posix_escapes = Some(true);
        let lexerdef =
            LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::new_with_options(&src, options)
                .unwrap();
        let lexemes = lexerdef
            .lexer("\\\x07\x08\x0c\n\r\t\x0bq")
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(lexemes.len(), 9);
        for i in 0..9u8 {
            let lexeme = lexemes[i as usize];
            assert_eq!(lexeme.tok_id(), i);
        }
    }

    #[test]
    fn test_non_posix_escapes() {
        let src = r#"%%
\\ 'slash'
\a 'alert'
a\b a 'word_break'
\f 'feed'
\n 'newline'
\r 'return'
\t 'tab'
\v 'vtab'
\q 'normal_char'
"#
        .to_string();
        let mut options = DEFAULT_LEX_FLAGS;
        options.posix_escapes = Some(false);
        let lexerdef =
            LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::new_with_options(&src, options)
                .unwrap();
        let lexemes = lexerdef
            .lexer("\\\x07a a\x0c\n\r\t\x0bq")
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(lexemes.len(), 9);
        for i in 0..9u8 {
            let lexeme = lexemes[i as usize];
            assert_eq!(lexeme.tok_id(), i);
        }
    }

    #[test]
    fn test_basic_error() {
        let src = "
%%
[0-9]+ 'int'"
            .to_string();
        let lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        match lexerdef.lexer("abc").iter().next().unwrap() {
            Ok(_) => panic!("Invalid input lexed"),
            Err(e) => {
                if e.span().start() != 0 || e.span().end() != 0 {
                    panic!("Incorrect span returned {:?}", e.span());
                }
            }
        };
    }

    #[test]
    fn test_longest_match() {
        let src = "%%
if 'IF'
[a-z]+ 'ID'
[ ] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("IF", 0);
        map.insert("ID", 1);
        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));

        let lexemes = lexerdef
            .lexer("iff if")
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<DefaultLexeme<u8>>>();
        assert_eq!(lexemes.len(), 2);
        let lex1 = lexemes[0];
        assert_eq!(lex1.tok_id(), 1u8);
        assert_eq!(lex1.span().start(), 0);
        assert_eq!(lex1.span().len(), 3);
        let lex2 = lexemes[1];
        assert_eq!(lex2.tok_id(), 0);
        assert_eq!(lex2.span().start(), 4);
        assert_eq!(lex2.span().len(), 2);
    }

    #[test]
    fn test_multibyte() {
        let src = "%%
[a❤]+ 'ID'
[ ] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("ID", 0u8);
        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));

        let lexer = lexerdef.lexer("a ❤ a");
        let lexemes = lexer
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<DefaultLexeme<u8>>>();
        assert_eq!(lexemes.len(), 3);
        let lex1 = lexemes[0];
        assert_eq!(lex1.span().start(), 0);
        assert_eq!(lex1.span().len(), 1);
        assert_eq!(lexer.span_str(lex1.span()), "a");
        let lex2 = lexemes[1];
        assert_eq!(lex2.span().start(), 2);
        assert_eq!(lex2.span().len(), 3);
        assert_eq!(lexer.span_str(lex2.span()), "❤");
        let lex3 = lexemes[2];
        assert_eq!(lex3.span().start(), 6);
        assert_eq!(lex3.span().len(), 1);
        assert_eq!(lexer.span_str(lex3.span()), "a");
    }

    #[test]
    fn test_line_col() {
        let src = "%%
[a-z]+ 'ID'
[ \\n] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("ID", 0u8);
        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));

        let lexer = lexerdef.lexer("a b c");
        let lexemes = lexer
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<DefaultLexeme<u8>>>();
        assert_eq!(lexemes.len(), 3);
        assert_eq!(lexer.line_col(lexemes[1].span()), ((1, 3), (1, 4)));
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "a b c");
        assert_eq!(lexer.span_lines_str(lexemes[2].span()), "a b c");

        let lexer = lexerdef.lexer("a b c\n");
        let lexemes = lexer.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
        assert_eq!(lexemes.len(), 3);
        assert_eq!(lexer.line_col(lexemes[1].span()), ((1, 3), (1, 4)));
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "a b c");
        assert_eq!(lexer.span_lines_str(lexemes[2].span()), "a b c");

        let lexer = lexerdef.lexer(" a\nb\n  c d");
        let lexemes = lexer.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
        assert_eq!(lexemes.len(), 4);
        assert_eq!(lexer.line_col(lexemes[0].span()), ((1, 2), (1, 3)));
        assert_eq!(lexer.line_col(lexemes[1].span()), ((2, 1), (2, 2)));
        assert_eq!(lexer.line_col(lexemes[2].span()), ((3, 3), (3, 4)));
        assert_eq!(lexer.line_col(lexemes[3].span()), ((3, 5), (3, 6)));
        assert_eq!(lexer.span_lines_str(lexemes[0].span()), " a");
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "b");
        assert_eq!(lexer.span_lines_str(lexemes[2].span()), "  c d");
        assert_eq!(lexer.span_lines_str(lexemes[3].span()), "  c d");

        let mut s = Vec::new();
        let mut offs = vec![0];
        for i in 0..71 {
            offs.push(offs[i] + i + 1);
            s.push(vec!["a"; i].join(" "));
        }
        let s = s.join("\n");
        let lexer = lexerdef.lexer(&s);
        let lexemes = lexer.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
        assert_eq!(lexemes.len(), offs[70]);
        assert_eq!(lexer.span_lines_str(Span::new(0, 0)), "");
        assert_eq!(lexer.span_lines_str(Span::new(0, 2)), "\na");
        assert_eq!(lexer.span_lines_str(Span::new(0, 4)), "\na\na a");
        assert_eq!(lexer.span_lines_str(Span::new(0, 7)), "\na\na a\na a a");
        assert_eq!(lexer.span_lines_str(Span::new(4, 7)), "a a\na a a");
        assert_eq!(lexer.span_lines_str(lexemes[0].span()), "a");
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "a a");
        assert_eq!(lexer.span_lines_str(lexemes[3].span()), "a a a");
        for i in 0..70 {
            assert_eq!(
                lexer.span_lines_str(lexemes[offs[i]].span()),
                vec!["a"; i + 1].join(" ")
            );
        }
    }

    #[test]
    fn test_line_col_multibyte() {
        let src = "%%
[a-z❤]+ 'ID'
[ \\n] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("ID", 0u8);
        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));

        let lexer = lexerdef.lexer(" a\n❤ b");
        let lexemes = lexer
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<DefaultLexeme<u8>>>();
        assert_eq!(lexemes.len(), 3);
        assert_eq!(lexer.line_col(lexemes[0].span()), ((1, 2), (1, 3)));
        assert_eq!(lexer.line_col(lexemes[1].span()), ((2, 1), (2, 2)));
        assert_eq!(lexer.line_col(lexemes[2].span()), ((2, 3), (2, 4)));
        assert_eq!(lexer.span_lines_str(lexemes[0].span()), " a");
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "❤ b");
        assert_eq!(lexer.span_lines_str(lexemes[2].span()), "❤ b");
    }

    #[test]
    #[should_panic]
    fn test_bad_line_col() {
        let src = "%%
[a-z]+ 'ID'
[ \\n] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("ID", 0u8);
        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));

        let lexer = lexerdef.lexer("a b c");

        lexer.line_col(Span::new(100, 100));
    }

    #[test]
    fn test_missing_from_lexer_and_parser() {
        let src = "%%
[a-z]+ 'ID'
[ \\n] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("INT", 0u8);
        let mut missing_from_lexer = HashSet::new();
        missing_from_lexer.insert("INT");
        let mut missing_from_parser = HashSet::new();
        missing_from_parser.insert("ID");
        assert_eq!(
            lexerdef.set_rule_ids(&map),
            (Some(missing_from_lexer), Some(missing_from_parser))
        );

        match lexerdef.lexer(" a ").iter().next().unwrap() {
            Ok(_) => panic!("Invalid input lexed"),
            Err(e) => {
                if e.span().start() != 1 || e.span().end() != 1 {
                    panic!("Incorrect span returned {:?}", e.span());
                }
            }
        };
    }

    #[test]
    fn test_multiline_lexeme() {
        let src = "%%
'.*' 'STR'
[ \\n] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("STR", 0u8);
        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));

        let lexer = lexerdef.lexer("'a\nb'\n");
        let lexemes = lexer
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<DefaultLexeme<u8>>>();
        assert_eq!(lexemes.len(), 1);
        assert_eq!(lexer.line_col(lexemes[0].span()), ((1, 1), (2, 3)));
        assert_eq!(lexer.span_lines_str(lexemes[0].span()), "'a\nb'");
    }

    #[test]
    fn test_token_span() {
        let src = "%%
a 'A'
b 'B'
[ \\n] ;"
            .to_string();
        let lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        assert_eq!(
            lexerdef.get_rule_by_name("A").unwrap().name_span(),
            Span::new(6, 7)
        );
        assert_eq!(
            lexerdef.get_rule_by_name("B").unwrap().name_span(),
            Span::new(12, 13)
        );
        let anonymous_rules = lexerdef
            .iter_rules()
            .filter(|rule| rule.name().is_none())
            .collect::<Vec<_>>();
        assert_eq!(anonymous_rules[0].name_span(), Span::new(21, 21));
    }

    #[test]
    fn test_token_start_states() {
        let src = "%x EXCLUSIVE_START
%s INCLUSIVE_START
%%
a 'A'
b 'B'
[ \\n] ;"
            .to_string();
        let lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        assert_eq!(
            lexerdef.get_rule_by_name("A").unwrap().name_span(),
            Span::new(44, 45)
        );
        assert_eq!(
            lexerdef.get_rule_by_name("B").unwrap().name_span(),
            Span::new(50, 51)
        );
    }

    #[test]
    fn test_rule_start_states() {
        let src = "%x EXCLUSIVE_START
%s INCLUSIVE_START
%%
<EXCLUSIVE_START>a 'A'
<INCLUSIVE_START>b 'B'
[ \\n] ;"
            .to_string();
        let lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let a_rule = lexerdef.get_rule_by_name("A").unwrap();
        assert_eq!(a_rule.name_span(), Span::new(61, 62));
        assert_eq!(a_rule.re_str, "a");

        let b_rule = lexerdef.get_rule_by_name("B").unwrap();
        assert_eq!(b_rule.name_span(), Span::new(84, 85));
        assert_eq!(b_rule.re_str, "b");
    }

    #[test]
    fn test_state_matches_regular_no_rule_states() {
        let all_states = &[
            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
        ];
        let rule_states = vec![];
        let current_state = &all_states[0];
        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
            current_state,
            &rule_states,
        );
        assert!(m);
    }

    #[test]
    fn test_state_matches_exclusive_no_rule_states() {
        let all_states = &[
            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
        ];
        let rule_states = vec![];
        let current_state = &all_states[1];
        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
            current_state,
            &rule_states,
        );
        assert!(!m);
    }

    #[test]
    fn test_state_matches_regular_matching_rule_states() {
        let all_states = &[
            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
        ];
        let rule_states = vec![0];
        let current_state = &all_states[0];
        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
            current_state,
            &rule_states,
        );
        assert!(m);
    }

    #[test]
    fn test_state_matches_exclusive_matching_rule_states() {
        let all_states = &[
            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
        ];
        let rule_states = vec![1];
        let current_state = &all_states[1];
        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
            current_state,
            &rule_states,
        );
        assert!(m);
    }

    #[test]
    fn test_state_matches_regular_other_rule_states() {
        let all_states = &[
            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
        ];
        let rule_states = vec![1];
        let current_state = &all_states[0];
        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
            current_state,
            &rule_states,
        );
        assert!(!m);
    }

    #[test]
    fn test_state_matches_exclusive_other_rule_states() {
        let all_states = &[
            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
        ];
        let rule_states = vec![0];
        let current_state = &all_states[1];
        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
            current_state,
            &rule_states,
        );
        assert!(!m);
    }
}