// lrlex/lexer.rs

1use std::{
2    collections::{HashMap, HashSet},
3    fmt::Debug,
4    hash::Hash,
5    marker::PhantomData,
6    slice::Iter,
7    str::FromStr,
8};
9
10use cfgrammar::{
11    NewlineCache, Span,
12    header::{GrmtoolsSectionParser, Header, HeaderError, HeaderErrorKind, HeaderValue, Value},
13    span::Location,
14};
15use num_traits::{AsPrimitive, PrimInt, Unsigned};
16use regex::{Regex, RegexBuilder};
17
18use lrpar::{Lexeme, Lexer, LexerTypes, NonStreamingLexer};
19
20use crate::{
21    LRLexError, LexBuildError, LexBuildResult, StartStateId,
22    parser::{LexParser, StartState, StartStateOperation},
23};
24
#[doc(hidden)]
/// Corresponds to the options for `regex::RegexBuilder`.
///
/// Every field is an `Option` so that "unspecified" can be distinguished from
/// an explicit setting; see `DEFAULT_LEX_FLAGS` and `UNSPECIFIED_LEX_FLAGS`.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct LexFlags {
    // The following values when `None` grmtools provides default values for in `DEFAULT_LEX_FLAGS`
    pub dot_matches_new_line: Option<bool>,
    pub multi_line: Option<bool>,
    pub octal: Option<bool>,
    // NOTE(review): `posix_escapes` and `allow_wholeline_comments` are not
    // forwarded to `RegexBuilder` in `Rule::new` — presumably they are
    // consumed by the `.l` parser itself; confirm in `parser.rs`.
    pub posix_escapes: Option<bool>,
    pub allow_wholeline_comments: Option<bool>,

    // All the following values when `None` default to the `regex` crate's default value.
    pub case_insensitive: Option<bool>,
    pub swap_greed: Option<bool>,
    pub ignore_whitespace: Option<bool>,
    pub unicode: Option<bool>,
    pub size_limit: Option<usize>,
    pub dfa_size_limit: Option<usize>,
    pub nest_limit: Option<u32>,
}
46
impl<T: Clone> TryFrom<&mut Header<T>> for LexFlags {
    type Error = HeaderError<T>;
    /// Extract every known lex flag from `header`, marking each key as used.
    ///
    /// Flags absent from the header stay `None`; a flag present with the wrong
    /// kind of value (e.g. a number where a boolean is expected) yields a
    /// `HeaderError` carrying the offending location.
    fn try_from(header: &mut Header<T>) -> Result<LexFlags, HeaderError<T>> {
        use cfgrammar::header::Setting;
        // Start from all-`None`; defaults (if wanted) are applied by callers.
        let mut lex_flags = UNSPECIFIED_LEX_FLAGS;
        // Destructure so the macros below can assign directly into each field.
        // This also ensures, at compile time, that every field is considered.
        let LexFlags {
            dot_matches_new_line,
            multi_line,
            octal,
            posix_escapes,
            allow_wholeline_comments,
            case_insensitive,
            swap_greed,
            ignore_whitespace,
            unicode,
            size_limit,
            dfa_size_limit,
            nest_limit,
        } = &mut lex_flags;
        // Read a boolean-valued header entry into the field `$it`.
        macro_rules! cvt_flag {
            ($it:ident) => {
                header.mark_used(&stringify!($it).to_string());
                *$it = match header.get(stringify!($it)) {
                    Some(HeaderValue(_, Value::Flag(flag, _))) => Some(*flag),
                    Some(HeaderValue(loc, _)) => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("LexFlags", "Expected boolean"),
                        locations: vec![loc.clone()],
                    })?,
                    None => None,
                }
            };
        }
        cvt_flag!(dot_matches_new_line);
        cvt_flag!(multi_line);
        cvt_flag!(octal);
        cvt_flag!(posix_escapes);
        cvt_flag!(allow_wholeline_comments);
        cvt_flag!(case_insensitive);
        cvt_flag!(swap_greed);
        cvt_flag!(ignore_whitespace);
        cvt_flag!(unicode);
        // Read a numeric-valued header entry into the field `$it`.
        // NOTE(review): the `as $num_ty` cast silently truncates values that do
        // not fit the target type (e.g. `nest_limit: u32`) — confirm this is
        // acceptable for header-supplied values.
        macro_rules! cvt_num {
            ($it:ident, $num_ty: ty) => {
                header.mark_used(&stringify!($it).to_string());
                *$it = match header.get(stringify!($it)) {
                    Some(HeaderValue(_, Value::Setting(Setting::Num(n, _)))) => Some(*n as $num_ty),
                    Some(HeaderValue(loc, _)) => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("LexFlags", "Expected numeric"),
                        locations: vec![loc.clone()],
                    })?,
                    None => None,
                }
            };
        }
        cvt_num!(size_limit, usize);
        cvt_num!(dfa_size_limit, usize);
        cvt_num!(nest_limit, u32);
        Ok(lex_flags)
    }
}
107
108impl From<&LexFlags> for Header<Location> {
109    fn from(flags: &LexFlags) -> Header<Location> {
110        let mut header = Header::new();
111        let LexFlags {
112            dot_matches_new_line,
113            multi_line,
114            octal,
115            posix_escapes,
116            allow_wholeline_comments,
117            case_insensitive,
118            swap_greed,
119            ignore_whitespace,
120            unicode,
121            size_limit,
122            dfa_size_limit,
123            nest_limit,
124        } = flags;
125        macro_rules! cvt_flag {
126            ($it: ident) => {
127                $it.map(|x| {
128                    header.insert(
129                        stringify!($it).to_string(),
130                        HeaderValue(
131                            Location::Other("From<&LexFlags".to_string()),
132                            Value::Flag(x, Location::Other("From<&LexFlags>".to_string())),
133                        ),
134                    )
135                });
136            };
137        }
138        cvt_flag!(dot_matches_new_line);
139        cvt_flag!(multi_line);
140        cvt_flag!(octal);
141        cvt_flag!(posix_escapes);
142        cvt_flag!(allow_wholeline_comments);
143        cvt_flag!(case_insensitive);
144        cvt_flag!(swap_greed);
145        cvt_flag!(ignore_whitespace);
146        cvt_flag!(unicode);
147
148        macro_rules! cvt_num {
149            ($it: ident) => {
150                $it.map(|x| {
151                    use cfgrammar::header::Setting;
152                    header.insert(
153                        stringify!($it).to_string(),
154                        HeaderValue(
155                            Location::Other("From<&LexFlags".to_string()),
156                            Value::Setting(Setting::Num(
157                                x as u64,
158                                Location::Other("From<&LexFlags>".to_string()),
159                            )),
160                        ),
161                    )
162                });
163            };
164        }
165        cvt_num!(size_limit);
166        cvt_num!(dfa_size_limit);
167        cvt_num!(nest_limit);
168
169        header
170    }
171}
172
173/// LexFlags with flags set to default values.
174#[doc(hidden)]
175pub const DEFAULT_LEX_FLAGS: LexFlags = LexFlags {
176    allow_wholeline_comments: Some(false),
177    dot_matches_new_line: Some(true),
178    multi_line: Some(true),
179    octal: Some(true),
180    posix_escapes: Some(false),
181    case_insensitive: None,
182    ignore_whitespace: None,
183    swap_greed: None,
184    unicode: None,
185    size_limit: None,
186    dfa_size_limit: None,
187    nest_limit: None,
188};
189
190#[doc(hidden)]
191/// LexFlags with all of the values `None`.
192pub const UNSPECIFIED_LEX_FLAGS: LexFlags = LexFlags {
193    allow_wholeline_comments: None,
194    dot_matches_new_line: None,
195    multi_line: None,
196    octal: None,
197    posix_escapes: None,
198    case_insensitive: None,
199    ignore_whitespace: None,
200    swap_greed: None,
201    unicode: None,
202    size_limit: None,
203    dfa_size_limit: None,
204    nest_limit: None,
205};
206
#[derive(Debug, Clone)]
#[doc(hidden)]
pub struct Rule<StorageT> {
    /// If `Some`, this specifies the ID that lexemes resulting from this rule will have. Note that
    /// lrlex gives rules a guaranteed unique value by default, though users can later override
    /// that, potentially undermining uniqueness if they're not careful.
    ///
    /// If `None`, then this rule specifies lexemes which should not appear in the user's input.
    pub(super) tok_id: Option<StorageT>,
    /// This rule's name. If None, then text which matches this rule will be skipped (i.e. will not
    /// create a lexeme).
    #[deprecated(note = "Use the name() function")]
    pub name: Option<String>,
    /// The [Span] of this rule's name in the lex source.
    #[deprecated(note = "Use the name_span() function")]
    pub name_span: Span,
    /// The regex exactly as the user wrote it (without the `\A(?:...)`
    /// anchoring that `Rule::new` adds when compiling).
    pub(super) re_str: String,
    /// The compiled, start-anchored form of `re_str`.
    re: Regex,
    /// Id(s) of permitted start conditions for the lexer to match this rule.
    #[deprecated(note = "Use the start_states() function")]
    pub start_states: Vec<usize>,
    /// If Some(_), successful matching of this rule will cause the current stack of start
    /// conditions in the lexer to be updated with the enclosed value, using the designated
    /// operation.
    /// If None, successful matching causes no change to the current start condition.
    #[deprecated(note = "Use the target_state() function")]
    pub target_state: Option<(usize, StartStateOperation)>,
}
234
impl<StorageT: PrimInt> Rule<StorageT> {
    /// Create a new `Rule`. This interface is unstable and should only be used by code generated
    /// by lrlex itself.
    ///
    /// The regex is compiled anchored (`\A(?:...)`) so that matching only ever
    /// succeeds at the start of the remaining input.
    ///
    /// # Panics
    ///
    /// Panics if `lex_flags.octal`, `.multi_line` or `.dot_matches_new_line`
    /// is `None` (they are `unwrap`ed below); callers are expected to have
    /// resolved these, e.g. via `DEFAULT_LEX_FLAGS`.
    #[doc(hidden)]
    #[allow(private_interfaces)]
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        _: crate::unstable_api::InternalPublicApi,
        tok_id: Option<StorageT>,
        name: Option<String>,
        name_span: Span,
        re_str: String,
        start_states: Vec<usize>,
        target_state: Option<(usize, StartStateOperation)>,
        lex_flags: &LexFlags,
    ) -> Result<Rule<StorageT>, regex::Error> {
        let mut re = RegexBuilder::new(&format!("\\A(?:{})", re_str));
        // These three flags always have a resolved value by this point.
        let mut re = re
            .octal(lex_flags.octal.unwrap())
            .multi_line(lex_flags.multi_line.unwrap())
            .dot_matches_new_line(lex_flags.dot_matches_new_line.unwrap());

        // The remaining flags are only forwarded when explicitly set; when
        // `None`, the `regex` crate's own defaults apply.
        if let Some(flag) = lex_flags.ignore_whitespace {
            re = re.ignore_whitespace(flag)
        }
        if let Some(flag) = lex_flags.unicode {
            re = re.unicode(flag)
        }
        if let Some(flag) = lex_flags.case_insensitive {
            re = re.case_insensitive(flag)
        }
        if let Some(flag) = lex_flags.swap_greed {
            re = re.swap_greed(flag)
        }
        if let Some(sz) = lex_flags.size_limit {
            re = re.size_limit(sz)
        }
        if let Some(sz) = lex_flags.dfa_size_limit {
            re = re.dfa_size_limit(sz)
        }
        if let Some(lim) = lex_flags.nest_limit {
            re = re.nest_limit(lim)
        }

        let re = re.build()?;
        #[allow(deprecated)]
        Ok(Rule {
            tok_id,
            name,
            name_span,
            re_str,
            re,
            start_states,
            target_state,
        })
    }

    /// Return this rule's token ID, if any.
    ///
    /// If `Some`, this specifies the ID that lexemes resulting from this rule will have. If
    /// `None`, then this rule specifies lexemes which should not appear in the user's input.
    pub fn tok_id(&self) -> Option<StorageT> {
        self.tok_id
    }

    /// Return this rule's name. If `None`, then text which matches this rule will be skipped (i.e.
    /// it will not result in the creation of a [Lexeme]).
    pub fn name(&self) -> Option<&str> {
        #[allow(deprecated)]
        self.name.as_deref()
    }

    /// Return the [Span] of this rule's name.
    pub fn name_span(&self) -> Span {
        #[allow(deprecated)]
        self.name_span
    }

    /// Return the original regular expression specified by the user for this [Rule].
    pub fn re_str(&self) -> &str {
        &self.re_str
    }

    /// Return the IDs of the permitted start conditions for the lexer to match this rule.
    pub fn start_states(&self) -> &[usize] {
        #[allow(deprecated)]
        self.start_states.as_slice()
    }

    /// Return, if any, the start state ID this rule switches the lexer to when
    /// it matches, along with the [StartStateOperation] to apply to the
    /// lexer's stack of start conditions. `None` means a match leaves the
    /// current start condition unchanged.
    pub fn target_state(&self) -> Option<(usize, StartStateOperation)> {
        #[allow(deprecated)]
        self.target_state.clone()
    }
}
330
/// Methods which all lexer definitions must implement.
pub trait LexerDef<LexerTypesT: LexerTypes>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    #[doc(hidden)]
    /// Instantiate a lexer from a set of `Rule`s. This is only intended to be used by compiled
    /// lexers (see `ctbuilder.rs`).
    fn from_rules(start_states: Vec<StartState>, rules: Vec<Rule<LexerTypesT::StorageT>>) -> Self
    where
        Self: Sized;

    /// Instantiate a lexer from a string (e.g. representing a `.l` file).
    fn from_str(s: &str) -> LexBuildResult<Self>
    where
        Self: Sized;

    /// Get the `Rule` at index `idx`.
    fn get_rule(&self, idx: usize) -> Option<&Rule<LexerTypesT::StorageT>>;

    /// Get the `Rule` instance associated with a particular lexeme ID. Panics if no such rule
    /// exists.
    fn get_rule_by_id(&self, tok_id: LexerTypesT::StorageT) -> &Rule<LexerTypesT::StorageT>;

    /// Get the `Rule` instance associated with a particular name.
    fn get_rule_by_name(&self, n: &str) -> Option<&Rule<LexerTypesT::StorageT>>;

    /// Set the id attribute on rules to the corresponding value in `map`. This is typically used
    /// to synchronise a parser's notion of lexeme IDs with the lexers. While doing this, it keeps
    /// track of which lexemes:
    ///   1) are defined in the lexer but not referenced by the parser
    ///   2) and referenced by the parser but not defined in the lexer
    ///
    /// and returns them as a tuple `(Option<HashSet<&str>>, Option<HashSet<&str>>)` in the order
    /// (*defined_in_lexer_missing_from_parser*, *referenced_in_parser_missing_from_lexer*). Since
    /// in most cases both sets are expected to be empty, `None` is returned to avoid a `HashSet`
    /// allocation.
    ///
    /// NOTE(review): the implementation in this file appears to return the
    /// opposite order — its first element is the set of names referenced by
    /// the parser but absent from the lexer — confirm which of the doc or the
    /// implementation is authoritative.
    ///
    /// Lexing and parsing can continue if either set is non-empty, so it is up to the caller as to
    /// what action they take if either return set is non-empty. A non-empty set #1 is often
    /// benign: some lexers deliberately define tokens which are not used (e.g. reserving future
    /// keywords). A non-empty set #2 is more likely to be an error since there are parts of the
    /// grammar where nothing the user can input will be parseable.
    fn set_rule_ids<'a>(
        &'a mut self,
        rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>,
    ) -> (Option<HashSet<&'a str>>, Option<HashSet<&'a str>>);

    /// As [LexerDef::set_rule_ids], but the set of lexer-rule names missing
    /// from the parser additionally carries the [Span] of each rule's name,
    /// allowing callers to report precise source locations.
    fn set_rule_ids_spanned<'a>(
        &'a mut self,
        rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>,
    ) -> (Option<HashSet<&'a str>>, Option<HashSet<(&'a str, Span)>>);

    /// Returns an iterator over all rules in this AST.
    fn iter_rules(&self) -> Iter<'_, Rule<LexerTypesT::StorageT>>;

    /// Returns an iterator over all start states in this AST.
    fn iter_start_states(&self) -> Iter<'_, StartState>;
}
390
/// This struct represents, in essence, a .l file in memory. From it one can produce an
/// [LRNonStreamingLexer] which actually lexes inputs.
#[derive(Debug, Clone)]
pub struct LRNonStreamingLexerDef<LexerTypesT: LexerTypes>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    /// All rules, in source order. Earlier rules win ties when two rules match
    /// input of the same length (see `lexer()`).
    rules: Vec<Rule<LexerTypesT::StorageT>>,
    /// All start states/conditions declared in the lex source.
    start_states: Vec<StartState>,
    /// The flags this definition was built with.
    lex_flags: LexFlags,
    phantom: PhantomData<LexerTypesT>,
}
403
404impl<LexerTypesT: LexerTypes> LexerDef<LexerTypesT> for LRNonStreamingLexerDef<LexerTypesT>
405where
406    usize: AsPrimitive<LexerTypesT::StorageT>,
407    LexerTypesT::StorageT: TryFrom<usize>,
408{
409    fn from_rules(
410        start_states: Vec<StartState>,
411        rules: Vec<Rule<LexerTypesT::StorageT>>,
412    ) -> LRNonStreamingLexerDef<LexerTypesT> {
413        LRNonStreamingLexerDef {
414            rules,
415            start_states,
416            lex_flags: DEFAULT_LEX_FLAGS,
417            phantom: PhantomData,
418        }
419    }
420
421    /// Given a `.l` file in an `&str`, returns a `LrNonStreamingLexerDef`
422    /// after merging the `%grmtools` section with the default set of `LexFlags`.
423    fn from_str(s: &str) -> LexBuildResult<LRNonStreamingLexerDef<LexerTypesT>> {
424        let (mut header, pos) = GrmtoolsSectionParser::new(s, false)
425            .parse()
426            .map_err(|mut errs| errs.drain(..).map(LexBuildError::from).collect::<Vec<_>>())?;
427        let flags = LexFlags::try_from(&mut header).map_err(|e| vec![e.into()])?;
428        LexParser::<LexerTypesT>::new_with_lex_flags(s[pos..].to_string(), flags.clone()).map(|p| {
429            LRNonStreamingLexerDef {
430                rules: p.rules,
431                start_states: p.start_states,
432                lex_flags: flags,
433                phantom: PhantomData,
434            }
435        })
436    }
437
438    fn get_rule(&self, idx: usize) -> Option<&Rule<LexerTypesT::StorageT>> {
439        self.rules.get(idx)
440    }
441
442    fn get_rule_by_id(&self, tok_id: LexerTypesT::StorageT) -> &Rule<LexerTypesT::StorageT> {
443        self.rules
444            .iter()
445            .find(|r| r.tok_id == Some(tok_id))
446            .unwrap()
447    }
448
449    fn get_rule_by_name(&self, n: &str) -> Option<&Rule<LexerTypesT::StorageT>> {
450        self.rules.iter().find(|r| r.name() == Some(n))
451    }
452
453    fn set_rule_ids<'a>(
454        &'a mut self,
455        rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>,
456    ) -> (Option<HashSet<&'a str>>, Option<HashSet<&'a str>>) {
457        let (missing_from_parser, missing_from_lexer) = self.set_rule_ids_spanned(rule_ids_map);
458        let missing_from_lexer =
459            missing_from_lexer.map(|missing| missing.iter().map(|(name, _)| *name).collect());
460        (missing_from_parser, missing_from_lexer)
461    }
462
463    fn set_rule_ids_spanned<'a>(
464        &'a mut self,
465        rule_ids_map: &HashMap<&'a str, LexerTypesT::StorageT>,
466    ) -> (Option<HashSet<&'a str>>, Option<HashSet<(&'a str, Span)>>) {
467        // Because we have to iter_mut over self.rules, we can't easily store a reference to the
468        // rule's name at the same time. Instead, we store the index of each such rule and
469        // recover the names later. This has the unfortunate consequence of extended the mutable
470        // borrow for the rest of the 'a lifetime. To avoid that we could return idx's here.
471        // But the original `set_rule_ids` invalidates indexes.  In the spirit of keeping that
472        // behavior consistent, this also returns the span.
473        let mut missing_from_parser_idxs = Vec::new();
474        let mut rules_with_names = 0;
475        for (i, r) in self.rules.iter_mut().enumerate() {
476            if let Some(n) = r.name() {
477                match rule_ids_map.get(n) {
478                    Some(tok_id) => r.tok_id = Some(*tok_id),
479                    None => {
480                        r.tok_id = None;
481                        missing_from_parser_idxs.push(i);
482                    }
483                }
484                rules_with_names += 1;
485            }
486        }
487
488        let missing_from_parser = if missing_from_parser_idxs.is_empty() {
489            None
490        } else {
491            let mut mfp = HashSet::with_capacity(missing_from_parser_idxs.len());
492            for i in &missing_from_parser_idxs {
493                mfp.insert((self.rules[*i].name().unwrap(), self.rules[*i].name_span()));
494            }
495            Some(mfp)
496        };
497
498        let missing_from_lexer =
499            if rules_with_names - missing_from_parser_idxs.len() == rule_ids_map.len() {
500                None
501            } else {
502                Some(
503                    rule_ids_map
504                        .keys()
505                        .cloned()
506                        .collect::<HashSet<&str>>()
507                        .difference(
508                            &self
509                                .rules
510                                .iter()
511                                .filter_map(|x| x.name())
512                                .collect::<HashSet<&str>>(),
513                        )
514                        .cloned()
515                        .collect::<HashSet<&str>>(),
516                )
517            };
518
519        (missing_from_lexer, missing_from_parser)
520    }
521
522    fn iter_rules(&self) -> Iter<'_, Rule<LexerTypesT::StorageT>> {
523        self.rules.iter()
524    }
525
526    fn iter_start_states(&self) -> Iter<'_, StartState> {
527        self.start_states.iter()
528    }
529}
530
impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> LRNonStreamingLexerDef<LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
    LexerTypesT::StorageT: TryFrom<usize>,
{
    /// Uses the `lex_flags` passed in ignoring any settings in the `%grmtools` section.
    ///
    /// NOTE(review): the `%grmtools` section is still parsed here (to find
    /// where the lex rules start) and a parse failure `unwrap`s — confirm that
    /// panicking rather than returning `Err` is intended for this entry point.
    pub fn new_with_options(
        s: &str,
        lex_flags: LexFlags,
    ) -> LexBuildResult<LRNonStreamingLexerDef<LexerTypesT>> {
        let (_, pos) = GrmtoolsSectionParser::new(s, false).parse().unwrap();
        LexParser::<LexerTypesT>::new_with_lex_flags(s[pos..].to_string(), lex_flags.clone()).map(
            |p| LRNonStreamingLexerDef {
                rules: p.rules,
                start_states: p.start_states,
                lex_flags,
                phantom: PhantomData,
            },
        )
    }

    /// Return an [LRNonStreamingLexer] for the `String` `s` that will lex relative to this
    /// [LRNonStreamingLexerDef].
    ///
    /// Lexing is longest-match: at each position every rule permitted in the
    /// current start condition is tried, and the longest match wins (earliest
    /// rule on ties). On any error a single `Err` lexeme is recorded and
    /// lexing stops.
    pub fn lexer<'lexer, 'input: 'lexer>(
        &'lexer self,
        s: &'input str,
    ) -> LRNonStreamingLexer<'lexer, 'input, LexerTypesT> {
        let mut lexemes = vec![];
        let mut i = 0;
        // Stack of active start conditions, run-length encoded: each entry is
        // `(count, state)` where `count` tracks how many consecutive pushes of
        // the same state it represents (see the `Push`/`Pop` handling below).
        let mut state_stack: Vec<(usize, &StartState)> = Vec::new();
        // Start state id 0 is the initial condition; if it is missing the
        // definition is unusable and we emit a single error lexeme.
        let initial_state = match self.get_start_state_by_id(0) {
            None => {
                lexemes.push(Err(LRLexError::new(Span::new(i, i))));
                return LRNonStreamingLexer::new(s, lexemes, NewlineCache::from_str(s).unwrap());
            }
            Some(state) => state,
        };
        state_stack.push((1, initial_state));

        while i < s.len() {
            let old_i = i;
            let mut longest = 0; // Length of the longest match
            let mut longest_ridx = 0; // This is only valid iff longest != 0
            let current_state = match state_stack.last() {
                None => {
                    lexemes.push(Err(LRLexError::new(Span::new(i, i))));
                    return LRNonStreamingLexer::new(
                        s,
                        lexemes,
                        NewlineCache::from_str(s).unwrap(),
                    );
                }
                Some((_, s)) => s,
            };
            for (ridx, r) in self.iter_rules().enumerate() {
                if !Self::state_matches(current_state, r.start_states()) {
                    continue;
                }
                // Rules are compiled `\A`-anchored, so a find can only match
                // at the very start of the remaining input.
                if let Some(m) = r.re.find(&s[old_i..]) {
                    let len = m.end();
                    // Note that by using ">", we implicitly prefer an earlier over a later rule, if
                    // both match an input of the same length.
                    if len > longest {
                        longest = len;
                        longest_ridx = ridx;
                    }
                }
            }
            if longest > 0 {
                let r = self.get_rule(longest_ridx).unwrap();
                // Nameless rules skip their match entirely; named rules either
                // produce a lexeme (Some tok_id) or mark forbidden input (None).
                if r.name().is_some() {
                    match r.tok_id {
                        Some(tok_id) => {
                            lexemes.push(Ok(Lexeme::new(tok_id, old_i, longest)));
                        }
                        None => {
                            lexemes.push(Err(LRLexError::new(Span::new(old_i, old_i))));
                            break;
                        }
                    }
                }
                // Apply any start-condition transition attached to the rule.
                if let Some((target_state_id, op)) = &r.target_state() {
                    let state = match self.get_start_state_by_id(*target_state_id) {
                        None => {
                            // TODO: I can see an argument for lexing state to be either `None` or `Some(target_state_id)` here
                            lexemes.push(Err(LRLexError::new(Span::new(old_i, old_i))));
                            break;
                        }
                        Some(state) => state,
                    };
                    let head = state_stack.last_mut();
                    match op {
                        StartStateOperation::ReplaceStack => {
                            state_stack.clear();
                            state_stack.push((1, state));
                        }
                        // Pushing the same state as the current head just bumps
                        // the head's run-length counter instead of growing the
                        // stack.
                        StartStateOperation::Push => match head {
                            Some((count, s)) if s.id == state.id => *count += 1,
                            _ => state_stack.push((1, state)),
                        },
                        StartStateOperation::Pop => match head {
                            Some((count, _s)) if *count > 1 => {
                                *count -= 1;
                            }
                            Some(_) => {
                                state_stack.pop();
                                // An emptied stack falls back to the initial
                                // condition rather than leaving no state.
                                if state_stack.is_empty() {
                                    state_stack.push((1, initial_state));
                                }
                            }
                            None => {
                                lexemes.push(Err(LRLexError::new(Span::new(old_i, old_i))));
                                break;
                            }
                        },
                    }
                }
                i += longest;
            } else {
                // Nothing matched at this position: record an error carrying
                // the active start condition and stop.
                lexemes.push(Err(LRLexError::new_with_lexing_state(
                    Span::new(old_i, old_i),
                    StartStateId::new(current_state.id),
                )));
                break;
            }
        }
        LRNonStreamingLexer::new(s, lexemes, NewlineCache::from_str(s).unwrap())
    }

    /// A rule with no explicit start states matches in any non-exclusive
    /// state; otherwise the current state's id must be listed.
    fn state_matches(state: &StartState, rule_states: &[usize]) -> bool {
        if rule_states.is_empty() {
            !state.exclusive
        } else {
            rule_states.contains(&state.id)
        }
    }

    /// Linear search for the start state with the given `id`.
    fn get_start_state_by_id(&self, id: usize) -> Option<&StartState> {
        self.start_states.iter().find(|state| state.id == id)
    }

    /// Returns the final `LexFlags` used for this lex source
    /// after all forced and default flags have been resolved.
    pub(crate) fn lex_flags(&self) -> Option<&LexFlags> {
        Some(&self.lex_flags)
    }
}
681
/// An `LRNonStreamingLexer` holds a reference to a string and can lex it into [lrpar::Lexeme]s.
/// Although the struct is tied to a single string, no guarantees are made about whether the
/// lexemes are cached or not.
pub struct LRNonStreamingLexer<'lexer, 'input: 'lexer, LexerTypesT: LexerTypes>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::StorageT: 'static + Debug + PrimInt,
{
    /// The input that was lexed; all [Span]s index into this string.
    s: &'input str,
    /// Lexemes (or the errors encountered producing them), in input order.
    lexemes: Vec<Result<LexerTypesT::LexemeT, LRLexError>>,
    /// Newline byte offsets of `s`, for byte → (line, column) conversion.
    newlines: NewlineCache,
    phantom: PhantomData<(&'lexer (), LexerTypesT::StorageT)>,
}
695
696impl<
697    'lexer,
698    'input: 'lexer,
699    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
700    LexerTypesT: LexerTypes<StorageT = StorageT>,
701> LRNonStreamingLexer<'lexer, 'input, LexerTypesT>
702where
703    usize: AsPrimitive<StorageT>,
704{
705    /// Create a new `LRNonStreamingLexer` that read in: the input `s`; and derived `lexemes` and
706    /// `newlines`.
707    ///
708    /// Note that if one or more lexemes or newlines was not created from `s`, subsequent calls to
709    /// the `LRNonStreamingLexer` may cause `panic`s.
710    pub fn new(
711        s: &'input str,
712        lexemes: Vec<Result<LexerTypesT::LexemeT, LRLexError>>,
713        newlines: NewlineCache,
714    ) -> LRNonStreamingLexer<'lexer, 'input, LexerTypesT> {
715        LRNonStreamingLexer {
716            s,
717            lexemes,
718            newlines,
719            phantom: PhantomData,
720        }
721    }
722}
723
724impl<
725    'lexer,
726    'input: 'lexer,
727    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
728    LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = LRLexError>,
729> Lexer<LexerTypesT> for LRNonStreamingLexer<'lexer, 'input, LexerTypesT>
730where
731    usize: AsPrimitive<StorageT>,
732{
733    fn iter<'a>(
734        &'a self,
735    ) -> Box<dyn Iterator<Item = Result<LexerTypesT::LexemeT, LexerTypesT::LexErrorT>> + 'a> {
736        Box::new(self.lexemes.iter().cloned())
737    }
738}
739
740impl<'lexer, 'input: 'lexer, LexerTypesT: LexerTypes<LexErrorT = LRLexError>>
741    NonStreamingLexer<'input, LexerTypesT> for LRNonStreamingLexer<'lexer, 'input, LexerTypesT>
742where
743    usize: AsPrimitive<LexerTypesT::StorageT>,
744{
745    fn span_str(&self, span: Span) -> &'input str {
746        if span.end() > self.s.len() {
747            panic!(
748                "Span {:?} exceeds known input length {}",
749                span,
750                self.s.len()
751            );
752        }
753        &self.s[span.start()..span.end()]
754    }
755
756    fn span_lines_str(&self, span: Span) -> &'input str {
757        debug_assert!(span.end() >= span.start());
758        if span.end() > self.s.len() {
759            panic!(
760                "Span {:?} exceeds known input length {}",
761                span,
762                self.s.len()
763            );
764        }
765
766        let (st, en) = self.newlines.span_line_bytes(span);
767        &self.s[st..en]
768    }
769
770    fn line_col(&self, span: Span) -> ((usize, usize), (usize, usize)) {
771        debug_assert!(span.end() >= span.start());
772        if span.end() > self.s.len() {
773            panic!(
774                "Span {:?} exceeds known input length {}",
775                span,
776                self.s.len()
777            );
778        }
779
780        (
781            self.newlines
782                .byte_to_line_num_and_col_num(self.s, span.start())
783                .unwrap(),
784            self.newlines
785                .byte_to_line_num_and_col_num(self.s, span.end())
786                .unwrap(),
787        )
788    }
789}
790
791#[cfg(test)]
792mod test {
793    use super::*;
794    use crate::{DefaultLexeme, DefaultLexerTypes};
795    use lrpar::LexError;
796    use std::collections::HashMap;
797
798    #[test]
799    fn test_basic() {
800        let src = r"
801%%
802[0-9]+ 'int'
803[a-zA-Z]+ 'id'
804[ \t] ;"
805            .to_string();
806        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
807        let mut map = HashMap::new();
808        map.insert("int", 0);
809        map.insert("id", 1);
810        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));
811
812        let lexemes = lexerdef
813            .lexer("abc 123")
814            .iter()
815            .map(|x| x.unwrap())
816            .collect::<Vec<_>>();
817        assert_eq!(lexemes.len(), 2);
818        let lex1 = lexemes[0];
819        assert_eq!(lex1.tok_id(), 1u8);
820        assert_eq!(lex1.span().start(), 0);
821        assert_eq!(lex1.span().len(), 3);
822        let lex2 = lexemes[1];
823        assert_eq!(lex2.tok_id(), 0);
824        assert_eq!(lex2.span().start(), 4);
825        assert_eq!(lex2.span().len(), 3);
826    }
827
828    #[test]
829    fn test_posix_escapes() {
830        let src = r#"%%
831\\ 'slash'
832\a 'alert'
833\b 'backspace'
834\f 'feed'
835\n 'newline'
836\r 'return'
837\t 'tab'
838\v 'vtab'
839\q 'normal_char'
840"#
841        .to_string();
842        let mut options = DEFAULT_LEX_FLAGS;
843        options.posix_escapes = Some(true);
844        let lexerdef =
845            LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::new_with_options(&src, options)
846                .unwrap();
847        let lexemes = lexerdef
848            .lexer("\\\x07\x08\x0c\n\r\t\x0bq")
849            .iter()
850            .map(|x| x.unwrap())
851            .collect::<Vec<_>>();
852        assert_eq!(lexemes.len(), 9);
853        for i in 0..9u8 {
854            let lexeme = lexemes[i as usize];
855            assert_eq!(lexeme.tok_id(), i);
856        }
857    }
858
    /// Tests that with `posix_escapes` disabled, escape sequences are handed
    /// through to the `regex` crate's syntax — the `a\b a` rule only lexes
    /// "a a" here because `\b` acts as a word boundary rather than backspace.
    // NOTE(review): 'work_break' looks like a typo for 'word_break'; harmless,
    // since no rule-id map is set and token ids are assigned positionally.
    #[test]
    fn test_non_posix_escapes() {
        let src = r#"%%
\\ 'slash'
\a 'alert'
a\b a 'work_break'
\f 'feed'
\n 'newline'
\r 'return'
\t 'tab'
\v 'vtab'
\q 'normal_char'
"#
        .to_string();
        let mut options = DEFAULT_LEX_FLAGS;
        options.posix_escapes = Some(false);
        let lexerdef =
            LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::new_with_options(&src, options)
                .unwrap();
        let lexemes = lexerdef
            .lexer("\\\x07a a\x0c\n\r\t\x0bq")
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(lexemes.len(), 9);
        // Input segments match the rules in declaration order, so the i'th
        // lexeme must carry token id i.
        for i in 0..9u8 {
            let lexeme = lexemes[i as usize];
            assert_eq!(lexeme.tok_id(), i);
        }
    }
889
890    #[test]
891    fn test_basic_error() {
892        let src = "
893%%
894[0-9]+ 'int'"
895            .to_string();
896        let lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
897        match lexerdef.lexer("abc").iter().next().unwrap() {
898            Ok(_) => panic!("Invalid input lexed"),
899            Err(e) => {
900                if e.span().start() != 0 || e.span().end() != 0 {
901                    panic!("Incorrect span returned {:?}", e.span());
902                }
903            }
904        };
905    }
906
907    #[test]
908    fn test_longest_match() {
909        let src = "%%
910if 'IF'
911[a-z]+ 'ID'
912[ ] ;"
913            .to_string();
914        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
915        let mut map = HashMap::new();
916        map.insert("IF", 0);
917        map.insert("ID", 1);
918        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));
919
920        let lexemes = lexerdef
921            .lexer("iff if")
922            .iter()
923            .map(|x| x.unwrap())
924            .collect::<Vec<DefaultLexeme<u8>>>();
925        assert_eq!(lexemes.len(), 2);
926        let lex1 = lexemes[0];
927        assert_eq!(lex1.tok_id(), 1u8);
928        assert_eq!(lex1.span().start(), 0);
929        assert_eq!(lex1.span().len(), 3);
930        let lex2 = lexemes[1];
931        assert_eq!(lex2.tok_id(), 0);
932        assert_eq!(lex2.span().start(), 4);
933        assert_eq!(lex2.span().len(), 2);
934    }
935
936    #[test]
937    fn test_multibyte() {
938        let src = "%%
939[a❤]+ 'ID'
940[ ] ;"
941            .to_string();
942        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
943        let mut map = HashMap::new();
944        map.insert("ID", 0u8);
945        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));
946
947        let lexer = lexerdef.lexer("a ❤ a");
948        let lexemes = lexer
949            .iter()
950            .map(|x| x.unwrap())
951            .collect::<Vec<DefaultLexeme<u8>>>();
952        assert_eq!(lexemes.len(), 3);
953        let lex1 = lexemes[0];
954        assert_eq!(lex1.span().start(), 0);
955        assert_eq!(lex1.span().len(), 1);
956        assert_eq!(lexer.span_str(lex1.span()), "a");
957        let lex2 = lexemes[1];
958        assert_eq!(lex2.span().start(), 2);
959        assert_eq!(lex2.span().len(), 3);
960        assert_eq!(lexer.span_str(lex2.span()), "❤");
961        let lex3 = lexemes[2];
962        assert_eq!(lex3.span().start(), 6);
963        assert_eq!(lex3.span().len(), 1);
964        assert_eq!(lexer.span_str(lex3.span()), "a");
965    }
966
    /// Tests `line_col` and `span_lines_str` over single-line, trailing-newline
    /// and multi-line inputs, then over a generated 71-line input. Lines and
    /// columns are 1-indexed.
    #[test]
    fn test_line_col() {
        let src = "%%
[a-z]+ 'ID'
[ \\n] ;"
            .to_string();
        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        let mut map = HashMap::new();
        map.insert("ID", 0u8);
        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));

        // Single-line input: "b" sits at columns 3..4 of line 1, and every
        // lexeme's enclosing line is the whole input.
        let lexer = lexerdef.lexer("a b c");
        let lexemes = lexer
            .iter()
            .map(|x| x.unwrap())
            .collect::<Vec<DefaultLexeme<u8>>>();
        assert_eq!(lexemes.len(), 3);
        assert_eq!(lexer.line_col(lexemes[1].span()), ((1, 3), (1, 4)));
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "a b c");
        assert_eq!(lexer.span_lines_str(lexemes[2].span()), "a b c");

        // A trailing newline must not change line/col or the line text.
        let lexer = lexerdef.lexer("a b c\n");
        let lexemes = lexer.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
        assert_eq!(lexemes.len(), 3);
        assert_eq!(lexer.line_col(lexemes[1].span()), ((1, 3), (1, 4)));
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "a b c");
        assert_eq!(lexer.span_lines_str(lexemes[2].span()), "a b c");

        // Multi-line input with leading whitespace on lines 1 and 3.
        let lexer = lexerdef.lexer(" a\nb\n  c d");
        let lexemes = lexer.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
        assert_eq!(lexemes.len(), 4);
        assert_eq!(lexer.line_col(lexemes[0].span()), ((1, 2), (1, 3)));
        assert_eq!(lexer.line_col(lexemes[1].span()), ((2, 1), (2, 2)));
        assert_eq!(lexer.line_col(lexemes[2].span()), ((3, 3), (3, 4)));
        assert_eq!(lexer.line_col(lexemes[3].span()), ((3, 5), (3, 6)));
        assert_eq!(lexer.span_lines_str(lexemes[0].span()), " a");
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "b");
        assert_eq!(lexer.span_lines_str(lexemes[2].span()), "  c d");
        assert_eq!(lexer.span_lines_str(lexemes[3].span()), "  c d");

        // Build 71 lines where line i (0-based) holds i space-separated "a"s;
        // offs[i] is the index of the first lexeme on line i.
        let mut s = Vec::new();
        let mut offs = vec![0];
        for i in 0..71 {
            offs.push(offs[i] + i + 1);
            s.push(vec!["a"; i].join(" "));
        }
        let s = s.join("\n");
        let lexer = lexerdef.lexer(&s);
        let lexemes = lexer.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
        assert_eq!(lexemes.len(), offs[70]);
        // Spans crossing line boundaries widen to all lines they touch.
        assert_eq!(lexer.span_lines_str(Span::new(0, 0)), "");
        assert_eq!(lexer.span_lines_str(Span::new(0, 2)), "\na");
        assert_eq!(lexer.span_lines_str(Span::new(0, 4)), "\na\na a");
        assert_eq!(lexer.span_lines_str(Span::new(0, 7)), "\na\na a\na a a");
        assert_eq!(lexer.span_lines_str(Span::new(4, 7)), "a a\na a a");
        assert_eq!(lexer.span_lines_str(lexemes[0].span()), "a");
        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "a a");
        assert_eq!(lexer.span_lines_str(lexemes[3].span()), "a a a");
        // The first lexeme of each non-empty line reports that whole line.
        for i in 0..70 {
            assert_eq!(
                lexer.span_lines_str(lexemes[offs[i]].span()),
                vec!["a"; i + 1].join(" ")
            );
        }
    }
1032
1033    #[test]
1034    fn test_line_col_multibyte() {
1035        let src = "%%
1036[a-z❤]+ 'ID'
1037[ \\n] ;"
1038            .to_string();
1039        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
1040        let mut map = HashMap::new();
1041        map.insert("ID", 0u8);
1042        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));
1043
1044        let lexer = lexerdef.lexer(" a\n❤ b");
1045        let lexemes = lexer
1046            .iter()
1047            .map(|x| x.unwrap())
1048            .collect::<Vec<DefaultLexeme<u8>>>();
1049        assert_eq!(lexemes.len(), 3);
1050        assert_eq!(lexer.line_col(lexemes[0].span()), ((1, 2), (1, 3)));
1051        assert_eq!(lexer.line_col(lexemes[1].span()), ((2, 1), (2, 2)));
1052        assert_eq!(lexer.line_col(lexemes[2].span()), ((2, 3), (2, 4)));
1053        assert_eq!(lexer.span_lines_str(lexemes[0].span()), " a");
1054        assert_eq!(lexer.span_lines_str(lexemes[1].span()), "❤ b");
1055        assert_eq!(lexer.span_lines_str(lexemes[2].span()), "❤ b");
1056    }
1057
1058    #[test]
1059    #[should_panic]
1060    fn test_bad_line_col() {
1061        let src = "%%
1062[a-z]+ 'ID'
1063[ \\n] ;"
1064            .to_string();
1065        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
1066        let mut map = HashMap::new();
1067        map.insert("ID", 0u8);
1068        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));
1069
1070        let lexer = lexerdef.lexer("a b c");
1071
1072        lexer.line_col(Span::new(100, 100));
1073    }
1074
1075    #[test]
1076    fn test_missing_from_lexer_and_parser() {
1077        let src = "%%
1078[a-z]+ 'ID'
1079[ \\n] ;"
1080            .to_string();
1081        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
1082        let mut map = HashMap::new();
1083        map.insert("INT", 0u8);
1084        let mut missing_from_lexer = HashSet::new();
1085        missing_from_lexer.insert("INT");
1086        let mut missing_from_parser = HashSet::new();
1087        missing_from_parser.insert("ID");
1088        assert_eq!(
1089            lexerdef.set_rule_ids(&map),
1090            (Some(missing_from_lexer), Some(missing_from_parser))
1091        );
1092
1093        match lexerdef.lexer(" a ").iter().next().unwrap() {
1094            Ok(_) => panic!("Invalid input lexed"),
1095            Err(e) => {
1096                if e.span().start() != 1 || e.span().end() != 1 {
1097                    panic!("Incorrect span returned {:?}", e.span());
1098                }
1099            }
1100        };
1101    }
1102
1103    #[test]
1104    fn test_multiline_lexeme() {
1105        let src = "%%
1106'.*' 'STR'
1107[ \\n] ;"
1108            .to_string();
1109        let mut lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
1110        let mut map = HashMap::new();
1111        map.insert("STR", 0u8);
1112        assert_eq!(lexerdef.set_rule_ids(&map), (None, None));
1113
1114        let lexer = lexerdef.lexer("'a\nb'\n");
1115        let lexemes = lexer
1116            .iter()
1117            .map(|x| x.unwrap())
1118            .collect::<Vec<DefaultLexeme<u8>>>();
1119        assert_eq!(lexemes.len(), 1);
1120        assert_eq!(lexer.line_col(lexemes[0].span()), ((1, 1), (2, 3)));
1121        assert_eq!(lexer.span_lines_str(lexemes[0].span()), "'a\nb'");
1122    }
1123
    /// Tests that `name_span` points at the token name inside the lex source,
    /// and that the unnamed `;` rule gets an empty span at the `;`.
    #[test]
    fn test_token_span() {
        let src = "%%
a 'A'
b 'B'
[ \\n] ;"
            .to_string();
        let lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        // Byte offsets into `src`: "%%\n" occupies 0..3, so the 'A' on line 2
        // sits at bytes 6..7 and the 'B' on line 3 at bytes 12..13.
        assert_eq!(
            lexerdef.get_rule_by_name("A").unwrap().name_span(),
            Span::new(6, 7)
        );
        assert_eq!(
            lexerdef.get_rule_by_name("B").unwrap().name_span(),
            Span::new(12, 13)
        );
        // The `;` (no-name) rule reports an empty span at the `;` (byte 21).
        let anonymous_rules = lexerdef
            .iter_rules()
            .filter(|rule| rule.name().is_none())
            .collect::<Vec<_>>();
        assert_eq!(anonymous_rules[0].name_span(), Span::new(21, 21));
    }
1146
    /// Tests that `%x`/`%s` start-state declarations shift subsequent rules'
    /// `name_span` byte offsets by the length of the header lines.
    #[test]
    fn test_token_start_states() {
        let src = "%x EXCLUSIVE_START
%s INCLUSIVE_START
%%
a 'A'
b 'B'
[ \\n] ;"
            .to_string();
        let lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        // The two 19-byte header lines plus "%%\n" put the rules section at
        // byte 41, so 'A' is at 44..45 and 'B' at 50..51.
        assert_eq!(
            lexerdef.get_rule_by_name("A").unwrap().name_span(),
            Span::new(44, 45)
        );
        assert_eq!(
            lexerdef.get_rule_by_name("B").unwrap().name_span(),
            Span::new(50, 51)
        );
    }
1166
    /// Tests that a `<STATE>` prefix on a rule is not part of its regex:
    /// `re_str` holds only the pattern, and `name_span` still points at the
    /// token name.
    #[test]
    fn test_rule_start_states() {
        let src = "%x EXCLUSIVE_START
%s INCLUSIVE_START
%%
<EXCLUSIVE_START>a 'A'
<INCLUSIVE_START>b 'B'
[ \\n] ;"
            .to_string();
        let lexerdef = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::from_str(&src).unwrap();
        // Headers end at byte 41; the 17-byte "<EXCLUSIVE_START>" prefix puts
        // 'A' at 61..62 and, on the next line, 'B' at 84..85.
        let a_rule = lexerdef.get_rule_by_name("A").unwrap();
        assert_eq!(a_rule.name_span(), Span::new(61, 62));
        assert_eq!(a_rule.re_str, "a");

        let b_rule = lexerdef.get_rule_by_name("B").unwrap();
        assert_eq!(b_rule.name_span(), Span::new(84, 85));
        assert_eq!(b_rule.re_str, "b");
    }
1185
1186    #[test]
1187    fn test_state_matches_regular_no_rule_states() {
1188        let all_states = &[
1189            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
1190            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
1191        ];
1192        let rule_states = vec![];
1193        let current_state = &all_states[0];
1194        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
1195            current_state,
1196            &rule_states,
1197        );
1198        assert!(m);
1199    }
1200
1201    #[test]
1202    fn test_state_matches_exclusive_no_rule_states() {
1203        let all_states = &[
1204            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
1205            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
1206        ];
1207        let rule_states = vec![];
1208        let current_state = &all_states[1];
1209        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
1210            current_state,
1211            &rule_states,
1212        );
1213        assert!(!m);
1214    }
1215
1216    #[test]
1217    fn test_state_matches_regular_matching_rule_states() {
1218        let all_states = &[
1219            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
1220            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
1221        ];
1222        let rule_states = vec![0];
1223        let current_state = &all_states[0];
1224        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
1225            current_state,
1226            &rule_states,
1227        );
1228        assert!(m);
1229    }
1230
1231    #[test]
1232    fn test_state_matches_exclusive_matching_rule_states() {
1233        let all_states = &[
1234            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
1235            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
1236        ];
1237        let rule_states = vec![1];
1238        let current_state = &all_states[1];
1239        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
1240            current_state,
1241            &rule_states,
1242        );
1243        assert!(m);
1244    }
1245
1246    #[test]
1247    fn test_state_matches_regular_other_rule_states() {
1248        let all_states = &[
1249            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
1250            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
1251        ];
1252        let rule_states = vec![1];
1253        let current_state = &all_states[0];
1254        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
1255            current_state,
1256            &rule_states,
1257        );
1258        assert!(!m);
1259    }
1260
1261    #[test]
1262    fn test_state_matches_exclusive_other_rule_states() {
1263        let all_states = &[
1264            StartState::new(0, "INITIAL", false, Span::new(0, 0)),
1265            StartState::new(1, "EXCLUSIVE", true, Span::new(0, 0)),
1266        ];
1267        let rule_states = vec![0];
1268        let current_state = &all_states[1];
1269        let m = LRNonStreamingLexerDef::<DefaultLexerTypes<u8>>::state_matches(
1270            current_state,
1271            &rule_states,
1272        );
1273        assert!(!m);
1274    }
1275}