lrpar/
lex_api.rs

1#![allow(clippy::len_without_is_empty)]
2
3use std::{cmp, error::Error, fmt, hash::Hash, marker};
4
5use cfgrammar::Span;
6use num_traits::{AsPrimitive, PrimInt, Unsigned};
7
8pub trait LexerTypes: fmt::Debug
9where
10    usize: AsPrimitive<Self::StorageT>,
11{
12    type LexemeT: Lexeme<Self::StorageT>;
13    type StorageT: 'static + fmt::Debug + Hash + PrimInt + Unsigned;
14    type LexErrorT: LexError;
15}
16
17/// The base trait which all lexers which want to interact with `lrpar` must implement.
18pub trait Lexer<LexerTypesT: LexerTypes>
19where
20    usize: AsPrimitive<LexerTypesT::StorageT>,
21{
22    /// Iterate over all the lexemes in this lexer. Note that:
23    ///   * The lexer may or may not stop after the first [LexError] is encountered.
24    ///   * There are no guarantees about what happens if this function is called more than once.
25    ///     For example, a streaming lexer may only produce [Lexeme]s on the first call.
26    fn iter<'a>(
27        &'a self,
28    ) -> Box<dyn Iterator<Item = Result<LexerTypesT::LexemeT, LexerTypesT::LexErrorT>> + 'a>;
29}
30
31/// A `NonStreamingLexer` is one that takes input in one go, and is then able to hand out
32/// substrings to that input and calculate line and column numbers from a [Span].
33pub trait NonStreamingLexer<'input, LexerTypesT: LexerTypes>: Lexer<LexerTypesT>
34where
35    usize: AsPrimitive<LexerTypesT::StorageT>,
36{
37    /// Return the user input associated with a [Span].
38    ///
39    /// The [Span] must be well formed:
40    ///   * The start/end byte indexes must be valid UTF-8 character indexes.
41    ///   * The end byte index must not exceed the input's length.
42    ///
43    /// If these requirements are not respected this function may panic or return unexpected
44    /// portions of the input.
45    fn span_str(&self, span: Span) -> &'input str;
46
47    /// Return the lines containing the input at `span` (including *all* the text on the lines
48    /// that `span` starts and ends on).
49    ///
50    /// The [Span] must be well formed:
51    ///   * The start/end byte indexes must be valid UTF-8 character indexes.
52    ///   * The end byte index must not exceed the input's length.
53    ///
54    /// If these requirements are not respected this function may panic or return unexpected
55    /// portions of the input.
56    fn span_lines_str(&self, span: Span) -> &'input str;
57
58    /// Return `((start line, start column), (end line, end column))` for `span`. Note that column
59    /// *characters* (not bytes) are returned.
60    ///
61    /// The [Span] must be well formed:
62    ///   * The start/end byte indexes must be valid UTF-8 character indexes.
63    ///   * The end byte index must not exceed the input's length.
64    ///
65    /// If these requirements are not respected this function may panic or return unexpected
66    /// portions of the input.
67    fn line_col(&self, span: Span) -> ((usize, usize), (usize, usize));
68}
69
70/// A lexeme represents a segment of the user's input that conforms to a known type: this trait
71/// captures the common behaviour of all lexeme structs.
72///
73/// Lexemes are assumed to have a definition which describes all possible correct lexemes (e.g. the
74/// regular expression `[0-9]+` defines all integer lexemes). This trait also allows "faulty"
75/// lexemes to be represented -- that is, lexemes that have resulted from error recovery of some
76/// sort. Faulty lexemes can violate the lexeme's type definition in any possible way (e.g. they
77/// might span more or less input than the definition would suggest is possible).
78pub trait Lexeme<StorageT>: fmt::Debug + fmt::Display + cmp::Eq + Hash + marker::Copy {
79    /// Create a new lexeme with ID `tok_id`, a starting position in the input `start`, and length
80    /// `len`.
81    ///
82    /// Lexemes created using this function are expected to be "correct" in the sense that they
83    /// fully respect the lexeme's definition semantics. To create faulty lexemes, use
84    /// [new_faulty](Lexeme::new_faulty).
85    fn new(tok_id: StorageT, start: usize, len: usize) -> Self
86    where
87        Self: Sized;
88
89    /// Create a new faulty lexeme with ID `tok_id` and a starting position in the input `start`.
90    fn new_faulty(tok_id: StorageT, start: usize, len: usize) -> Self
91    where
92        Self: Sized;
93
94    /// The token ID.
95    fn tok_id(&self) -> StorageT;
96
97    /// Obtain this `Lexeme`'s [Span].
98    fn span(&self) -> Span;
99
100    /// Returns `true` if this lexeme is "faulty" i.e. is the result of error recovery in some way.
101    /// If `true`, note that the lexeme's span may be greater or less than you may expect from the
102    /// lexeme's definition.
103    fn faulty(&self) -> bool;
104}
105
106/// A lexing error.
107pub trait LexError: Error {
108    /// Return the span associated with this error.
109    fn span(&self) -> Span;
110}