lrpar/lex_api.rs
1#![allow(clippy::len_without_is_empty)]
2
3use std::{cmp, error::Error, fmt, hash::Hash, marker};
4
5use cfgrammar::Span;
6use num_traits::{AsPrimitive, PrimInt, Unsigned};
7
8pub trait LexerTypes: fmt::Debug
9where
10 usize: AsPrimitive<Self::StorageT>,
11{
12 type LexemeT: Lexeme<Self::StorageT>;
13 type StorageT: 'static + fmt::Debug + Hash + PrimInt + Unsigned;
14 type LexErrorT: LexError;
15}
16
17/// The base trait which all lexers which want to interact with `lrpar` must implement.
18pub trait Lexer<LexerTypesT: LexerTypes>
19where
20 usize: AsPrimitive<LexerTypesT::StorageT>,
21{
22 /// Iterate over all the lexemes in this lexer. Note that:
23 /// * The lexer may or may not stop after the first [LexError] is encountered.
24 /// * There are no guarantees about what happens if this function is called more than once.
25 /// For example, a streaming lexer may only produce [Lexeme]s on the first call.
26 fn iter<'a>(
27 &'a self,
28 ) -> Box<dyn Iterator<Item = Result<LexerTypesT::LexemeT, LexerTypesT::LexErrorT>> + 'a>;
29}
30
31/// A `NonStreamingLexer` is one that takes input in one go, and is then able to hand out
32/// substrings to that input and calculate line and column numbers from a [Span].
33pub trait NonStreamingLexer<'input, LexerTypesT: LexerTypes>: Lexer<LexerTypesT>
34where
35 usize: AsPrimitive<LexerTypesT::StorageT>,
36{
37 /// Return the user input associated with a [Span].
38 ///
39 /// The [Span] must be well formed:
40 /// * The start/end byte indexes must be valid UTF-8 character indexes.
41 /// * The end byte index must not exceed the input's length.
42 ///
43 /// If these requirements are not respected this function may panic or return unexpected
44 /// portions of the input.
45 fn span_str(&self, span: Span) -> &'input str;
46
47 /// Return the lines containing the input at `span` (including *all* the text on the lines
48 /// that `span` starts and ends on).
49 ///
50 /// The [Span] must be well formed:
51 /// * The start/end byte indexes must be valid UTF-8 character indexes.
52 /// * The end byte index must not exceed the input's length.
53 ///
54 /// If these requirements are not respected this function may panic or return unexpected
55 /// portions of the input.
56 fn span_lines_str(&self, span: Span) -> &'input str;
57
58 /// Return `((start line, start column), (end line, end column))` for `span`. Note that column
59 /// *characters* (not bytes) are returned.
60 ///
61 /// The [Span] must be well formed:
62 /// * The start/end byte indexes must be valid UTF-8 character indexes.
63 /// * The end byte index must not exceed the input's length.
64 ///
65 /// If these requirements are not respected this function may panic or return unexpected
66 /// portions of the input.
67 fn line_col(&self, span: Span) -> ((usize, usize), (usize, usize));
68}
69
70/// A lexeme represents a segment of the user's input that conforms to a known type: this trait
71/// captures the common behaviour of all lexeme structs.
72///
73/// Lexemes are assumed to have a definition which describes all possible correct lexemes (e.g. the
74/// regular expression `[0-9]+` defines all integer lexemes). This trait also allows "faulty"
75/// lexemes to be represented -- that is, lexemes that have resulted from error recovery of some
76/// sort. Faulty lexemes can violate the lexeme's type definition in any possible way (e.g. they
77/// might span more or less input than the definition would suggest is possible).
78pub trait Lexeme<StorageT>: fmt::Debug + fmt::Display + cmp::Eq + Hash + marker::Copy {
79 /// Create a new lexeme with ID `tok_id`, a starting position in the input `start`, and length
80 /// `len`.
81 ///
82 /// Lexemes created using this function are expected to be "correct" in the sense that they
83 /// fully respect the lexeme's definition semantics. To create faulty lexemes, use
84 /// [new_faulty](Lexeme::new_faulty).
85 fn new(tok_id: StorageT, start: usize, len: usize) -> Self
86 where
87 Self: Sized;
88
89 /// Create a new faulty lexeme with ID `tok_id` and a starting position in the input `start`.
90 fn new_faulty(tok_id: StorageT, start: usize, len: usize) -> Self
91 where
92 Self: Sized;
93
94 /// The token ID.
95 fn tok_id(&self) -> StorageT;
96
97 /// Obtain this `Lexeme`'s [Span].
98 fn span(&self) -> Span;
99
100 /// Returns `true` if this lexeme is "faulty" i.e. is the result of error recovery in some way.
101 /// If `true`, note that the lexeme's span may be greater or less than you may expect from the
102 /// lexeme's definition.
103 fn faulty(&self) -> bool;
104}
105
106/// A lexing error.
107pub trait LexError: Error {
108 /// Return the span associated with this error.
109 fn span(&self) -> Span;
110}