lrlex/
mod.rs

1//! `lrlex` is a partial replacement for [`lex`](http://dinosaur.compilertools.net/lex/index.html)
2//! / [`flex`](https://westes.github.io/flex/manual/). It takes in a `.l` file and statically
3//! compiles it to Rust code. The resulting [LRNonStreamingLexerDef] can then be given an input
4//! string, from which it instantiates an [LRNonStreamingLexer]. This provides an iterator which
5//! can produce the sequence of [lrpar::Lexeme]s for that input, as well as answer basic queries
6//! about [cfgrammar::Span]s (e.g. extracting substrings, calculating line and column numbers).
7
8#![allow(clippy::new_without_default)]
9#![allow(clippy::type_complexity)]
10#![allow(clippy::unnecessary_wraps)]
11#![allow(clippy::upper_case_acronyms)]
12#![forbid(unsafe_code)]
13#![deny(unreachable_pub)]
14
15use std::{error::Error, fmt};
16
17mod ctbuilder;
18#[doc(hidden)]
19pub mod defaults;
20mod lexer;
21mod parser;
22
23pub use crate::{
24    ctbuilder::{ct_token_map, CTLexer, CTLexerBuilder, LexerKind, RustEdition, Visibility},
25    defaults::{DefaultLexeme, DefaultLexerTypes},
26    lexer::{
27        LRNonStreamingLexer, LRNonStreamingLexerDef, LexFlags, LexerDef, Rule, DEFAULT_LEX_FLAGS,
28        UNSPECIFIED_LEX_FLAGS,
29    },
30    parser::StartState,
31    parser::StartStateOperation,
32};
33
34use cfgrammar::yacc::parser::SpansKind;
35use cfgrammar::{Span, Spanned};
36
/// Shorthand for a `Result` whose error side is a list of [`LexBuildError`]s,
/// so that more than one error can be reported at once.
pub type LexBuildResult<T> = Result<T, Vec<LexBuildError>>;
38
/// Any error from the Lex parser returns an instance of this struct.
///
/// An error pairs a `LexErrorKind` with the `Span`s it refers to. The spans are
/// exposed through the `Spanned` implementation, and the human-readable message
/// through `Display`.
#[derive(Debug)]
pub struct LexBuildError {
    /// The category of error.
    pub(crate) kind: LexErrorKind,
    /// The spans attached to this error, returned as-is by `Spanned::spans`.
    pub(crate) spans: Vec<Span>,
}

// Marker impl: `Error` has no required methods; `Debug` is derived above and
// `Display` is implemented separately in this file.
impl Error for LexBuildError {}
47
/// The various different possible Lex parser errors.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum LexErrorKind {
    /// The file ends prematurely.
    PrematureEnd,
    /// Routines are not currently supported.
    RoutinesNotSupported,
    /// An unknown declaration was encountered.
    UnknownDeclaration,
    /// A rule is missing a space.
    MissingSpace,
    /// A rule name is invalid.
    InvalidName,
    /// A start state is not known.
    UnknownStartState,
    /// A start state already exists (reported as a duplication error).
    DuplicateStartState,
    /// A start state is invalid.
    InvalidStartState,
    /// A start state name is invalid.
    InvalidStartStateName,
    /// A rule name already exists (reported as a duplication error).
    DuplicateName,
    /// A regular expression is invalid; carries the underlying `regex::Error`,
    /// which is included in the displayed message.
    RegexError(regex::Error),
    /// A value in the grmtools section is invalid.
    InvalidGrmtoolsSectionValue,
    /// A number is invalid.
    InvalidNumber,
    /// A grmtools section value is duplicated (reported as a duplication error).
    DuplicateGrmtoolsSectionValue,
    /// Verbatim code is not supported.
    VerbatimNotSupported,
}
68
69impl LexErrorKind {
70    fn is_same_kind(&self, other: &Self) -> bool {
71        use LexErrorKind as EK;
72        matches!(
73            (self, other),
74            (EK::PrematureEnd, EK::PrematureEnd)
75                | (EK::RoutinesNotSupported, EK::RoutinesNotSupported)
76                | (EK::UnknownDeclaration, EK::UnknownDeclaration)
77                | (EK::MissingSpace, EK::MissingSpace)
78                | (EK::InvalidName, EK::InvalidName)
79                | (EK::UnknownStartState, EK::UnknownStartState)
80                | (EK::DuplicateStartState, EK::DuplicateStartState)
81                | (EK::InvalidStartState, EK::InvalidStartState)
82                | (EK::InvalidStartStateName, EK::InvalidStartStateName)
83                | (EK::DuplicateName, EK::DuplicateName)
84                | (
85                    EK::InvalidGrmtoolsSectionValue,
86                    EK::InvalidGrmtoolsSectionValue
87                )
88                | (EK::InvalidNumber, EK::InvalidNumber)
89                | (
90                    EK::DuplicateGrmtoolsSectionValue,
91                    EK::DuplicateGrmtoolsSectionValue
92                )
93                | (EK::RegexError(_), EK::RegexError(_))
94                | (EK::VerbatimNotSupported, EK::VerbatimNotSupported)
95        )
96    }
97}
98
99impl Spanned for LexBuildError {
100    fn spans(&self) -> &[Span] {
101        self.spans.as_slice()
102    }
103
104    fn spanskind(&self) -> SpansKind {
105        match self.kind {
106            LexErrorKind::PrematureEnd
107            | LexErrorKind::RoutinesNotSupported
108            | LexErrorKind::UnknownDeclaration
109            | LexErrorKind::MissingSpace
110            | LexErrorKind::InvalidName
111            | LexErrorKind::UnknownStartState
112            | LexErrorKind::InvalidStartState
113            | LexErrorKind::InvalidStartStateName
114            | LexErrorKind::InvalidGrmtoolsSectionValue
115            | LexErrorKind::InvalidNumber
116            | LexErrorKind::VerbatimNotSupported
117            | LexErrorKind::RegexError(_) => SpansKind::Error,
118            LexErrorKind::DuplicateName
119            | LexErrorKind::DuplicateStartState
120            | LexErrorKind::DuplicateGrmtoolsSectionValue => SpansKind::DuplicationError,
121        }
122    }
123}
124
125impl fmt::Display for LexBuildError {
126    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
127        let s = match &self.kind {
128            LexErrorKind::VerbatimNotSupported => "Verbatim code not supported",
129            LexErrorKind::PrematureEnd => "File ends prematurely",
130            LexErrorKind::RoutinesNotSupported => "Routines not currently supported",
131            LexErrorKind::UnknownDeclaration => "Unknown declaration",
132            LexErrorKind::MissingSpace => "Rule is missing a space",
133            LexErrorKind::InvalidName => "Invalid rule name",
134            LexErrorKind::UnknownStartState => "Start state not known",
135            LexErrorKind::DuplicateStartState => "Start state already exists",
136            LexErrorKind::InvalidStartState => "Invalid start state",
137            LexErrorKind::InvalidStartStateName => "Invalid start state name",
138            LexErrorKind::InvalidGrmtoolsSectionValue => "Invalid grmtools section value",
139            LexErrorKind::InvalidNumber => "Invalid number",
140            LexErrorKind::DuplicateGrmtoolsSectionValue => "Duplicate grmtools section value",
141            LexErrorKind::DuplicateName => "Rule name already exists",
142            LexErrorKind::RegexError(e) => return write!(f, "Invalid regular expression: {e}"),
143        };
144        write!(f, "{s}")
145    }
146}
147
/// An opaque identifier for a lexer start state.
///
/// The wrapped index is private and only settable through the crate-internal
/// constructor.
#[derive(Copy, Clone, Debug)]
pub struct StartStateId {
    _id: usize,
}

impl StartStateId {
    /// Wraps the raw index `id` in a `StartStateId`.
    fn new(id: usize) -> Self {
        StartStateId { _id: id }
    }
}
158
/// A lexing error.
///
/// Records the span the error covers and, optionally, the start state the lexer
/// was in when the error was detected.
#[derive(Clone, Debug)]
pub struct LRLexError {
    /// The span of input this error covers.
    span: Span,
    /// The lexer's start state at the time of the error, if one was recorded.
    lexing_state: Option<StartStateId>,
}
165
impl lrpar::LexError for LRLexError {
    /// Returns the span this error covers (a copy of the stored span).
    fn span(&self) -> Span {
        self.span
    }
}
171
172impl LRLexError {
173    /// Construct a new LRLex error covering `span`.
174    pub fn new(span: Span) -> Self {
175        LRLexError {
176            span,
177            lexing_state: None,
178        }
179    }
180
181    /// Construct a new LRLex error covering `span` for `lexing_state`.
182    pub fn new_with_lexing_state(span: Span, lexing_state: StartStateId) -> Self {
183        LRLexError {
184            span,
185            lexing_state: Some(lexing_state),
186        }
187    }
188
189    /// Returns the state, if there was one, that the lexer was in when the error was detected.
190    pub fn lexing_state(&self) -> Option<StartStateId> {
191        self.lexing_state
192    }
193}
194
// Marker impl: `Error` has no required methods; `LRLexError` derives `Debug`
// and implements `Display` separately in this file.
impl Error for LRLexError {}
196
197impl fmt::Display for LRLexError {
198    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
199        write!(
200            f,
201            "Couldn't lex input starting at byte {}",
202            self.span.start()
203        )
204    }
205}
206
/// Deprecated alias for `LRNonStreamingLexerDef`, kept so that code written
/// against the old name still compiles (with a deprecation warning).
#[deprecated(
    since = "0.8.0",
    note = "This struct has been renamed to LRNonStreamingLexerDef"
)]
pub type NonStreamingLexerDef<StorageT> = LRNonStreamingLexerDef<StorageT>;
212
/// A convenience macro for including statically compiled `.l` files. A file `src/a/b/c.l`
/// processed by [CTLexerBuilder::lexer_in_src_dir] can then be used in a crate with
/// `lrlex_mod!("a/b/c.l")`.
///
/// The macro expands to an `include!` of the path formed by joining `OUT_DIR`, `/`, the
/// given path, and the suffix `.rs`; `lrlex_mod!("a/b/c.l")` therefore includes
/// `$OUT_DIR/a/b/c.l.rs`.
///
/// Note that you can use `lrlex_mod` with [CTLexerBuilder::output_path] if, and only if, the
/// output file was placed in [std::env::var]`("OUT_DIR")` or one of its subdirectories.
#[macro_export]
macro_rules! lrlex_mod {
    ($path:expr) => {
        // `env!` reads `OUT_DIR` while the invoking crate is being compiled.
        include!(concat!(env!("OUT_DIR"), "/", $path, ".rs"));
    };
}
225
/// A private module whose `pub` items support the "sealed trait" pattern.
/// These items are used within the current crate. See the `unstable_api`
/// module for enabling their use outside the crate.
mod unstable {
    // The items below are only re-exported under optional features, so they
    // may appear unused or unreachable depending on the enabled feature set.
    #![allow(unused)]
    #![allow(unreachable_pub)]
    /// Key value taken by unstable functions.
    pub struct UnstableApi;
    /// Supertrait for traits that are considered unstable.
    pub trait UnstableTrait {}
}
235
/// A module for lifting restrictions on visibility by enabling unstable features.
///
/// See the sources for a complete list of features and members.
pub mod unstable_api {
    /// Unstable functions that take a value `UnstableApi` require
    /// the "_unstable_api" feature. This feature controls
    /// whether the value has `pub` visibility outside the crate.
    #[cfg(feature = "_unstable_api")]
    pub use crate::unstable::UnstableApi;

    /// This is a supertrait for traits that are considered to be unstable.
    /// Unstable traits do not provide any semver guarantees.
    ///
    /// Enabling the `_unsealed_unstable_traits` feature makes this supertrait
    /// publicly visible.
    ///
    ///
    /// Declaring an unstable API within the crate:
    /// ```ignore_rust
    /// // Within the crate use `crate::unstable::` .
    /// pub trait Foo: crate::unstable::UnstableTrait {
    ///     fn foo(key: crate::unstable::UnstableApi);
    /// }
    /// ```
    ///
    /// Implementing the trait outside the crate (requires feature `_unsealed_unstable_traits`):
    /// ```ignore_rust
    /// struct Bar;
    /// impl unstable_api::UnstableTrait for Bar{}
    /// impl Foo for Bar {
    ///   fn foo(key: unstable_api::UnstableApi) {
    ///     ...
    ///   }
    /// }
    /// ```
    ///
    ///
    /// Calling an implementation of the trait outside the crate (requires feature `_unstable_api`):
    /// ```ignore_rust
    ///   let x: &dyn Foo = ...;
    ///   x.foo(unstable_api::UnstableApi);
    /// ```
    #[cfg(feature = "_unsealed_unstable_traits")]
    pub use crate::unstable::UnstableTrait;

    /// A value that acts as a key to inform callers that they are
    /// calling an unstable internal API. This value is public by default.
    /// Access to it does not require any features to be enabled.
    ///
    /// Q. When should this be used?
    ///
    /// A. When generated code needs to call an internal API,
    /// and you do not want the caller to have to enable any features
    /// to use the generated code.
    pub struct InternalPublicApi;
}
289    /// to use the generated code.
290    pub struct InternalPublicApi;
291}