lrlex/
mod.rs

1//! `lrlex` is a partial replacement for [`lex`](http://dinosaur.compilertools.net/lex/index.html)
2//! / [`flex`](https://westes.github.io/flex/manual/). It takes in a `.l` file and statically
3//! compiles it to Rust code. The resulting [LRNonStreamingLexerDef] can then be given an input
4//! string, from which it instantiates an [LRNonStreamingLexer]. This provides an iterator which
5//! can produce the sequence of [lrpar::Lexeme]s for that input, as well as answer basic queries
6//! about [cfgrammar::Span]s (e.g. extracting substrings, calculating line and column numbers).
7
8#![allow(clippy::new_without_default)]
9#![allow(clippy::type_complexity)]
10#![allow(clippy::unnecessary_wraps)]
11#![allow(clippy::upper_case_acronyms)]
12#![forbid(unsafe_code)]
13#![deny(unreachable_pub)]
14
15use std::{error::Error, fmt};
16
17mod ctbuilder;
18#[doc(hidden)]
19pub mod defaults;
20mod lexer;
21mod parser;
22
23pub use crate::{
24    ctbuilder::{ct_token_map, CTLexer, CTLexerBuilder, LexerKind, RustEdition, Visibility},
25    defaults::{DefaultLexeme, DefaultLexerTypes},
26    lexer::{
27        LRNonStreamingLexer, LRNonStreamingLexerDef, LexFlags, LexerDef, Rule, DEFAULT_LEX_FLAGS,
28        UNSPECIFIED_LEX_FLAGS,
29    },
30    parser::StartState,
31    parser::StartStateOperation,
32};
33
34use cfgrammar::header::{HeaderError, HeaderErrorKind};
35use cfgrammar::yacc::parser::SpansKind;
36use cfgrammar::{Span, Spanned};
37
/// A `Result` whose error side is a list of [LexBuildError]s.
pub type LexBuildResult<T> = Result<T, Vec<LexBuildError>>;
39
/// Any error from the Lex parser returns an instance of this struct.
#[derive(Debug)]
pub struct LexBuildError {
    /// Which kind of error occurred.
    pub(crate) kind: LexErrorKind,
    /// The spans associated with this error (exposed through [Spanned::spans]).
    pub(crate) spans: Vec<Span>,
}

// Marker impl: `Debug` is derived above and `Display` is implemented further down in this file.
impl Error for LexBuildError {}
48
/// The various different possible Lex parser errors.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum LexErrorKind {
    /// The file ends prematurely.
    PrematureEnd,
    /// Routines are not currently supported.
    RoutinesNotSupported,
    /// An unknown declaration was encountered.
    UnknownDeclaration,
    /// A rule is missing a space.
    MissingSpace,
    /// A rule name is invalid.
    InvalidName,
    /// A start state is not known.
    UnknownStartState,
    /// A start state already exists.
    DuplicateStartState,
    /// A start state is invalid.
    InvalidStartState,
    /// A start state name is invalid.
    InvalidStartStateName,
    /// A rule name already exists.
    DuplicateName,
    /// A regular expression is invalid; carries the underlying [regex::Error].
    RegexError(regex::Error),
    /// Verbatim code is not supported.
    VerbatimNotSupported,
    /// An error in the `%grmtools` section, together with the [SpansKind] of its spans.
    Header(HeaderErrorKind, SpansKind),
}
67
68impl LexErrorKind {
69    fn is_same_kind(&self, other: &Self) -> bool {
70        use LexErrorKind as EK;
71        matches!(
72            (self, other),
73            (EK::PrematureEnd, EK::PrematureEnd)
74                | (EK::RoutinesNotSupported, EK::RoutinesNotSupported)
75                | (EK::UnknownDeclaration, EK::UnknownDeclaration)
76                | (EK::MissingSpace, EK::MissingSpace)
77                | (EK::InvalidName, EK::InvalidName)
78                | (EK::UnknownStartState, EK::UnknownStartState)
79                | (EK::DuplicateStartState, EK::DuplicateStartState)
80                | (EK::InvalidStartState, EK::InvalidStartState)
81                | (EK::InvalidStartStateName, EK::InvalidStartStateName)
82                | (EK::DuplicateName, EK::DuplicateName)
83                | (EK::RegexError(_), EK::RegexError(_))
84                | (EK::VerbatimNotSupported, EK::VerbatimNotSupported)
85        )
86    }
87}
88
89impl Spanned for LexBuildError {
90    fn spans(&self) -> &[Span] {
91        self.spans.as_slice()
92    }
93
94    fn spanskind(&self) -> SpansKind {
95        match self.kind {
96            LexErrorKind::PrematureEnd
97            | LexErrorKind::RoutinesNotSupported
98            | LexErrorKind::UnknownDeclaration
99            | LexErrorKind::MissingSpace
100            | LexErrorKind::InvalidName
101            | LexErrorKind::UnknownStartState
102            | LexErrorKind::InvalidStartState
103            | LexErrorKind::InvalidStartStateName
104            | LexErrorKind::VerbatimNotSupported
105            | LexErrorKind::RegexError(_) => SpansKind::Error,
106            LexErrorKind::DuplicateName | LexErrorKind::DuplicateStartState => {
107                SpansKind::DuplicationError
108            }
109            LexErrorKind::Header(_, spanskind) => spanskind,
110        }
111    }
112}
113
114impl fmt::Display for LexBuildError {
115    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
116        let s = match &self.kind {
117            LexErrorKind::VerbatimNotSupported => "Verbatim code not supported",
118            LexErrorKind::PrematureEnd => "File ends prematurely",
119            LexErrorKind::RoutinesNotSupported => "Routines not currently supported",
120            LexErrorKind::UnknownDeclaration => "Unknown declaration",
121            LexErrorKind::MissingSpace => "Rule is missing a space",
122            LexErrorKind::InvalidName => "Invalid rule name",
123            LexErrorKind::UnknownStartState => "Start state not known",
124            LexErrorKind::DuplicateStartState => "Start state already exists",
125            LexErrorKind::InvalidStartState => "Invalid start state",
126            LexErrorKind::InvalidStartStateName => "Invalid start state name",
127            LexErrorKind::DuplicateName => "Rule name already exists",
128            LexErrorKind::RegexError(e) => return write!(f, "Invalid regular expression: {e}"),
129            LexErrorKind::Header(e, _) => return write!(f, "In '%grmtools' section {e}"),
130        };
131        write!(f, "{s}")
132    }
133}
134
135impl From<HeaderError<Span>> for LexBuildError {
136    fn from(e: HeaderError<Span>) -> LexBuildError {
137        LexBuildError {
138            kind: LexErrorKind::Header(e.kind, e.spanskind()),
139            spans: e.locations,
140        }
141    }
142}
143
/// An identifier for a start state, wrapping a raw `usize`.
#[derive(Clone, Copy, Debug)]
pub struct StartStateId {
    // Stored only; nothing visible in this part of the file reads it back (hence the
    // underscore prefix).
    _id: usize,
}

impl StartStateId {
    /// Wraps the raw index `id` in a `StartStateId`.
    fn new(id: usize) -> Self {
        StartStateId { _id: id }
    }
}
154
/// A Lexing error.
#[derive(Clone, Debug)]
pub struct LRLexError {
    /// The span this error covers.
    span: Span,
    /// The state, if there was one, that the lexer was in when the error was detected.
    lexing_state: Option<StartStateId>,
}
161
impl lrpar::LexError for LRLexError {
    /// Returns the span this error covers.
    fn span(&self) -> Span {
        self.span
    }
}
167
168impl LRLexError {
169    /// Construct a new LRLex error covering `span`.
170    pub fn new(span: Span) -> Self {
171        LRLexError {
172            span,
173            lexing_state: None,
174        }
175    }
176
177    /// Construct a new LRLex error covering `span` for `lexing_state`.
178    pub fn new_with_lexing_state(span: Span, lexing_state: StartStateId) -> Self {
179        LRLexError {
180            span,
181            lexing_state: Some(lexing_state),
182        }
183    }
184
185    /// Returns the state, if there was one, that the lexer was in when the error was detected.
186    pub fn lexing_state(&self) -> Option<StartStateId> {
187        self.lexing_state
188    }
189}
190
191impl Error for LRLexError {}
192
193impl fmt::Display for LRLexError {
194    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
195        write!(
196            f,
197            "Couldn't lex input starting at byte {}",
198            self.span.start()
199        )
200    }
201}
202
/// Deprecated alias for [LRNonStreamingLexerDef], which is the current name of this type.
#[deprecated(
    since = "0.8.0",
    note = "This struct has been renamed to LRNonStreamingLexerDef"
)]
pub type NonStreamingLexerDef<StorageT> = LRNonStreamingLexerDef<StorageT>;
208
/// A convenience macro for including statically compiled `.l` files. A file `src/a/b/c.l`
/// processed by [CTLexerBuilder::lexer_in_src_dir] can then be used in a crate with
/// `lrlex_mod!("a/b/c.l")`.
///
/// Note that you can use `lrlex_mod` with [CTLexerBuilder::output_path] if, and only if, the
/// output file was placed in [std::env::var]`("OUT_DIR")` or one of its subdirectories.
///
/// The macro expands to an `include!` of the file `<OUT_DIR>/<path>.rs`, where `OUT_DIR` is
/// read at compile time with `env!`.
#[macro_export]
macro_rules! lrlex_mod {
    ($path:expr) => {
        // `$path` is appended to `OUT_DIR` and given an extra `.rs` suffix ("a/b/c.l" becomes
        // "<OUT_DIR>/a/b/c.l.rs").
        include!(concat!(env!("OUT_DIR"), "/", $path, ".rs"));
    };
}
221
/// A private module containing the `pub` items behind the "sealed trait" pattern. These items
/// are used within the current crate. See the `unstable_api` module for enabling their usage
/// outside the crate.
mod unstable {
    // These items are `pub` inside a private module and are only re-exported from
    // `unstable_api` behind feature flags, hence the two lint allowances below.
    #![allow(unused)]
    #![allow(unreachable_pub)]
    /// Key value taken by unstable functions; re-exported by `unstable_api` when the
    /// `_unstable_api` feature is enabled.
    pub struct UnstableApi;
    /// Supertrait for unstable traits; re-exported by `unstable_api` when the
    /// `_unsealed_unstable_traits` feature is enabled.
    pub trait UnstableTrait {}
}
231
/// A module for lifting restrictions on visibility by enabling unstable features.
///
/// See the sources for a complete list of features and members.
pub mod unstable_api {
    /// Unstable functions that take a value `UnstableApi` require
    /// the "_unstable_api" feature. This feature controls
    /// whether the value has `pub` visibility outside the crate.
    #[cfg(feature = "_unstable_api")]
    pub use crate::unstable::UnstableApi;

    /// This is a supertrait for traits that are considered to be unstable.
    /// Unstable traits do not provide any semver guarantees.
    ///
    /// Enabling the `_unsealed_unstable_traits` feature makes this supertrait publicly
    /// visible.
    ///
    ///
    /// Declaring an unstable API within the crate:
    /// ```ignore_rust
    /// // Within the crate use `crate::unstable::` .
    /// pub trait Foo: crate::unstable::UnstableTrait {
    ///     fn foo(key: crate::unstable::UnstableApi);
    /// }
    /// ```
    ///
    /// Deriving the trait outside the crate (requires feature `_unsealed_unstable_traits`):
    /// ```ignore_rust
    /// struct Bar;
    /// impl unstable_api::UnstableTrait for Bar{}
    /// impl Foo for Bar {
    ///   fn foo(key: unstable_api::UnstableApi) {
    ///     ...
    ///   }
    /// }
    /// ```
    ///
    ///
    /// Calling an implementation of the trait outside the crate (requires feature
    /// `_unstable_api`):
    /// ```ignore_rust
    ///   let x: &dyn Foo = ...;
    ///   x.foo(unstable_api::UnstableApi);
    /// ```
    #[cfg(feature = "_unsealed_unstable_traits")]
    pub use crate::unstable::UnstableTrait;

    /// A value that acts as a key to inform callers that they are
    /// calling an unstable internal API. This value is public by default.
    /// Access to it does not require any features to be enabled.
    ///
    /// Q. When should this be used?
    ///
    /// A. When generated code needs to call an internal API,
    /// and you do not want the caller to have to enable any features
    /// to use the generated code.
    pub struct InternalPublicApi;
}