lrlex/
mod.rs

1//! `lrlex` is a partial replacement for [`lex`](http://dinosaur.compilertools.net/lex/index.html)
2//! / [`flex`](https://westes.github.io/flex/manual/). It takes in a `.l` file and statically
3//! compiles it to Rust code. The resulting [LRNonStreamingLexerDef] can then be given an input
4//! string, from which it instantiates an [LRNonStreamingLexer]. This provides an iterator which
5//! can produce the sequence of [lrpar::Lexeme]s for that input, as well as answer basic queries
6//! about [cfgrammar::Span]s (e.g. extracting substrings, calculating line and column numbers).
7
8#![allow(clippy::new_without_default)]
9#![allow(clippy::type_complexity)]
10#![allow(clippy::unnecessary_wraps)]
11#![allow(clippy::upper_case_acronyms)]
12#![forbid(unsafe_code)]
13#![deny(unreachable_pub)]
14
15use std::{error::Error, fmt};
16
17mod ctbuilder;
18#[doc(hidden)]
19pub mod defaults;
20mod lexer;
21mod parser;
22
23#[allow(deprecated)]
24pub use crate::{
25    ctbuilder::{
26        CTLexer, CTLexerBuilder, CTTokenMapBuilder, LexerKind, RustEdition, Visibility,
27        ct_token_map,
28    },
29    defaults::{DefaultLexeme, DefaultLexerTypes},
30    lexer::{
31        DEFAULT_LEX_FLAGS, LRNonStreamingLexer, LRNonStreamingLexerDef, LexFlags, LexerDef, Rule,
32        UNSPECIFIED_LEX_FLAGS,
33    },
34    parser::StartState,
35    parser::StartStateOperation,
36};
37
38use cfgrammar::header::{HeaderError, HeaderErrorKind};
39use cfgrammar::yacc::parser::SpansKind;
40use cfgrammar::{Span, Spanned};
41
/// Shorthand for a `Result` whose error case is a list of [LexBuildError]s.
pub type LexBuildResult<T> = Result<T, Vec<LexBuildError>>;
43
/// Any error from the Lex parser returns an instance of this struct.
///
/// An error pairs a [LexErrorKind] with the [Span]s it refers to; the spans are made
/// available through this type's [Spanned] implementation.
#[derive(Debug)]
pub struct LexBuildError {
    /// Which kind of error occurred.
    pub(crate) kind: LexErrorKind,
    /// The source locations associated with the error.
    pub(crate) spans: Vec<Span>,
}

// No methods are overridden: the default `Error` behaviour is used as-is.
impl Error for LexBuildError {}
52
/// The various different possible Lex parser errors.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum LexErrorKind {
    /// The file ends prematurely.
    PrematureEnd,
    /// Routines are not currently supported.
    RoutinesNotSupported,
    /// An unknown declaration was encountered.
    UnknownDeclaration,
    /// A rule is missing a space.
    MissingSpace,
    /// A rule name is invalid.
    InvalidName,
    /// A start state is not known.
    UnknownStartState,
    /// A start state already exists.
    DuplicateStartState,
    /// A start state is invalid.
    InvalidStartState,
    /// A start state name is invalid.
    InvalidStartStateName,
    /// A rule name already exists.
    DuplicateName,
    /// A regular expression is invalid; carries the underlying [regex::Error].
    RegexError(regex::Error),
    /// Verbatim code is not supported.
    VerbatimNotSupported,
    /// An error in the `%grmtools` section, together with the [SpansKind] that the
    /// error's spans should be reported as.
    Header(HeaderErrorKind, SpansKind),
}
71
72impl LexErrorKind {
73    fn is_same_kind(&self, other: &Self) -> bool {
74        use LexErrorKind as EK;
75        matches!(
76            (self, other),
77            (EK::PrematureEnd, EK::PrematureEnd)
78                | (EK::RoutinesNotSupported, EK::RoutinesNotSupported)
79                | (EK::UnknownDeclaration, EK::UnknownDeclaration)
80                | (EK::MissingSpace, EK::MissingSpace)
81                | (EK::InvalidName, EK::InvalidName)
82                | (EK::UnknownStartState, EK::UnknownStartState)
83                | (EK::DuplicateStartState, EK::DuplicateStartState)
84                | (EK::InvalidStartState, EK::InvalidStartState)
85                | (EK::InvalidStartStateName, EK::InvalidStartStateName)
86                | (EK::DuplicateName, EK::DuplicateName)
87                | (EK::RegexError(_), EK::RegexError(_))
88                | (EK::VerbatimNotSupported, EK::VerbatimNotSupported)
89        )
90    }
91}
92
93impl Spanned for LexBuildError {
94    fn spans(&self) -> &[Span] {
95        self.spans.as_slice()
96    }
97
98    fn spanskind(&self) -> SpansKind {
99        match self.kind {
100            LexErrorKind::PrematureEnd
101            | LexErrorKind::RoutinesNotSupported
102            | LexErrorKind::UnknownDeclaration
103            | LexErrorKind::MissingSpace
104            | LexErrorKind::InvalidName
105            | LexErrorKind::UnknownStartState
106            | LexErrorKind::InvalidStartState
107            | LexErrorKind::InvalidStartStateName
108            | LexErrorKind::VerbatimNotSupported
109            | LexErrorKind::RegexError(_) => SpansKind::Error,
110            LexErrorKind::DuplicateName | LexErrorKind::DuplicateStartState => {
111                SpansKind::DuplicationError
112            }
113            LexErrorKind::Header(_, spanskind) => spanskind,
114        }
115    }
116}
117
118impl fmt::Display for LexBuildError {
119    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
120        let s = match &self.kind {
121            LexErrorKind::VerbatimNotSupported => "Verbatim code not supported",
122            LexErrorKind::PrematureEnd => "File ends prematurely",
123            LexErrorKind::RoutinesNotSupported => "Routines not currently supported",
124            LexErrorKind::UnknownDeclaration => "Unknown declaration",
125            LexErrorKind::MissingSpace => "Rule is missing a space",
126            LexErrorKind::InvalidName => "Invalid rule name",
127            LexErrorKind::UnknownStartState => "Start state not known",
128            LexErrorKind::DuplicateStartState => "Start state already exists",
129            LexErrorKind::InvalidStartState => "Invalid start state",
130            LexErrorKind::InvalidStartStateName => "Invalid start state name",
131            LexErrorKind::DuplicateName => "Rule name already exists",
132            LexErrorKind::RegexError(e) => return write!(f, "Invalid regular expression: {e}"),
133            LexErrorKind::Header(e, _) => return write!(f, "In '%grmtools' section {e}"),
134        };
135        write!(f, "{s}")
136    }
137}
138
139impl From<HeaderError<Span>> for LexBuildError {
140    fn from(e: HeaderError<Span>) -> LexBuildError {
141        LexBuildError {
142            kind: LexErrorKind::Header(e.kind, e.spanskind()),
143            spans: e.locations,
144        }
145    }
146}
147
/// An opaque identifier wrapping a `usize`.
///
/// [LRLexError] uses it to report the state the lexer was in when an error was detected.
#[derive(Copy, Clone, Debug)]
pub struct StartStateId {
    _id: usize,
}

impl StartStateId {
    /// Wraps the raw index `id`.
    fn new(id: usize) -> Self {
        StartStateId { _id: id }
    }
}
158
/// A Lexing error.
#[derive(Clone, Debug)]
pub struct LRLexError {
    /// The span of input that the error covers.
    span: Span,
    /// The state the lexer was in when the error was detected, if one was recorded.
    lexing_state: Option<StartStateId>,
}
165
impl lrpar::LexError for LRLexError {
    /// Returns the span that this error covers.
    fn span(&self) -> Span {
        self.span
    }
}
171
172impl LRLexError {
173    /// Construct a new LRLex error covering `span`.
174    pub fn new(span: Span) -> Self {
175        LRLexError {
176            span,
177            lexing_state: None,
178        }
179    }
180
181    /// Construct a new LRLex error covering `span` for `lexing_state`.
182    pub fn new_with_lexing_state(span: Span, lexing_state: StartStateId) -> Self {
183        LRLexError {
184            span,
185            lexing_state: Some(lexing_state),
186        }
187    }
188
189    /// Returns the state, if there was one, that the lexer was in when the error was detected.
190    pub fn lexing_state(&self) -> Option<StartStateId> {
191        self.lexing_state
192    }
193}
194
195impl Error for LRLexError {}
196
197impl fmt::Display for LRLexError {
198    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
199        write!(
200            f,
201            "Couldn't lex input starting at byte {}",
202            self.span.start()
203        )
204    }
205}
206
/// Deprecated alias for [LRNonStreamingLexerDef] under its previous name.
#[deprecated(
    since = "0.8.0",
    note = "This struct has been renamed to LRNonStreamingLexerDef"
)]
pub type NonStreamingLexerDef<StorageT> = LRNonStreamingLexerDef<StorageT>;
212
/// A convenience macro for including statically compiled `.l` files. A file `src/a/b/c.l`
/// processed by [CTLexerBuilder::lexer_in_src_dir] can then be used in a crate with
/// `lrlex_mod!("a/b/c.l")`.
///
/// The macro expands to an `include!` of the file `<path>.rs` inside the directory named by
/// the `OUT_DIR` environment variable, which is read at compile time.
///
/// Note that you can use `lrlex_mod` with [CTLexerBuilder::output_path] if, and only if, the
/// output file was placed in [std::env::var]`("OUT_DIR")` or one of its subdirectories.
#[macro_export]
macro_rules! lrlex_mod {
    ($path:expr) => {
        include!(concat!(env!("OUT_DIR"), "/", $path, ".rs"));
    };
}
225
/// A private module whose `pub` items are directly related to the "sealed trait" pattern.
/// These items are used within the current crate. See the `unstable_api` module for
/// enabling usage outside the crate.
mod unstable {
    #![allow(unused)]
    #![allow(unreachable_pub)]
    pub struct UnstableApi;
    pub trait UnstableTrait {}
}
235
/// A module for lifting restrictions on visibility by enabling unstable features.
///
/// See the sources for a complete list of features and members.
pub mod unstable_api {
    /// Unstable functions that take a value `UnstableApi` require
    /// the "_unstable_api" feature. This feature controls
    /// whether the value has `pub` visibility outside the crate.
    #[cfg(feature = "_unstable_api")]
    pub use crate::unstable::UnstableApi;

    /// This is a supertrait for traits that are considered to be unstable.
    /// Unstable traits do not provide any semver guarantees.
    ///
    /// Enabling the `_unsealed_unstable_traits` feature makes this supertrait publicly
    /// visible.
    ///
    ///
    /// Declaring an unstable API within the crate:
    /// ```ignore_rust
    /// // Within the crate use `crate::unstable::` .
    /// pub trait Foo: crate::unstable::UnstableTrait {
    ///     fn foo(key: crate::unstable::UnstableApi);
    /// }
    /// ```
    ///
    /// Implementing the trait outside the crate (requires feature `_unsealed_unstable_traits`):
    /// ```ignore_rust
    /// struct Bar;
    /// impl unstable_api::UnstableTrait for Bar{}
    /// impl Foo for Bar {
    ///   fn foo(key: unstable_api::UnstableApi) {
    ///     ...
    ///   }
    /// }
    /// ```
    ///
    ///
    /// Calling an implementation of the trait outside the crate (requires feature `_unstable_api`):
    /// ```ignore_rust
    ///   let x: &dyn Foo = ...;
    ///   x.foo(unstable_api::UnstableApi);
    /// ```
    #[cfg(feature = "_unsealed_unstable_traits")]
    pub use crate::unstable::UnstableTrait;

    /// A value that acts as a key to inform callers that they are
    /// calling an unstable internal API. This value is public by default.
    /// Access to it does not require any features to be enabled.
    ///
    /// Q. When should this be used?
    ///
    /// A. When generated code needs to call an internal API,
    /// and you do not want the caller to have to enable any features
    /// to use the generated code.
    pub struct InternalPublicApi;
}