#![allow(clippy::derive_partial_eq_without_eq)]
use std::{
error::Error,
fmt::{self, Debug, Display, Write as _},
hash::Hash,
marker::PhantomData,
time::{Duration, Instant},
vec,
};
use cactus::Cactus;
use cfgrammar::{yacc::YaccGrammar, RIdx, Span, TIdx};
use lrtable::{Action, StIdx, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::{cpctplus, LexError, Lexeme, LexerTypes, NonStreamingLexer};
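// The total time (in milliseconds) that error recovery may consume over the
// course of a single parse.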
#[cfg(test)]
const RECOVERY_TIME_BUDGET: u64 = 60_000;
#[cfg(not(test))]
const RECOVERY_TIME_BUDGET: u64 = 500;
/// A generic parse tree: terminals wrap a single lexeme; nonterminals
/// reference a rule and own the subtrees of that rule's production.
#[derive(Debug, Clone, PartialEq)]
pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
Term { lexeme: LexemeT },
Nonterm {
ridx: RIdx<StorageT>,
nodes: Vec<Node<LexemeT, StorageT>>,
},
}
impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
where
usize: AsPrimitive<StorageT>,
{
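/// Return a pretty-printed version of this tree: one node per line, indented
/// by depth, with each terminal's text resolved from `input`.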
pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
// Walk the tree with an explicit work stack of (indent, node) pairs,
// avoiding recursion on deep trees.
let mut st = vec![(0, self)];
let mut s = String::new();
while let Some((indent, e)) = st.pop() {
for _ in 0..indent {
s.push(' ');
}
match *e {
Node::Term { lexeme } => {
let tidx = TIdx(lexeme.tok_id());
let tn = grm.token_name(tidx).unwrap();
let lt = &input[lexeme.span().start()..lexeme.span().end()];
writeln!(s, "{} {}", tn, lt).ok();
}
Node::Nonterm { ridx, ref nodes } => {
writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
for x in nodes.iter().rev() {
st.push((indent + 1, x));
}
}
}
}
s
}
}
/// The parse stack of state indices.
type PStack<StorageT> = Vec<StIdx<StorageT>>;
/// A function assigning each token a (non-zero) cost, used by error recovery.
type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
RIdx<StorageT>,
&'b dyn NonStreamingLexer<'input, LexerTypesT>,
Span,
vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
ParamT,
) -> ActionT;
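/// An entry on the action stack: either the value produced by a user action
/// (after a reduce) or a lexeme (after a shift).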
#[derive(Debug)]
pub enum AStackType<LexemeT, ActionT> {
ActionType(ActionT),
Lexeme(LexemeT),
}
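/// The core LR parser, parameterised by the value type `ActionT` produced on
/// each reduction and a user parameter `ParamT` passed to every action.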
pub(super) struct Parser<
'a,
'b: 'a,
'input: 'b,
StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
ActionT: 'a,
ParamT: Clone,
> where
usize: AsPrimitive<StorageT>,
{
rcvry_kind: RecoveryKind,
pub(super) grm: &'a YaccGrammar<StorageT>,
pub(super) token_cost: Box<TokenCostFn<'a, StorageT>>,
pub(super) stable: &'a StateTable<StorageT>,
lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
param: ParamT,
}
impl<
'a,
'b: 'a,
'input: 'b,
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> Parser<'a, 'b, 'input, StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>
where
usize: AsPrimitive<StorageT>,
{
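/// Parse the lexemes into a generic parse tree. Returns the tree root (if
/// the input was accepted, possibly after repairs) together with all errors
/// encountered along the way.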
fn parse_generictree(
rcvry_kind: RecoveryKind,
grm: &YaccGrammar<StorageT>,
token_cost: TokenCostFn<'a, StorageT>,
stable: &StateTable<StorageT>,
lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
lexemes: Vec<LexerTypesT::LexemeT>,
) -> (
Option<Node<LexerTypesT::LexemeT, StorageT>>,
Vec<LexParseError<StorageT, LexerTypesT>>,
) {
for tidx in grm.iter_tidxs() {
assert!(token_cost(tidx) > 0);
}
let mut actions: Vec<
ActionFn<
'a,
'b,
'input,
StorageT,
LexerTypesT,
Node<LexerTypesT::LexemeT, StorageT>,
(),
>,
> = Vec::new();
actions.resize(usize::from(grm.prods_len()), &action_generictree);
let psr = Parser {
rcvry_kind,
grm,
token_cost: Box::new(token_cost),
stable,
lexer,
lexemes,
actions: actions.as_slice(),
param: (),
};
let mut pstack = vec![stable.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
(accpt, errors)
}
}
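/// The action used by `parse_generictree`: wraps each shifted lexeme in a
/// `Node::Term` and collects the production's children into a `Node::Nonterm`.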
pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
ridx: RIdx<StorageT>,
_lexer: &dyn NonStreamingLexer<LexerTypesT>,
_span: Span,
astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
_param: (),
) -> Node<LexerTypesT::LexemeT, StorageT>
where
usize: AsPrimitive<LexerTypesT::StorageT>,
LexerTypesT::LexemeT: Lexeme<StorageT>,
{
let mut nodes = Vec::with_capacity(astack.len());
for a in astack {
nodes.push(match a {
AStackType::ActionType(n) => n,
AStackType::Lexeme(lexeme) => Node::Term { lexeme },
});
}
Node::Nonterm { ridx, nodes }
}
impl<
'a,
'b: 'a,
'input: 'b,
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> Parser<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>
where
usize: AsPrimitive<StorageT>,
{
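/// Parse the lexemes while running no-op actions, returning only the errors
/// encountered.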
fn parse_noaction(
rcvry_kind: RecoveryKind,
grm: &YaccGrammar<StorageT>,
token_cost: TokenCostFn<'a, StorageT>,
stable: &StateTable<StorageT>,
lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
lexemes: Vec<LexerTypesT::LexemeT>,
) -> Vec<LexParseError<StorageT, LexerTypesT>> {
for tidx in grm.iter_tidxs() {
assert!(token_cost(tidx) > 0);
}
let mut actions: Vec<ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>> = Vec::new();
actions.resize(usize::from(grm.prods_len()), &Parser::noaction);
let psr = Parser {
rcvry_kind,
grm,
token_cost: Box::new(token_cost),
stable,
lexer,
lexemes,
actions: actions.as_slice(),
param: (),
};
let mut pstack = vec![stable.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
errors
}
fn noaction(
_ridx: RIdx<StorageT>,
_lexer: &dyn NonStreamingLexer<LexerTypesT>,
_span: Span,
_astack: vec::Drain<AStackType<LexerTypesT::LexemeT, ()>>,
_param: (),
) {
}
}
impl<
'a,
'b: 'a,
'input: 'b,
StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
ActionT: 'a,
ParamT: Clone,
> Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
where
usize: AsPrimitive<StorageT>,
{
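/// Parse the lexemes, running one user-supplied action per reduction and
/// threading `param` through each call. Returns the accept value (if the
/// input was accepted) and all errors encountered.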
fn parse_actions(
rcvry_kind: RecoveryKind,
grm: &'a YaccGrammar<StorageT>,
token_cost: TokenCostFn<'a, StorageT>,
stable: &'a StateTable<StorageT>,
lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
lexemes: Vec<LexerTypesT::LexemeT>,
actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
param: ParamT,
) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
for tidx in grm.iter_tidxs() {
assert!(token_cost(tidx) > 0);
}
let psr = Parser {
rcvry_kind,
grm,
token_cost: Box::new(token_cost),
stable,
lexer,
lexemes,
actions,
param,
};
let mut pstack = vec![stable.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
(accpt, errors)
}
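/// The main LR driver: repeatedly look up the action for the current state
/// and lookahead, then shift, reduce, or accept. On error, hand over to the
/// configured recoverer (subject to a time budget); if it finds repairs,
/// parsing resumes, otherwise `None` is returned.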
fn lr(
&self,
mut laidx: usize,
pstack: &mut PStack<StorageT>,
astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
spans: &mut Vec<Span>,
) -> Option<ActionT> {
let mut recoverer = None;
let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
loop {
debug_assert_eq!(astack.len(), spans.len());
let stidx = *pstack.last().unwrap();
let la_tidx = self.next_tidx(laidx);
match self.stable.action(stidx, la_tidx) {
Action::Reduce(pidx) => {
let ridx = self.grm.prod_to_rule(pidx);
let pop_idx = pstack.len() - self.grm.prod(pidx).len();
pstack.drain(pop_idx..);
let prior = *pstack.last().unwrap();
pstack.push(self.stable.goto(prior, ridx).unwrap());
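// Work out a span covering everything this production consumed, from
// its first popped symbol to its last. Empty productions reuse the
// span of the most recently parsed item ((0, 0) at the very start).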
let span = if spans.is_empty() {
Span::new(0, 0)
} else if pop_idx - 1 < spans.len() {
Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
} else {
Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
};
spans.truncate(pop_idx - 1);
spans.push(span);
let v = AStackType::ActionType(self.actions[usize::from(pidx)](
ridx,
self.lexer,
span,
astack.drain(pop_idx - 1..),
self.param.clone(),
));
astack.push(v);
}
Action::Shift(state_id) => {
let la_lexeme = self.next_lexeme(laidx);
pstack.push(state_id);
astack.push(AStackType::Lexeme(la_lexeme));
spans.push(la_lexeme.span());
laidx += 1;
}
Action::Accept => {
debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
debug_assert_eq!(astack.len(), 1);
match astack.drain(..).next().unwrap() {
AStackType::ActionType(v) => return Some(v),
_ => unreachable!(),
}
}
Action::Error => {
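// Build the recoverer lazily on the first error; if recovery is
// disabled, report the error and give up immediately.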
if recoverer.is_none() {
recoverer = Some(match self.rcvry_kind {
RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
RecoveryKind::None => {
let la_lexeme = self.next_lexeme(laidx);
errors.push(
ParseError {
stidx,
lexeme: la_lexeme,
repairs: vec![],
}
.into(),
);
return None;
}
});
}
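// All recovery attempts in a parse share one time budget; deduct
// however long this attempt takes from what remains.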
let before = Instant::now();
let finish_by = before + recovery_budget;
let (new_laidx, repairs) = recoverer
.as_ref()
.unwrap()
.as_ref()
.recover(finish_by, self, laidx, pstack, astack, spans);
let after = Instant::now();
recovery_budget = recovery_budget
.checked_sub(after - before)
.unwrap_or_else(|| Duration::new(0, 0));
let keep_going = !repairs.is_empty();
let la_lexeme = self.next_lexeme(laidx);
errors.push(
ParseError {
stidx,
lexeme: la_lexeme,
repairs,
}
.into(),
);
if !keep_going {
return None;
}
laidx = new_laidx;
}
}
}
}
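/// As `lr`, but with no error recovery: parse from `laidx` up to (but not
/// including) `end_laidx`, stopping early on accept or error, and return the
/// lookahead index reached. If `lexeme_prefix` is given, it is used as the
/// sole lookahead; `astack` and `spans` are only maintained if both are
/// `Some`.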
pub(super) fn lr_upto(
&self,
lexeme_prefix: Option<LexerTypesT::LexemeT>,
mut laidx: usize,
end_laidx: usize,
pstack: &mut PStack<StorageT>,
astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
spans: &mut Option<&mut Vec<Span>>,
) -> usize {
assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
while laidx != end_laidx && laidx <= self.lexemes.len() {
let stidx = *pstack.last().unwrap();
let la_tidx = if let Some(l) = lexeme_prefix {
TIdx(l.tok_id())
} else {
self.next_tidx(laidx)
};
match self.stable.action(stidx, la_tidx) {
Action::Reduce(pidx) => {
let ridx = self.grm.prod_to_rule(pidx);
let pop_idx = pstack.len() - self.grm.prod(pidx).len();
if let Some(ref mut astack_uw) = *astack {
if let Some(ref mut spans_uw) = *spans {
let span = if spans_uw.is_empty() {
Span::new(0, 0)
} else if pop_idx - 1 < spans_uw.len() {
Span::new(
spans_uw[pop_idx - 1].start(),
spans_uw[spans_uw.len() - 1].end(),
)
} else {
Span::new(
spans_uw[spans_uw.len() - 1].start(),
spans_uw[spans_uw.len() - 1].end(),
)
};
spans_uw.truncate(pop_idx - 1);
spans_uw.push(span);
let v = AStackType::ActionType(self.actions[usize::from(pidx)](
ridx,
self.lexer,
span,
astack_uw.drain(pop_idx - 1..),
self.param.clone(),
));
astack_uw.push(v);
} else {
unreachable!();
}
}
pstack.drain(pop_idx..);
let prior = *pstack.last().unwrap();
pstack.push(self.stable.goto(prior, ridx).unwrap());
}
Action::Shift(state_id) => {
if let Some(ref mut astack_uw) = *astack {
if let Some(ref mut spans_uw) = *spans {
let la_lexeme = if let Some(l) = lexeme_prefix {
l
} else {
self.next_lexeme(laidx)
};
astack_uw.push(AStackType::Lexeme(la_lexeme));
spans_uw.push(la_lexeme.span());
}
}
pstack.push(state_id);
laidx += 1;
}
Action::Accept => {
break;
}
Action::Error => {
break;
}
}
}
laidx
}
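/// Return the lexeme at index `laidx`. If the lexemes are exhausted,
/// synthesise a zero-length "faulty" EOF lexeme positioned at the end of the
/// last real lexeme.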
pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
let llen = self.lexemes.len();
debug_assert!(laidx <= llen);
if laidx < llen {
self.lexemes[laidx]
} else {
let last_la_end = if llen == 0 {
0
} else {
debug_assert!(laidx > 0);
let last_la = self.lexemes[laidx - 1];
last_la.span().end()
};
Lexeme::new_faulty(
StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
last_la_end,
0,
)
}
}
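/// Return the token index of the lexeme at `laidx`, or the EOF token index
/// if the lexemes are exhausted.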
pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
let ll = self.lexemes.len();
debug_assert!(laidx <= ll);
if laidx < ll {
TIdx(self.lexemes[laidx].tok_id())
} else {
self.grm.eof_token_idx()
}
}
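/// As `lr_upto`, but parse over an immutable `Cactus` parse stack, returning
/// the lookahead index reached and the resulting stack; a generic parse tree
/// is built if `tstack` is `Some`. The cactus representation lets callers
/// (notably error recovery) fork the stack cheaply.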
pub(super) fn lr_cactus(
&self,
lexeme_prefix: Option<LexerTypesT::LexemeT>,
mut laidx: usize,
end_laidx: usize,
mut pstack: Cactus<StIdx<StorageT>>,
tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
) -> (usize, Cactus<StIdx<StorageT>>) {
assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
while laidx != end_laidx {
let stidx = *pstack.val().unwrap();
let la_tidx = if let Some(l) = lexeme_prefix {
TIdx(l.tok_id())
} else {
self.next_tidx(laidx)
};
match self.stable.action(stidx, la_tidx) {
Action::Reduce(pidx) => {
let ridx = self.grm.prod_to_rule(pidx);
let pop_num = self.grm.prod(pidx).len();
if let Some(ref mut tstack_uw) = *tstack {
let nodes = tstack_uw
.drain(pstack.len() - pop_num - 1..)
.collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
tstack_uw.push(Node::Nonterm { ridx, nodes });
}
for _ in 0..pop_num {
pstack = pstack.parent().unwrap();
}
let prior = *pstack.val().unwrap();
pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
}
Action::Shift(state_id) => {
if let Some(ref mut tstack_uw) = *tstack {
let la_lexeme = if let Some(l) = lexeme_prefix {
l
} else {
self.next_lexeme(laidx)
};
tstack_uw.push(Node::Term { lexeme: la_lexeme });
}
pstack = pstack.child(state_id);
laidx += 1;
}
Action::Accept => {
debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
if let Some(ref tstack_uw) = *tstack {
debug_assert_eq!(tstack_uw.len(), 1);
}
break;
}
Action::Error => {
break;
}
}
}
(laidx, pstack)
}
}
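/// The interface to an error recovery algorithm: given the parser's current
/// state, try to find repair sequences before `finish_by`, returning the new
/// lookahead index and the repairs found (empty if recovery failed).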
pub(super) trait Recoverer<
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
ActionT,
ParamT: Clone,
> where
usize: AsPrimitive<StorageT>,
{
fn recover(
&self,
finish_by: Instant,
parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
in_laidx: usize,
in_pstack: &mut PStack<StorageT>,
astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
spans: &mut Vec<Span>,
) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
}
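/// Which error recovery algorithm the parser should use on a syntax error:
/// CPCT+ or none at all.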
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum RecoveryKind {
CPCTPlus,
None,
}
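/// A lexing or parsing error. The two are combined so that a parse can
/// report both kinds of error through a single type.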
#[derive(Debug)]
pub enum LexParseError<
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
usize: AsPrimitive<StorageT>,
{
LexError(LexerTypesT::LexErrorT),
ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
}
impl<StorageT: 'static + Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
LexParseError<StorageT, LexerTypesT>
where
usize: AsPrimitive<StorageT>,
{
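/// Return a pretty-printed version of this error, using the lexer for
/// line/column information and `epp` to map token indices to user-facing
/// names when reporting `Insert` repairs.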
pub fn pp<'a>(
&self,
lexer: &dyn NonStreamingLexer<LexerTypesT>,
epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
) -> String {
match self {
LexParseError::LexError(e) => {
let ((line, col), _) = lexer.line_col(e.span());
format!("Lexing error at line {} column {}.", line, col)
}
LexParseError::ParseError(e) => {
let ((line, col), _) = lexer.line_col(e.lexeme().span());
let mut out = format!("Parsing error at line {} column {}.", line, col);
let repairs_len = e.repairs().len();
if repairs_len == 0 {
out.push_str(" No repair sequences found.");
} else {
out.push_str(" Repair sequences found:");
for (i, rs) in e.repairs().iter().enumerate() {
let padding = ((repairs_len as f64).log10() as usize)
- (((i + 1) as f64).log10() as usize)
+ 1;
write!(out, "\n {}{}: ", " ".repeat(padding), i + 1).ok();
let mut rs_out = Vec::new();
let mut i = 0;
while i < rs.len() {
match rs[i] {
ParseRepair::Delete(l) => {
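// Collapse a run of `Delete` repairs over adjacent spans into a
// single "Delete ..." entry covering the whole region.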
let mut j = i + 1;
let mut last_end = l.span().end();
while j < rs.len() {
if let ParseRepair::Delete(next_l) = rs[j] {
if next_l.span().start() == last_end {
last_end = next_l.span().end();
j += 1;
continue;
}
}
break;
}
let t = &lexer
.span_str(Span::new(l.span().start(), last_end))
.replace('\n', "\\n");
rs_out.push(format!("Delete {}", t));
i = j;
}
ParseRepair::Insert(tidx) => {
rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
i += 1;
}
ParseRepair::Shift(l) => {
let t = &lexer.span_str(l.span()).replace('\n', "\\n");
rs_out.push(format!("Shift {}", t));
i += 1;
}
}
}
out.push_str(&rs_out.join(", "));
}
}
out
}
}
}
}
impl<
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> fmt::Display for LexParseError<StorageT, LexerTypesT>
where
usize: AsPrimitive<StorageT>,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
LexParseError::LexError(ref e) => Display::fmt(e, f),
LexParseError::ParseError(ref e) => Display::fmt(e, f),
}
}
}
impl<
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> Error for LexParseError<StorageT, LexerTypesT>
where
usize: AsPrimitive<StorageT>,
{
}
impl<
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
T: LexError,
> From<T> for LexParseError<StorageT, LexerTypesT>
where
usize: AsPrimitive<StorageT>,
{
fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
LexParseError::LexError(err)
}
}
impl<
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
where
usize: AsPrimitive<StorageT>,
{
fn from(
err: ParseError<LexerTypesT::LexemeT, StorageT>,
) -> LexParseError<StorageT, LexerTypesT> {
LexParseError::ParseError(err)
}
}
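/// Builds run-time parsers from a grammar and a state table.
///
/// A minimal usage sketch (assuming `grm`, `stable`, and a lexer implementing
/// `NonStreamingLexer` have been constructed elsewhere, as in the tests
/// below):
///
/// ```rust,ignore
/// let (tree, errs) = RTParserBuilder::new(&grm, &stable)
///     .recoverer(RecoveryKind::CPCTPlus)
///     .parse_generictree(&lexer);
/// ```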
pub struct RTParserBuilder<
'a,
StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
usize: AsPrimitive<StorageT>,
{
grm: &'a YaccGrammar<StorageT>,
stable: &'a StateTable<StorageT>,
recoverer: RecoveryKind,
term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
phantom: PhantomData<(StorageT, LexerTypesT)>,
}
impl<
'a,
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> RTParserBuilder<'a, StorageT, LexerTypesT>
where
usize: AsPrimitive<StorageT>,
{
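/// Create a new builder, defaulting to CPCT+ error recovery and a cost of 1
/// for every token.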
pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
RTParserBuilder {
grm,
stable,
recoverer: RecoveryKind::CPCTPlus,
term_costs: &|_| 1,
phantom: PhantomData,
}
}
pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
self.recoverer = rk;
self
}
pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
self.term_costs = f;
self
}
pub fn parse_generictree(
&self,
lexer: &dyn NonStreamingLexer<LexerTypesT>,
) -> (
Option<Node<LexerTypesT::LexemeT, StorageT>>,
Vec<LexParseError<StorageT, LexerTypesT>>,
) {
let mut lexemes = vec![];
for e in lexer.iter().collect::<Vec<_>>() {
match e {
Ok(l) => lexemes.push(l),
Err(e) => return (None, vec![e.into()]),
}
}
Parser::<StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>::parse_generictree(
self.recoverer,
self.grm,
self.term_costs,
self.stable,
lexer,
lexemes,
)
}
pub fn parse_noaction(
&self,
lexer: &dyn NonStreamingLexer<LexerTypesT>,
) -> Vec<LexParseError<StorageT, LexerTypesT>> {
let mut lexemes = vec![];
for e in lexer.iter().collect::<Vec<_>>() {
match e {
Ok(l) => lexemes.push(l),
Err(e) => return vec![e.into()],
}
}
Parser::<StorageT, LexerTypesT, (), ()>::parse_noaction(
self.recoverer,
self.grm,
self.term_costs,
self.stable,
lexer,
lexemes,
)
}
pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
&self,
lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
param: ParamT,
) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
let mut lexemes = vec![];
for e in lexer.iter().collect::<Vec<_>>() {
match e {
Ok(l) => lexemes.push(l),
Err(e) => return (None, vec![e.into()]),
}
}
Parser::parse_actions(
self.recoverer,
self.grm,
self.term_costs,
self.stable,
lexer,
lexemes,
actions,
param,
)
}
}
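/// A single repair suggested by error recovery: insert a token, delete a
/// lexeme, or shift a lexeme unchanged. Recovery proposes sequences of these.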
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
Insert(TIdx<StorageT>),
Delete(LexemeT),
Shift(LexemeT),
}
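/// A parse error: the state the parser was in, the lexeme it could not
/// handle, and zero or more repair sequences found by error recovery.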
#[derive(Clone, Debug, PartialEq)]
pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
stidx: StIdx<StorageT>,
lexeme: LexemeT,
repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
}
impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Parse error at lexeme {:?}", self.lexeme)
}
}
impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}
impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
pub fn stidx(&self) -> StIdx<StorageT> {
self.stidx
}
pub fn lexeme(&self) -> &LexemeT {
&self.lexeme
}
pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
&self.repairs
}
}
#[cfg(test)]
pub(crate) mod test {
use std::collections::HashMap;
use cfgrammar::{
yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
Span,
};
use lrtable::{from_yacc, Minimiser};
use num_traits::ToPrimitive;
use regex::Regex;
use super::*;
use crate::{
test_utils::{TestLexError, TestLexeme, TestLexerTypes},
Lexeme, Lexer,
};
pub(crate) fn do_parse<'input>(
rcvry_kind: RecoveryKind,
lexs: &str,
grms: &str,
input: &'input str,
) -> (
YaccGrammar<u16>,
SmallLexer<'input>,
Result<
Node<TestLexeme, u16>,
(
Option<Node<TestLexeme, u16>>,
Vec<LexParseError<u16, TestLexerTypes>>,
),
>,
) {
do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
}
fn do_parse_with_costs<'input>(
rcvry_kind: RecoveryKind,
lexs: &str,
grms: &str,
input: &'input str,
costs: &HashMap<&str, u8>,
) -> (
YaccGrammar<u16>,
SmallLexer<'input>,
Result<
Node<TestLexeme, u16>,
(
Option<Node<TestLexeme, u16>>,
Vec<LexParseError<u16, TestLexerTypes>>,
),
>,
) {
let grm = YaccGrammar::<u16>::new_with_storaget(
YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
grms,
)
.unwrap();
let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
let rule_ids = grm
.tokens_map()
.iter()
.map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
.collect();
let lexer_rules = small_lexer(lexs, rule_ids);
let lexemes = small_lex(lexer_rules, input);
let lexer = SmallLexer { lexemes, s: input };
let costs_tidx = costs
.iter()
.map(|(k, v)| (grm.token_idx(k).unwrap(), v))
.collect::<HashMap<_, _>>();
let (r, errs) = RTParserBuilder::new(&grm, &stable)
.recoverer(rcvry_kind)
.term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
.parse_generictree(&lexer);
if let Some(node) = r {
if errs.is_empty() {
(grm, lexer, Ok(node))
} else {
(grm, lexer, Err((Some(node), errs)))
}
} else {
(grm, lexer, Err((None, errs)))
}
}
fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
assert_eq!(expected, pt.unwrap().pp(&grm, input));
}
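/// A trivial lexer for tests: all lexemes are computed up front by
/// `small_lex` and simply replayed by `iter`.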
pub(crate) struct SmallLexer<'input> {
lexemes: Vec<TestLexeme>,
s: &'input str,
}
impl<'input> Lexer<TestLexerTypes> for SmallLexer<'input> {
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
Box::new(self.lexemes.iter().map(|x| Ok(*x)))
}
}
impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
fn span_str(&self, span: Span) -> &'input str {
&self.s[span.start()..span.end()]
}
fn span_lines_str(&self, _: Span) -> &'input str {
unreachable!();
}
fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
unreachable!();
}
}
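/// Build lexing rules from a simple spec where each non-empty line has the
/// form `<regex> '<NAME>'`; each regex is anchored with `\A` so matches must
/// start at the current input position.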
fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
let mut rules = Vec::new();
for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
assert!(l.rfind('\'') == Some(l.len() - 1));
let i = l[..l.len() - 1].rfind('\'').unwrap();
let name = &l[i + 1..l.len() - 1];
let re = &l[..i - 1].trim();
rules.push((
ids_map[name],
Regex::new(&format!("\\A(?:{})", re)).unwrap(),
));
}
rules
}
fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
let mut lexemes = vec![];
let mut i = 0;
while i < input.len() {
let mut longest = 0; // Length of the longest match so far.
let mut longest_tok_id = 0; // Only valid if longest != 0.
for (tok_id, r) in rules.iter() {
if let Some(m) = r.find(&input[i..]) {
let len = m.end();
if len > longest {
longest = len;
longest_tok_id = *tok_id;
}
}
}
assert!(longest > 0);
lexemes.push(Lexeme::new(longest_tok_id, i, longest));
i += longest;
}
lexemes
}
#[test]
fn simple_parse() {
check_parse_output(
"[a-zA-Z_] 'ID'
\\+ '+'",
"
%start E
%%
E: T '+' E
| T;
T: 'ID';
",
"a+b",
"E
T
ID a
+ +
E
T
ID b
",
);
}
#[test]
fn parse_empty_rules() {
let lexs = "[a-zA-Z_] 'ID'";
let grms = "%start S
%%
S: L;
L: 'ID'
| ;
";
check_parse_output(
lexs, grms, "", "S
L
",
);
check_parse_output(
lexs,
grms,
"x",
"S
L
ID x
",
);
}
#[test]
fn recursive_parse() {
let lexs = "\\+ '+'
\\* '*'
[0-9]+ 'INT'";
let grms = "%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";
check_parse_output(
lexs,
grms,
"2+3*4",
"Expr
Expr
Term
Factor
INT 2
+ +
Term
Term
Factor
INT 3
* *
Factor
INT 4
",
);
check_parse_output(
lexs,
grms,
"2*3+4",
"Expr
Expr
Term
Term
Factor
INT 2
* *
Factor
INT 3
+ +
Term
Factor
INT 4
",
);
}
#[test]
fn parse_error() {
let lexs = "\\( '('
\\) ')'
[a-zA-Z_][a-zA-Z_0-9]* 'ID'";
let grms = "%start Call
%%
Call: 'ID' '(' ')';";
check_parse_output(
lexs,
grms,
"f()",
"Call
ID f
( (
) )
",
);
let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
let (_, errs) = pr.unwrap_err();
assert_eq!(errs.len(), 1);
let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
match &errs[0] {
LexParseError::ParseError(e) => {
assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
assert!(e.lexeme().faulty());
}
_ => unreachable!(),
}
let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
let (_, errs) = pr.unwrap_err();
assert_eq!(errs.len(), 1);
let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
match &errs[0] {
LexParseError::ParseError(e) => {
assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
assert!(!e.lexeme().faulty());
}
_ => unreachable!(),
}
}
}