#![allow(clippy::derive_partial_eq_without_eq)]
use std::{
    error::Error,
    fmt::{self, Debug, Display, Write as _},
    hash::Hash,
    marker::PhantomData,
    vec,
};

#[cfg(not(target_arch = "wasm32"))]
use std::time::{Duration, Instant};
#[cfg(target_arch = "wasm32")]
use web_time::{Duration, Instant};

use cactus::Cactus;
use cfgrammar::{header::Value, span::Location, yacc::YaccGrammar, RIdx, Span, TIdx};
use lrtable::{Action, StIdx, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::TokenStream;
use quote::quote;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::{cpctplus, LexError, Lexeme, LexerTypes, NonStreamingLexer};

#[cfg(test)]
const RECOVERY_TIME_BUDGET: u64 = 60_000; // milliseconds
#[cfg(not(test))]
const RECOVERY_TIME_BUDGET: u64 = 500; // milliseconds

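/// A generic parse tree: parsers built without user-supplied actions (e.g. via
/// [`RTParserBuilder::parse_generictree`]) produce one of these per parse.
///
/// An illustrative sketch of walking a tree (the `tree`, `grm`, and `input`
/// values here are hypothetical):
///
/// ```rust,ignore
/// match tree {
///     Node::Term { lexeme } => println!("token spanning {:?}", lexeme.span()),
///     Node::Nonterm { ridx, nodes } => {
///         println!("rule {} with {} children", grm.rule_name_str(ridx), nodes.len())
///     }
/// }
/// ```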
#[derive(Debug, Clone, PartialEq)]
pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
    /// Terminals store a single lexeme.
    Term { lexeme: LexemeT },
    /// Nonterminals reference a rule and have zero or more `Node`s as children.
    Nonterm {
        ridx: RIdx<StorageT>,
        nodes: Vec<Node<LexemeT, StorageT>>,
    },
}

impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Return a pretty-printed version of this node.
    pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
        // Walk the tree iteratively with an explicit stack of
        // (indent level, node) pairs, indenting one space per level.
        let mut st = vec![(0, self)];
        let mut s = String::new();
        while let Some((indent, e)) = st.pop() {
            for _ in 0..indent {
                s.push(' ');
            }
            match *e {
                Node::Term { lexeme } => {
                    let tidx = TIdx(lexeme.tok_id());
                    let tn = grm.token_name(tidx).unwrap();
                    let lt = &input[lexeme.span().start()..lexeme.span().end()];
                    writeln!(s, "{} {}", tn, lt).ok();
                }
                Node::Nonterm { ridx, ref nodes } => {
                    writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
                    for x in nodes.iter().rev() {
                        st.push((indent + 1, x));
                    }
                }
            }
        }
        s
    }
}

// The parse stack: a stack of state table indices.
type PStack<StorageT> = Vec<StIdx<StorageT>>;
// Returns the error recovery cost of a given token (must be greater than zero).
type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
// The signature of a semantic action: called with the rule index, the lexer,
// the span the production covers, the values of the production's symbols, and
// the user-supplied parameter.
type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
    RIdx<StorageT>,
    &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    Span,
    vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
    ParamT,
) -> ActionT;

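/// During parsing, the action stack holds, for each symbol parsed so far,
/// either the value an action has already computed for it or the raw lexeme
/// awaiting an action.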
#[derive(Debug)]
pub enum AStackType<LexemeT, ActionT> {
    ActionType(ActionT),
    Lexeme(LexemeT),
}

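/// The core LR parser, generic over the action value type `ActionT` and the
/// user-supplied action parameter `ParamT`.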
pub(super) struct Parser<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT: 'a,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    rcvry_kind: RecoveryKind,
    pub(super) grm: &'a YaccGrammar<StorageT>,
    pub(super) token_cost: Box<TokenCostFn<'a, StorageT>>,
    pub(super) stable: &'a StateTable<StorageT>,
    lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
    // One action per production, indexed by production index.
    actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
    param: ParamT,
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_generictree(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<
            ActionFn<
                'a,
                'b,
                'input,
                StorageT,
                LexerTypesT,
                Node<LexerTypesT::LexemeT, StorageT>,
                (),
            >,
        > = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &action_generictree);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }
}

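/// The action which `parse_generictree` uses for every production: it builds a
/// `Node::Nonterm` whose children are the values (subtrees or lexemes) drained
/// from the action stack.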
pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
    ridx: RIdx<StorageT>,
    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
    _span: Span,
    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
    _param: (),
) -> Node<LexerTypesT::LexemeT, StorageT>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::LexemeT: Lexeme<StorageT>,
{
    let mut nodes = Vec::with_capacity(astack.len());
    for a in astack {
        nodes.push(match a {
            AStackType::ActionType(n) => n,
            AStackType::Lexeme(lexeme) => Node::Term { lexeme },
        });
    }
    Node::Nonterm { ridx, nodes }
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_noaction(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>> = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &Parser::noaction);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        errors
    }

    fn noaction(
        _ridx: RIdx<StorageT>,
        _lexer: &dyn NonStreamingLexer<LexerTypesT>,
        _span: Span,
        _astack: vec::Drain<AStackType<LexerTypesT::LexemeT, ()>>,
        _param: (),
    ) {
    }
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
        ActionT: 'a,
        ParamT: Clone,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_actions(
        rcvry_kind: RecoveryKind,
        grm: &'a YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &'a StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions,
            param,
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }

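    /// The main LR parsing loop, starting at lookahead index `laidx` with parse
    /// stack `pstack`. Each reduce runs the production's action, pushing its
    /// value onto `astack` and its span onto `spans`. On a syntax error the
    /// configured recoverer is invoked, subject to the remaining
    /// `RECOVERY_TIME_BUDGET`; every error is recorded in `errors`. Returns
    /// `Some(value)` if the input was accepted or `None` otherwise.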
    fn lr(
        &self,
        mut laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
        spans: &mut Vec<Span>,
    ) -> Option<ActionT> {
        let mut recoverer = None;
        let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
        loop {
            debug_assert_eq!(astack.len(), spans.len());
            let stidx = *pstack.last().unwrap();
            let la_tidx = self.next_tidx(laidx);

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());

                    let span = if spans.is_empty() {
                        Span::new(0, 0)
                    } else if pop_idx - 1 < spans.len() {
                        Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
                    } else {
                        Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
                    };
                    spans.truncate(pop_idx - 1);
                    spans.push(span);

                    let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                        ridx,
                        self.lexer,
                        span,
                        astack.drain(pop_idx - 1..),
                        self.param.clone(),
                    ));
                    astack.push(v);
                }
                Action::Shift(state_id) => {
                    let la_lexeme = self.next_lexeme(laidx);
                    pstack.push(state_id);
                    astack.push(AStackType::Lexeme(la_lexeme));

                    spans.push(la_lexeme.span());
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    debug_assert_eq!(astack.len(), 1);
                    match astack.drain(..).next().unwrap() {
                        AStackType::ActionType(v) => return Some(v),
                        _ => unreachable!(),
                    }
                }
                Action::Error => {
                    if recoverer.is_none() {
                        recoverer = Some(match self.rcvry_kind {
                            RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
                            RecoveryKind::None => {
                                let la_lexeme = self.next_lexeme(laidx);
                                errors.push(
                                    ParseError {
                                        stidx,
                                        lexeme: la_lexeme,
                                        repairs: vec![],
                                    }
                                    .into(),
                                );
                                return None;
                            }
                        });
                    }

                    let before = Instant::now();
                    let finish_by = before + recovery_budget;
                    let (new_laidx, repairs) = recoverer
                        .as_ref()
                        .unwrap()
                        .as_ref()
                        .recover(finish_by, self, laidx, pstack, astack, spans);
                    let after = Instant::now();
                    recovery_budget = recovery_budget
                        .checked_sub(after - before)
                        .unwrap_or_else(|| Duration::new(0, 0));
                    let keep_going = !repairs.is_empty();
                    let la_lexeme = self.next_lexeme(laidx);
                    errors.push(
                        ParseError {
                            stidx,
                            lexeme: la_lexeme,
                            repairs,
                        }
                        .into(),
                    );
                    if !keep_going {
                        return None;
                    }
                    laidx = new_laidx;
                }
            }
        }
    }

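    /// Parse from `laidx` up to (but excluding) `end_laidx`, mutating `pstack`
    /// as parsing occurs. If `lexeme_prefix` is given, it is used as the sole
    /// lookahead (in which case `end_laidx` must be `laidx + 1`). `astack` and
    /// `spans` must either both be `Some` or both be `None`. Returns the
    /// lookahead index the parse reached.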
    pub(super) fn lr_upto(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
        spans: &mut Option<&mut Vec<Span>>,
    ) -> usize {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx && laidx <= self.lexemes.len() {
            let stidx = *pstack.last().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = *spans {
                            let span = if spans_uw.is_empty() {
                                Span::new(0, 0)
                            } else if pop_idx - 1 < spans_uw.len() {
                                Span::new(
                                    spans_uw[pop_idx - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            } else {
                                Span::new(
                                    spans_uw[spans_uw.len() - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            };
                            spans_uw.truncate(pop_idx - 1);
                            spans_uw.push(span);

                            let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                                ridx,
                                self.lexer,
                                span,
                                astack_uw.drain(pop_idx - 1..),
                                self.param.clone(),
                            ));
                            astack_uw.push(v);
                        } else {
                            unreachable!();
                        }
                    }

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = spans {
                            let la_lexeme = if let Some(l) = lexeme_prefix {
                                l
                            } else {
                                self.next_lexeme(laidx)
                            };
                            astack_uw.push(AStackType::Lexeme(la_lexeme));
                            spans_uw.push(la_lexeme.span());
                        }
                    }
                    pstack.push(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        laidx
    }

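    /// Return the lexeme at position `laidx`. If `laidx` is one past the end
    /// of the input, return a zero-length "faulty" EOF lexeme positioned at
    /// the end of the last real lexeme.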
    pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
        let llen = self.lexemes.len();
        debug_assert!(laidx <= llen);
        if laidx < llen {
            self.lexemes[laidx]
        } else {
            // We have to artificially construct a lexeme for EOF.
            let last_la_end = if llen == 0 {
                0
            } else {
                debug_assert!(laidx > 0);
                let last_la = self.lexemes[laidx - 1];
                last_la.span().end()
            };

            Lexeme::new_faulty(
                StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
                last_la_end,
                0,
            )
        }
    }

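    /// Return the token index of the lexeme at `laidx`, or the EOF token index
    /// if `laidx` is past the end of the input.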
    pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
        let ll = self.lexemes.len();
        debug_assert!(laidx <= ll);
        if laidx < ll {
            TIdx(self.lexemes[laidx].tok_id())
        } else {
            self.grm.eof_token_idx()
        }
    }

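    /// As `lr_upto`, but parsing over an immutable cactus (parent-pointer)
    /// stack of states, optionally building `Node` trees into `tstack`. Error
    /// recovery uses this to explore alternative parses cheaply. Returns the
    /// lookahead index reached and the resulting stack.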
    pub(super) fn lr_cactus(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        mut pstack: Cactus<StIdx<StorageT>>,
        tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
    ) -> (usize, Cactus<StIdx<StorageT>>) {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx {
            let stidx = *pstack.val().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_num = self.grm.prod(pidx).len();
                    if let Some(ref mut tstack_uw) = *tstack {
                        let nodes = tstack_uw
                            .drain(pstack.len() - pop_num - 1..)
                            .collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
                        tstack_uw.push(Node::Nonterm { ridx, nodes });
                    }

                    for _ in 0..pop_num {
                        pstack = pstack.parent().unwrap();
                    }
                    let prior = *pstack.val().unwrap();
                    pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut tstack_uw) = *tstack {
                        let la_lexeme = if let Some(l) = lexeme_prefix {
                            l
                        } else {
                            self.next_lexeme(laidx)
                        };
                        tstack_uw.push(Node::Term { lexeme: la_lexeme });
                    }
                    pstack = pstack.child(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    if let Some(ref tstack_uw) = *tstack {
                        debug_assert_eq!(tstack_uw.len(), 1);
                    }
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        (laidx, pstack)
    }
}

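/// The interface implemented by error recovery algorithms. `recover` is called
/// with the parser blocked at `in_laidx`; it returns the lookahead index from
/// which the main loop can resume, along with the repair sequences found
/// (empty if recovery failed). Implementations should give up once `finish_by`
/// has passed.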
pub(super) trait Recoverer<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    fn recover(
        &self,
        finish_by: Instant,
        parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
        in_laidx: usize,
        in_pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        spans: &mut Vec<Span>,
    ) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
}

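/// What recovery algorithm should be used when a syntax error is encountered?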
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub enum RecoveryKind {
    /// Use the CPCT+ error recovery algorithm.
    CPCTPlus,
    /// Don't use error recovery: stop at the first syntax error.
    None,
}

impl TryFrom<RecoveryKind> for Value<Location> {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: RecoveryKind) -> Result<Value<Location>, Self::Error> {
        use cfgrammar::{
            header::{Namespaced, Setting},
            Location,
        };
        let from_loc = Location::Other("From<RecoveryKind>".to_string());
        Ok(match rk {
            RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("CPCTPlus".to_string(), from_loc.clone()),
            })),
            RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("None".to_string(), from_loc.clone()),
            })),
        })
    }
}

impl TryFrom<&Value<Location>> for RecoveryKind {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: &Value<Location>) -> Result<RecoveryKind, Self::Error> {
        use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting};

        match rk {
            Value::Flag(_, loc) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert boolean to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert number to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Unitary(Namespaced {
                namespace,
                member: (kind, kind_loc),
            })) => {
                match namespace {
                    Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => {
                        return Err(HeaderError {
                            kind: HeaderErrorKind::ConversionError(
                                "RecoveryKind",
                                "Unknown namespace",
                            ),
                            locations: vec![loc.clone()],
                        })
                    }
                    _ => {}
                }
                match kind.to_lowercase().as_ref() {
                    "cpctplus" => Ok(RecoveryKind::CPCTPlus),
                    "none" => Ok(RecoveryKind::None),
                    _ => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"),
                        locations: vec![kind_loc.clone()],
                    }),
                }
            }
            Value::Setting(Setting::Constructor {
                ctor: _,
                arg:
                    Namespaced {
                        namespace: _,
                        member: (_, arg_loc),
                    },
            }) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert constructor to RecoveryKind",
                ),
                locations: vec![arg_loc.clone()],
            }),
        }
    }
}

impl quote::ToTokens for RecoveryKind {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match *self {
            RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
            RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
        })
    }
}

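/// A lexing or parsing error. Although the two are distinct, they can often be
/// dealt with uniformly, hence this type which unifies them.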
#[derive(Debug)]
pub enum LexParseError<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    LexError(LexerTypesT::LexErrorT),
    ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
}

impl<StorageT: Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
    LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
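    /// Return a pretty-printed version of this error, using `epp` to
    /// pretty-print token names for `Insert` repairs. Runs of `Delete` repairs
    /// over adjacent spans are merged into a single `Delete` entry.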
    pub fn pp<'a>(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
        epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
    ) -> String {
        match self {
            LexParseError::LexError(e) => {
                let ((line, col), _) = lexer.line_col(e.span());
                format!("Lexing error at line {} column {}.", line, col)
            }
            LexParseError::ParseError(e) => {
                let ((line, col), _) = lexer.line_col(e.lexeme().span());
                let mut out = format!("Parsing error at line {} column {}.", line, col);
                let repairs_len = e.repairs().len();
                if repairs_len == 0 {
                    out.push_str(" No repair sequences found.");
                } else {
                    out.push_str(" Repair sequences found:");
                    for (i, rs) in e.repairs().iter().enumerate() {
                        let padding = ((repairs_len as f64).log10() as usize)
                            - (((i + 1) as f64).log10() as usize)
                            + 1;
                        write!(out, "\n {}{}: ", " ".repeat(padding), i + 1).ok();
                        let mut rs_out = Vec::new();

                        // Merge consecutive Deletes over adjacent spans into a
                        // single entry; Deletes separated by other repair kinds
                        // are not merged.
                        let mut i = 0;
                        while i < rs.len() {
                            match rs[i] {
                                ParseRepair::Delete(l) => {
                                    let mut j = i + 1;
                                    let mut last_end = l.span().end();
                                    while j < rs.len() {
                                        if let ParseRepair::Delete(next_l) = rs[j] {
                                            if next_l.span().start() == last_end {
                                                last_end = next_l.span().end();
                                                j += 1;
                                                continue;
                                            }
                                        }
                                        break;
                                    }
                                    let t = &lexer
                                        .span_str(Span::new(l.span().start(), last_end))
                                        .replace('\n', "\\n");
                                    rs_out.push(format!("Delete {}", t));
                                    i = j;
                                }
                                ParseRepair::Insert(tidx) => {
                                    rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
                                    i += 1;
                                }
                                ParseRepair::Shift(l) => {
                                    let t = &lexer.span_str(l.span()).replace('\n', "\\n");
                                    rs_out.push(format!("Shift {}", t));
                                    i += 1;
                                }
                            }
                        }

                        out.push_str(&rs_out.join(", "));
                    }
                }
                out
            }
        }
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > fmt::Display for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            LexParseError::LexError(ref e) => Display::fmt(e, f),
            LexParseError::ParseError(ref e) => Display::fmt(e, f),
        }
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Error for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
        T: LexError,
    > From<T> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::LexError(err)
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(
        err: ParseError<LexerTypesT::LexemeT, StorageT>,
    ) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::ParseError(err)
    }
}

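/// A run-time parser builder.
///
/// An illustrative sketch of usage (it assumes `grm` and `stable` were built
/// with `cfgrammar`/`lrtable` and that `lexer` implements
/// [`NonStreamingLexer`]; the variable names are hypothetical):
///
/// ```rust,ignore
/// let (tree, errs) = RTParserBuilder::new(&grm, &stable)
///     .recoverer(RecoveryKind::CPCTPlus)
///     .parse_generictree(&lexer);
/// for e in &errs {
///     println!("{}", e);
/// }
/// ```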
pub struct RTParserBuilder<
    'a,
    StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    grm: &'a YaccGrammar<StorageT>,
    stable: &'a StateTable<StorageT>,
    recoverer: RecoveryKind,
    term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
    phantom: PhantomData<(StorageT, LexerTypesT)>,
}

impl<
        'a,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > RTParserBuilder<'a, StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Create a new parser builder for a grammar and its state table.
    pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
        RTParserBuilder {
            grm,
            stable,
            recoverer: RecoveryKind::CPCTPlus,
            term_costs: &|_| 1,
            phantom: PhantomData,
        }
    }

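    /// Set the recoverer for this parser to `rk`.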
    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
        self.recoverer = rk;
        self
    }

    /// Set the function which assigns each token a cost for error recovery
    /// purposes; the cost must be greater than zero.
    pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
        self.term_costs = f;
        self
    }

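    /// Parse input, building a generic parse tree. Returns a pair: the tree,
    /// if one could be built, and all the lexing and parsing errors
    /// encountered.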
    pub fn parse_generictree(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::<StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>::parse_generictree(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
        )
    }

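    /// Parse input, performing no actions, and return all the lexing and
    /// parsing errors encountered.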
    pub fn parse_noaction(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return vec![e.into()],
            }
        }
        Parser::<StorageT, LexerTypesT, (), ()>::parse_noaction(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
        )
    }

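    /// Parse input, executing the user-supplied `actions` (one per production,
    /// indexed by production index) with the extra parameter `param`. Returns
    /// the value produced for the start rule, if the parse succeeded, and all
    /// the lexing and parsing errors encountered.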
    pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
        &self,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::parse_actions(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
            actions,
            param,
        )
    }
}

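/// After a parse error is encountered, the parser attempts to find a way of
/// recovering. Each entry in a sequence of repairs is represented by a
/// `ParseRepair`.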
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    /// Insert a token with the given token index.
    Insert(TIdx<StorageT>),
    /// Delete the given lexeme.
    Delete(LexemeT),
    /// Shift (i.e. keep) the given lexeme.
    Shift(LexemeT),
}

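/// Records a single parse error.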
#[derive(Clone, Debug, PartialEq)]
pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    stidx: StIdx<StorageT>,
    lexeme: LexemeT,
    repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Parse error at lexeme {:?}", self.lexeme)
    }
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}

impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
    /// Return the state table index where the error was detected.
    pub fn stidx(&self) -> StIdx<StorageT> {
        self.stidx
    }

    /// Return the lexeme where the error occurred.
    pub fn lexeme(&self) -> &LexemeT {
        &self.lexeme
    }

    /// Return the repair sequences found for this error. If error recovery was
    /// not run, or no repairs were found, this is empty.
    pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
        &self.repairs
    }
}

#[cfg(test)]
pub(crate) mod test {
    use std::collections::HashMap;

    use cfgrammar::{
        yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
        Span,
    };
    use lrtable::{from_yacc, Minimiser};
    use num_traits::ToPrimitive;
    use regex::Regex;

    use super::*;
    use crate::{
        test_utils::{TestLexError, TestLexeme, TestLexerTypes},
        Lexeme, Lexer,
    };

    pub(crate) fn do_parse<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
    }

    fn do_parse_with_costs<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
        costs: &HashMap<&str, u8>,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        let grm = YaccGrammar::<u16>::new_with_storaget(
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
            grms,
        )
        .unwrap();
        let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
            .collect();
        let lexer_rules = small_lexer(lexs, rule_ids);
        let lexemes = small_lex(lexer_rules, input);
        let lexer = SmallLexer { lexemes, s: input };
        let costs_tidx = costs
            .iter()
            .map(|(k, v)| (grm.token_idx(k).unwrap(), v))
            .collect::<HashMap<_, _>>();
        let (r, errs) = RTParserBuilder::new(&grm, &stable)
            .recoverer(rcvry_kind)
            .term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
            .parse_generictree(&lexer);
        if let Some(node) = r {
            if errs.is_empty() {
                (grm, lexer, Ok(node))
            } else {
                (grm, lexer, Err((Some(node), errs)))
            }
        } else {
            (grm, lexer, Err((None, errs)))
        }
    }

    fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
        let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
        assert_eq!(expected, pt.unwrap().pp(&grm, input));
    }

    pub(crate) struct SmallLexer<'input> {
        // A small hard-coded lexer for testing purposes: it holds the
        // already-lexed lexemes and the input string they refer to.
        lexemes: Vec<TestLexeme>,
        s: &'input str,
    }

    impl Lexer<TestLexerTypes> for SmallLexer<'_> {
        fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
            Box::new(self.lexemes.iter().map(|x| Ok(*x)))
        }
    }

    impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
        fn span_str(&self, span: Span) -> &'input str {
            &self.s[span.start()..span.end()]
        }

        fn span_lines_str(&self, _: Span) -> &'input str {
            unreachable!();
        }

        fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
            unreachable!();
        }
    }

    fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
        let mut rules = Vec::new();
        for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
            assert!(l.rfind('\'') == Some(l.len() - 1));
            let i = l[..l.len() - 1].rfind('\'').unwrap();
            let name = &l[i + 1..l.len() - 1];
            let re = &l[..i - 1].trim();
            rules.push((
                ids_map[name],
                Regex::new(&format!("\\A(?:{})", re)).unwrap(),
            ));
        }
        rules
    }

    fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
        let mut lexemes = vec![];
        let mut i = 0;
        while i < input.len() {
            // Find the longest match over all rules at offset i.
            let mut longest = 0; // Length of the longest match
            let mut longest_tok_id = 0; // Only valid iff longest != 0
            for (tok_id, r) in rules.iter() {
                if let Some(m) = r.find(&input[i..]) {
                    let len = m.end();
                    if len > longest {
                        longest = len;
                        longest_tok_id = *tok_id;
                    }
                }
            }
            assert!(longest > 0);
            lexemes.push(Lexeme::new(longest_tok_id, i, longest));
            i += longest;
        }
        lexemes
    }

    #[test]
    fn simple_parse() {
        check_parse_output(
            "[a-zA-Z_] 'ID'
             \\+ '+'",
            "
%start E
%%
E: T '+' E
 | T;
T: 'ID';
",
            "a+b",
            "E
 T
  ID a
 + +
 E
  T
   ID b
",
        );
    }

    #[test]
    fn parse_empty_rules() {
        let lexs = "[a-zA-Z_] 'ID'";
        let grms = "%start S
%%
S: L;
L: 'ID'
 | ;
";
        check_parse_output(
            lexs, grms, "", "S
 L
",
        );

        check_parse_output(
            lexs,
            grms,
            "x",
            "S
 L
  ID x
",
        );
    }

    #[test]
    fn recursive_parse() {
        let lexs = "\\+ '+'
                    \\* '*'
                    [0-9]+ 'INT'";
        let grms = "%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";

        check_parse_output(
            lexs,
            grms,
            "2+3*4",
            "Expr
 Expr
  Term
   Factor
    INT 2
 + +
 Term
  Term
   Factor
    INT 3
  * *
  Factor
   INT 4
",
        );
        check_parse_output(
            lexs,
            grms,
            "2*3+4",
            "Expr
 Expr
  Term
   Term
    Factor
     INT 2
   * *
   Factor
    INT 3
 + +
 Term
  Factor
   INT 4
",
        );
    }

    #[test]
    fn parse_error() {
        let lexs = "\\( '('
                    \\) ')'
                    [a-zA-Z_][a-zA-Z_0-9]* 'ID'";
        let grms = "%start Call
%%
Call: 'ID' '(' ')';";

        check_parse_output(
            lexs,
            grms,
            "f()",
            "Call
 ID f
 ( (
 ) )
",
        );

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
                assert!(e.lexeme().faulty());
            }
            _ => unreachable!(),
        }

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
                assert!(!e.lexeme().faulty());
            }
            _ => unreachable!(),
        }
    }
}