#![allow(clippy::derive_partial_eq_without_eq)]
use std::{
    error::Error,
    fmt::{self, Debug, Display, Write as _},
    hash::Hash,
    marker::PhantomData,
    vec,
};

#[cfg(not(target_arch = "wasm32"))]
use std::time::{Duration, Instant};
#[cfg(target_arch = "wasm32")]
use web_time::{Duration, Instant};

use cactus::Cactus;
use cfgrammar::{header::Value, span::Location, yacc::YaccGrammar, RIdx, Span, TIdx};
use lrtable::{Action, StIdx, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::TokenStream;
use quote::quote;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::{cpctplus, LexError, Lexeme, LexerTypes, NonStreamingLexer};

#[cfg(test)]
const RECOVERY_TIME_BUDGET: u64 = 60_000; // milliseconds
#[cfg(not(test))]
const RECOVERY_TIME_BUDGET: u64 = 500; // milliseconds

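/// A generic parse tree.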
#[derive(Debug, Clone, PartialEq)]
pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
    /// Terminals store a single lexeme.
    Term { lexeme: LexemeT },
    /// Nonterminals reference a rule and have zero or more `Node`s as children.
    Nonterm {
        ridx: RIdx<StorageT>,
        nodes: Vec<Node<LexemeT, StorageT>>,
    },
}

impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
where
    usize: AsPrimitive<StorageT>,
{
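    /// Return a pretty-printed version of this parse tree: one node per line,
    /// each child indented one space beyond its parent, and each terminal
    /// followed by the text it matched in `input`.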
    pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
        let mut st = vec![(0, self)]; // Stack of (indent level, node) pairs still to print
        let mut s = String::new();
        while let Some((indent, e)) = st.pop() {
            for _ in 0..indent {
                s.push(' ');
            }
            match *e {
                Node::Term { lexeme } => {
                    let tidx = TIdx(lexeme.tok_id());
                    let tn = grm.token_name(tidx).unwrap();
                    let lt = &input[lexeme.span().start()..lexeme.span().end()];
                    writeln!(s, "{} {}", tn, lt).ok();
                }
                Node::Nonterm { ridx, ref nodes } => {
                    writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
                    for x in nodes.iter().rev() {
                        st.push((indent + 1, x));
                    }
                }
            }
        }
        s
    }
}

type PStack<StorageT> = Vec<StIdx<StorageT>>; // Parse stack
type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
    RIdx<StorageT>,
    &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    Span,
    vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
    ParamT,
) -> ActionT;

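/// The stack of values passed to action functions: either the value produced
/// by an earlier action (after a reduce) or a raw lexeme (after a shift).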
#[derive(Debug)]
pub enum AStackType<LexemeT, ActionT> {
    ActionType(ActionT),
    Lexeme(LexemeT),
}

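/// The core LR parser, parameterised by the action result type `ActionT` and
/// the user-supplied parameter type `ParamT` passed to every action function.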
pub(super) struct Parser<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT: 'a,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    rcvry_kind: RecoveryKind,
    pub(super) grm: &'a YaccGrammar<StorageT>,
    pub(super) token_cost: Box<TokenCostFn<'a, StorageT>>,
    pub(super) stable: &'a StateTable<StorageT>,
    lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
    // One action function per production, indexed by production index.
    actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
    param: ParamT,
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>
where
    usize: AsPrimitive<StorageT>,
{
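    /// Parse the lexemes into a generic parse tree, attaching the
    /// tree-building action to every production. Returns the tree (if one
    /// could be built) along with any errors encountered on the way.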
    fn parse_generictree(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<
            ActionFn<
                'a,
                'b,
                'input,
                StorageT,
                LexerTypesT,
                Node<LexerTypesT::LexemeT, StorageT>,
                (),
            >,
        > = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &action_generictree);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }
}

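/// The action function used for every production when building a generic
/// parse tree: it wraps the production's children in a [`Node::Nonterm`].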
pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
    ridx: RIdx<StorageT>,
    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
    _span: Span,
    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
    _param: (),
) -> Node<LexerTypesT::LexemeT, StorageT>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::LexemeT: Lexeme<StorageT>,
{
    let mut nodes = Vec::with_capacity(astack.len());
    for a in astack {
        nodes.push(match a {
            AStackType::ActionType(n) => n,
            AStackType::Lexeme(lexeme) => Node::Term { lexeme },
        });
    }
    Node::Nonterm { ridx, nodes }
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>
where
    usize: AsPrimitive<StorageT>,
{
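    /// Parse the lexemes while running no user actions, returning only the
    /// errors encountered: useful when callers want error checking alone.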
    fn parse_noaction(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, (), ()>> = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &Parser::noaction);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        errors
    }

    fn noaction(
        _ridx: RIdx<StorageT>,
        _lexer: &dyn NonStreamingLexer<LexerTypesT>,
        _span: Span,
        _astack: vec::Drain<AStackType<LexerTypesT::LexemeT, ()>>,
        _param: (),
    ) {
    }
}

impl<
        'a,
        'b: 'a,
        'input: 'b,
        StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
        ActionT: 'a,
        ParamT: Clone,
    > Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
where
    usize: AsPrimitive<StorageT>,
{
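    /// Parse the lexemes, running the caller-supplied action function for
    /// each production as it is reduced and threading `param` through every
    /// call.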
    fn parse_actions(
        rcvry_kind: RecoveryKind,
        grm: &'a YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &'a StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions,
            param,
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }

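    /// Run the main LR loop over `self.lexemes`, starting from lookahead
    /// index `laidx`, with `pstack` as the parsing stack, `astack` as the
    /// action value stack, and `spans` tracking the input extent of each
    /// stack entry. On a syntax error, hand over to the configured recoverer
    /// (within `RECOVERY_TIME_BUDGET`), recording every error in `errors`.
    /// Returns `Some(value)` if the input was accepted (possibly after
    /// repairs) or `None` otherwise.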
    fn lr(
        &self,
        mut laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
        spans: &mut Vec<Span>,
    ) -> Option<ActionT> {
        let mut recoverer = None;
        let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
        loop {
            debug_assert_eq!(astack.len(), spans.len());
            let stidx = *pstack.last().unwrap();
            let la_tidx = self.next_tidx(laidx);

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());

                    let span = if spans.is_empty() {
                        Span::new(0, 0)
                    } else if pop_idx - 1 < spans.len() {
                        Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
                    } else {
                        Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
                    };
                    spans.truncate(pop_idx - 1);
                    spans.push(span);

                    let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                        ridx,
                        self.lexer,
                        span,
                        astack.drain(pop_idx - 1..),
                        self.param.clone(),
                    ));
                    astack.push(v);
                }
                Action::Shift(state_id) => {
                    let la_lexeme = self.next_lexeme(laidx);
                    pstack.push(state_id);
                    astack.push(AStackType::Lexeme(la_lexeme));

                    spans.push(la_lexeme.span());
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    debug_assert_eq!(astack.len(), 1);
                    match astack.drain(..).next().unwrap() {
                        AStackType::ActionType(v) => return Some(v),
                        _ => unreachable!(),
                    }
                }
                Action::Error => {
                    if recoverer.is_none() {
                        recoverer = Some(match self.rcvry_kind {
                            RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
                            RecoveryKind::None => {
                                let la_lexeme = self.next_lexeme(laidx);
                                errors.push(
                                    ParseError {
                                        stidx,
                                        lexeme: la_lexeme,
                                        repairs: vec![],
                                    }
                                    .into(),
                                );
                                return None;
                            }
                        });
                    }

                    let before = Instant::now();
                    let finish_by = before + recovery_budget;
                    let (new_laidx, repairs) = recoverer
                        .as_ref()
                        .unwrap()
                        .as_ref()
                        .recover(finish_by, self, laidx, pstack, astack, spans);
                    let after = Instant::now();
                    recovery_budget = recovery_budget
                        .checked_sub(after - before)
                        .unwrap_or_else(|| Duration::new(0, 0));
                    let keep_going = !repairs.is_empty();
                    let la_lexeme = self.next_lexeme(laidx);
                    errors.push(
                        ParseError {
                            stidx,
                            lexeme: la_lexeme,
                            repairs,
                        }
                        .into(),
                    );
                    if !keep_going {
                        return None;
                    }
                    laidx = new_laidx;
                }
            }
        }
    }

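    /// Parse from lookahead index `laidx` up to (but excluding) `end_laidx`,
    /// mutating `pstack` (and, if provided, `astack` and `spans`) as parsing
    /// proceeds. If `lexeme_prefix` is given, it is used as the sole
    /// lookahead. Returns the index of the lexeme up to which parsing
    /// actually occurred.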
    pub(super) fn lr_upto(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
        spans: &mut Option<&mut Vec<Span>>,
    ) -> usize {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx && laidx <= self.lexemes.len() {
            let stidx = *pstack.last().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = *spans {
                            let span = if spans_uw.is_empty() {
                                Span::new(0, 0)
                            } else if pop_idx - 1 < spans_uw.len() {
                                Span::new(
                                    spans_uw[pop_idx - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            } else {
                                Span::new(
                                    spans_uw[spans_uw.len() - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            };
                            spans_uw.truncate(pop_idx - 1);
                            spans_uw.push(span);

                            let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                                ridx,
                                self.lexer,
                                span,
                                astack_uw.drain(pop_idx - 1..),
                                self.param.clone(),
                            ));
                            astack_uw.push(v);
                        } else {
                            unreachable!();
                        }
                    }

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = spans {
                            let la_lexeme = if let Some(l) = lexeme_prefix {
                                l
                            } else {
                                self.next_lexeme(laidx)
                            };
                            astack_uw.push(AStackType::Lexeme(la_lexeme));
                            spans_uw.push(la_lexeme.span());
                        }
                    }
                    pstack.push(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        laidx
    }

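    /// Return the lexeme at index `laidx`. If `laidx` is one past the last
    /// lexeme, return a "faulty" EOF lexeme positioned at the end of the
    /// input.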
    pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
        let llen = self.lexemes.len();
        debug_assert!(laidx <= llen);
        if laidx < llen {
            self.lexemes[laidx]
        } else {
            // We have to artificially construct a zero-width lexeme for EOF.
            let last_la_end = if llen == 0 {
                0
            } else {
                debug_assert!(laidx > 0);
                let last_la = self.lexemes[laidx - 1];
                last_la.span().end()
            };

            Lexeme::new_faulty(
                StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
                last_la_end,
                0,
            )
        }
    }

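    /// Return the token index of the lexeme at `laidx`, or the EOF token
    /// index if `laidx` is past the last lexeme.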
    pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
        let ll = self.lexemes.len();
        debug_assert!(laidx <= ll);
        if laidx < ll {
            TIdx(self.lexemes[laidx].tok_id())
        } else {
            self.grm.eof_token_idx()
        }
    }

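    /// As [`Parser::lr_upto`], but operating on an immutable cactus (i.e.
    /// parent-pointer) stack of states, as used during error recovery:
    /// returns how far parsing got and the resulting stack. If `tstack` is
    /// provided, a parse tree is built alongside.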
    pub(super) fn lr_cactus(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        mut pstack: Cactus<StIdx<StorageT>>,
        tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
    ) -> (usize, Cactus<StIdx<StorageT>>) {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx {
            let stidx = *pstack.val().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_num = self.grm.prod(pidx).len();
                    if let Some(ref mut tstack_uw) = *tstack {
                        let nodes = tstack_uw
                            .drain(pstack.len() - pop_num - 1..)
                            .collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
                        tstack_uw.push(Node::Nonterm { ridx, nodes });
                    }

                    for _ in 0..pop_num {
                        pstack = pstack.parent().unwrap();
                    }
                    let prior = *pstack.val().unwrap();
                    pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut tstack_uw) = *tstack {
                        let la_lexeme = if let Some(l) = lexeme_prefix {
                            l
                        } else {
                            self.next_lexeme(laidx)
                        };
                        tstack_uw.push(Node::Term { lexeme: la_lexeme });
                    }
                    pstack = pstack.child(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    if let Some(ref tstack_uw) = *tstack {
                        debug_assert_eq!(tstack_uw.len(), 1);
                    }
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        (laidx, pstack)
    }
}

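/// An error recoverer: given the parser's state at the point of a syntax
/// error, attempt to find repair sequences before `finish_by` is reached.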
pub(super) trait Recoverer<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    fn recover(
        &self,
        finish_by: Instant,
        parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
        in_laidx: usize,
        in_pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        spans: &mut Vec<Span>,
    ) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
}

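/// What recovery algorithm should be used when a syntax error is encountered?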
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub enum RecoveryKind {
    /// Recover from errors using the `CPCT+` algorithm.
    CPCTPlus,
    /// Don't attempt error recovery: stop at the first syntax error.
    None,
}

impl TryFrom<RecoveryKind> for Value<Location> {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: RecoveryKind) -> Result<Value<Location>, Self::Error> {
        use cfgrammar::{
            header::{Namespaced, Setting},
            Location,
        };
        let from_loc = Location::Other("From<RecoveryKind>".to_string());
        Ok(match rk {
            RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("CPCTPlus".to_string(), from_loc.clone()),
            })),
            RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("None".to_string(), from_loc.clone()),
            })),
        })
    }
}

impl TryFrom<&Value<Location>> for RecoveryKind {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: &Value<Location>) -> Result<RecoveryKind, Self::Error> {
        use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting};

        match rk {
            Value::Flag(_, loc) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert boolean to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert number to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert string to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Unitary(Namespaced {
                namespace,
                member: (kind, kind_loc),
            })) => {
                match namespace {
                    Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => {
                        return Err(HeaderError {
                            kind: HeaderErrorKind::ConversionError(
                                "RecoveryKind",
                                "Unknown namespace",
                            ),
                            locations: vec![loc.clone()],
                        })
                    }
                    _ => {}
                }
                match kind.to_lowercase().as_ref() {
                    "cpctplus" => Ok(RecoveryKind::CPCTPlus),
                    "none" => Ok(RecoveryKind::None),
                    _ => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"),
                        locations: vec![kind_loc.clone()],
                    }),
                }
            }
            Value::Setting(Setting::Constructor {
                ctor: _,
                arg:
                    Namespaced {
                        namespace: _,
                        member: (_, arg_loc),
                    },
            }) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert constructor to RecoveryKind",
                ),
                locations: vec![arg_loc.clone()],
            }),
        }
    }
}

impl quote::ToTokens for RecoveryKind {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match *self {
            RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
            RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
        })
    }
}

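/// A lexing or a parsing error. Although the two are quite distinct in terms
/// of what can be reported to users, both can (at least conceptually) occur
/// at any point of the intertwined lexing/parsing process.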
#[derive(Debug)]
pub enum LexParseError<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    LexError(LexerTypesT::LexErrorT),
    ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
}

impl<StorageT: Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
    LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
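    /// A pretty-printer for lexing/parsing errors. Errors are reported as
    /// "line x column y" positions via `lexer`, and repair sequences are
    /// printed with token names pretty-printed by `epp`; runs of adjacent
    /// deletes are coalesced into a single "Delete ..." entry.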
    pub fn pp<'a>(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
        epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
    ) -> String {
        match self {
            LexParseError::LexError(e) => {
                let ((line, col), _) = lexer.line_col(e.span());
                format!("Lexing error at line {} column {}.", line, col)
            }
            LexParseError::ParseError(e) => {
                let ((line, col), _) = lexer.line_col(e.lexeme().span());
                let mut out = format!("Parsing error at line {} column {}.", line, col);
                let repairs_len = e.repairs().len();
                if repairs_len == 0 {
                    out.push_str(" No repair sequences found.");
                } else {
                    out.push_str(" Repair sequences found:");
                    for (i, rs) in e.repairs().iter().enumerate() {
                        let padding = ((repairs_len as f64).log10() as usize)
                            - (((i + 1) as f64).log10() as usize)
                            + 1;
                        write!(out, "\n {}{}: ", " ".repeat(padding), i + 1).ok();
                        let mut rs_out = Vec::new();

                        // Merge together Deletes iff they are consecutive in
                        // the input (if they are separated by even a single
                        // character, they will not be merged).
                        let mut i = 0;
                        while i < rs.len() {
                            match rs[i] {
                                ParseRepair::Delete(l) => {
                                    let mut j = i + 1;
                                    let mut last_end = l.span().end();
                                    while j < rs.len() {
                                        if let ParseRepair::Delete(next_l) = rs[j] {
                                            if next_l.span().start() == last_end {
                                                last_end = next_l.span().end();
                                                j += 1;
                                                continue;
                                            }
                                        }
                                        break;
                                    }
                                    let t = &lexer
                                        .span_str(Span::new(l.span().start(), last_end))
                                        .replace('\n', "\\n");
                                    rs_out.push(format!("Delete {}", t));
                                    i = j;
                                }
                                ParseRepair::Insert(tidx) => {
                                    rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
                                    i += 1;
                                }
                                ParseRepair::Shift(l) => {
                                    let t = &lexer.span_str(l.span()).replace('\n', "\\n");
                                    rs_out.push(format!("Shift {}", t));
                                    i += 1;
                                }
                            }
                        }

                        out.push_str(&rs_out.join(", "));
                    }
                }
                out
            }
        }
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > fmt::Display for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            LexParseError::LexError(ref e) => Display::fmt(e, f),
            LexParseError::ParseError(ref e) => Display::fmt(e, f),
        }
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > Error for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
        T: LexError,
    > From<T> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::LexError(err)
    }
}

impl<
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(
        err: ParseError<LexerTypesT::LexemeT, StorageT>,
    ) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::ParseError(err)
    }
}

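/// A run-time parser builder: pairs a [`YaccGrammar`] with its [`StateTable`]
/// and lets the error recoverer and token costs be configured before parsing.
///
/// A minimal usage sketch (assuming a grammar `grm`, a state table `stable`,
/// and a `lexer` implementing [`NonStreamingLexer`] have been built
/// elsewhere):
///
/// ```rust,ignore
/// let (tree, errors) = RTParserBuilder::new(&grm, &stable)
///     .recoverer(RecoveryKind::CPCTPlus)
///     .parse_generictree(&lexer);
/// ```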
pub struct RTParserBuilder<
    'a,
    StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    grm: &'a YaccGrammar<StorageT>,
    stable: &'a StateTable<StorageT>,
    recoverer: RecoveryKind,
    term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
    phantom: PhantomData<(StorageT, LexerTypesT)>,
}

impl<
        'a,
        StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
        LexerTypesT: LexerTypes<StorageT = StorageT>,
    > RTParserBuilder<'a, StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
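    /// Create a new parser builder from a grammar and its state table,
    /// defaulting to `CPCT+` error recovery and a uniform token cost of 1.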
    pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
        RTParserBuilder {
            grm,
            stable,
            recoverer: RecoveryKind::CPCTPlus,
            term_costs: &|_| 1,
            phantom: PhantomData,
        }
    }

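    /// Set the recoverer for this parser to `rk`.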
    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
        self.recoverer = rk;
        self
    }

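    /// Set the token cost function used by error recovery: higher costs make
    /// a token less likely to appear in repair sequences. Costs must be
    /// non-zero.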
    pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
        self.term_costs = f;
        self
    }

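    /// Parse input, and (if possible) return a generic parse tree. See the
    /// arguments for [`parse_actions`](RTParserBuilder::parse_actions) for
    /// more details about the return value.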
    pub fn parse_generictree(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::<StorageT, LexerTypesT, Node<LexerTypesT::LexemeT, StorageT>, ()>::parse_generictree(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
        )
    }

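    /// Parse input, returning any errors found (and running no actions): an
    /// empty `Vec` means the input parsed without error.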
    pub fn parse_noaction(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return vec![e.into()],
            }
        }
        Parser::<StorageT, LexerTypesT, (), ()>::parse_noaction(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
        )
    }

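    /// Parse input, running the given action function for each production as
    /// it is reduced and threading `param` through every call. Returns the
    /// value produced by the start rule's action (if the parse succeeded,
    /// possibly after error recovery) alongside any errors found.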
    pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
        &self,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::parse_actions(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
            actions,
            param,
        )
    }
}

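/// After a parse error is encountered, the parser attempts to find a way of
/// recovering. Each entry in a sequence of repairs is represented by a
/// `ParseRepair`.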
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    /// Insert a token with the given token index.
    Insert(TIdx<StorageT>),
    /// Delete the given lexeme.
    Delete(LexemeT),
    /// Shift the given lexeme.
    Shift(LexemeT),
}

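/// Records a single parse error.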
#[derive(Clone, Debug, PartialEq)]
pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    stidx: StIdx<StorageT>,
    lexeme: LexemeT,
    repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Parse error at lexeme {:?}", self.lexeme)
    }
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}

impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
    /// Return the state table index the parser was in when the error
    /// occurred.
    pub fn stidx(&self) -> StIdx<StorageT> {
        self.stidx
    }

    /// Return the lexeme where the error occurred.
    pub fn lexeme(&self) -> &LexemeT {
        &self.lexeme
    }

    /// Return the repair sequences that were found (empty if no recovery was
    /// attempted or none was possible).
    pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
        &self.repairs
    }
}

#[cfg(test)]
pub(crate) mod test {
    use std::collections::HashMap;

    use cfgrammar::{
        yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
        Span,
    };
    use lrtable::{from_yacc, Minimiser};
    use num_traits::ToPrimitive;
    use regex::Regex;

    use super::*;
    use crate::{
        test_utils::{TestLexError, TestLexeme, TestLexerTypes},
        Lexeme, Lexer,
    };

    pub(crate) fn do_parse<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
    }

    fn do_parse_with_costs<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
        costs: &HashMap<&str, u8>,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        let grm = YaccGrammar::<u16>::new_with_storaget(
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
            grms,
        )
        .unwrap();
        let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
            .collect();
        let lexer_rules = small_lexer(lexs, rule_ids);
        let lexemes = small_lex(lexer_rules, input);
        let lexer = SmallLexer { lexemes, s: input };
        let costs_tidx = costs
            .iter()
            .map(|(k, v)| (grm.token_idx(k).unwrap(), v))
            .collect::<HashMap<_, _>>();
        let (r, errs) = RTParserBuilder::new(&grm, &stable)
            .recoverer(rcvry_kind)
            .term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
            .parse_generictree(&lexer);
        if let Some(node) = r {
            if errs.is_empty() {
                (grm, lexer, Ok(node))
            } else {
                (grm, lexer, Err((Some(node), errs)))
            }
        } else {
            (grm, lexer, Err((None, errs)))
        }
    }

    fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
        let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
        assert_eq!(expected, pt.unwrap().pp(&grm, input));
    }

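    // A minimal lexer for testing purposes only: each lexing rule is a regex
    // paired with a quoted token name, and the resulting lexemes are computed
    // up-front and stored alongside the input string.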
    pub(crate) struct SmallLexer<'input> {
        lexemes: Vec<TestLexeme>,
        s: &'input str,
    }

    impl Lexer<TestLexerTypes> for SmallLexer<'_> {
        fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
            Box::new(self.lexemes.iter().map(|x| Ok(*x)))
        }
    }

    impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
        fn span_str(&self, span: Span) -> &'input str {
            &self.s[span.start()..span.end()]
        }

        fn span_lines_str(&self, _: Span) -> &'input str {
            unreachable!();
        }

        fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
            unreachable!();
        }
    }

    fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
        let mut rules = Vec::new();
        for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
            assert!(l.rfind('\'') == Some(l.len() - 1));
            let i = l[..l.len() - 1].rfind('\'').unwrap();
            let name = &l[i + 1..l.len() - 1];
            let re = &l[..i - 1].trim();
            rules.push((
                ids_map[name],
                Regex::new(&format!("\\A(?:{})", re)).unwrap(),
            ));
        }
        rules
    }

    fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
        let mut lexemes = vec![];
        let mut i = 0;
        while i < input.len() {
            let mut longest = 0; // Length of the longest match
            let mut longest_tok_id = 0; // Token ID of the longest match
            for (tok_id, r) in rules.iter() {
                if let Some(m) = r.find(&input[i..]) {
                    let len = m.end();
                    if len > longest {
                        longest = len;
                        longest_tok_id = *tok_id;
                    }
                }
            }
            assert!(longest > 0);
            lexemes.push(Lexeme::new(longest_tok_id, i, longest));
            i += longest;
        }
        lexemes
    }

    #[test]
    fn simple_parse() {
        check_parse_output(
            "[a-zA-Z_] 'ID'
             \\+ '+'",
            "
%start E
%%
E: T '+' E
 | T;
T: 'ID';
",
            "a+b",
            "E
 T
  ID a
 + +
 E
  T
   ID b
",
        );
    }

    #[test]
    fn parse_empty_rules() {
        let lexs = "[a-zA-Z_] 'ID'";
        let grms = "%start S
%%
S: L;
L: 'ID'
 | ;
";
        check_parse_output(
            lexs, grms, "", "S
 L
",
        );

        check_parse_output(
            lexs,
            grms,
            "x",
            "S
 L
  ID x
",
        );
    }

    #[test]
    fn recursive_parse() {
        let lexs = "\\+ '+'
                    \\* '*'
                    [0-9]+ 'INT'";
        let grms = "%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";

        check_parse_output(
            lexs,
            grms,
            "2+3*4",
            "Expr
 Expr
  Term
   Factor
    INT 2
 + +
 Term
  Term
   Factor
    INT 3
  * *
  Factor
   INT 4
",
        );
        check_parse_output(
            lexs,
            grms,
            "2*3+4",
            "Expr
 Expr
  Term
   Term
    Factor
     INT 2
   * *
   Factor
    INT 3
 + +
 Term
  Factor
   INT 4
",
        );
    }

    #[test]
    fn parse_error() {
        let lexs = "\\( '('
                    \\) ')'
                    [a-zA-Z_][a-zA-Z_0-9]* 'ID'";
        let grms = "%start Call
%%
Call: 'ID' '(' ')';";

        check_parse_output(
            lexs,
            grms,
            "f()",
            "Call
 ID f
 ( (
 ) )
",
        );

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
                assert!(e.lexeme().faulty());
            }
            _ => unreachable!(),
        }

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
                assert!(!e.lexeme().faulty());
            }
            _ => unreachable!(),
        }
    }
}