#![allow(clippy::derive_partial_eq_without_eq)]
use std::{
    error::Error,
    fmt::{self, Debug, Display, Write as _},
    hash::Hash,
    marker::PhantomData,
    vec,
};

#[cfg(not(target_arch = "wasm32"))]
use std::time::{Duration, Instant};
#[cfg(target_arch = "wasm32")]
use web_time::{Duration, Instant};

use cactus::Cactus;
use cfgrammar::{RIdx, Span, TIdx, header::Value, span::Location, yacc::YaccGrammar};
use lrtable::{Action, StIdx, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::TokenStream;
use quote::quote;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::{LexError, Lexeme, LexerTypes, NonStreamingLexer, cpctplus};

// How long error recovery may run before giving up (in milliseconds).
#[cfg(test)]
const RECOVERY_TIME_BUDGET: u64 = 60_000; // milliseconds
#[cfg(not(test))]
const RECOVERY_TIME_BUDGET: u64 = 500; // milliseconds

#[deprecated(
    since = "0.14.0",
    note = "Use the version of `Node` exported from your `lrpar_mod!`"
)]
pub type Node<T, S> = _deprecated_moved_::Node<T, S>;

#[doc(hidden)]
pub mod _deprecated_moved_ {
    use super::*;
    /// A generic parse tree.
    #[derive(Debug, Clone, PartialEq)]
    pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
        /// Terminals store a single lexeme.
        Term { lexeme: LexemeT },
        /// Nonterminals reference a rule and zero or more child nodes.
        Nonterm {
            ridx: RIdx<StorageT>,
            nodes: Vec<Node<LexemeT, StorageT>>,
        },
    }
}

#[allow(deprecated)]
impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
where
    usize: AsPrimitive<StorageT>,
{
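    /// Return a pretty-printed version of this node: one line per node, with
    /// each child indented one space deeper than its parent. A minimal usage
    /// sketch (`grm`, `stable`, `lexer`, and `input` are assumed to exist;
    /// the deprecated `parse_generictree` is used purely for illustration):
    ///
    /// ```ignore
    /// let (tree, _errs) = RTParserBuilder::new(&grm, &stable).parse_generictree(&lexer);
    /// if let Some(tree) = tree {
    ///     println!("{}", tree.pp(&grm, input));
    /// }
    /// ```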
    pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
        let mut st = vec![(0, self)]; // Stack of (indent level, node) pairs.
        let mut s = String::new();
        while let Some((indent, e)) = st.pop() {
            for _ in 0..indent {
                s.push(' ');
            }
            match *e {
                Node::Term { lexeme } => {
                    let tidx = TIdx(lexeme.tok_id());
                    let tn = grm.token_name(tidx).unwrap();
                    let lt = &input[lexeme.span().start()..lexeme.span().end()];
                    writeln!(s, "{} {}", tn, lt).ok();
                }
                Node::Nonterm { ridx, ref nodes } => {
                    writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
                    for x in nodes.iter().rev() {
                        st.push((indent + 1, x));
                    }
                }
            }
        }
        s
    }
}

type PStack<StorageT> = Vec<StIdx<StorageT>>; // Parse stack.
type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
    RIdx<StorageT>,
    &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    Span,
    vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
    ParamT,
) -> ActionT;

/// Values on the action stack: either the result of a user action for an
/// already-reduced rule, or a lexeme that has been shifted but not yet
/// consumed by an action.
#[derive(Debug)]
pub enum AStackType<LexemeT, ActionT> {
    ActionType(ActionT),
    Lexeme(LexemeT),
}

pub(super) struct Parser<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT: 'a,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    rcvry_kind: RecoveryKind,
    pub(super) grm: &'a YaccGrammar<StorageT>,
    pub(super) token_cost: Box<TokenCostFn<'a, StorageT>>,
    pub(super) stable: &'a StateTable<StorageT>,
    lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
    // One action function per production, indexed by production index.
    actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
    param: ParamT,
}

impl<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    Node,
>
    Parser<
        'a,
        'b,
        'input,
        StorageT,
        LexerTypesT,
        Node,
        (
            &dyn Fn(LexerTypesT::LexemeT) -> Node,
            &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
        ),
    >
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_map(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
        fterm: &'a dyn Fn(LexerTypesT::LexemeT) -> Node,
        fnonterm: &'a dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
    ) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<
            ActionFn<
                'a,
                'b,
                'input,
                StorageT,
                LexerTypesT,
                Node,
                (
                    &'a dyn Fn(LexerTypesT::LexemeT) -> Node,
                    &'a dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
                ),
            >,
        > = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &action_map);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (fterm, fnonterm),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }
}

fn action_map<StorageT, LexerTypesT: LexerTypes, Node>(
    ridx: RIdx<StorageT>,
    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
    _span: Span,
    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node>>,
    param: (
        &dyn Fn(LexerTypesT::LexemeT) -> Node,
        &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
    ),
) -> Node
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::LexemeT: Lexeme<StorageT>,
{
    let (fterm, fnonterm) = param;
    let mut nodes = Vec::with_capacity(astack.len());
    for a in astack {
        nodes.push(match a {
            AStackType::ActionType(n) => n,
            AStackType::Lexeme(lexeme) => fterm(lexeme),
        });
    }
    fnonterm(ridx, nodes)
}

#[deprecated(
    since = "0.14.0",
    note = "Deprecated along with `parse_generictree`; there is no direct replacement besides a custom action"
)]
#[allow(deprecated)]
pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
    ridx: RIdx<StorageT>,
    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
    _span: Span,
    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
    _param: (),
) -> Node<LexerTypesT::LexemeT, StorageT>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::LexemeT: Lexeme<StorageT>,
{
    let mut nodes = Vec::with_capacity(astack.len());
    for a in astack {
        nodes.push(match a {
            AStackType::ActionType(n) => n,
            AStackType::Lexeme(lexeme) => Node::Term { lexeme },
        });
    }
    Node::Nonterm { ridx, nodes }
}

impl<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT: 'a,
    ParamT: Clone,
> Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_actions(
        rcvry_kind: RecoveryKind,
        grm: &'a YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &'a StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions,
            param,
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }

    /// The main LR loop: drive the parser from lexeme `laidx` onwards,
    /// executing an action on each reduction, until the input is either
    /// accepted or an unrecoverable error is encountered. When an error is
    /// found, the configured `RecoveryKind` decides whether repair sequences
    /// are searched for (within `RECOVERY_TIME_BUDGET`); every error is
    /// appended to `errors`. Returns `Some(value)` on acceptance and `None`
    /// otherwise.
    fn lr(
        &self,
        mut laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
        spans: &mut Vec<Span>,
    ) -> Option<ActionT> {
        let mut recoverer = None;
        let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
        loop {
            debug_assert_eq!(astack.len(), spans.len());
            let stidx = *pstack.last().unwrap();
            let la_tidx = self.next_tidx(laidx);

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());

                    let span = if spans.is_empty() {
                        Span::new(0, 0)
                    } else if pop_idx - 1 < spans.len() {
                        Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
                    } else {
                        Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
                    };
                    spans.truncate(pop_idx - 1);
                    spans.push(span);

                    let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                        ridx,
                        self.lexer,
                        span,
                        astack.drain(pop_idx - 1..),
                        self.param.clone(),
                    ));
                    astack.push(v);
                }
                Action::Shift(state_id) => {
                    let la_lexeme = self.next_lexeme(laidx);
                    pstack.push(state_id);
                    astack.push(AStackType::Lexeme(la_lexeme));

                    spans.push(la_lexeme.span());
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    debug_assert_eq!(astack.len(), 1);
                    match astack.drain(..).next().unwrap() {
                        AStackType::ActionType(v) => return Some(v),
                        _ => unreachable!(),
                    }
                }
                Action::Error => {
                    if recoverer.is_none() {
                        recoverer = Some(match self.rcvry_kind {
                            RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
                            RecoveryKind::None => {
                                let la_lexeme = self.next_lexeme(laidx);
                                errors.push(
                                    ParseError {
                                        stidx,
                                        lexeme: la_lexeme,
                                        repairs: vec![],
                                    }
                                    .into(),
                                );
                                return None;
                            }
                        });
                    }

                    let before = Instant::now();
                    let finish_by = before + recovery_budget;
                    let (new_laidx, repairs) = recoverer
                        .as_ref()
                        .unwrap()
                        .as_ref()
                        .recover(finish_by, self, laidx, pstack, astack, spans);
                    let after = Instant::now();
                    recovery_budget = recovery_budget
                        .checked_sub(after - before)
                        .unwrap_or_else(|| Duration::new(0, 0));
                    let keep_going = !repairs.is_empty();
                    let la_lexeme = self.next_lexeme(laidx);
                    errors.push(
                        ParseError {
                            stidx,
                            lexeme: la_lexeme,
                            repairs,
                        }
                        .into(),
                    );
                    if !keep_going {
                        return None;
                    }
                    laidx = new_laidx;
                }
            }
        }
    }

    /// Parse from lexeme `laidx` up to (but excluding) `end_laidx`, mutating
    /// `pstack` (and, if provided, `astack` and `spans`) as the parse
    /// progresses. If `lexeme_prefix` is given, it is used as the sole
    /// lookahead. Stops early on accept or error; returns the next
    /// lookahead index.
    pub(super) fn lr_upto(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
        spans: &mut Option<&mut Vec<Span>>,
    ) -> usize {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx && laidx <= self.lexemes.len() {
            let stidx = *pstack.last().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = *spans {
                            let span = if spans_uw.is_empty() {
                                Span::new(0, 0)
                            } else if pop_idx - 1 < spans_uw.len() {
                                Span::new(
                                    spans_uw[pop_idx - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            } else {
                                Span::new(
                                    spans_uw[spans_uw.len() - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            };
                            spans_uw.truncate(pop_idx - 1);
                            spans_uw.push(span);

                            let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                                ridx,
                                self.lexer,
                                span,
                                astack_uw.drain(pop_idx - 1..),
                                self.param.clone(),
                            ));
                            astack_uw.push(v);
                        } else {
                            unreachable!();
                        }
                    }

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(spans_uw) = spans {
                            let la_lexeme = if let Some(l) = lexeme_prefix {
                                l
                            } else {
                                self.next_lexeme(laidx)
                            };
                            astack_uw.push(AStackType::Lexeme(la_lexeme));
                            spans_uw.push(la_lexeme.span());
                        }
                    }
                    pstack.push(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        laidx
    }

    /// Return the lexeme at index `laidx`. If `laidx` is one past the end of
    /// the input, fabricate a zero-length "faulty" EOF lexeme positioned at
    /// the end of the last real lexeme.
    pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
        let llen = self.lexemes.len();
        debug_assert!(laidx <= llen);
        if laidx < llen {
            self.lexemes[laidx]
        } else {
            // We have to artificially construct a lexeme for EOF.
            let last_la_end = if llen == 0 {
                0
            } else {
                debug_assert!(laidx > 0);
                let last_la = self.lexemes[laidx - 1];
                last_la.span().end()
            };

            Lexeme::new_faulty(
                StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
                last_la_end,
                0,
            )
        }
    }

    /// Return the token index of the lexeme at `laidx`, or the EOF token
    /// index if `laidx` is past the end of the input.
    pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
        let ll = self.lexemes.len();
        debug_assert!(laidx <= ll);
        if laidx < ll {
            TIdx(self.lexemes[laidx].tok_id())
        } else {
            self.grm.eof_token_idx()
        }
    }

    /// Parse from lexeme `laidx` up to (but excluding) `end_laidx` on the
    /// parent-pointer (`Cactus`) stack `pstack`, optionally building a
    /// generic parse tree in `tstack`. Stops early on accept or error;
    /// returns the new lookahead index and the resulting stack.
    #[allow(deprecated)]
    pub(super) fn lr_cactus(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        mut pstack: Cactus<StIdx<StorageT>>,
        tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
    ) -> (usize, Cactus<StIdx<StorageT>>) {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx {
            let stidx = *pstack.val().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_num = self.grm.prod(pidx).len();
                    if let Some(ref mut tstack_uw) = *tstack {
                        let nodes = tstack_uw
                            .drain(pstack.len() - pop_num - 1..)
                            .collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
                        tstack_uw.push(Node::Nonterm { ridx, nodes });
                    }

                    for _ in 0..pop_num {
                        pstack = pstack.parent().unwrap();
                    }
                    let prior = *pstack.val().unwrap();
                    pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut tstack_uw) = *tstack {
                        let la_lexeme = if let Some(l) = lexeme_prefix {
                            l
                        } else {
                            self.next_lexeme(laidx)
                        };
                        tstack_uw.push(Node::Term { lexeme: la_lexeme });
                    }
                    pstack = pstack.child(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    if let Some(ref tstack_uw) = *tstack {
                        debug_assert_eq!(tstack_uw.len(), 1);
                    }
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        (laidx, pstack)
    }
}

/// The interface implemented by error recovery algorithms. Given the parser
/// state at the point of an error, `recover` returns the lookahead index at
/// which normal parsing can resume, together with the repair sequences found.
pub(super) trait Recoverer<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    fn recover(
        &self,
        finish_by: Instant,
        parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
        in_laidx: usize,
        in_pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        spans: &mut Vec<Span>,
    ) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
}

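/// Which recovery algorithm should be used when a syntax error is
/// encountered? A minimal selection sketch (`grm`, `stable`, and `lexer` are
/// assumed to be built elsewhere; the deprecated `parse_generictree` is used
/// purely for illustration):
///
/// ```ignore
/// let (tree, errs) = RTParserBuilder::new(&grm, &stable)
///     .recoverer(RecoveryKind::None) // fail fast rather than searching for repairs
///     .parse_generictree(&lexer);
/// ```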
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub enum RecoveryKind {
    /// Use the CPCT+ error recovery algorithm.
    CPCTPlus,
    /// Do not attempt error recovery: report the first error and stop.
    None,
}

impl TryFrom<RecoveryKind> for Value<Location> {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: RecoveryKind) -> Result<Value<Location>, Self::Error> {
        use cfgrammar::{
            Location,
            header::{Namespaced, Setting},
        };
        let from_loc = Location::Other("From<RecoveryKind>".to_string());
        Ok(match rk {
            RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("CPCTPlus".to_string(), from_loc.clone()),
            })),
            RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("None".to_string(), from_loc.clone()),
            })),
        })
    }
}

impl TryFrom<&Value<Location>> for RecoveryKind {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: &Value<Location>) -> Result<RecoveryKind, Self::Error> {
        use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting};

        match rk {
            Value::Flag(_, loc) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert boolean to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert number to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert string to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Unitary(Namespaced {
                namespace,
                member: (kind, kind_loc),
            })) => {
                match namespace {
                    Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => {
                        return Err(HeaderError {
                            kind: HeaderErrorKind::ConversionError(
                                "RecoveryKind",
                                "Unknown namespace",
                            ),
                            locations: vec![loc.clone()],
                        });
                    }
                    _ => {}
                }
                match kind.to_lowercase().as_ref() {
                    "cpctplus" => Ok(RecoveryKind::CPCTPlus),
                    "none" => Ok(RecoveryKind::None),
                    _ => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"),
                        locations: vec![kind_loc.clone()],
                    }),
                }
            }
            Value::Setting(Setting::Constructor {
                ctor: _,
                arg:
                    Namespaced {
                        namespace: _,
                        member: (_, arg_loc),
                    },
            }) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert constructor to RecoveryKind",
                ),
                locations: vec![arg_loc.clone()],
            }),
        }
    }
}

impl quote::ToTokens for RecoveryKind {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match *self {
            RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
            RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
        })
    }
}

/// A lexing or parsing error: either can occur at any point of the
/// intertwined lexing/parsing process.
#[derive(Debug)]
pub enum LexParseError<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    LexError(LexerTypesT::LexErrorT),
    ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
}

impl<StorageT: Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
    LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
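    /// Return a pretty-printed version of this error as a `String`, using
    /// `epp` to map a token index to a user-friendly name (if one exists).
    /// A minimal usage sketch (`grm`, `lexer`, and `errs` are assumed to
    /// exist; `token_epp` is cfgrammar's "%epp" lookup):
    ///
    /// ```ignore
    /// for e in errs {
    ///     println!("{}", e.pp(&lexer, &|t| grm.token_epp(t)));
    /// }
    /// ```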
    pub fn pp<'a>(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
        epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
    ) -> String {
        match self {
            LexParseError::LexError(e) => {
                let ((line, col), _) = lexer.line_col(e.span());
                format!("Lexing error at line {} column {}.", line, col)
            }
            LexParseError::ParseError(e) => {
                let ((line, col), _) = lexer.line_col(e.lexeme().span());
                let mut out = format!("Parsing error at line {} column {}.", line, col);
                let repairs_len = e.repairs().len();
                if repairs_len == 0 {
                    out.push_str(" No repair sequences found.");
                } else {
                    out.push_str(" Repair sequences found:");
                    for (i, rs) in e.repairs().iter().enumerate() {
                        let padding = ((repairs_len as f64).log10() as usize)
                            - (((i + 1) as f64).log10() as usize)
                            + 1;
                        write!(out, "\n  {}{}: ", " ".repeat(padding), i + 1).ok();
                        let mut rs_out = Vec::new();

                        // Merge runs of adjacent Delete repairs into a single
                        // "Delete ..." message.
                        let mut i = 0;
                        while i < rs.len() {
                            match rs[i] {
                                ParseRepair::Delete(l) => {
                                    let mut j = i + 1;
                                    let mut last_end = l.span().end();
                                    while j < rs.len() {
                                        if let ParseRepair::Delete(next_l) = rs[j] {
                                            if next_l.span().start() == last_end {
                                                last_end = next_l.span().end();
                                                j += 1;
                                                continue;
                                            }
                                        }
                                        break;
                                    }
                                    let t = &lexer
                                        .span_str(Span::new(l.span().start(), last_end))
                                        .replace('\n', "\\n");
                                    rs_out.push(format!("Delete {}", t));
                                    i = j;
                                }
                                ParseRepair::Insert(tidx) => {
                                    rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
                                    i += 1;
                                }
                                ParseRepair::Shift(l) => {
                                    let t = &lexer.span_str(l.span()).replace('\n', "\\n");
                                    rs_out.push(format!("Shift {}", t));
                                    i += 1;
                                }
                            }
                        }

                        out.push_str(&rs_out.join(", "));
                    }
                }
                out
            }
        }
    }
}

impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> fmt::Display for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            LexParseError::LexError(ref e) => Display::fmt(e, f),
            LexParseError::ParseError(ref e) => Display::fmt(e, f),
        }
    }
}

impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> Error for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
}

impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
    T: LexError,
> From<T> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::LexError(err)
    }
}

impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(
        err: ParseError<LexerTypesT::LexemeT, StorageT>,
    ) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::ParseError(err)
    }
}

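/// A run-time parser builder: ties a `YaccGrammar` and a `StateTable`
/// together into a parser. A minimal usage sketch (`grm`, `stable`, `lexer`,
/// `fterm`, and `fnonterm` are assumed to be defined elsewhere):
///
/// ```ignore
/// let (res, errs) = RTParserBuilder::new(&grm, &stable)
///     .recoverer(RecoveryKind::CPCTPlus)
///     .parse_map(&lexer, &fterm, &fnonterm);
/// ```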
pub struct RTParserBuilder<
    'a,
    StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    grm: &'a YaccGrammar<StorageT>,
    stable: &'a StateTable<StorageT>,
    recoverer: RecoveryKind,
    term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
    phantom: PhantomData<(StorageT, LexerTypesT)>,
}

impl<
    'a,
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> RTParserBuilder<'a, StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Create a new parser builder for the given grammar and state table,
    /// defaulting to CPCT+ error recovery and a uniform token cost of 1.
    pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
        RTParserBuilder {
            grm,
            stable,
            recoverer: RecoveryKind::CPCTPlus,
            term_costs: &|_| 1,
            phantom: PhantomData,
        }
    }

    /// Set the recoverer for this parser to `rk`.
    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
        self.recoverer = rk;
        self
    }

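    /// Set the function assigning a (non-zero) cost to each token. Error
    /// recovery consults these costs when ranking repair sequences, so a
    /// token with a higher cost is less likely to appear in a repair. A
    /// small sketch (a token named "INT" is assumed to exist in the
    /// grammar):
    ///
    /// ```ignore
    /// let int_tidx = grm.token_idx("INT").unwrap();
    /// let cost = move |tidx| if tidx == int_tidx { 2 } else { 1 };
    /// let builder = builder.term_costs(&cost);
    /// ```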
    pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
        self.term_costs = f;
        self
    }

    /// Parse input, building a generic parse tree of `Node`s.
    #[deprecated(
        since = "0.14.0",
        note = "Use `parse_map` to return a `Node` from your `lrpar_mod!` instead"
    )]
    #[allow(deprecated)]
    pub fn parse_generictree(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        self.parse_map(lexer, &|lexeme| Node::Term { lexeme }, &|ridx, nodes| {
            Node::Nonterm { ridx, nodes }
        })
    }

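    /// Parse input, calling `fterm` on every shifted lexeme and `fnonterm`
    /// on every reduced rule, returning the mapped value for the start rule
    /// (if parsing succeeded) along with any errors. A sketch mapping the
    /// parse into nested `(name, children)` values (`grm` is assumed to be
    /// in scope and `Tree` is a hypothetical caller-defined type):
    ///
    /// ```ignore
    /// let (tree, errs) = builder.parse_map(
    ///     &lexer,
    ///     &|lexeme| Tree::Term(grm.token_name(TIdx(lexeme.tok_id())).unwrap()),
    ///     &|ridx, nodes| Tree::NonTerm(grm.rule_name_str(ridx), nodes),
    /// );
    /// ```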
    pub fn parse_map<Node>(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
        fterm: &dyn Fn(LexerTypesT::LexemeT) -> Node,
        fnonterm: &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
    ) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::<
            StorageT,
            LexerTypesT,
            Node,
            (
                &dyn Fn(LexerTypesT::LexemeT) -> Node,
                &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
            ),
        >::parse_map(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
            fterm,
            fnonterm,
        )
    }

    /// Parse input, discarding all mapped values and returning only errors.
    #[deprecated(since = "0.14.0", note = "Use `parse_map` instead")]
    pub fn parse_noaction(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        self.parse_map(lexer, &|_| (), &|_, _| ()).1
    }

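    /// Parse input, executing the given `actions` (one function per
    /// production, indexed by production index) with `param` passed to each
    /// action; returns the start rule's action value (if parsing succeeded)
    /// plus any errors. This is the low-level entry point normally driven by
    /// `lrpar_mod!`-generated code; a hand-rolled sketch (`build_action_table`
    /// is hypothetical):
    ///
    /// ```ignore
    /// let actions = build_action_table(); // one action function per production
    /// let (res, errs) = builder.parse_actions(&lexer, actions.as_slice(), ());
    /// ```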
    pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
        &self,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::parse_actions(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
            actions,
            param,
        )
    }
}

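/// After a parse error, error recovery searches for sequences of repairs
/// that would have allowed parsing to continue; each repair is one of the
/// variants below. A sketch of rendering a repair sequence by hand (`grm`
/// and `lexer` are assumed to exist; `LexParseError::pp` does this for you):
///
/// ```ignore
/// for repair in repair_seq {
///     match repair {
///         ParseRepair::Insert(tidx) => println!("Insert {}", grm.token_name(*tidx).unwrap()),
///         ParseRepair::Delete(l) => println!("Delete {}", lexer.span_str(l.span())),
///         ParseRepair::Shift(l) => println!("Shift {}", lexer.span_str(l.span())),
///     }
/// }
/// ```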
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    /// Insert a token whose index is the one given.
    Insert(TIdx<StorageT>),
    /// Delete the given lexeme.
    Delete(LexemeT),
    /// Shift the given lexeme.
    Shift(LexemeT),
}

/// Records a single parse error.
#[derive(Clone, Debug, PartialEq)]
pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    stidx: StIdx<StorageT>,
    lexeme: LexemeT,
    repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Parse error at lexeme {:?}", self.lexeme)
    }
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}

impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
    /// Return the state table index at which the error was detected.
    pub fn stidx(&self) -> StIdx<StorageT> {
        self.stidx
    }

    /// Return the lexeme at which the error was detected.
    pub fn lexeme(&self) -> &LexemeT {
        &self.lexeme
    }

    /// Return the repair sequences that error recovery found for this error
    /// (empty if recovery was disabled or unsuccessful).
    pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
        &self.repairs
    }
}

#[cfg(test)]
pub(crate) mod test {
    use std::collections::HashMap;

    use cfgrammar::{
        Span,
        yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
    };
    use lrtable::{Minimiser, from_yacc};
    use num_traits::ToPrimitive;
    use regex::Regex;

    use super::*;
    use crate::{
        Lexeme, Lexer,
        test_utils::{TestLexError, TestLexeme, TestLexerTypes},
    };

    #[allow(deprecated)]
    pub(crate) fn do_parse<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
    }

    #[allow(deprecated)]
    fn do_parse_with_costs<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
        costs: &HashMap<&str, u8>,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        let grm = YaccGrammar::<u16>::new_with_storaget(
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
            grms,
        )
        .unwrap();
        let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
            .collect();
        let lexer_rules = small_lexer(lexs, rule_ids);
        let lexemes = small_lex(lexer_rules, input);
        let lexer = SmallLexer { lexemes, s: input };
        let costs_tidx = costs
            .iter()
            .map(|(k, v)| (grm.token_idx(k).unwrap(), v))
            .collect::<HashMap<_, _>>();
        let (r, errs) = RTParserBuilder::new(&grm, &stable)
            .recoverer(rcvry_kind)
            .term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
            .parse_generictree(&lexer);
        if let Some(node) = r {
            if errs.is_empty() {
                (grm, lexer, Ok(node))
            } else {
                (grm, lexer, Err((Some(node), errs)))
            }
        } else {
            (grm, lexer, Err((None, errs)))
        }
    }

    fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
        let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
        assert_eq!(expected, pt.unwrap().pp(&grm, input));
    }

    // A minimal lexer for testing: it holds pre-lexed lexemes and the input
    // they were lexed from.
    pub(crate) struct SmallLexer<'input> {
        lexemes: Vec<TestLexeme>,
        s: &'input str,
    }

    impl Lexer<TestLexerTypes> for SmallLexer<'_> {
        fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
            Box::new(self.lexemes.iter().map(|x| Ok(*x)))
        }
    }

    impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
        fn span_str(&self, span: Span) -> &'input str {
            &self.s[span.start()..span.end()]
        }

        fn span_lines_str(&self, _: Span) -> &'input str {
            unreachable!();
        }

        fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
            unreachable!();
        }
    }

    // Parse a simple lexer specification of the form `regex 'NAME'`, one
    // rule per line, into (token id, regex) pairs.
    fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
        let mut rules = Vec::new();
        for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
            assert!(l.rfind('\'') == Some(l.len() - 1));
            let i = l[..l.len() - 1].rfind('\'').unwrap();
            let name = &l[i + 1..l.len() - 1];
            let re = &l[..i - 1].trim();
            rules.push((
                ids_map[name],
                Regex::new(&format!("\\A(?:{})", re)).unwrap(),
            ));
        }
        rules
    }

    // A longest-match lexer: at each position, try every rule and take the
    // longest match.
    fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
        let mut lexemes = vec![];
        let mut i = 0;
        while i < input.len() {
            let mut longest = 0; // Length of the longest match.
            let mut longest_tok_id = 0; // Token ID of the longest match.
            for (tok_id, r) in rules.iter() {
                if let Some(m) = r.find(&input[i..]) {
                    let len = m.end();
                    if len > longest {
                        longest = len;
                        longest_tok_id = *tok_id;
                    }
                }
            }
            assert!(longest > 0);
            lexemes.push(Lexeme::new(longest_tok_id, i, longest));
            i += longest;
        }
        lexemes
    }

    #[test]
    fn simple_parse() {
        check_parse_output(
            "[a-zA-Z_] 'ID'
             \\+ '+'",
            "
%start E
%%
E: T '+' E
 | T;
T: 'ID';
",
            "a+b",
            "E
 T
  ID a
 + +
 E
  T
   ID b
",
        );
    }

    #[test]
    fn parse_empty_rules() {
        let lexs = "[a-zA-Z_] 'ID'";
        let grms = "%start S
%%
S: L;
L: 'ID'
 | ;
";
        check_parse_output(
            lexs, grms, "", "S
 L
",
        );

        check_parse_output(
            lexs,
            grms,
            "x",
            "S
 L
  ID x
",
        );
    }

    #[test]
    fn recursive_parse() {
        let lexs = "\\+ '+'
                    \\* '*'
                    [0-9]+ 'INT'";
        let grms = "%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";

        check_parse_output(
            lexs,
            grms,
            "2+3*4",
            "Expr
 Expr
  Term
   Factor
    INT 2
 + +
 Term
  Term
   Factor
    INT 3
  * *
  Factor
   INT 4
",
        );
        check_parse_output(
            lexs,
            grms,
            "2*3+4",
            "Expr
 Expr
  Term
   Term
    Factor
     INT 2
   * *
   Factor
    INT 3
 + +
 Term
  Factor
   INT 4
",
        );
    }

    #[test]
    fn parse_error() {
        let lexs = "\\( '('
                    \\) ')'
                    [a-zA-Z_][a-zA-Z_0-9]* 'ID'";
        let grms = "%start Call
%%
Call: 'ID' '(' ')';";

        check_parse_output(
            lexs,
            grms,
            "f()",
            "Call
 ID f
 ( (
 ) )
",
        );

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
                assert!(e.lexeme().faulty());
            }
            _ => unreachable!(),
        }

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
                assert!(!e.lexeme().faulty());
            }
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_parse_map() {
        #[derive(PartialEq, Debug)]
        enum TestParseMap<'a> {
            Term(&'a str, &'a str),
            NonTerm(&'a str, Vec<TestParseMap<'a>>),
        }
        let lex_src = r#"[0-9]+ 'INT'
\+ '+'
\* '*'
"#;
        let grammar_src = "
%grmtools{YaccKind: Original(NoAction)}
%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";
        let input_src = "2*3+4";
        let grm = str::parse::<YaccGrammar<u16>>(grammar_src).unwrap();
        let (_, stable) = lrtable::from_yacc(&grm, lrtable::Minimiser::Pager).unwrap();
        let rt_parser = RTParserBuilder::new(&grm, &stable);
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
            .collect();
        let lexer_rules = small_lexer(lex_src, rule_ids);
        let lexemes = small_lex(lexer_rules, input_src);
        let lexer = SmallLexer {
            lexemes,
            s: input_src,
        };

        let (parse_map, errs) = rt_parser.parse_map(
            &lexer,
            &|lexeme: TestLexeme| {
                let tidx = TIdx(lexeme.tok_id());
                let tn = &grm.token_name(tidx).unwrap();
                let lt = &input_src[lexeme.span().start()..lexeme.span().end()];
                TestParseMap::Term(tn, lt)
            },
            &|ridx, nodes| {
                let rule_name = &grm.rule_name_str(ridx);
                TestParseMap::NonTerm(rule_name, nodes)
            },
        );
        assert!(parse_map.is_some() && errs.is_empty());
        // The parse map above should mirror the generic parse tree below.
        check_parse_output(
            lex_src,
            grammar_src,
            input_src,
            "Expr
 Expr
  Term
   Term
    Factor
     INT 2
   * *
   Factor
    INT 3
 + +
 Term
  Factor
   INT 4
",
        );

        let expected_parse_map = {
            use TestParseMap::*;
            NonTerm(
                "Expr",
                vec![
                    NonTerm(
                        "Expr",
                        vec![NonTerm(
                            "Term",
                            vec![
                                NonTerm("Term", vec![NonTerm("Factor", vec![Term("INT", "2")])]),
                                Term("*", "*"),
                                NonTerm("Factor", vec![Term("INT", "3")]),
                            ],
                        )],
                    ),
                    Term("+", "+"),
                    NonTerm("Term", vec![NonTerm("Factor", vec![Term("INT", "4")])]),
                ],
            )
        };
        assert_eq!(parse_map, Some(expected_parse_map));
    }
}