1#![allow(clippy::derive_partial_eq_without_eq)]
2use std::{
3 error::Error,
4 fmt::{self, Debug, Display, Write as _},
5 hash::Hash,
6 marker::PhantomData,
7 vec,
8};
9
10#[cfg(not(target_arch = "wasm32"))]
12use std::time::{Duration, Instant};
13#[cfg(target_arch = "wasm32")]
14use web_time::{Duration, Instant};
15
16use cactus::Cactus;
17use cfgrammar::{RIdx, Span, TIdx, header::Value, span::Location, yacc::YaccGrammar};
18use lrtable::{Action, StIdx, StateTable};
19use num_traits::{AsPrimitive, PrimInt, Unsigned};
20use proc_macro2::TokenStream;
21use quote::quote;
22#[cfg(feature = "serde")]
23use serde::{Deserialize, Serialize};
24
25use crate::{LexError, Lexeme, LexerTypes, NonStreamingLexer, cpctplus};
26
27#[cfg(test)]
28const RECOVERY_TIME_BUDGET: u64 = 60_000; #[cfg(not(test))]
30const RECOVERY_TIME_BUDGET: u64 = 500; #[deprecated(
33 since = "0.14.0",
34 note = "Use the version of `Node` exported from your `lrpar_mod!`"
35)]
36pub type Node<T, S> = _deprecated_moved_::Node<T, S>;
37
38#[doc(hidden)]
39pub mod _deprecated_moved_ {
40 use super::*;
41 #[derive(Debug, Clone, PartialEq)]
43 pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
44 Term { lexeme: LexemeT },
46 Nonterm {
48 ridx: RIdx<StorageT>,
49 nodes: Vec<Node<LexemeT, StorageT>>,
50 },
51 }
52}
53
54#[allow(deprecated)]
55impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
56where
57 usize: AsPrimitive<StorageT>,
58{
59 pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
61 let mut st = vec![(0, self)]; let mut s = String::new();
63 while let Some((indent, e)) = st.pop() {
64 for _ in 0..indent {
65 s.push(' ');
66 }
67 match *e {
68 Node::Term { lexeme } => {
69 let tidx = TIdx(lexeme.tok_id());
70 let tn = grm.token_name(tidx).unwrap();
71 let lt = &input[lexeme.span().start()..lexeme.span().end()];
72 writeln!(s, "{} {}", tn, lt).ok();
73 }
74 Node::Nonterm { ridx, ref nodes } => {
75 writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
76 for x in nodes.iter().rev() {
77 st.push((indent + 1, x));
78 }
79 }
80 }
81 }
82 s
83 }
84}
85
86type PStack<StorageT> = Vec<StIdx<StorageT>>; type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
88type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
89 RIdx<StorageT>,
90 &'b dyn NonStreamingLexer<'input, LexerTypesT>,
91 Span,
92 vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
93 ParamT,
94) -> ActionT;
95
96#[derive(Debug)]
97pub enum AStackType<LexemeT, ActionT> {
98 ActionType(ActionT),
99 Lexeme(LexemeT),
100}
101
102pub(super) struct Parser<
103 'a,
104 'b: 'a,
105 'input: 'b,
106 StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
107 LexerTypesT: LexerTypes<StorageT = StorageT>,
108 ActionT: 'a,
109 ParamT: Clone,
110> where
111 usize: AsPrimitive<StorageT>,
112{
113 rcvry_kind: RecoveryKind,
114 pub(super) grm: &'a YaccGrammar<StorageT>,
115 pub(super) token_cost: TokenCostFn<'a, StorageT>,
116 pub(super) stable: &'a StateTable<StorageT>,
117 lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
118 pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
121 actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
122 param: ParamT,
123}
124
125impl<
126 'a,
127 'b: 'a,
128 'input: 'b,
129 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
130 LexerTypesT: LexerTypes<StorageT = StorageT>,
131 Node,
132>
133 Parser<
134 'a,
135 'b,
136 'input,
137 StorageT,
138 LexerTypesT,
139 Node,
140 (
141 &dyn Fn(LexerTypesT::LexemeT) -> Node,
142 &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
143 ),
144 >
145where
146 usize: AsPrimitive<StorageT>,
147{
148 fn parse_map(
149 rcvry_kind: RecoveryKind,
150 grm: &YaccGrammar<StorageT>,
151 token_cost: TokenCostFn<'a, StorageT>,
152 stable: &StateTable<StorageT>,
153 lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
154 lexemes: Vec<LexerTypesT::LexemeT>,
155 fterm: &'a dyn Fn(LexerTypesT::LexemeT) -> Node,
156 fnonterm: &'a dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
157 ) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
158 for tidx in grm.iter_tidxs() {
159 assert!(token_cost(tidx) > 0);
160 }
161 let mut actions: Vec<
162 ActionFn<
163 'a,
164 'b,
165 'input,
166 StorageT,
167 LexerTypesT,
168 Node,
169 (
170 &'a dyn Fn(LexerTypesT::LexemeT) -> Node,
171 &'a dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
172 ),
173 >,
174 > = Vec::new();
175 actions.resize(usize::from(grm.prods_len()), &action_map);
176 let psr = Parser {
177 rcvry_kind,
178 grm,
179 token_cost,
180 stable,
181 lexer,
182 lexemes,
183 actions: actions.as_slice(),
184 param: (fterm, fnonterm),
185 };
186 let mut pstack = vec![stable.start_state()];
187 let mut astack = Vec::new();
188 let mut errors = Vec::new();
189 let mut spans = Vec::new();
190 let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
191 (accpt, errors)
192 }
193}
194
195fn action_map<StorageT, LexerTypesT: LexerTypes, Node>(
196 ridx: RIdx<StorageT>,
197 _lexer: &dyn NonStreamingLexer<LexerTypesT>,
198 _span: Span,
199 astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node>>,
200 param: (
201 &dyn Fn(LexerTypesT::LexemeT) -> Node,
202 &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
203 ),
204) -> Node
205where
206 usize: AsPrimitive<LexerTypesT::StorageT>,
207 LexerTypesT::LexemeT: Lexeme<StorageT>,
208{
209 let (fterm, fnonterm) = param;
210 let mut nodes = Vec::with_capacity(astack.len());
211 for a in astack {
212 nodes.push(match a {
213 AStackType::ActionType(n) => n,
214 AStackType::Lexeme(lexeme) => fterm(lexeme),
215 });
216 }
217 fnonterm(ridx, nodes)
218}
219
220#[deprecated(
221 since = "0.14.0",
222 note = "Deprecated with `parse_generictree` there is no direct replacement, besides a custom action"
223)]
224#[allow(deprecated)]
225pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
229 ridx: RIdx<StorageT>,
230 _lexer: &dyn NonStreamingLexer<LexerTypesT>,
231 _span: Span,
232 astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
233 _param: (),
234) -> Node<LexerTypesT::LexemeT, StorageT>
235where
236 usize: AsPrimitive<LexerTypesT::StorageT>,
237 LexerTypesT::LexemeT: Lexeme<StorageT>,
238{
239 let mut nodes = Vec::with_capacity(astack.len());
240 for a in astack {
241 nodes.push(match a {
242 AStackType::ActionType(n) => n,
243 AStackType::Lexeme(lexeme) => Node::Term { lexeme },
244 });
245 }
246 Node::Nonterm { ridx, nodes }
247}
248
249impl<
250 'a,
251 'b: 'a,
252 'input: 'b,
253 StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
254 LexerTypesT: LexerTypes<StorageT = StorageT>,
255 ActionT: 'a,
256 ParamT: Clone,
257> Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
258where
259 usize: AsPrimitive<StorageT>,
260{
261 fn parse_actions(
262 rcvry_kind: RecoveryKind,
263 grm: &'a YaccGrammar<StorageT>,
264 token_cost: TokenCostFn<'a, StorageT>,
265 stable: &'a StateTable<StorageT>,
266 lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
267 lexemes: Vec<LexerTypesT::LexemeT>,
268 actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
269 param: ParamT,
270 ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
271 for tidx in grm.iter_tidxs() {
272 assert!(token_cost(tidx) > 0);
273 }
274 let psr = Parser {
275 rcvry_kind,
276 grm,
277 token_cost,
278 stable,
279 lexer,
280 lexemes,
281 actions,
282 param,
283 };
284 let mut pstack = vec![stable.start_state()];
285 let mut astack = Vec::new();
286 let mut errors = Vec::new();
287 let mut spans = Vec::new();
288 let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
289 (accpt, errors)
290 }
291
292 fn lr(
306 &self,
307 mut laidx: usize,
308 pstack: &mut PStack<StorageT>,
309 astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
310 errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
311 spans: &mut Vec<Span>,
312 ) -> Option<ActionT> {
313 let mut recoverer = None;
314 let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
315 loop {
316 debug_assert_eq!(astack.len(), spans.len());
317 let stidx = *pstack.last().unwrap();
318 let la_tidx = self.next_tidx(laidx);
319
320 match self.stable.action(stidx, la_tidx) {
321 Action::Reduce(pidx) => {
322 let ridx = self.grm.prod_to_rule(pidx);
323 let pop_idx = pstack.len() - self.grm.prod(pidx).len();
324
325 pstack.drain(pop_idx..);
326 let prior = *pstack.last().unwrap();
327 pstack.push(self.stable.goto(prior, ridx).unwrap());
328
329 let span = if spans.is_empty() {
330 Span::new(0, 0)
331 } else if pop_idx - 1 < spans.len() {
332 Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
333 } else {
334 Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
335 };
336 spans.truncate(pop_idx - 1);
337 spans.push(span);
338
339 let v = AStackType::ActionType(self.actions[usize::from(pidx)](
340 ridx,
341 self.lexer,
342 span,
343 astack.drain(pop_idx - 1..),
344 self.param.clone(),
345 ));
346 astack.push(v);
347 }
348 Action::Shift(state_id) => {
349 let la_lexeme = self.next_lexeme(laidx);
350 pstack.push(state_id);
351 astack.push(AStackType::Lexeme(la_lexeme));
352
353 spans.push(la_lexeme.span());
354 laidx += 1;
355 }
356 Action::Accept => {
357 debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
358 debug_assert_eq!(astack.len(), 1);
359 match astack.drain(..).next().unwrap() {
360 AStackType::ActionType(v) => return Some(v),
361 _ => unreachable!(),
362 }
363 }
364 Action::Error => {
365 if recoverer.is_none() {
366 recoverer = Some(match self.rcvry_kind {
367 RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
368 RecoveryKind::None => {
369 let la_lexeme = self.next_lexeme(laidx);
370 errors.push(
371 ParseError {
372 stidx,
373 lexeme: la_lexeme,
374 repairs: vec![],
375 }
376 .into(),
377 );
378 return None;
379 }
380 });
381 }
382
383 let before = Instant::now();
384 let finish_by = before + recovery_budget;
385 let (new_laidx, repairs) = recoverer
386 .as_ref()
387 .unwrap()
388 .as_ref()
389 .recover(finish_by, self, laidx, pstack, astack, spans);
390 let after = Instant::now();
391 recovery_budget = recovery_budget
392 .checked_sub(after - before)
393 .unwrap_or_else(|| Duration::new(0, 0));
394 let keep_going = !repairs.is_empty();
395 let la_lexeme = self.next_lexeme(laidx);
396 errors.push(
397 ParseError {
398 stidx,
399 lexeme: la_lexeme,
400 repairs,
401 }
402 .into(),
403 );
404 if !keep_going {
405 return None;
406 }
407 laidx = new_laidx;
408 }
409 }
410 }
411 }
412
413 pub(super) fn lr_upto(
418 &self,
419 lexeme_prefix: Option<LexerTypesT::LexemeT>,
420 mut laidx: usize,
421 end_laidx: usize,
422 pstack: &mut PStack<StorageT>,
423 astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
424 spans: &mut Option<&mut Vec<Span>>,
425 ) -> usize {
426 assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
427 while laidx != end_laidx && laidx <= self.lexemes.len() {
428 let stidx = *pstack.last().unwrap();
429 let la_tidx = if let Some(l) = lexeme_prefix {
430 TIdx(l.tok_id())
431 } else {
432 self.next_tidx(laidx)
433 };
434
435 match self.stable.action(stidx, la_tidx) {
436 Action::Reduce(pidx) => {
437 let ridx = self.grm.prod_to_rule(pidx);
438 let pop_idx = pstack.len() - self.grm.prod(pidx).len();
439 if let Some(ref mut astack_uw) = *astack {
440 if let Some(ref mut spans_uw) = *spans {
441 let span = if spans_uw.is_empty() {
442 Span::new(0, 0)
443 } else if pop_idx - 1 < spans_uw.len() {
444 Span::new(
445 spans_uw[pop_idx - 1].start(),
446 spans_uw[spans_uw.len() - 1].end(),
447 )
448 } else {
449 Span::new(
450 spans_uw[spans_uw.len() - 1].start(),
451 spans_uw[spans_uw.len() - 1].end(),
452 )
453 };
454 spans_uw.truncate(pop_idx - 1);
455 spans_uw.push(span);
456
457 let v = AStackType::ActionType(self.actions[usize::from(pidx)](
458 ridx,
459 self.lexer,
460 span,
461 astack_uw.drain(pop_idx - 1..),
462 self.param.clone(),
463 ));
464 astack_uw.push(v);
465 } else {
466 unreachable!();
467 }
468 }
469
470 pstack.drain(pop_idx..);
471 let prior = *pstack.last().unwrap();
472 pstack.push(self.stable.goto(prior, ridx).unwrap());
473 }
474 Action::Shift(state_id) => {
475 if let Some(ref mut astack_uw) = *astack {
476 if let Some(spans_uw) = spans {
477 let la_lexeme = if let Some(l) = lexeme_prefix {
478 l
479 } else {
480 self.next_lexeme(laidx)
481 };
482 astack_uw.push(AStackType::Lexeme(la_lexeme));
483 spans_uw.push(la_lexeme.span());
484 }
485 }
486 pstack.push(state_id);
487 laidx += 1;
488 }
489 Action::Accept => {
490 break;
491 }
492 Action::Error => {
493 break;
494 }
495 }
496 }
497 laidx
498 }
499
500 pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
503 let llen = self.lexemes.len();
504 debug_assert!(laidx <= llen);
505 if laidx < llen {
506 self.lexemes[laidx]
507 } else {
508 let last_la_end = if llen == 0 {
510 0
511 } else {
512 debug_assert!(laidx > 0);
513 let last_la = self.lexemes[laidx - 1];
514 last_la.span().end()
515 };
516
517 Lexeme::new_faulty(
518 StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
519 last_la_end,
520 0,
521 )
522 }
523 }
524
525 pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
528 let ll = self.lexemes.len();
529 debug_assert!(laidx <= ll);
530 if laidx < ll {
531 TIdx(self.lexemes[laidx].tok_id())
532 } else {
533 self.grm.eof_token_idx()
534 }
535 }
536
537 #[allow(deprecated)]
545 pub(super) fn lr_cactus(
546 &self,
547 lexeme_prefix: Option<LexerTypesT::LexemeT>,
548 mut laidx: usize,
549 end_laidx: usize,
550 mut pstack: Cactus<StIdx<StorageT>>,
551 tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
552 ) -> (usize, Cactus<StIdx<StorageT>>) {
553 assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
554 while laidx != end_laidx {
555 let stidx = *pstack.val().unwrap();
556 let la_tidx = if let Some(l) = lexeme_prefix {
557 TIdx(l.tok_id())
558 } else {
559 self.next_tidx(laidx)
560 };
561
562 match self.stable.action(stidx, la_tidx) {
563 Action::Reduce(pidx) => {
564 let ridx = self.grm.prod_to_rule(pidx);
565 let pop_num = self.grm.prod(pidx).len();
566 if let Some(ref mut tstack_uw) = *tstack {
567 let nodes = tstack_uw
568 .drain(pstack.len() - pop_num - 1..)
569 .collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
570 tstack_uw.push(Node::Nonterm { ridx, nodes });
571 }
572
573 for _ in 0..pop_num {
574 pstack = pstack.parent().unwrap();
575 }
576 let prior = *pstack.val().unwrap();
577 pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
578 }
579 Action::Shift(state_id) => {
580 if let Some(ref mut tstack_uw) = *tstack {
581 let la_lexeme = if let Some(l) = lexeme_prefix {
582 l
583 } else {
584 self.next_lexeme(laidx)
585 };
586 tstack_uw.push(Node::Term { lexeme: la_lexeme });
587 }
588 pstack = pstack.child(state_id);
589 laidx += 1;
590 }
591 Action::Accept => {
592 debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
593 if let Some(ref tstack_uw) = *tstack {
594 debug_assert_eq!(tstack_uw.len(), 1);
595 }
596 break;
597 }
598 Action::Error => {
599 break;
600 }
601 }
602 }
603 (laidx, pstack)
604 }
605}
606
607pub(super) trait Recoverer<
608 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
609 LexerTypesT: LexerTypes<StorageT = StorageT>,
610 ActionT,
611 ParamT: Clone,
612> where
613 usize: AsPrimitive<StorageT>,
614{
615 fn recover(
616 &self,
617 finish_by: Instant,
618 parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
619 in_laidx: usize,
620 in_pstack: &mut PStack<StorageT>,
621 astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
622 spans: &mut Vec<Span>,
623 ) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
624}
625
626#[derive(Clone, Copy, Debug)]
628#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
629#[non_exhaustive]
630pub enum RecoveryKind {
631 CPCTPlus,
634 None,
636}
637
638impl TryFrom<RecoveryKind> for Value<Location> {
639 type Error = cfgrammar::header::HeaderError<Location>;
640 fn try_from(rk: RecoveryKind) -> Result<Value<Location>, Self::Error> {
641 use cfgrammar::{
642 Location,
643 header::{Namespaced, Setting},
644 };
645 let from_loc = Location::Other("From<RecoveryKind>".to_string());
646 Ok(match rk {
647 RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced {
648 namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
649 member: ("CPCTPlus".to_string(), from_loc.clone()),
650 })),
651 RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced {
652 namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
653 member: ("None".to_string(), from_loc.clone()),
654 })),
655 })
656 }
657}
658
659impl TryFrom<&Value<Location>> for RecoveryKind {
660 type Error = cfgrammar::header::HeaderError<Location>;
661 fn try_from(rk: &Value<Location>) -> Result<RecoveryKind, Self::Error> {
662 use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting};
663
664 match rk {
665 Value::Flag(_, loc) => Err(HeaderError {
666 kind: HeaderErrorKind::ConversionError(
667 "RecoveryKind",
668 "Cannot convert boolean to RecoveryKind",
669 ),
670 locations: vec![loc.clone()],
671 }),
672 Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
673 kind: HeaderErrorKind::ConversionError(
674 "RecoveryKind",
675 "Cannot convert number to RecoveryKind",
676 ),
677 locations: vec![loc.clone()],
678 }),
679 Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
680 kind: HeaderErrorKind::ConversionError(
681 "RecoveryKind",
682 "Cannot convert string to RecoveryKind",
683 ),
684 locations: vec![loc.clone()],
685 }),
686 Value::Setting(Setting::Unitary(Namespaced {
687 namespace,
688 member: (kind, kind_loc),
689 })) => {
690 match namespace {
691 Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => {
692 return Err(HeaderError {
693 kind: HeaderErrorKind::ConversionError(
694 "RecoveryKind",
695 "Unknown namespace",
696 ),
697 locations: vec![loc.clone()],
698 });
699 }
700 _ => {}
701 }
702 match kind.to_lowercase().as_ref() {
703 "cpctplus" => Ok(RecoveryKind::CPCTPlus),
704 "none" => Ok(RecoveryKind::None),
705 _ => Err(HeaderError {
706 kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"),
707 locations: vec![kind_loc.clone()],
708 }),
709 }
710 }
711 Value::Setting(Setting::Constructor {
712 ctor: _,
713 arg:
714 Namespaced {
715 namespace: _,
716 member: (_, arg_loc),
717 },
718 }) => Err(HeaderError {
719 kind: HeaderErrorKind::ConversionError(
720 "RecoveryKind",
721 "Cannot convert constructor to RecoveryKind",
722 ),
723 locations: vec![arg_loc.clone()],
724 }),
725 Value::Setting(Setting::Array(_, arr_loc, _)) => Err(HeaderError {
726 kind: HeaderErrorKind::ConversionError(
727 "RecoveryKind",
728 "Cannot convert array to RecoveryKind",
729 ),
730 locations: vec![arr_loc.clone()],
731 }),
732 }
733 }
734}
735
736impl quote::ToTokens for RecoveryKind {
737 fn to_tokens(&self, tokens: &mut TokenStream) {
738 tokens.extend(match *self {
739 RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
740 RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
741 })
742 }
743}
744
745#[derive(Debug)]
749pub enum LexParseError<
750 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
751 LexerTypesT: LexerTypes<StorageT = StorageT>,
752> where
753 usize: AsPrimitive<StorageT>,
754{
755 LexError(LexerTypesT::LexErrorT),
756 ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
757}
758
759impl<StorageT: Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
760 LexParseError<StorageT, LexerTypesT>
761where
762 usize: AsPrimitive<StorageT>,
763{
764 pub fn pp<'a>(
781 &self,
782 lexer: &dyn NonStreamingLexer<LexerTypesT>,
783 epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
784 ) -> String {
785 match self {
786 LexParseError::LexError(e) => {
787 let ((line, col), _) = lexer.line_col(e.span());
788 format!("Lexing error at line {} column {}.", line, col)
789 }
790 LexParseError::ParseError(e) => {
791 let ((line, col), _) = lexer.line_col(e.lexeme().span());
792 let mut out = format!("Parsing error at line {} column {}.", line, col);
793 let repairs_len = e.repairs().len();
794 if repairs_len == 0 {
795 out.push_str(" No repair sequences found.");
796 } else {
797 out.push_str(" Repair sequences found:");
798 for (i, rs) in e.repairs().iter().enumerate() {
799 let padding = ((repairs_len as f64).log10() as usize)
800 - (((i + 1) as f64).log10() as usize)
801 + 1;
802 write!(out, "\n {}{}: ", " ".repeat(padding), i + 1).ok();
803 let mut rs_out = Vec::new();
804
805 let mut i = 0;
808 while i < rs.len() {
809 match rs[i] {
810 ParseRepair::Delete(l) => {
811 let mut j = i + 1;
812 let mut last_end = l.span().end();
813 while j < rs.len() {
814 if let ParseRepair::Delete(next_l) = rs[j] {
815 if next_l.span().start() == last_end {
816 last_end = next_l.span().end();
817 j += 1;
818 continue;
819 }
820 }
821 break;
822 }
823 let t = &lexer
824 .span_str(Span::new(l.span().start(), last_end))
825 .replace('\n', "\\n");
826 rs_out.push(format!("Delete {}", t));
827 i = j;
828 }
829 ParseRepair::Insert(tidx) => {
830 rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
831 i += 1;
832 }
833 ParseRepair::Shift(l) => {
834 let t = &lexer.span_str(l.span()).replace('\n', "\\n");
835 rs_out.push(format!("Shift {}", t));
836 i += 1;
837 }
838 }
839 }
840
841 out.push_str(&rs_out.join(", "));
842 }
843 }
844 out
845 }
846 }
847 }
848}
849
850impl<
851 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
852 LexerTypesT: LexerTypes<StorageT = StorageT>,
853> fmt::Display for LexParseError<StorageT, LexerTypesT>
854where
855 usize: AsPrimitive<StorageT>,
856{
857 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
858 match *self {
859 LexParseError::LexError(ref e) => Display::fmt(e, f),
860 LexParseError::ParseError(ref e) => Display::fmt(e, f),
861 }
862 }
863}
864
865impl<
866 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
867 LexerTypesT: LexerTypes<StorageT = StorageT>,
868> Error for LexParseError<StorageT, LexerTypesT>
869where
870 usize: AsPrimitive<StorageT>,
871{
872}
873
874impl<
875 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
876 LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
877 T: LexError,
878> From<T> for LexParseError<StorageT, LexerTypesT>
879where
880 usize: AsPrimitive<StorageT>,
881{
882 fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
883 LexParseError::LexError(err)
884 }
885}
886
887impl<
888 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
889 LexerTypesT: LexerTypes<StorageT = StorageT>,
890> From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
891where
892 usize: AsPrimitive<StorageT>,
893{
894 fn from(
895 err: ParseError<LexerTypesT::LexemeT, StorageT>,
896 ) -> LexParseError<StorageT, LexerTypesT> {
897 LexParseError::ParseError(err)
898 }
899}
900
901pub struct RTParserBuilder<
903 'a,
904 StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
905 LexerTypesT: LexerTypes<StorageT = StorageT>,
906> where
907 usize: AsPrimitive<StorageT>,
908{
909 grm: &'a YaccGrammar<StorageT>,
910 stable: &'a StateTable<StorageT>,
911 recoverer: RecoveryKind,
912 term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
913 phantom: PhantomData<(StorageT, LexerTypesT)>,
914}
915
916impl<
917 'a,
918 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
919 LexerTypesT: LexerTypes<StorageT = StorageT>,
920> RTParserBuilder<'a, StorageT, LexerTypesT>
921where
922 usize: AsPrimitive<StorageT>,
923{
924 pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
926 RTParserBuilder {
927 grm,
928 stable,
929 recoverer: RecoveryKind::CPCTPlus,
930 term_costs: &|_| 1,
931 phantom: PhantomData,
932 }
933 }
934
935 pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
937 self.recoverer = rk;
938 self
939 }
940
941 pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
942 self.term_costs = f;
943 self
944 }
945
946 #[deprecated(
947 since = "0.14.0",
948 note = "Use `parse_map` to return a `Node` from your `lrpar_mod!` instead"
949 )]
950 #[allow(deprecated)]
951 pub fn parse_generictree(
954 &self,
955 lexer: &dyn NonStreamingLexer<LexerTypesT>,
956 ) -> (
957 Option<Node<LexerTypesT::LexemeT, StorageT>>,
958 Vec<LexParseError<StorageT, LexerTypesT>>,
959 ) {
960 self.parse_map(lexer, &|lexeme| Node::Term { lexeme }, &|ridx, nodes| {
961 Node::Nonterm { ridx, nodes }
962 })
963 }
964
965 pub fn parse_map<Node>(
968 &self,
969 lexer: &dyn NonStreamingLexer<LexerTypesT>,
970 fterm: &dyn Fn(LexerTypesT::LexemeT) -> Node,
971 fnonterm: &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
972 ) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
973 let lexemes = match lexer.iter().collect() {
974 Ok(lexemes) => lexemes,
975 Err(e) => return (None, vec![e.into()]),
976 };
977 Parser::<
978 StorageT,
979 LexerTypesT,
980 Node,
981 (
982 &dyn Fn(LexerTypesT::LexemeT) -> Node,
983 &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
984 ),
985 >::parse_map(
986 self.recoverer,
987 self.grm,
988 self.term_costs,
989 self.stable,
990 lexer,
991 lexemes,
992 fterm,
993 fnonterm,
994 )
995 }
996
997 #[deprecated(since = "0.14.0", note = "Use `parse_map` instead")]
998 pub fn parse_noaction(
1001 &self,
1002 lexer: &dyn NonStreamingLexer<LexerTypesT>,
1003 ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
1004 self.parse_map(lexer, &|_| (), &|_, _| ()).1
1005 }
1006
1007 pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
1014 &self,
1015 lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
1016 actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
1017 param: ParamT,
1018 ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
1019 let lexemes = match lexer.iter().collect() {
1020 Ok(lexemes) => lexemes,
1021 Err(e) => return (None, vec![e.into()]),
1022 };
1023 Parser::parse_actions(
1024 self.recoverer,
1025 self.grm,
1026 self.term_costs,
1027 self.stable,
1028 lexer,
1029 lexemes,
1030 actions,
1031 param,
1032 )
1033 }
1034
1035 pub fn grammar(&self) -> &YaccGrammar<StorageT> {
1036 self.grm
1037 }
1038}
1039
1040#[derive(Clone, Debug, Eq, Hash, PartialEq)]
1043pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
1044 Insert(TIdx<StorageT>),
1046 Delete(LexemeT),
1048 Shift(LexemeT),
1050}
1051
1052#[derive(Clone, Debug, PartialEq)]
1054pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
1055 stidx: StIdx<StorageT>,
1056 lexeme: LexemeT,
1057 repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
1058}
1059
1060impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
1061 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1062 write!(f, "Parse error at lexeme {:?}", self.lexeme)
1063 }
1064}
1065
1066impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}
1067
1068impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
1069 pub fn stidx(&self) -> StIdx<StorageT> {
1071 self.stidx
1072 }
1073
1074 pub fn lexeme(&self) -> &LexemeT {
1076 &self.lexeme
1077 }
1078
1079 pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
1082 &self.repairs
1083 }
1084}
1085
1086#[cfg(test)]
1087pub(crate) mod test {
1088 use std::collections::HashMap;
1089
1090 use cfgrammar::{
1091 Span,
1092 yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
1093 };
1094 use lrtable::{Minimiser, from_yacc};
1095 use num_traits::ToPrimitive;
1096 use regex::Regex;
1097
1098 use super::*;
1099 use crate::{
1100 Lexeme, Lexer,
1101 test_utils::{TestLexError, TestLexeme, TestLexerTypes},
1102 };
1103
1104 #[allow(deprecated)]
1105 pub(crate) fn do_parse<'input>(
1106 rcvry_kind: RecoveryKind,
1107 lexs: &str,
1108 grms: &str,
1109 input: &'input str,
1110 ) -> (
1111 YaccGrammar<u16>,
1112 SmallLexer<'input>,
1113 Result<
1114 Node<TestLexeme, u16>,
1115 (
1116 Option<Node<TestLexeme, u16>>,
1117 Vec<LexParseError<u16, TestLexerTypes>>,
1118 ),
1119 >,
1120 ) {
1121 do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
1122 }
1123
1124 #[allow(deprecated)]
1125 fn do_parse_with_costs<'input>(
1126 rcvry_kind: RecoveryKind,
1127 lexs: &str,
1128 grms: &str,
1129 input: &'input str,
1130 costs: &HashMap<&str, u8>,
1131 ) -> (
1132 YaccGrammar<u16>,
1133 SmallLexer<'input>,
1134 Result<
1135 Node<TestLexeme, u16>,
1136 (
1137 Option<Node<TestLexeme, u16>>,
1138 Vec<LexParseError<u16, TestLexerTypes>>,
1139 ),
1140 >,
1141 ) {
1142 let grm = YaccGrammar::<u16>::new_with_storaget(
1143 YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
1144 grms,
1145 )
1146 .unwrap();
1147 let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
1148 let rule_ids = grm
1149 .tokens_map()
1150 .iter()
1151 .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
1152 .collect();
1153 let lexer_rules = small_lexer(lexs, rule_ids);
1154 let lexemes = small_lex(lexer_rules, input);
1155 let lexer = SmallLexer { lexemes, s: input };
1156 let costs_tidx = costs
1157 .iter()
1158 .map(|(k, v)| (grm.token_idx(k).unwrap(), v))
1159 .collect::<HashMap<_, _>>();
1160 let (r, errs) = RTParserBuilder::new(&grm, &stable)
1161 .recoverer(rcvry_kind)
1162 .term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
1163 .parse_generictree(&lexer);
1164 if let Some(node) = r {
1165 if errs.is_empty() {
1166 (grm, lexer, Ok(node))
1167 } else {
1168 (grm, lexer, Err((Some(node), errs)))
1169 }
1170 } else {
1171 (grm, lexer, Err((None, errs)))
1172 }
1173 }
1174
1175 fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
1176 let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
1177 assert_eq!(expected, pt.unwrap().pp(&grm, input));
1178 }
1179
1180 pub(crate) struct SmallLexer<'input> {
1185 lexemes: Vec<TestLexeme>,
1186 s: &'input str,
1187 }
1188
1189 impl Lexer<TestLexerTypes> for SmallLexer<'_> {
1190 fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
1191 Box::new(self.lexemes.iter().map(|x| Ok(*x)))
1192 }
1193 }
1194
1195 impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
1196 fn span_str(&self, span: Span) -> &'input str {
1197 &self.s[span.start()..span.end()]
1198 }
1199
1200 fn span_lines_str(&self, _: Span) -> &'input str {
1201 unreachable!();
1202 }
1203
1204 fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
1205 unreachable!();
1206 }
1207 }
1208
1209 fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
1210 let mut rules = Vec::new();
1211 for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
1212 assert!(l.rfind('\'') == Some(l.len() - 1));
1213 let i = l[..l.len() - 1].rfind('\'').unwrap();
1214 let name = &l[i + 1..l.len() - 1];
1215 let re = &l[..i - 1].trim();
1216 rules.push((
1217 ids_map[name],
1218 Regex::new(&format!("\\A(?:{})", re)).unwrap(),
1219 ));
1220 }
1221 rules
1222 }
1223
1224 fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
1225 let mut lexemes = vec![];
1226 let mut i = 0;
1227 while i < input.len() {
1228 let mut longest = 0; let mut longest_tok_id = 0; for (tok_id, r) in rules.iter() {
1231 if let Some(m) = r.find(&input[i..]) {
1232 let len = m.end();
1233 if len > longest {
1234 longest = len;
1235 longest_tok_id = *tok_id;
1236 }
1237 }
1238 }
1239 assert!(longest > 0);
1240 lexemes.push(Lexeme::new(longest_tok_id, i, longest));
1241 i += longest;
1242 }
1243 lexemes
1244 }
1245
1246 #[test]
1247 fn simple_parse() {
1248 check_parse_output(
1250 "[a-zA-Z_] 'ID'
1251 \\+ '+'",
1252 "
1253%start E
1254%%
1255E: T '+' E
1256 | T;
1257T: 'ID';
1258",
1259 "a+b",
1260 "E
1261 T
1262 ID a
1263 + +
1264 E
1265 T
1266 ID b
1267",
1268 );
1269 }
1270
1271 #[test]
1272 fn parse_empty_rules() {
1273 let lexs = "[a-zA-Z_] 'ID'";
1274 let grms = "%start S
1275%%
1276S: L;
1277L: 'ID'
1278 | ;
1279";
1280 check_parse_output(
1281 lexs, grms, "", "S
1282 L
1283",
1284 );
1285
1286 check_parse_output(
1287 lexs,
1288 grms,
1289 "x",
1290 "S
1291 L
1292 ID x
1293",
1294 );
1295 }
1296
1297 #[test]
1298 fn recursive_parse() {
1299 let lexs = "\\+ '+'
1300 \\* '*'
1301 [0-9]+ 'INT'";
1302 let grms = "%start Expr
1303%%
1304Expr : Expr '+' Term | Term;
1305Term : Term '*' Factor | Factor;
1306Factor : 'INT';";
1307
1308 check_parse_output(
1309 lexs,
1310 grms,
1311 "2+3*4",
1312 "Expr
1313 Expr
1314 Term
1315 Factor
1316 INT 2
1317 + +
1318 Term
1319 Term
1320 Factor
1321 INT 3
1322 * *
1323 Factor
1324 INT 4
1325",
1326 );
1327 check_parse_output(
1328 lexs,
1329 grms,
1330 "2*3+4",
1331 "Expr
1332 Expr
1333 Term
1334 Term
1335 Factor
1336 INT 2
1337 * *
1338 Factor
1339 INT 3
1340 + +
1341 Term
1342 Factor
1343 INT 4
1344",
1345 );
1346 }
1347
1348 #[test]
1349 fn parse_error() {
1350 let lexs = "\\( '('
1351 \\) ')'
1352 [a-zA-Z_][a-zA-Z_0-9]* 'ID'";
1353 let grms = "%start Call
1354%%
1355Call: 'ID' '(' ')';";
1356
1357 check_parse_output(
1358 lexs,
1359 grms,
1360 "f()",
1361 "Call
1362 ID f
1363 ( (
1364 ) )
1365",
1366 );
1367
1368 let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
1369 let (_, errs) = pr.unwrap_err();
1370 assert_eq!(errs.len(), 1);
1371 let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
1372 match &errs[0] {
1373 LexParseError::ParseError(e) => {
1374 assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
1375 assert!(e.lexeme().faulty());
1376 }
1377 _ => unreachable!(),
1378 }
1379
1380 let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
1381 let (_, errs) = pr.unwrap_err();
1382 assert_eq!(errs.len(), 1);
1383 let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
1384 match &errs[0] {
1385 LexParseError::ParseError(e) => {
1386 assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
1387 assert!(!e.lexeme().faulty());
1388 }
1389 _ => unreachable!(),
1390 }
1391 }
1392
1393 #[test]
1394 fn test_parse_map() {
1395 #[derive(PartialEq, Debug)]
1396 enum TestParseMap<'a> {
1397 Term(&'a str, &'a str),
1398 NonTerm(&'a str, Vec<TestParseMap<'a>>),
1399 }
1400 let lex_src = r#"[0-9]+ 'INT'
1401\+ '+'
1402\* '*'
1403"#;
1404 let grammar_src = "
1405%grmtools{YaccKind: Original(NoAction)}
1406%start Expr
1407%%
1408Expr : Expr '+' Term | Term;
1409Term : Term '*' Factor | Factor;
1410Factor : 'INT';";
1411 let input_src = "2*3+4";
1412 let grm = str::parse::<YaccGrammar<u16>>(grammar_src).unwrap();
1413 let (_, stable) = lrtable::from_yacc(&grm, lrtable::Minimiser::Pager).unwrap();
1414 let rt_parser = RTParserBuilder::new(&grm, &stable);
1415 let rule_ids = grm
1416 .tokens_map()
1417 .iter()
1418 .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
1419 .collect();
1420 let lexer_rules = small_lexer(lex_src, rule_ids);
1421 let lexemes = small_lex(lexer_rules, input_src);
1422 let lexer = SmallLexer {
1423 lexemes,
1424 s: input_src,
1425 };
1426
1427 let (parse_map, errs) = rt_parser.parse_map(
1428 &lexer,
1429 &|lexeme: TestLexeme| {
1430 let tidx = TIdx(lexeme.tok_id());
1431 let tn = &grm.token_name(tidx).unwrap();
1432 let lt = &input_src[lexeme.span().start()..lexeme.span().end()];
1433 TestParseMap::Term(tn, lt)
1434 },
1435 &|ridx, nodes| {
1436 let rule_name = &grm.rule_name_str(ridx);
1437 TestParseMap::NonTerm(rule_name, nodes)
1438 },
1439 );
1440 assert!(parse_map.is_some() && errs.is_empty());
1441 check_parse_output(
1443 lex_src,
1444 grammar_src,
1445 input_src,
1446 "Expr
1447 Expr
1448 Term
1449 Term
1450 Factor
1451 INT 2
1452 * *
1453 Factor
1454 INT 3
1455 + +
1456 Term
1457 Factor
1458 INT 4
1459",
1460 );
1461
1462 let expected_parse_map = {
1463 use TestParseMap::*;
1464 NonTerm(
1465 "Expr",
1466 vec![
1467 NonTerm(
1468 "Expr",
1469 vec![NonTerm(
1470 "Term",
1471 vec![
1472 NonTerm("Term", vec![NonTerm("Factor", vec![Term("INT", "2")])]),
1473 Term("*", "*"),
1474 NonTerm("Factor", vec![Term("INT", "3")]),
1475 ],
1476 )],
1477 ),
1478 Term("+", "+"),
1479 NonTerm("Term", vec![NonTerm("Factor", vec![Term("INT", "4")])]),
1480 ],
1481 )
1482 };
1483 assert_eq!(parse_map, Some(expected_parse_map));
1484 }
1485}