#![allow(clippy::derive_partial_eq_without_eq)]
use std::{
    error::Error,
    fmt::{self, Debug, Display, Write as _},
    hash::Hash,
    marker::PhantomData,
    vec,
};

#[cfg(not(target_arch = "wasm32"))]
use std::time::{Duration, Instant};
#[cfg(target_arch = "wasm32")]
use web_time::{Duration, Instant};

use cactus::Cactus;
use cfgrammar::{RIdx, Span, TIdx, header::Value, span::Location, yacc::YaccGrammar};
use lrtable::{Action, StIdx, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::TokenStream;
use quote::quote;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::{LexError, Lexeme, LexerTypes, NonStreamingLexer, cpctplus};

// The total time (in milliseconds) that error recovery is allowed to use over
// the course of a single parse.
#[cfg(test)]
const RECOVERY_TIME_BUDGET: u64 = 60_000;
#[cfg(not(test))]
const RECOVERY_TIME_BUDGET: u64 = 500;

#[deprecated(
    since = "0.14.0",
    note = "Use the version of `Node` exported from your `lrpar_mod!`"
)]
pub type Node<T, S> = _deprecated_moved_::Node<T, S>;

#[doc(hidden)]
pub mod _deprecated_moved_ {
    use super::*;
    /// A generic parse tree.
    #[derive(Debug, Clone, PartialEq)]
    pub enum Node<LexemeT: Lexeme<StorageT>, StorageT> {
        /// A terminal node, wrapping a single lexeme.
        Term { lexeme: LexemeT },
        /// A nonterminal node: the rule `ridx` and the subtrees for its
        /// production's symbols.
        Nonterm {
            ridx: RIdx<StorageT>,
            nodes: Vec<Node<LexemeT, StorageT>>,
        },
    }
}

#[allow(deprecated)]
impl<LexemeT: Lexeme<StorageT>, StorageT: 'static + PrimInt + Unsigned> Node<LexemeT, StorageT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Return a pretty-printed version of this node, one symbol per line,
    /// indented by one space per tree level.
    pub fn pp(&self, grm: &YaccGrammar<StorageT>, input: &str) -> String {
        // An explicit work stack of `(indent, node)` pairs avoids recursion.
        let mut st = vec![(0, self)];
        let mut s = String::new();
        while let Some((indent, e)) = st.pop() {
            for _ in 0..indent {
                s.push(' ');
            }
            match *e {
                Node::Term { lexeme } => {
                    let tidx = TIdx(lexeme.tok_id());
                    let tn = grm.token_name(tidx).unwrap();
                    let lt = &input[lexeme.span().start()..lexeme.span().end()];
                    writeln!(s, "{} {}", tn, lt).ok();
                }
                Node::Nonterm { ridx, ref nodes } => {
                    writeln!(s, "{}", grm.rule_name_str(ridx)).ok();
                    for x in nodes.iter().rev() {
                        st.push((indent + 1, x));
                    }
                }
            }
        }
        s
    }
}
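
// As a concrete example of `pp`'s output format, the `simple_parse` test at
// the bottom of this file checks that the input "a+b" pretty-prints as:
//
//     E
//      T
//       ID a
//      + +
//      E
//       T
//        ID b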

type PStack<StorageT> = Vec<StIdx<StorageT>>; // Parse stack
type TokenCostFn<'a, StorageT> = &'a (dyn Fn(TIdx<StorageT>) -> u8 + 'a);
type ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT> = &'a dyn Fn(
    RIdx<StorageT>,
    &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    Span,
    vec::Drain<AStackType<<LexerTypesT as LexerTypes>::LexemeT, ActionT>>,
    ParamT,
) -> ActionT;

/// An entry on the action stack: either the value produced by an already-run
/// action, or a lexeme that has been shifted but not yet consumed by one.
#[derive(Debug)]
pub enum AStackType<LexemeT, ActionT> {
    ActionType(ActionT),
    Lexeme(LexemeT),
}

pub(super) struct Parser<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Eq + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT: 'a,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    rcvry_kind: RecoveryKind,
    pub(super) grm: &'a YaccGrammar<StorageT>,
    pub(super) token_cost: Box<TokenCostFn<'a, StorageT>>,
    pub(super) stable: &'a StateTable<StorageT>,
    lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
    pub(super) lexemes: Vec<LexerTypesT::LexemeT>,
    actions: &'a [ActionFn<'a, 'b, 'input, LexerTypesT::StorageT, LexerTypesT, ActionT, ParamT>],
    param: ParamT,
}

impl<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    Node,
>
    Parser<
        'a,
        'b,
        'input,
        StorageT,
        LexerTypesT,
        Node,
        (
            &dyn Fn(LexerTypesT::LexemeT) -> Node,
            &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
        ),
    >
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_map(
        rcvry_kind: RecoveryKind,
        grm: &YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
        fterm: &'a dyn Fn(LexerTypesT::LexemeT) -> Node,
        fnonterm: &'a dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
    ) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let mut actions: Vec<
            ActionFn<
                'a,
                'b,
                'input,
                StorageT,
                LexerTypesT,
                Node,
                (
                    &'a dyn Fn(LexerTypesT::LexemeT) -> Node,
                    &'a dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
                ),
            >,
        > = Vec::new();
        actions.resize(usize::from(grm.prods_len()), &action_map);
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions: actions.as_slice(),
            param: (fterm, fnonterm),
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }
}

fn action_map<StorageT, LexerTypesT: LexerTypes, Node>(
    ridx: RIdx<StorageT>,
    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
    _span: Span,
    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node>>,
    param: (
        &dyn Fn(LexerTypesT::LexemeT) -> Node,
        &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
    ),
) -> Node
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::LexemeT: Lexeme<StorageT>,
{
    let (fterm, fnonterm) = param;
    let mut nodes = Vec::with_capacity(astack.len());
    for a in astack {
        nodes.push(match a {
            AStackType::ActionType(n) => n,
            AStackType::Lexeme(lexeme) => fterm(lexeme),
        });
    }
    fnonterm(ridx, nodes)
}
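
// `action_map` is installed for every production, so an entire parse is
// folded through just two user-supplied closures. A minimal sketch of how
// this surfaces through the public API (mirroring the `test_parse_map` test
// below; `grm`, `stable`, and `lexer` are assumed to be built elsewhere):
//
//     let (root, errs) = RTParserBuilder::new(&grm, &stable).parse_map(
//         &lexer,
//         // fterm: turn a shifted lexeme into a caller-chosen Node value
//         &|lexeme| format!("{:?}", lexeme),
//         // fnonterm: combine a rule's child values into one Node value
//         &|ridx, nodes| format!("{:?}: {:?}", ridx, nodes),
//     );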

#[deprecated(
    since = "0.14.0",
    note = "Deprecated along with `parse_generictree`; there is no direct replacement besides a custom action"
)]
#[allow(deprecated)]
pub fn action_generictree<StorageT, LexerTypesT: LexerTypes>(
    ridx: RIdx<StorageT>,
    _lexer: &dyn NonStreamingLexer<LexerTypesT>,
    _span: Span,
    astack: vec::Drain<AStackType<LexerTypesT::LexemeT, Node<LexerTypesT::LexemeT, StorageT>>>,
    _param: (),
) -> Node<LexerTypesT::LexemeT, StorageT>
where
    usize: AsPrimitive<LexerTypesT::StorageT>,
    LexerTypesT::LexemeT: Lexeme<StorageT>,
{
    let mut nodes = Vec::with_capacity(astack.len());
    for a in astack {
        nodes.push(match a {
            AStackType::ActionType(n) => n,
            AStackType::Lexeme(lexeme) => Node::Term { lexeme },
        });
    }
    Node::Nonterm { ridx, nodes }
}

impl<
    'a,
    'b: 'a,
    'input: 'b,
    StorageT: 'static + Debug + Eq + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT: 'a,
    ParamT: Clone,
> Parser<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>
where
    usize: AsPrimitive<StorageT>,
{
    fn parse_actions(
        rcvry_kind: RecoveryKind,
        grm: &'a YaccGrammar<StorageT>,
        token_cost: TokenCostFn<'a, StorageT>,
        stable: &'a StateTable<StorageT>,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        lexemes: Vec<LexerTypesT::LexemeT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        for tidx in grm.iter_tidxs() {
            assert!(token_cost(tidx) > 0);
        }
        let psr = Parser {
            rcvry_kind,
            grm,
            token_cost: Box::new(token_cost),
            stable,
            lexer,
            lexemes,
            actions,
            param,
        };
        let mut pstack = vec![stable.start_state()];
        let mut astack = Vec::new();
        let mut errors = Vec::new();
        let mut spans = Vec::new();
        let accpt = psr.lr(0, &mut pstack, &mut astack, &mut errors, &mut spans);
        (accpt, errors)
    }

    /// The core LR driver: repeatedly look up the (state, lookahead) pair in
    /// the state table and shift, reduce, or accept accordingly. On a parse
    /// error the configured recoverer is invoked; all recovery attempts in a
    /// single parse share one `RECOVERY_TIME_BUDGET` (in milliseconds). On
    /// success, returns the action value produced for the start rule; on an
    /// unrecovered error, returns `None` with details pushed onto `errors`.
    fn lr(
        &self,
        mut laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        errors: &mut Vec<LexParseError<StorageT, LexerTypesT>>,
        spans: &mut Vec<Span>,
    ) -> Option<ActionT> {
        let mut recoverer = None;
        let mut recovery_budget = Duration::from_millis(RECOVERY_TIME_BUDGET);
        loop {
            debug_assert_eq!(astack.len(), spans.len());
            let stidx = *pstack.last().unwrap();
            let la_tidx = self.next_tidx(laidx);

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());

                    let span = if spans.is_empty() {
                        Span::new(0, 0)
                    } else if pop_idx - 1 < spans.len() {
                        Span::new(spans[pop_idx - 1].start(), spans[spans.len() - 1].end())
                    } else {
                        Span::new(spans[spans.len() - 1].start(), spans[spans.len() - 1].end())
                    };
                    spans.truncate(pop_idx - 1);
                    spans.push(span);

                    let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                        ridx,
                        self.lexer,
                        span,
                        astack.drain(pop_idx - 1..),
                        self.param.clone(),
                    ));
                    astack.push(v);
                }
                Action::Shift(state_id) => {
                    let la_lexeme = self.next_lexeme(laidx);
                    pstack.push(state_id);
                    astack.push(AStackType::Lexeme(la_lexeme));

                    spans.push(la_lexeme.span());
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    debug_assert_eq!(astack.len(), 1);
                    match astack.drain(..).next().unwrap() {
                        AStackType::ActionType(v) => return Some(v),
                        _ => unreachable!(),
                    }
                }
                Action::Error => {
                    if recoverer.is_none() {
                        recoverer = Some(match self.rcvry_kind {
                            RecoveryKind::CPCTPlus => cpctplus::recoverer(self),
                            RecoveryKind::None => {
                                let la_lexeme = self.next_lexeme(laidx);
                                errors.push(
                                    ParseError {
                                        stidx,
                                        lexeme: la_lexeme,
                                        repairs: vec![],
                                    }
                                    .into(),
                                );
                                return None;
                            }
                        });
                    }

                    let before = Instant::now();
                    let finish_by = before + recovery_budget;
                    let (new_laidx, repairs) = recoverer
                        .as_ref()
                        .unwrap()
                        .as_ref()
                        .recover(finish_by, self, laidx, pstack, astack, spans);
                    let after = Instant::now();
                    recovery_budget = recovery_budget
                        .checked_sub(after - before)
                        .unwrap_or_else(|| Duration::new(0, 0));
                    let keep_going = !repairs.is_empty();
                    let la_lexeme = self.next_lexeme(laidx);
                    errors.push(
                        ParseError {
                            stidx,
                            lexeme: la_lexeme,
                            repairs,
                        }
                        .into(),
                    );
                    if !keep_going {
                        return None;
                    }
                    laidx = new_laidx;
                }
            }
        }
    }

    /// Parse from lookahead index `laidx` up to (but excluding) `end_laidx`,
    /// without error recovery: parsing stops at the first accept or error
    /// action. If `lexeme_prefix` is given, it is used as the lookahead, in
    /// which case `end_laidx` must be `laidx + 1`. Returns the lookahead
    /// index at which parsing stopped.
    pub(super) fn lr_upto(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        pstack: &mut PStack<StorageT>,
        astack: &mut Option<&mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>>,
        spans: &mut Option<&mut Vec<Span>>,
    ) -> usize {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx && laidx <= self.lexemes.len() {
            let stidx = *pstack.last().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_idx = pstack.len() - self.grm.prod(pidx).len();
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(ref mut spans_uw) = *spans {
                            let span = if spans_uw.is_empty() {
                                Span::new(0, 0)
                            } else if pop_idx - 1 < spans_uw.len() {
                                Span::new(
                                    spans_uw[pop_idx - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            } else {
                                Span::new(
                                    spans_uw[spans_uw.len() - 1].start(),
                                    spans_uw[spans_uw.len() - 1].end(),
                                )
                            };
                            spans_uw.truncate(pop_idx - 1);
                            spans_uw.push(span);

                            let v = AStackType::ActionType(self.actions[usize::from(pidx)](
                                ridx,
                                self.lexer,
                                span,
                                astack_uw.drain(pop_idx - 1..),
                                self.param.clone(),
                            ));
                            astack_uw.push(v);
                        } else {
                            unreachable!();
                        }
                    }

                    pstack.drain(pop_idx..);
                    let prior = *pstack.last().unwrap();
                    pstack.push(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut astack_uw) = *astack {
                        if let Some(spans_uw) = spans {
                            let la_lexeme = if let Some(l) = lexeme_prefix {
                                l
                            } else {
                                self.next_lexeme(laidx)
                            };
                            astack_uw.push(AStackType::Lexeme(la_lexeme));
                            spans_uw.push(la_lexeme.span());
                        }
                    }
                    pstack.push(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        laidx
    }

    /// Return the lexeme at `laidx`. If the user's input is exhausted, an
    /// artificial zero-width "faulty" EOF lexeme is returned instead.
    pub(super) fn next_lexeme(&self, laidx: usize) -> LexerTypesT::LexemeT {
        let llen = self.lexemes.len();
        debug_assert!(laidx <= llen);
        if laidx < llen {
            self.lexemes[laidx]
        } else {
            // Construct a zero-width lexeme positioned at the end of the last
            // real lexeme (or at offset 0 if there were no lexemes at all).
            let last_la_end = if llen == 0 {
                0
            } else {
                debug_assert!(laidx > 0);
                let last_la = self.lexemes[laidx - 1];
                last_la.span().end()
            };

            Lexeme::new_faulty(
                StorageT::from(u32::from(self.grm.eof_token_idx())).unwrap(),
                last_la_end,
                0,
            )
        }
    }

    /// Return the token index of the lookahead at `laidx`, or the EOF token
    /// index if the input is exhausted.
    pub(super) fn next_tidx(&self, laidx: usize) -> TIdx<StorageT> {
        let ll = self.lexemes.len();
        debug_assert!(laidx <= ll);
        if laidx < ll {
            TIdx(self.lexemes[laidx].tok_id())
        } else {
            self.grm.eof_token_idx()
        }
    }

    #[allow(deprecated)]
    /// As `lr_upto`, but operating on a parent-pointer ("cactus") stack, which
    /// allows error recovery to fork and explore alternative stack states
    /// cheaply, and optionally building a generic parse tree into `tstack`.
    /// Returns the lookahead index reached and the final parse stack.
    pub(super) fn lr_cactus(
        &self,
        lexeme_prefix: Option<LexerTypesT::LexemeT>,
        mut laidx: usize,
        end_laidx: usize,
        mut pstack: Cactus<StIdx<StorageT>>,
        tstack: &mut Option<&mut Vec<Node<LexerTypesT::LexemeT, StorageT>>>,
    ) -> (usize, Cactus<StIdx<StorageT>>) {
        assert!(lexeme_prefix.is_none() || end_laidx == laidx + 1);
        while laidx != end_laidx {
            let stidx = *pstack.val().unwrap();
            let la_tidx = if let Some(l) = lexeme_prefix {
                TIdx(l.tok_id())
            } else {
                self.next_tidx(laidx)
            };

            match self.stable.action(stidx, la_tidx) {
                Action::Reduce(pidx) => {
                    let ridx = self.grm.prod_to_rule(pidx);
                    let pop_num = self.grm.prod(pidx).len();
                    if let Some(ref mut tstack_uw) = *tstack {
                        let nodes = tstack_uw
                            .drain(pstack.len() - pop_num - 1..)
                            .collect::<Vec<Node<LexerTypesT::LexemeT, StorageT>>>();
                        tstack_uw.push(Node::Nonterm { ridx, nodes });
                    }

                    for _ in 0..pop_num {
                        pstack = pstack.parent().unwrap();
                    }
                    let prior = *pstack.val().unwrap();
                    pstack = pstack.child(self.stable.goto(prior, ridx).unwrap());
                }
                Action::Shift(state_id) => {
                    if let Some(ref mut tstack_uw) = *tstack {
                        let la_lexeme = if let Some(l) = lexeme_prefix {
                            l
                        } else {
                            self.next_lexeme(laidx)
                        };
                        tstack_uw.push(Node::Term { lexeme: la_lexeme });
                    }
                    pstack = pstack.child(state_id);
                    laidx += 1;
                }
                Action::Accept => {
                    debug_assert_eq!(la_tidx, self.grm.eof_token_idx());
                    if let Some(ref tstack_uw) = *tstack {
                        debug_assert_eq!(tstack_uw.len(), 1);
                    }
                    break;
                }
                Action::Error => {
                    break;
                }
            }
        }
        (laidx, pstack)
    }
}

/// A recoverer takes a parser that has hit a syntax error and tries to find
/// one or more repair sequences before `finish_by`, returning the lookahead
/// index at which normal parsing should resume.
pub(super) trait Recoverer<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
    ActionT,
    ParamT: Clone,
> where
    usize: AsPrimitive<StorageT>,
{
    fn recover(
        &self,
        finish_by: Instant,
        parser: &Parser<StorageT, LexerTypesT, ActionT, ParamT>,
        in_laidx: usize,
        in_pstack: &mut PStack<StorageT>,
        astack: &mut Vec<AStackType<LexerTypesT::LexemeT, ActionT>>,
        spans: &mut Vec<Span>,
    ) -> (usize, Vec<Vec<ParseRepair<LexerTypesT::LexemeT, StorageT>>>);
}

/// Which recovery algorithm should be used when a syntax error is
/// encountered?
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub enum RecoveryKind {
    /// Recover from errors using the CPCT+ algorithm, which searches for
    /// minimum-cost repair sequences (inserts, deletes, and shifts) at the
    /// point of error.
    CPCTPlus,
    /// Do not attempt to recover from errors: parsing stops at the first
    /// error encountered.
    None,
}

impl TryFrom<RecoveryKind> for Value<Location> {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: RecoveryKind) -> Result<Value<Location>, Self::Error> {
        use cfgrammar::{
            Location,
            header::{Namespaced, Setting},
        };
        let from_loc = Location::Other("From<RecoveryKind>".to_string());
        Ok(match rk {
            RecoveryKind::CPCTPlus => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("CPCTPlus".to_string(), from_loc.clone()),
            })),
            RecoveryKind::None => Value::Setting(Setting::Unitary(Namespaced {
                namespace: Some(("RecoveryKind".to_string(), from_loc.clone())),
                member: ("None".to_string(), from_loc.clone()),
            })),
        })
    }
}

impl TryFrom<&Value<Location>> for RecoveryKind {
    type Error = cfgrammar::header::HeaderError<Location>;
    fn try_from(rk: &Value<Location>) -> Result<RecoveryKind, Self::Error> {
        use cfgrammar::header::{HeaderError, HeaderErrorKind, Namespaced, Setting};

        match rk {
            Value::Flag(_, loc) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert boolean to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert number to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert string to RecoveryKind",
                ),
                locations: vec![loc.clone()],
            }),
            Value::Setting(Setting::Unitary(Namespaced {
                namespace,
                member: (kind, kind_loc),
            })) => {
                match namespace {
                    Some((ns, loc)) if ns.to_lowercase() != "recoverykind" => {
                        return Err(HeaderError {
                            kind: HeaderErrorKind::ConversionError(
                                "RecoveryKind",
                                "Unknown namespace",
                            ),
                            locations: vec![loc.clone()],
                        });
                    }
                    _ => {}
                }
                match kind.to_lowercase().as_ref() {
                    "cpctplus" => Ok(RecoveryKind::CPCTPlus),
                    "none" => Ok(RecoveryKind::None),
                    _ => Err(HeaderError {
                        kind: HeaderErrorKind::ConversionError("RecoveryKind", "Unknown variant"),
                        locations: vec![kind_loc.clone()],
                    }),
                }
            }
            Value::Setting(Setting::Constructor {
                ctor: _,
                arg:
                    Namespaced {
                        namespace: _,
                        member: (_, arg_loc),
                    },
            }) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert constructor to RecoveryKind",
                ),
                locations: vec![arg_loc.clone()],
            }),
            Value::Setting(Setting::Array(_, arr_loc, _)) => Err(HeaderError {
                kind: HeaderErrorKind::ConversionError(
                    "RecoveryKind",
                    "Cannot convert array to RecoveryKind",
                ),
                locations: vec![arr_loc.clone()],
            }),
        }
    }
}
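
// A round-trip sketch of the two conversions above (these are normally driven
// by the grammar-header machinery rather than called directly):
//
//     let v = Value::try_from(RecoveryKind::None).unwrap();
//     assert!(matches!(RecoveryKind::try_from(&v), Ok(RecoveryKind::None)));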

impl quote::ToTokens for RecoveryKind {
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.extend(match *self {
            RecoveryKind::CPCTPlus => quote!(::lrpar::RecoveryKind::CPCTPlus),
            RecoveryKind::None => quote!(::lrpar::RecoveryKind::None),
        })
    }
}

/// A lexing or parsing error: either a `LexError` produced by the lexer or a
/// `ParseError` produced by the parser.
#[derive(Debug)]
pub enum LexParseError<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    LexError(LexerTypesT::LexErrorT),
    ParseError(ParseError<LexerTypesT::LexemeT, StorageT>),
}

impl<StorageT: Debug + Hash + PrimInt + Unsigned, LexerTypesT: LexerTypes<StorageT = StorageT>>
    LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// A pretty-printer for lexing/parsing errors which expresses any repair
    /// sequences in terms of the user's input. `epp` must map a token index
    /// to that token's "pretty-printed" name (or `None` if it has none).
    pub fn pp<'a>(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
        epp: &dyn Fn(TIdx<StorageT>) -> Option<&'a str>,
    ) -> String {
        match self {
            LexParseError::LexError(e) => {
                let ((line, col), _) = lexer.line_col(e.span());
                format!("Lexing error at line {} column {}.", line, col)
            }
            LexParseError::ParseError(e) => {
                let ((line, col), _) = lexer.line_col(e.lexeme().span());
                let mut out = format!("Parsing error at line {} column {}.", line, col);
                let repairs_len = e.repairs().len();
                if repairs_len == 0 {
                    out.push_str(" No repair sequences found.");
                } else {
                    out.push_str(" Repair sequences found:");
                    for (i, rs) in e.repairs().iter().enumerate() {
                        let padding = ((repairs_len as f64).log10() as usize)
                            - (((i + 1) as f64).log10() as usize)
                            + 1;
                        write!(out, "\n {}{}: ", " ".repeat(padding), i + 1).ok();
                        let mut rs_out = Vec::new();

                        // Merge consecutive Deletes (in both repair and input
                        // order) into a single "Delete" entry.
                        let mut i = 0;
                        while i < rs.len() {
                            match rs[i] {
                                ParseRepair::Delete(l) => {
                                    let mut j = i + 1;
                                    let mut last_end = l.span().end();
                                    while j < rs.len() {
                                        if let ParseRepair::Delete(next_l) = rs[j] {
                                            if next_l.span().start() == last_end {
                                                last_end = next_l.span().end();
                                                j += 1;
                                                continue;
                                            }
                                        }
                                        break;
                                    }
                                    let t = &lexer
                                        .span_str(Span::new(l.span().start(), last_end))
                                        .replace('\n', "\\n");
                                    rs_out.push(format!("Delete {}", t));
                                    i = j;
                                }
                                ParseRepair::Insert(tidx) => {
                                    rs_out.push(format!("Insert {}", epp(tidx).unwrap()));
                                    i += 1;
                                }
                                ParseRepair::Shift(l) => {
                                    let t = &lexer.span_str(l.span()).replace('\n', "\\n");
                                    rs_out.push(format!("Shift {}", t));
                                    i += 1;
                                }
                            }
                        }

                        out.push_str(&rs_out.join(", "));
                    }
                }
                out
            }
        }
    }
}
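
// With an `epp` closure such as `&|tidx| grm.token_epp(tidx)`, `pp` renders
// errors along these (illustrative) lines:
//
//     Parsing error at line 3 column 9. Repair sequences found:
//       1: Insert ), Shift ;
//       2: Delete +, Shift ;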

impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> fmt::Display for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            LexParseError::LexError(ref e) => Display::fmt(e, f),
            LexParseError::ParseError(ref e) => Display::fmt(e, f),
        }
    }
}

impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> Error for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
}

impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT, LexErrorT = T>,
    T: LexError,
> From<T> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(err: T) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::LexError(err)
    }
}

impl<
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> From<ParseError<LexerTypesT::LexemeT, StorageT>> for LexParseError<StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    fn from(
        err: ParseError<LexerTypesT::LexemeT, StorageT>,
    ) -> LexParseError<StorageT, LexerTypesT> {
        LexParseError::ParseError(err)
    }
}

/// A run-time parser builder: ties together a grammar, a state table, a
/// recovery kind, and a per-token cost function.
pub struct RTParserBuilder<
    'a,
    StorageT: 'static + Eq + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> where
    usize: AsPrimitive<StorageT>,
{
    grm: &'a YaccGrammar<StorageT>,
    stable: &'a StateTable<StorageT>,
    recoverer: RecoveryKind,
    term_costs: &'a dyn Fn(TIdx<StorageT>) -> u8,
    phantom: PhantomData<(StorageT, LexerTypesT)>,
}

impl<
    'a,
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    LexerTypesT: LexerTypes<StorageT = StorageT>,
> RTParserBuilder<'a, StorageT, LexerTypesT>
where
    usize: AsPrimitive<StorageT>,
{
    /// Create a new parser builder for a given grammar and state table.
    pub fn new(grm: &'a YaccGrammar<StorageT>, stable: &'a StateTable<StorageT>) -> Self {
        RTParserBuilder {
            grm,
            stable,
            recoverer: RecoveryKind::CPCTPlus,
            term_costs: &|_| 1,
            phantom: PhantomData,
        }
    }

    /// Set the recoverer for this parser to `rk`.
    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
        self.recoverer = rk;
        self
    }

    /// Set the cost function for tokens: error recovery ranks repair
    /// sequences by the summed cost of their repairs. Every token's cost
    /// must be greater than zero.
    pub fn term_costs(mut self, f: &'a dyn Fn(TIdx<StorageT>) -> u8) -> Self {
        self.term_costs = f;
        self
    }

    #[deprecated(
        since = "0.14.0",
        note = "Use `parse_map` to return a `Node` from your `lrpar_mod!` instead"
    )]
    #[allow(deprecated)]
    /// Parse input and, if possible, return a generic parse tree, together
    /// with any lexing/parsing errors encountered.
    pub fn parse_generictree(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> (
        Option<Node<LexerTypesT::LexemeT, StorageT>>,
        Vec<LexParseError<StorageT, LexerTypesT>>,
    ) {
        self.parse_map(lexer, &|lexeme| Node::Term { lexeme }, &|ridx, nodes| {
            Node::Nonterm { ridx, nodes }
        })
    }

    /// Parse input, folding terminals through `fterm` and nonterminals
    /// through `fnonterm`. Returns the folded value for the start rule (if
    /// parsing succeeded or was recovered) together with any lexing/parsing
    /// errors encountered.
    pub fn parse_map<Node>(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
        fterm: &dyn Fn(LexerTypesT::LexemeT) -> Node,
        fnonterm: &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
    ) -> (Option<Node>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::<
            StorageT,
            LexerTypesT,
            Node,
            (
                &dyn Fn(LexerTypesT::LexemeT) -> Node,
                &dyn Fn(RIdx<StorageT>, Vec<Node>) -> Node,
            ),
        >::parse_map(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
            fterm,
            fnonterm,
        )
    }

    #[deprecated(since = "0.14.0", note = "Use `parse_map` instead")]
    /// Parse input, returning only any lexing/parsing errors encountered.
    pub fn parse_noaction(
        &self,
        lexer: &dyn NonStreamingLexer<LexerTypesT>,
    ) -> Vec<LexParseError<StorageT, LexerTypesT>> {
        self.parse_map(lexer, &|_| (), &|_, _| ()).1
    }

    /// Parse input, running the action function for each production as it is
    /// reduced and threading `param` through every call.
    pub fn parse_actions<'b: 'a, 'input: 'b, ActionT: 'a, ParamT: Clone>(
        &self,
        lexer: &'b dyn NonStreamingLexer<'input, LexerTypesT>,
        actions: &'a [ActionFn<'a, 'b, 'input, StorageT, LexerTypesT, ActionT, ParamT>],
        param: ParamT,
    ) -> (Option<ActionT>, Vec<LexParseError<StorageT, LexerTypesT>>) {
        let mut lexemes = vec![];
        for e in lexer.iter().collect::<Vec<_>>() {
            match e {
                Ok(l) => lexemes.push(l),
                Err(e) => return (None, vec![e.into()]),
            }
        }
        Parser::parse_actions(
            self.recoverer,
            self.grm,
            self.term_costs,
            self.stable,
            lexer,
            lexemes,
            actions,
            param,
        )
    }

    /// Return a reference to the grammar this builder was constructed with.
    pub fn grammar(&self) -> &YaccGrammar<StorageT> {
        self.grm
    }
}
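
// A minimal end-to-end sketch of driving `RTParserBuilder` by hand, mirroring
// the `test_parse_map` test below. In normal use `lrpar_mod!` generates this
// wiring; here `grammar_src` is assumed to contain a `%grmtools{YaccKind:
// ...}` header, `lexer` to implement `NonStreamingLexer`, and
// `term_fn`/`nonterm_fn` to be the caller's folding closures:
//
//     let grm = str::parse::<YaccGrammar<u16>>(grammar_src).unwrap();
//     let (_, stable) = lrtable::from_yacc(&grm, lrtable::Minimiser::Pager).unwrap();
//     let (root, errs) = RTParserBuilder::new(&grm, &stable)
//         .recoverer(RecoveryKind::CPCTPlus)
//         .parse_map(&lexer, &term_fn, &nonterm_fn);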

/// A single repair within a repair sequence suggested by error recovery.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum ParseRepair<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    /// Insert a token of this type.
    Insert(TIdx<StorageT>),
    /// Delete this lexeme from the input.
    Delete(LexemeT),
    /// Shift (i.e. accept as-is) this lexeme.
    Shift(LexemeT),
}

/// Records a single parse error.
#[derive(Clone, Debug, PartialEq)]
pub struct ParseError<LexemeT: Lexeme<StorageT>, StorageT: Hash> {
    stidx: StIdx<StorageT>,
    lexeme: LexemeT,
    repairs: Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Display for ParseError<LexemeT, StorageT> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Parse error at lexeme {:?}", self.lexeme)
    }
}

impl<LexemeT: Lexeme<StorageT>, StorageT: Debug + Hash> Error for ParseError<LexemeT, StorageT> {}

impl<LexemeT: Lexeme<StorageT>, StorageT: Hash + PrimInt + Unsigned> ParseError<LexemeT, StorageT> {
    /// Return the state table index the parser was in when the error was
    /// detected.
    pub fn stidx(&self) -> StIdx<StorageT> {
        self.stidx
    }

    /// Return the lexeme at which the error occurred.
    pub fn lexeme(&self) -> &LexemeT {
        &self.lexeme
    }

    /// Return the repair sequences found during error recovery. Each inner
    /// `Vec` is one complete sequence of repairs; the outer `Vec` is empty if
    /// recovery was disabled or failed.
    pub fn repairs(&self) -> &Vec<Vec<ParseRepair<LexemeT, StorageT>>> {
        &self.repairs
    }
}
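
// A sketch of inspecting a failed parse via the accessors above (mirroring
// the `parse_error` test below; `errs` is assumed to come from one of the
// `parse_*` methods):
//
//     match &errs[0] {
//         LexParseError::ParseError(e) => {
//             println!("error at {:?}: {} repair sequence(s)",
//                      e.lexeme().span(), e.repairs().len());
//         }
//         LexParseError::LexError(_) => { /* lexing, not parsing, error */ }
//     }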

#[cfg(test)]
pub(crate) mod test {
    use std::collections::HashMap;

    use cfgrammar::{
        Span,
        yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
    };
    use lrtable::{Minimiser, from_yacc};
    use num_traits::ToPrimitive;
    use regex::Regex;

    use super::*;
    use crate::{
        Lexeme, Lexer,
        test_utils::{TestLexError, TestLexeme, TestLexerTypes},
    };

    #[allow(deprecated)]
    pub(crate) fn do_parse<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        do_parse_with_costs(rcvry_kind, lexs, grms, input, &HashMap::new())
    }

    #[allow(deprecated)]
    fn do_parse_with_costs<'input>(
        rcvry_kind: RecoveryKind,
        lexs: &str,
        grms: &str,
        input: &'input str,
        costs: &HashMap<&str, u8>,
    ) -> (
        YaccGrammar<u16>,
        SmallLexer<'input>,
        Result<
            Node<TestLexeme, u16>,
            (
                Option<Node<TestLexeme, u16>>,
                Vec<LexParseError<u16, TestLexerTypes>>,
            ),
        >,
    ) {
        let grm = YaccGrammar::<u16>::new_with_storaget(
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
            grms,
        )
        .unwrap();
        let (_, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
            .collect();
        let lexer_rules = small_lexer(lexs, rule_ids);
        let lexemes = small_lex(lexer_rules, input);
        let lexer = SmallLexer { lexemes, s: input };
        let costs_tidx = costs
            .iter()
            .map(|(k, v)| (grm.token_idx(k).unwrap(), v))
            .collect::<HashMap<_, _>>();
        let (r, errs) = RTParserBuilder::new(&grm, &stable)
            .recoverer(rcvry_kind)
            .term_costs(&|tidx| **costs_tidx.get(&tidx).unwrap_or(&&1))
            .parse_generictree(&lexer);
        if let Some(node) = r {
            if errs.is_empty() {
                (grm, lexer, Ok(node))
            } else {
                (grm, lexer, Err((Some(node), errs)))
            }
        } else {
            (grm, lexer, Err((None, errs)))
        }
    }

    fn check_parse_output(lexs: &str, grms: &str, input: &str, expected: &str) {
        let (grm, _, pt) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, input);
        assert_eq!(expected, pt.unwrap().pp(&grm, input));
    }

    /// A minimal lexer for testing: it simply replays a pre-computed list of
    /// lexemes over the input string they were produced from.
    pub(crate) struct SmallLexer<'input> {
        lexemes: Vec<TestLexeme>,
        s: &'input str,
    }

    impl Lexer<TestLexerTypes> for SmallLexer<'_> {
        fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Result<TestLexeme, TestLexError>> + 'a> {
            Box::new(self.lexemes.iter().map(|x| Ok(*x)))
        }
    }

    impl<'input> NonStreamingLexer<'input, TestLexerTypes> for SmallLexer<'input> {
        fn span_str(&self, span: Span) -> &'input str {
            &self.s[span.start()..span.end()]
        }

        fn span_lines_str(&self, _: Span) -> &'input str {
            unreachable!();
        }

        fn line_col(&self, _: Span) -> ((usize, usize), (usize, usize)) {
            unreachable!();
        }
    }

    fn small_lexer(lexs: &str, ids_map: HashMap<String, u16>) -> Vec<(u16, Regex)> {
        let mut rules = Vec::new();
        for l in lexs.split('\n').map(|x| x.trim()).filter(|x| !x.is_empty()) {
            assert!(l.rfind('\'') == Some(l.len() - 1));
            let i = l[..l.len() - 1].rfind('\'').unwrap();
            let name = &l[i + 1..l.len() - 1];
            let re = &l[..i - 1].trim();
            rules.push((
                ids_map[name],
                Regex::new(&format!("\\A(?:{})", re)).unwrap(),
            ));
        }
        rules
    }

    fn small_lex(rules: Vec<(u16, Regex)>, input: &str) -> Vec<TestLexeme> {
        let mut lexemes = vec![];
        let mut i = 0;
        while i < input.len() {
            // Find the longest match over all rules; ties go to the rule
            // defined first.
            let mut longest = 0; // Length of the longest match
            let mut longest_tok_id = 0; // Only valid if longest != 0
            for (tok_id, r) in rules.iter() {
                if let Some(m) = r.find(&input[i..]) {
                    let len = m.end();
                    if len > longest {
                        longest = len;
                        longest_tok_id = *tok_id;
                    }
                }
            }
            assert!(longest > 0);
            lexemes.push(Lexeme::new(longest_tok_id, i, longest));
            i += longest;
        }
        lexemes
    }

    #[test]
    fn simple_parse() {
        check_parse_output(
            "[a-zA-Z_] 'ID'
             \\+ '+'",
            "
%start E
%%
E: T '+' E
 | T;
T: 'ID';
",
            "a+b",
            "E
 T
  ID a
 + +
 E
  T
   ID b
",
        );
    }

    #[test]
    fn parse_empty_rules() {
        let lexs = "[a-zA-Z_] 'ID'";
        let grms = "%start S
%%
S: L;
L: 'ID'
 | ;
";
        check_parse_output(
            lexs, grms, "", "S
 L
",
        );

        check_parse_output(
            lexs,
            grms,
            "x",
            "S
 L
  ID x
",
        );
    }

    #[test]
    fn recursive_parse() {
        let lexs = "\\+ '+'
                    \\* '*'
                    [0-9]+ 'INT'";
        let grms = "%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";

        check_parse_output(
            lexs,
            grms,
            "2+3*4",
            "Expr
 Expr
  Term
   Factor
    INT 2
 + +
 Term
  Term
   Factor
    INT 3
  * *
  Factor
   INT 4
",
        );
        check_parse_output(
            lexs,
            grms,
            "2*3+4",
            "Expr
 Expr
  Term
   Term
    Factor
     INT 2
   * *
   Factor
    INT 3
 + +
 Term
  Factor
   INT 4
",
        );
    }

    #[test]
    fn parse_error() {
        let lexs = "\\( '('
                    \\) ')'
                    [a-zA-Z_][a-zA-Z_0-9]* 'ID'";
        let grms = "%start Call
%%
Call: 'ID' '(' ')';";

        check_parse_output(
            lexs,
            grms,
            "f()",
            "Call
 ID f
 ( (
 ) )
",
        );

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.eof_token_idx()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new_faulty(err_tok_id, 2, 0));
                assert!(e.lexeme().faulty());
            }
            _ => unreachable!(),
        }

        let (grm, _, pr) = do_parse(RecoveryKind::CPCTPlus, lexs, grms, "f(f(");
        let (_, errs) = pr.unwrap_err();
        assert_eq!(errs.len(), 1);
        let err_tok_id = usize::from(grm.token_idx("ID").unwrap()).to_u16().unwrap();
        match &errs[0] {
            LexParseError::ParseError(e) => {
                assert_eq!(e.lexeme(), &Lexeme::new(err_tok_id, 2, 1));
                assert!(!e.lexeme().faulty());
            }
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_parse_map() {
        #[derive(PartialEq, Debug)]
        enum TestParseMap<'a> {
            Term(&'a str, &'a str),
            NonTerm(&'a str, Vec<TestParseMap<'a>>),
        }
        let lex_src = r#"[0-9]+ 'INT'
\+ '+'
\* '*'
"#;
        let grammar_src = "
%grmtools{YaccKind: Original(NoAction)}
%start Expr
%%
Expr : Expr '+' Term | Term;
Term : Term '*' Factor | Factor;
Factor : 'INT';";
        let input_src = "2*3+4";
        let grm = str::parse::<YaccGrammar<u16>>(grammar_src).unwrap();
        let (_, stable) = lrtable::from_yacc(&grm, lrtable::Minimiser::Pager).unwrap();
        let rt_parser = RTParserBuilder::new(&grm, &stable);
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), u32::from(i).to_u16().unwrap()))
            .collect();
        let lexer_rules = small_lexer(lex_src, rule_ids);
        let lexemes = small_lex(lexer_rules, input_src);
        let lexer = SmallLexer {
            lexemes,
            s: input_src,
        };

        let (parse_map, errs) = rt_parser.parse_map(
            &lexer,
            &|lexeme: TestLexeme| {
                let tidx = TIdx(lexeme.tok_id());
                let tn = &grm.token_name(tidx).unwrap();
                let lt = &input_src[lexeme.span().start()..lexeme.span().end()];
                TestParseMap::Term(tn, lt)
            },
            &|ridx, nodes| {
                let rule_name = &grm.rule_name_str(ridx);
                TestParseMap::NonTerm(rule_name, nodes)
            },
        );
        assert!(parse_map.is_some() && errs.is_empty());
        // Sanity check: the generic parse tree for the same input agrees
        // with the map built above.
        check_parse_output(
            lex_src,
            grammar_src,
            input_src,
            "Expr
 Expr
  Term
   Term
    Factor
     INT 2
   * *
   Factor
    INT 3
 + +
 Term
  Factor
   INT 4
",
        );

        let expected_parse_map = {
            use TestParseMap::*;
            NonTerm(
                "Expr",
                vec![
                    NonTerm(
                        "Expr",
                        vec![NonTerm(
                            "Term",
                            vec![
                                NonTerm("Term", vec![NonTerm("Factor", vec![Term("INT", "2")])]),
                                Term("*", "*"),
                                NonTerm("Factor", vec![Term("INT", "3")]),
                            ],
                        )],
                    ),
                    Term("+", "+"),
                    NonTerm("Term", vec![NonTerm("Factor", vec![Term("INT", "4")])]),
                ],
            )
        };
        assert_eq!(parse_map, Some(expected_parse_map));
    }
}