use std::{collections::hash_map::HashMap, fmt::Write, hash::Hash};
use cfgrammar::{yacc::YaccGrammar, Symbol, TIdx};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use crate::{itemset::Itemset, StIdx};
#[derive(Debug)]
pub struct StateGraph<StorageT: Eq + Hash> {
states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
start_state: StIdx<StorageT>,
edges: Vec<HashMap<Symbol<StorageT>, StIdx<StorageT>>>,
}
impl<StorageT: 'static + Hash + PrimInt + Unsigned> StateGraph<StorageT>
where
usize: AsPrimitive<StorageT>,
{
pub(crate) fn new(
states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
start_state: StIdx<StorageT>,
edges: Vec<HashMap<Symbol<StorageT>, StIdx<StorageT>>>,
) -> Self {
assert!(states.len() < num_traits::cast(StorageT::max_value()).unwrap());
StateGraph {
states,
start_state,
edges,
}
}
pub fn start_state(&self) -> StIdx<StorageT> {
self.start_state
}
pub fn iter_stidxs(&self) -> Box<dyn Iterator<Item = StIdx<StorageT>>> {
Box::new((0..self.states.len()).map(|x| StIdx(x.as_())))
}
pub fn closed_state(&self, stidx: StIdx<StorageT>) -> &Itemset<StorageT> {
let (_, closed_state) = &self.states[usize::from(stidx)];
closed_state
}
pub fn iter_closed_states<'a>(
&'a self,
) -> Box<dyn Iterator<Item = &'a Itemset<StorageT>> + 'a> {
Box::new(self.states.iter().map(|(_, x)| x))
}
pub fn core_state(&self, stidx: StIdx<StorageT>) -> &Itemset<StorageT> {
let (core_states, _) = &self.states[usize::from(stidx)];
core_states
}
pub fn iter_core_states<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Itemset<StorageT>> + 'a> {
Box::new(self.states.iter().map(|(x, _)| x))
}
pub fn all_states_len(&self) -> StIdx<StorageT> {
StIdx(self.states.len().as_())
}
pub fn edge(&self, stidx: StIdx<StorageT>, sym: Symbol<StorageT>) -> Option<StIdx<StorageT>> {
self.edges
.get(usize::from(stidx))
.and_then(|x| x.get(&sym))
.cloned()
}
pub fn edges(&self, stidx: StIdx<StorageT>) -> &HashMap<Symbol<StorageT>, StIdx<StorageT>> {
&self.edges[usize::from(stidx)]
}
pub fn all_edges_len(&self) -> usize {
self.edges.iter().fold(0, |a, x| a + x.len())
}
pub fn pp(&self, grm: &YaccGrammar<StorageT>, core_states: bool) -> String {
fn num_digits<StorageT: 'static + Hash + PrimInt + Unsigned>(i: StIdx<StorageT>) -> usize
where
usize: AsPrimitive<StorageT>,
{
if usize::from(i) == 0 {
1
} else {
((usize::from(i) as f64).log10() as usize) + 1
}
}
fn fmt_sym<StorageT: 'static + PrimInt + Unsigned>(
grm: &YaccGrammar<StorageT>,
sym: Symbol<StorageT>,
) -> String
where
usize: AsPrimitive<StorageT>,
{
match sym {
Symbol::Rule(ridx) => grm.rule_name_str(ridx).to_string(),
Symbol::Token(tidx) => format!("'{}'", grm.token_name(tidx).unwrap_or("")),
}
}
let mut o = String::new();
for (stidx, (core_st, closed_st)) in self.iter_stidxs().zip(self.states.iter()) {
if stidx != self.start_state {
o.push('\n');
}
{
let padding = num_digits(self.all_states_len()) - num_digits(stidx);
write!(o, "{}:{}", usize::from(stidx), " ".repeat(padding)).ok();
}
let st = if core_states { core_st } else { closed_st };
for (i, (&(pidx, sidx), ctx)) in st.items.iter().enumerate() {
let padding = if i == 0 {
0
} else {
o.push_str("\n "); num_digits(self.all_states_len())
};
write!(
o,
"{} [{} ->",
" ".repeat(padding),
grm.rule_name_str(grm.prod_to_rule(pidx))
)
.ok();
for (i_sidx, i_ssym) in grm.prod(pidx).iter().enumerate() {
if i_sidx == usize::from(sidx) {
o.push_str(" .");
}
write!(o, " {}", fmt_sym(grm, *i_ssym)).ok();
}
if usize::from(sidx) == grm.prod(pidx).len() {
o.push_str(" .");
}
o.push_str(", {");
let mut seen_b = false;
for bidx in ctx.iter_set_bits(..) {
if seen_b {
o.push_str(", ");
} else {
seen_b = true;
}
let tidx = TIdx(bidx.as_());
if tidx == grm.eof_token_idx() {
o.push_str("'$'");
} else {
write!(o, "'{}'", grm.token_name(tidx).unwrap()).ok();
}
}
o.push_str("}]");
}
for (esym, e_stidx) in self.edges(stidx).iter() {
write!(
o,
"\n{}{} -> {}",
" ".repeat(num_digits(self.all_states_len()) + 2),
fmt_sym(grm, *esym),
usize::from(*e_stidx)
)
.ok();
}
}
o
}
pub fn pp_core_states(&self, grm: &YaccGrammar<StorageT>) -> String {
self.pp(grm, true)
}
pub fn pp_closed_states(&self, grm: &YaccGrammar<StorageT>) -> String {
self.pp(grm, false)
}
}
#[cfg(test)]
use cfgrammar::SIdx;
#[cfg(test)]
pub(crate) fn state_exists<StorageT: 'static + Hash + PrimInt + Unsigned>(
grm: &YaccGrammar<StorageT>,
is: &Itemset<StorageT>,
nt: &str,
prod_off: usize,
dot: SIdx<StorageT>,
la: Vec<&str>,
) where
usize: AsPrimitive<StorageT>,
{
let ab_prod_off = grm.rule_to_prods(grm.rule_idx(nt).unwrap())[prod_off];
let ctx = &is.items[&(ab_prod_off, dot)];
for tidx in grm.iter_tidxs() {
let bit = ctx[usize::from(tidx)];
let mut found = false;
for t in la.iter() {
let off = if t == &"$" {
grm.eof_token_idx()
} else {
grm.token_idx(t).unwrap()
};
if off == tidx {
if !bit {
panic!("bit for token {}, dot {} is not set in production {} of {} when it should be",
t, usize::from(dot), prod_off, nt);
}
found = true;
break;
}
}
if !found && bit {
panic!(
"bit for token {}, dot {} is set in production {} of {} when it shouldn't be",
grm.token_name(tidx).unwrap(),
usize::from(dot),
prod_off,
nt
);
}
}
}
#[cfg(test)]
mod test {
use crate::{pager::pager_stategraph, StIdx};
use cfgrammar::{
yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
Symbol,
};
#[test]
#[rustfmt::skip]
fn test_statetable_core() {
let grm = YaccGrammar::new(
YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
"
%start A
%%
A: 'OPEN_BRACKET' A 'CLOSE_BRACKET'
| 'a'
| 'b';
"
).unwrap();
let sg = pager_stategraph(&grm);
assert_eq!(sg.all_states_len(), StIdx(7));
assert_eq!(sg.states.iter().fold(0, |a, (x, _)| a + x.items.len()), 7);
assert_eq!(sg.all_edges_len(), 9);
let s0 = StIdx(0);
sg.edge(s0, Symbol::Rule(grm.rule_idx("A").unwrap())).unwrap(); let s2 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
let s3 = sg.edge(s0, Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
let s5 = sg.edge(s0, Symbol::Token(grm.token_idx("OPEN_BRACKET").unwrap())).unwrap();
assert_eq!(s2, sg.edge(s5, Symbol::Token(grm.token_idx("a").unwrap())).unwrap());
assert_eq!(s3, sg.edge(s5, Symbol::Token(grm.token_idx("b").unwrap())).unwrap());
assert_eq!(s5, sg.edge(s5, Symbol::Token(grm.token_idx("OPEN_BRACKET").unwrap())).unwrap());
let s4 = sg.edge(s5, Symbol::Rule(grm.rule_idx("A").unwrap())).unwrap();
sg.edge(s4, Symbol::Token(grm.token_idx("CLOSE_BRACKET").unwrap())).unwrap(); }
}