use crate::Span;
use std::str::FromStr;
pub struct NewlineCache {
newlines: Vec<usize>,
trailing_bytes: usize,
}
impl NewlineCache {
pub fn new() -> Self {
Self {
newlines: vec![0],
trailing_bytes: 0,
}
}
pub fn feed(&mut self, src: &str) {
let start_pos = self.newlines.last().unwrap() + self.trailing_bytes;
self.newlines
.extend(src.char_indices().filter_map(|c| match c {
(offset, '\n') => {
self.trailing_bytes = 0;
Some(start_pos + offset + 1)
}
(_, c) => {
self.trailing_bytes += c.len_utf8();
None
}
}));
}
fn feed_len(&self) -> usize {
self.newlines.last().unwrap() + self.trailing_bytes
}
pub fn byte_to_line_num(&self, byte: usize) -> Option<usize> {
if byte > self.feed_len() {
return None;
}
let last_newline = self.newlines.last().unwrap();
let last_byte = last_newline + self.trailing_bytes;
if byte < last_byte && byte > *last_newline {
Some(self.newlines.len())
} else {
let (line_m1, _) = self
.newlines
.iter()
.enumerate()
.rev()
.find(|&(_, &line_off)| line_off <= byte)
.unwrap();
Some(line_m1 + 1)
}
}
fn line_num_to_byte(&self, line_num: usize) -> Option<usize> {
if line_num > self.newlines.len() || line_num == 0 {
None
} else {
Some(self.newlines[line_num - 1])
}
}
pub fn byte_to_line_byte(&self, byte: usize) -> Option<usize> {
self.byte_to_line_num(byte)
.and_then(|line_num| self.line_num_to_byte(line_num))
}
pub fn byte_to_line_num_and_col_num(&self, src: &str, byte: usize) -> Option<(usize, usize)> {
if byte > self.feed_len() || src.len() != self.feed_len() {
return None;
}
self.byte_to_line_num(byte).map(|line_num| {
if byte == src.len() {
let line_byte = *self.newlines.last().unwrap();
return (self.newlines.len(), src[line_byte..].chars().count() + 1);
}
let line_byte = self.line_num_to_byte(line_num).unwrap();
let mut column = 0;
let mut skip_char = None;
for (c_off, c) in src[line_byte..].char_indices() {
if Some(c) != skip_char {
column += 1;
skip_char = None;
}
if c == '\r' {
skip_char = Some('\n');
}
if c_off == byte - line_byte {
break;
}
}
(line_num, column)
})
}
pub fn span_line_bytes(&self, span: Span) -> (usize, usize) {
let (st, st_line) = match self.newlines.binary_search(&span.start()) {
Ok(j) => (self.newlines[j], j + 1),
Err(j) => (self.newlines[j - 1], j),
};
let en = match self.newlines[st_line..].binary_search(&span.end()) {
Ok(j) if st_line + j == self.newlines.len() - st_line => {
self.newlines.last().unwrap() + self.trailing_bytes
}
Ok(j) => self.newlines[st_line + j + 1] - 1,
Err(j) if st_line + j == self.newlines.len() => {
self.newlines.last().unwrap() + self.trailing_bytes
}
Err(j) => self.newlines[st_line + j] - 1,
};
(st, en)
}
}
impl FromStr for NewlineCache {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut x = Self::new();
x.feed(s);
Ok(x)
}
}
impl<'a> FromIterator<&'a str> for NewlineCache {
fn from_iter<I>(iter: I) -> Self
where
I: IntoIterator<Item = &'a str>,
{
let mut nlcache = NewlineCache::new();
for s in iter.into_iter() {
nlcache.feed(s)
}
nlcache
}
}
#[cfg(test)]
mod tests {
use super::NewlineCache;
use crate::Span;
use std::str::FromStr;
fn newline_test_helper(feed: &[&str], tests: &[(usize, usize)]) {
let cache: NewlineCache = feed.iter().copied().collect();
let full_string = feed.concat();
let mut src_pos = 0;
let mut result = Vec::new();
for f in feed {
for (offset, _) in f.char_indices() {
let line_col =
cache.byte_to_line_num_and_col_num(full_string.as_str(), offset + src_pos);
result.push(line_col.unwrap())
}
src_pos += f.len();
}
assert_eq!(result, tests)
}
#[test]
fn newline_cache_various() {
let abc = &[(1, 1), (1, 2), (1, 3)];
newline_test_helper(&["a", "b", "c"], abc);
newline_test_helper(&["abc"], abc);
newline_test_helper(&["ab", "c"], abc);
let ab = &[(1, 1), (1, 2), (2, 1)];
newline_test_helper(&["a\n", "b"], ab);
newline_test_helper(&["a", "\nb"], ab);
let cr = &[(1, 1), (1, 2), (1, 2), (2, 1)];
newline_test_helper(&["a\r\n", "b"], cr);
newline_test_helper(&["a", "\r\nb"], cr);
newline_test_helper(&["a\r", "\nb"], cr);
newline_test_helper(&["\r\n\n"], &[(1, 1), (1, 1), (2, 1)]);
let suits = &[(1, 1), (1, 2), (1, 3), (1, 4)];
newline_test_helper(&["", "♠♥♦♣"], suits);
newline_test_helper(&["♠", "♥♦♣"], suits);
newline_test_helper(&["♠♥", "♦♣"], suits);
newline_test_helper(&["♠♥♦", "♣"], suits);
newline_test_helper(&["♠♥♦♣", ""], suits);
let suits_nl = &[(1, 1), (1, 2), (1, 3), (2, 1), (2, 2)];
newline_test_helper(&["", "♠♥\n♦♣"], suits_nl);
newline_test_helper(&["♠", "♥\n♦♣"], suits_nl);
newline_test_helper(&["♠♥", "\n♦♣"], suits_nl);
newline_test_helper(&["♠♥\n", "♦♣"], suits_nl);
newline_test_helper(&["♠♥\n♦", "♣"], suits_nl);
newline_test_helper(&["♠♥\n♦♣", ""], suits_nl);
#[rustfmt::skip]
let multi_line = &[
(1, 1), (1, 2), (2, 1), (2, 2),
(3, 1), (3, 2), (4, 1), (4, 2),
(5, 1), (5, 2), (6, 1), (6, 2),
];
newline_test_helper(&["", "1\n2\n3\n4\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n3\n4\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n3\n4\n5\n6\n", ""], multi_line);
newline_test_helper(&["1", "\n2\n3\n4\n5\n6\n"], multi_line);
newline_test_helper(&["1\n", "2\n3\n4\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2", "\n3\n4\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n", "3\n4\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n3", "\n4\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n3\n", "4\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n3\n4", "\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n3\n4\n", "5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n3\n4\n5", "\n6\n"], multi_line);
newline_test_helper(&["1\n2\n3\n4\n5\n", "6\n"], multi_line);
newline_test_helper(&["1\n2\n", "3\n4\n", "5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n", "3\n4", "\n5\n6\n"], multi_line);
newline_test_helper(&["1\n2\n", "", "3\n4", "", "\n5\n6\n"], multi_line);
newline_test_helper(&["", "", ""], &[]);
newline_test_helper(&["", ""], &[]);
newline_test_helper(&[""], &[]);
newline_test_helper(&["", "", "", "a"], &[(1, 1)]);
newline_test_helper(&["", "", "a"], &[(1, 1)]);
newline_test_helper(&["", "a"], &[(1, 1)]);
newline_test_helper(&["1", "\n23"], &[(1, 1), (1, 2), (2, 1), (2, 2)]);
}
#[test]
fn newline_cache_negative() {
let mut cache = NewlineCache::new();
cache.feed("1");
assert_eq!(None, cache.byte_to_line_num(2));
assert_eq!(None, cache.byte_to_line_num_and_col_num("1", 2));
cache.feed("\n23");
assert_eq!(None, cache.byte_to_line_num(5));
assert_eq!(None, cache.byte_to_line_num_and_col_num("1\n23", 5));
assert_eq!(None, cache.byte_to_line_num_and_col_num("1\n234", 1));
}
#[test]
fn spanlines_str() {
fn span_line_eq(input: &str, byte_st: usize, byte_en: usize, substr: &str) {
let nlc = NewlineCache::from_str(input).unwrap();
let (lines_st, lines_en) = nlc.span_line_bytes(Span::new(byte_st, byte_en));
assert_eq!(&input[lines_st..lines_en], substr);
}
span_line_eq("a b c", 2, 3, "a b c");
span_line_eq("a b c", 4, 5, "a b c");
span_line_eq("a b c\n", 2, 3, "a b c");
span_line_eq("a b c\n", 4, 5, "a b c");
span_line_eq("a b c\n", 5, 5, "a b c");
span_line_eq("a b c\n", 6, 6, "");
span_line_eq(" a\nb\n c d", 1, 2, " a");
span_line_eq(" a\nb\n c d", 3, 4, "b");
span_line_eq(" a\nb\n c d", 7, 8, " c d");
span_line_eq(" a\nb\n c d", 9, 10, " c d");
span_line_eq("ab\n", 2, 3, "ab\n");
span_line_eq("ab\ncd", 2, 3, "ab\ncd");
span_line_eq("ab\ncd\nef", 2, 3, "ab\ncd");
let s = "\na\na a\na a a\na a a a";
span_line_eq(s, 0, 0, "");
span_line_eq(s, 0, 2, "\na");
span_line_eq(s, 0, 4, "\na\na a");
span_line_eq(s, 0, 7, "\na\na a\na a a");
span_line_eq(s, 4, 7, "a a\na a a");
span_line_eq(" a\n❤ b", 1, 2, " a");
span_line_eq(" a\n❤ b", 3, 4, "❤ b");
span_line_eq(" a\n❤ b", 5, 6, "❤ b");
}
}