1use crate::Span;
2use std::str::FromStr;
3
4pub struct NewlineCache {
16 newlines: Vec<usize>,
17 trailing_bytes: usize,
18}
19
20impl NewlineCache {
21 pub fn new() -> Self {
23 Self {
24 newlines: vec![0],
25 trailing_bytes: 0,
26 }
27 }
28
29 pub fn feed(&mut self, src: &str) {
35 let start_pos = self.newlines.last().unwrap() + self.trailing_bytes;
36 self.newlines
37 .extend(src.char_indices().filter_map(|c| match c {
38 (offset, '\n') => {
39 self.trailing_bytes = 0;
40 Some(start_pos + offset + 1)
41 }
42 (_, c) => {
43 self.trailing_bytes += c.len_utf8();
44 None
45 }
46 }));
47 }
48
49 fn feed_len(&self) -> usize {
51 self.newlines.last().unwrap() + self.trailing_bytes
52 }
53
54 pub fn byte_to_line_num(&self, byte: usize) -> Option<usize> {
57 if byte > self.feed_len() {
58 return None;
59 }
60
61 let last_newline = self.newlines.last().unwrap();
62 let last_byte = last_newline + self.trailing_bytes;
63
64 if byte < last_byte && byte > *last_newline {
65 Some(self.newlines.len())
66 } else {
67 let (line_m1, _) = self
68 .newlines
69 .iter()
70 .enumerate()
71 .rev()
72 .find(|&(_, &line_off)| line_off <= byte)
73 .unwrap();
74 Some(line_m1 + 1)
75 }
76 }
77
78 fn line_num_to_byte(&self, line_num: usize) -> Option<usize> {
82 if line_num > self.newlines.len() || line_num == 0 {
83 None
84 } else {
85 Some(self.newlines[line_num - 1])
86 }
87 }
88
89 pub fn byte_to_line_byte(&self, byte: usize) -> Option<usize> {
92 self.byte_to_line_num(byte)
93 .and_then(|line_num| self.line_num_to_byte(line_num))
94 }
95
96 pub fn byte_to_line_num_and_col_num(&self, src: &str, byte: usize) -> Option<(usize, usize)> {
105 if byte > self.feed_len() || src.len() != self.feed_len() {
106 return None;
107 }
108
109 self.byte_to_line_num(byte).map(|line_num| {
110 if byte == src.len() {
111 let line_byte = *self.newlines.last().unwrap();
112 return (self.newlines.len(), src[line_byte..].chars().count() + 1);
113 }
114
115 let line_byte = self.line_num_to_byte(line_num).unwrap();
116 let mut column = 0;
117 let mut skip_char = None;
118
119 for (c_off, c) in src[line_byte..].char_indices() {
120 if Some(c) != skip_char {
121 column += 1;
122 skip_char = None;
123 }
124 if c == '\r' {
125 skip_char = Some('\n');
126 }
127 if c_off == byte - line_byte {
128 break;
129 }
130 }
131 (line_num, column)
132 })
133 }
134
135 pub fn span_line_bytes(&self, span: Span) -> (usize, usize) {
138 let (st, st_line) = match self.newlines.binary_search(&span.start()) {
139 Ok(j) => (self.newlines[j], j + 1),
140 Err(j) => (self.newlines[j - 1], j),
141 };
142 let en = match self.newlines[st_line..].binary_search(&span.end()) {
143 Ok(j) if st_line + j == self.newlines.len() - st_line => {
144 self.newlines.last().unwrap() + self.trailing_bytes
145 }
146 Ok(j) => self.newlines[st_line + j + 1] - 1,
147 Err(j) if st_line + j == self.newlines.len() => {
148 self.newlines.last().unwrap() + self.trailing_bytes
149 }
150 Err(j) => self.newlines[st_line + j] - 1,
151 };
152 (st, en)
153 }
154}
155
156impl FromStr for NewlineCache {
157 type Err = ();
158
159 fn from_str(s: &str) -> Result<Self, Self::Err> {
162 let mut x = Self::new();
163 x.feed(s);
164 Ok(x)
165 }
166}
167
168impl<'a> FromIterator<&'a str> for NewlineCache {
169 fn from_iter<I>(iter: I) -> Self
170 where
171 I: IntoIterator<Item = &'a str>,
172 {
173 let mut nlcache = NewlineCache::new();
174 for s in iter.into_iter() {
175 nlcache.feed(s)
176 }
177 nlcache
178 }
179}
180
181#[cfg(test)]
182mod tests {
183 use super::NewlineCache;
184 use crate::Span;
185 use std::str::FromStr;
186
187 fn newline_test_helper(feed: &[&str], tests: &[(usize, usize)]) {
188 let cache: NewlineCache = feed.iter().copied().collect();
189 let full_string = feed.concat();
190
191 let mut src_pos = 0;
192 let mut result = Vec::new();
193 for f in feed {
194 for (offset, _) in f.char_indices() {
195 let line_col =
196 cache.byte_to_line_num_and_col_num(full_string.as_str(), offset + src_pos);
197 result.push(line_col.unwrap())
198 }
199 src_pos += f.len();
200 }
201 assert_eq!(result, tests)
202 }
203
204 #[test]
205 fn newline_cache_various() {
206 let abc = &[(1, 1), (1, 2), (1, 3)];
207 newline_test_helper(&["a", "b", "c"], abc);
208 newline_test_helper(&["abc"], abc);
209 newline_test_helper(&["ab", "c"], abc);
210
211 let ab = &[(1, 1), (1, 2), (2, 1)];
212 newline_test_helper(&["a\n", "b"], ab);
213 newline_test_helper(&["a", "\nb"], ab);
214
215 let cr = &[(1, 1), (1, 2), (1, 2), (2, 1)];
216 newline_test_helper(&["a\r\n", "b"], cr);
217 newline_test_helper(&["a", "\r\nb"], cr);
218 newline_test_helper(&["a\r", "\nb"], cr);
219
220 newline_test_helper(&["\r\n\n"], &[(1, 1), (1, 1), (2, 1)]);
221
222 let suits = &[(1, 1), (1, 2), (1, 3), (1, 4)];
223 newline_test_helper(&["", "♠♥♦♣"], suits);
224 newline_test_helper(&["♠", "♥♦♣"], suits);
225 newline_test_helper(&["♠♥", "♦♣"], suits);
226 newline_test_helper(&["♠♥♦", "♣"], suits);
227 newline_test_helper(&["♠♥♦♣", ""], suits);
228
229 let suits_nl = &[(1, 1), (1, 2), (1, 3), (2, 1), (2, 2)];
230 newline_test_helper(&["", "♠♥\n♦♣"], suits_nl);
231 newline_test_helper(&["♠", "♥\n♦♣"], suits_nl);
232 newline_test_helper(&["♠♥", "\n♦♣"], suits_nl);
233 newline_test_helper(&["♠♥\n", "♦♣"], suits_nl);
234 newline_test_helper(&["♠♥\n♦", "♣"], suits_nl);
235 newline_test_helper(&["♠♥\n♦♣", ""], suits_nl);
236
237 #[rustfmt::skip]
238 let multi_line = &[
239 (1, 1), (1, 2), (2, 1), (2, 2),
240 (3, 1), (3, 2), (4, 1), (4, 2),
241 (5, 1), (5, 2), (6, 1), (6, 2),
242 ];
243 newline_test_helper(&["", "1\n2\n3\n4\n5\n6\n"], multi_line);
244 newline_test_helper(&["1\n2\n3\n4\n5\n6\n"], multi_line);
245 newline_test_helper(&["1\n2\n3\n4\n5\n6\n", ""], multi_line);
246
247 newline_test_helper(&["1", "\n2\n3\n4\n5\n6\n"], multi_line);
248 newline_test_helper(&["1\n", "2\n3\n4\n5\n6\n"], multi_line);
249 newline_test_helper(&["1\n2", "\n3\n4\n5\n6\n"], multi_line);
250 newline_test_helper(&["1\n2\n", "3\n4\n5\n6\n"], multi_line);
251 newline_test_helper(&["1\n2\n3", "\n4\n5\n6\n"], multi_line);
252 newline_test_helper(&["1\n2\n3\n", "4\n5\n6\n"], multi_line);
253 newline_test_helper(&["1\n2\n3\n4", "\n5\n6\n"], multi_line);
254 newline_test_helper(&["1\n2\n3\n4\n", "5\n6\n"], multi_line);
255 newline_test_helper(&["1\n2\n3\n4\n5", "\n6\n"], multi_line);
256 newline_test_helper(&["1\n2\n3\n4\n5\n", "6\n"], multi_line);
257 newline_test_helper(&["1\n2\n", "3\n4\n", "5\n6\n"], multi_line);
258 newline_test_helper(&["1\n2\n", "3\n4", "\n5\n6\n"], multi_line);
259 newline_test_helper(&["1\n2\n", "", "3\n4", "", "\n5\n6\n"], multi_line);
260
261 newline_test_helper(&["", "", ""], &[]);
262 newline_test_helper(&["", ""], &[]);
263 newline_test_helper(&[""], &[]);
264 newline_test_helper(&["", "", "", "a"], &[(1, 1)]);
265 newline_test_helper(&["", "", "a"], &[(1, 1)]);
266 newline_test_helper(&["", "a"], &[(1, 1)]);
267
268 newline_test_helper(&["1", "\n23"], &[(1, 1), (1, 2), (2, 1), (2, 2)]);
270 }
271
272 #[test]
273 fn newline_cache_negative() {
274 let mut cache = NewlineCache::new();
275
276 cache.feed("1");
278 assert_eq!(None, cache.byte_to_line_num(2));
279 assert_eq!(None, cache.byte_to_line_num_and_col_num("1", 2));
280 cache.feed("\n23");
281 assert_eq!(None, cache.byte_to_line_num(5));
282 assert_eq!(None, cache.byte_to_line_num_and_col_num("1\n23", 5));
283
284 assert_eq!(None, cache.byte_to_line_num_and_col_num("1\n234", 1));
286 }
287
288 #[test]
289 fn spanlines_str() {
290 fn span_line_eq(input: &str, byte_st: usize, byte_en: usize, substr: &str) {
291 let nlc = NewlineCache::from_str(input).unwrap();
292 let (lines_st, lines_en) = nlc.span_line_bytes(Span::new(byte_st, byte_en));
293 assert_eq!(&input[lines_st..lines_en], substr);
294 }
295
296 span_line_eq("a b c", 2, 3, "a b c");
297 span_line_eq("a b c", 4, 5, "a b c");
298
299 span_line_eq("a b c\n", 2, 3, "a b c");
300 span_line_eq("a b c\n", 4, 5, "a b c");
301 span_line_eq("a b c\n", 5, 5, "a b c");
302 span_line_eq("a b c\n", 6, 6, "");
303
304 span_line_eq(" a\nb\n c d", 1, 2, " a");
305 span_line_eq(" a\nb\n c d", 3, 4, "b");
306 span_line_eq(" a\nb\n c d", 7, 8, " c d");
307 span_line_eq(" a\nb\n c d", 9, 10, " c d");
308
309 span_line_eq("ab\n", 2, 3, "ab\n");
310 span_line_eq("ab\ncd", 2, 3, "ab\ncd");
311 span_line_eq("ab\ncd\nef", 2, 3, "ab\ncd");
312
313 let s = "\na\na a\na a a\na a a a";
314 span_line_eq(s, 0, 0, "");
315 span_line_eq(s, 0, 2, "\na");
316 span_line_eq(s, 0, 4, "\na\na a");
317 span_line_eq(s, 0, 7, "\na\na a\na a a");
318 span_line_eq(s, 4, 7, "a a\na a a");
319
320 span_line_eq(" a\n❤ b", 1, 2, " a");
321 span_line_eq(" a\n❤ b", 3, 4, "❤ b");
322 span_line_eq(" a\n❤ b", 5, 6, "❤ b");
323 }
324}