rustc_span/
analyze_source_file.rs1use super::*;
2
3#[cfg(test)]
4mod tests;
5
6pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>) {
12 let mut lines = vec![RelativeBytePos::from_u32(0)];
13 let mut multi_byte_chars = vec![];
14
15 analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars);
17
18 if let Some(&last_line_start) = lines.last() {
22 let source_file_end = RelativeBytePos::from_usize(src.len());
23 assert!(source_file_end >= last_line_start);
24 if last_line_start == source_file_end {
25 lines.pop();
26 }
27 }
28
29 (lines, multi_byte_chars)
30}
31
32cfg_match! {
33 any(target_arch = "x86", target_arch = "x86_64") => {
34 fn analyze_source_file_dispatch(
35 src: &str,
36 lines: &mut Vec<RelativeBytePos>,
37 multi_byte_chars: &mut Vec<MultiByteChar>,
38 ) {
39 if is_x86_feature_detected!("sse2") {
40 unsafe {
41 analyze_source_file_sse2(src, lines, multi_byte_chars);
42 }
43 } else {
44 analyze_source_file_generic(
45 src,
46 src.len(),
47 RelativeBytePos::from_u32(0),
48 lines,
49 multi_byte_chars,
50 );
51 }
52 }
53
54 #[target_feature(enable = "sse2")]
59 unsafe fn analyze_source_file_sse2(
60 src: &str,
61 lines: &mut Vec<RelativeBytePos>,
62 multi_byte_chars: &mut Vec<MultiByteChar>,
63 ) {
64 #[cfg(target_arch = "x86")]
65 use std::arch::x86::*;
66 #[cfg(target_arch = "x86_64")]
67 use std::arch::x86_64::*;
68
69 const CHUNK_SIZE: usize = 16;
70
71 let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
72
73 let mut intra_chunk_offset = 0;
78
79 for (chunk_index, chunk) in chunks.iter().enumerate() {
80 let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
83
84 let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
87 let multibyte_mask = _mm_movemask_epi8(multibyte_test);
89
90 if multibyte_mask == 0 {
92 assert!(intra_chunk_offset == 0);
93
94 let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
96 let mut newlines_mask = _mm_movemask_epi8(newlines_test);
97
98 let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
99
100 while newlines_mask != 0 {
101 let index = newlines_mask.trailing_zeros();
102
103 lines.push(RelativeBytePos(index) + output_offset);
104
105 newlines_mask &= newlines_mask - 1;
107 }
108 } else {
109 let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
112 intra_chunk_offset = analyze_source_file_generic(
113 &src[scan_start..],
114 CHUNK_SIZE - intra_chunk_offset,
115 RelativeBytePos::from_usize(scan_start),
116 lines,
117 multi_byte_chars,
118 );
119 }
120 }
121
122 let tail_start = src.len() - tail.len() + intra_chunk_offset;
124 if tail_start < src.len() {
125 analyze_source_file_generic(
126 &src[tail_start..],
127 src.len() - tail_start,
128 RelativeBytePos::from_usize(tail_start),
129 lines,
130 multi_byte_chars,
131 );
132 }
133 }
134 }
135 _ => {
136 fn analyze_source_file_dispatch(
138 src: &str,
139 lines: &mut Vec<RelativeBytePos>,
140 multi_byte_chars: &mut Vec<MultiByteChar>,
141 ) {
142 analyze_source_file_generic(
143 src,
144 src.len(),
145 RelativeBytePos::from_u32(0),
146 lines,
147 multi_byte_chars,
148 );
149 }
150 }
151}
152
153fn analyze_source_file_generic(
157 src: &str,
158 scan_len: usize,
159 output_offset: RelativeBytePos,
160 lines: &mut Vec<RelativeBytePos>,
161 multi_byte_chars: &mut Vec<MultiByteChar>,
162) -> usize {
163 assert!(src.len() >= scan_len);
164 let mut i = 0;
165 let src_bytes = src.as_bytes();
166
167 while i < scan_len {
168 let byte = unsafe {
169 *src_bytes.get_unchecked(i)
171 };
172
173 let mut char_len = 1;
176
177 if byte == b'\n' {
178 let pos = RelativeBytePos::from_usize(i) + output_offset;
179 lines.push(pos + RelativeBytePos(1));
180 } else if byte >= 128 {
181 let c = src[i..].chars().next().unwrap();
183 char_len = c.len_utf8();
184
185 let pos = RelativeBytePos::from_usize(i) + output_offset;
186 assert!((2..=4).contains(&char_len));
187 let mbc = MultiByteChar { pos, bytes: char_len as u8 };
188 multi_byte_chars.push(mbc);
189 }
190
191 i += char_len;
192 }
193
194 i - scan_len
195}