rustc_span/
analyze_source_file.rs1use super::*;
2
3#[cfg(test)]
4mod tests;
5
6pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>) {
12 let mut lines = vec![RelativeBytePos::from_u32(0)];
13 let mut multi_byte_chars = vec![];
14
15 analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars);
17
18 if let Some(&last_line_start) = lines.last() {
22 let source_file_end = RelativeBytePos::from_usize(src.len());
23 assert!(source_file_end >= last_line_start);
24 if last_line_start == source_file_end {
25 lines.pop();
26 }
27 }
28
29 (lines, multi_byte_chars)
30}
31
/// Forwards its input, token for token, to `cfg_match!` when `cfg(bootstrap)`
/// is set and to `cfg_select!` otherwise.
macro_rules! cfg_select_dispatch {
    ($($arms:tt)*) => {
        #[cfg(not(bootstrap))]
        cfg_select! { $($arms)* }

        #[cfg(bootstrap)]
        cfg_match! { $($arms)* }
    };
}
42
43cfg_select_dispatch! {
44 any(target_arch = "x86", target_arch = "x86_64") => {
45 fn analyze_source_file_dispatch(
46 src: &str,
47 lines: &mut Vec<RelativeBytePos>,
48 multi_byte_chars: &mut Vec<MultiByteChar>,
49 ) {
50 if is_x86_feature_detected!("sse2") {
51 unsafe {
52 analyze_source_file_sse2(src, lines, multi_byte_chars);
53 }
54 } else {
55 analyze_source_file_generic(
56 src,
57 src.len(),
58 RelativeBytePos::from_u32(0),
59 lines,
60 multi_byte_chars,
61 );
62 }
63 }
64
65 #[target_feature(enable = "sse2")]
70 unsafe fn analyze_source_file_sse2(
71 src: &str,
72 lines: &mut Vec<RelativeBytePos>,
73 multi_byte_chars: &mut Vec<MultiByteChar>,
74 ) {
75 #[cfg(target_arch = "x86")]
76 use std::arch::x86::*;
77 #[cfg(target_arch = "x86_64")]
78 use std::arch::x86_64::*;
79
80 const CHUNK_SIZE: usize = 16;
81
82 let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
83
84 let mut intra_chunk_offset = 0;
89
90 for (chunk_index, chunk) in chunks.iter().enumerate() {
91 let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
94
95 let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
98 let multibyte_mask = _mm_movemask_epi8(multibyte_test);
100
101 if multibyte_mask == 0 {
103 assert!(intra_chunk_offset == 0);
104
105 let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
107 let mut newlines_mask = _mm_movemask_epi8(newlines_test);
108
109 let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
110
111 while newlines_mask != 0 {
112 let index = newlines_mask.trailing_zeros();
113
114 lines.push(RelativeBytePos(index) + output_offset);
115
116 newlines_mask &= newlines_mask - 1;
118 }
119 } else {
120 let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
123 intra_chunk_offset = analyze_source_file_generic(
124 &src[scan_start..],
125 CHUNK_SIZE - intra_chunk_offset,
126 RelativeBytePos::from_usize(scan_start),
127 lines,
128 multi_byte_chars,
129 );
130 }
131 }
132
133 let tail_start = src.len() - tail.len() + intra_chunk_offset;
135 if tail_start < src.len() {
136 analyze_source_file_generic(
137 &src[tail_start..],
138 src.len() - tail_start,
139 RelativeBytePos::from_usize(tail_start),
140 lines,
141 multi_byte_chars,
142 );
143 }
144 }
145 }
146 _ => {
147 fn analyze_source_file_dispatch(
149 src: &str,
150 lines: &mut Vec<RelativeBytePos>,
151 multi_byte_chars: &mut Vec<MultiByteChar>,
152 ) {
153 analyze_source_file_generic(
154 src,
155 src.len(),
156 RelativeBytePos::from_u32(0),
157 lines,
158 multi_byte_chars,
159 );
160 }
161 }
162}
163
164fn analyze_source_file_generic(
168 src: &str,
169 scan_len: usize,
170 output_offset: RelativeBytePos,
171 lines: &mut Vec<RelativeBytePos>,
172 multi_byte_chars: &mut Vec<MultiByteChar>,
173) -> usize {
174 assert!(src.len() >= scan_len);
175 let mut i = 0;
176 let src_bytes = src.as_bytes();
177
178 while i < scan_len {
179 let byte = unsafe {
180 *src_bytes.get_unchecked(i)
182 };
183
184 let mut char_len = 1;
187
188 if byte == b'\n' {
189 let pos = RelativeBytePos::from_usize(i) + output_offset;
190 lines.push(pos + RelativeBytePos(1));
191 } else if byte >= 128 {
192 let c = src[i..].chars().next().unwrap();
194 char_len = c.len_utf8();
195
196 let pos = RelativeBytePos::from_usize(i) + output_offset;
197 assert!((2..=4).contains(&char_len));
198 let mbc = MultiByteChar { pos, bytes: char_len as u8 };
199 multi_byte_chars.push(mbc);
200 }
201
202 i += char_len;
203 }
204
205 i - scan_len
206}