// rustdoc/passes/lint/unescaped_backticks.rs

//! Detects unescaped backticks (\`) in doc comments.

use std::ops::Range;

use pulldown_cmark::{BrokenLink, Event, Parser};
use rustc_errors::Diag;
use rustc_hir::HirId;
use rustc_lint_defs::Applicability;
use rustc_resolve::rustdoc::source_span_for_markdown_range;

use crate::clean::Item;
use crate::core::DocContext;
use crate::html::markdown::main_body_opts;

15pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &str) {
16    let tcx = cx.tcx;
17
18    let link_names = item.link_names(&cx.cache);
19    let mut replacer = |broken_link: BrokenLink<'_>| {
20        link_names
21            .iter()
22            .find(|link| *link.original_text == *broken_link.reference)
23            .map(|link| ((*link.href).into(), (*link.new_text).into()))
24    };
25    let parser = Parser::new_with_broken_link_callback(dox, main_body_opts(), Some(&mut replacer))
26        .into_offset_iter();
27
28    let mut element_stack = Vec::new();
29
30    let mut prev_text_end = 0;
31    for (event, event_range) in parser {
32        match event {
33            Event::Start(_) => {
34                element_stack.push(Element::new(event_range));
35            }
36            Event::End(_) => {
37                let element = element_stack.pop().unwrap();
38
39                let Some(backtick_index) = element.backtick_index else {
40                    continue;
41                };
42
43                // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
44                // use the span of the entire attribute as a fallback.
45                let span = match source_span_for_markdown_range(
46                    tcx,
47                    dox,
48                    &(backtick_index..backtick_index + 1),
49                    &item.attrs.doc_strings,
50                ) {
51                    Some((sp, _)) => sp,
52                    None => item.attr_span(tcx),
53                };
54
55                tcx.node_span_lint(crate::lint::UNESCAPED_BACKTICKS, hir_id, span, |lint| {
56                    lint.primary_message("unescaped backtick");
57
58                    let mut help_emitted = false;
59
60                    match element.prev_code_guess {
61                        PrevCodeGuess::None => {}
62                        PrevCodeGuess::Start { guess, .. } => {
63                            // "foo` `bar`" -> "`foo` `bar`"
64                            if let Some(suggest_index) =
65                                clamp_start(guess, &element.suggestible_ranges)
66                                && can_suggest_backtick(dox, suggest_index)
67                            {
68                                suggest_insertion(
69                                    cx,
70                                    item,
71                                    dox,
72                                    lint,
73                                    suggest_index,
74                                    '`',
75                                    "the opening backtick of a previous inline code may be missing",
76                                );
77                                help_emitted = true;
78                            }
79                        }
80                        PrevCodeGuess::End { guess, .. } => {
81                            // "`foo `bar`" -> "`foo` `bar`"
82                            // Don't `clamp_end` here, because the suggestion is guaranteed to be inside
83                            // an inline code node and we intentionally "break" the inline code here.
84                            let suggest_index = guess;
85                            if can_suggest_backtick(dox, suggest_index) {
86                                suggest_insertion(
87                                    cx,
88                                    item,
89                                    dox,
90                                    lint,
91                                    suggest_index,
92                                    '`',
93                                    "a previous inline code might be longer than expected",
94                                );
95                                help_emitted = true;
96                            }
97                        }
98                    }
99
100                    if !element.prev_code_guess.is_confident() {
101                        // "`foo` bar`" -> "`foo` `bar`"
102                        if let Some(guess) =
103                            guess_start_of_code(dox, element.element_range.start..backtick_index)
104                            && let Some(suggest_index) =
105                                clamp_start(guess, &element.suggestible_ranges)
106                            && can_suggest_backtick(dox, suggest_index)
107                        {
108                            suggest_insertion(
109                                cx,
110                                item,
111                                dox,
112                                lint,
113                                suggest_index,
114                                '`',
115                                "the opening backtick of an inline code may be missing",
116                            );
117                            help_emitted = true;
118                        }
119
120                        // "`foo` `bar" -> "`foo` `bar`"
121                        // Don't suggest closing backtick after single trailing char,
122                        // if we already suggested opening backtick. For example:
123                        // "foo`." -> "`foo`." or "foo`s" -> "`foo`s".
124                        if let Some(guess) =
125                            guess_end_of_code(dox, backtick_index + 1..element.element_range.end)
126                            && let Some(suggest_index) =
127                                clamp_end(guess, &element.suggestible_ranges)
128                            && can_suggest_backtick(dox, suggest_index)
129                            && (!help_emitted || suggest_index - backtick_index > 2)
130                        {
131                            suggest_insertion(
132                                cx,
133                                item,
134                                dox,
135                                lint,
136                                suggest_index,
137                                '`',
138                                "the closing backtick of an inline code may be missing",
139                            );
140                            help_emitted = true;
141                        }
142                    }
143
144                    if !help_emitted {
145                        lint.help(
146                            "the opening or closing backtick of an inline code may be missing",
147                        );
148                    }
149
150                    suggest_insertion(
151                        cx,
152                        item,
153                        dox,
154                        lint,
155                        backtick_index,
156                        '\\',
157                        "if you meant to use a literal backtick, escape it",
158                    );
159                });
160            }
161            Event::Code(_) => {
162                let element = element_stack
163                    .last_mut()
164                    .expect("expected inline code node to be inside of an element");
165                assert!(
166                    event_range.start >= element.element_range.start
167                        && event_range.end <= element.element_range.end
168                );
169
170                // This inline code might be longer than it's supposed to be.
171                // Only check single backtick inline code for now.
172                if !element.prev_code_guess.is_confident()
173                    && dox.as_bytes().get(event_range.start) == Some(&b'`')
174                    && dox.as_bytes().get(event_range.start + 1) != Some(&b'`')
175                {
176                    let range_inside = event_range.start + 1..event_range.end - 1;
177                    let text_inside = &dox[range_inside.clone()];
178
179                    let is_confident = text_inside.starts_with(char::is_whitespace)
180                        || text_inside.ends_with(char::is_whitespace);
181
182                    if let Some(guess) = guess_end_of_code(dox, range_inside) {
183                        // Find earlier end of code.
184                        element.prev_code_guess = PrevCodeGuess::End { guess, is_confident };
185                    } else {
186                        // Find alternate start of code.
187                        let range_before = element.element_range.start..event_range.start;
188                        if let Some(guess) = guess_start_of_code(dox, range_before) {
189                            element.prev_code_guess = PrevCodeGuess::Start { guess, is_confident };
190                        }
191                    }
192                }
193            }
194            Event::Text(text) => {
195                let element = element_stack
196                    .last_mut()
197                    .expect("expected inline text node to be inside of an element");
198                assert!(
199                    event_range.start >= element.element_range.start
200                        && event_range.end <= element.element_range.end
201                );
202
203                // The first char is escaped if the prev char is \ and not part of a text node.
204                let is_escaped = prev_text_end < event_range.start
205                    && dox.as_bytes()[event_range.start - 1] == b'\\';
206
207                // Don't lint backslash-escaped (\`) or html-escaped (&#96;) backticks.
208                if *text == *"`" && !is_escaped && *text == dox[event_range.clone()] {
209                    // We found a stray backtick.
210                    assert!(
211                        element.backtick_index.is_none(),
212                        "expected at most one unescaped backtick per element",
213                    );
214                    element.backtick_index = Some(event_range.start);
215                }
216
217                prev_text_end = event_range.end;
218
219                if is_escaped {
220                    // Ensure that we suggest "`\x" and not "\`x".
221                    element.suggestible_ranges.push(event_range.start - 1..event_range.end);
222                } else {
223                    element.suggestible_ranges.push(event_range);
224                }
225            }
226            _ => {}
227        }
228    }
229}
230
/// A previous inline code node that looks wrong.
///
/// `guess` is the byte position in the doc string where a \` should be suggested. The guess
/// `is_confident` if the inline code starts or ends with whitespace.
#[derive(Debug)]
enum PrevCodeGuess {
    /// No suspicious previous inline code was seen.
    None,

    /// Missing \` at start.
    ///
    /// ```markdown
    /// foo` `bar`
    /// ```
    Start {
        guess: usize,
        is_confident: bool,
    },

    /// Missing \` at end.
    ///
    /// ```markdown
    /// `foo `bar`
    /// ```
    End {
        guess: usize,
        is_confident: bool,
    },
}

impl PrevCodeGuess {
    /// Returns the `is_confident` flag of the guess; `None` is never confident.
    fn is_confident(&self) -> bool {
        match *self {
            PrevCodeGuess::None => false,
            PrevCodeGuess::Start { is_confident, .. } | PrevCodeGuess::End { is_confident, .. } => {
                is_confident
            }
        }
    }
}

271/// A markdown [tagged element], which may or may not contain an unescaped backtick.
272///
273/// [tagged element]: https://docs.rs/pulldown-cmark/0.9/pulldown_cmark/enum.Tag.html
274#[derive(Debug)]
275struct Element {
276    /// The full range (span) of the element in the doc string.
277    element_range: Range<usize>,
278
279    /// The ranges where we're allowed to put backticks.
280    /// This is used to prevent breaking markdown elements like links or lists.
281    suggestible_ranges: Vec<Range<usize>>,
282
283    /// The unescaped backtick.
284    backtick_index: Option<usize>,
285
286    /// Suggest a different start or end of an inline code.
287    prev_code_guess: PrevCodeGuess,
288}
289
290impl Element {
291    const fn new(element_range: Range<usize>) -> Self {
292        Self {
293            element_range,
294            suggestible_ranges: Vec::new(),
295            backtick_index: None,
296            prev_code_guess: PrevCodeGuess::None,
297        }
298    }
299}
300
301/// Given a potentially unclosed inline code, attempt to find the start.
302fn guess_start_of_code(dox: &str, range: Range<usize>) -> Option<usize> {
303    assert!(dox.as_bytes()[range.end] == b'`');
304
305    let mut braces = 0;
306    let mut guess = 0;
307    for (idx, ch) in dox[range.clone()].char_indices().rev() {
308        match ch {
309            ')' | ']' | '}' => braces += 1,
310            '(' | '[' | '{' => {
311                if braces == 0 {
312                    guess = idx + 1;
313                    break;
314                }
315                braces -= 1;
316            }
317            ch if ch.is_whitespace() && braces == 0 => {
318                guess = idx + 1;
319                break;
320            }
321            _ => (),
322        }
323    }
324
325    guess += range.start;
326
327    // Don't suggest empty inline code or duplicate backticks.
328    can_suggest_backtick(dox, guess).then_some(guess)
329}
330
331/// Given a potentially unclosed inline code, attempt to find the end.
332fn guess_end_of_code(dox: &str, range: Range<usize>) -> Option<usize> {
333    // Punctuation that should be outside of the inline code.
334    const TRAILING_PUNCTUATION: &[u8] = b".,";
335
336    assert!(dox.as_bytes()[range.start - 1] == b'`');
337
338    let text = dox[range.clone()].trim_end();
339    let mut braces = 0;
340    let mut guess = text.len();
341    for (idx, ch) in text.char_indices() {
342        match ch {
343            '(' | '[' | '{' => braces += 1,
344            ')' | ']' | '}' => {
345                if braces == 0 {
346                    guess = idx;
347                    break;
348                }
349                braces -= 1;
350            }
351            ch if ch.is_whitespace() && braces == 0 => {
352                guess = idx;
353                break;
354            }
355            _ => (),
356        }
357    }
358
359    // Strip a single trailing punctuation.
360    if guess >= 1
361        && TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 1])
362        && (guess < 2 || !TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 2]))
363    {
364        guess -= 1;
365    }
366
367    guess += range.start;
368
369    // Don't suggest empty inline code or duplicate backticks.
370    can_suggest_backtick(dox, guess).then_some(guess)
371}
372
/// Returns whether inserting a backtick at `dox[index]` will not produce double backticks.
///
/// That is the case when neither the byte before `index` (if any) nor the byte at `index`
/// (if any) is a backtick.
///
/// # Panics
///
/// Panics if `index` is greater than `dox.len()`.
fn can_suggest_backtick(dox: &str, index: usize) -> bool {
    (index == 0 || dox.as_bytes()[index - 1] != b'`')
        && (index == dox.len() || dox.as_bytes()[index] != b'`')
}

/// Increase the index until it is inside or one past the end of one of the ranges.
///
/// Returns `None` if `index` lies more than one past the end of every range.
///
/// The ranges must be sorted for this to work correctly.
fn clamp_start(index: usize, ranges: &[Range<usize>]) -> Option<usize> {
    for range in ranges {
        // The index lies before (or at the start of) this range: move it up to the start.
        if range.start >= index {
            return Some(range.start);
        }
        // The index lies inside this range or exactly one past its end: keep it.
        if index <= range.end {
            return Some(index);
        }
    }
    None
}

/// Decrease the index until it is inside or one past the end of one of the ranges.
///
/// Returns `None` if `index` lies before the start of every range.
///
/// The ranges must be sorted for this to work correctly.
fn clamp_end(index: usize, ranges: &[Range<usize>]) -> Option<usize> {
    // Walk the ranges from last to first, so the closest range at or below `index` wins.
    for range in ranges.iter().rev() {
        // The index lies at or past the end of this range: move it down to the end.
        if range.end <= index {
            return Some(range.end);
        }
        // The index lies inside this range: keep it.
        if index >= range.start {
            return Some(index);
        }
    }
    None
}

409/// Try to emit a span suggestion and fall back to help messages if we can't find a suitable span.
410///
411/// This helps finding backticks in huge macro-generated docs.
412fn suggest_insertion(
413    cx: &DocContext<'_>,
414    item: &Item,
415    dox: &str,
416    lint: &mut Diag<'_, ()>,
417    insert_index: usize,
418    suggestion: char,
419    message: &'static str,
420) {
421    /// Maximum bytes of context to show around the insertion.
422    const CONTEXT_MAX_LEN: usize = 80;
423
424    if let Some((span, _)) = source_span_for_markdown_range(
425        cx.tcx,
426        dox,
427        &(insert_index..insert_index),
428        &item.attrs.doc_strings,
429    ) {
430        lint.span_suggestion(span, message, suggestion, Applicability::MaybeIncorrect);
431    } else {
432        let line_start = dox[..insert_index].rfind('\n').map_or(0, |idx| idx + 1);
433        let line_end = dox[insert_index..].find('\n').map_or(dox.len(), |idx| idx + insert_index);
434
435        let context_before_max_len = if insert_index - line_start < CONTEXT_MAX_LEN / 2 {
436            insert_index - line_start
437        } else if line_end - insert_index < CONTEXT_MAX_LEN / 2 {
438            CONTEXT_MAX_LEN - (line_end - insert_index)
439        } else {
440            CONTEXT_MAX_LEN / 2
441        };
442        let context_after_max_len = CONTEXT_MAX_LEN - context_before_max_len;
443
444        let (prefix, context_start) = if insert_index - line_start <= context_before_max_len {
445            ("", line_start)
446        } else {
447            ("...", dox.ceil_char_boundary(insert_index - context_before_max_len))
448        };
449        let (suffix, context_end) = if line_end - insert_index <= context_after_max_len {
450            ("", line_end)
451        } else {
452            ("...", dox.floor_char_boundary(insert_index + context_after_max_len))
453        };
454
455        let context_full = &dox[context_start..context_end].trim_end();
456        let context_before = &dox[context_start..insert_index];
457        let context_after = &dox[insert_index..context_end].trim_end();
458        lint.help(format!(
459            "{message}\n change: {prefix}{context_full}{suffix}\nto this: {prefix}{context_before}{suggestion}{context_after}{suffix}"
460        ));
461    }
462}