rustc_resolve/
rustdoc.rs

1use std::mem;
2use std::ops::Range;
3
4use itertools::Itertools;
5use pulldown_cmark::{
6    BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
7};
8use rustc_ast as ast;
9use rustc_ast::attr::AttributeExt;
10use rustc_ast::util::comments::beautify_doc_string;
11use rustc_data_structures::fx::FxIndexMap;
12use rustc_data_structures::unord::UnordSet;
13use rustc_middle::ty::TyCtxt;
14use rustc_span::def_id::DefId;
15use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, kw, sym};
16use thin_vec::ThinVec;
17use tracing::{debug, trace};
18
/// The syntax a [`DocFragment`] was written in.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DocFragmentKind {
    /// A doc fragment created from a `///` or `//!` doc comment.
    SugaredDoc,
    /// A doc fragment created from a "raw" `#[doc=""]` attribute.
    RawDoc,
}
26
/// A portion of documentation, extracted from a `#[doc]` attribute.
///
/// Each fragment records the `Span` where the corresponding doc comment or attribute is
/// located, the text of the fragment, its kind, and the indentation to strip from its lines.
///
/// Included files are kept separate from inline doc comments so that proper line-number
/// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
/// kept separate because of issue #42760.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DocFragment {
    /// Where the fragment comes from: the span of the doc comment for sugared fragments; for
    /// raw fragments, the span of the attribute value when there is one, otherwise the span of
    /// the whole attribute (see [`attrs_to_doc_fragments`]).
    pub span: Span,
    /// The item this doc-comment came from.
    /// Used to determine the scope in which doc links in this fragment are resolved.
    /// Typically filled for reexport docs when they are merged into the docs of the
    /// original reexported item.
    /// If the id is not filled, which happens for the original reexported item, then
    /// it has to be taken from somewhere else during doc link resolution.
    pub item_id: Option<DefId>,
    /// The text of the fragment.
    pub doc: Symbol,
    /// Whether the fragment was written as a sugared doc comment or as a raw attribute.
    pub kind: DocFragmentKind,
    /// Number of leading bytes that [`add_doc_fragment`] removes from every non-blank line of
    /// `doc`. Starts at 0 and is filled in by [`unindent_doc_fragments`].
    pub indent: usize,
}
49
/// The reasons for which [`strip_generics_from_path`] can reject the generics of a link.
#[derive(Clone, Copy, Debug)]
pub enum MalformedGenerics {
    /// This link has unbalanced angle brackets.
    ///
    /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
    UnbalancedAngleBrackets,
    /// The generics are not attached to a type.
    ///
    /// For example, `<T>` should trigger this.
    ///
    /// This is detected by checking if the path is empty after the generics are stripped.
    MissingType,
    /// The link uses fully-qualified syntax, which is currently unsupported.
    ///
    /// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
    ///
    /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
    /// angle brackets.
    HasFullyQualifiedSyntax,
    /// The link has an invalid path separator.
    ///
    /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
    /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
    /// called.
    ///
    /// Note that this will also **not** be triggered if the invalid path separator is inside angle
    /// brackets because rustdoc mostly ignores what's inside angle brackets (except for
    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
    ///
    /// This is detected by checking if there is a colon followed by a non-colon in the link.
    InvalidPathSeparator,
    /// The link has too many angle brackets.
    ///
    /// For example, `Vec<<T>>` should trigger this.
    TooManyAngleBrackets,
    /// The link has empty angle brackets.
    ///
    /// For example, `Vec<>` should trigger this.
    EmptyAngleBrackets,
}
90
91/// Removes excess indentation on comments in order for the Markdown
92/// to be parsed correctly. This is necessary because the convention for
93/// writing documentation is to provide a space between the /// or //! marker
94/// and the doc text, but Markdown is whitespace-sensitive. For example,
95/// a block of text with four-space indentation is parsed as a code block,
96/// so if we didn't unindent comments, these list items
97///
98/// /// A list:
99/// ///
100/// ///    - Foo
101/// ///    - Bar
102///
103/// would be parsed as if they were in a code block, which is likely not what the user intended.
104pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
105    // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
106    // fragments kind's lines are never starting with a whitespace unless they are using some
107    // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
108    // we need to take into account the fact that the minimum indent minus one (to take this
109    // whitespace into account).
110    //
111    // For example:
112    //
113    // /// hello!
114    // #[doc = "another"]
115    //
116    // In this case, you want "hello! another" and not "hello!  another".
117    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
118        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
119    {
120        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
121        // "decide" how much the minimum indent will be.
122        1
123    } else {
124        0
125    };
126
127    // `min_indent` is used to know how much whitespaces from the start of each lines must be
128    // removed. Example:
129    //
130    // ///     hello!
131    // #[doc = "another"]
132    //
133    // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
134    // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
135    // (5 - 1) whitespaces.
136    let Some(min_indent) = docs
137        .iter()
138        .map(|fragment| {
139            fragment
140                .doc
141                .as_str()
142                .lines()
143                .filter(|line| line.chars().any(|c| !c.is_whitespace()))
144                .map(|line| {
145                    // Compare against either space or tab, ignoring whether they are
146                    // mixed or not.
147                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
148                    whitespace
149                        + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
150                })
151                .min()
152                .unwrap_or(usize::MAX)
153        })
154        .min()
155    else {
156        return;
157    };
158
159    for fragment in docs {
160        if fragment.doc == kw::Empty {
161            continue;
162        }
163
164        let indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
165            min_indent - add
166        } else {
167            min_indent
168        };
169
170        fragment.indent = indent;
171    }
172}
173
174/// The goal of this function is to apply the `DocFragment` transformation that is required when
175/// transforming into the final Markdown, which is applying the computed indent to each line in
176/// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
177///
178/// Note: remove the trailing newline where appropriate
179pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
180    if frag.doc == kw::Empty {
181        out.push('\n');
182        return;
183    }
184    let s = frag.doc.as_str();
185    let mut iter = s.lines();
186
187    while let Some(line) = iter.next() {
188        if line.chars().any(|c| !c.is_whitespace()) {
189            assert!(line.len() >= frag.indent);
190            out.push_str(&line[frag.indent..]);
191        } else {
192            out.push_str(line);
193        }
194        out.push('\n');
195    }
196}
197
198pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
199    attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
200    doc_only: bool,
201) -> (Vec<DocFragment>, ThinVec<A>) {
202    let mut doc_fragments = Vec::new();
203    let mut other_attrs = ThinVec::<A>::new();
204    for (attr, item_id) in attrs {
205        if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
206            let doc = beautify_doc_string(doc_str, comment_kind);
207            let (span, kind) = if attr.is_doc_comment() {
208                (attr.span(), DocFragmentKind::SugaredDoc)
209            } else {
210                (
211                    attr.value_span()
212                        .map(|i| i.with_ctxt(attr.span().ctxt()))
213                        .unwrap_or(attr.span()),
214                    DocFragmentKind::RawDoc,
215                )
216            };
217            let fragment = DocFragment { span, doc, kind, item_id, indent: 0 };
218            doc_fragments.push(fragment);
219        } else if !doc_only {
220            other_attrs.push(attr.clone());
221        }
222    }
223
224    unindent_doc_fragments(&mut doc_fragments);
225
226    (doc_fragments, other_attrs)
227}
228
229/// Return the doc-comments on this item, grouped by the module they came from.
230/// The module can be different if this is a re-export with added documentation.
231///
232/// The last newline is not trimmed so the produced strings are reusable between
233/// early and late doc link resolution regardless of their position.
234pub fn prepare_to_doc_link_resolution(
235    doc_fragments: &[DocFragment],
236) -> FxIndexMap<Option<DefId>, String> {
237    let mut res = FxIndexMap::default();
238    for fragment in doc_fragments {
239        let out_str = res.entry(fragment.item_id).or_default();
240        add_doc_fragment(out_str, fragment);
241    }
242    res
243}
244
245/// Options for rendering Markdown in the main body of documentation.
246pub fn main_body_opts() -> Options {
247    Options::ENABLE_TABLES
248        | Options::ENABLE_FOOTNOTES
249        | Options::ENABLE_STRIKETHROUGH
250        | Options::ENABLE_TASKLISTS
251        | Options::ENABLE_SMART_PUNCTUATION
252}
253
254fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
255    let mut stripped_segment = String::new();
256    let mut param_depth = 0;
257
258    let mut latest_generics_chunk = String::new();
259
260    for c in segment {
261        if c == '<' {
262            param_depth += 1;
263            latest_generics_chunk.clear();
264        } else if c == '>' {
265            param_depth -= 1;
266            if latest_generics_chunk.contains(" as ") {
267                // The segment tries to use fully-qualified syntax, which is currently unsupported.
268                // Give a helpful error message instead of completely ignoring the angle brackets.
269                return Err(MalformedGenerics::HasFullyQualifiedSyntax);
270            }
271        } else if param_depth == 0 {
272            stripped_segment.push(c);
273        } else {
274            latest_generics_chunk.push(c);
275        }
276    }
277
278    if param_depth == 0 {
279        Ok(stripped_segment)
280    } else {
281        // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
282        Err(MalformedGenerics::UnbalancedAngleBrackets)
283    }
284}
285
286pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
287    if !path_str.contains(['<', '>']) {
288        return Ok(path_str.into());
289    }
290    let mut stripped_segments = vec![];
291    let mut path = path_str.chars().peekable();
292    let mut segment = Vec::new();
293
294    while let Some(chr) = path.next() {
295        match chr {
296            ':' => {
297                if path.next_if_eq(&':').is_some() {
298                    let stripped_segment =
299                        strip_generics_from_path_segment(mem::take(&mut segment))?;
300                    if !stripped_segment.is_empty() {
301                        stripped_segments.push(stripped_segment);
302                    }
303                } else {
304                    return Err(MalformedGenerics::InvalidPathSeparator);
305                }
306            }
307            '<' => {
308                segment.push(chr);
309
310                match path.next() {
311                    Some('<') => {
312                        return Err(MalformedGenerics::TooManyAngleBrackets);
313                    }
314                    Some('>') => {
315                        return Err(MalformedGenerics::EmptyAngleBrackets);
316                    }
317                    Some(chr) => {
318                        segment.push(chr);
319
320                        while let Some(chr) = path.next_if(|c| *c != '>') {
321                            segment.push(chr);
322                        }
323                    }
324                    None => break,
325                }
326            }
327            _ => segment.push(chr),
328        }
329        trace!("raw segment: {:?}", segment);
330    }
331
332    if !segment.is_empty() {
333        let stripped_segment = strip_generics_from_path_segment(segment)?;
334        if !stripped_segment.is_empty() {
335            stripped_segments.push(stripped_segment);
336        }
337    }
338
339    debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
340
341    let stripped_path = stripped_segments.join("::");
342
343    if !stripped_path.is_empty() {
344        Ok(stripped_path.into())
345    } else {
346        Err(MalformedGenerics::MissingType)
347    }
348}
349
350/// Returns whether the first doc-comment is an inner attribute.
351///
352/// If there are no doc-comments, return true.
353/// FIXME(#78591): Support both inner and outer attributes on the same item.
354pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
355    attrs.iter().find(|a| a.doc_str().is_some()).is_none_or(|a| a.style() == ast::AttrStyle::Inner)
356}
357
358/// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]`.
359pub fn has_primitive_or_keyword_docs(attrs: &[impl AttributeExt]) -> bool {
360    for attr in attrs {
361        if attr.has_name(sym::rustc_doc_primitive) {
362            return true;
363        } else if attr.has_name(sym::doc)
364            && let Some(items) = attr.meta_item_list()
365        {
366            for item in items {
367                if item.has_name(sym::keyword) {
368                    return true;
369                }
370            }
371        }
372    }
373    false
374}
375
376/// Simplified version of the corresponding function in rustdoc.
377/// If the rustdoc version returns a successful result, this function must return the same result.
378/// Otherwise this function may return anything.
379fn preprocess_link(link: &str) -> Box<str> {
380    let link = link.replace('`', "");
381    let link = link.split('#').next().unwrap();
382    let link = link.trim();
383    let link = link.rsplit('@').next().unwrap();
384    let link = link.strip_suffix("()").unwrap_or(link);
385    let link = link.strip_suffix("{}").unwrap_or(link);
386    let link = link.strip_suffix("[]").unwrap_or(link);
387    let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
388    let link = link.trim();
389    strip_generics_from_path(link).unwrap_or_else(|_| link.into())
390}
391
392/// Keep inline and reference links `[]`,
393/// but skip autolinks `<>` which we never consider to be intra-doc links.
394pub fn may_be_doc_link(link_type: LinkType) -> bool {
395    match link_type {
396        LinkType::Inline
397        | LinkType::Reference
398        | LinkType::ReferenceUnknown
399        | LinkType::Collapsed
400        | LinkType::CollapsedUnknown
401        | LinkType::Shortcut
402        | LinkType::ShortcutUnknown => true,
403        LinkType::Autolink | LinkType::Email => false,
404    }
405}
406
407/// Simplified version of `preprocessed_markdown_links` from rustdoc.
408/// Must return at least the same links as it, but may add some more links on top of that.
409pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
410    let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
411    let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
412
413    parse_links(&doc)
414}
415
416/// Similar version of `markdown_links` from rustdoc.
417/// This will collect destination links and display text if exists.
418fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
419    let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
420    let mut event_iter = Parser::new_with_broken_link_callback(
421        doc,
422        main_body_opts(),
423        Some(&mut broken_link_callback),
424    );
425    let mut links = Vec::new();
426
427    let mut refids = UnordSet::default();
428
429    while let Some(event) = event_iter.next() {
430        match event {
431            Event::Start(Tag::Link { link_type, dest_url, title: _, id })
432                if may_be_doc_link(link_type) =>
433            {
434                if matches!(
435                    link_type,
436                    LinkType::Inline
437                        | LinkType::ReferenceUnknown
438                        | LinkType::Reference
439                        | LinkType::Shortcut
440                        | LinkType::ShortcutUnknown
441                ) {
442                    if let Some(display_text) = collect_link_data(&mut event_iter) {
443                        links.push(display_text);
444                    }
445                }
446                if matches!(
447                    link_type,
448                    LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
449                ) {
450                    refids.insert(id);
451                }
452
453                links.push(preprocess_link(&dest_url));
454            }
455            _ => {}
456        }
457    }
458
459    for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
460        if !refids.contains(label) {
461            links.push(preprocess_link(&refdef.dest));
462        }
463    }
464
465    links
466}
467
468/// Collects additional data of link.
469fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
470    event_iter: &mut Parser<'input, F>,
471) -> Option<Box<str>> {
472    let mut display_text: Option<String> = None;
473    let mut append_text = |text: CowStr<'_>| {
474        if let Some(display_text) = &mut display_text {
475            display_text.push_str(&text);
476        } else {
477            display_text = Some(text.to_string());
478        }
479    };
480
481    while let Some(event) = event_iter.next() {
482        match event {
483            Event::Text(text) => {
484                append_text(text);
485            }
486            Event::Code(code) => {
487                append_text(code);
488            }
489            Event::End(_) => {
490                break;
491            }
492            _ => {}
493        }
494    }
495
496    display_text.map(String::into_boxed_str)
497}
498
499/// Returns a span encompassing all the document fragments.
500pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
501    if fragments.is_empty() {
502        return None;
503    }
504    let start = fragments[0].span;
505    if start == DUMMY_SP {
506        return None;
507    }
508    let end = fragments.last().expect("no doc strings provided").span;
509    Some(start.to(end))
510}
511
512/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
513///
514/// This method does not always work, because markdown bytes don't necessarily match source bytes,
515/// like if escapes are used in the string. In this case, it returns `None`.
516///
517/// This method will return `Some` only if:
518///
519/// - The doc is made entirely from sugared doc comments, which cannot contain escapes
520/// - The doc is entirely from a single doc fragment, with a string literal, exactly equal
521/// - The doc comes from `include_str!`
522pub fn source_span_for_markdown_range(
523    tcx: TyCtxt<'_>,
524    markdown: &str,
525    md_range: &Range<usize>,
526    fragments: &[DocFragment],
527) -> Option<Span> {
528    if let &[fragment] = &fragments
529        && fragment.kind == DocFragmentKind::RawDoc
530        && let Ok(snippet) = tcx.sess.source_map().span_to_snippet(fragment.span)
531        && snippet.trim_end() == markdown.trim_end()
532        && let Ok(md_range_lo) = u32::try_from(md_range.start)
533        && let Ok(md_range_hi) = u32::try_from(md_range.end)
534    {
535        // Single fragment with string that contains same bytes as doc.
536        return Some(Span::new(
537            fragment.span.lo() + rustc_span::BytePos(md_range_lo),
538            fragment.span.lo() + rustc_span::BytePos(md_range_hi),
539            fragment.span.ctxt(),
540            fragment.span.parent(),
541        ));
542    }
543
544    let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
545
546    if !is_all_sugared_doc {
547        return None;
548    }
549
550    let snippet = tcx.sess.source_map().span_to_snippet(span_of_fragments(fragments)?).ok()?;
551
552    let starting_line = markdown[..md_range.start].matches('\n').count();
553    let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
554
555    // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat
556    // CRLF and LF line endings the same way.
557    let mut src_lines = snippet.split_terminator('\n');
558    let md_lines = markdown.split_terminator('\n');
559
560    // The number of bytes from the source span to the markdown span that are not part
561    // of the markdown, like comment markers.
562    let mut start_bytes = 0;
563    let mut end_bytes = 0;
564
565    'outer: for (line_no, md_line) in md_lines.enumerate() {
566        loop {
567            let source_line = src_lines.next()?;
568            match source_line.find(md_line) {
569                Some(offset) => {
570                    if line_no == starting_line {
571                        start_bytes += offset;
572
573                        if starting_line == ending_line {
574                            break 'outer;
575                        }
576                    } else if line_no == ending_line {
577                        end_bytes += offset;
578                        break 'outer;
579                    } else if line_no < starting_line {
580                        start_bytes += source_line.len() - md_line.len();
581                    } else {
582                        end_bytes += source_line.len() - md_line.len();
583                    }
584                    break;
585                }
586                None => {
587                    // Since this is a source line that doesn't include a markdown line,
588                    // we have to count the newline that we split from earlier.
589                    if line_no <= starting_line {
590                        start_bytes += source_line.len() + 1;
591                    } else {
592                        end_bytes += source_line.len() + 1;
593                    }
594                }
595            }
596        }
597    }
598
599    Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
600        md_range.start + start_bytes,
601        md_range.end + start_bytes + end_bytes,
602    )))
603}