1use std::mem;
2use std::ops::Range;
3
4use itertools::Itertools;
5use pulldown_cmark::{
6 BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
7};
8use rustc_ast as ast;
9use rustc_ast::attr::AttributeExt;
10use rustc_ast::util::comments::beautify_doc_string;
11use rustc_data_structures::fx::FxIndexMap;
12use rustc_data_structures::unord::UnordSet;
13use rustc_middle::ty::TyCtxt;
14use rustc_span::def_id::DefId;
15use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, kw, sym};
16use thin_vec::ThinVec;
17use tracing::{debug, trace};
18
/// How a documentation fragment was written in the source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocFragmentKind {
    /// The fragment comes from an attribute for which `is_doc_comment()` is true,
    /// i.e. it was written with doc-comment syntax.
    SugaredDoc,
    /// The fragment comes from any other attribute that carries a doc string
    /// (presumably the `#[doc = "..."]` form).
    RawDoc,
}
26
27#[derive(Clone, PartialEq, Eq, Debug)]
36pub struct DocFragment {
37 pub span: Span,
38 pub item_id: Option<DefId>,
45 pub doc: Symbol,
46 pub kind: DocFragmentKind,
47 pub indent: usize,
48}
49
/// Reasons why the generics in a doc-link path could not be stripped by
/// `strip_generics_from_path`.
///
/// The type is comparable (`PartialEq`/`Eq`) so callers and tests can check which
/// error was produced.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MalformedGenerics {
    /// The `<` and `>` of a path segment do not pair up.
    UnbalancedAngleBrackets,
    /// Nothing is left of the path once the generics are removed.
    MissingType,
    /// A generics chunk contains ` as `, i.e. the path uses fully-qualified syntax.
    HasFullyQualifiedSyntax,
    /// A single `:` was found where `::` was expected.
    InvalidPathSeparator,
    /// A `<` is immediately followed by another `<`.
    TooManyAngleBrackets,
    /// A `<` is immediately followed by `>`.
    EmptyAngleBrackets,
}
90
91pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
105 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
118 && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
119 {
120 1
123 } else {
124 0
125 };
126
127 let Some(min_indent) = docs
137 .iter()
138 .map(|fragment| {
139 fragment
140 .doc
141 .as_str()
142 .lines()
143 .filter(|line| line.chars().any(|c| !c.is_whitespace()))
144 .map(|line| {
145 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
148 whitespace
149 + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
150 })
151 .min()
152 .unwrap_or(usize::MAX)
153 })
154 .min()
155 else {
156 return;
157 };
158
159 for fragment in docs {
160 if fragment.doc == kw::Empty {
161 continue;
162 }
163
164 let indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
165 min_indent - add
166 } else {
167 min_indent
168 };
169
170 fragment.indent = indent;
171 }
172}
173
174pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
180 if frag.doc == kw::Empty {
181 out.push('\n');
182 return;
183 }
184 let s = frag.doc.as_str();
185 let mut iter = s.lines();
186
187 while let Some(line) = iter.next() {
188 if line.chars().any(|c| !c.is_whitespace()) {
189 assert!(line.len() >= frag.indent);
190 out.push_str(&line[frag.indent..]);
191 } else {
192 out.push_str(line);
193 }
194 out.push('\n');
195 }
196}
197
198pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
199 attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
200 doc_only: bool,
201) -> (Vec<DocFragment>, ThinVec<A>) {
202 let mut doc_fragments = Vec::new();
203 let mut other_attrs = ThinVec::<A>::new();
204 for (attr, item_id) in attrs {
205 if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
206 let doc = beautify_doc_string(doc_str, comment_kind);
207 let (span, kind) = if attr.is_doc_comment() {
208 (attr.span(), DocFragmentKind::SugaredDoc)
209 } else {
210 (
211 attr.value_span()
212 .map(|i| i.with_ctxt(attr.span().ctxt()))
213 .unwrap_or(attr.span()),
214 DocFragmentKind::RawDoc,
215 )
216 };
217 let fragment = DocFragment { span, doc, kind, item_id, indent: 0 };
218 doc_fragments.push(fragment);
219 } else if !doc_only {
220 other_attrs.push(attr.clone());
221 }
222 }
223
224 unindent_doc_fragments(&mut doc_fragments);
225
226 (doc_fragments, other_attrs)
227}
228
229pub fn prepare_to_doc_link_resolution(
235 doc_fragments: &[DocFragment],
236) -> FxIndexMap<Option<DefId>, String> {
237 let mut res = FxIndexMap::default();
238 for fragment in doc_fragments {
239 let out_str = res.entry(fragment.item_id).or_default();
240 add_doc_fragment(out_str, fragment);
241 }
242 res
243}
244
245pub fn main_body_opts() -> Options {
247 Options::ENABLE_TABLES
248 | Options::ENABLE_FOOTNOTES
249 | Options::ENABLE_STRIKETHROUGH
250 | Options::ENABLE_TASKLISTS
251 | Options::ENABLE_SMART_PUNCTUATION
252}
253
254fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
255 let mut stripped_segment = String::new();
256 let mut param_depth = 0;
257
258 let mut latest_generics_chunk = String::new();
259
260 for c in segment {
261 if c == '<' {
262 param_depth += 1;
263 latest_generics_chunk.clear();
264 } else if c == '>' {
265 param_depth -= 1;
266 if latest_generics_chunk.contains(" as ") {
267 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
270 }
271 } else if param_depth == 0 {
272 stripped_segment.push(c);
273 } else {
274 latest_generics_chunk.push(c);
275 }
276 }
277
278 if param_depth == 0 {
279 Ok(stripped_segment)
280 } else {
281 Err(MalformedGenerics::UnbalancedAngleBrackets)
283 }
284}
285
286pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
287 if !path_str.contains(['<', '>']) {
288 return Ok(path_str.into());
289 }
290 let mut stripped_segments = vec![];
291 let mut path = path_str.chars().peekable();
292 let mut segment = Vec::new();
293
294 while let Some(chr) = path.next() {
295 match chr {
296 ':' => {
297 if path.next_if_eq(&':').is_some() {
298 let stripped_segment =
299 strip_generics_from_path_segment(mem::take(&mut segment))?;
300 if !stripped_segment.is_empty() {
301 stripped_segments.push(stripped_segment);
302 }
303 } else {
304 return Err(MalformedGenerics::InvalidPathSeparator);
305 }
306 }
307 '<' => {
308 segment.push(chr);
309
310 match path.next() {
311 Some('<') => {
312 return Err(MalformedGenerics::TooManyAngleBrackets);
313 }
314 Some('>') => {
315 return Err(MalformedGenerics::EmptyAngleBrackets);
316 }
317 Some(chr) => {
318 segment.push(chr);
319
320 while let Some(chr) = path.next_if(|c| *c != '>') {
321 segment.push(chr);
322 }
323 }
324 None => break,
325 }
326 }
327 _ => segment.push(chr),
328 }
329 trace!("raw segment: {:?}", segment);
330 }
331
332 if !segment.is_empty() {
333 let stripped_segment = strip_generics_from_path_segment(segment)?;
334 if !stripped_segment.is_empty() {
335 stripped_segments.push(stripped_segment);
336 }
337 }
338
339 debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
340
341 let stripped_path = stripped_segments.join("::");
342
343 if !stripped_path.is_empty() {
344 Ok(stripped_path.into())
345 } else {
346 Err(MalformedGenerics::MissingType)
347 }
348}
349
350pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
355 attrs.iter().find(|a| a.doc_str().is_some()).is_none_or(|a| a.style() == ast::AttrStyle::Inner)
356}
357
358pub fn has_primitive_or_keyword_docs(attrs: &[impl AttributeExt]) -> bool {
360 for attr in attrs {
361 if attr.has_name(sym::rustc_doc_primitive) {
362 return true;
363 } else if attr.has_name(sym::doc)
364 && let Some(items) = attr.meta_item_list()
365 {
366 for item in items {
367 if item.has_name(sym::keyword) {
368 return true;
369 }
370 }
371 }
372 }
373 false
374}
375
376fn preprocess_link(link: &str) -> Box<str> {
380 let link = link.replace('`', "");
381 let link = link.split('#').next().unwrap();
382 let link = link.trim();
383 let link = link.rsplit('@').next().unwrap();
384 let link = link.strip_suffix("()").unwrap_or(link);
385 let link = link.strip_suffix("{}").unwrap_or(link);
386 let link = link.strip_suffix("[]").unwrap_or(link);
387 let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
388 let link = link.trim();
389 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
390}
391
392pub fn may_be_doc_link(link_type: LinkType) -> bool {
395 match link_type {
396 LinkType::Inline
397 | LinkType::Reference
398 | LinkType::ReferenceUnknown
399 | LinkType::Collapsed
400 | LinkType::CollapsedUnknown
401 | LinkType::Shortcut
402 | LinkType::ShortcutUnknown => true,
403 LinkType::Autolink | LinkType::Email => false,
404 }
405}
406
407pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
410 let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
411 let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
412
413 parse_links(&doc)
414}
415
416fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
419 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
420 let mut event_iter = Parser::new_with_broken_link_callback(
421 doc,
422 main_body_opts(),
423 Some(&mut broken_link_callback),
424 );
425 let mut links = Vec::new();
426
427 let mut refids = UnordSet::default();
428
429 while let Some(event) = event_iter.next() {
430 match event {
431 Event::Start(Tag::Link { link_type, dest_url, title: _, id })
432 if may_be_doc_link(link_type) =>
433 {
434 if matches!(
435 link_type,
436 LinkType::Inline
437 | LinkType::ReferenceUnknown
438 | LinkType::Reference
439 | LinkType::Shortcut
440 | LinkType::ShortcutUnknown
441 ) {
442 if let Some(display_text) = collect_link_data(&mut event_iter) {
443 links.push(display_text);
444 }
445 }
446 if matches!(
447 link_type,
448 LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
449 ) {
450 refids.insert(id);
451 }
452
453 links.push(preprocess_link(&dest_url));
454 }
455 _ => {}
456 }
457 }
458
459 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
460 if !refids.contains(label) {
461 links.push(preprocess_link(&refdef.dest));
462 }
463 }
464
465 links
466}
467
468fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
470 event_iter: &mut Parser<'input, F>,
471) -> Option<Box<str>> {
472 let mut display_text: Option<String> = None;
473 let mut append_text = |text: CowStr<'_>| {
474 if let Some(display_text) = &mut display_text {
475 display_text.push_str(&text);
476 } else {
477 display_text = Some(text.to_string());
478 }
479 };
480
481 while let Some(event) = event_iter.next() {
482 match event {
483 Event::Text(text) => {
484 append_text(text);
485 }
486 Event::Code(code) => {
487 append_text(code);
488 }
489 Event::End(_) => {
490 break;
491 }
492 _ => {}
493 }
494 }
495
496 display_text.map(String::into_boxed_str)
497}
498
499pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
501 if fragments.is_empty() {
502 return None;
503 }
504 let start = fragments[0].span;
505 if start == DUMMY_SP {
506 return None;
507 }
508 let end = fragments.last().expect("no doc strings provided").span;
509 Some(start.to(end))
510}
511
512pub fn source_span_for_markdown_range(
523 tcx: TyCtxt<'_>,
524 markdown: &str,
525 md_range: &Range<usize>,
526 fragments: &[DocFragment],
527) -> Option<Span> {
528 if let &[fragment] = &fragments
529 && fragment.kind == DocFragmentKind::RawDoc
530 && let Ok(snippet) = tcx.sess.source_map().span_to_snippet(fragment.span)
531 && snippet.trim_end() == markdown.trim_end()
532 && let Ok(md_range_lo) = u32::try_from(md_range.start)
533 && let Ok(md_range_hi) = u32::try_from(md_range.end)
534 {
535 return Some(Span::new(
537 fragment.span.lo() + rustc_span::BytePos(md_range_lo),
538 fragment.span.lo() + rustc_span::BytePos(md_range_hi),
539 fragment.span.ctxt(),
540 fragment.span.parent(),
541 ));
542 }
543
544 let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
545
546 if !is_all_sugared_doc {
547 return None;
548 }
549
550 let snippet = tcx.sess.source_map().span_to_snippet(span_of_fragments(fragments)?).ok()?;
551
552 let starting_line = markdown[..md_range.start].matches('\n').count();
553 let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
554
555 let mut src_lines = snippet.split_terminator('\n');
558 let md_lines = markdown.split_terminator('\n');
559
560 let mut start_bytes = 0;
563 let mut end_bytes = 0;
564
565 'outer: for (line_no, md_line) in md_lines.enumerate() {
566 loop {
567 let source_line = src_lines.next()?;
568 match source_line.find(md_line) {
569 Some(offset) => {
570 if line_no == starting_line {
571 start_bytes += offset;
572
573 if starting_line == ending_line {
574 break 'outer;
575 }
576 } else if line_no == ending_line {
577 end_bytes += offset;
578 break 'outer;
579 } else if line_no < starting_line {
580 start_bytes += source_line.len() - md_line.len();
581 } else {
582 end_bytes += source_line.len() - md_line.len();
583 }
584 break;
585 }
586 None => {
587 if line_no <= starting_line {
590 start_bytes += source_line.len() + 1;
591 } else {
592 end_bytes += source_line.len() + 1;
593 }
594 }
595 }
596 }
597 }
598
599 Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
600 md_range.start + start_bytes,
601 md_range.end + start_bytes + end_bytes,
602 )))
603}