cargo/sources/registry/index/mod.rs
1//! Management of the index of a registry source.
2//!
3//! This module contains management of the index and various operations, such as
4//! actually parsing the index, looking for crates, etc. This is intended to be
5//! abstract over remote indices (downloaded via Git or HTTP) and local registry
6//! indices (which are all just present on the filesystem).
7//!
8//! ## How the index works
9//!
10//! Here is a simple flow when loading a [`Summary`] (metadata) from the index:
11//!
12//! 1. A query is fired via [`RegistryIndex::query_inner`].
13//! 2. Tries loading all summaries via [`RegistryIndex::load_summaries`], and
14//! under the hood calling [`Summaries::parse`] to parse an index file.
15//! 1. If an on-disk index cache is present, loads it via
16//! [`Summaries::parse_cache`].
17//! 2. Otherwise goes to the slower path [`RegistryData::load`] to get the
18//! specific index file.
19//! 3. A [`Summary`] is now ready in callback `f` in [`RegistryIndex::query_inner`].
20//!
21//! To learn the rationale behind this multi-layer index metadata loading,
22//! see [the documentation of the on-disk index cache](cache).
23use crate::core::Dependency;
24use crate::core::dependency::{Artifact, DepKind};
25use crate::core::{PackageId, SourceId, Summary};
26use crate::sources::registry::{LoadResponse, RegistryData};
27use crate::util::IntoUrl;
28use crate::util::interning::InternedString;
29use crate::util::{CargoResult, Filesystem, GlobalContext, OptVersionReq, internal};
30use cargo_util::registry::make_dep_path;
31use cargo_util_schemas::index::{IndexPackage, RegistryDependency};
32use cargo_util_schemas::manifest::RustVersion;
33use semver::Version;
34use serde::{Deserialize, Serialize};
35use std::borrow::Cow;
36use std::collections::BTreeMap;
37use std::collections::HashMap;
38use std::path::Path;
39use std::str;
40use std::task::{Poll, ready};
41use tracing::info;
42
43mod cache;
44use self::cache::CacheManager;
45use self::cache::SummariesCache;
46
/// The maximum schema version of the `v` field in the index this version of
/// cargo understands. See [`IndexPackage::v`] for the detail.
///
/// Entries whose `v` exceeds the accepted maximum are surfaced as
/// [`IndexSummary::Unsupported`]. When `-Zbindeps` is enabled the accepted
/// maximum is this value plus one.
const INDEX_V_MAX: u32 = 2;
50
/// Manager for handling the on-disk index.
///
/// Different kinds of registries store the index differently:
///
/// * [`LocalRegistry`] is a simple on-disk tree of files of the raw index.
/// * [`RemoteRegistry`] is stored as a raw git repository.
/// * [`HttpRegistry`] fills the on-disk index cache directly without keeping
///   any raw index.
///
/// These means of access are handled via the [`RegistryData`] trait abstraction.
/// This transparently handles caching of the index in a more efficient format.
///
/// [`LocalRegistry`]: super::local::LocalRegistry
/// [`RemoteRegistry`]: super::remote::RemoteRegistry
/// [`HttpRegistry`]: super::http_remote::HttpRegistry
pub struct RegistryIndex<'gctx> {
    /// The `SourceId` handed to the parser when index entries are turned into
    /// summaries.
    source_id: SourceId,
    /// Root directory of the index for the registry.
    path: Filesystem,
    /// In-memory cache of summary data.
    ///
    /// This is keyed off the package name. The [`Summaries`] value handles
    /// loading the summary data. It keeps an optimized on-disk representation
    /// of the JSON files, which is created in an as-needed fashion. If it
    /// hasn't been cached already, it uses [`RegistryData::load`] to access
    /// the JSON files from the index, and then creates the optimized on-disk
    /// summary cache.
    summaries_cache: HashMap<InternedString, Summaries>,
    /// [`GlobalContext`] reference for convenience.
    gctx: &'gctx GlobalContext,
    /// Manager of on-disk caches.
    cache_manager: CacheManager<'gctx>,
}
84
/// An internal cache of summaries for a particular package.
///
/// A list of summaries is loaded from disk via one of two methods:
///
/// 1. From raw registry index --- Primarily Cargo will parse the corresponding
///    file for a crate in the upstream crates.io registry. That's just a JSON
///    blob per line which we can parse, extract the version, and then store here.
///    See [`IndexPackage`] and [`IndexSummary::parse`].
///
/// 2. From on-disk index cache --- If Cargo has previously run, we'll have a
///    cached index of dependencies for the upstream index. This is a file that
///    Cargo maintains lazily on the local filesystem and is much faster to
///    parse since it doesn't involve parsing all of the JSON.
///    See [`SummariesCache`].
///
/// The outward-facing interface of this doesn't care too much where the data
/// was loaded from, but it's important when reading the implementation to note
/// that we try to parse as little as possible!
#[derive(Default)]
struct Summaries {
    /// A raw vector of uninterpreted bytes. This is what the `Unparsed`
    /// start/end fields are indexes into. If a `Summaries` is loaded from the
    /// crates.io index then this field will be empty since nothing is `Unparsed`.
    raw_data: Vec<u8>,

    /// All known versions of a crate, keyed from their `Version` to the
    /// possibly parsed or unparsed version of the full summary.
    versions: HashMap<Version, MaybeIndexSummary>,
}
114
/// A lazily parsed [`IndexSummary`].
enum MaybeIndexSummary {
    /// A summary which has not been parsed. The `start` and `end` are byte
    /// offsets into the [`Summaries::raw_data`] this is an entry of.
    Unparsed { start: usize, end: usize },

    /// An actually parsed summary.
    Parsed(IndexSummary),
}
124
/// A parsed representation of a summary from the index. This is usually parsed
/// from a line from a raw index file, or a JSON blob from on-disk index cache.
///
/// In addition to a full [`Summary`], we have information on whether it is `yanked`.
#[derive(Clone, Debug)]
pub enum IndexSummary {
    /// Available for consideration
    Candidate(Summary),
    /// Yanked within its registry
    Yanked(Summary),
    /// Not available as we are offline and the crate is not downloaded yet
    Offline(Summary),
    /// From a newer schema version and is likely incomplete or inaccurate.
    /// The `u32` is the schema version (`v`) found in the index entry.
    Unsupported(Summary, u32),
    /// An error was encountered despite being a supported schema version
    Invalid(Summary),
}
142
143impl IndexSummary {
144 /// Extract the summary from any variant
145 pub fn as_summary(&self) -> &Summary {
146 match self {
147 IndexSummary::Candidate(sum)
148 | IndexSummary::Yanked(sum)
149 | IndexSummary::Offline(sum)
150 | IndexSummary::Unsupported(sum, _)
151 | IndexSummary::Invalid(sum) => sum,
152 }
153 }
154
155 /// Extract the summary from any variant
156 pub fn into_summary(self) -> Summary {
157 match self {
158 IndexSummary::Candidate(sum)
159 | IndexSummary::Yanked(sum)
160 | IndexSummary::Offline(sum)
161 | IndexSummary::Unsupported(sum, _)
162 | IndexSummary::Invalid(sum) => sum,
163 }
164 }
165
166 pub fn map_summary(self, f: impl Fn(Summary) -> Summary) -> Self {
167 match self {
168 IndexSummary::Candidate(s) => IndexSummary::Candidate(f(s)),
169 IndexSummary::Yanked(s) => IndexSummary::Yanked(f(s)),
170 IndexSummary::Offline(s) => IndexSummary::Offline(f(s)),
171 IndexSummary::Unsupported(s, v) => IndexSummary::Unsupported(f(s), v.clone()),
172 IndexSummary::Invalid(s) => IndexSummary::Invalid(f(s)),
173 }
174 }
175
176 /// Extract the package id from any variant
177 pub fn package_id(&self) -> PackageId {
178 self.as_summary().package_id()
179 }
180
181 /// Returns `true` if the index summary is [`Yanked`].
182 ///
183 /// [`Yanked`]: IndexSummary::Yanked
184 #[must_use]
185 pub fn is_yanked(&self) -> bool {
186 matches!(self, Self::Yanked(..))
187 }
188
189 /// Returns `true` if the index summary is [`Offline`].
190 ///
191 /// [`Offline`]: IndexSummary::Offline
192 #[must_use]
193 pub fn is_offline(&self) -> bool {
194 matches!(self, Self::Offline(..))
195 }
196}
197
198fn index_package_to_summary(pkg: &IndexPackage<'_>, source_id: SourceId) -> CargoResult<Summary> {
199 // ****CAUTION**** Please be extremely careful with returning errors, see
200 // `IndexSummary::parse` for details
201 let pkgid = PackageId::new(pkg.name.as_ref().into(), pkg.vers.clone(), source_id);
202 let deps = pkg
203 .deps
204 .iter()
205 .map(|dep| registry_dependency_into_dep(dep.clone(), source_id))
206 .collect::<CargoResult<Vec<_>>>()?;
207 let mut features = pkg.features.clone();
208 if let Some(features2) = pkg.features2.clone() {
209 for (name, values) in features2 {
210 features.entry(name).or_default().extend(values);
211 }
212 }
213 let features = features
214 .into_iter()
215 .map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect()))
216 .collect::<BTreeMap<_, _>>();
217 let links: Option<InternedString> = pkg.links.as_ref().map(|l| l.as_ref().into());
218 let mut summary = Summary::new(pkgid, deps, &features, links, pkg.rust_version.clone())?;
219 summary.set_checksum(pkg.cksum.clone());
220 if let Some(pubtime) = pkg.pubtime.as_ref().and_then(|p| p.parse().ok()) {
221 summary.set_pubtime(pubtime);
222 }
223 Ok(summary)
224}
225
/// The bare identity of an index entry: package name and version only.
///
/// [`IndexSummary::parse`] deserializes a line into this when the full
/// [`IndexPackage`] conversion fails, so the entry can still be recorded.
#[derive(Deserialize, Serialize)]
struct IndexPackageMinimum<'a> {
    name: Cow<'a, str>,
    vers: Version,
}
231
/// Just the `rust_version` field of an index entry.
///
/// Used by the recovery path of [`IndexSummary::parse`]; if even this cannot
/// be deserialized the default (`None`) is used.
#[derive(Deserialize, Serialize, Default)]
struct IndexPackageRustVersion {
    rust_version: Option<RustVersion>,
}
236
/// Just the schema version (`v`) field of an index entry.
///
/// Used by the recovery path of [`IndexSummary::parse`]; if even this cannot
/// be deserialized the default (`None`) is used.
#[derive(Deserialize, Serialize, Default)]
struct IndexPackageV {
    v: Option<u32>,
}
241
242impl<'gctx> RegistryIndex<'gctx> {
243 /// Creates an empty registry index at `path`.
244 pub fn new(
245 source_id: SourceId,
246 path: &Filesystem,
247 gctx: &'gctx GlobalContext,
248 ) -> RegistryIndex<'gctx> {
249 RegistryIndex {
250 source_id,
251 path: path.clone(),
252 summaries_cache: HashMap::new(),
253 gctx,
254 cache_manager: CacheManager::new(path.join(".cache"), gctx),
255 }
256 }
257
258 /// Returns the hash listed for a specified `PackageId`. Primarily for
259 /// checking the integrity of a downloaded package matching the checksum in
260 /// the index file, aka [`IndexSummary`].
261 pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> {
262 let req = OptVersionReq::lock_to_exact(pkg.version());
263 let summary = self.summaries(pkg.name(), &req, load)?;
264 let summary = ready!(summary).next();
265 Poll::Ready(Ok(summary
266 .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
267 .as_summary()
268 .checksum()
269 .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?))
270 }
271
272 /// Load a list of summaries for `name` package in this registry which
273 /// match `req`.
274 ///
275 /// This function will semantically
276 ///
277 /// 1. parse the index file (either raw or cache),
278 /// 2. match all versions,
279 /// 3. and then return an iterator over all summaries which matched.
280 ///
281 /// Internally there's quite a few layer of caching to amortize this cost
282 /// though since this method is called quite a lot on null builds in Cargo.
283 fn summaries<'a, 'b>(
284 &'a mut self,
285 name: InternedString,
286 req: &'b OptVersionReq,
287 load: &mut dyn RegistryData,
288 ) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>>
289 where
290 'a: 'b,
291 {
292 let bindeps = self.gctx.cli_unstable().bindeps;
293
294 let source_id = self.source_id;
295
296 // First up parse what summaries we have available.
297 let summaries = ready!(self.load_summaries(name, load)?);
298
299 // Iterate over our summaries, extract all relevant ones which match our
300 // version requirement, and then parse all corresponding rows in the
301 // registry. As a reminder this `summaries` method is called for each
302 // entry in a lock file on every build, so we want to absolutely
303 // minimize the amount of work being done here and parse as little as
304 // necessary.
305 let raw_data = &summaries.raw_data;
306 Poll::Ready(Ok(summaries
307 .versions
308 .iter_mut()
309 .filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None })
310 .filter_map(move |maybe| {
311 match maybe.parse(raw_data, source_id, bindeps) {
312 Ok(sum) => Some(sum),
313 Err(e) => {
314 info!("failed to parse `{}` registry package: {}", name, e);
315 None
316 }
317 }
318 })))
319 }
320
321 /// Actually parses what summaries we have available.
322 ///
323 /// If Cargo has run previously, this tries in this order:
324 ///
325 /// 1. Returns from in-memory cache, aka [`RegistryIndex::summaries_cache`].
326 /// 2. If missing, hands over to [`Summaries::parse`] to parse an index file.
327 ///
328 /// The actual kind index file being parsed depends on which kind of
329 /// [`RegistryData`] the `load` argument is given. For example, a
330 /// Git-based [`RemoteRegistry`] will first try a on-disk index cache
331 /// file, and then try parsing registry raw index from Git repository.
332 ///
333 /// In effect, this is intended to be a quite cheap operation.
334 ///
335 /// [`RemoteRegistry`]: super::remote::RemoteRegistry
336 fn load_summaries(
337 &mut self,
338 name: InternedString,
339 load: &mut dyn RegistryData,
340 ) -> Poll<CargoResult<&mut Summaries>> {
341 // If we've previously loaded what versions are present for `name`, just
342 // return that since our in-memory cache should still be valid.
343 if self.summaries_cache.contains_key(&name) {
344 return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()));
345 }
346
347 // Prepare the `RegistryData` which will lazily initialize internal data
348 // structures.
349 load.prepare()?;
350
351 let root = load.assert_index_locked(&self.path);
352 let summaries = ready!(Summaries::parse(
353 root,
354 &name,
355 self.source_id,
356 load,
357 self.gctx.cli_unstable().bindeps,
358 &self.cache_manager,
359 ))?
360 .unwrap_or_default();
361 self.summaries_cache.insert(name, summaries);
362 Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()))
363 }
364
    /// Clears the in-memory summaries cache.
    ///
    /// Only the per-package map held by this `RegistryIndex` is emptied; the
    /// next lookup for any package therefore goes through
    /// [`Summaries::parse`] again.
    pub fn clear_summaries_cache(&mut self) {
        self.summaries_cache.clear();
    }
369
370 /// Attempts to find the packages that match a `name` and a version `req`.
371 ///
372 /// This is primarily used by [`Source::query`](super::Source).
373 pub fn query_inner(
374 &mut self,
375 name: InternedString,
376 req: &OptVersionReq,
377 load: &mut dyn RegistryData,
378 f: &mut dyn FnMut(IndexSummary),
379 ) -> Poll<CargoResult<()>> {
380 if !self.gctx.network_allowed() {
381 // This should only return `Poll::Ready(Ok(()))` if there is at least 1 match.
382 //
383 // If there are 0 matches it should fall through and try again with online.
384 // This is necessary for dependencies that are not used (such as
385 // target-cfg or optional), but are not downloaded. Normally the
386 // build should succeed if they are not downloaded and not used,
387 // but they still need to resolve. If they are actually needed
388 // then cargo will fail to download and an error message
389 // indicating that the required dependency is unavailable while
390 // offline will be displayed.
391 let mut called = false;
392 let callback = &mut |s: IndexSummary| {
393 if !s.is_offline() {
394 called = true;
395 f(s);
396 }
397 };
398 ready!(self.query_inner_with_online(name, req, load, callback, false)?);
399 if called {
400 return Poll::Ready(Ok(()));
401 }
402 }
403 self.query_inner_with_online(name, req, load, f, true)
404 }
405
406 /// Inner implementation of [`Self::query_inner`]. Returns the number of
407 /// summaries we've got.
408 ///
409 /// The `online` controls whether Cargo can access the network when needed.
410 fn query_inner_with_online(
411 &mut self,
412 name: InternedString,
413 req: &OptVersionReq,
414 load: &mut dyn RegistryData,
415 f: &mut dyn FnMut(IndexSummary),
416 online: bool,
417 ) -> Poll<CargoResult<()>> {
418 ready!(self.summaries(name, &req, load))?
419 // First filter summaries for `--offline`. If we're online then
420 // everything is a candidate, otherwise if we're offline we're only
421 // going to consider candidates which are actually present on disk.
422 //
423 // Note: This particular logic can cause problems with
424 // optional dependencies when offline. If at least 1 version
425 // of an optional dependency is downloaded, but that version
426 // does not satisfy the requirements, then resolution will
427 // fail. Unfortunately, whether or not something is optional
428 // is not known here.
429 .map(|s| {
430 if online || load.is_crate_downloaded(s.package_id()) {
431 s.clone()
432 } else {
433 IndexSummary::Offline(s.as_summary().clone())
434 }
435 })
436 .for_each(f);
437 Poll::Ready(Ok(()))
438 }
439
440 /// Looks into the summaries to check if a package has been yanked.
441 pub fn is_yanked(
442 &mut self,
443 pkg: PackageId,
444 load: &mut dyn RegistryData,
445 ) -> Poll<CargoResult<bool>> {
446 let req = OptVersionReq::lock_to_exact(pkg.version());
447 let found = ready!(self.summaries(pkg.name(), &req, load))?.any(|s| s.is_yanked());
448 Poll::Ready(Ok(found))
449 }
450}
451
452impl Summaries {
453 /// Parse out a [`Summaries`] instances from on-disk state.
454 ///
455 /// This will do the followings in order:
456 ///
457 /// 1. Attempt to prefer parsing a previous index cache file that already
458 /// exists from a previous invocation of Cargo (aka you're typing `cargo
459 /// build` again after typing it previously).
460 /// 2. If parsing fails, or the cache isn't found or is invalid, we then
461 /// take a slower path which loads the full descriptor for `relative`
462 /// from the underlying index (aka libgit2 with crates.io, or from a
463 /// remote HTTP index) and then parse everything in there.
464 ///
465 /// * `root` --- this is the root argument passed to `load`
466 /// * `name` --- the name of the package.
467 /// * `source_id` --- the registry's `SourceId` used when parsing JSON blobs
468 /// to create summaries.
469 /// * `load` --- the actual index implementation which may be very slow to
470 /// call. We avoid this if we can.
471 /// * `bindeps` --- whether the `-Zbindeps` unstable flag is enabled
472 pub fn parse(
473 root: &Path,
474 name: &str,
475 source_id: SourceId,
476 load: &mut dyn RegistryData,
477 bindeps: bool,
478 cache_manager: &CacheManager<'_>,
479 ) -> Poll<CargoResult<Option<Summaries>>> {
480 // This is the file we're loading from cache or the index data.
481 // See module comment in `registry/mod.rs` for why this is structured the way it is.
482 let lowered_name = &name.to_lowercase();
483 let relative = make_dep_path(&lowered_name, false);
484
485 let mut cached_summaries = None;
486 let mut index_version = None;
487 if let Some(contents) = cache_manager.get(lowered_name) {
488 match Summaries::parse_cache(contents) {
489 Ok((s, v)) => {
490 cached_summaries = Some(s);
491 index_version = Some(v);
492 }
493 Err(e) => {
494 tracing::debug!("failed to parse {lowered_name:?} cache: {e}");
495 }
496 }
497 }
498
499 let response = ready!(load.load(root, relative.as_ref(), index_version.as_deref())?);
500
501 match response {
502 LoadResponse::CacheValid => {
503 tracing::debug!("fast path for registry cache of {:?}", relative);
504 return Poll::Ready(Ok(cached_summaries));
505 }
506 LoadResponse::NotFound => {
507 cache_manager.invalidate(lowered_name);
508 return Poll::Ready(Ok(None));
509 }
510 LoadResponse::Data {
511 raw_data,
512 index_version,
513 } => {
514 // This is the fallback path where we actually talk to the registry backend to load
515 // information. Here we parse every single line in the index (as we need
516 // to find the versions)
517 tracing::debug!("slow path for {:?}", relative);
518 let mut cache = SummariesCache::default();
519 let mut ret = Summaries::default();
520 ret.raw_data = raw_data;
521 for line in split(&ret.raw_data, b'\n') {
522 // Attempt forwards-compatibility on the index by ignoring
523 // everything that we ourselves don't understand, that should
524 // allow future cargo implementations to break the
525 // interpretation of each line here and older cargo will simply
526 // ignore the new lines.
527 let summary = match IndexSummary::parse(line, source_id, bindeps) {
528 Ok(summary) => summary,
529 Err(e) => {
530 // This should only happen when there is an index
531 // entry from a future version of cargo that this
532 // version doesn't understand. Hopefully, those future
533 // versions of cargo correctly set INDEX_V_MAX and
534 // CURRENT_CACHE_VERSION, otherwise this will skip
535 // entries in the cache preventing those newer
536 // versions from reading them (that is, until the
537 // cache is rebuilt).
538 tracing::info!(
539 "failed to parse {:?} registry package: {}",
540 relative,
541 e
542 );
543 continue;
544 }
545 };
546 let version = summary.package_id().version().clone();
547 cache.versions.push((version.clone(), line));
548 ret.versions.insert(version, summary.into());
549 }
550 if let Some(index_version) = index_version {
551 tracing::trace!("caching index_version {}", index_version);
552 let cache_bytes = cache.serialize(index_version.as_str());
553 // Once we have our `cache_bytes` which represents the `Summaries` we're
554 // about to return, write that back out to disk so future Cargo
555 // invocations can use it.
556 cache_manager.put(lowered_name, &cache_bytes);
557
558 // If we've got debug assertions enabled read back in the cached values
559 // and assert they match the expected result.
560 #[cfg(debug_assertions)]
561 {
562 let readback = SummariesCache::parse(&cache_bytes)
563 .expect("failed to parse cache we just wrote");
564 assert_eq!(
565 readback.index_version, index_version,
566 "index_version mismatch"
567 );
568 assert_eq!(readback.versions, cache.versions, "versions mismatch");
569 }
570 }
571 Poll::Ready(Ok(Some(ret)))
572 }
573 }
574 }
575
576 /// Parses the contents of an on-disk cache, aka [`SummariesCache`], which
577 /// represents information previously cached by Cargo.
578 pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> {
579 let cache = SummariesCache::parse(&contents)?;
580 let index_version = cache.index_version.into();
581 let mut ret = Summaries::default();
582 for (version, summary) in cache.versions {
583 let (start, end) = subslice_bounds(&contents, summary);
584 ret.versions
585 .insert(version, MaybeIndexSummary::Unparsed { start, end });
586 }
587 ret.raw_data = contents;
588 return Ok((ret, index_version));
589
590 // Returns the start/end offsets of `inner` with `outer`. Asserts that
591 // `inner` is a subslice of `outer`.
592 fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) {
593 let outer_start = outer.as_ptr() as usize;
594 let outer_end = outer_start + outer.len();
595 let inner_start = inner.as_ptr() as usize;
596 let inner_end = inner_start + inner.len();
597 assert!(inner_start >= outer_start);
598 assert!(inner_end <= outer_end);
599 (inner_start - outer_start, inner_end - outer_start)
600 }
601 }
602}
603
604impl MaybeIndexSummary {
605 /// Parses this "maybe a summary" into a `Parsed` for sure variant.
606 ///
607 /// Does nothing if this is already `Parsed`, and otherwise the `raw_data`
608 /// passed in is sliced with the bounds in `Unparsed` and then actually
609 /// parsed.
610 fn parse(
611 &mut self,
612 raw_data: &[u8],
613 source_id: SourceId,
614 bindeps: bool,
615 ) -> CargoResult<&IndexSummary> {
616 let (start, end) = match self {
617 MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
618 MaybeIndexSummary::Parsed(summary) => return Ok(summary),
619 };
620 let summary = IndexSummary::parse(&raw_data[start..end], source_id, bindeps)?;
621 *self = MaybeIndexSummary::Parsed(summary);
622 match self {
623 MaybeIndexSummary::Unparsed { .. } => unreachable!(),
624 MaybeIndexSummary::Parsed(summary) => Ok(summary),
625 }
626 }
627}
628
629impl From<IndexSummary> for MaybeIndexSummary {
630 fn from(summary: IndexSummary) -> MaybeIndexSummary {
631 MaybeIndexSummary::Parsed(summary)
632 }
633}
634
635impl IndexSummary {
636 /// Parses a line from the registry's index file into an [`IndexSummary`]
637 /// for a package.
638 ///
639 /// The `line` provided is expected to be valid JSON. It is supposed to be
640 /// a [`IndexPackage`].
641 fn parse(line: &[u8], source_id: SourceId, bindeps: bool) -> CargoResult<IndexSummary> {
642 // ****CAUTION**** Please be extremely careful with returning errors
643 // from this function. Entries that error are not included in the
644 // index cache, and can cause cargo to get confused when switching
645 // between different versions that understand the index differently.
646 // Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION
647 // values carefully when making changes here.
648 let index_summary = (|| {
649 let index = serde_json::from_slice::<IndexPackage<'_>>(line)?;
650 let summary = index_package_to_summary(&index, source_id)?;
651 Ok((index, summary))
652 })();
653 let (index, summary, valid) = match index_summary {
654 Ok((index, summary)) => (index, summary, true),
655 Err(err) => {
656 let Ok(IndexPackageMinimum { name, vers }) =
657 serde_json::from_slice::<IndexPackageMinimum<'_>>(line)
658 else {
659 // If we can't recover, prefer the original error
660 return Err(err);
661 };
662 tracing::info!(
663 "recoverying from failed parse of registry package {name}@{vers}: {err}"
664 );
665 let IndexPackageRustVersion { rust_version } =
666 serde_json::from_slice::<IndexPackageRustVersion>(line).unwrap_or_default();
667 let IndexPackageV { v } =
668 serde_json::from_slice::<IndexPackageV>(line).unwrap_or_default();
669 let index = IndexPackage {
670 name,
671 vers,
672 rust_version,
673 v,
674 deps: Default::default(),
675 features: Default::default(),
676 features2: Default::default(),
677 cksum: Default::default(),
678 yanked: Default::default(),
679 links: Default::default(),
680 pubtime: Default::default(),
681 };
682 let summary = index_package_to_summary(&index, source_id)?;
683 (index, summary, false)
684 }
685 };
686 let v = index.v.unwrap_or(1);
687 tracing::trace!("json parsed registry {}/{}", index.name, index.vers);
688
689 let v_max = if bindeps {
690 INDEX_V_MAX + 1
691 } else {
692 INDEX_V_MAX
693 };
694
695 if v_max < v {
696 Ok(IndexSummary::Unsupported(summary, v))
697 } else if !valid {
698 Ok(IndexSummary::Invalid(summary))
699 } else if index.yanked.unwrap_or(false) {
700 Ok(IndexSummary::Yanked(summary))
701 } else {
702 Ok(IndexSummary::Candidate(summary))
703 }
704 }
705}
706
707/// Converts an encoded dependency in the registry to a cargo dependency
708fn registry_dependency_into_dep(
709 dep: RegistryDependency<'_>,
710 default: SourceId,
711) -> CargoResult<Dependency> {
712 let RegistryDependency {
713 name,
714 req,
715 mut features,
716 optional,
717 default_features,
718 target,
719 kind,
720 registry,
721 package,
722 public,
723 artifact,
724 bindep_target,
725 lib,
726 } = dep;
727
728 let id = if let Some(registry) = ®istry {
729 SourceId::for_registry(®istry.into_url()?)?
730 } else {
731 default
732 };
733
734 let interned_name = InternedString::new(package.as_ref().unwrap_or(&name));
735 let mut dep = Dependency::parse(interned_name, Some(&req), id)?;
736 if package.is_some() {
737 dep.set_explicit_name_in_toml(name);
738 }
739 let kind = match kind.as_deref().unwrap_or("") {
740 "dev" => DepKind::Development,
741 "build" => DepKind::Build,
742 _ => DepKind::Normal,
743 };
744
745 let platform = match target {
746 Some(target) => Some(target.parse()?),
747 None => None,
748 };
749
750 // All dependencies are private by default
751 let public = public.unwrap_or(false);
752
753 // Unfortunately older versions of cargo and/or the registry ended up
754 // publishing lots of entries where the features array contained the
755 // empty feature, "", inside. This confuses the resolution process much
756 // later on and these features aren't actually valid, so filter them all
757 // out here.
758 features.retain(|s| !s.is_empty());
759
760 // In index, "registry" is null if it is from the same index.
761 // In Cargo.toml, "registry" is None if it is from the default
762 if !id.is_crates_io() {
763 dep.set_registry_id(id);
764 }
765
766 if let Some(artifacts) = artifact {
767 let artifact = Artifact::parse(&artifacts, lib, bindep_target.as_deref())?;
768 dep.set_artifact(artifact);
769 }
770
771 dep.set_optional(optional)
772 .set_default_features(default_features)
773 .set_features(features)
774 .set_platform(platform)
775 .set_kind(kind)
776 .set_public(public);
777
778 Ok(dep)
779}
780
781/// Like [`slice::split`] but is optimized by [`memchr`].
782fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
783 struct Split<'a> {
784 haystack: &'a [u8],
785 needle: u8,
786 }
787
788 impl<'a> Iterator for Split<'a> {
789 type Item = &'a [u8];
790
791 fn next(&mut self) -> Option<&'a [u8]> {
792 if self.haystack.is_empty() {
793 return None;
794 }
795 let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
796 Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
797 None => (self.haystack, &[][..]),
798 };
799 self.haystack = remaining;
800 Some(ret)
801 }
802 }
803
804 Split { haystack, needle }
805}