cargo/sources/registry/index/
mod.rs

1//! Management of the index of a registry source.
2//!
3//! This module contains management of the index and various operations, such as
4//! actually parsing the index, looking for crates, etc. This is intended to be
5//! abstract over remote indices (downloaded via Git or HTTP) and local registry
6//! indices (which are all just present on the filesystem).
7//!
8//! ## How the index works
9//!
10//! Here is a simple flow when loading a [`Summary`] (metadata) from the index:
11//!
12//! 1. A query is fired via [`RegistryIndex::query_inner`].
13//! 2. Tries loading all summaries via [`RegistryIndex::load_summaries`], and
14//!    under the hood calling [`Summaries::parse`] to parse an index file.
15//!     1. If an on-disk index cache is present, loads it via
16//!        [`Summaries::parse_cache`].
17//!     2. Otherwise goes to the slower path [`RegistryData::load`] to get the
18//!        specific index file.
19//! 3. A [`Summary`] is now ready in callback `f` in [`RegistryIndex::query_inner`].
20//!
21//! To learn the rationale behind this multi-layer index metadata loading,
22//! see [the documentation of the on-disk index cache](cache).
23use crate::core::dependency::{Artifact, DepKind};
24use crate::core::Dependency;
25use crate::core::{PackageId, SourceId, Summary};
26use crate::sources::registry::{LoadResponse, RegistryData};
27use crate::util::interning::InternedString;
28use crate::util::IntoUrl;
29use crate::util::{internal, CargoResult, Filesystem, GlobalContext, OptVersionReq};
30use cargo_util::registry::make_dep_path;
31use cargo_util_schemas::manifest::RustVersion;
32use semver::Version;
33use serde::{Deserialize, Serialize};
34use std::borrow::Cow;
35use std::collections::BTreeMap;
36use std::collections::HashMap;
37use std::path::Path;
38use std::str;
39use std::task::{ready, Poll};
40use tracing::info;
41
42mod cache;
43use self::cache::CacheManager;
44use self::cache::SummariesCache;
45
/// Highest index schema version (the `v` field of an index entry) that this
/// build of cargo knows how to interpret. [`IndexPackage::v`] describes what
/// each schema version adds.
const INDEX_V_MAX: u32 = 2;
49
50/// Manager for handling the on-disk index.
51///
52/// Different kinds of registries store the index differently:
53///
54/// * [`LocalRegistry`] is a simple on-disk tree of files of the raw index.
55/// * [`RemoteRegistry`] is stored as a raw git repository.
56/// * [`HttpRegistry`] fills the on-disk index cache directly without keeping
57///   any raw index.
58///
59/// These means of access are handled via the [`RegistryData`] trait abstraction.
60/// This transparently handles caching of the index in a more efficient format.
61///
62/// [`LocalRegistry`]: super::local::LocalRegistry
63/// [`RemoteRegistry`]: super::remote::RemoteRegistry
64/// [`HttpRegistry`]: super::http_remote::HttpRegistry
65pub struct RegistryIndex<'gctx> {
66    source_id: SourceId,
67    /// Root directory of the index for the registry.
68    path: Filesystem,
69    /// In-memory cache of summary data.
70    ///
71    /// This is keyed off the package name. The [`Summaries`] value handles
72    /// loading the summary data. It keeps an optimized on-disk representation
73    /// of the JSON files, which is created in an as-needed fashion. If it
74    /// hasn't been cached already, it uses [`RegistryData::load`] to access
75    /// to JSON files from the index, and the creates the optimized on-disk
76    /// summary cache.
77    summaries_cache: HashMap<InternedString, Summaries>,
78    /// [`GlobalContext`] reference for convenience.
79    gctx: &'gctx GlobalContext,
80    /// Manager of on-disk caches.
81    cache_manager: CacheManager<'gctx>,
82}
83
84/// An internal cache of summaries for a particular package.
85///
86/// A list of summaries are loaded from disk via one of two methods:
87///
88/// 1. From raw registry index --- Primarily Cargo will parse the corresponding
89///    file for a crate in the upstream crates.io registry. That's just a JSON
90///    blob per line which we can parse, extract the version, and then store here.
91///    See [`IndexPackage`] and [`IndexSummary::parse`].
92///
93/// 2. From on-disk index cache --- If Cargo has previously run, we'll have a
94///    cached index of dependencies for the upstream index. This is a file that
95///    Cargo maintains lazily on the local filesystem and is much faster to
96///    parse since it doesn't involve parsing all of the JSON.
97///    See [`SummariesCache`].
98///
99/// The outward-facing interface of this doesn't matter too much where it's
100/// loaded from, but it's important when reading the implementation to note that
101/// we try to parse as little as possible!
102#[derive(Default)]
103struct Summaries {
104    /// A raw vector of uninterpreted bytes. This is what `Unparsed` start/end
105    /// fields are indexes into. If a `Summaries` is loaded from the crates.io
106    /// index then this field will be empty since nothing is `Unparsed`.
107    raw_data: Vec<u8>,
108
109    /// All known versions of a crate, keyed from their `Version` to the
110    /// possibly parsed or unparsed version of the full summary.
111    versions: HashMap<Version, MaybeIndexSummary>,
112}
113
114/// A lazily parsed [`IndexSummary`].
115enum MaybeIndexSummary {
116    /// A summary which has not been parsed, The `start` and `end` are pointers
117    /// into [`Summaries::raw_data`] which this is an entry of.
118    Unparsed { start: usize, end: usize },
119
120    /// An actually parsed summary.
121    Parsed(IndexSummary),
122}
123
124/// A parsed representation of a summary from the index. This is usually parsed
125/// from a line from a raw index file, or a JSON blob from on-disk index cache.
126///
127/// In addition to a full [`Summary`], we have information on whether it is `yanked`.
128#[derive(Clone, Debug)]
129pub enum IndexSummary {
130    /// Available for consideration
131    Candidate(Summary),
132    /// Yanked within its registry
133    Yanked(Summary),
134    /// Not available as we are offline and create is not downloaded yet
135    Offline(Summary),
136    /// From a newer schema version and is likely incomplete or inaccurate
137    Unsupported(Summary, u32),
138    /// An error was encountered despite being a supported schema version
139    Invalid(Summary),
140}
141
142impl IndexSummary {
143    /// Extract the summary from any variant
144    pub fn as_summary(&self) -> &Summary {
145        match self {
146            IndexSummary::Candidate(sum)
147            | IndexSummary::Yanked(sum)
148            | IndexSummary::Offline(sum)
149            | IndexSummary::Unsupported(sum, _)
150            | IndexSummary::Invalid(sum) => sum,
151        }
152    }
153
154    /// Extract the summary from any variant
155    pub fn into_summary(self) -> Summary {
156        match self {
157            IndexSummary::Candidate(sum)
158            | IndexSummary::Yanked(sum)
159            | IndexSummary::Offline(sum)
160            | IndexSummary::Unsupported(sum, _)
161            | IndexSummary::Invalid(sum) => sum,
162        }
163    }
164
165    pub fn map_summary(self, f: impl Fn(Summary) -> Summary) -> Self {
166        match self {
167            IndexSummary::Candidate(s) => IndexSummary::Candidate(f(s)),
168            IndexSummary::Yanked(s) => IndexSummary::Yanked(f(s)),
169            IndexSummary::Offline(s) => IndexSummary::Offline(f(s)),
170            IndexSummary::Unsupported(s, v) => IndexSummary::Unsupported(f(s), v.clone()),
171            IndexSummary::Invalid(s) => IndexSummary::Invalid(f(s)),
172        }
173    }
174
175    /// Extract the package id from any variant
176    pub fn package_id(&self) -> PackageId {
177        self.as_summary().package_id()
178    }
179
180    /// Returns `true` if the index summary is [`Yanked`].
181    ///
182    /// [`Yanked`]: IndexSummary::Yanked
183    #[must_use]
184    pub fn is_yanked(&self) -> bool {
185        matches!(self, Self::Yanked(..))
186    }
187
188    /// Returns `true` if the index summary is [`Offline`].
189    ///
190    /// [`Offline`]: IndexSummary::Offline
191    #[must_use]
192    pub fn is_offline(&self) -> bool {
193        matches!(self, Self::Offline(..))
194    }
195}
196
197/// A single line in the index representing a single version of a package.
198#[derive(Deserialize, Serialize)]
199pub struct IndexPackage<'a> {
200    /// Name of the package.
201    #[serde(borrow)]
202    pub name: Cow<'a, str>,
203    /// The version of this dependency.
204    pub vers: Version,
205    /// All kinds of direct dependencies of the package, including dev and
206    /// build dependencies.
207    #[serde(borrow)]
208    pub deps: Vec<RegistryDependency<'a>>,
209    /// Set of features defined for the package, i.e., `[features]` table.
210    #[serde(default)]
211    pub features: BTreeMap<Cow<'a, str>, Vec<Cow<'a, str>>>,
212    /// This field contains features with new, extended syntax. Specifically,
213    /// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
214    ///
215    /// This is separated from `features` because versions older than 1.19
216    /// will fail to load due to not being able to parse the new syntax, even
217    /// with a `Cargo.lock` file.
218    pub features2: Option<BTreeMap<Cow<'a, str>, Vec<Cow<'a, str>>>>,
219    /// Checksum for verifying the integrity of the corresponding downloaded package.
220    pub cksum: String,
221    /// If `true`, Cargo will skip this version when resolving.
222    ///
223    /// This was added in 2014. Everything in the crates.io index has this set
224    /// now, so this probably doesn't need to be an option anymore.
225    pub yanked: Option<bool>,
226    /// Native library name this package links to.
227    ///
228    /// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>),
229    /// can be `None` if published before then.
230    pub links: Option<Cow<'a, str>>,
231    /// Required version of rust
232    ///
233    /// Corresponds to `package.rust-version`.
234    ///
235    /// Added in 2023 (see <https://github.com/rust-lang/crates.io/pull/6267>),
236    /// can be `None` if published before then or if not set in the manifest.
237    pub rust_version: Option<RustVersion>,
238    /// The schema version for this entry.
239    ///
240    /// If this is None, it defaults to version `1`. Entries with unknown
241    /// versions are ignored.
242    ///
243    /// Version `2` schema adds the `features2` field.
244    ///
245    /// Version `3` schema adds `artifact`, `bindep_targes`, and `lib` for
246    /// artifact dependencies support.
247    ///
248    /// This provides a method to safely introduce changes to index entries
249    /// and allow older versions of cargo to ignore newer entries it doesn't
250    /// understand. This is honored as of 1.51, so unfortunately older
251    /// versions will ignore it, and potentially misinterpret version 2 and
252    /// newer entries.
253    ///
254    /// The intent is that versions older than 1.51 will work with a
255    /// pre-existing `Cargo.lock`, but they may not correctly process `cargo
256    /// update` or build a lock from scratch. In that case, cargo may
257    /// incorrectly select a new package that uses a new index schema. A
258    /// workaround is to downgrade any packages that are incompatible with the
259    /// `--precise` flag of `cargo update`.
260    pub v: Option<u32>,
261}
262
263impl IndexPackage<'_> {
264    fn to_summary(&self, source_id: SourceId) -> CargoResult<Summary> {
265        // ****CAUTION**** Please be extremely careful with returning errors, see
266        // `IndexSummary::parse` for details
267        let pkgid = PackageId::new(self.name.as_ref().into(), self.vers.clone(), source_id);
268        let deps = self
269            .deps
270            .iter()
271            .map(|dep| dep.clone().into_dep(source_id))
272            .collect::<CargoResult<Vec<_>>>()?;
273        let mut features = self.features.clone();
274        if let Some(features2) = self.features2.clone() {
275            for (name, values) in features2 {
276                features.entry(name).or_default().extend(values);
277            }
278        }
279        let features = features
280            .into_iter()
281            .map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect()))
282            .collect::<BTreeMap<_, _>>();
283        let links: Option<InternedString> = self.links.as_ref().map(|l| l.as_ref().into());
284        let mut summary = Summary::new(pkgid, deps, &features, links, self.rust_version.clone())?;
285        summary.set_checksum(self.cksum.clone());
286        Ok(summary)
287    }
288}
289
290#[derive(Deserialize, Serialize)]
291struct IndexPackageMinimum<'a> {
292    name: Cow<'a, str>,
293    vers: Version,
294}
295
296#[derive(Deserialize, Serialize, Default)]
297struct IndexPackageRustVersion {
298    rust_version: Option<RustVersion>,
299}
300
301#[derive(Deserialize, Serialize, Default)]
302struct IndexPackageV {
303    v: Option<u32>,
304}
305
306/// A dependency as encoded in the [`IndexPackage`] index JSON.
307#[derive(Deserialize, Serialize, Clone)]
308pub struct RegistryDependency<'a> {
309    /// Name of the dependency. If the dependency is renamed, the original
310    /// would be stored in [`RegistryDependency::package`].
311    #[serde(borrow)]
312    pub name: Cow<'a, str>,
313    /// The SemVer requirement for this dependency.
314    #[serde(borrow)]
315    pub req: Cow<'a, str>,
316    /// Set of features enabled for this dependency.
317    #[serde(default)]
318    pub features: Vec<Cow<'a, str>>,
319    /// Whether or not this is an optional dependency.
320    #[serde(default)]
321    pub optional: bool,
322    /// Whether or not default features are enabled.
323    #[serde(default = "default_true")]
324    pub default_features: bool,
325    /// The target platform for this dependency.
326    pub target: Option<Cow<'a, str>>,
327    /// The dependency kind. "dev", "build", and "normal".
328    pub kind: Option<Cow<'a, str>>,
329    // The URL of the index of the registry where this dependency is from.
330    // `None` if it is from the same index.
331    pub registry: Option<Cow<'a, str>>,
332    /// The original name if the dependency is renamed.
333    pub package: Option<Cow<'a, str>>,
334    /// Whether or not this is a public dependency. Unstable. See [RFC 1977].
335    ///
336    /// [RFC 1977]: https://rust-lang.github.io/rfcs/1977-public-private-dependencies.html
337    pub public: Option<bool>,
338    pub artifact: Option<Vec<Cow<'a, str>>>,
339    pub bindep_target: Option<Cow<'a, str>>,
340    #[serde(default)]
341    pub lib: bool,
342}
343
/// Serde default for boolean fields that are `true` when absent from the
/// JSON, such as [`RegistryDependency::default_features`].
fn default_true() -> bool {
    true
}
347
348impl<'gctx> RegistryIndex<'gctx> {
349    /// Creates an empty registry index at `path`.
350    pub fn new(
351        source_id: SourceId,
352        path: &Filesystem,
353        gctx: &'gctx GlobalContext,
354    ) -> RegistryIndex<'gctx> {
355        RegistryIndex {
356            source_id,
357            path: path.clone(),
358            summaries_cache: HashMap::new(),
359            gctx,
360            cache_manager: CacheManager::new(path.join(".cache"), gctx),
361        }
362    }
363
364    /// Returns the hash listed for a specified `PackageId`. Primarily for
365    /// checking the integrity of a downloaded package matching the checksum in
366    /// the index file, aka [`IndexSummary`].
367    pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> {
368        let req = OptVersionReq::lock_to_exact(pkg.version());
369        let summary = self.summaries(pkg.name(), &req, load)?;
370        let summary = ready!(summary).next();
371        Poll::Ready(Ok(summary
372            .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
373            .as_summary()
374            .checksum()
375            .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?))
376    }
377
378    /// Load a list of summaries for `name` package in this registry which
379    /// match `req`.
380    ///
381    /// This function will semantically
382    ///
383    /// 1. parse the index file (either raw or cache),
384    /// 2. match all versions,
385    /// 3. and then return an iterator over all summaries which matched.
386    ///
387    /// Internally there's quite a few layer of caching to amortize this cost
388    /// though since this method is called quite a lot on null builds in Cargo.
389    fn summaries<'a, 'b>(
390        &'a mut self,
391        name: InternedString,
392        req: &'b OptVersionReq,
393        load: &mut dyn RegistryData,
394    ) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>>
395    where
396        'a: 'b,
397    {
398        let bindeps = self.gctx.cli_unstable().bindeps;
399
400        let source_id = self.source_id;
401
402        // First up parse what summaries we have available.
403        let summaries = ready!(self.load_summaries(name, load)?);
404
405        // Iterate over our summaries, extract all relevant ones which match our
406        // version requirement, and then parse all corresponding rows in the
407        // registry. As a reminder this `summaries` method is called for each
408        // entry in a lock file on every build, so we want to absolutely
409        // minimize the amount of work being done here and parse as little as
410        // necessary.
411        let raw_data = &summaries.raw_data;
412        Poll::Ready(Ok(summaries
413            .versions
414            .iter_mut()
415            .filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None })
416            .filter_map(move |maybe| {
417                match maybe.parse(raw_data, source_id, bindeps) {
418                    Ok(sum) => Some(sum),
419                    Err(e) => {
420                        info!("failed to parse `{}` registry package: {}", name, e);
421                        None
422                    }
423                }
424            })))
425    }
426
427    /// Actually parses what summaries we have available.
428    ///
429    /// If Cargo has run previously, this tries in this order:
430    ///
431    /// 1. Returns from in-memory cache, aka [`RegistryIndex::summaries_cache`].
432    /// 2. If missing, hands over to [`Summaries::parse`] to parse an index file.
433    ///
434    ///    The actual kind index file being parsed depends on which kind of
435    ///    [`RegistryData`] the `load` argument is given. For example, a
436    ///    Git-based [`RemoteRegistry`] will first try a on-disk index cache
437    ///    file, and then try parsing registry raw index from Git repository.
438    ///
439    /// In effect, this is intended to be a quite cheap operation.
440    ///
441    /// [`RemoteRegistry`]: super::remote::RemoteRegistry
442    fn load_summaries(
443        &mut self,
444        name: InternedString,
445        load: &mut dyn RegistryData,
446    ) -> Poll<CargoResult<&mut Summaries>> {
447        // If we've previously loaded what versions are present for `name`, just
448        // return that since our in-memory cache should still be valid.
449        if self.summaries_cache.contains_key(&name) {
450            return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()));
451        }
452
453        // Prepare the `RegistryData` which will lazily initialize internal data
454        // structures.
455        load.prepare()?;
456
457        let root = load.assert_index_locked(&self.path);
458        let summaries = ready!(Summaries::parse(
459            root,
460            &name,
461            self.source_id,
462            load,
463            self.gctx.cli_unstable().bindeps,
464            &self.cache_manager,
465        ))?
466        .unwrap_or_default();
467        self.summaries_cache.insert(name, summaries);
468        Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()))
469    }
470
471    /// Clears the in-memory summaries cache.
472    pub fn clear_summaries_cache(&mut self) {
473        self.summaries_cache.clear();
474    }
475
476    /// Attempts to find the packages that match a `name` and a version `req`.
477    ///
478    /// This is primarily used by [`Source::query`](super::Source).
479    pub fn query_inner(
480        &mut self,
481        name: InternedString,
482        req: &OptVersionReq,
483        load: &mut dyn RegistryData,
484        f: &mut dyn FnMut(IndexSummary),
485    ) -> Poll<CargoResult<()>> {
486        if !self.gctx.network_allowed() {
487            // This should only return `Poll::Ready(Ok(()))` if there is at least 1 match.
488            //
489            // If there are 0 matches it should fall through and try again with online.
490            // This is necessary for dependencies that are not used (such as
491            // target-cfg or optional), but are not downloaded. Normally the
492            // build should succeed if they are not downloaded and not used,
493            // but they still need to resolve. If they are actually needed
494            // then cargo will fail to download and an error message
495            // indicating that the required dependency is unavailable while
496            // offline will be displayed.
497            let mut called = false;
498            let callback = &mut |s: IndexSummary| {
499                if !s.is_offline() {
500                    called = true;
501                    f(s);
502                }
503            };
504            ready!(self.query_inner_with_online(name, req, load, callback, false)?);
505            if called {
506                return Poll::Ready(Ok(()));
507            }
508        }
509        self.query_inner_with_online(name, req, load, f, true)
510    }
511
512    /// Inner implementation of [`Self::query_inner`]. Returns the number of
513    /// summaries we've got.
514    ///
515    /// The `online` controls whether Cargo can access the network when needed.
516    fn query_inner_with_online(
517        &mut self,
518        name: InternedString,
519        req: &OptVersionReq,
520        load: &mut dyn RegistryData,
521        f: &mut dyn FnMut(IndexSummary),
522        online: bool,
523    ) -> Poll<CargoResult<()>> {
524        ready!(self.summaries(name, &req, load))?
525            // First filter summaries for `--offline`. If we're online then
526            // everything is a candidate, otherwise if we're offline we're only
527            // going to consider candidates which are actually present on disk.
528            //
529            // Note: This particular logic can cause problems with
530            // optional dependencies when offline. If at least 1 version
531            // of an optional dependency is downloaded, but that version
532            // does not satisfy the requirements, then resolution will
533            // fail. Unfortunately, whether or not something is optional
534            // is not known here.
535            .map(|s| {
536                if online || load.is_crate_downloaded(s.package_id()) {
537                    s.clone()
538                } else {
539                    IndexSummary::Offline(s.as_summary().clone())
540                }
541            })
542            .for_each(f);
543        Poll::Ready(Ok(()))
544    }
545
546    /// Looks into the summaries to check if a package has been yanked.
547    pub fn is_yanked(
548        &mut self,
549        pkg: PackageId,
550        load: &mut dyn RegistryData,
551    ) -> Poll<CargoResult<bool>> {
552        let req = OptVersionReq::lock_to_exact(pkg.version());
553        let found = ready!(self.summaries(pkg.name(), &req, load))?.any(|s| s.is_yanked());
554        Poll::Ready(Ok(found))
555    }
556}
557
558impl Summaries {
559    /// Parse out a [`Summaries`] instances from on-disk state.
560    ///
561    /// This will do the followings in order:
562    ///
563    /// 1. Attempt to prefer parsing a previous index cache file that already
564    ///    exists from a previous invocation of Cargo (aka you're typing `cargo
565    ///    build` again after typing it previously).
566    /// 2. If parsing fails, or the cache isn't found or is invalid, we then
567    ///    take a slower path which loads the full descriptor for `relative`
568    ///    from the underlying index (aka libgit2 with crates.io, or from a
569    ///    remote HTTP index) and then parse everything in there.
570    ///
571    /// * `root` --- this is the root argument passed to `load`
572    /// * `name` --- the name of the package.
573    /// * `source_id` --- the registry's `SourceId` used when parsing JSON blobs
574    ///   to create summaries.
575    /// * `load` --- the actual index implementation which may be very slow to
576    ///   call. We avoid this if we can.
577    /// * `bindeps` --- whether the `-Zbindeps` unstable flag is enabled
578    pub fn parse(
579        root: &Path,
580        name: &str,
581        source_id: SourceId,
582        load: &mut dyn RegistryData,
583        bindeps: bool,
584        cache_manager: &CacheManager<'_>,
585    ) -> Poll<CargoResult<Option<Summaries>>> {
586        // This is the file we're loading from cache or the index data.
587        // See module comment in `registry/mod.rs` for why this is structured the way it is.
588        let lowered_name = &name.to_lowercase();
589        let relative = make_dep_path(&lowered_name, false);
590
591        let mut cached_summaries = None;
592        let mut index_version = None;
593        if let Some(contents) = cache_manager.get(lowered_name) {
594            match Summaries::parse_cache(contents) {
595                Ok((s, v)) => {
596                    cached_summaries = Some(s);
597                    index_version = Some(v);
598                }
599                Err(e) => {
600                    tracing::debug!("failed to parse {lowered_name:?} cache: {e}");
601                }
602            }
603        }
604
605        let response = ready!(load.load(root, relative.as_ref(), index_version.as_deref())?);
606
607        match response {
608            LoadResponse::CacheValid => {
609                tracing::debug!("fast path for registry cache of {:?}", relative);
610                return Poll::Ready(Ok(cached_summaries));
611            }
612            LoadResponse::NotFound => {
613                cache_manager.invalidate(lowered_name);
614                return Poll::Ready(Ok(None));
615            }
616            LoadResponse::Data {
617                raw_data,
618                index_version,
619            } => {
620                // This is the fallback path where we actually talk to the registry backend to load
621                // information. Here we parse every single line in the index (as we need
622                // to find the versions)
623                tracing::debug!("slow path for {:?}", relative);
624                let mut cache = SummariesCache::default();
625                let mut ret = Summaries::default();
626                ret.raw_data = raw_data;
627                for line in split(&ret.raw_data, b'\n') {
628                    // Attempt forwards-compatibility on the index by ignoring
629                    // everything that we ourselves don't understand, that should
630                    // allow future cargo implementations to break the
631                    // interpretation of each line here and older cargo will simply
632                    // ignore the new lines.
633                    let summary = match IndexSummary::parse(line, source_id, bindeps) {
634                        Ok(summary) => summary,
635                        Err(e) => {
636                            // This should only happen when there is an index
637                            // entry from a future version of cargo that this
638                            // version doesn't understand. Hopefully, those future
639                            // versions of cargo correctly set INDEX_V_MAX and
640                            // CURRENT_CACHE_VERSION, otherwise this will skip
641                            // entries in the cache preventing those newer
642                            // versions from reading them (that is, until the
643                            // cache is rebuilt).
644                            tracing::info!(
645                                "failed to parse {:?} registry package: {}",
646                                relative,
647                                e
648                            );
649                            continue;
650                        }
651                    };
652                    let version = summary.package_id().version().clone();
653                    cache.versions.push((version.clone(), line));
654                    ret.versions.insert(version, summary.into());
655                }
656                if let Some(index_version) = index_version {
657                    tracing::trace!("caching index_version {}", index_version);
658                    let cache_bytes = cache.serialize(index_version.as_str());
659                    // Once we have our `cache_bytes` which represents the `Summaries` we're
660                    // about to return, write that back out to disk so future Cargo
661                    // invocations can use it.
662                    cache_manager.put(lowered_name, &cache_bytes);
663
664                    // If we've got debug assertions enabled read back in the cached values
665                    // and assert they match the expected result.
666                    #[cfg(debug_assertions)]
667                    {
668                        let readback = SummariesCache::parse(&cache_bytes)
669                            .expect("failed to parse cache we just wrote");
670                        assert_eq!(
671                            readback.index_version, index_version,
672                            "index_version mismatch"
673                        );
674                        assert_eq!(readback.versions, cache.versions, "versions mismatch");
675                    }
676                }
677                Poll::Ready(Ok(Some(ret)))
678            }
679        }
680    }
681
682    /// Parses the contents of an on-disk cache, aka [`SummariesCache`], which
683    /// represents information previously cached by Cargo.
684    pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> {
685        let cache = SummariesCache::parse(&contents)?;
686        let index_version = cache.index_version.into();
687        let mut ret = Summaries::default();
688        for (version, summary) in cache.versions {
689            let (start, end) = subslice_bounds(&contents, summary);
690            ret.versions
691                .insert(version, MaybeIndexSummary::Unparsed { start, end });
692        }
693        ret.raw_data = contents;
694        return Ok((ret, index_version));
695
696        // Returns the start/end offsets of `inner` with `outer`. Asserts that
697        // `inner` is a subslice of `outer`.
698        fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) {
699            let outer_start = outer.as_ptr() as usize;
700            let outer_end = outer_start + outer.len();
701            let inner_start = inner.as_ptr() as usize;
702            let inner_end = inner_start + inner.len();
703            assert!(inner_start >= outer_start);
704            assert!(inner_end <= outer_end);
705            (inner_start - outer_start, inner_end - outer_start)
706        }
707    }
708}
709
710impl MaybeIndexSummary {
711    /// Parses this "maybe a summary" into a `Parsed` for sure variant.
712    ///
713    /// Does nothing if this is already `Parsed`, and otherwise the `raw_data`
714    /// passed in is sliced with the bounds in `Unparsed` and then actually
715    /// parsed.
716    fn parse(
717        &mut self,
718        raw_data: &[u8],
719        source_id: SourceId,
720        bindeps: bool,
721    ) -> CargoResult<&IndexSummary> {
722        let (start, end) = match self {
723            MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
724            MaybeIndexSummary::Parsed(summary) => return Ok(summary),
725        };
726        let summary = IndexSummary::parse(&raw_data[start..end], source_id, bindeps)?;
727        *self = MaybeIndexSummary::Parsed(summary);
728        match self {
729            MaybeIndexSummary::Unparsed { .. } => unreachable!(),
730            MaybeIndexSummary::Parsed(summary) => Ok(summary),
731        }
732    }
733}
734
735impl From<IndexSummary> for MaybeIndexSummary {
736    fn from(summary: IndexSummary) -> MaybeIndexSummary {
737        MaybeIndexSummary::Parsed(summary)
738    }
739}
740
741impl IndexSummary {
742    /// Parses a line from the registry's index file into an [`IndexSummary`]
743    /// for a package.
744    ///
745    /// The `line` provided is expected to be valid JSON. It is supposed to be
746    /// a [`IndexPackage`].
747    fn parse(line: &[u8], source_id: SourceId, bindeps: bool) -> CargoResult<IndexSummary> {
748        // ****CAUTION**** Please be extremely careful with returning errors
749        // from this function. Entries that error are not included in the
750        // index cache, and can cause cargo to get confused when switching
751        // between different versions that understand the index differently.
752        // Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION
753        // values carefully when making changes here.
754        let index_summary = (|| {
755            let index = serde_json::from_slice::<IndexPackage<'_>>(line)?;
756            let summary = index.to_summary(source_id)?;
757            Ok((index, summary))
758        })();
759        let (index, summary, valid) = match index_summary {
760            Ok((index, summary)) => (index, summary, true),
761            Err(err) => {
762                let Ok(IndexPackageMinimum { name, vers }) =
763                    serde_json::from_slice::<IndexPackageMinimum<'_>>(line)
764                else {
765                    // If we can't recover, prefer the original error
766                    return Err(err);
767                };
768                tracing::info!(
769                    "recoverying from failed parse of registry package {name}@{vers}: {err}"
770                );
771                let IndexPackageRustVersion { rust_version } =
772                    serde_json::from_slice::<IndexPackageRustVersion>(line).unwrap_or_default();
773                let IndexPackageV { v } =
774                    serde_json::from_slice::<IndexPackageV>(line).unwrap_or_default();
775                let index = IndexPackage {
776                    name,
777                    vers,
778                    rust_version,
779                    v,
780                    deps: Default::default(),
781                    features: Default::default(),
782                    features2: Default::default(),
783                    cksum: Default::default(),
784                    yanked: Default::default(),
785                    links: Default::default(),
786                };
787                let summary = index.to_summary(source_id)?;
788                (index, summary, false)
789            }
790        };
791        let v = index.v.unwrap_or(1);
792        tracing::trace!("json parsed registry {}/{}", index.name, index.vers);
793
794        let v_max = if bindeps {
795            INDEX_V_MAX + 1
796        } else {
797            INDEX_V_MAX
798        };
799
800        if v_max < v {
801            Ok(IndexSummary::Unsupported(summary, v))
802        } else if !valid {
803            Ok(IndexSummary::Invalid(summary))
804        } else if index.yanked.unwrap_or(false) {
805            Ok(IndexSummary::Yanked(summary))
806        } else {
807            Ok(IndexSummary::Candidate(summary))
808        }
809    }
810}
811
812impl<'a> RegistryDependency<'a> {
813    /// Converts an encoded dependency in the registry to a cargo dependency
814    pub fn into_dep(self, default: SourceId) -> CargoResult<Dependency> {
815        let RegistryDependency {
816            name,
817            req,
818            mut features,
819            optional,
820            default_features,
821            target,
822            kind,
823            registry,
824            package,
825            public,
826            artifact,
827            bindep_target,
828            lib,
829        } = self;
830
831        let id = if let Some(registry) = &registry {
832            SourceId::for_registry(&registry.into_url()?)?
833        } else {
834            default
835        };
836
837        let interned_name = InternedString::new(package.as_ref().unwrap_or(&name));
838        let mut dep = Dependency::parse(interned_name, Some(&req), id)?;
839        if package.is_some() {
840            dep.set_explicit_name_in_toml(name);
841        }
842        let kind = match kind.as_deref().unwrap_or("") {
843            "dev" => DepKind::Development,
844            "build" => DepKind::Build,
845            _ => DepKind::Normal,
846        };
847
848        let platform = match target {
849            Some(target) => Some(target.parse()?),
850            None => None,
851        };
852
853        // All dependencies are private by default
854        let public = public.unwrap_or(false);
855
856        // Unfortunately older versions of cargo and/or the registry ended up
857        // publishing lots of entries where the features array contained the
858        // empty feature, "", inside. This confuses the resolution process much
859        // later on and these features aren't actually valid, so filter them all
860        // out here.
861        features.retain(|s| !s.is_empty());
862
863        // In index, "registry" is null if it is from the same index.
864        // In Cargo.toml, "registry" is None if it is from the default
865        if !id.is_crates_io() {
866            dep.set_registry_id(id);
867        }
868
869        if let Some(artifacts) = artifact {
870            let artifact = Artifact::parse(&artifacts, lib, bindep_target.as_deref())?;
871            dep.set_artifact(artifact);
872        }
873
874        dep.set_optional(optional)
875            .set_default_features(default_features)
876            .set_features(features)
877            .set_platform(platform)
878            .set_kind(kind)
879            .set_public(public);
880
881        Ok(dep)
882    }
883}
884
885/// Like [`slice::split`] but is optimized by [`memchr`].
886fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
887    struct Split<'a> {
888        haystack: &'a [u8],
889        needle: u8,
890    }
891
892    impl<'a> Iterator for Split<'a> {
893        type Item = &'a [u8];
894
895        fn next(&mut self) -> Option<&'a [u8]> {
896            if self.haystack.is_empty() {
897                return None;
898            }
899            let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) {
900                Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]),
901                None => (self.haystack, &[][..]),
902            };
903            self.haystack = remaining;
904            Some(ret)
905        }
906    }
907
908    Split { haystack, needle }
909}
910
#[test]
fn escaped_char_in_index_json_blob() {
    // Every blob below must deserialize into an `IndexPackage`.
    let blobs = [
        r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#,
        r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"#,
        // Now we add escaped chars in all the places they can go.
        // These are not valid, but it should error later than json parsing.
        r#"{
        "name":"This name has a escaped cher in it \n\t\" ",
        "vers":"0.0.1",
        "deps":[{
            "name": " \n\t\" ",
            "req": " \n\t\" ",
            "features": [" \n\t\" "],
            "optional": true,
            "default_features": true,
            "target": " \n\t\" ",
            "kind": " \n\t\" ",
            "registry": " \n\t\" "
        }],
        "cksum":"bae3",
        "features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]},
        "links":" \n\t\" "}"#,
    ];

    for blob in blobs {
        let _: IndexPackage<'_> = serde_json::from_str(blob).unwrap();
    }
}