cargo/core/
source_id.rs

1use crate::core::GitReference;
2use crate::core::PackageId;
3use crate::core::SourceKind;
4use crate::sources::registry::CRATES_IO_HTTP_INDEX;
5use crate::sources::source::Source;
6use crate::sources::{DirectorySource, CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_IO_REGISTRY};
7use crate::sources::{GitSource, PathSource, RegistrySource};
8use crate::util::interning::InternedString;
9use crate::util::{context, CanonicalUrl, CargoResult, GlobalContext, IntoUrl};
10use anyhow::Context as _;
11use serde::de;
12use serde::ser;
13use std::cmp::{self, Ordering};
14use std::collections::HashSet;
15use std::fmt::{self, Formatter};
16use std::hash::{self, Hash};
17use std::path::{Path, PathBuf};
18use std::ptr;
19use std::sync::Mutex;
20use std::sync::OnceLock;
21use tracing::trace;
22use url::Url;
23
/// Process-wide table of interned [`SourceIdInner`] values.
///
/// The table is created lazily on first use and is guarded by a mutex.
/// `SourceId::wrap` looks values up here and, on a miss, leaks a boxed
/// `SourceIdInner` to obtain the `&'static` reference that is stored.
static SOURCE_ID_CACHE: OnceLock<Mutex<HashSet<&'static SourceIdInner>>> = OnceLock::new();
25
/// Unique identifier for a source of packages.
///
/// Cargo uniquely identifies packages using [`PackageId`], a combination of the
/// package name, version, and the code source. `SourceId` exactly represents
/// the "code source" in `PackageId`. See [`SourceId::hash`] to learn what are
/// taken into account for the uniqueness of a source.
///
/// `SourceId` is usually associated with an instance of [`Source`], which is
/// supposed to provide a `SourceId` via [`Source::source_id`] method.
///
/// A `SourceId` is a `Copy` handle holding a single `&'static` reference to an
/// interned `SourceIdInner`.
///
/// [`Source`]: crate::sources::source::Source
/// [`Source::source_id`]: crate::sources::source::Source::source_id
/// [`PackageId`]: super::PackageId
#[derive(Clone, Copy, Eq, Debug)]
pub struct SourceId {
    /// Reference to the interned data; see `SOURCE_ID_CACHE`.
    inner: &'static SourceIdInner,
}
43
/// The interned version of [`SourceId`] to avoid excessive clones and borrows.
/// Values are cached in `SOURCE_ID_CACHE` once created.
///
/// Equality and hashing are implemented by hand and only look at `kind`,
/// `precise` and `canonical_url`; `url` and `registry_key` do not take part.
#[derive(Eq, Clone, Debug)]
struct SourceIdInner {
    /// The source URL.
    url: Url,
    /// The canonical version of the above url. See [`CanonicalUrl`] to learn
    /// why it is needed and how it normalizes a URL.
    canonical_url: CanonicalUrl,
    /// The source kind.
    kind: SourceKind,
    /// For example, the exact Git revision of the specified branch for a Git Source.
    precise: Option<Precise>,
    /// Name of the remote registry.
    ///
    /// WARNING: this is not always set. It is `None` when the name is not
    /// known, e.g. for a registry coming from `--index` or Cargo.lock.
    registry_key: Option<KeyOf>,
}
63
/// The "precise" payload that can be attached to a [`SourceId`].
#[derive(Eq, PartialEq, Clone, Debug, Hash)]
enum Precise {
    /// Marker value set by `SourceId::with_locked_precise`; displayed as `locked`.
    Locked,
    /// A requested registry version change for one package, recorded by
    /// `SourceId::with_precise_registry_version`.
    Updated {
        /// Name of the package the update applies to.
        name: InternedString,
        /// Version being updated from.
        from: semver::Version,
        /// Version being updated to.
        to: semver::Version,
    },
    /// A string set through `SourceId::with_git_precise`, e.g. the fragment of
    /// a `git+` source URL parsed by `SourceId::from_url`.
    GitUrlFragment(String),
}
74
75impl fmt::Display for Precise {
76    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
77        match self {
78            Precise::Locked => "locked".fmt(f),
79            Precise::Updated { name, from, to } => {
80                write!(f, "{name}={from}->{to}")
81            }
82            Precise::GitUrlFragment(s) => s.fmt(f),
83        }
84    }
85}
86
/// Where the remote source key is defined.
///
/// The purpose of this is to provide better diagnostics for different sources of keys.
/// Both variants carry the key as a plain string.
#[derive(Debug, Clone, PartialEq, Eq)]
enum KeyOf {
    /// Defined in the `[registries]` table or the built-in `crates-io` key.
    Registry(String),
    /// Defined in the `[source]` replacement table.
    Source(String),
}
97
98impl SourceId {
99    /// Creates a `SourceId` object from the kind and URL.
100    ///
101    /// The canonical url will be calculated, but the precise field will not
102    fn new(kind: SourceKind, url: Url, key: Option<KeyOf>) -> CargoResult<SourceId> {
103        if kind == SourceKind::SparseRegistry {
104            // Sparse URLs are different because they store the kind prefix (sparse+)
105            // in the URL. This is because the prefix is necessary to differentiate
106            // from regular registries (git-based). The sparse+ prefix is included
107            // everywhere, including user-facing locations such as the `config.toml`
108            // file that defines the registry, or whenever Cargo displays it to the user.
109            assert!(url.as_str().starts_with("sparse+"));
110        }
111        let source_id = SourceId::wrap(SourceIdInner {
112            kind,
113            canonical_url: CanonicalUrl::new(&url)?,
114            url,
115            precise: None,
116            registry_key: key,
117        });
118        Ok(source_id)
119    }
120
121    /// Interns the value and returns the wrapped type.
122    fn wrap(inner: SourceIdInner) -> SourceId {
123        let mut cache = SOURCE_ID_CACHE
124            .get_or_init(|| Default::default())
125            .lock()
126            .unwrap();
127        let inner = cache.get(&inner).cloned().unwrap_or_else(|| {
128            let inner = Box::leak(Box::new(inner));
129            cache.insert(inner);
130            inner
131        });
132        SourceId { inner }
133    }
134
135    fn remote_source_kind(url: &Url) -> SourceKind {
136        if url.as_str().starts_with("sparse+") {
137            SourceKind::SparseRegistry
138        } else {
139            SourceKind::Registry
140        }
141    }
142
143    /// Parses a source URL and returns the corresponding ID.
144    ///
145    /// ## Example
146    ///
147    /// ```
148    /// use cargo::core::SourceId;
149    /// SourceId::from_url("git+https://github.com/alexcrichton/\
150    ///                     libssh2-static-sys#80e71a3021618eb05\
151    ///                     656c58fb7c5ef5f12bc747f");
152    /// ```
153    pub fn from_url(string: &str) -> CargoResult<SourceId> {
154        let (kind, url) = string
155            .split_once('+')
156            .ok_or_else(|| anyhow::format_err!("invalid source `{}`", string))?;
157
158        match kind {
159            "git" => {
160                let mut url = url.into_url()?;
161                let reference = GitReference::from_query(url.query_pairs());
162                let precise = url.fragment().map(|s| s.to_owned());
163                url.set_fragment(None);
164                url.set_query(None);
165                Ok(SourceId::for_git(&url, reference)?.with_git_precise(precise))
166            }
167            "registry" => {
168                let url = url.into_url()?;
169                Ok(SourceId::new(SourceKind::Registry, url, None)?.with_locked_precise())
170            }
171            "sparse" => {
172                let url = string.into_url()?;
173                Ok(SourceId::new(SourceKind::SparseRegistry, url, None)?.with_locked_precise())
174            }
175            "path" => {
176                let url = url.into_url()?;
177                SourceId::new(SourceKind::Path, url, None)
178            }
179            kind => Err(anyhow::format_err!("unsupported source protocol: {}", kind)),
180        }
181    }
182
183    /// A view of the [`SourceId`] that can be `Display`ed as a URL.
184    pub fn as_url(&self) -> SourceIdAsUrl<'_> {
185        SourceIdAsUrl {
186            inner: &*self.inner,
187            encoded: false,
188        }
189    }
190
191    /// Like [`Self::as_url`] but with URL parameters encoded.
192    pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> {
193        SourceIdAsUrl {
194            inner: &*self.inner,
195            encoded: true,
196        }
197    }
198
199    /// Creates a `SourceId` from a filesystem path.
200    ///
201    /// `path`: an absolute path.
202    pub fn for_path(path: &Path) -> CargoResult<SourceId> {
203        let url = path.into_url()?;
204        SourceId::new(SourceKind::Path, url, None)
205    }
206
207    /// Creates a `SourceId` from a filesystem path.
208    ///
209    /// `path`: an absolute path.
210    pub fn for_manifest_path(manifest_path: &Path) -> CargoResult<SourceId> {
211        if crate::util::toml::is_embedded(manifest_path) {
212            Self::for_path(manifest_path)
213        } else {
214            Self::for_path(manifest_path.parent().unwrap())
215        }
216    }
217
218    /// Creates a `SourceId` from a Git reference.
219    pub fn for_git(url: &Url, reference: GitReference) -> CargoResult<SourceId> {
220        SourceId::new(SourceKind::Git(reference), url.clone(), None)
221    }
222
223    /// Creates a `SourceId` from a remote registry URL when the registry name
224    /// cannot be determined, e.g. a user passes `--index` directly from CLI.
225    ///
226    /// Use [`SourceId::for_alt_registry`] if a name can provided, which
227    /// generates better messages for cargo.
228    pub fn for_registry(url: &Url) -> CargoResult<SourceId> {
229        let kind = Self::remote_source_kind(url);
230        SourceId::new(kind, url.to_owned(), None)
231    }
232
233    /// Creates a `SourceId` for a remote registry from the `[registries]` table or crates.io.
234    pub fn for_alt_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
235        let kind = Self::remote_source_kind(url);
236        let key = KeyOf::Registry(key.into());
237        SourceId::new(kind, url.to_owned(), Some(key))
238    }
239
240    /// Creates a `SourceId` for a remote registry from the `[source]` replacement table.
241    pub fn for_source_replacement_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
242        let kind = Self::remote_source_kind(url);
243        let key = KeyOf::Source(key.into());
244        SourceId::new(kind, url.to_owned(), Some(key))
245    }
246
247    /// Creates a `SourceId` from a local registry path.
248    pub fn for_local_registry(path: &Path) -> CargoResult<SourceId> {
249        let url = path.into_url()?;
250        SourceId::new(SourceKind::LocalRegistry, url, None)
251    }
252
253    /// Creates a `SourceId` from a directory path.
254    pub fn for_directory(path: &Path) -> CargoResult<SourceId> {
255        let url = path.into_url()?;
256        SourceId::new(SourceKind::Directory, url, None)
257    }
258
259    /// Returns the `SourceId` corresponding to the main repository.
260    ///
261    /// This is the main cargo registry by default, but it can be overridden in
262    /// a `.cargo/config.toml`.
263    pub fn crates_io(gctx: &GlobalContext) -> CargoResult<SourceId> {
264        gctx.crates_io_source_id()
265    }
266
267    /// Returns the `SourceId` corresponding to the main repository, using the
268    /// sparse HTTP index if allowed.
269    pub fn crates_io_maybe_sparse_http(gctx: &GlobalContext) -> CargoResult<SourceId> {
270        if Self::crates_io_is_sparse(gctx)? {
271            gctx.check_registry_index_not_set()?;
272            let url = CRATES_IO_HTTP_INDEX.into_url().unwrap();
273            let key = KeyOf::Registry(CRATES_IO_REGISTRY.into());
274            SourceId::new(SourceKind::SparseRegistry, url, Some(key))
275        } else {
276            Self::crates_io(gctx)
277        }
278    }
279
280    /// Returns whether to access crates.io over the sparse protocol.
281    pub fn crates_io_is_sparse(gctx: &GlobalContext) -> CargoResult<bool> {
282        let proto: Option<context::Value<String>> = gctx.get("registries.crates-io.protocol")?;
283        let is_sparse = match proto.as_ref().map(|v| v.val.as_str()) {
284            Some("sparse") => true,
285            Some("git") => false,
286            Some(unknown) => anyhow::bail!(
287                "unsupported registry protocol `{unknown}` (defined in {})",
288                proto.as_ref().unwrap().definition
289            ),
290            None => true,
291        };
292        Ok(is_sparse)
293    }
294
295    /// Gets the `SourceId` associated with given name of the remote registry.
296    pub fn alt_registry(gctx: &GlobalContext, key: &str) -> CargoResult<SourceId> {
297        if key == CRATES_IO_REGISTRY {
298            return Self::crates_io(gctx);
299        }
300        let url = gctx.get_registry_index(key)?;
301        Self::for_alt_registry(&url, key)
302    }
303
304    /// Gets this source URL.
305    pub fn url(&self) -> &Url {
306        &self.inner.url
307    }
308
309    /// Gets the canonical URL of this source, used for internal comparison
310    /// purposes.
311    pub fn canonical_url(&self) -> &CanonicalUrl {
312        &self.inner.canonical_url
313    }
314
315    /// Displays the text "crates.io index" for Cargo shell status output.
316    pub fn display_index(self) -> String {
317        if self.is_crates_io() {
318            format!("{} index", CRATES_IO_DOMAIN)
319        } else {
320            format!("`{}` index", self.display_registry_name())
321        }
322    }
323
324    /// Displays the name of a registry if it has one. Otherwise just the URL.
325    pub fn display_registry_name(self) -> String {
326        if let Some(key) = self.inner.registry_key.as_ref().map(|k| k.key()) {
327            key.into()
328        } else if self.has_precise() {
329            // We remove `precise` here to retrieve an permissive version of
330            // `SourceIdInner`, which may contain the registry name.
331            self.without_precise().display_registry_name()
332        } else {
333            url_display(self.url())
334        }
335    }
336
337    /// Gets the name of the remote registry as defined in the `[registries]` table,
338    /// or the built-in `crates-io` key.
339    pub fn alt_registry_key(&self) -> Option<&str> {
340        self.inner.registry_key.as_ref()?.alternative_registry()
341    }
342
343    /// Returns `true` if this source is from a filesystem path.
344    pub fn is_path(self) -> bool {
345        self.inner.kind == SourceKind::Path
346    }
347
348    /// Returns the local path if this is a path dependency.
349    pub fn local_path(self) -> Option<PathBuf> {
350        if self.inner.kind != SourceKind::Path {
351            return None;
352        }
353
354        Some(self.inner.url.to_file_path().unwrap())
355    }
356
357    pub fn kind(&self) -> &SourceKind {
358        &self.inner.kind
359    }
360
361    /// Returns `true` if this source is from a registry (either local or not).
362    pub fn is_registry(self) -> bool {
363        matches!(
364            self.inner.kind,
365            SourceKind::Registry | SourceKind::SparseRegistry | SourceKind::LocalRegistry
366        )
367    }
368
369    /// Returns `true` if this source is from a sparse registry.
370    pub fn is_sparse(self) -> bool {
371        matches!(self.inner.kind, SourceKind::SparseRegistry)
372    }
373
374    /// Returns `true` if this source is a "remote" registry.
375    ///
376    /// "remote" may also mean a file URL to a git index, so it is not
377    /// necessarily "remote". This just means it is not `local-registry`.
378    pub fn is_remote_registry(self) -> bool {
379        matches!(
380            self.inner.kind,
381            SourceKind::Registry | SourceKind::SparseRegistry
382        )
383    }
384
385    /// Returns `true` if this source from a Git repository.
386    pub fn is_git(self) -> bool {
387        matches!(self.inner.kind, SourceKind::Git(_))
388    }
389
390    /// Creates an implementation of `Source` corresponding to this ID.
391    ///
392    /// * `yanked_whitelist` --- Packages allowed to be used, even if they are yanked.
393    pub fn load<'a>(
394        self,
395        gctx: &'a GlobalContext,
396        yanked_whitelist: &HashSet<PackageId>,
397    ) -> CargoResult<Box<dyn Source + 'a>> {
398        trace!("loading SourceId; {}", self);
399        match self.inner.kind {
400            SourceKind::Git(..) => Ok(Box::new(GitSource::new(self, gctx)?)),
401            SourceKind::Path => {
402                let path = self
403                    .inner
404                    .url
405                    .to_file_path()
406                    .expect("path sources cannot be remote");
407                if crate::util::toml::is_embedded(&path) {
408                    anyhow::bail!("Single file packages cannot be used as dependencies")
409                }
410                Ok(Box::new(PathSource::new(&path, self, gctx)))
411            }
412            SourceKind::Registry | SourceKind::SparseRegistry => Ok(Box::new(
413                RegistrySource::remote(self, yanked_whitelist, gctx)?,
414            )),
415            SourceKind::LocalRegistry => {
416                let path = self
417                    .inner
418                    .url
419                    .to_file_path()
420                    .expect("path sources cannot be remote");
421                Ok(Box::new(RegistrySource::local(
422                    self,
423                    &path,
424                    yanked_whitelist,
425                    gctx,
426                )))
427            }
428            SourceKind::Directory => {
429                let path = self
430                    .inner
431                    .url
432                    .to_file_path()
433                    .expect("path sources cannot be remote");
434                Ok(Box::new(DirectorySource::new(&path, self, gctx)))
435            }
436        }
437    }
438
439    /// Gets the Git reference if this is a git source, otherwise `None`.
440    pub fn git_reference(self) -> Option<&'static GitReference> {
441        match self.inner.kind {
442            SourceKind::Git(ref s) => Some(s),
443            _ => None,
444        }
445    }
446
447    /// Check if the precise data field has bean set
448    pub fn has_precise(self) -> bool {
449        self.inner.precise.is_some()
450    }
451
452    /// Check if the precise data field has bean set to "locked"
453    pub fn has_locked_precise(self) -> bool {
454        self.inner.precise == Some(Precise::Locked)
455    }
456
457    /// Check if two sources have the same precise data field
458    pub fn has_same_precise_as(self, other: Self) -> bool {
459        self.inner.precise == other.inner.precise
460    }
461
462    /// Check if the precise data field stores information for this `name`
463    /// from a call to [`SourceId::with_precise_registry_version`].
464    ///
465    /// If so return the version currently in the lock file and the version to be updated to.
466    pub fn precise_registry_version(
467        self,
468        pkg: &str,
469    ) -> Option<(&semver::Version, &semver::Version)> {
470        match &self.inner.precise {
471            Some(Precise::Updated { name, from, to }) if name == pkg => Some((from, to)),
472            _ => None,
473        }
474    }
475
476    pub fn precise_git_fragment(self) -> Option<&'static str> {
477        match &self.inner.precise {
478            Some(Precise::GitUrlFragment(s)) => Some(&s),
479            _ => None,
480        }
481    }
482
483    /// Creates a new `SourceId` from this source with the given `precise`.
484    pub fn with_git_precise(self, fragment: Option<String>) -> SourceId {
485        self.with_precise(&fragment.map(|f| Precise::GitUrlFragment(f)))
486    }
487
488    /// Creates a new `SourceId` from this source without a `precise`.
489    pub fn without_precise(self) -> SourceId {
490        self.with_precise(&None)
491    }
492
493    /// Creates a new `SourceId` from this source without a `precise`.
494    pub fn with_locked_precise(self) -> SourceId {
495        self.with_precise(&Some(Precise::Locked))
496    }
497
498    /// Creates a new `SourceId` from this source with the `precise` from some other `SourceId`.
499    pub fn with_precise_from(self, v: Self) -> SourceId {
500        self.with_precise(&v.inner.precise)
501    }
502
503    fn with_precise(self, precise: &Option<Precise>) -> SourceId {
504        if &self.inner.precise == precise {
505            self
506        } else {
507            SourceId::wrap(SourceIdInner {
508                precise: precise.clone(),
509                ..(*self.inner).clone()
510            })
511        }
512    }
513
514    /// When updating a lock file on a version using `cargo update --precise`
515    /// the requested version is stored in the precise field.
516    /// On a registry dependency we also need to keep track of the package that
517    /// should be updated and even which of the versions should be updated.
518    /// All of this gets encoded in the precise field using this method.
519    /// The data can be read with [`SourceId::precise_registry_version`]
520    pub fn with_precise_registry_version(
521        self,
522        name: InternedString,
523        version: semver::Version,
524        precise: &str,
525    ) -> CargoResult<SourceId> {
526        let precise = semver::Version::parse(precise).with_context(|| {
527            if let Some(stripped) = precise.strip_prefix("v") {
528                return format!(
529                    "the version provided, `{precise}` is not a \
530                    valid SemVer version\n\n\
531                    help: try changing the version to `{stripped}`",
532                );
533            }
534            format!("invalid version format for precise version `{precise}`")
535        })?;
536
537        Ok(SourceId::wrap(SourceIdInner {
538            precise: Some(Precise::Updated {
539                name,
540                from: version,
541                to: precise,
542            }),
543            ..(*self.inner).clone()
544        }))
545    }
546
547    /// Returns `true` if the remote registry is the standard <https://crates.io>.
548    pub fn is_crates_io(self) -> bool {
549        match self.inner.kind {
550            SourceKind::Registry | SourceKind::SparseRegistry => {}
551            _ => return false,
552        }
553        let url = self.inner.url.as_str();
554        url == CRATES_IO_INDEX || url == CRATES_IO_HTTP_INDEX || is_overridden_crates_io_url(url)
555    }
556
557    /// Hashes `self` to be used in the name of some Cargo folders, so shouldn't vary.
558    ///
559    /// For git and url, `as_str` gives the serialisation of a url (which has a spec) and so
560    /// insulates against possible changes in how the url crate does hashing.
561    ///
562    /// For paths, remove the workspace prefix so the same source will give the
563    /// same hash in different locations, helping reproducible builds.
564    pub fn stable_hash<S: hash::Hasher>(self, workspace: &Path, into: &mut S) {
565        if self.is_path() {
566            if let Ok(p) = self
567                .inner
568                .url
569                .to_file_path()
570                .unwrap()
571                .strip_prefix(workspace)
572            {
573                self.inner.kind.hash(into);
574                p.to_str().unwrap().hash(into);
575                return;
576            }
577        }
578        self.inner.kind.hash(into);
579        match self.inner.kind {
580            SourceKind::Git(_) => (&self).inner.canonical_url.hash(into),
581            _ => (&self).inner.url.as_str().hash(into),
582        }
583    }
584
585    pub fn full_eq(self, other: SourceId) -> bool {
586        ptr::eq(self.inner, other.inner)
587    }
588
589    pub fn full_hash<S: hash::Hasher>(self, into: &mut S) {
590        ptr::NonNull::from(self.inner).hash(into)
591    }
592}
593
594impl PartialEq for SourceId {
595    fn eq(&self, other: &SourceId) -> bool {
596        self.cmp(other) == Ordering::Equal
597    }
598}
599
600impl PartialOrd for SourceId {
601    fn partial_cmp(&self, other: &SourceId) -> Option<Ordering> {
602        Some(self.cmp(other))
603    }
604}
605
606// Custom comparison defined as source kind and canonical URL equality,
607// ignoring the `precise` and `name` fields.
608impl Ord for SourceId {
609    fn cmp(&self, other: &SourceId) -> Ordering {
610        // If our interior pointers are to the exact same `SourceIdInner` then
611        // we're guaranteed to be equal.
612        if ptr::eq(self.inner, other.inner) {
613            return Ordering::Equal;
614        }
615
616        // Sort first based on `kind`, deferring to the URL comparison if
617        // the kinds are equal.
618        let ord_kind = self.inner.kind.cmp(&other.inner.kind);
619        ord_kind.then_with(|| self.inner.canonical_url.cmp(&other.inner.canonical_url))
620    }
621}
622
623impl ser::Serialize for SourceId {
624    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
625    where
626        S: ser::Serializer,
627    {
628        if self.is_path() {
629            None::<String>.serialize(s)
630        } else {
631            s.collect_str(&self.as_url())
632        }
633    }
634}
635
636impl<'de> de::Deserialize<'de> for SourceId {
637    fn deserialize<D>(d: D) -> Result<SourceId, D::Error>
638    where
639        D: de::Deserializer<'de>,
640    {
641        let string = String::deserialize(d)?;
642        SourceId::from_url(&string).map_err(de::Error::custom)
643    }
644}
645
646fn url_display(url: &Url) -> String {
647    if url.scheme() == "file" {
648        if let Ok(path) = url.to_file_path() {
649            if let Some(path_str) = path.to_str() {
650                return path_str.to_string();
651            }
652        }
653    }
654
655    url.as_str().to_string()
656}
657
658impl fmt::Display for SourceId {
659    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
660        match self.inner.kind {
661            SourceKind::Git(ref reference) => {
662                // Don't replace the URL display for git references,
663                // because those are kind of expected to be URLs.
664                write!(f, "{}", self.inner.url)?;
665                if let Some(pretty) = reference.pretty_ref(true) {
666                    write!(f, "?{}", pretty)?;
667                }
668
669                if let Some(s) = &self.inner.precise {
670                    let s = s.to_string();
671                    let len = cmp::min(s.len(), 8);
672                    write!(f, "#{}", &s[..len])?;
673                }
674                Ok(())
675            }
676            SourceKind::Path => write!(f, "{}", url_display(&self.inner.url)),
677            SourceKind::Registry | SourceKind::SparseRegistry => {
678                write!(f, "registry `{}`", self.display_registry_name())
679            }
680            SourceKind::LocalRegistry => write!(f, "registry `{}`", url_display(&self.inner.url)),
681            SourceKind::Directory => write!(f, "dir {}", url_display(&self.inner.url)),
682        }
683    }
684}
685
686impl Hash for SourceId {
687    fn hash<S: hash::Hasher>(&self, into: &mut S) {
688        self.inner.kind.hash(into);
689        self.inner.canonical_url.hash(into);
690    }
691}
692
693/// The hash of `SourceIdInner` is used to retrieve its interned value from
694/// `SOURCE_ID_CACHE`. We only care about fields that make `SourceIdInner`
695/// unique. Optional fields not affecting the uniqueness must be excluded,
696/// such as [`registry_key`]. That's why this is not derived.
697///
698/// [`registry_key`]: SourceIdInner::registry_key
699impl Hash for SourceIdInner {
700    fn hash<S: hash::Hasher>(&self, into: &mut S) {
701        self.kind.hash(into);
702        self.precise.hash(into);
703        self.canonical_url.hash(into);
704    }
705}
706
707/// This implementation must be synced with [`SourceIdInner::hash`].
708impl PartialEq for SourceIdInner {
709    fn eq(&self, other: &Self) -> bool {
710        self.kind == other.kind
711            && self.precise == other.precise
712            && self.canonical_url == other.canonical_url
713    }
714}
715
/// A `Display`able view into a `SourceId` that will write it as a url
///
/// Obtained from `SourceId::as_url` or `SourceId::as_encoded_url`.
pub struct SourceIdAsUrl<'a> {
    /// The interned source data being displayed.
    inner: &'a SourceIdInner,
    /// Passed to `GitReference::pretty_ref` when formatting a git source.
    encoded: bool,
}
721
722impl<'a> fmt::Display for SourceIdAsUrl<'a> {
723    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
724        if let Some(protocol) = self.inner.kind.protocol() {
725            write!(f, "{protocol}+")?;
726        }
727        write!(f, "{}", self.inner.url)?;
728        if let SourceIdInner {
729            kind: SourceKind::Git(ref reference),
730            ref precise,
731            ..
732        } = *self.inner
733        {
734            if let Some(pretty) = reference.pretty_ref(self.encoded) {
735                write!(f, "?{}", pretty)?;
736            }
737            if let Some(precise) = precise.as_ref() {
738                write!(f, "#{}", precise)?;
739            }
740        }
741        Ok(())
742    }
743}
744
745impl KeyOf {
746    /// Gets the underlying key.
747    fn key(&self) -> &str {
748        match self {
749            KeyOf::Registry(k) | KeyOf::Source(k) => k,
750        }
751    }
752
753    /// Gets the key if it's from an alternative registry.
754    fn alternative_registry(&self) -> Option<&str> {
755        match self {
756            KeyOf::Registry(k) => Some(k),
757            _ => None,
758        }
759    }
760}
761
762#[cfg(test)]
763mod tests {
764    use super::{GitReference, SourceId, SourceKind};
765    use crate::util::{GlobalContext, IntoUrl};
766
767    #[test]
768    fn github_sources_equal() {
769        let loc = "https://github.com/foo/bar".into_url().unwrap();
770        let default = SourceKind::Git(GitReference::DefaultBranch);
771        let s1 = SourceId::new(default.clone(), loc, None).unwrap();
772
773        let loc = "git://github.com/foo/bar".into_url().unwrap();
774        let s2 = SourceId::new(default, loc.clone(), None).unwrap();
775
776        assert_eq!(s1, s2);
777
778        let foo = SourceKind::Git(GitReference::Branch("foo".to_string()));
779        let s3 = SourceId::new(foo, loc, None).unwrap();
780        assert_ne!(s1, s3);
781    }
782
783    // This is a test that the hash of the `SourceId` for crates.io is a well-known
784    // value.
785    //
786    // Note that the hash value matches what the crates.io source id has hashed
787    // since Rust 1.84.0. We strive to keep this value the same across
788    // versions of Cargo because changing it means that users will need to
789    // redownload the index and all crates they use when using a new Cargo version.
790    //
791    // This isn't to say that this hash can *never* change, only that when changing
792    // this it should be explicitly done. If this hash changes accidentally and
793    // you're able to restore the hash to its original value, please do so!
794    // Otherwise please just leave a comment in your PR as to why the hash value is
795    // changing and why the old value can't be easily preserved.
796    // If it takes an ugly hack to restore it,
797    // then leave a link here so we can remove the hack next time we change the hash.
798    //
799    // Hacks to remove next time the hash changes:
800    // - (fill in your code here)
801    //
802    // The hash value should be stable across platforms, and doesn't depend on
803    // endianness and bit-width. One caveat is that absolute paths on Windows
804    // are inherently different than on Unix-like platforms. Unless we omit or
805    // strip the prefix components (e.g. `C:`), there is no way to have a true
806    // cross-platform stable hash for absolute paths.
807    #[test]
808    fn test_stable_hash() {
809        use std::hash::Hasher;
810        use std::path::Path;
811
812        use snapbox::assert_data_eq;
813        use snapbox::str;
814        use snapbox::IntoData as _;
815
816        use crate::util::hex::short_hash;
817        use crate::util::StableHasher;
818
819        #[cfg(not(windows))]
820        let ws_root = Path::new("/tmp/ws");
821        #[cfg(windows)]
822        let ws_root = Path::new(r"C:\\tmp\ws");
823
824        let gen_hash = |source_id: SourceId| {
825            let mut hasher = StableHasher::new();
826            source_id.stable_hash(ws_root, &mut hasher);
827            Hasher::finish(&hasher).to_string()
828        };
829
830        let source_id = SourceId::crates_io(&GlobalContext::default().unwrap()).unwrap();
831        assert_data_eq!(gen_hash(source_id), str!["7062945687441624357"].raw());
832        assert_data_eq!(short_hash(&source_id), str!["25cdd57fae9f0462"].raw());
833
834        let url = "https://my-crates.io".into_url().unwrap();
835        let source_id = SourceId::for_registry(&url).unwrap();
836        assert_data_eq!(gen_hash(source_id), str!["8310250053664888498"].raw());
837        assert_data_eq!(short_hash(&source_id), str!["b2d65deb64f05373"].raw());
838
839        let url = "https://your-crates.io".into_url().unwrap();
840        let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
841        assert_data_eq!(gen_hash(source_id), str!["14149534903000258933"].raw());
842        assert_data_eq!(short_hash(&source_id), str!["755952de063f5dc4"].raw());
843
844        let url = "sparse+https://my-crates.io".into_url().unwrap();
845        let source_id = SourceId::for_registry(&url).unwrap();
846        assert_data_eq!(gen_hash(source_id), str!["16249512552851930162"].raw());
847        assert_data_eq!(short_hash(&source_id), str!["327cfdbd92dd81e1"].raw());
848
849        let url = "sparse+https://your-crates.io".into_url().unwrap();
850        let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
851        assert_data_eq!(gen_hash(source_id), str!["6156697384053352292"].raw());
852        assert_data_eq!(short_hash(&source_id), str!["64a713b6a6fb7055"].raw());
853
854        let url = "file:///tmp/ws/crate".into_url().unwrap();
855        let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap();
856        assert_data_eq!(gen_hash(source_id), str!["473480029881867801"].raw());
857        assert_data_eq!(short_hash(&source_id), str!["199e591d94239206"].raw());
858
859        let path = &ws_root.join("crate");
860        let source_id = SourceId::for_local_registry(path).unwrap();
861        #[cfg(not(windows))]
862        {
863            assert_data_eq!(gen_hash(source_id), str!["11515846423845066584"].raw());
864            assert_data_eq!(short_hash(&source_id), str!["58d73c154f81d09f"].raw());
865        }
866        #[cfg(windows)]
867        {
868            assert_data_eq!(gen_hash(source_id), str!["6146331155906064276"].raw());
869            assert_data_eq!(short_hash(&source_id), str!["946fb2239f274c55"].raw());
870        }
871
872        let source_id = SourceId::for_path(path).unwrap();
873        assert_data_eq!(gen_hash(source_id), str!["215644081443634269"].raw());
874        #[cfg(not(windows))]
875        assert_data_eq!(short_hash(&source_id), str!["64bace89c92b101f"].raw());
876        #[cfg(windows)]
877        assert_data_eq!(short_hash(&source_id), str!["01e1e6c391813fb6"].raw());
878
879        let source_id = SourceId::for_directory(path).unwrap();
880        #[cfg(not(windows))]
881        {
882            assert_data_eq!(gen_hash(source_id), str!["6127590343904940368"].raw());
883            assert_data_eq!(short_hash(&source_id), str!["505191d1f3920955"].raw());
884        }
885        #[cfg(windows)]
886        {
887            assert_data_eq!(gen_hash(source_id), str!["10423446877655960172"].raw());
888            assert_data_eq!(short_hash(&source_id), str!["6c8ad69db585a790"].raw());
889        }
890    }
891
892    #[test]
893    fn serde_roundtrip() {
894        let url = "sparse+https://my-crates.io/".into_url().unwrap();
895        let source_id = SourceId::for_registry(&url).unwrap();
896        let formatted = format!("{}", source_id.as_url());
897        let deserialized = SourceId::from_url(&formatted).unwrap();
898        assert_eq!(formatted, "sparse+https://my-crates.io/");
899        assert_eq!(source_id, deserialized);
900    }
901
902    #[test]
903    fn gitrefs_roundtrip() {
904        let base = "https://host/path".into_url().unwrap();
905        let branch = GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string());
906        let s1 = SourceId::for_git(&base, branch).unwrap();
907        let ser1 = format!("{}", s1.as_encoded_url());
908        let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize");
909        let ser2 = format!("{}", s2.as_encoded_url());
910        // Serializing twice should yield the same result
911        assert_eq!(ser1, ser2, "Serialized forms don't match");
912        // SourceId serializing the same should have the same semantics
913        // This used to not be the case (# was ambiguous)
914        assert_eq!(s1, s2, "SourceId doesn't round-trip");
915        // Freeze the format to match an x-www-form-urlencoded query string
916        // https://url.spec.whatwg.org/#application/x-www-form-urlencoded
917        assert_eq!(
918            ser1,
919            "git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22"
920        );
921    }
922}
923
/// Checks whether `url` equals the overridden crates.io URL.
///
/// The override is read from the `__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS`
/// environment variable. Returns `false` when the variable cannot be read
/// (unset or not valid Unicode) or when its value differs from `url`.
// ALLOWED: For testing Cargo itself only.
#[allow(clippy::disallowed_methods)]
fn is_overridden_crates_io_url(url: &str) -> bool {
    match std::env::var("__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS") {
        Ok(overridden) => overridden == url,
        Err(_) => false,
    }
}