cargo/sources/git/
utils.rs

1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
3
4use crate::core::{GitReference, Verbosity};
5use crate::sources::git::fetch::RemoteKind;
6use crate::sources::git::oxide;
7use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8use crate::util::errors::CargoResult;
9use crate::util::HumanBytes;
10use crate::util::{network, GlobalContext, IntoUrl, MetricsCounter, Progress};
11use anyhow::{anyhow, Context as _};
12use cargo_util::{paths, ProcessBuilder};
13use curl::easy::List;
14use git2::{ErrorClass, ObjectType, Oid};
15use serde::ser;
16use serde::Serialize;
17use std::borrow::Cow;
18use std::fmt;
19use std::path::{Path, PathBuf};
20use std::process::Command;
21use std::str;
22use std::sync::atomic::{AtomicBool, Ordering};
23use std::time::{Duration, Instant};
24use tracing::{debug, info};
25use url::Url;
26
/// Name of a marker file inside a checkout. If the file is present, `git
/// reset` has been done and the repo checkout is ready to go. See
/// [`GitCheckout::reset`] for why we need this.
const CHECKOUT_READY_LOCK: &str = ".cargo-ok";
30
31fn serialize_str<T, S>(t: &T, s: S) -> Result<S::Ok, S::Error>
32where
33    T: fmt::Display,
34    S: ser::Serializer,
35{
36    s.collect_str(t)
37}
38
/// A short abbreviated OID.
///
/// Wraps the [`git2::Buf`] holding the abbreviation and exposes it through
/// [`GitShortID::as_str`]. Exists for avoiding extra allocations in
/// [`GitDatabase::to_short_id`].
pub struct GitShortID(git2::Buf);
43
44impl GitShortID {
45    /// Views the short ID as a `str`.
46    pub fn as_str(&self) -> &str {
47        self.0.as_str().unwrap()
48    }
49}
50
/// A remote repository. It gets cloned into a local [`GitDatabase`].
///
/// The remote is identified by its URL alone.
#[derive(PartialEq, Clone, Debug, Serialize)]
pub struct GitRemote {
    /// URL to a remote repository. Serialized as a string through
    /// [`serialize_str`].
    #[serde(serialize_with = "serialize_str")]
    url: Url,
}
58
/// A local clone of a remote repository's database. Multiple [`GitCheckout`]s
/// can be cloned from a single [`GitDatabase`].
///
/// Instances are produced by [`GitRemote::checkout`] (fetching from the
/// network) or [`GitRemote::db_at`] (opening an existing repository).
pub struct GitDatabase {
    /// The remote repository where this database is fetched from.
    remote: GitRemote,
    /// Path to the root of the underlying Git repository on the local filesystem.
    path: PathBuf,
    /// Underlying Git repository instance for this database.
    repo: git2::Repository,
}
69
/// A local checkout of a particular revision from a [`GitDatabase`].
///
/// The lifetime `'a` is the borrow of the database this checkout was made
/// from.
pub struct GitCheckout<'a> {
    /// The git database where this checkout is cloned from.
    database: &'a GitDatabase,
    /// Path to the root of the underlying Git repository on the local filesystem.
    /// This is the repository's working directory, or its git directory when
    /// it has no working directory (see `GitCheckout::new`).
    path: PathBuf,
    /// The git revision this checkout is for.
    revision: git2::Oid,
    /// Underlying Git repository instance for this checkout.
    repo: git2::Repository,
}
81
82impl GitRemote {
83    /// Creates an instance for a remote repository URL.
84    pub fn new(url: &Url) -> GitRemote {
85        GitRemote { url: url.clone() }
86    }
87
    /// Gets the remote repository URL this instance was created with.
    pub fn url(&self) -> &Url {
        &self.url
    }
92
93    /// Fetches and checkouts to a reference or a revision from this remote
94    /// into a local path.
95    ///
96    /// This ensures that it gets the up-to-date commit when a named reference
97    /// is given (tag, branch, refs/*). Thus, network connection is involved.
98    ///
99    /// If we have a previous instance of [`GitDatabase`] then fetch into that
100    /// if we can. If that can successfully load our revision then we've
101    /// populated the database with the latest version of `reference`, so
102    /// return that database and the rev we resolve to.
103    pub fn checkout(
104        &self,
105        into: &Path,
106        db: Option<GitDatabase>,
107        reference: &GitReference,
108        gctx: &GlobalContext,
109    ) -> CargoResult<(GitDatabase, git2::Oid)> {
110        if let Some(mut db) = db {
111            fetch(
112                &mut db.repo,
113                self.url.as_str(),
114                reference,
115                gctx,
116                RemoteKind::GitDependency,
117            )
118            .with_context(|| format!("failed to fetch into: {}", into.display()))?;
119
120            if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
121                return Ok((db, rev));
122            }
123        }
124
125        // Otherwise start from scratch to handle corrupt git repositories.
126        // After our fetch (which is interpreted as a clone now) we do the same
127        // resolution to figure out what we cloned.
128        if into.exists() {
129            paths::remove_dir_all(into)?;
130        }
131        paths::create_dir_all(into)?;
132        let mut repo = init(into, true)?;
133        fetch(
134            &mut repo,
135            self.url.as_str(),
136            reference,
137            gctx,
138            RemoteKind::GitDependency,
139        )
140        .with_context(|| format!("failed to clone into: {}", into.display()))?;
141        let rev = resolve_ref(reference, &repo)?;
142
143        Ok((
144            GitDatabase {
145                remote: self.clone(),
146                path: into.to_path_buf(),
147                repo,
148            },
149            rev,
150        ))
151    }
152
153    /// Creates a [`GitDatabase`] of this remote at `db_path`.
154    pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
155        let repo = git2::Repository::open(db_path)?;
156        Ok(GitDatabase {
157            remote: self.clone(),
158            path: db_path.to_path_buf(),
159            repo,
160        })
161    }
162}
163
164impl GitDatabase {
165    /// Checkouts to a revision at `dest`ination from this database.
166    #[tracing::instrument(skip(self, gctx))]
167    pub fn copy_to(
168        &self,
169        rev: git2::Oid,
170        dest: &Path,
171        gctx: &GlobalContext,
172    ) -> CargoResult<GitCheckout<'_>> {
173        // If the existing checkout exists, and it is fresh, use it.
174        // A non-fresh checkout can happen if the checkout operation was
175        // interrupted. In that case, the checkout gets deleted and a new
176        // clone is created.
177        let checkout = match git2::Repository::open(dest)
178            .ok()
179            .map(|repo| GitCheckout::new(self, rev, repo))
180            .filter(|co| co.is_fresh())
181        {
182            Some(co) => co,
183            None => {
184                let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
185                checkout.update_submodules(gctx)?;
186                guard.mark_ok()?;
187                checkout
188            }
189        };
190
191        Ok(checkout)
192    }
193
194    /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous.
195    pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
196        let obj = self.repo.find_object(revision, None)?;
197        Ok(GitShortID(obj.short_id()?))
198    }
199
200    /// Checks if the database contains the object of this `oid`..
201    pub fn contains(&self, oid: git2::Oid) -> bool {
202        self.repo.revparse_single(&oid.to_string()).is_ok()
203    }
204
    /// Resolves the reference `r` to an object ID against this database's
    /// repository, by delegating to [`resolve_ref`].
    pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
        resolve_ref(r, &self.repo)
    }
209}
210
211/// Resolves [`GitReference`] to an object ID with objects the `repo` currently has.
212pub fn resolve_ref(gitref: &GitReference, repo: &git2::Repository) -> CargoResult<git2::Oid> {
213    let id = match gitref {
214        // Note that we resolve the named tag here in sync with where it's
215        // fetched into via `fetch` below.
216        GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
217            let refname = format!("refs/remotes/origin/tags/{}", s);
218            let id = repo.refname_to_id(&refname)?;
219            let obj = repo.find_object(id, None)?;
220            let obj = obj.peel(ObjectType::Commit)?;
221            Ok(obj.id())
222        })()
223        .with_context(|| format!("failed to find tag `{}`", s))?,
224
225        // Resolve the remote name since that's all we're configuring in
226        // `fetch` below.
227        GitReference::Branch(s) => {
228            let name = format!("origin/{}", s);
229            let b = repo
230                .find_branch(&name, git2::BranchType::Remote)
231                .with_context(|| format!("failed to find branch `{}`", s))?;
232            b.get()
233                .target()
234                .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
235        }
236
237        // We'll be using the HEAD commit
238        GitReference::DefaultBranch => {
239            let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
240            let head = repo.find_object(head_id, None)?;
241            head.peel(ObjectType::Commit)?.id()
242        }
243
244        GitReference::Rev(s) => {
245            let obj = repo.revparse_single(s)?;
246            match obj.as_tag() {
247                Some(tag) => tag.target_id(),
248                None => obj.id(),
249            }
250        }
251    };
252    Ok(id)
253}
254
255impl<'a> GitCheckout<'a> {
256    /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout
257    /// is done. Use [`GitCheckout::is_fresh`] to check.
258    ///
259    /// * The `database` is where this checkout is from.
260    /// * The `repo` will be the checked out Git repository.
261    fn new(
262        database: &'a GitDatabase,
263        revision: git2::Oid,
264        repo: git2::Repository,
265    ) -> GitCheckout<'a> {
266        let path = repo.workdir().unwrap_or_else(|| repo.path());
267        GitCheckout {
268            path: path.to_path_buf(),
269            database,
270            revision,
271            repo,
272        }
273    }
274
275    /// Gets the remote repository URL.
276    fn remote_url(&self) -> &Url {
277        &self.database.remote.url()
278    }
279
280    /// Clone a repo for a `revision` into a local path from a `datatabase`.
281    /// This is a filesystem-to-filesystem clone.
282    fn clone_into(
283        into: &Path,
284        database: &'a GitDatabase,
285        revision: git2::Oid,
286        gctx: &GlobalContext,
287    ) -> CargoResult<(GitCheckout<'a>, CheckoutGuard)> {
288        let dirname = into.parent().unwrap();
289        paths::create_dir_all(&dirname)?;
290        if into.exists() {
291            paths::remove_dir_all(into)?;
292        }
293
294        // we're doing a local filesystem-to-filesystem clone so there should
295        // be no need to respect global configuration options, so pass in
296        // an empty instance of `git2::Config` below.
297        let git_config = git2::Config::new()?;
298
299        // Clone the repository, but make sure we use the "local" option in
300        // libgit2 which will attempt to use hardlinks to set up the database.
301        // This should speed up the clone operation quite a bit if it works.
302        //
303        // Note that we still use the same fetch options because while we don't
304        // need authentication information we may want progress bars and such.
305        let url = database.path.into_url()?;
306        let mut repo = None;
307        with_fetch_options(&git_config, url.as_str(), gctx, &mut |fopts| {
308            let mut checkout = git2::build::CheckoutBuilder::new();
309            checkout.dry_run(); // we'll do this below during a `reset`
310
311            let r = git2::build::RepoBuilder::new()
312                // use hard links and/or copy the database, we're doing a
313                // filesystem clone so this'll speed things up quite a bit.
314                .clone_local(git2::build::CloneLocal::Local)
315                .with_checkout(checkout)
316                .fetch_options(fopts)
317                .clone(url.as_str(), into)?;
318            // `git2` doesn't seem to handle shallow repos correctly when doing
319            // a local clone. Fortunately all that's needed is the copy of the
320            // one file that defines the shallow boundary, the commits which
321            // have their parents omitted as part of the shallow clone.
322            //
323            // TODO(git2): remove this when git2 supports shallow clone correctly
324            if database.repo.is_shallow() {
325                std::fs::copy(
326                    database.repo.path().join("shallow"),
327                    r.path().join("shallow"),
328                )?;
329            }
330            repo = Some(r);
331            Ok(())
332        })?;
333        let repo = repo.unwrap();
334
335        let checkout = GitCheckout::new(database, revision, repo);
336        let guard = checkout.reset(gctx)?;
337        Ok((checkout, guard))
338    }
339
340    /// Checks if the `HEAD` of this checkout points to the expected revision.
341    fn is_fresh(&self) -> bool {
342        match self.repo.revparse_single("HEAD") {
343            Ok(ref head) if head.id() == self.revision => {
344                // See comments in reset() for why we check this
345                self.path.join(CHECKOUT_READY_LOCK).exists()
346            }
347            _ => false,
348        }
349    }
350
    /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the
    /// revision of this checkout, with additional interrupt protection by a
    /// dummy file [`CHECKOUT_READY_LOCK`].
    ///
    /// If we're interrupted while performing a `git reset` (e.g., we die
    /// because of a signal) Cargo needs to be sure to try to check out this
    /// repo again on the next go-round.
    ///
    /// To enable this we have a dummy file in our checkout, [`.cargo-ok`],
    /// which if present means that the repo has been successfully reset and is
    /// ready to go. Hence if we start to do a reset, we make sure this file
    /// *doesn't* exist. The caller of this method has an option to perform
    /// additional operations (e.g. submodule update) before marking the
    /// check-out as ready through the returned [`CheckoutGuard`].
    ///
    /// [`.cargo-ok`]: CHECKOUT_READY_LOCK
    fn reset(&self, gctx: &GlobalContext) -> CargoResult<CheckoutGuard> {
        // Creating the guard removes the marker file, so from here on the
        // checkout counts as not ready until the guard is marked ok.
        let guard = CheckoutGuard::guard(&self.path);
        info!("reset {} to {}", self.repo.path().display(), self.revision);

        // Ensure libgit2 won't mess with newlines when we vendor.
        // Failing to open the config is ignored; failing to write the
        // setting is an error.
        if let Ok(mut git_config) = self.repo.config() {
            git_config.set_bool("core.autocrlf", false)?;
        }

        let object = self.repo.find_object(self.revision, None)?;
        reset(&self.repo, &object, gctx)?;

        Ok(guard)
    }
380
    /// Like `git submodule update --recursive` but for this git checkout.
    ///
    /// This function respects `submodule.<name>.update = none`[^1] git config.
    /// Submodules set to `none` won't be fetched.
    ///
    /// Relative submodule URLs are resolved against the URL of the parent
    /// repository (starting with this checkout's remote URL) through
    /// `absolute_submodule_url`.
    ///
    /// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
    fn update_submodules(&self, gctx: &GlobalContext) -> CargoResult<()> {
        // All the work happens in the nested helpers below; the explicit
        // `return` lets their definitions follow the call.
        return update_submodules(&self.repo, gctx, self.remote_url().as_str());

        /// Recursive helper for [`GitCheckout::update_submodules`].
        ///
        /// Updates every submodule of `repo`; `parent_remote_url` is the
        /// remote URL of `repo` itself, used as the base for relative
        /// submodule URLs.
        fn update_submodules(
            repo: &git2::Repository,
            gctx: &GlobalContext,
            parent_remote_url: &str,
        ) -> CargoResult<()> {
            // NOTE(review): this `unwrap` panics for a repository without a
            // working directory; callers here pass checkouts and opened
            // submodules, which presumably always have one.
            debug!("update submodules for: {:?}", repo.workdir().unwrap());

            for mut child in repo.submodules()? {
                update_submodule(repo, &mut child, gctx, parent_remote_url).with_context(|| {
                    format!(
                        "failed to update submodule `{}`",
                        child.name().unwrap_or("")
                    )
                })?;
            }
            Ok(())
        }

        /// Update a single Git submodule, and recurse into its submodules.
        ///
        /// * `parent` is the repository containing the submodule `child`.
        /// * `parent_remote_url` is the remote URL of `parent`, used to make
        ///   a relative submodule URL absolute.
        fn update_submodule(
            parent: &git2::Repository,
            child: &mut git2::Submodule<'_>,
            gctx: &GlobalContext,
            parent_remote_url: &str,
        ) -> CargoResult<()> {
            // `false` is git2's `overwrite` flag for `Submodule::init`.
            child.init(false)?;

            let child_url_str = child.url().ok_or_else(|| {
                anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
            })?;

            // Skip the submodule if the config says not to update it.
            if child.update_strategy() == git2::SubmoduleUpdate::None {
                gctx.shell().status(
                    "Skipping",
                    format!(
                        "git submodule `{}` due to update strategy in .gitmodules",
                        child_url_str
                    ),
                )?;
                return Ok(());
            }

            let child_remote_url = absolute_submodule_url(parent_remote_url, child_url_str)?;

            // A submodule which is listed in .gitmodules but not actually
            // checked out will not have a head id, so we should ignore it.
            let Some(head) = child.head_id() else {
                return Ok(());
            };

            // If the submodule hasn't been checked out yet, we need to
            // clone it. If it has been checked out and the head is the same
            // as the submodule's head, then we can skip an update and keep
            // recursing.
            let head_and_repo = child.open().and_then(|repo| {
                let target = repo.head()?.target();
                Ok((target, repo))
            });
            let mut repo = match head_and_repo {
                // Careful: inside this arm `head` shadows the outer `head`.
                // Here it is the (optional) target of the HEAD of the opened
                // submodule repository, compared with the submodule's
                // recorded head id.
                Ok((head, repo)) => {
                    if child.head_id() == head {
                        return update_submodules(&repo, gctx, &child_remote_url);
                    }
                    repo
                }
                // The submodule repository could not be opened (or its HEAD
                // could not be read): wipe its directory (best effort) and
                // start over with a freshly initialized repository.
                Err(..) => {
                    let path = parent.workdir().unwrap().join(child.path());
                    let _ = paths::remove_dir_all(&path);
                    init(&path, false)?
                }
            };
            // Fetch data from origin and reset to the head commit
            // (`head` is the outer binding again: the submodule's head id).
            let reference = GitReference::Rev(head.to_string());
            gctx.shell()
                .status("Updating", format!("git submodule `{child_remote_url}`"))?;
            fetch(
                &mut repo,
                &child_remote_url,
                &reference,
                gctx,
                RemoteKind::GitDependency,
            )
            .with_context(|| {
                let name = child.name().unwrap_or("");
                format!("failed to fetch submodule `{name}` from {child_remote_url}",)
            })?;

            let obj = repo.find_object(head, None)?;
            reset(&repo, &obj, gctx)?;
            // Recurse: the submodule's own remote URL becomes the base URL
            // for its nested submodules.
            update_submodules(&repo, gctx, &child_remote_url)
        }
    }
484}
485
/// Guard over the [`CHECKOUT_READY_LOCK`] marker file of a checkout.
/// See [`GitCheckout::reset`] for rationale on this type.
///
/// Creating the guard removes the marker file (best effort); the marker is
/// only created again by an explicit call to `mark_ok`.
#[must_use]
struct CheckoutGuard {
    /// Path of the marker file inside the checkout directory.
    ok_file: PathBuf,
}
491
492impl CheckoutGuard {
493    fn guard(path: &Path) -> Self {
494        let ok_file = path.join(CHECKOUT_READY_LOCK);
495        let _ = paths::remove_file(&ok_file);
496        Self { ok_file }
497    }
498
499    fn mark_ok(self) -> CargoResult<()> {
500        let _ = paths::create(self.ok_file)?;
501        Ok(())
502    }
503}
504
505/// Constructs an absolute URL for a child submodule URL with its parent base URL.
506///
507/// Git only assumes a submodule URL is a relative path if it starts with `./`
508/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute
509/// submodule URL.
510///
511/// At this moment it comes with some limitations:
512///
513/// * GitHub doesn't accept non-normalized URLs with relative paths.
514///   (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid)
515/// * `url` crate cannot parse SCP-like URLs.
516///   (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL)
517///
518/// To overcome these, this patch always tries [`Url::parse`] first to normalize
519/// the path. If it couldn't, append the relative path as the last resort and
520/// pray the remote git service supports non-normalized URLs.
521///
522/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295.
523///
524/// [^1]: <https://git-scm.com/docs/git-submodule>
525fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> CargoResult<Cow<'s, str>> {
526    let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) {
527        match Url::parse(base_url) {
528            Ok(mut base_url) => {
529                let path = base_url.path();
530                if !path.ends_with('/') {
531                    base_url.set_path(&format!("{path}/"));
532                }
533                let absolute_url = base_url.join(submodule_url).with_context(|| {
534                    format!(
535                        "failed to parse relative child submodule url `{submodule_url}` \
536                        using parent base url `{base_url}`"
537                    )
538                })?;
539                Cow::from(absolute_url.to_string())
540            }
541            Err(_) => {
542                let mut absolute_url = base_url.to_string();
543                if !absolute_url.ends_with('/') {
544                    absolute_url.push('/');
545                }
546                absolute_url.push_str(submodule_url);
547                Cow::from(absolute_url)
548            }
549        }
550    } else {
551        Cow::from(submodule_url)
552    };
553
554    Ok(absolute_url)
555}
556
557/// Prepare the authentication callbacks for cloning a git repository.
558///
559/// The main purpose of this function is to construct the "authentication
560/// callback" which is used to clone a repository. This callback will attempt to
561/// find the right authentication on the system (without user input) and will
562/// guide libgit2 in doing so.
563///
564/// The callback is provided `allowed` types of credentials, and we try to do as
565/// much as possible based on that:
566///
567/// * Prioritize SSH keys from the local ssh agent as they're likely the most
568///   reliable. The username here is prioritized from the credential
569///   callback, then from whatever is configured in git itself, and finally
570///   we fall back to the generic user of `git`.
571///
572/// * If a username/password is allowed, then we fallback to git2-rs's
573///   implementation of the credential helper. This is what is configured
574///   with `credential.helper` in git, and is the interface for the macOS
575///   keychain, for example.
576///
577/// * After the above two have failed, we just kinda grapple attempting to
578///   return *something*.
579///
580/// If any form of authentication fails, libgit2 will repeatedly ask us for
581/// credentials until we give it a reason to not do so. To ensure we don't
582/// just sit here looping forever we keep track of authentications we've
583/// attempted and we don't try the same ones again.
584fn with_authentication<T, F>(
585    gctx: &GlobalContext,
586    url: &str,
587    cfg: &git2::Config,
588    mut f: F,
589) -> CargoResult<T>
590where
591    F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
592{
593    let mut cred_helper = git2::CredentialHelper::new(url);
594    cred_helper.config(cfg);
595
596    let mut ssh_username_requested = false;
597    let mut cred_helper_bad = None;
598    let mut ssh_agent_attempts = Vec::new();
599    let mut any_attempts = false;
600    let mut tried_sshkey = false;
601    let mut url_attempt = None;
602
603    let orig_url = url;
604    let mut res = f(&mut |url, username, allowed| {
605        any_attempts = true;
606        if url != orig_url {
607            url_attempt = Some(url.to_string());
608        }
609        // libgit2's "USERNAME" authentication actually means that it's just
610        // asking us for a username to keep going. This is currently only really
611        // used for SSH authentication and isn't really an authentication type.
612        // The logic currently looks like:
613        //
614        //      let user = ...;
615        //      if (user.is_null())
616        //          user = callback(USERNAME, null, ...);
617        //
618        //      callback(SSH_KEY, user, ...)
619        //
620        // So if we're being called here then we know that (a) we're using ssh
621        // authentication and (b) no username was specified in the URL that
622        // we're trying to clone. We need to guess an appropriate username here,
623        // but that may involve a few attempts. Unfortunately we can't switch
624        // usernames during one authentication session with libgit2, so to
625        // handle this we bail out of this authentication session after setting
626        // the flag `ssh_username_requested`, and then we handle this below.
627        if allowed.contains(git2::CredentialType::USERNAME) {
628            debug_assert!(username.is_none());
629            ssh_username_requested = true;
630            return Err(git2::Error::from_str("gonna try usernames later"));
631        }
632
633        // An "SSH_KEY" authentication indicates that we need some sort of SSH
634        // authentication. This can currently either come from the ssh-agent
635        // process or from a raw in-memory SSH key. Cargo only supports using
636        // ssh-agent currently.
637        //
638        // If we get called with this then the only way that should be possible
639        // is if a username is specified in the URL itself (e.g., `username` is
640        // Some), hence the unwrap() here. We try custom usernames down below.
641        if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
642            // If ssh-agent authentication fails, libgit2 will keep
643            // calling this callback asking for other authentication
644            // methods to try. Make sure we only try ssh-agent once,
645            // to avoid looping forever.
646            tried_sshkey = true;
647            let username = username.unwrap();
648            debug_assert!(!ssh_username_requested);
649            ssh_agent_attempts.push(username.to_string());
650            return git2::Cred::ssh_key_from_agent(username);
651        }
652
653        // Sometimes libgit2 will ask for a username/password in plaintext. This
654        // is where Cargo would have an interactive prompt if we supported it,
655        // but we currently don't! Right now the only way we support fetching a
656        // plaintext password is through the `credential.helper` support, so
657        // fetch that here.
658        //
659        // If ssh-agent authentication fails, libgit2 will keep calling this
660        // callback asking for other authentication methods to try. Check
661        // cred_helper_bad to make sure we only try the git credential helper
662        // once, to avoid looping forever.
663        if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
664        {
665            let r = git2::Cred::credential_helper(cfg, url, username);
666            cred_helper_bad = Some(r.is_err());
667            return r;
668        }
669
670        // I'm... not sure what the DEFAULT kind of authentication is, but seems
671        // easy to support?
672        if allowed.contains(git2::CredentialType::DEFAULT) {
673            return git2::Cred::default();
674        }
675
676        // Whelp, we tried our best
677        Err(git2::Error::from_str("no authentication methods succeeded"))
678    });
679
680    // Ok, so if it looks like we're going to be doing ssh authentication, we
681    // want to try a few different usernames as one wasn't specified in the URL
682    // for us to use. In order, we'll try:
683    //
684    // * A credential helper's username for this URL, if available.
685    // * This account's username.
686    // * "git"
687    //
688    // We have to restart the authentication session each time (due to
689    // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
690    // call our callback, `f`, in a loop here.
691    if ssh_username_requested {
692        debug_assert!(res.is_err());
693        let mut attempts = vec![String::from("git")];
694        if let Ok(s) = gctx.get_env("USER").or_else(|_| gctx.get_env("USERNAME")) {
695            attempts.push(s.to_string());
696        }
697        if let Some(ref s) = cred_helper.username {
698            attempts.push(s.clone());
699        }
700
701        while let Some(s) = attempts.pop() {
702            // We should get `USERNAME` first, where we just return our attempt,
703            // and then after that we should get `SSH_KEY`. If the first attempt
704            // fails we'll get called again, but we don't have another option so
705            // we bail out.
706            let mut attempts = 0;
707            res = f(&mut |_url, username, allowed| {
708                if allowed.contains(git2::CredentialType::USERNAME) {
709                    return git2::Cred::username(&s);
710                }
711                if allowed.contains(git2::CredentialType::SSH_KEY) {
712                    debug_assert_eq!(Some(&s[..]), username);
713                    attempts += 1;
714                    if attempts == 1 {
715                        ssh_agent_attempts.push(s.to_string());
716                        return git2::Cred::ssh_key_from_agent(&s);
717                    }
718                }
719                Err(git2::Error::from_str("no authentication methods succeeded"))
720            });
721
722            // If we made two attempts then that means:
723            //
724            // 1. A username was requested, we returned `s`.
725            // 2. An ssh key was requested, we returned to look up `s` in the
726            //    ssh agent.
727            // 3. For whatever reason that lookup failed, so we were asked again
728            //    for another mode of authentication.
729            //
730            // Essentially, if `attempts == 2` then in theory the only error was
731            // that this username failed to authenticate (e.g., no other network
732            // errors happened). Otherwise something else is funny so we bail
733            // out.
734            if attempts != 2 {
735                break;
736            }
737        }
738    }
739    let mut err = match res {
740        Ok(e) => return Ok(e),
741        Err(e) => e,
742    };
743
744    // In the case of an authentication failure (where we tried something) then
745    // we try to give a more helpful error message about precisely what we
746    // tried.
747    if any_attempts {
748        let mut msg = "failed to authenticate when downloading \
749                       repository"
750            .to_string();
751
752        if let Some(attempt) = &url_attempt {
753            if url != attempt {
754                msg.push_str(": ");
755                msg.push_str(attempt);
756            }
757        }
758        msg.push('\n');
759        if !ssh_agent_attempts.is_empty() {
760            let names = ssh_agent_attempts
761                .iter()
762                .map(|s| format!("`{}`", s))
763                .collect::<Vec<_>>()
764                .join(", ");
765            msg.push_str(&format!(
766                "\n* attempted ssh-agent authentication, but \
767                 no usernames succeeded: {}",
768                names
769            ));
770        }
771        if let Some(failed_cred_helper) = cred_helper_bad {
772            if failed_cred_helper {
773                msg.push_str(
774                    "\n* attempted to find username/password via \
775                     git's `credential.helper` support, but failed",
776                );
777            } else {
778                msg.push_str(
779                    "\n* attempted to find username/password via \
780                     `credential.helper`, but maybe the found \
781                     credentials were incorrect",
782                );
783            }
784        }
785        msg.push_str("\n\n");
786        msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
787        msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
788        err = err.context(msg);
789
790        // Otherwise if we didn't even get to the authentication phase them we may
791        // have failed to set up a connection, in these cases hint on the
792        // `net.git-fetch-with-cli` configuration option.
793    } else if let Some(e) = err.downcast_ref::<git2::Error>() {
794        match e.class() {
795            ErrorClass::Net
796            | ErrorClass::Ssl
797            | ErrorClass::Submodule
798            | ErrorClass::FetchHead
799            | ErrorClass::Ssh
800            | ErrorClass::Http => {
801                let mut msg = "network failure seems to have happened\n".to_string();
802                msg.push_str(
803                    "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
804                );
805                msg.push_str(
806                    "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
807                );
808                err = err.context(msg);
809            }
810            ErrorClass::Callback => {
811                // This unwraps the git2 error. We're using the callback error
812                // specifically to convey errors from Rust land through the C
813                // callback interface. We don't need the `; class=Callback
814                // (26)` that gets tacked on to the git2 error message.
815                err = anyhow::format_err!("{}", e.message());
816            }
817            _ => {}
818        }
819    }
820
821    Err(err)
822}
823
824/// `git reset --hard` to the given `obj` for the `repo`.
825///
826/// The `obj` is a commit-ish to which the head should be moved.
827fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext) -> CargoResult<()> {
828    let mut pb = Progress::new("Checkout", gctx);
829    let mut opts = git2::build::CheckoutBuilder::new();
830    opts.progress(|_, cur, max| {
831        drop(pb.tick(cur, max, ""));
832    });
833    debug!("doing reset");
834    repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
835    debug!("reset done");
836    Ok(())
837}
838
839/// Prepares the callbacks for fetching a git repository.
840///
841/// The main purpose of this function is to construct everything before a fetch.
842/// This will attempt to setup a progress bar, the authentication for git,
843/// ssh known hosts check, and the network retry mechanism.
844///
845/// The callback is provided a fetch options, which can be used by the actual
846/// git fetch.
847pub fn with_fetch_options(
848    git_config: &git2::Config,
849    url: &str,
850    gctx: &GlobalContext,
851    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
852) -> CargoResult<()> {
853    let mut progress = Progress::new("Fetch", gctx);
854    let ssh_config = gctx.net_config()?.ssh.as_ref();
855    let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
856    let diagnostic_home_config = gctx.diagnostic_home_config();
857    network::retry::with_retry(gctx, || {
858        // Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
859        // so we store the error additionally and unwrap it later
860        let mut check_cb_result = Ok(());
861        let auth_result = with_authentication(gctx, url, git_config, |f| {
862            let port = Url::parse(url).ok().and_then(|url| url.port());
863            let mut last_update = Instant::now();
864            let mut rcb = git2::RemoteCallbacks::new();
865            // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
866            // sliding window for tracking the data transfer rate (in bytes/s).
867            let mut counter = MetricsCounter::<10>::new(0, last_update);
868            rcb.credentials(f);
869            rcb.certificate_check(|cert, host| {
870                match super::known_hosts::certificate_check(
871                    gctx,
872                    cert,
873                    host,
874                    port,
875                    config_known_hosts,
876                    &diagnostic_home_config,
877                ) {
878                    Ok(status) => Ok(status),
879                    Err(e) => {
880                        check_cb_result = Err(e);
881                        // This is not really used because it'll be overridden by libgit2
882                        // See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
883                        Err(git2::Error::from_str(
884                            "invalid or unknown remote ssh hostkey",
885                        ))
886                    }
887                }
888            });
889            rcb.transfer_progress(|stats| {
890                let indexed_deltas = stats.indexed_deltas();
891                let msg = if indexed_deltas > 0 {
892                    // Resolving deltas.
893                    format!(
894                        ", ({}/{}) resolving deltas",
895                        indexed_deltas,
896                        stats.total_deltas()
897                    )
898                } else {
899                    // Receiving objects.
900                    //
901                    // # Caveat
902                    //
903                    // Progress bar relies on git2 calling `transfer_progress`
904                    // to update its transfer rate, but we cannot guarantee a
905                    // periodic call of that callback. Thus if we don't receive
906                    // any data for, say, 10 seconds, the rate will get stuck
907                    // and never go down to 0B/s.
908                    // In the future, we need to find away to update the rate
909                    // even when the callback is not called.
910                    let now = Instant::now();
911                    // Scrape a `received_bytes` to the counter every 300ms.
912                    if now - last_update > Duration::from_millis(300) {
913                        counter.add(stats.received_bytes(), now);
914                        last_update = now;
915                    }
916                    let rate = HumanBytes(counter.rate() as u64);
917                    format!(", {rate:.2}/s")
918                };
919                progress
920                    .tick(stats.indexed_objects(), stats.total_objects(), &msg)
921                    .is_ok()
922            });
923
924            // Create a local anonymous remote in the repository to fetch the
925            // url
926            let mut opts = git2::FetchOptions::new();
927            opts.remote_callbacks(rcb);
928            cb(opts)
929        });
930        if auth_result.is_err() {
931            check_cb_result?;
932        }
933        auth_result?;
934        Ok(())
935    })
936}
937
938/// Attempts to fetch the given git `reference` for a Git repository.
939///
940/// This is the main entry for git clone/fetch. It does the followings:
941///
942/// * Turns [`GitReference`] into refspecs accordingly.
943/// * Dispatches `git fetch` using libgit2, gitoxide, or git CLI.
944///
945/// The `remote_url` argument is the git remote URL where we want to fetch from.
946///
947/// The `remote_kind` argument is a thing for [`-Zgitoxide`] shallow clones
948/// at this time. It could be extended when libgit2 supports shallow clones.
949///
950/// [`-Zgitoxide`]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#gitoxide
951pub fn fetch(
952    repo: &mut git2::Repository,
953    remote_url: &str,
954    reference: &GitReference,
955    gctx: &GlobalContext,
956    remote_kind: RemoteKind,
957) -> CargoResult<()> {
958    if let Some(offline_flag) = gctx.offline_flag() {
959        anyhow::bail!(
960            "attempting to update a git repository, but {offline_flag} \
961             was specified"
962        )
963    }
964
965    let shallow = remote_kind.to_shallow_setting(repo.is_shallow(), gctx);
966
967    // Flag to keep track if the rev is a full commit hash
968    let mut fast_path_rev: bool = false;
969
970    let oid_to_fetch = match github_fast_path(repo, remote_url, reference, gctx) {
971        Ok(FastPathRev::UpToDate) => return Ok(()),
972        Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
973        Ok(FastPathRev::Indeterminate) => None,
974        Err(e) => {
975            debug!("failed to check github {:?}", e);
976            None
977        }
978    };
979
980    maybe_gc_repo(repo, gctx)?;
981
982    clean_repo_temp_files(repo);
983
984    // Translate the reference desired here into an actual list of refspecs
985    // which need to get fetched. Additionally record if we're fetching tags.
986    let mut refspecs = Vec::new();
987    let mut tags = false;
988    // The `+` symbol on the refspec means to allow a forced (fast-forward)
989    // update which is needed if there is ever a force push that requires a
990    // fast-forward.
991    match reference {
992        // For branches and tags we can fetch simply one reference and copy it
993        // locally, no need to fetch other branches/tags.
994        GitReference::Branch(b) => {
995            refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
996        }
997
998        GitReference::Tag(t) => {
999            refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
1000        }
1001
1002        GitReference::DefaultBranch => {
1003            refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1004        }
1005
1006        GitReference::Rev(rev) => {
1007            if rev.starts_with("refs/") {
1008                refspecs.push(format!("+{0}:{0}", rev));
1009            } else if let Some(oid_to_fetch) = oid_to_fetch {
1010                fast_path_rev = true;
1011                refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
1012            } else if !matches!(shallow, gix::remote::fetch::Shallow::NoChange)
1013                && rev.parse::<Oid>().is_ok()
1014            {
1015                // There is a specific commit to fetch and we will do so in shallow-mode only
1016                // to not disturb the previous logic.
1017                // Note that with typical settings for shallowing, we will just fetch a single `rev`
1018                // as single commit.
1019                // The reason we write to `refs/remotes/origin/HEAD` is that it's of special significance
1020                // when during `GitReference::resolve()`, but otherwise it shouldn't matter.
1021                refspecs.push(format!("+{0}:refs/remotes/origin/HEAD", rev));
1022            } else {
1023                // We don't know what the rev will point to. To handle this
1024                // situation we fetch all branches and tags, and then we pray
1025                // it's somewhere in there.
1026                refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
1027                refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
1028                tags = true;
1029            }
1030        }
1031    }
1032
1033    let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1034        fetch_with_cli(repo, remote_url, &refspecs, tags, gctx)
1035    } else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1036        fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
1037    } else {
1038        fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
1039    };
1040
1041    if fast_path_rev {
1042        if let Some(oid) = oid_to_fetch {
1043            return result.with_context(|| format!("revision {} not found", oid));
1044        }
1045    }
1046    result
1047}
1048
1049/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
1050/// files atomically.
1051/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1052/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1053fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1054    matches!(
1055        err,
1056        gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::Fetch(
1057            gix::protocol::fetch::Error::LockShallowFile(_)
1058        ))
1059    )
1060}
1061
1062/// Attempts to use `git` CLI installed on the system to fetch a repository,
1063/// when the config value [`net.git-fetch-with-cli`][1] is set.
1064///
1065/// Unfortunately `libgit2` is notably lacking in the realm of authentication
1066/// when compared to the `git` command line. As a result, allow an escape
1067/// hatch for users that would prefer to use `git`-the-CLI for fetching
1068/// repositories instead of `libgit2`-the-library. This should make more
1069/// flavors of authentication possible while also still giving us all the
1070/// speed and portability of using `libgit2`.
1071///
1072/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli
1073fn fetch_with_cli(
1074    repo: &mut git2::Repository,
1075    url: &str,
1076    refspecs: &[String],
1077    tags: bool,
1078    gctx: &GlobalContext,
1079) -> CargoResult<()> {
1080    let mut cmd = ProcessBuilder::new("git");
1081    cmd.arg("fetch");
1082    if tags {
1083        cmd.arg("--tags");
1084    } else {
1085        cmd.arg("--no-tags");
1086    }
1087    match gctx.shell().verbosity() {
1088        Verbosity::Normal => {}
1089        Verbosity::Verbose => {
1090            cmd.arg("--verbose");
1091        }
1092        Verbosity::Quiet => {
1093            cmd.arg("--quiet");
1094        }
1095    }
1096    cmd.arg("--force") // handle force pushes
1097        .arg("--update-head-ok") // see discussion in #2078
1098        .arg(url)
1099        .args(refspecs)
1100        // If cargo is run by git (for example, the `exec` command in `git
1101        // rebase`), the GIT_DIR is set by git and will point to the wrong
1102        // location. This makes sure GIT_DIR is always the repository path.
1103        .env("GIT_DIR", repo.path())
1104        // The reset of these may not be necessary, but I'm including them
1105        // just to be extra paranoid and avoid any issues.
1106        .env_remove("GIT_WORK_TREE")
1107        .env_remove("GIT_INDEX_FILE")
1108        .env_remove("GIT_OBJECT_DIRECTORY")
1109        .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
1110        .cwd(repo.path());
1111    gctx.shell()
1112        .verbose(|s| s.status("Running", &cmd.to_string()))?;
1113    cmd.exec()?;
1114    Ok(())
1115}
1116
1117fn fetch_with_gitoxide(
1118    repo: &mut git2::Repository,
1119    remote_url: &str,
1120    refspecs: Vec<String>,
1121    tags: bool,
1122    shallow: gix::remote::fetch::Shallow,
1123    gctx: &GlobalContext,
1124) -> CargoResult<()> {
1125    let git2_repo = repo;
1126    let config_overrides = cargo_config_to_gitoxide_overrides(gctx)?;
1127    let repo_reinitialized = AtomicBool::default();
1128    let res = oxide::with_retry_and_progress(
1129        &git2_repo.path().to_owned(),
1130        gctx,
1131        &|repo_path,
1132          should_interrupt,
1133          mut progress,
1134          url_for_authentication: &mut dyn FnMut(&gix::bstr::BStr)| {
1135            // The `fetch` operation here may fail spuriously due to a corrupt
1136            // repository. It could also fail, however, for a whole slew of other
1137            // reasons (aka network related reasons). We want Cargo to automatically
1138            // recover from corrupt repositories, but we don't want Cargo to stomp
1139            // over other legitimate errors.
1140            //
1141            // Consequently we save off the error of the `fetch` operation and if it
1142            // looks like a "corrupt repo" error then we blow away the repo and try
1143            // again. If it looks like any other kind of error, or if we've already
1144            // blown away the repository, then we want to return the error as-is.
1145            loop {
1146                let res = oxide::open_repo(
1147                    repo_path,
1148                    config_overrides.clone(),
1149                    oxide::OpenMode::ForFetch,
1150                )
1151                .map_err(crate::sources::git::fetch::Error::from)
1152                .and_then(|repo| {
1153                    debug!("initiating fetch of {refspecs:?} from {remote_url}");
1154                    let url_for_authentication = &mut *url_for_authentication;
1155                    let remote = repo
1156                        .remote_at(remote_url)?
1157                        .with_fetch_tags(if tags {
1158                            gix::remote::fetch::Tags::All
1159                        } else {
1160                            gix::remote::fetch::Tags::Included
1161                        })
1162                        .with_refspecs(
1163                            refspecs.iter().map(|s| s.as_str()),
1164                            gix::remote::Direction::Fetch,
1165                        )
1166                        .map_err(crate::sources::git::fetch::Error::Other)?;
1167                    let url = remote
1168                        .url(gix::remote::Direction::Fetch)
1169                        .expect("set at init")
1170                        .to_owned();
1171                    let connection = remote.connect(gix::remote::Direction::Fetch)?;
1172                    let mut authenticate = connection.configured_credentials(url)?;
1173                    let connection = connection.with_credentials(
1174                        move |action: gix::protocol::credentials::helper::Action| {
1175                            if let Some(url) = action
1176                                .context()
1177                                .and_then(|gctx| gctx.url.as_ref().filter(|url| *url != remote_url))
1178                            {
1179                                url_for_authentication(url.as_ref());
1180                            }
1181                            authenticate(action)
1182                        },
1183                    );
1184                    let outcome = connection
1185                        .prepare_fetch(&mut progress, gix::remote::ref_map::Options::default())?
1186                        .with_shallow(shallow.clone())
1187                        .receive(&mut progress, should_interrupt)?;
1188                    Ok(outcome)
1189                });
1190                let err = match res {
1191                    Ok(_) => break,
1192                    Err(e) => e,
1193                };
1194                debug!("fetch failed: {}", err);
1195
1196                if !repo_reinitialized.load(Ordering::Relaxed)
1197                        // We check for errors that could occur if the configuration, refs or odb files are corrupted.
1198                        // We don't check for errors related to writing as `gitoxide` is expected to create missing leading
1199                        // folder before writing files into it, or else not even open a directory as git repository (which is
1200                        // also handled here).
1201                        && err.is_corrupted()
1202                    || has_shallow_lock_file(&err)
1203                {
1204                    repo_reinitialized.store(true, Ordering::Relaxed);
1205                    debug!(
1206                        "looks like this is a corrupt repository, reinitializing \
1207                     and trying again"
1208                    );
1209                    if oxide::reinitialize(repo_path).is_ok() {
1210                        continue;
1211                    }
1212                }
1213
1214                return Err(err.into());
1215            }
1216            Ok(())
1217        },
1218    );
1219    if repo_reinitialized.load(Ordering::Relaxed) {
1220        *git2_repo = git2::Repository::open(git2_repo.path())?;
1221    }
1222    res
1223}
1224
1225fn fetch_with_libgit2(
1226    repo: &mut git2::Repository,
1227    remote_url: &str,
1228    refspecs: Vec<String>,
1229    tags: bool,
1230    shallow: gix::remote::fetch::Shallow,
1231    gctx: &GlobalContext,
1232) -> CargoResult<()> {
1233    debug!("doing a fetch for {remote_url}");
1234    let git_config = git2::Config::open_default()?;
1235    with_fetch_options(&git_config, remote_url, gctx, &mut |mut opts| {
1236        if tags {
1237            opts.download_tags(git2::AutotagOption::All);
1238        }
1239        if let gix::remote::fetch::Shallow::DepthAtRemote(depth) = shallow {
1240            opts.depth(0i32.saturating_add_unsigned(depth.get()));
1241        }
1242        // The `fetch` operation here may fail spuriously due to a corrupt
1243        // repository. It could also fail, however, for a whole slew of other
1244        // reasons (aka network related reasons). We want Cargo to automatically
1245        // recover from corrupt repositories, but we don't want Cargo to stomp
1246        // over other legitimate errors.
1247        //
1248        // Consequently we save off the error of the `fetch` operation and if it
1249        // looks like a "corrupt repo" error then we blow away the repo and try
1250        // again. If it looks like any other kind of error, or if we've already
1251        // blown away the repository, then we want to return the error as-is.
1252        let mut repo_reinitialized = false;
1253        loop {
1254            debug!("initiating fetch of {refspecs:?} from {remote_url}");
1255            let res = repo
1256                .remote_anonymous(remote_url)?
1257                .fetch(&refspecs, Some(&mut opts), None);
1258            let err = match res {
1259                Ok(()) => break,
1260                Err(e) => e,
1261            };
1262            debug!("fetch failed: {}", err);
1263
1264            if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
1265            {
1266                repo_reinitialized = true;
1267                debug!(
1268                    "looks like this is a corrupt repository, reinitializing \
1269                     and trying again"
1270                );
1271                if reinitialize(repo).is_ok() {
1272                    continue;
1273                }
1274            }
1275
1276            return Err(err.into());
1277        }
1278        Ok(())
1279    })
1280}
1281
1282/// Attempts to `git gc` a repository.
1283///
1284/// Cargo has a bunch of long-lived git repositories in its global cache and
1285/// some, like the index, are updated very frequently. Right now each update
1286/// creates a new "pack file" inside the git database, and over time this can
1287/// cause bad performance and bad current behavior in libgit2.
1288///
1289/// One pathological use case today is where libgit2 opens hundreds of file
1290/// descriptors, getting us dangerously close to blowing out the OS limits of
1291/// how many fds we can have open. This is detailed in [#4403].
1292///
1293/// To try to combat this problem we attempt a `git gc` here. Note, though, that
1294/// we may not even have `git` installed on the system! As a result we
1295/// opportunistically try a `git gc` when the pack directory looks too big, and
1296/// failing that we just blow away the repository and start over.
1297///
1298/// In theory this shouldn't be too expensive compared to the network request
1299/// we're about to issue.
1300///
1301/// [#4403]: https://github.com/rust-lang/cargo/issues/4403
1302fn maybe_gc_repo(repo: &mut git2::Repository, gctx: &GlobalContext) -> CargoResult<()> {
1303    // Here we arbitrarily declare that if you have more than 100 files in your
1304    // `pack` folder that we need to do a gc.
1305    let entries = match repo.path().join("objects/pack").read_dir() {
1306        Ok(e) => e.count(),
1307        Err(_) => {
1308            debug!("skipping gc as pack dir appears gone");
1309            return Ok(());
1310        }
1311    };
1312    let max = gctx
1313        .get_env("__CARGO_PACKFILE_LIMIT")
1314        .ok()
1315        .and_then(|s| s.parse::<usize>().ok())
1316        .unwrap_or(100);
1317    if entries < max {
1318        debug!("skipping gc as there's only {} pack files", entries);
1319        return Ok(());
1320    }
1321
1322    // First up, try a literal `git gc` by shelling out to git. This is pretty
1323    // likely to fail though as we may not have `git` installed. Note that
1324    // libgit2 doesn't currently implement the gc operation, so there's no
1325    // equivalent there.
1326    match Command::new("git")
1327        .arg("gc")
1328        .current_dir(repo.path())
1329        .output()
1330    {
1331        Ok(out) => {
1332            debug!(
1333                "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
1334                out.status,
1335                String::from_utf8_lossy(&out.stdout),
1336                String::from_utf8_lossy(&out.stderr)
1337            );
1338            if out.status.success() {
1339                let new = git2::Repository::open(repo.path())?;
1340                *repo = new;
1341                return Ok(());
1342            }
1343        }
1344        Err(e) => debug!("git-gc failed to spawn: {}", e),
1345    }
1346
1347    // Alright all else failed, let's start over.
1348    reinitialize(repo)
1349}
1350
1351/// Removes temporary files left from previous activity.
1352///
1353/// If libgit2 is interrupted while indexing pack files, it will leave behind
1354/// some temporary files that it doesn't clean up. These can be quite large in
1355/// size, so this tries to clean things up.
1356///
1357/// This intentionally ignores errors. This is only an opportunistic cleaning,
1358/// and we don't really care if there are issues (there's unlikely anything
1359/// that can be done).
1360///
1361/// The git CLI has similar behavior (its temp files look like
1362/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1363/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1364/// filenames, so they never get cleaned up.
1365fn clean_repo_temp_files(repo: &git2::Repository) {
1366    let path = repo.path().join("objects/pack/pack_git2_*");
1367    let Some(pattern) = path.to_str() else {
1368        tracing::warn!("cannot convert {path:?} to a string");
1369        return;
1370    };
1371    let Ok(paths) = glob::glob(pattern) else {
1372        return;
1373    };
1374    for path in paths {
1375        if let Ok(path) = path {
1376            match paths::remove_file(&path) {
1377                Ok(_) => tracing::debug!("removed stale temp git file {path:?}"),
1378                Err(e) => {
1379                    tracing::warn!("failed to remove {path:?} while cleaning temp files: {e}")
1380                }
1381            }
1382        }
1383    }
1384}
1385
1386/// Reinitializes a given Git repository. This is useful when a Git repository
1387/// seems corrupted and we want to start over.
1388fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1389    // Here we want to drop the current repository object pointed to by `repo`,
1390    // so we initialize temporary repository in a sub-folder, blow away the
1391    // existing git folder, and then recreate the git repo. Finally we blow away
1392    // the `tmp` folder we allocated.
1393    let path = repo.path().to_path_buf();
1394    debug!("reinitializing git repo at {:?}", path);
1395    let tmp = path.join("tmp");
1396    let bare = !repo.path().ends_with(".git");
1397    *repo = init(&tmp, false)?;
1398    for entry in path.read_dir()? {
1399        let entry = entry?;
1400        if entry.file_name().to_str() == Some("tmp") {
1401            continue;
1402        }
1403        let path = entry.path();
1404        drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
1405    }
1406    *repo = init(&path, bare)?;
1407    paths::remove_dir_all(&tmp)?;
1408    Ok(())
1409}
1410
1411/// Initializes a Git repository at `path`.
1412fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1413    let mut opts = git2::RepositoryInitOptions::new();
1414    // Skip anything related to templates, they just call all sorts of issues as
1415    // we really don't want to use them yet they insist on being used. See #6240
1416    // for an example issue that comes up.
1417    opts.external_template(false);
1418    opts.bare(bare);
1419    Ok(git2::Repository::init_opts(&path, &opts)?)
1420}
1421
1422/// The result of GitHub fast path check. See [`github_fast_path`] for more.
1423enum FastPathRev {
1424    /// The local rev (determined by `reference.resolve(repo)`) is already up to
1425    /// date with what this rev resolves to on GitHub's server.
1426    UpToDate,
1427    /// The following SHA must be fetched in order for the local rev to become
1428    /// up to date.
1429    NeedsFetch(Oid),
1430    /// Don't know whether local rev is up to date. We'll fetch _all_ branches
1431    /// and tags from the server and see what happens.
1432    Indeterminate,
1433}
1434
1435/// Attempts GitHub's special fast path for testing if we've already got an
1436/// up-to-date copy of the repository.
1437///
1438/// Updating the index is done pretty regularly so we want it to be as fast as
1439/// possible. For registries hosted on GitHub (like the crates.io index) there's
1440/// a fast path available to use[^1] to tell us that there's no updates to be
1441/// made.
1442///
1443/// Note that this function should never cause an actual failure because it's
1444/// just a fast path. As a result, a caller should ignore `Err` returned from
1445/// this function and move forward on the normal path.
1446///
1447/// [^1]: <https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference>
1448fn github_fast_path(
1449    repo: &mut git2::Repository,
1450    url: &str,
1451    reference: &GitReference,
1452    gctx: &GlobalContext,
1453) -> CargoResult<FastPathRev> {
1454    let url = Url::parse(url)?;
1455    if !is_github(&url) {
1456        return Ok(FastPathRev::Indeterminate);
1457    }
1458
1459    let local_object = resolve_ref(reference, repo).ok();
1460
1461    let github_branch_name = match reference {
1462        GitReference::Branch(branch) => branch,
1463        GitReference::Tag(tag) => tag,
1464        GitReference::DefaultBranch => "HEAD",
1465        GitReference::Rev(rev) => {
1466            if rev.starts_with("refs/") {
1467                rev
1468            } else if looks_like_commit_hash(rev) {
1469                // `revparse_single` (used by `resolve`) is the only way to turn
1470                // short hash -> long hash, but it also parses other things,
1471                // like branch and tag names, which might coincidentally be
1472                // valid hex.
1473                //
1474                // We only return early if `rev` is a prefix of the object found
1475                // by `revparse_single`. Don't bother talking to GitHub in that
1476                // case, since commit hashes are permanent. If a commit with the
1477                // requested hash is already present in the local clone, its
1478                // contents must be the same as what is on the server for that
1479                // hash.
1480                //
1481                // If `rev` is not found locally by `revparse_single`, we'll
1482                // need GitHub to resolve it and get a hash. If `rev` is found
1483                // but is not a short hash of the found object, it's probably a
1484                // branch and we also need to get a hash from GitHub, in case
1485                // the branch has moved.
1486                if let Some(local_object) = local_object {
1487                    if is_short_hash_of(rev, local_object) {
1488                        debug!("github fast path already has {local_object}");
1489                        return Ok(FastPathRev::UpToDate);
1490                    }
1491                }
1492                // If `rev` is a full commit hash, the only thing it can resolve
1493                // to is itself. Don't bother talking to GitHub in that case
1494                // either. (This ensures that we always attempt to fetch the
1495                // commit directly even if we can't reach the GitHub API.)
1496                if let Some(oid) = rev_to_oid(rev) {
1497                    debug!("github fast path is already a full commit hash {rev}");
1498                    return Ok(FastPathRev::NeedsFetch(oid));
1499                }
1500                rev
1501            } else {
1502                debug!("can't use github fast path with `rev = \"{}\"`", rev);
1503                return Ok(FastPathRev::Indeterminate);
1504            }
1505        }
1506    };
1507
1508    // This expects GitHub urls in the form `github.com/user/repo` and nothing
1509    // else
1510    let mut pieces = url
1511        .path_segments()
1512        .ok_or_else(|| anyhow!("no path segments on url"))?;
1513    let username = pieces
1514        .next()
1515        .ok_or_else(|| anyhow!("couldn't find username"))?;
1516    let repository = pieces
1517        .next()
1518        .ok_or_else(|| anyhow!("couldn't find repository name"))?;
1519    if pieces.next().is_some() {
1520        anyhow::bail!("too many segments on URL");
1521    }
1522
1523    // Trim off the `.git` from the repository, if present, since that's
1524    // optional for GitHub and won't work when we try to use the API as well.
1525    let repository = repository.strip_suffix(".git").unwrap_or(repository);
1526
1527    let url = format!(
1528        "https://api.github.com/repos/{}/{}/commits/{}",
1529        username, repository, github_branch_name,
1530    );
1531    let mut handle = gctx.http()?.borrow_mut();
1532    debug!("attempting GitHub fast path for {}", url);
1533    handle.get(true)?;
1534    handle.url(&url)?;
1535    handle.useragent("cargo")?;
1536    handle.follow_location(true)?; // follow redirects
1537    handle.http_headers({
1538        let mut headers = List::new();
1539        headers.append("Accept: application/vnd.github.3.sha")?;
1540        if let Some(local_object) = local_object {
1541            headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
1542        }
1543        headers
1544    })?;
1545
1546    let mut response_body = Vec::new();
1547    let mut transfer = handle.transfer();
1548    transfer.write_function(|data| {
1549        response_body.extend_from_slice(data);
1550        Ok(data.len())
1551    })?;
1552    transfer.perform()?;
1553    drop(transfer); // end borrow of handle so that response_code can be called
1554
1555    let response_code = handle.response_code()?;
1556    if response_code == 304 {
1557        debug!("github fast path up-to-date");
1558        Ok(FastPathRev::UpToDate)
1559    } else if response_code == 200 {
1560        let oid_to_fetch = str::from_utf8(&response_body)?.parse::<Oid>()?;
1561        debug!("github fast path fetch {oid_to_fetch}");
1562        Ok(FastPathRev::NeedsFetch(oid_to_fetch))
1563    } else {
1564        // Usually response_code == 404 if the repository does not exist, and
1565        // response_code == 422 if exists but GitHub is unable to resolve the
1566        // requested rev.
1567        debug!("github fast path bad response code {response_code}");
1568        Ok(FastPathRev::Indeterminate)
1569    }
1570}
1571
1572/// Whether a `url` is one from GitHub.
1573fn is_github(url: &Url) -> bool {
1574    url.host_str() == Some("github.com")
1575}
1576
/// Returns `true` when `rev` could plausibly be a commit hash: at least seven
/// characters long and made up solely of ASCII hex digits.
fn looks_like_commit_hash(rev: &str) -> bool {
    if rev.len() < 7 {
        return false;
    }
    // Any non-ASCII character contains a byte that is not a hex digit, so
    // checking bytes is the same as checking characters here.
    rev.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1581
1582/// Whether `rev` is a shorter hash of `oid`.
1583fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
1584    let long_hash = oid.to_string();
1585    match long_hash.get(..rev.len()) {
1586        Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev),
1587        None => false,
1588    }
1589}
1590
#[cfg(test)]
mod tests {
    use super::absolute_submodule_url;

    /// Runs `absolute_submodule_url` over a table of
    /// `(base url, submodule url, expected result)` triples.
    #[test]
    fn test_absolute_submodule_url() {
        const CASES: &[(&str, &str, &str)] = &[
            // `ssh://` base: a submodule URL that is already absolute is
            // expected back untouched.
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "git@github.com:rust-lang/cargo.git",
                "git@github.com:rust-lang/cargo.git",
            ),
            // `ssh://` base: relative submodule paths are expected to be
            // resolved against the base, with `.` and `..` collapsed.
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./",
                "ssh://git@gitub.com/rust-lang/cargo/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../",
                "ssh://git@gitub.com/rust-lang/",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "./foo",
                "ssh://git@gitub.com/rust-lang/cargo/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo/",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo",
                "ssh://git@gitub.com/rust-lang/foo",
            ),
            (
                "ssh://git@gitub.com/rust-lang/cargo",
                "../foo/bar/../baz",
                "ssh://git@gitub.com/rust-lang/foo/baz",
            ),
            // scp-like base: a submodule URL that is already absolute is
            // expected back untouched.
            (
                "git@github.com:rust-lang/cargo.git",
                "ssh://git@gitub.com/rust-lang/cargo",
                "ssh://git@gitub.com/rust-lang/cargo",
            ),
            // scp-like base: relative submodule paths are expected to be
            // appended after a single `/`, without collapsing `.` or `..`.
            (
                "git@github.com:rust-lang/cargo.git",
                "./",
                "git@github.com:rust-lang/cargo.git/./",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../",
                "git@github.com:rust-lang/cargo.git/../",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "./foo",
                "git@github.com:rust-lang/cargo.git/./foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git/",
                "../foo",
                "git@github.com:rust-lang/cargo.git/../foo",
            ),
            (
                "git@github.com:rust-lang/cargo.git",
                "../foo/bar/../baz",
                "git@github.com:rust-lang/cargo.git/../foo/bar/../baz",
            ),
        ];

        for &(base_url, submodule_url, expected) in CASES {
            let resolved = absolute_submodule_url(base_url, submodule_url).unwrap();
            assert_eq!(
                expected, resolved,
                "base `{base_url}`; submodule `{submodule_url}`"
            );
        }
    }
}
1689
1690/// Turns a full commit hash revision into an oid.
1691///
1692/// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
1693/// Its length must be double to the underlying bytes (40 or 64),
1694/// otherwise libgit2 would happily zero-pad the returned oid.
1695///
1696/// See:
1697///
1698/// * <https://github.com/rust-lang/cargo/issues/13188>
1699/// * <https://github.com/rust-lang/cargo/issues/13968>
1700pub(super) fn rev_to_oid(rev: &str) -> Option<Oid> {
1701    Oid::from_str(rev)
1702        .ok()
1703        .filter(|oid| oid.as_bytes().len() * 2 == rev.len())
1704}