cargo/ops/cargo_package/
vcs.rs

1//! Helpers to gather the VCS information for `cargo package`.
2
3use std::collections::HashSet;
4use std::path::Path;
5use std::path::PathBuf;
6
7use anyhow::Context as _;
8use cargo_util::paths;
9use serde::Serialize;
10use tracing::debug;
11
12use crate::core::Package;
13use crate::core::Workspace;
14use crate::sources::PathEntry;
15use crate::CargoResult;
16use crate::GlobalContext;
17
18use super::PackageOpts;
19
20/// Represents the VCS information when packaging.
21#[derive(Serialize)]
22pub struct VcsInfo {
23    git: GitVcsInfo,
24    /// Path to the package within repo (empty string if root).
25    path_in_vcs: String,
26}
27
28/// Represents the Git VCS information when packaging.
29#[derive(Serialize)]
30pub struct GitVcsInfo {
31    sha1: String,
32    /// Indicate whether or not the Git worktree is dirty.
33    #[serde(skip_serializing_if = "std::ops::Not::not")]
34    dirty: bool,
35}
36
37/// Checks if the package source is in a *git* DVCS repository.
38///
39/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
40/// and `--allow-dirty` has not been passed,
41/// then `bail!` with an informative message.
42/// Otherwise return the sha1 hash of the current *HEAD* commit,
43/// or `None` if no repo is found.
44#[tracing::instrument(skip_all)]
45pub fn check_repo_state(
46    p: &Package,
47    src_files: &[PathEntry],
48    ws: &Workspace<'_>,
49    opts: &PackageOpts<'_>,
50) -> CargoResult<Option<VcsInfo>> {
51    let gctx = ws.gctx();
52    let Ok(repo) = git2::Repository::discover(p.root()) else {
53        gctx.shell().verbose(|shell| {
54            shell.warn(format_args!(
55                "no (git) VCS found for `{}`",
56                p.root().display()
57            ))
58        })?;
59        // No Git repo found. Have to assume it is clean.
60        return Ok(None);
61    };
62
63    let Some(workdir) = repo.workdir() else {
64        debug!(
65            "no (git) workdir found for repo at `{}`",
66            repo.path().display()
67        );
68        // No git workdir. Have to assume it is clean.
69        return Ok(None);
70    };
71
72    debug!("found a git repo at `{}`", workdir.display());
73    let path = p.manifest_path();
74    let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
75    let Ok(status) = repo.status_file(&path) else {
76        gctx.shell().verbose(|shell| {
77            shell.warn(format_args!(
78                "no (git) Cargo.toml found at `{}` in workdir `{}`",
79                path.display(),
80                workdir.display()
81            ))
82        })?;
83        // No checked-in `Cargo.toml` found. This package may be irrelevant.
84        // Have to assume it is clean.
85        return Ok(None);
86    };
87
88    if !(status & git2::Status::IGNORED).is_empty() {
89        gctx.shell().verbose(|shell| {
90            shell.warn(format_args!(
91                "found (git) Cargo.toml ignored at `{}` in workdir `{}`",
92                path.display(),
93                workdir.display()
94            ))
95        })?;
96        // An ignored `Cargo.toml` found. This package may be irrelevant.
97        // Have to assume it is clean.
98        return Ok(None);
99    }
100
101    warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
102
103    debug!(
104        "found (git) Cargo.toml at `{}` in workdir `{}`",
105        path.display(),
106        workdir.display(),
107    );
108    let Some(git) = git(ws, p, src_files, &repo, &opts)? else {
109        // If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
110        // then don't generate the corresponding file in order to maintain consistency with past behavior.
111        return Ok(None);
112    };
113
114    let path_in_vcs = path
115        .parent()
116        .and_then(|p| p.to_str())
117        .unwrap_or("")
118        .replace("\\", "/");
119
120    return Ok(Some(VcsInfo { git, path_in_vcs }));
121}
122
123/// Warns if any symlinks were checked out as plain text files.
124///
125/// Git config [`core.symlinks`] defaults to true when unset.
126/// In git-for-windows (and git as well),
127/// the config should be set to false explicitly when the repo was created,
128/// if symlink support wasn't detected [^1].
129///
130/// We assume the config was always set at creation time and never changed.
131/// So, if it is true, we don't bother users with any warning.
132///
133/// [^1]: <https://github.com/git-for-windows/git/blob/f1241afcc7956918d5da33ef74abd9cbba369247/setup.c#L2394-L2403>
134///
135/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks
136fn warn_symlink_checked_out_as_plain_text_file(
137    gctx: &GlobalContext,
138    src_files: &[PathEntry],
139    repo: &git2::Repository,
140) -> CargoResult<()> {
141    if repo
142        .config()
143        .and_then(|c| c.get_bool("core.symlinks"))
144        .unwrap_or(true)
145    {
146        return Ok(());
147    }
148
149    if src_files.iter().any(|f| f.maybe_plain_text_symlink()) {
150        let mut shell = gctx.shell();
151        shell.warn(format_args!(
152            "found symbolic links that may be checked out as regular files for git repo at `{}`\n\
153            This might cause the `.crate` file to include incorrect or incomplete files",
154            repo.workdir().unwrap().display(),
155        ))?;
156        let extra_note = if cfg!(windows) {
157            "\nAnd on Windows, enable the Developer Mode to support symlinks"
158        } else {
159            ""
160        };
161        shell.note(format_args!(
162            "to avoid this, set the Git config `core.symlinks` to `true`{extra_note}",
163        ))?;
164    }
165
166    Ok(())
167}
168
169/// The real git status check starts from here.
170fn git(
171    ws: &Workspace<'_>,
172    pkg: &Package,
173    src_files: &[PathEntry],
174    repo: &git2::Repository,
175    opts: &PackageOpts<'_>,
176) -> CargoResult<Option<GitVcsInfo>> {
177    // This is a collection of any dirty or untracked files. This covers:
178    // - new/modified/deleted/renamed/type change (index or worktree)
179    // - untracked files (which are "new" worktree files)
180    // - ignored (in case the user has an `include` directive that
181    //   conflicts with .gitignore).
182    let mut dirty_files = Vec::new();
183    let pathspec = relative_pathspec(repo, pkg.root());
184    collect_statuses(repo, &[pathspec.as_str()], &mut dirty_files)?;
185
186    // Include each submodule so that the error message can provide
187    // specifically *which* files in a submodule are modified.
188    status_submodules(repo, &mut dirty_files)?;
189
190    // Find the intersection of dirty in git, and the src_files that would
191    // be packaged. This is a lazy n^2 check, but seems fine with
192    // thousands of files.
193    let cwd = ws.gctx().cwd();
194    let mut dirty_src_files: Vec<_> = src_files
195        .iter()
196        .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
197        .map(|p| p.as_ref())
198        .chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
199        .map(|path| {
200            pathdiff::diff_paths(path, cwd)
201                .as_ref()
202                .unwrap_or(path)
203                .display()
204                .to_string()
205        })
206        .collect();
207    let dirty = !dirty_src_files.is_empty();
208    if !dirty || opts.allow_dirty {
209        // Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo.
210        // Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency.
211        if repo.is_empty()? {
212            return Ok(None);
213        }
214        let rev_obj = repo.revparse_single("HEAD")?;
215        Ok(Some(GitVcsInfo {
216            sha1: rev_obj.id().to_string(),
217            dirty,
218        }))
219    } else {
220        dirty_src_files.sort_unstable();
221        anyhow::bail!(
222            "{} files in the working directory contain changes that were \
223             not yet committed into git:\n\n{}\n\n\
224             to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
225            dirty_src_files.len(),
226            dirty_src_files.join("\n")
227        )
228    }
229}
230
231/// Checks whether "included" source files outside package root have been modified.
232///
233/// This currently looks at
234///
235/// * `package.readme` and `package.license-file` pointing to paths outside package root
236/// * symlinks targets reside outside package root
237/// * Any change in the root workspace manifest, regardless of what has changed.
238///
239/// This is required because those paths may link to a file outside the
240/// current package root, but still under the git workdir, affecting the
241/// final packaged `.crate` file.
242fn dirty_files_outside_pkg_root(
243    ws: &Workspace<'_>,
244    pkg: &Package,
245    repo: &git2::Repository,
246    src_files: &[PathEntry],
247) -> CargoResult<HashSet<PathBuf>> {
248    let pkg_root = pkg.root();
249    let workdir = repo.workdir().unwrap();
250
251    let mut dirty_files = HashSet::new();
252
253    let meta = pkg.manifest().metadata();
254    let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
255        .into_iter()
256        .filter_map(|p| p.as_deref())
257        .map(|path| paths::normalize_path(&pkg_root.join(path)))
258        .collect();
259
260    for rel_path in src_files
261        .iter()
262        .filter(|p| p.is_symlink_or_under_symlink())
263        .map(|p| p.as_ref().as_path())
264        .chain(metadata_paths.iter().map(AsRef::as_ref))
265        .chain([ws.root_manifest()])
266        // If inside package root. Don't bother checking git status.
267        .filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
268        // Handle files outside package root but under git workdir,
269        .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
270    {
271        if repo.status_file(&rel_path)? != git2::Status::CURRENT {
272            dirty_files.insert(workdir.join(rel_path));
273        }
274    }
275    Ok(dirty_files)
276}
277
278/// Helper to collect dirty statuses for a single repo.
279fn collect_statuses(
280    repo: &git2::Repository,
281    pathspecs: &[&str],
282    dirty_files: &mut Vec<PathBuf>,
283) -> CargoResult<()> {
284    let mut status_opts = git2::StatusOptions::new();
285    // Exclude submodules, as they are being handled manually by recursing
286    // into each one so that details about specific files can be
287    // retrieved.
288    pathspecs
289        .iter()
290        .fold(&mut status_opts, git2::StatusOptions::pathspec)
291        .exclude_submodules(true)
292        .include_ignored(true)
293        .include_untracked(true);
294    let repo_statuses = repo.statuses(Some(&mut status_opts)).with_context(|| {
295        format!(
296            "failed to retrieve git status from repo {}",
297            repo.path().display()
298        )
299    })?;
300    let workdir = repo.workdir().unwrap();
301    let this_dirty = repo_statuses.iter().filter_map(|entry| {
302        let path = entry.path().expect("valid utf-8 path");
303        if path.ends_with("Cargo.lock") && entry.status() == git2::Status::IGNORED {
304            // It is OK to include Cargo.lock even if it is ignored.
305            return None;
306        }
307        // Use an absolute path, so that comparing paths is easier
308        // (particularly with submodules).
309        Some(workdir.join(path))
310    });
311    dirty_files.extend(this_dirty);
312    Ok(())
313}
314
315/// Helper to collect dirty statuses while recursing into submodules.
316fn status_submodules(repo: &git2::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
317    for submodule in repo.submodules()? {
318        // Ignore submodules that don't open, they are probably not initialized.
319        // If its files are required, then the verification step should fail.
320        if let Ok(sub_repo) = submodule.open() {
321            status_submodules(&sub_repo, dirty_files)?;
322            collect_statuses(&sub_repo, &[], dirty_files)?;
323        }
324    }
325    Ok(())
326}
327
328/// Use pathspec so git only matches a certain path prefix
329fn relative_pathspec(repo: &git2::Repository, pkg_root: &Path) -> String {
330    let workdir = repo.workdir().unwrap();
331    let relpath = pkg_root.strip_prefix(workdir).unwrap_or(Path::new(""));
332    // to unix separators
333    relpath.to_str().unwrap().replace('\\', "/")
334}