cargo/ops/
vendor.rs

1use crate::core::SourceId;
2use crate::core::shell::Verbosity;
3use crate::core::{GitReference, Package, Workspace};
4use crate::ops;
5use crate::sources::CRATES_IO_REGISTRY;
6use crate::sources::RegistrySource;
7use crate::sources::SourceConfigMap;
8use crate::sources::path::PathSource;
9use crate::util::cache_lock::CacheLockMode;
10use crate::util::{CargoResult, GlobalContext, try_canonicalize};
11
12use anyhow::{Context as _, bail};
13use cargo_util::{Sha256, paths};
14use cargo_util_schemas::core::SourceKind;
15use cargo_util_schemas::manifest::TomlPackageBuild;
16use serde::Serialize;
17use walkdir::WalkDir;
18
19use std::collections::HashSet;
20use std::collections::{BTreeMap, BTreeSet, HashMap};
21use std::ffi::OsStr;
22use std::fs::{self, File, OpenOptions};
23use std::io::{self, Read, Write};
24use std::path::{Path, PathBuf};
25
/// Options controlling how `vendor` populates the vendor directory.
#[derive(Debug)]
pub struct VendorOptions<'a> {
    /// When `true`, entries already present in the vendor directory are never
    /// scheduled for removal; otherwise every pre-existing entry whose name
    /// does not start with `.` is removed unless it is (re)vendored.
    pub no_delete: bool,
    /// When `true`, every package is vendored into a `name-version`
    /// directory. When `false`, only versions other than the highest one of a
    /// given package name get the version suffix.
    pub versioned_dirs: bool,
    /// Directory the sources are vendored into.
    pub destination: &'a Path,
    /// Additional paths, resolved against the current working directory, each
    /// of which is loaded as a workspace and vendored alongside the main one.
    pub extra: Vec<PathBuf>,
    /// Whether the configured source replacements are consulted when mapping
    /// a package's source; when `false` an empty source configuration is used.
    pub respect_source_config: bool,
}
33
34pub fn vendor(ws: &Workspace<'_>, opts: &VendorOptions<'_>) -> CargoResult<()> {
35    let gctx = ws.gctx();
36    let mut extra_workspaces = Vec::new();
37    for extra in opts.extra.iter() {
38        let extra = gctx.cwd().join(extra);
39        let ws = Workspace::new(&extra, gctx)?;
40        extra_workspaces.push(ws);
41    }
42    let workspaces = extra_workspaces.iter().chain(Some(ws)).collect::<Vec<_>>();
43    let _lock = gctx.acquire_package_cache_lock(CacheLockMode::DownloadExclusive)?;
44    let vendor_config = sync(gctx, &workspaces, opts).context("failed to sync")?;
45
46    if gctx.shell().verbosity() != Verbosity::Quiet {
47        if vendor_config.source.is_empty() {
48            crate::drop_eprintln!(gctx, "There is no dependency to vendor in this project.");
49        } else {
50            crate::drop_eprint!(
51                gctx,
52                "To use vendored sources, add this to your .cargo/config.toml for this project:\n\n"
53            );
54            crate::drop_print!(gctx, "{}", &toml::to_string_pretty(&vendor_config).unwrap());
55        }
56    }
57
58    Ok(())
59}
60
/// Source-replacement configuration produced by vendoring.
///
/// Serialized to TOML and printed so it can be added to
/// `.cargo/config.toml`.
#[derive(Serialize)]
struct VendorConfig {
    /// Entries of the `source` table, keyed by source name. Empty when nothing
    /// was vendored.
    source: BTreeMap<String, VendorSource>,
}
65
/// One entry of the `source` table in the generated configuration.
///
/// The enum is `untagged`, so a variant is serialized as just its fields.
#[derive(Serialize)]
#[serde(rename_all = "lowercase", untagged)]
enum VendorSource {
    /// The vendor directory itself, which the other entries are replaced with.
    Directory {
        directory: String,
    },
    /// A registry source redirected to the vendored sources.
    Registry {
        // `None` for crates.io, the registry URL for any other remote registry.
        registry: Option<String>,
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
    /// A git source redirected to the vendored sources.
    Git {
        git: String,
        // At most one of `branch`, `tag` and `rev` is set; all are `None` when
        // the dependency uses the repository's default branch.
        branch: Option<String>,
        tag: Option<String>,
        rev: Option<String>,
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
}
86
87/// Cache for mapping replaced sources to replacements.
88struct SourceReplacementCache<'gctx> {
89    map: SourceConfigMap<'gctx>,
90    cache: HashMap<SourceId, SourceId>,
91}
92
93impl SourceReplacementCache<'_> {
94    fn new(
95        gctx: &GlobalContext,
96        respect_source_config: bool,
97    ) -> CargoResult<SourceReplacementCache<'_>> {
98        Ok(SourceReplacementCache {
99            map: if respect_source_config {
100                SourceConfigMap::new(gctx)
101            } else {
102                SourceConfigMap::empty(gctx)
103            }?,
104            cache: Default::default(),
105        })
106    }
107
108    fn get(&mut self, id: SourceId) -> CargoResult<SourceId> {
109        use std::collections::hash_map::Entry;
110        match self.cache.entry(id) {
111            Entry::Occupied(e) => Ok(e.get().clone()),
112            Entry::Vacant(e) => {
113                let replaced = self.map.load(id, &HashSet::new())?.replaced_source_id();
114                Ok(e.insert(replaced).clone())
115            }
116        }
117    }
118}
119
/// Brings the vendor directory in sync with the dependencies of `workspaces`
/// and returns the source-replacement configuration describing the result.
///
/// The work happens in these steps:
///
/// 1. Create the vendor directory and, unless `opts.no_delete` is set, note
///    its existing entries as candidates for removal.
/// 2. Resolve and download the packages of every workspace, skipping path
///    dependencies.
/// 3. Reject packages that share name and version but come from different
///    sources.
/// 4. Copy or unpack each package into its own directory and write its
///    `.cargo-checksum.json`.
/// 5. Delete the removal candidates that were not claimed by a package.
/// 6. Build one configuration entry per original source, each replaced with
///    the `vendored-sources` directory source.
///
/// The returned configuration has an empty `source` map when there was
/// nothing to vendor; in that case a vendor directory that did not exist
/// beforehand is removed again.
fn sync(
    gctx: &GlobalContext,
    workspaces: &[&Workspace<'_>],
    opts: &VendorOptions<'_>,
) -> CargoResult<VendorConfig> {
    let dry_run = false;
    // Prefer the canonical form of the destination, falling back to the path
    // as given when canonicalization fails.
    let vendor_dir = try_canonicalize(opts.destination);
    let vendor_dir = vendor_dir.as_deref().unwrap_or(opts.destination);
    // Remembered before creating the directory so that it is only cleaned up
    // at the end if this invocation created it.
    let vendor_dir_already_exists = vendor_dir.exists();

    paths::create_dir_all(&vendor_dir)?;
    // Pre-existing entries that get deleted at the end unless something below
    // claims them. Names starting with `.` are never candidates.
    let mut to_remove = HashSet::new();
    if !opts.no_delete {
        for entry in vendor_dir.read_dir()? {
            let entry = entry?;
            if !entry
                .file_name()
                .to_str()
                .map_or(false, |s| s.starts_with('.'))
            {
                to_remove.insert(entry.path());
            }
        }
    }

    let mut source_replacement_cache =
        SourceReplacementCache::new(gctx, opts.respect_source_config)?;

    // Package id -> checksum recorded by the resolve, if any.
    let mut checksums = HashMap::new();
    // Package id -> downloaded package, for everything that will be vendored.
    let mut ids = BTreeMap::new();

    // Let's download all crates and start storing internal tables about them.
    for ws in workspaces {
        let (packages, resolve) = ops::resolve_ws(ws, dry_run)
            .with_context(|| format!("failed to load lockfile for {}", ws.root().display()))?;

        packages
            .get_many(resolve.iter())
            .with_context(|| format!("failed to download packages for {}", ws.root().display()))?;

        for pkg in resolve.iter() {
            let sid = source_replacement_cache.get(pkg.source_id())?;

            // Don't vendor path crates since they're already in the repository
            if sid.is_path() {
                // And don't delete actual source code!
                if let Ok(path) = sid.url().to_file_path() {
                    if let Ok(path) = try_canonicalize(path) {
                        to_remove.remove(&path);
                    }
                }
                continue;
            }

            ids.insert(
                pkg,
                packages
                    .get_one(pkg)
                    .context("failed to fetch package")?
                    .clone(),
            );

            checksums.insert(pkg, resolve.checksums().get(&pkg).cloned());
        }
    }

    // Package name -> version -> source. The same name and version coming from
    // two different sources is rejected.
    let mut versions = HashMap::new();
    for id in ids.keys() {
        let map = versions.entry(id.name()).or_insert_with(BTreeMap::default);
        if let Some(prev) = map.get(&id.version()) {
            bail!(
                "found duplicate version of package `{} v{}` \
                 vendored from two sources:\n\
                 \n\
                 \tsource 1: {}\n\
                 \tsource 2: {}",
                id.name(),
                id.version(),
                prev,
                id.source_id()
            );
        }
        map.insert(id.version(), id.source_id());
    }

    // Original (unreplaced) sources of everything vendored; these become the
    // entries of the generated configuration.
    let mut sources = BTreeSet::new();
    // Scratch buffer shared by all file copies.
    let mut tmp_buf = [0; 64 * 1024];
    for (id, pkg) in ids.iter() {
        // Next up, copy it to the vendor directory
        let src = pkg.root();
        // Only the highest version of a package gets the unsuffixed directory
        // name, and only when versioned directories were not requested.
        let max_version = *versions[&id.name()].iter().rev().next().unwrap().0;
        let dir_has_version_suffix = opts.versioned_dirs || id.version() != max_version;
        let dst_name = if dir_has_version_suffix {
            // Eg vendor/futures-0.1.13
            format!("{}-{}", id.name(), id.version())
        } else {
            // Eg vendor/futures
            id.name().to_string()
        };

        sources.insert(id.source_id());
        let dst = vendor_dir.join(&dst_name);
        // This directory is in use, so it must survive the final cleanup.
        to_remove.remove(&dst);
        let cksum = dst.join(".cargo-checksum.json");
        // Registries are the only immutable sources,
        // path and git dependencies' versions cannot be trusted to mean "no change"
        if dir_has_version_suffix && id.source_id().is_registry() && cksum.exists() {
            // Don't re-copy directory with version suffix in case it comes from a registry
            continue;
        }

        gctx.shell().status(
            "Vendoring",
            &format!("{} ({}) to {}", id, src.to_string_lossy(), dst.display()),
        )?;

        // Start from a clean directory. The result is deliberately ignored.
        let _ = fs::remove_dir_all(&dst);

        // Relative file path (with `/` separators) -> checksum of the file.
        let mut file_cksums = BTreeMap::new();

        // Need this mapping anyway because we will directly consult registry sources,
        // otherwise builtin source replacement (sparse registry) won't be respected.
        let sid = source_replacement_cache.get(id.source_id())?;

        if sid.is_registry() {
            // To keep the unpacked source from registry in a pristine state,
            // we'll do a direct extraction into the vendor directory.
            let registry = match sid.kind() {
                SourceKind::Registry | SourceKind::SparseRegistry => {
                    RegistrySource::remote(sid, &Default::default(), gctx)?
                }
                SourceKind::LocalRegistry => {
                    let path = sid.url().to_file_path().expect("local path");
                    RegistrySource::local(sid, &path, &Default::default(), gctx)
                }
                _ => unreachable!("not registry source: {sid}"),
            };

            // Yields every regular file and symlink below `root`.
            let walkdir = |root| {
                WalkDir::new(root)
                    .into_iter()
                    // It is safe to skip errors,
                    // since we'll hit them during copying/reading later anyway.
                    .filter_map(|e| e.ok())
                    // There should be no symlink in tarballs on crates.io,
                    // but might be wrong for local registries.
                    // Hence here be conservative and include symlinks.
                    .filter(|e| e.file_type().is_file() || e.file_type().is_symlink())
            };
            // Checksums the files found below `root` and records them in
            // `file_cksums` under their path relative to `dst`.
            let mut compute_file_cksums = |root| {
                for e in walkdir(root) {
                    let path = e.path();
                    let relative = path.strip_prefix(&dst).unwrap();
                    let cksum = Sha256::new()
                        .update_path(path)
                        .map(Sha256::finish_hex)
                        .with_context(|| format!("failed to checksum `{}`", path.display()))?;
                    file_cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
                }
                Ok::<_, anyhow::Error>(())
            };
            if dir_has_version_suffix {
                registry.unpack_package_in(id, &vendor_dir, &vendor_this)?;
                compute_file_cksums(&dst)?;
            } else {
                // Due to the extra sanity check in registry unpack
                // (ensure it contain only one top-level directory with name `pkg-version`),
                // we can only unpack a directory with version suffix,
                // and move it to the no suffix directory.
                let staging_dir = tempfile::Builder::new()
                    .prefix(".vendor-staging")
                    .tempdir_in(vendor_dir)?;
                let unpacked_src =
                    registry.unpack_package_in(id, staging_dir.path(), &vendor_this)?;

                // Setting this environment variable forces the copy fallback
                // below, so that it can be exercised by tests.
                let rename_result = if gctx
                    .get_env_os("__CARGO_TEST_VENDOR_FALLBACK_CP_SOURCES")
                    .is_some()
                {
                    Err(io::Error::new(
                        io::ErrorKind::Other,
                        "simulated rename error for testing",
                    ))
                } else {
                    fs::rename(&unpacked_src, &dst)
                };

                if let Err(e) = rename_result {
                    // This fallback covers the situations in which `fs::rename` fails, such as:
                    // - In Windows 10 versions earlier than 1607, the destination of `fs::rename` can't be a directory.
                    // - `from` and `to` are on separate filesystems.
                    // - AntiVirus or our system indexer are doing stuff simultaneously.
                    // - Any other reasons documented in std::fs::rename.
                    tracing::warn!("failed to `mv {unpacked_src:?} {dst:?}`: {e}");
                    let paths: Vec<_> = walkdir(&unpacked_src).map(|e| e.into_path()).collect();
                    cp_sources(
                        pkg,
                        &unpacked_src,
                        &paths,
                        &dst,
                        &mut file_cksums,
                        &mut tmp_buf,
                        gctx,
                    )
                    .with_context(|| format!("failed to copy vendored sources for {id}"))?;
                } else {
                    compute_file_cksums(&dst)?;
                }
            }
        } else {
            // Non-registry sources are copied file by file from the package
            // root, using the file list reported by `PathSource`.
            let paths = PathSource::new(src, sid, gctx)
                .list_files(pkg)?
                .into_iter()
                .map(|p| p.into_path_buf())
                .collect::<Vec<_>>();
            cp_sources(pkg, src, &paths, &dst, &mut file_cksums, &mut tmp_buf, gctx)
                .with_context(|| format!("failed to copy vendored sources for {id}"))?;
        }

        // Finally, emit the metadata about this package
        let json = serde_json::json!({
            "package": checksums.get(id),
            "files": file_cksums,
        });

        paths::write(&cksum, json.to_string())?;
    }

    // Whatever is still listed was not claimed by any package above.
    for path in to_remove {
        if path.is_dir() {
            paths::remove_dir_all(&path)?;
        } else {
            paths::remove_file(&path)?;
        }
    }

    // add our vendored source
    let mut config = BTreeMap::new();

    let merged_source_name = "vendored-sources";

    // replace original sources with vendor
    for source_id in sources {
        let name = if source_id.is_crates_io() {
            CRATES_IO_REGISTRY.to_string()
        } else {
            // Remove `precise` since that makes the source name very long,
            // and isn't needed to disambiguate multiple sources.
            source_id.without_precise().as_url().to_string()
        };

        let source = if source_id.is_crates_io() {
            VendorSource::Registry {
                registry: None,
                replace_with: merged_source_name.to_string(),
            }
        } else if source_id.is_remote_registry() {
            let registry = source_id.url().to_string();
            VendorSource::Registry {
                registry: Some(registry),
                replace_with: merged_source_name.to_string(),
            }
        } else if source_id.is_git() {
            let mut branch = None;
            let mut tag = None;
            let mut rev = None;
            if let Some(reference) = source_id.git_reference() {
                match *reference {
                    GitReference::Branch(ref b) => branch = Some(b.clone()),
                    GitReference::Tag(ref t) => tag = Some(t.clone()),
                    GitReference::Rev(ref r) => rev = Some(r.clone()),
                    GitReference::DefaultBranch => {}
                }
            }
            VendorSource::Git {
                git: source_id.url().to_string(),
                branch,
                tag,
                rev,
                replace_with: merged_source_name.to_string(),
            }
        } else {
            // Any other kind of source reaching this point is treated as a bug.
            panic!("Invalid source ID: {}", source_id)
        };
        config.insert(name, source);
    }

    if !config.is_empty() {
        config.insert(
            merged_source_name.to_string(),
            VendorSource::Directory {
                // Windows-flavour paths are valid here on Windows but not on Unix.
                // This backslash normalization is for making output paths more
                // cross-platform compatible.
                directory: opts.destination.to_string_lossy().replace("\\", "/"),
            },
        );
    } else if !vendor_dir_already_exists {
        // Nothing to vendor. Remove the destination dir we've just created.
        paths::remove_dir(vendor_dir)?;
    }

    Ok(VendorConfig { source: config })
}
424
425fn cp_sources(
426    pkg: &Package,
427    src: &Path,
428    paths: &[PathBuf],
429    dst: &Path,
430    cksums: &mut BTreeMap<String, String>,
431    tmp_buf: &mut [u8],
432    gctx: &GlobalContext,
433) -> CargoResult<()> {
434    for p in paths {
435        let relative = p.strip_prefix(&src).unwrap();
436
437        if !vendor_this(relative) {
438            continue;
439        }
440
441        // Join pathname components individually to make sure that the joined
442        // path uses the correct directory separators everywhere, since
443        // `relative` may use Unix-style and `dst` may require Windows-style
444        // backslashes.
445        let dst = relative
446            .iter()
447            .fold(dst.to_owned(), |acc, component| acc.join(&component));
448
449        paths::create_dir_all(dst.parent().unwrap())?;
450        let mut dst_opts = OpenOptions::new();
451        dst_opts.write(true).create(true).truncate(true);
452        // When vendoring git dependencies, the manifest has not been normalized like it would be
453        // when published. This causes issue when the manifest is using workspace inheritance.
454        // To get around this issue we use the "original" manifest after `{}.workspace = true`
455        // has been resolved for git dependencies.
456        let cksum = if dst.file_name() == Some(OsStr::new("Cargo.toml"))
457            && pkg.package_id().source_id().is_git()
458        {
459            let packaged_files = paths
460                .iter()
461                .map(|p| p.strip_prefix(src).unwrap().to_owned())
462                .collect::<Vec<_>>();
463            let vendored_pkg = prepare_for_vendor(pkg, &packaged_files, gctx)?;
464            let contents = vendored_pkg.manifest().to_normalized_contents()?;
465            copy_and_checksum(
466                &dst,
467                &mut dst_opts,
468                &mut contents.as_bytes(),
469                Path::new("Generated Cargo.toml"),
470                tmp_buf,
471            )?
472        } else {
473            let mut src = File::open(&p).with_context(|| format!("failed to open {:?}", &p))?;
474            #[cfg(unix)]
475            {
476                use std::os::unix::fs::{MetadataExt, OpenOptionsExt};
477                let src_metadata = src
478                    .metadata()
479                    .with_context(|| format!("failed to stat {:?}", p))?;
480                dst_opts.mode(src_metadata.mode());
481            }
482            copy_and_checksum(&dst, &mut dst_opts, &mut src, &p, tmp_buf)?
483        };
484
485        cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
486    }
487    Ok(())
488}
489
490/// HACK: Perform the bare minimum of `prepare_for_publish` needed for #14348.
491///
492/// There are parts of `prepare_for_publish` that could be directly useful (e.g. stripping
493/// `[workspace]`) while other parts that require other filesystem operations (moving the README
494/// file) and ideally we'd reuse `cargo package` code to take care of all of this for us.
495fn prepare_for_vendor(
496    me: &Package,
497    packaged_files: &[PathBuf],
498    gctx: &GlobalContext,
499) -> CargoResult<Package> {
500    let contents = me.manifest().contents();
501    let document = me.manifest().document();
502    let original_toml = prepare_toml_for_vendor(
503        me.manifest().normalized_toml().clone(),
504        packaged_files,
505        gctx,
506    )?;
507    let normalized_toml = original_toml.clone();
508    let features = me.manifest().unstable_features().clone();
509    let workspace_config = me.manifest().workspace_config().clone();
510    let source_id = me.package_id().source_id();
511    let mut warnings = Default::default();
512    let mut errors = Default::default();
513    let manifest = crate::util::toml::to_real_manifest(
514        contents.to_owned(),
515        document.clone(),
516        original_toml,
517        normalized_toml,
518        features,
519        workspace_config,
520        source_id,
521        me.manifest_path(),
522        me.manifest().is_embedded(),
523        gctx,
524        &mut warnings,
525        &mut errors,
526    )?;
527    let new_pkg = Package::new(manifest, me.manifest_path());
528    Ok(new_pkg)
529}
530
531fn prepare_toml_for_vendor(
532    mut me: cargo_util_schemas::manifest::TomlManifest,
533    packaged_files: &[PathBuf],
534    gctx: &GlobalContext,
535) -> CargoResult<cargo_util_schemas::manifest::TomlManifest> {
536    let package = me
537        .package
538        .as_mut()
539        .expect("venedored manifests must have packages");
540    // Validates if build script file is included in package. If not, warn and ignore.
541    if let Some(custom_build_scripts) = package.normalized_build().expect("previously normalized") {
542        let mut included_scripts = Vec::new();
543        for script in custom_build_scripts {
544            let path = paths::normalize_path(Path::new(script));
545            let included = packaged_files.contains(&path);
546            if included {
547                let path = path
548                    .into_os_string()
549                    .into_string()
550                    .map_err(|_err| anyhow::format_err!("non-UTF8 `package.build`"))?;
551                let path = crate::util::toml::normalize_path_string_sep(path);
552                included_scripts.push(path);
553            } else {
554                gctx.shell().warn(format!(
555                    "ignoring `package.build` entry `{}` as it is not included in the published package",
556                    path.display()
557                ))?;
558            }
559        }
560        package.build = Some(match included_scripts.len() {
561            0 => TomlPackageBuild::Auto(false),
562            1 => TomlPackageBuild::SingleScript(included_scripts[0].clone()),
563            _ => TomlPackageBuild::MultipleScript(included_scripts),
564        });
565    }
566
567    let lib = if let Some(target) = &me.lib {
568        crate::util::toml::prepare_target_for_publish(
569            target,
570            Some(packaged_files),
571            "library",
572            gctx,
573        )?
574    } else {
575        None
576    };
577    let bin = crate::util::toml::prepare_targets_for_publish(
578        me.bin.as_ref(),
579        Some(packaged_files),
580        "binary",
581        gctx,
582    )?;
583    let example = crate::util::toml::prepare_targets_for_publish(
584        me.example.as_ref(),
585        Some(packaged_files),
586        "example",
587        gctx,
588    )?;
589    let test = crate::util::toml::prepare_targets_for_publish(
590        me.test.as_ref(),
591        Some(packaged_files),
592        "test",
593        gctx,
594    )?;
595    let bench = crate::util::toml::prepare_targets_for_publish(
596        me.bench.as_ref(),
597        Some(packaged_files),
598        "benchmark",
599        gctx,
600    )?;
601
602    me.lib = lib;
603    me.bin = bin;
604    me.example = example;
605    me.test = test;
606    me.bench = bench;
607
608    Ok(me)
609}
610
611fn copy_and_checksum<T: Read>(
612    dst_path: &Path,
613    dst_opts: &mut OpenOptions,
614    contents: &mut T,
615    contents_path: &Path,
616    buf: &mut [u8],
617) -> CargoResult<String> {
618    let mut dst = dst_opts
619        .open(dst_path)
620        .with_context(|| format!("failed to create {:?}", dst_path))?;
621    // Not going to bother setting mode on pre-existing files, since there
622    // shouldn't be any under normal conditions.
623    let mut cksum = Sha256::new();
624    loop {
625        let n = contents
626            .read(buf)
627            .with_context(|| format!("failed to read from {:?}", contents_path))?;
628        if n == 0 {
629            break Ok(cksum.finish_hex());
630        }
631        let data = &buf[..n];
632        cksum.update(data);
633        dst.write_all(data)
634            .with_context(|| format!("failed to write to {:?}", dst_path))?;
635    }
636}
637
/// Decides whether a file belongs in the vendored copy of a package.
///
/// `relative` is a path relative to the package root. Only an exact match
/// against one of the excluded names is rejected; everything else, including
/// paths that are not valid UTF-8, is vendored.
fn vendor_this(relative: &Path) -> bool {
    // The git entries are skipped because they're rarely relevant to builds
    // and would probably mess with the checksums once a vendor directory is
    // checked into someone else's source control. `.cargo-ok` is a temporary
    // Cargo file.
    const EXCLUDED: [&str; 4] = [".gitattributes", ".gitignore", ".git", ".cargo-ok"];

    match relative.to_str() {
        Some(name) => !EXCLUDED.contains(&name),
        None => true,
    }
}
654}