cargo/ops/
vendor.rs

1use crate::core::shell::Verbosity;
2use crate::core::SourceId;
3use crate::core::{GitReference, Package, Workspace};
4use crate::ops;
5use crate::sources::path::PathSource;
6use crate::sources::RegistrySource;
7use crate::sources::SourceConfigMap;
8use crate::sources::CRATES_IO_REGISTRY;
9use crate::util::cache_lock::CacheLockMode;
10use crate::util::{try_canonicalize, CargoResult, GlobalContext};
11
12use anyhow::{bail, Context as _};
13use cargo_util::{paths, Sha256};
14use cargo_util_schemas::core::SourceKind;
15use cargo_util_schemas::manifest::TomlPackageBuild;
16use serde::Serialize;
17use walkdir::WalkDir;
18
19use std::collections::HashSet;
20use std::collections::{BTreeMap, BTreeSet, HashMap};
21use std::ffi::OsStr;
22use std::fs::{self, File, OpenOptions};
23use std::io::{Read, Write};
24use std::path::{Path, PathBuf};
25
/// Options accepted by [`vendor`].
#[derive(Debug)]
pub struct VendorOptions<'a> {
    /// When `true`, entries that already exist in the destination directory are
    /// never scheduled for removal. When `false`, every pre-existing non-hidden
    /// entry that is not claimed by a vendored package is deleted.
    pub no_delete: bool,
    /// When `true`, every vendored directory is named `<name>-<version>`.
    /// When `false`, only packages that are not the highest vendored version of
    /// their name receive the version suffix.
    pub versioned_dirs: bool,
    /// Directory the packages are vendored into.
    pub destination: &'a Path,
    /// Additional paths, joined onto the current working directory, each of
    /// which is opened as a further workspace to vendor alongside the main one.
    pub extra: Vec<PathBuf>,
    /// When `true`, the source configuration is loaded from the global context;
    /// when `false`, an empty source configuration is used instead.
    pub respect_source_config: bool,
}
33
34pub fn vendor(ws: &Workspace<'_>, opts: &VendorOptions<'_>) -> CargoResult<()> {
35    let gctx = ws.gctx();
36    let mut extra_workspaces = Vec::new();
37    for extra in opts.extra.iter() {
38        let extra = gctx.cwd().join(extra);
39        let ws = Workspace::new(&extra, gctx)?;
40        extra_workspaces.push(ws);
41    }
42    let workspaces = extra_workspaces.iter().chain(Some(ws)).collect::<Vec<_>>();
43    let _lock = gctx.acquire_package_cache_lock(CacheLockMode::DownloadExclusive)?;
44    let vendor_config = sync(gctx, &workspaces, opts).context("failed to sync")?;
45
46    if gctx.shell().verbosity() != Verbosity::Quiet {
47        if vendor_config.source.is_empty() {
48            crate::drop_eprintln!(gctx, "There is no dependency to vendor in this project.");
49        } else {
50            crate::drop_eprint!(
51                gctx,
52                "To use vendored sources, add this to your .cargo/config.toml for this project:\n\n"
53            );
54            crate::drop_print!(gctx, "{}", &toml::to_string_pretty(&vendor_config).unwrap());
55        }
56    }
57
58    Ok(())
59}
60
/// Source-replacement configuration produced by vendoring.
///
/// It is serialized to TOML and printed by `vendor` so it can be pasted into a
/// Cargo configuration file.
#[derive(Serialize)]
struct VendorConfig {
    /// Source entries keyed by source name. A `BTreeMap` keeps the keys in
    /// sorted order.
    source: BTreeMap<String, VendorSource>,
}
65
/// One entry of the `source` table in [`VendorConfig`].
///
/// The enum is `untagged`, so each variant serializes as just its fields with
/// no variant name in the output.
#[derive(Serialize)]
#[serde(rename_all = "lowercase", untagged)]
enum VendorSource {
    /// The vendor directory itself, i.e. the source the others are replaced with.
    Directory {
        /// Path of the vendor directory.
        directory: String,
    },
    /// A registry source that is replaced by the vendored sources.
    Registry {
        /// Registry URL; `None` is used for crates.io.
        registry: Option<String>,
        /// Name of the source to use instead, serialized as `replace-with`.
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
    /// A git source that is replaced by the vendored sources.
    Git {
        /// URL of the git repository.
        git: String,
        /// Branch reference, when the source is pinned to a branch.
        branch: Option<String>,
        /// Tag reference, when the source is pinned to a tag.
        tag: Option<String>,
        /// Revision reference, when the source is pinned to a revision.
        rev: Option<String>,
        /// Name of the source to use instead, serialized as `replace-with`.
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
}
86
87/// Cache for mapping replaced sources to replacements.
88struct SourceReplacementCache<'gctx> {
89    map: SourceConfigMap<'gctx>,
90    cache: HashMap<SourceId, SourceId>,
91}
92
93impl SourceReplacementCache<'_> {
94    fn new(
95        gctx: &GlobalContext,
96        respect_source_config: bool,
97    ) -> CargoResult<SourceReplacementCache<'_>> {
98        Ok(SourceReplacementCache {
99            map: if respect_source_config {
100                SourceConfigMap::new(gctx)
101            } else {
102                SourceConfigMap::empty(gctx)
103            }?,
104            cache: Default::default(),
105        })
106    }
107
108    fn get(&mut self, id: SourceId) -> CargoResult<SourceId> {
109        use std::collections::hash_map::Entry;
110        match self.cache.entry(id) {
111            Entry::Occupied(e) => Ok(e.get().clone()),
112            Entry::Vacant(e) => {
113                let replaced = self.map.load(id, &HashSet::new())?.replaced_source_id();
114                Ok(e.insert(replaced).clone())
115            }
116        }
117    }
118}
119
120fn sync(
121    gctx: &GlobalContext,
122    workspaces: &[&Workspace<'_>],
123    opts: &VendorOptions<'_>,
124) -> CargoResult<VendorConfig> {
125    let dry_run = false;
126    let vendor_dir = try_canonicalize(opts.destination);
127    let vendor_dir = vendor_dir.as_deref().unwrap_or(opts.destination);
128    let vendor_dir_already_exists = vendor_dir.exists();
129
130    paths::create_dir_all(&vendor_dir)?;
131    let mut to_remove = HashSet::new();
132    if !opts.no_delete {
133        for entry in vendor_dir.read_dir()? {
134            let entry = entry?;
135            if !entry
136                .file_name()
137                .to_str()
138                .map_or(false, |s| s.starts_with('.'))
139            {
140                to_remove.insert(entry.path());
141            }
142        }
143    }
144
145    let mut source_replacement_cache =
146        SourceReplacementCache::new(gctx, opts.respect_source_config)?;
147
148    let mut checksums = HashMap::new();
149    let mut ids = BTreeMap::new();
150
151    // Let's download all crates and start storing internal tables about them.
152    for ws in workspaces {
153        let (packages, resolve) = ops::resolve_ws(ws, dry_run)
154            .with_context(|| format!("failed to load lockfile for {}", ws.root().display()))?;
155
156        packages
157            .get_many(resolve.iter())
158            .with_context(|| format!("failed to download packages for {}", ws.root().display()))?;
159
160        for pkg in resolve.iter() {
161            let sid = source_replacement_cache.get(pkg.source_id())?;
162
163            // Don't vendor path crates since they're already in the repository
164            if sid.is_path() {
165                // And don't delete actual source code!
166                if let Ok(path) = sid.url().to_file_path() {
167                    if let Ok(path) = try_canonicalize(path) {
168                        to_remove.remove(&path);
169                    }
170                }
171                continue;
172            }
173
174            ids.insert(
175                pkg,
176                packages
177                    .get_one(pkg)
178                    .context("failed to fetch package")?
179                    .clone(),
180            );
181
182            checksums.insert(pkg, resolve.checksums().get(&pkg).cloned());
183        }
184    }
185
186    let mut versions = HashMap::new();
187    for id in ids.keys() {
188        let map = versions.entry(id.name()).or_insert_with(BTreeMap::default);
189        if let Some(prev) = map.get(&id.version()) {
190            bail!(
191                "found duplicate version of package `{} v{}` \
192                 vendored from two sources:\n\
193                 \n\
194                 \tsource 1: {}\n\
195                 \tsource 2: {}",
196                id.name(),
197                id.version(),
198                prev,
199                id.source_id()
200            );
201        }
202        map.insert(id.version(), id.source_id());
203    }
204
205    let mut sources = BTreeSet::new();
206    let mut tmp_buf = [0; 64 * 1024];
207    for (id, pkg) in ids.iter() {
208        // Next up, copy it to the vendor directory
209        let src = pkg.root();
210        let max_version = *versions[&id.name()].iter().rev().next().unwrap().0;
211        let dir_has_version_suffix = opts.versioned_dirs || id.version() != max_version;
212        let dst_name = if dir_has_version_suffix {
213            // Eg vendor/futures-0.1.13
214            format!("{}-{}", id.name(), id.version())
215        } else {
216            // Eg vendor/futures
217            id.name().to_string()
218        };
219
220        sources.insert(id.source_id());
221        let dst = vendor_dir.join(&dst_name);
222        to_remove.remove(&dst);
223        let cksum = dst.join(".cargo-checksum.json");
224        // Registries are the only immutable sources,
225        // path and git dependencies' versions cannot be trusted to mean "no change"
226        if dir_has_version_suffix && id.source_id().is_registry() && cksum.exists() {
227            // Don't re-copy directory with version suffix in case it comes from a registry
228            continue;
229        }
230
231        gctx.shell().status(
232            "Vendoring",
233            &format!("{} ({}) to {}", id, src.to_string_lossy(), dst.display()),
234        )?;
235
236        let _ = fs::remove_dir_all(&dst);
237
238        let mut file_cksums = BTreeMap::new();
239
240        // Need this mapping anyway because we will directly consult registry sources,
241        // otherwise builtin source replacement (sparse registry) won't be respected.
242        let sid = source_replacement_cache.get(id.source_id())?;
243
244        if sid.is_registry() {
245            // To keep the unpacked source from registry in a pristine state,
246            // we'll do a direct extraction into the vendor directory.
247            let registry = match sid.kind() {
248                SourceKind::Registry | SourceKind::SparseRegistry => {
249                    RegistrySource::remote(sid, &Default::default(), gctx)?
250                }
251                SourceKind::LocalRegistry => {
252                    let path = sid.url().to_file_path().expect("local path");
253                    RegistrySource::local(sid, &path, &Default::default(), gctx)
254                }
255                _ => unreachable!("not registry source: {sid}"),
256            };
257
258            let walkdir = |root| {
259                WalkDir::new(root)
260                    .into_iter()
261                    // It is safe to skip errors,
262                    // since we'll hit them during copying/reading later anyway.
263                    .filter_map(|e| e.ok())
264                    // There should be no symlink in tarballs on crates.io,
265                    // but might be wrong for local registries.
266                    // Hence here be conservative and include symlinks.
267                    .filter(|e| e.file_type().is_file() || e.file_type().is_symlink())
268            };
269            let mut compute_file_cksums = |root| {
270                for e in walkdir(root) {
271                    let path = e.path();
272                    let relative = path.strip_prefix(&dst).unwrap();
273                    let cksum = Sha256::new()
274                        .update_path(path)
275                        .map(Sha256::finish_hex)
276                        .with_context(|| format!("failed to checksum `{}`", path.display()))?;
277                    file_cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
278                }
279                Ok::<_, anyhow::Error>(())
280            };
281            if dir_has_version_suffix {
282                registry.unpack_package_in(id, &vendor_dir, &vendor_this)?;
283                compute_file_cksums(&dst)?;
284            } else {
285                // Due to the extra sanity check in registry unpack
286                // (ensure it contain only one top-level directory with name `pkg-version`),
287                // we can only unpack a directory with version suffix,
288                // and move it to the no suffix directory.
289                let staging_dir = tempfile::Builder::new()
290                    .prefix(".vendor-staging")
291                    .tempdir_in(vendor_dir)?;
292                let unpacked_src =
293                    registry.unpack_package_in(id, staging_dir.path(), &vendor_this)?;
294                if let Err(e) = fs::rename(&unpacked_src, &dst) {
295                    // This fallback is mainly for Windows 10 versions earlier than 1607.
296                    // The destination of `fs::rename` can't be a diretory in older versions.
297                    // Can be removed once the minimal supported Windows version gets bumped.
298                    tracing::warn!("failed to `mv {unpacked_src:?} {dst:?}`: {e}");
299                    let paths: Vec<_> = walkdir(&unpacked_src).map(|e| e.into_path()).collect();
300                    cp_sources(pkg, src, &paths, &dst, &mut file_cksums, &mut tmp_buf, gctx)
301                        .with_context(|| format!("failed to copy vendored sources for {id}"))?;
302                } else {
303                    compute_file_cksums(&dst)?;
304                }
305            }
306        } else {
307            let paths = PathSource::new(src, sid, gctx)
308                .list_files(pkg)?
309                .into_iter()
310                .map(|p| p.into_path_buf())
311                .collect::<Vec<_>>();
312            cp_sources(pkg, src, &paths, &dst, &mut file_cksums, &mut tmp_buf, gctx)
313                .with_context(|| format!("failed to copy vendored sources for {id}"))?;
314        }
315
316        // Finally, emit the metadata about this package
317        let json = serde_json::json!({
318            "package": checksums.get(id),
319            "files": file_cksums,
320        });
321
322        paths::write(&cksum, json.to_string())?;
323    }
324
325    for path in to_remove {
326        if path.is_dir() {
327            paths::remove_dir_all(&path)?;
328        } else {
329            paths::remove_file(&path)?;
330        }
331    }
332
333    // add our vendored source
334    let mut config = BTreeMap::new();
335
336    let merged_source_name = "vendored-sources";
337
338    // replace original sources with vendor
339    for source_id in sources {
340        let name = if source_id.is_crates_io() {
341            CRATES_IO_REGISTRY.to_string()
342        } else {
343            // Remove `precise` since that makes the source name very long,
344            // and isn't needed to disambiguate multiple sources.
345            source_id.without_precise().as_url().to_string()
346        };
347
348        let source = if source_id.is_crates_io() {
349            VendorSource::Registry {
350                registry: None,
351                replace_with: merged_source_name.to_string(),
352            }
353        } else if source_id.is_remote_registry() {
354            let registry = source_id.url().to_string();
355            VendorSource::Registry {
356                registry: Some(registry),
357                replace_with: merged_source_name.to_string(),
358            }
359        } else if source_id.is_git() {
360            let mut branch = None;
361            let mut tag = None;
362            let mut rev = None;
363            if let Some(reference) = source_id.git_reference() {
364                match *reference {
365                    GitReference::Branch(ref b) => branch = Some(b.clone()),
366                    GitReference::Tag(ref t) => tag = Some(t.clone()),
367                    GitReference::Rev(ref r) => rev = Some(r.clone()),
368                    GitReference::DefaultBranch => {}
369                }
370            }
371            VendorSource::Git {
372                git: source_id.url().to_string(),
373                branch,
374                tag,
375                rev,
376                replace_with: merged_source_name.to_string(),
377            }
378        } else {
379            panic!("Invalid source ID: {}", source_id)
380        };
381        config.insert(name, source);
382    }
383
384    if !config.is_empty() {
385        config.insert(
386            merged_source_name.to_string(),
387            VendorSource::Directory {
388                // Windows-flavour paths are valid here on Windows but Unix.
389                // This backslash normalization is for making output paths more
390                // cross-platform compatible.
391                directory: opts.destination.to_string_lossy().replace("\\", "/"),
392            },
393        );
394    } else if !vendor_dir_already_exists {
395        // Nothing to vendor. Remove the destination dir we've just created.
396        paths::remove_dir(vendor_dir)?;
397    }
398
399    Ok(VendorConfig { source: config })
400}
401
402fn cp_sources(
403    pkg: &Package,
404    src: &Path,
405    paths: &[PathBuf],
406    dst: &Path,
407    cksums: &mut BTreeMap<String, String>,
408    tmp_buf: &mut [u8],
409    gctx: &GlobalContext,
410) -> CargoResult<()> {
411    for p in paths {
412        let relative = p.strip_prefix(&src).unwrap();
413
414        if !vendor_this(relative) {
415            continue;
416        }
417
418        // Join pathname components individually to make sure that the joined
419        // path uses the correct directory separators everywhere, since
420        // `relative` may use Unix-style and `dst` may require Windows-style
421        // backslashes.
422        let dst = relative
423            .iter()
424            .fold(dst.to_owned(), |acc, component| acc.join(&component));
425
426        paths::create_dir_all(dst.parent().unwrap())?;
427        let mut dst_opts = OpenOptions::new();
428        dst_opts.write(true).create(true).truncate(true);
429        // When vendoring git dependencies, the manifest has not been normalized like it would be
430        // when published. This causes issue when the manifest is using workspace inheritance.
431        // To get around this issue we use the "original" manifest after `{}.workspace = true`
432        // has been resolved for git dependencies.
433        let cksum = if dst.file_name() == Some(OsStr::new("Cargo.toml"))
434            && pkg.package_id().source_id().is_git()
435        {
436            let packaged_files = paths
437                .iter()
438                .map(|p| p.strip_prefix(src).unwrap().to_owned())
439                .collect::<Vec<_>>();
440            let vendored_pkg = prepare_for_vendor(pkg, &packaged_files, gctx)?;
441            let contents = vendored_pkg.manifest().to_normalized_contents()?;
442            copy_and_checksum(
443                &dst,
444                &mut dst_opts,
445                &mut contents.as_bytes(),
446                Path::new("Generated Cargo.toml"),
447                tmp_buf,
448            )?
449        } else {
450            let mut src = File::open(&p).with_context(|| format!("failed to open {:?}", &p))?;
451            #[cfg(unix)]
452            {
453                use std::os::unix::fs::{MetadataExt, OpenOptionsExt};
454                let src_metadata = src
455                    .metadata()
456                    .with_context(|| format!("failed to stat {:?}", p))?;
457                dst_opts.mode(src_metadata.mode());
458            }
459            copy_and_checksum(&dst, &mut dst_opts, &mut src, &p, tmp_buf)?
460        };
461
462        cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
463    }
464    Ok(())
465}
466
467/// HACK: Perform the bare minimum of `prepare_for_publish` needed for #14348.
468///
469/// There are parts of `prepare_for_publish` that could be directly useful (e.g. stripping
470/// `[workspace]`) while other parts that require other filesystem operations (moving the README
471/// file) and ideally we'd reuse `cargo package` code to take care of all of this for us.
472fn prepare_for_vendor(
473    me: &Package,
474    packaged_files: &[PathBuf],
475    gctx: &GlobalContext,
476) -> CargoResult<Package> {
477    let contents = me.manifest().contents();
478    let document = me.manifest().document();
479    let original_toml = prepare_toml_for_vendor(
480        me.manifest().normalized_toml().clone(),
481        packaged_files,
482        gctx,
483    )?;
484    let normalized_toml = original_toml.clone();
485    let features = me.manifest().unstable_features().clone();
486    let workspace_config = me.manifest().workspace_config().clone();
487    let source_id = me.package_id().source_id();
488    let mut warnings = Default::default();
489    let mut errors = Default::default();
490    let manifest = crate::util::toml::to_real_manifest(
491        contents.to_owned(),
492        document.clone(),
493        original_toml,
494        normalized_toml,
495        features,
496        workspace_config,
497        source_id,
498        me.manifest_path(),
499        me.manifest().is_embedded(),
500        gctx,
501        &mut warnings,
502        &mut errors,
503    )?;
504    let new_pkg = Package::new(manifest, me.manifest_path());
505    Ok(new_pkg)
506}
507
508fn prepare_toml_for_vendor(
509    mut me: cargo_util_schemas::manifest::TomlManifest,
510    packaged_files: &[PathBuf],
511    gctx: &GlobalContext,
512) -> CargoResult<cargo_util_schemas::manifest::TomlManifest> {
513    let package = me
514        .package
515        .as_mut()
516        .expect("venedored manifests must have packages");
517    // Validates if build script file is included in package. If not, warn and ignore.
518    if let Some(custom_build_scripts) = package.normalized_build().expect("previously normalized") {
519        let mut included_scripts = Vec::new();
520        for script in custom_build_scripts {
521            let path = paths::normalize_path(Path::new(script));
522            let included = packaged_files.contains(&path);
523            if included {
524                let path = path
525                    .into_os_string()
526                    .into_string()
527                    .map_err(|_err| anyhow::format_err!("non-UTF8 `package.build`"))?;
528                let path = crate::util::toml::normalize_path_string_sep(path);
529                included_scripts.push(path);
530            } else {
531                gctx.shell().warn(format!(
532                    "ignoring `package.build` entry `{}` as it is not included in the published package",
533                    path.display()
534                ))?;
535            }
536        }
537        package.build = Some(match included_scripts.len() {
538            0 => TomlPackageBuild::Auto(false),
539            1 => TomlPackageBuild::SingleScript(included_scripts[0].clone()),
540            _ => TomlPackageBuild::MultipleScript(included_scripts),
541        });
542    }
543
544    let lib = if let Some(target) = &me.lib {
545        crate::util::toml::prepare_target_for_publish(
546            target,
547            Some(packaged_files),
548            "library",
549            gctx,
550        )?
551    } else {
552        None
553    };
554    let bin = crate::util::toml::prepare_targets_for_publish(
555        me.bin.as_ref(),
556        Some(packaged_files),
557        "binary",
558        gctx,
559    )?;
560    let example = crate::util::toml::prepare_targets_for_publish(
561        me.example.as_ref(),
562        Some(packaged_files),
563        "example",
564        gctx,
565    )?;
566    let test = crate::util::toml::prepare_targets_for_publish(
567        me.test.as_ref(),
568        Some(packaged_files),
569        "test",
570        gctx,
571    )?;
572    let bench = crate::util::toml::prepare_targets_for_publish(
573        me.bench.as_ref(),
574        Some(packaged_files),
575        "benchmark",
576        gctx,
577    )?;
578
579    me.lib = lib;
580    me.bin = bin;
581    me.example = example;
582    me.test = test;
583    me.bench = bench;
584
585    Ok(me)
586}
587
588fn copy_and_checksum<T: Read>(
589    dst_path: &Path,
590    dst_opts: &mut OpenOptions,
591    contents: &mut T,
592    contents_path: &Path,
593    buf: &mut [u8],
594) -> CargoResult<String> {
595    let mut dst = dst_opts
596        .open(dst_path)
597        .with_context(|| format!("failed to create {:?}", dst_path))?;
598    // Not going to bother setting mode on pre-existing files, since there
599    // shouldn't be any under normal conditions.
600    let mut cksum = Sha256::new();
601    loop {
602        let n = contents
603            .read(buf)
604            .with_context(|| format!("failed to read from {:?}", contents_path))?;
605        if n == 0 {
606            break Ok(cksum.finish_hex());
607        }
608        let data = &buf[..n];
609        cksum.update(data);
610        dst.write_all(data)
611            .with_context(|| format!("failed to write to {:?}", dst_path))?;
612    }
613}
614
/// Decides whether a file should be vendored.
///
/// `relative` is the file's path relative to the package root. Returns `false`
/// only when the whole path is exactly one of the excluded names; any other
/// path, including one that is not valid UTF-8, is vendored.
fn vendor_this(relative: &Path) -> bool {
    // Excluded are:
    // * git config files: they rarely matter for builds, and honoring them
    //   (e.g. in git) would probably mess with the checksums once the vendor
    //   directory is checked into someone else's source control;
    // * `.cargo-ok`, a temporary Cargo file.
    !matches!(
        relative.to_str(),
        Some(".gitattributes" | ".gitignore" | ".git" | ".cargo-ok")
    )
}