//! cargo/ops/vendor.rs

1use crate::core::shell::Verbosity;
2use crate::core::SourceId;
3use crate::core::{GitReference, Package, Workspace};
4use crate::ops;
5use crate::sources::path::PathSource;
6use crate::sources::PathEntry;
7use crate::sources::SourceConfigMap;
8use crate::sources::CRATES_IO_REGISTRY;
9use crate::util::cache_lock::CacheLockMode;
10use crate::util::{try_canonicalize, CargoResult, GlobalContext};
11use anyhow::{bail, Context as _};
12use cargo_util::{paths, Sha256};
13use serde::Serialize;
14use std::collections::HashSet;
15use std::collections::{BTreeMap, BTreeSet, HashMap};
16use std::ffi::OsStr;
17use std::fs::{self, File, OpenOptions};
18use std::io::{Read, Write};
19use std::path::{Path, PathBuf};
20
/// Options controlling the `cargo vendor` operation.
pub struct VendorOptions<'a> {
    /// Don't delete stale entries already present in the vendor directory.
    pub no_delete: bool,
    /// Always append `-<version>` to every vendored crate's directory name.
    pub versioned_dirs: bool,
    /// Directory that vendored sources are written into.
    pub destination: &'a Path,
    /// Additional workspace manifest paths (relative to cwd) whose
    /// dependencies should also be vendored.
    pub extra: Vec<PathBuf>,
    /// Whether `[source]` replacement configuration is honored when deciding
    /// which sources to clean/vendor.
    pub respect_source_config: bool,
}
28
29pub fn vendor(ws: &Workspace<'_>, opts: &VendorOptions<'_>) -> CargoResult<()> {
30    let gctx = ws.gctx();
31    let mut extra_workspaces = Vec::new();
32    for extra in opts.extra.iter() {
33        let extra = gctx.cwd().join(extra);
34        let ws = Workspace::new(&extra, gctx)?;
35        extra_workspaces.push(ws);
36    }
37    let workspaces = extra_workspaces.iter().chain(Some(ws)).collect::<Vec<_>>();
38    let _lock = gctx.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?;
39    let vendor_config = sync(gctx, &workspaces, opts).context("failed to sync")?;
40
41    if gctx.shell().verbosity() != Verbosity::Quiet {
42        if vendor_config.source.is_empty() {
43            crate::drop_eprintln!(gctx, "There is no dependency to vendor in this project.");
44        } else {
45            crate::drop_eprint!(
46                gctx,
47                "To use vendored sources, add this to your .cargo/config.toml for this project:\n\n"
48            );
49            crate::drop_print!(gctx, "{}", &toml::to_string_pretty(&vendor_config).unwrap());
50        }
51    }
52
53    Ok(())
54}
55
/// Serializable `[source]` replacement table printed for the user's
/// `.cargo/config.toml`.
#[derive(Serialize)]
struct VendorConfig {
    // Maps a source name (`crates-io` or a source URL) to its replacement.
    source: BTreeMap<String, VendorSource>,
}
60
/// One entry of the emitted `[source.*]` configuration.
///
/// Serialized `untagged`, so each variant is distinguished purely by its
/// fields in the generated TOML.
#[derive(Serialize)]
#[serde(rename_all = "lowercase", untagged)]
enum VendorSource {
    /// A `directory` source — the merged vendor directory itself.
    Directory {
        directory: String,
    },
    /// A registry source being redirected to the vendor directory.
    /// `registry: None` denotes crates.io.
    Registry {
        registry: Option<String>,
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
    /// A git source being redirected to the vendor directory, preserving the
    /// original branch/tag/rev reference.
    Git {
        git: String,
        branch: Option<String>,
        tag: Option<String>,
        rev: Option<String>,
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
}
81
/// Cache for mapping replaced sources to replacements.
struct SourceReplacementCache<'gctx> {
    // Source replacement configuration loaded from `gctx`.
    map: SourceConfigMap<'gctx>,
    // Memoized results of `map.load(..).replaced_source_id()` lookups.
    cache: HashMap<SourceId, SourceId>,
}
87
88impl SourceReplacementCache<'_> {
89    fn new(gctx: &GlobalContext) -> CargoResult<SourceReplacementCache<'_>> {
90        Ok(SourceReplacementCache {
91            map: SourceConfigMap::new(gctx)?,
92            cache: Default::default(),
93        })
94    }
95
96    fn get(&mut self, id: SourceId) -> CargoResult<SourceId> {
97        use std::collections::hash_map::Entry;
98        match self.cache.entry(id) {
99            Entry::Occupied(e) => Ok(e.get().clone()),
100            Entry::Vacant(e) => {
101                let replaced = self.map.load(id, &HashSet::new())?.replaced_source_id();
102                Ok(e.insert(replaced).clone())
103            }
104        }
105    }
106}
107
/// Core of the vendor operation: clears stale cache extractions, downloads
/// every non-path dependency of all `workspaces` into `opts.destination`,
/// writes a `.cargo-checksum.json` per crate, and returns the
/// source-replacement table describing how to consume the vendored directory.
fn sync(
    gctx: &GlobalContext,
    workspaces: &[&Workspace<'_>],
    opts: &VendorOptions<'_>,
) -> CargoResult<VendorConfig> {
    let dry_run = false;
    // Prefer the canonical destination path, but fall back to the raw path
    // (canonicalization fails e.g. when the directory does not exist yet).
    let canonical_destination = try_canonicalize(opts.destination);
    let canonical_destination = canonical_destination.as_deref().unwrap_or(opts.destination);
    let dest_dir_already_exists = canonical_destination.exists();

    paths::create_dir_all(&canonical_destination)?;
    // Pre-existing entries of the vendor dir; whatever remains in this set at
    // the end is stale and gets deleted. With `--no-delete` the set stays
    // empty. Hidden (dot-prefixed) entries are never scheduled for removal.
    let mut to_remove = HashSet::new();
    if !opts.no_delete {
        for entry in canonical_destination.read_dir()? {
            let entry = entry?;
            if !entry
                .file_name()
                .to_str()
                .map_or(false, |s| s.starts_with('.'))
            {
                to_remove.insert(entry.path());
            }
        }
    }

    let mut source_replacement_cache = SourceReplacementCache::new(gctx)?;

    // First up attempt to work around rust-lang/cargo#5956. Apparently build
    // artifacts sprout up in Cargo's global cache for whatever reason, although
    // it's unsure what tool is causing these issues at this time. For now we
    // apply a heavy-hammer approach which is to delete Cargo's unpacked version
    // of each crate to start off with. After we do this we'll re-resolve and
    // redownload again, which should trigger Cargo to re-extract all the
    // crates.
    //
    // Note that errors are largely ignored here as this is a best-effort
    // attempt. If anything fails here we basically just move on to the next
    // crate to work with.
    for ws in workspaces {
        let (packages, resolve) = ops::resolve_ws(ws, dry_run)
            .with_context(|| format!("failed to load lockfile for {}", ws.root().display()))?;

        packages
            .get_many(resolve.iter())
            .with_context(|| format!("failed to download packages for {}", ws.root().display()))?;

        for pkg in resolve.iter() {
            // Resolve through `[source]` replacement only when requested.
            let sid = if opts.respect_source_config {
                source_replacement_cache.get(pkg.source_id())?
            } else {
                pkg.source_id()
            };

            // Don't delete actual source code!
            if sid.is_path() {
                if let Ok(path) = sid.url().to_file_path() {
                    if let Ok(path) = try_canonicalize(path) {
                        to_remove.remove(&path);
                    }
                }
                continue;
            }
            if sid.is_git() {
                continue;
            }

            // Only delete sources that are safe to delete, i.e. they are caches.
            if sid.is_registry() {
                if let Ok(pkg) = packages.get_one(pkg) {
                    drop(fs::remove_dir_all(pkg.root()));
                }
                continue;
            }
        }
    }

    let mut checksums = HashMap::new();
    let mut ids = BTreeMap::new();

    // Next up let's actually download all crates and start storing internal
    // tables about them.
    for ws in workspaces {
        let (packages, resolve) = ops::resolve_ws(ws, dry_run)
            .with_context(|| format!("failed to load lockfile for {}", ws.root().display()))?;

        packages
            .get_many(resolve.iter())
            .with_context(|| format!("failed to download packages for {}", ws.root().display()))?;

        for pkg in resolve.iter() {
            // No need to vendor path crates since they're already in the
            // repository
            if pkg.source_id().is_path() {
                continue;
            }
            ids.insert(
                pkg,
                packages
                    .get_one(pkg)
                    .context("failed to fetch package")?
                    .clone(),
            );

            checksums.insert(pkg, resolve.checksums().get(&pkg).cloned());
        }
    }

    // Reject the same name+version vendored from two different sources: both
    // would collapse onto the same destination directory name.
    let mut versions = HashMap::new();
    for id in ids.keys() {
        let map = versions.entry(id.name()).or_insert_with(BTreeMap::default);
        if let Some(prev) = map.get(&id.version()) {
            bail!(
                "found duplicate version of package `{} v{}` \
                 vendored from two sources:\n\
                 \n\
                 \tsource 1: {}\n\
                 \tsource 2: {}",
                id.name(),
                id.version(),
                prev,
                id.source_id()
            );
        }
        map.insert(id.version(), id.source_id());
    }

    let mut sources = BTreeSet::new();
    // Scratch buffer reused across every `cp_sources` call.
    let mut tmp_buf = [0; 64 * 1024];
    for (id, pkg) in ids.iter() {
        // Next up, copy it to the vendor directory
        let src = pkg.root();
        // Only the newest version of a crate may use the bare `vendor/<name>`
        // directory; all older duplicates get a `-<version>` suffix.
        let max_version = *versions[&id.name()].iter().rev().next().unwrap().0;
        let dir_has_version_suffix = opts.versioned_dirs || id.version() != max_version;
        let dst_name = if dir_has_version_suffix {
            // Eg vendor/futures-0.1.13
            format!("{}-{}", id.name(), id.version())
        } else {
            // Eg vendor/futures
            id.name().to_string()
        };

        sources.insert(id.source_id());
        let dst = canonical_destination.join(&dst_name);
        to_remove.remove(&dst);
        let cksum = dst.join(".cargo-checksum.json");
        // Registries are the only immutable sources,
        // path and git dependencies' versions cannot be trusted to mean "no change"
        if dir_has_version_suffix && id.source_id().is_registry() && cksum.exists() {
            // Don't re-copy directory with version suffix in case it comes from a registry
            continue;
        }

        gctx.shell().status(
            "Vendoring",
            &format!("{} ({}) to {}", id, src.to_string_lossy(), dst.display()),
        )?;

        // Start from a clean slate; errors ignored (the dir may not exist).
        let _ = fs::remove_dir_all(&dst);
        let pathsource = PathSource::new(src, id.source_id(), gctx);
        let paths = pathsource.list_files(pkg)?;
        let mut map = BTreeMap::new();
        cp_sources(pkg, src, &paths, &dst, &mut map, &mut tmp_buf, gctx)
            .with_context(|| format!("failed to copy over vendored sources for: {}", id))?;

        // Finally, emit the metadata about this package
        let json = serde_json::json!({
            "package": checksums.get(id),
            "files": map,
        });

        paths::write(&cksum, json.to_string())?;
    }

    // Anything left in `to_remove` was not claimed by a current dependency.
    for path in to_remove {
        if path.is_dir() {
            paths::remove_dir_all(&path)?;
        } else {
            paths::remove_file(&path)?;
        }
    }

    // add our vendored source
    let mut config = BTreeMap::new();

    let merged_source_name = "vendored-sources";

    // replace original sources with vendor
    for source_id in sources {
        let name = if source_id.is_crates_io() {
            CRATES_IO_REGISTRY.to_string()
        } else {
            // Remove `precise` since that makes the source name very long,
            // and isn't needed to disambiguate multiple sources.
            source_id.without_precise().as_url().to_string()
        };

        let source = if source_id.is_crates_io() {
            VendorSource::Registry {
                registry: None,
                replace_with: merged_source_name.to_string(),
            }
        } else if source_id.is_remote_registry() {
            let registry = source_id.url().to_string();
            VendorSource::Registry {
                registry: Some(registry),
                replace_with: merged_source_name.to_string(),
            }
        } else if source_id.is_git() {
            let mut branch = None;
            let mut tag = None;
            let mut rev = None;
            if let Some(reference) = source_id.git_reference() {
                match *reference {
                    GitReference::Branch(ref b) => branch = Some(b.clone()),
                    GitReference::Tag(ref t) => tag = Some(t.clone()),
                    GitReference::Rev(ref r) => rev = Some(r.clone()),
                    GitReference::DefaultBranch => {}
                }
            }
            VendorSource::Git {
                git: source_id.url().to_string(),
                branch,
                tag,
                rev,
                replace_with: merged_source_name.to_string(),
            }
        } else {
            panic!("Invalid source ID: {}", source_id)
        };
        config.insert(name, source);
    }

    if !config.is_empty() {
        config.insert(
            merged_source_name.to_string(),
            VendorSource::Directory {
                // Windows-flavour paths are valid here on Windows but not on Unix.
                // This backslash normalization is for making output paths more
                // cross-platform compatible.
                directory: opts.destination.to_string_lossy().replace("\\", "/"),
            },
        );
    } else if !dest_dir_already_exists {
        // Nothing to vendor. Remove the destination dir we've just created.
        paths::remove_dir(canonical_destination)?;
    }

    Ok(VendorConfig { source: config })
}
357
/// Copies the files of `pkg` (rooted at `src`, enumerated in `paths`) into
/// `dst`, recording a hex checksum per copied file into `cksums`, keyed by
/// the forward-slash relative path.
///
/// Git metadata files, `.cargo-ok`, and `*.orig`/`*.rej` patch leftovers are
/// skipped. For git dependencies, `Cargo.toml` is regenerated from the
/// normalized manifest instead of being copied verbatim.
fn cp_sources(
    pkg: &Package,
    src: &Path,
    paths: &[PathEntry],
    dst: &Path,
    cksums: &mut BTreeMap<String, String>,
    tmp_buf: &mut [u8],
    gctx: &GlobalContext,
) -> CargoResult<()> {
    for p in paths {
        let p = p.as_ref();
        // Listed files are expected to live under `src` — TODO confirm
        // `list_files` guarantees this for every source kind.
        let relative = p.strip_prefix(&src).unwrap();

        match relative.to_str() {
            // Skip git config files as they're not relevant to builds most of
            // the time and if we respect them (e.g.  in git) then it'll
            // probably mess with the checksums when a vendor dir is checked
            // into someone else's source control
            Some(".gitattributes" | ".gitignore" | ".git") => continue,

            // Temporary Cargo files
            Some(".cargo-ok") => continue,

            // Skip patch-style orig/rej files. Published crates on crates.io
            // have `Cargo.toml.orig` which we don't want to use here and
            // otherwise these are rarely used as part of the build process.
            Some(filename) => {
                if filename.ends_with(".orig") || filename.ends_with(".rej") {
                    continue;
                }
            }
            _ => {}
        };

        // Join pathname components individually to make sure that the joined
        // path uses the correct directory separators everywhere, since
        // `relative` may use Unix-style and `dst` may require Windows-style
        // backslashes.
        let dst = relative
            .iter()
            .fold(dst.to_owned(), |acc, component| acc.join(&component));

        paths::create_dir_all(dst.parent().unwrap())?;
        let mut dst_opts = OpenOptions::new();
        dst_opts.write(true).create(true).truncate(true);
        // When vendoring git dependencies, the manifest has not been normalized like it would be
        // when published. This causes issue when the manifest is using workspace inheritance.
        // To get around this issue we use the "original" manifest after `{}.workspace = true`
        // has been resolved for git dependencies.
        let cksum = if dst.file_name() == Some(OsStr::new("Cargo.toml"))
            && pkg.package_id().source_id().is_git()
        {
            let packaged_files = paths
                .iter()
                .map(|p| p.strip_prefix(src).unwrap().to_owned())
                .collect::<Vec<_>>();
            let vendored_pkg = prepare_for_vendor(pkg, &packaged_files, gctx)?;
            let contents = vendored_pkg.manifest().to_normalized_contents()?;
            // Checksum the generated manifest text instead of the on-disk file.
            copy_and_checksum(
                &dst,
                &mut dst_opts,
                &mut contents.as_bytes(),
                "Generated Cargo.toml",
                tmp_buf,
            )?
        } else {
            let mut src = File::open(&p).with_context(|| format!("failed to open {:?}", &p))?;
            #[cfg(unix)]
            {
                use std::os::unix::fs::{MetadataExt, OpenOptionsExt};
                // Carry over the source file's permission bits (e.g. the
                // executable bit) when creating the destination on Unix.
                let src_metadata = src
                    .metadata()
                    .with_context(|| format!("failed to stat {:?}", p))?;
                dst_opts.mode(src_metadata.mode());
            }
            copy_and_checksum(
                &dst,
                &mut dst_opts,
                &mut src,
                &p.display().to_string(),
                tmp_buf,
            )?
        };

        // Always record keys with forward slashes for platform-stable output.
        cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
    }
    Ok(())
}
446
/// HACK: Perform the bare minimum of `prepare_for_publish` needed for #14348.
///
/// There are parts of `prepare_for_publish` that could be directly useful (e.g. stripping
/// `[workspace]`) while other parts that require other filesystem operations (moving the README
/// file) and ideally we'd reuse `cargo package` code to take care of all of this for us.
fn prepare_for_vendor(
    me: &Package,
    packaged_files: &[PathBuf],
    gctx: &GlobalContext,
) -> CargoResult<Package> {
    let contents = me.manifest().contents();
    let document = me.manifest().document();
    // Start from the normalized manifest (workspace inheritance already
    // resolved) and fix it up for vendoring.
    let original_toml = prepare_toml_for_vendor(
        me.manifest().normalized_toml().clone(),
        packaged_files,
        gctx,
    )?;
    // The vendored manifest serves as both the "original" and the
    // "normalized" TOML when rebuilding the manifest below.
    let normalized_toml = original_toml.clone();
    let features = me.manifest().unstable_features().clone();
    let workspace_config = me.manifest().workspace_config().clone();
    let source_id = me.package_id().source_id();
    // Warnings/errors from re-parsing are collected but not surfaced here.
    let mut warnings = Default::default();
    let mut errors = Default::default();
    let manifest = crate::util::toml::to_real_manifest(
        contents.to_owned(),
        document.clone(),
        original_toml,
        normalized_toml,
        features,
        workspace_config,
        source_id,
        me.manifest_path(),
        me.manifest().is_embedded(),
        gctx,
        &mut warnings,
        &mut errors,
    )?;
    let new_pkg = Package::new(manifest, me.manifest_path());
    Ok(new_pkg)
}
487
488fn prepare_toml_for_vendor(
489    mut me: cargo_util_schemas::manifest::TomlManifest,
490    packaged_files: &[PathBuf],
491    gctx: &GlobalContext,
492) -> CargoResult<cargo_util_schemas::manifest::TomlManifest> {
493    let package = me
494        .package
495        .as_mut()
496        .expect("venedored manifests must have packages");
497    if let Some(cargo_util_schemas::manifest::StringOrBool::String(path)) = &package.build {
498        let path = paths::normalize_path(Path::new(path));
499        let included = packaged_files.contains(&path);
500        let build = if included {
501            let path = path
502                .into_os_string()
503                .into_string()
504                .map_err(|_err| anyhow::format_err!("non-UTF8 `package.build`"))?;
505            let path = crate::util::toml::normalize_path_string_sep(path);
506            cargo_util_schemas::manifest::StringOrBool::String(path)
507        } else {
508            gctx.shell().warn(format!(
509                "ignoring `package.build` as `{}` is not included in the published package",
510                path.display()
511            ))?;
512            cargo_util_schemas::manifest::StringOrBool::Bool(false)
513        };
514        package.build = Some(build);
515    }
516
517    let lib = if let Some(target) = &me.lib {
518        crate::util::toml::prepare_target_for_publish(
519            target,
520            Some(packaged_files),
521            "library",
522            gctx,
523        )?
524    } else {
525        None
526    };
527    let bin = crate::util::toml::prepare_targets_for_publish(
528        me.bin.as_ref(),
529        Some(packaged_files),
530        "binary",
531        gctx,
532    )?;
533    let example = crate::util::toml::prepare_targets_for_publish(
534        me.example.as_ref(),
535        Some(packaged_files),
536        "example",
537        gctx,
538    )?;
539    let test = crate::util::toml::prepare_targets_for_publish(
540        me.test.as_ref(),
541        Some(packaged_files),
542        "test",
543        gctx,
544    )?;
545    let bench = crate::util::toml::prepare_targets_for_publish(
546        me.bench.as_ref(),
547        Some(packaged_files),
548        "benchmark",
549        gctx,
550    )?;
551
552    me.lib = lib;
553    me.bin = bin;
554    me.example = example;
555    me.test = test;
556    me.bench = bench;
557
558    Ok(me)
559}
560
561fn copy_and_checksum<T: Read>(
562    dst_path: &Path,
563    dst_opts: &mut OpenOptions,
564    contents: &mut T,
565    contents_path: &str,
566    buf: &mut [u8],
567) -> CargoResult<String> {
568    let mut dst = dst_opts
569        .open(dst_path)
570        .with_context(|| format!("failed to create {:?}", dst_path))?;
571    // Not going to bother setting mode on pre-existing files, since there
572    // shouldn't be any under normal conditions.
573    let mut cksum = Sha256::new();
574    loop {
575        let n = contents
576            .read(buf)
577            .with_context(|| format!("failed to read from {:?}", contents_path))?;
578        if n == 0 {
579            break Ok(cksum.finish_hex());
580        }
581        let data = &buf[..n];
582        cksum.update(data);
583        dst.write_all(data)
584            .with_context(|| format!("failed to write to {:?}", dst_path))?;
585    }
586}