cargo/core/
gc.rs

1//! Support for garbage collecting unused files from downloaded files or
2//! artifacts from the target directory.
3//!
4//! The [`Gc`] type provides the high-level interface for the
5//! garbage-collection system.
6//!
7//! Garbage collection can be done "automatically" by cargo, which it does by
8//! default once a day when running any command that does a lot of work (like
9//! `cargo build`). The entry point for this is the [`auto_gc`] function,
10//! which handles some basic setup, creating the [`Gc`], and calling
11//! [`Gc::auto`].
12//!
13//! Garbage collection can also be done manually via the `cargo clean` command
14//! by passing any option that requests deleting unused files. That is
15//! implemented by calling the [`Gc::gc`] method.
16//!
17//! Garbage collection for the global cache is guided by the last-use tracking
18//! implemented in the [`crate::core::global_cache_tracker`] module. See that
19//! module documentation for an in-depth explanation of how global cache
20//! tracking works.
21
22use crate::core::global_cache_tracker::{self, GlobalCacheTracker};
23use crate::ops::CleanContext;
24use crate::util::cache_lock::{CacheLock, CacheLockMode};
25use crate::{CargoResult, GlobalContext};
26use anyhow::{format_err, Context as _};
27use serde::Deserialize;
28use std::time::Duration;
29
/// Default max age to auto-clean extracted sources, which can be recovered
/// without downloading anything.
///
/// Used as the fallback for the `max-src-age` and `max-git-co-age` settings.
const DEFAULT_MAX_AGE_EXTRACTED: &str = "1 month";
/// Default max age to auto-clean cache data, which must be downloaded to
/// recover.
///
/// Used as the fallback for the `max-crate-age`, `max-index-age`, and
/// `max-git-db-age` settings.
const DEFAULT_MAX_AGE_DOWNLOADED: &str = "3 months";
/// How often auto-gc will run by default unless overridden in the config
/// (`cache.auto-clean-frequency`).
const DEFAULT_AUTO_FREQUENCY: &str = "1 day";
38
39/// Performs automatic garbage collection.
40///
41/// This is called in various places in Cargo where garbage collection should
42/// be performed automatically based on the config settings. The default
43/// behavior is to only clean once a day.
44///
45/// This should only be called in code paths for commands that are already
46/// doing a lot of work. It should only be called *after* crates are
47/// downloaded so that the last-use data is updated first.
48///
49/// It should be cheap to call this multiple times (subsequent calls are
50/// ignored), but try not to abuse that.
51pub fn auto_gc(gctx: &GlobalContext) {
52    if !gctx.network_allowed() {
53        // As a conservative choice, auto-gc is disabled when offline. If the
54        // user is indefinitely offline, we don't want to delete things they
55        // may later depend on.
56        tracing::trace!(target: "gc", "running offline, auto gc disabled");
57        return;
58    }
59
60    if let Err(e) = auto_gc_inner(gctx) {
61        if global_cache_tracker::is_silent_error(&e) && !gctx.extra_verbose() {
62            tracing::warn!(target: "gc", "failed to auto-clean cache data: {e:?}");
63        } else {
64            crate::display_warning_with_error(
65                "failed to auto-clean cache data",
66                &e,
67                &mut gctx.shell(),
68            );
69        }
70    }
71}
72
73fn auto_gc_inner(gctx: &GlobalContext) -> CargoResult<()> {
74    let _lock = match gctx.try_acquire_package_cache_lock(CacheLockMode::MutateExclusive)? {
75        Some(lock) => lock,
76        None => {
77            tracing::debug!(target: "gc", "unable to acquire mutate lock, auto gc disabled");
78            return Ok(());
79        }
80    };
81    // This should not be called when there are pending deferred entries, so check that.
82    let deferred = gctx.deferred_global_last_use()?;
83    debug_assert!(deferred.is_empty());
84    let mut global_cache_tracker = gctx.global_cache_tracker()?;
85    let mut gc = Gc::new(gctx, &mut global_cache_tracker)?;
86    let mut clean_ctx = CleanContext::new(gctx);
87    gc.auto(&mut clean_ctx)?;
88    Ok(())
89}
90
/// Cache cleaning settings from the `cache.global-clean` config table.
///
/// NOTE: Not all of these options may get stabilized. Some of them are very
/// low-level details, and may not be something typical users need.
///
/// If any of these options are `None`, the built-in default is used.
///
/// Each value is kept as the raw string from config (for example
/// `"3 months"`) and is only parsed into a duration when the options are
/// merged into a [`GcOpts`].
#[derive(Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
struct GlobalCleanConfig {
    /// Anything older than this duration will be deleted in the source cache.
    max_src_age: Option<String>,
    /// Anything older than this duration will be deleted in the compressed crate cache.
    max_crate_age: Option<String>,
    /// Any index older than this duration will be deleted from the index cache.
    max_index_age: Option<String>,
    /// Any git checkout older than this duration will be deleted from the checkout cache.
    max_git_co_age: Option<String>,
    /// Any git clone older than this duration will be deleted from the git cache.
    max_git_db_age: Option<String>,
}
111
/// Options to use for garbage collection.
///
/// Every field is optional; `None` means the corresponding limit was not
/// requested.
#[derive(Clone, Debug, Default)]
pub struct GcOpts {
    /// The `--max-src-age` CLI option.
    pub max_src_age: Option<Duration>,
    /// The `--max-crate-age` CLI option.
    pub max_crate_age: Option<Duration>,
    /// The `--max-index-age` CLI option.
    pub max_index_age: Option<Duration>,
    /// The `--max-git-co-age` CLI option.
    pub max_git_co_age: Option<Duration>,
    /// The `--max-git-db-age` CLI option.
    pub max_git_db_age: Option<Duration>,
    /// The `--max-src-size` CLI option.
    pub max_src_size: Option<u64>,
    /// The `--max-crate-size` CLI option.
    pub max_crate_size: Option<u64>,
    /// The `--max-git-size` CLI option.
    pub max_git_size: Option<u64>,
    /// The `--max-download-size` CLI option.
    pub max_download_size: Option<u64>,
}
134
135impl GcOpts {
136    /// Returns whether any download cache cleaning options are set.
137    pub fn is_download_cache_opt_set(&self) -> bool {
138        self.max_src_age.is_some()
139            || self.max_crate_age.is_some()
140            || self.max_index_age.is_some()
141            || self.max_git_co_age.is_some()
142            || self.max_git_db_age.is_some()
143            || self.max_src_size.is_some()
144            || self.max_crate_size.is_some()
145            || self.max_git_size.is_some()
146            || self.max_download_size.is_some()
147    }
148
149    /// Returns whether any download cache cleaning options based on size are set.
150    pub fn is_download_cache_size_set(&self) -> bool {
151        self.max_src_size.is_some()
152            || self.max_crate_size.is_some()
153            || self.max_git_size.is_some()
154            || self.max_download_size.is_some()
155    }
156
157    /// Updates the `GcOpts` to incorporate the specified max download age.
158    ///
159    /// "Download" means any cached data that can be re-downloaded.
160    pub fn set_max_download_age(&mut self, max_download_age: Duration) {
161        self.max_src_age = Some(maybe_newer_span(max_download_age, self.max_src_age));
162        self.max_crate_age = Some(maybe_newer_span(max_download_age, self.max_crate_age));
163        self.max_index_age = Some(maybe_newer_span(max_download_age, self.max_index_age));
164        self.max_git_co_age = Some(maybe_newer_span(max_download_age, self.max_git_co_age));
165        self.max_git_db_age = Some(maybe_newer_span(max_download_age, self.max_git_db_age));
166    }
167
168    /// Updates the configuration of this [`GcOpts`] to incorporate the
169    /// settings from config.
170    pub fn update_for_auto_gc(&mut self, gctx: &GlobalContext) -> CargoResult<()> {
171        let config = gctx
172            .get::<Option<GlobalCleanConfig>>("cache.global-clean")?
173            .unwrap_or_default();
174        self.update_for_auto_gc_config(&config, gctx.cli_unstable().gc)
175    }
176
177    fn update_for_auto_gc_config(
178        &mut self,
179        config: &GlobalCleanConfig,
180        unstable_allowed: bool,
181    ) -> CargoResult<()> {
182        macro_rules! config_default {
183            ($config:expr, $field:ident, $default:expr, $unstable_allowed:expr) => {
184                if !unstable_allowed {
185                    // These config options require -Zgc
186                    $default
187                } else {
188                    $config.$field.as_deref().unwrap_or($default)
189                }
190            };
191        }
192
193        self.max_src_age = newer_time_span_for_config(
194            self.max_src_age,
195            "gc.auto.max-src-age",
196            config_default!(
197                config,
198                max_src_age,
199                DEFAULT_MAX_AGE_EXTRACTED,
200                unstable_allowed
201            ),
202        )?;
203        self.max_crate_age = newer_time_span_for_config(
204            self.max_crate_age,
205            "gc.auto.max-crate-age",
206            config_default!(
207                config,
208                max_crate_age,
209                DEFAULT_MAX_AGE_DOWNLOADED,
210                unstable_allowed
211            ),
212        )?;
213        self.max_index_age = newer_time_span_for_config(
214            self.max_index_age,
215            "gc.auto.max-index-age",
216            config_default!(
217                config,
218                max_index_age,
219                DEFAULT_MAX_AGE_DOWNLOADED,
220                unstable_allowed
221            ),
222        )?;
223        self.max_git_co_age = newer_time_span_for_config(
224            self.max_git_co_age,
225            "gc.auto.max-git-co-age",
226            config_default!(
227                config,
228                max_git_co_age,
229                DEFAULT_MAX_AGE_EXTRACTED,
230                unstable_allowed
231            ),
232        )?;
233        self.max_git_db_age = newer_time_span_for_config(
234            self.max_git_db_age,
235            "gc.auto.max-git-db-age",
236            config_default!(
237                config,
238                max_git_db_age,
239                DEFAULT_MAX_AGE_DOWNLOADED,
240                unstable_allowed
241            ),
242        )?;
243        Ok(())
244    }
245}
246
/// Garbage collector.
///
/// See the module docs at [`crate::core::gc`] for more information on GC.
///
/// Create one with [`Gc::new`], which acquires the package cache lock that
/// is then held for the lifetime of the value.
pub struct Gc<'a, 'gctx> {
    /// Global context, used to read config values and to acquire the lock.
    gctx: &'gctx GlobalContext,
    /// Tracker that decides whether auto-gc is due and performs the cleaning.
    global_cache_tracker: &'a mut GlobalCacheTracker,
    /// A lock on the package cache.
    ///
    /// This is important to be held, since we don't want multiple cargos to
    /// be allowed to write to the cache at the same time, or for others to
    /// read while we are modifying the cache.
    #[allow(dead_code)] // Held for drop.
    lock: CacheLock<'gctx>,
}
261
262impl<'a, 'gctx> Gc<'a, 'gctx> {
263    pub fn new(
264        gctx: &'gctx GlobalContext,
265        global_cache_tracker: &'a mut GlobalCacheTracker,
266    ) -> CargoResult<Gc<'a, 'gctx>> {
267        let lock = gctx.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?;
268        Ok(Gc {
269            gctx,
270            global_cache_tracker,
271            lock,
272        })
273    }
274
275    /// Performs automatic garbage cleaning.
276    ///
277    /// This returns immediately without doing work if garbage collection has
278    /// been performed recently (since `cache.auto-clean-frequency`).
279    fn auto(&mut self, clean_ctx: &mut CleanContext<'gctx>) -> CargoResult<()> {
280        let freq = self
281            .gctx
282            .get::<Option<String>>("cache.auto-clean-frequency")?;
283        let Some(freq) = parse_frequency(freq.as_deref().unwrap_or(DEFAULT_AUTO_FREQUENCY))? else {
284            tracing::trace!(target: "gc", "auto gc disabled");
285            return Ok(());
286        };
287        if !self.global_cache_tracker.should_run_auto_gc(freq)? {
288            return Ok(());
289        }
290        let config = self
291            .gctx
292            .get::<Option<GlobalCleanConfig>>("cache.global-clean")?
293            .unwrap_or_default();
294
295        let mut gc_opts = GcOpts::default();
296        gc_opts.update_for_auto_gc_config(&config, self.gctx.cli_unstable().gc)?;
297        self.gc(clean_ctx, &gc_opts)?;
298        if !clean_ctx.dry_run {
299            self.global_cache_tracker.set_last_auto_gc()?;
300        }
301        Ok(())
302    }
303
304    /// Performs garbage collection based on the given options.
305    pub fn gc(&mut self, clean_ctx: &mut CleanContext<'gctx>, gc_opts: &GcOpts) -> CargoResult<()> {
306        self.global_cache_tracker.clean(clean_ctx, gc_opts)?;
307        // In the future, other gc operations go here, such as target cleaning.
308        Ok(())
309    }
310}
311
312/// Returns the shorter duration from `cur_span` versus `config_span`.
313///
314/// This is used because the user may specify multiple options which overlap,
315/// and this will pick whichever one is shorter.
316///
317/// * `cur_span` is the span we are comparing against (the value from the CLI
318///   option). If None, just returns the config duration.
319/// * `config_name` is the name of the config option the span is loaded from.
320/// * `config_span` is the span value loaded from config.
321fn newer_time_span_for_config(
322    cur_span: Option<Duration>,
323    config_name: &str,
324    config_span: &str,
325) -> CargoResult<Option<Duration>> {
326    let config_span = parse_time_span_for_config(config_name, config_span)?;
327    Ok(Some(maybe_newer_span(config_span, cur_span)))
328}
329
/// Returns the shorter of the two durations, or `a` when `b` is `None`.
fn maybe_newer_span(a: Duration, b: Option<Duration>) -> Duration {
    b.map_or(a, |b| a.min(b))
}
343
344/// Parses a frequency string.
345///
346/// Returns `Ok(None)` if the frequency is "never".
347fn parse_frequency(frequency: &str) -> CargoResult<Option<Duration>> {
348    if frequency == "always" {
349        return Ok(Some(Duration::new(0, 0)));
350    } else if frequency == "never" {
351        return Ok(None);
352    }
353    let duration = maybe_parse_time_span(frequency).ok_or_else(|| {
354        format_err!(
355            "config option `cache.auto-clean-frequency` expected a value of \"always\", \"never\", \
356             or \"N seconds/minutes/days/weeks/months\", got: {frequency:?}"
357        )
358    })?;
359    Ok(Some(duration))
360}
361
362/// Parses a time span value fetched from config.
363///
364/// This is here to provide better error messages specific to reading from
365/// config.
366fn parse_time_span_for_config(config_name: &str, span: &str) -> CargoResult<Duration> {
367    maybe_parse_time_span(span).ok_or_else(|| {
368        format_err!(
369            "config option `{config_name}` expected a value of the form \
370             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
371        )
372    })
373}
374
/// Parses a time span string.
///
/// The accepted form is an unsigned integer, an optional single space, and a
/// unit: `second(s)`, `minute(s)`, `hour(s)`, `day(s)`, `week(s)` or
/// `month(s)`.
///
/// Returns None if the value is not valid, including when the resulting
/// number of seconds does not fit in a `u64`. See [`parse_time_span`] if you
/// need a variant that generates an error message.
fn maybe_parse_time_span(span: &str) -> Option<Duration> {
    // The unit starts at the first character that is not an ASCII digit; a
    // string made only of digits (or an empty string) has no unit.
    let unit_start = span.find(|c: char| !c.is_ascii_digit())?;
    let (count, unit) = span.split_at(unit_start);
    // At most one space may separate the count from the unit.
    let unit = unit.strip_prefix(' ').unwrap_or(unit);
    let count: u64 = count.parse().ok()?;
    let seconds_per_unit: u64 = match unit {
        "second" | "seconds" => 1,
        "minute" | "minutes" => 60,
        "hour" | "hours" => 60 * 60,
        "day" | "days" => 24 * 60 * 60,
        "week" | "weeks" => 7 * 24 * 60 * 60,
        "month" | "months" => 2_629_746, // average is 30.436875 days
        _ => return None,
    };
    // A huge count must be rejected rather than panicking (debug builds) or
    // silently wrapping to a much shorter duration (release builds).
    count.checked_mul(seconds_per_unit).map(Duration::from_secs)
}
399
400/// Parses a time span string.
401pub fn parse_time_span(span: &str) -> CargoResult<Duration> {
402    maybe_parse_time_span(span).ok_or_else(|| {
403        format_err!(
404            "expected a value of the form \
405             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
406        )
407    })
408}
409
410/// Parses a file size using metric or IEC units.
411pub fn parse_human_size(input: &str) -> CargoResult<u64> {
412    let re = regex::Regex::new(r"(?i)^([0-9]+(\.[0-9])?) ?(b|kb|mb|gb|kib|mib|gib)?$").unwrap();
413    let cap = re.captures(input).ok_or_else(|| {
414        format_err!(
415            "invalid size `{input}`, \
416             expected a number with an optional B, kB, MB, GB, kiB, MiB, or GiB suffix"
417        )
418    })?;
419    let factor = match cap.get(3) {
420        Some(suffix) => match suffix.as_str().to_lowercase().as_str() {
421            "b" => 1.0,
422            "kb" => 1_000.0,
423            "mb" => 1_000_000.0,
424            "gb" => 1_000_000_000.0,
425            "kib" => 1024.0,
426            "mib" => 1024.0 * 1024.0,
427            "gib" => 1024.0 * 1024.0 * 1024.0,
428            s => unreachable!("suffix `{s}` out of sync with regex"),
429        },
430        None => {
431            return cap[1]
432                .parse()
433                .with_context(|| format!("expected an integer size, got `{}`", &cap[1]))
434        }
435    };
436    let num = cap[1]
437        .parse::<f64>()
438        .with_context(|| format!("expected an integer or float, found `{}`", &cap[1]))?;
439    Ok((num * factor) as u64)
440}
441
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected result of a successful time span parse, given in seconds.
    fn secs(n: u64) -> Option<Duration> {
        Some(Duration::from_secs(n))
    }

    /// Valid time spans and frequencies parse to the expected durations.
    #[test]
    fn time_spans() {
        let valid_spans = [
            ("0 seconds", 0),
            ("1second", 1),
            ("23 seconds", 23),
            ("5 minutes", 60 * 5),
            ("2 hours", 60 * 60 * 2),
            ("1 day", 60 * 60 * 24),
            ("2 weeks", 60 * 60 * 24 * 14),
            ("6 months", 2_629_746 * 6),
        ];
        for (input, expected) in valid_spans {
            assert_eq!(maybe_parse_time_span(input), secs(expected));
        }

        let valid_frequencies = [("5 seconds", secs(5)), ("always", secs(0)), ("never", None)];
        for (input, expected) in valid_frequencies {
            assert_eq!(parse_frequency(input).unwrap(), expected);
        }
    }

    /// Malformed time spans are rejected, with the documented error text.
    #[test]
    fn time_span_errors() {
        let invalid_spans = [
            "",
            "1",
            "second",
            "+2 seconds",
            "day",
            "-1 days",
            "1.5 days",
            "1 dayz",
            "always",
            "never",
            "1 day ",
            " 1 day",
            "1  second",
        ];
        for input in invalid_spans {
            assert_eq!(maybe_parse_time_span(input), None);
        }

        let span_error =
            parse_time_span_for_config("cache.global-clean.max-src-age", "-1 days").unwrap_err();
        assert_eq!(
            span_error.to_string(),
            "config option `cache.global-clean.max-src-age` \
             expected a value of the form \"N seconds/minutes/days/weeks/months\", \
             got: \"-1 days\""
        );

        let frequency_error = parse_frequency("abc").unwrap_err();
        assert_eq!(
            frequency_error.to_string(),
            "config option `cache.auto-clean-frequency` \
             expected a value of \"always\", \"never\", or \"N seconds/minutes/days/weeks/months\", \
             got: \"abc\""
        );
    }

    /// Human-readable sizes convert to byte counts; malformed ones fail.
    #[test]
    fn human_sizes() {
        let valid_sizes: [(&str, u64); 15] = [
            ("0", 0),
            ("123", 123),
            ("123b", 123),
            ("123B", 123),
            ("123 b", 123),
            ("123 B", 123),
            ("1kb", 1_000),
            ("5kb", 5_000),
            ("1mb", 1_000_000),
            ("1gb", 1_000_000_000),
            ("1kib", 1_024),
            ("1mib", 1_048_576),
            ("1gib", 1_073_741_824),
            ("1.5kb", 1_500),
            ("1.7b", 1),
        ];
        for (input, expected) in valid_sizes {
            assert_eq!(parse_human_size(input).unwrap(), expected);
        }

        let invalid_sizes = ["", "x", "1x", "1 2", "1.5", "+1", "123  b"];
        for input in invalid_sizes {
            assert!(parse_human_size(input).is_err());
        }
    }
}