crates_io/
lib.rs

1//! > This crate is maintained by the Cargo team for use by the wider
2//! > ecosystem. This crate follows semver compatibility for its APIs.
3
4use std::collections::BTreeMap;
5use std::fs::File;
6use std::io::prelude::*;
7use std::io::{Cursor, SeekFrom};
8use std::time::Instant;
9
10use curl::easy::{Easy, List};
11use percent_encoding::{percent_encode, NON_ALPHANUMERIC};
12use serde::{Deserialize, Serialize};
13use url::Url;
14
/// Convenience alias for results whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
16
/// Client state for talking to a registry's web API: the base URL, an
/// optional token, and the curl handle every request is issued on.
pub struct Registry {
    /// The base URL for issuing API requests.
    host: String,
    /// Optional authorization token.
    /// If None, commands requiring authorization will fail.
    token: Option<String>,
    /// Curl handle for issuing requests.
    handle: Easy,
    /// Whether to include the authorization token with all requests.
    auth_required: bool,
}
28
/// Whether a request must carry the `Authorization` header.
///
/// `Registry::req` attaches the token when this is `Authorized`, or when the
/// registry itself has `auth_required` set.
#[derive(PartialEq, Clone, Copy)]
pub enum Auth {
    /// The request always carries the token.
    Authorized,
    /// The request carries the token only if the registry requires it for
    /// every request.
    Unauthorized,
}
34
/// One entry of the `crates` array in a search response, as returned by
/// `Registry::search`.
#[derive(Deserialize)]
pub struct Crate {
    pub name: String,
    /// `None` when the response carries no description for the crate.
    pub description: Option<String>,
    pub max_version: String,
}
41
/// This struct is serialized as JSON and sent as metadata ahead of the crate
/// tarball when publishing crates to a crate registry like crates.io.
///
/// `Registry::publish` serializes it with `serde_json` and writes it after a
/// little-endian `u32` length prefix, before the tarball itself.
///
/// See <https://doc.rust-lang.org/cargo/reference/registry-web-api.html#publish>
#[derive(Serialize, Deserialize)]
pub struct NewCrate {
    pub name: String,
    pub vers: String,
    pub deps: Vec<NewCrateDependency>,
    pub features: BTreeMap<String, Vec<String>>,
    pub authors: Vec<String>,
    pub description: Option<String>,
    pub documentation: Option<String>,
    pub homepage: Option<String>,
    pub readme: Option<String>,
    pub readme_file: Option<String>,
    pub keywords: Vec<String>,
    pub categories: Vec<String>,
    pub license: Option<String>,
    pub license_file: Option<String>,
    pub repository: Option<String>,
    pub badges: BTreeMap<String, BTreeMap<String, String>>,
    pub links: Option<String>,
    pub rust_version: Option<String>,
}
67
/// A single dependency entry of [`NewCrate::deps`].
///
/// The trailing optional fields are left out of the serialized JSON entirely
/// when they are unset, rather than being written as `null`/`false`.
#[derive(Serialize, Deserialize)]
pub struct NewCrateDependency {
    pub optional: bool,
    pub default_features: bool,
    pub name: String,
    pub features: Vec<String>,
    pub version_req: String,
    pub target: Option<String>,
    pub kind: String,
    // Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub registry: Option<String>,
    // Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub explicit_name_in_toml: Option<String>,
    // Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub artifact: Option<Vec<String>>,
    // Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bindep_target: Option<String>,
    // Defaults to `false` when missing on input; omitted on output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub lib: bool,
}
88
/// One entry of the `users` array in the owners response, as returned by
/// `Registry::list_owners`.
#[derive(Deserialize)]
pub struct User {
    pub id: u32,
    pub login: String,
    pub avatar: Option<String>,
    pub email: Option<String>,
    pub name: Option<String>,
}
97
/// Warnings that `Registry::publish` extracts from the `warnings` object of
/// the publish response. Each field holds the string entries of the
/// same-named array; an absent array yields an empty list.
pub struct Warnings {
    pub invalid_categories: Vec<String>,
    pub invalid_badges: Vec<String>,
    pub other: Vec<String>,
}
103
/// Response body parsed by `Registry::yank` and `Registry::unyank`; both
/// assert that `ok` is true.
#[derive(Deserialize)]
struct R {
    ok: bool,
}
/// Response body parsed by `Registry::add_owners` and
/// `Registry::remove_owners`. Both assert `ok`; `add_owners` returns `msg`.
#[derive(Deserialize)]
struct OwnerResponse {
    ok: bool,
    msg: String,
}
/// Error payload shape that `Registry::handle` tries to parse every response
/// body as; a successful parse turns the response into `Error::Api`.
#[derive(Deserialize)]
struct ApiErrorList {
    errors: Vec<ApiError>,
}
/// A single entry of `ApiErrorList::errors`; its `detail` text is what ends
/// up in `Error::Api::errors`.
#[derive(Deserialize)]
struct ApiError {
    detail: String,
}
/// JSON request body sent by `Registry::add_owners` and
/// `Registry::remove_owners`, borrowing the caller's list of owner names.
#[derive(Serialize)]
struct OwnersReq<'a> {
    users: &'a [&'a str],
}
/// Response body parsed by `Registry::list_owners`.
#[derive(Deserialize)]
struct Users {
    users: Vec<User>,
}
/// The `meta` object of a search response; `total` is returned as the second
/// element of the `Registry::search` result.
#[derive(Deserialize)]
struct TotalCrates {
    total: u32,
}
/// Response body parsed by `Registry::search`: the matching crates plus the
/// `meta` object carrying the total count.
#[derive(Deserialize)]
struct Crates {
    crates: Vec<Crate>,
    meta: TotalCrates,
}
138
/// Error returned when interacting with a registry.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Error from libcurl.
    #[error(transparent)]
    Curl(#[from] curl::Error),

    /// Error from serializing the request payload or deserializing the
    /// response body (for example, the response body didn't match the
    /// expected structure).
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// Error from IO. Mostly from reading the tarball to upload.
    #[error("failed to seek tarball")]
    Io(#[from] std::io::Error),

    /// Response body was not valid UTF-8.
    #[error("invalid response body from server")]
    Utf8(#[from] std::string::FromUtf8Error),

    /// Error from an API response whose JSON body contained an `errors`
    /// array; `errors` holds the `detail` string of each entry.
    #[error(
        "the remote server responded with an error{}: {}",
        status(*code),
        errors.join(", "),
    )]
    Api {
        /// Response code reported by the curl handle.
        code: u32,
        /// Response header lines, trimmed of surrounding whitespace.
        headers: Vec<String>,
        /// The `detail` text of every entry in the response's `errors` array.
        errors: Vec<String>,
    },

    /// Error from an API response that was not successful and whose body did
    /// not parse as an `errors` list.
    #[error(
        "failed to get a 200 OK response, got {code}\nheaders:\n\t{}\nbody:\n{body}",
        headers.join("\n\t"),
    )]
    Code {
        /// Response code reported by the curl handle.
        code: u32,
        /// Response header lines, trimmed of surrounding whitespace.
        headers: Vec<String>,
        /// The raw response body.
        body: String,
    },

    /// Reason why the token was invalid.
    #[error("{0}")]
    InvalidToken(&'static str),

    /// The server was unavailable and the request timed out. This has
    /// happened when uploading a far too large tarball to crates.io.
    ///
    /// The payload is the length of the tarball in bytes.
    #[error(
        "Request timed out after 30 seconds. If you're trying to \
         upload a crate it may be too large. If the crate is under \
         10MB in size, you can email help@crates.io for assistance.\n\
         Total size was {0}."
    )]
    Timeout(u64),
}
196
197impl Registry {
198    /// Creates a new `Registry`.
199    ///
200    /// ## Example
201    ///
202    /// ```rust
203    /// use curl::easy::Easy;
204    /// use crates_io::Registry;
205    ///
206    /// let mut handle = Easy::new();
207    /// // If connecting to crates.io, a user-agent is required.
208    /// handle.useragent("my_crawler (example.com/info)");
209    /// let mut reg = Registry::new_handle(String::from("https://crates.io"), None, handle, true);
210    /// ```
211    pub fn new_handle(
212        host: String,
213        token: Option<String>,
214        handle: Easy,
215        auth_required: bool,
216    ) -> Registry {
217        Registry {
218            host,
219            token,
220            handle,
221            auth_required,
222        }
223    }
224
    /// Replaces the stored authorization token; `None` clears it, after which
    /// requests that need the token fail with `Error::InvalidToken`.
    pub fn set_token(&mut self, token: Option<String>) {
        self.token = token;
    }
228
229    fn token(&self) -> Result<&str> {
230        let token = self.token.as_ref().ok_or_else(|| {
231            Error::InvalidToken("no upload token found, please run `cargo login`")
232        })?;
233        check_token(token)?;
234        Ok(token)
235    }
236
237    pub fn host(&self) -> &str {
238        &self.host
239    }
240
241    pub fn host_is_crates_io(&self) -> bool {
242        is_url_crates_io(&self.host)
243    }
244
245    pub fn add_owners(&mut self, krate: &str, owners: &[&str]) -> Result<String> {
246        let body = serde_json::to_string(&OwnersReq { users: owners })?;
247        let body = self.put(&format!("/crates/{}/owners", krate), body.as_bytes())?;
248        assert!(serde_json::from_str::<OwnerResponse>(&body)?.ok);
249        Ok(serde_json::from_str::<OwnerResponse>(&body)?.msg)
250    }
251
252    pub fn remove_owners(&mut self, krate: &str, owners: &[&str]) -> Result<()> {
253        let body = serde_json::to_string(&OwnersReq { users: owners })?;
254        let body = self.delete(&format!("/crates/{}/owners", krate), Some(body.as_bytes()))?;
255        assert!(serde_json::from_str::<OwnerResponse>(&body)?.ok);
256        Ok(())
257    }
258
259    pub fn list_owners(&mut self, krate: &str) -> Result<Vec<User>> {
260        let body = self.get(&format!("/crates/{}/owners", krate))?;
261        Ok(serde_json::from_str::<Users>(&body)?.users)
262    }
263
264    pub fn publish(&mut self, krate: &NewCrate, mut tarball: &File) -> Result<Warnings> {
265        let json = serde_json::to_string(krate)?;
266        // Prepare the body. The format of the upload request is:
267        //
268        //      <le u32 of json>
269        //      <json request> (metadata for the package)
270        //      <le u32 of tarball>
271        //      <source tarball>
272
273        // NOTE: This can be replaced with `stream_len` if it is ever stabilized.
274        //
275        // This checks the length using seeking instead of metadata, because
276        // on some filesystems, getting the metadata will fail because
277        // the file was renamed in ops::package.
278        let tarball_len = tarball.seek(SeekFrom::End(0))?;
279        tarball.seek(SeekFrom::Start(0))?;
280        let header = {
281            let mut w = Vec::new();
282            w.extend(&(json.len() as u32).to_le_bytes());
283            w.extend(json.as_bytes().iter().cloned());
284            w.extend(&(tarball_len as u32).to_le_bytes());
285            w
286        };
287        let size = tarball_len as usize + header.len();
288        let mut body = Cursor::new(header).chain(tarball);
289
290        let url = format!("{}/api/v1/crates/new", self.host);
291
292        self.handle.put(true)?;
293        self.handle.url(&url)?;
294        self.handle.in_filesize(size as u64)?;
295        let mut headers = List::new();
296        headers.append("Accept: application/json")?;
297        headers.append(&format!("Authorization: {}", self.token()?))?;
298        self.handle.http_headers(headers)?;
299
300        let started = Instant::now();
301        let body = self
302            .handle(&mut |buf| body.read(buf).unwrap_or(0))
303            .map_err(|e| match e {
304                Error::Code { code, .. }
305                    if code == 503
306                        && started.elapsed().as_secs() >= 29
307                        && self.host_is_crates_io() =>
308                {
309                    Error::Timeout(tarball_len)
310                }
311                _ => e.into(),
312            })?;
313
314        let response = if body.is_empty() {
315            "{}".parse()?
316        } else {
317            body.parse::<serde_json::Value>()?
318        };
319
320        let invalid_categories: Vec<String> = response
321            .get("warnings")
322            .and_then(|j| j.get("invalid_categories"))
323            .and_then(|j| j.as_array())
324            .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect())
325            .unwrap_or_else(Vec::new);
326
327        let invalid_badges: Vec<String> = response
328            .get("warnings")
329            .and_then(|j| j.get("invalid_badges"))
330            .and_then(|j| j.as_array())
331            .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect())
332            .unwrap_or_else(Vec::new);
333
334        let other: Vec<String> = response
335            .get("warnings")
336            .and_then(|j| j.get("other"))
337            .and_then(|j| j.as_array())
338            .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect())
339            .unwrap_or_else(Vec::new);
340
341        Ok(Warnings {
342            invalid_categories,
343            invalid_badges,
344            other,
345        })
346    }
347
348    pub fn search(&mut self, query: &str, limit: u32) -> Result<(Vec<Crate>, u32)> {
349        let formatted_query = percent_encode(query.as_bytes(), NON_ALPHANUMERIC);
350        let body = self.req(
351            &format!("/crates?q={}&per_page={}", formatted_query, limit),
352            None,
353            Auth::Unauthorized,
354        )?;
355
356        let crates = serde_json::from_str::<Crates>(&body)?;
357        Ok((crates.crates, crates.meta.total))
358    }
359
360    pub fn yank(&mut self, krate: &str, version: &str) -> Result<()> {
361        let body = self.delete(&format!("/crates/{}/{}/yank", krate, version), None)?;
362        assert!(serde_json::from_str::<R>(&body)?.ok);
363        Ok(())
364    }
365
366    pub fn unyank(&mut self, krate: &str, version: &str) -> Result<()> {
367        let body = self.put(&format!("/crates/{}/{}/unyank", krate, version), &[])?;
368        assert!(serde_json::from_str::<R>(&body)?.ok);
369        Ok(())
370    }
371
    /// Issues an authorized `PUT` to `path` with `b` as the request body and
    /// returns the response body.
    fn put(&mut self, path: &str, b: &[u8]) -> Result<String> {
        // The method must be selected on the handle before `req` performs
        // the transfer.
        self.handle.put(true)?;
        self.req(path, Some(b), Auth::Authorized)
    }
376
    /// Issues an authorized `GET` to `path` and returns the response body.
    fn get(&mut self, path: &str) -> Result<String> {
        // The method must be selected on the handle before `req` performs
        // the transfer.
        self.handle.get(true)?;
        self.req(path, None, Auth::Authorized)
    }
381
    /// Issues an authorized `DELETE` to `path`, optionally with the request
    /// body `b`, and returns the response body.
    fn delete(&mut self, path: &str, b: Option<&[u8]>) -> Result<String> {
        // NOTE(review): a custom request method presumably stays set on the
        // reused curl handle until it is overridden — confirm that later
        // requests on the same `Registry` are not affected.
        self.handle.custom_request("DELETE")?;
        self.req(path, b, Auth::Authorized)
    }
386
387    fn req(&mut self, path: &str, body: Option<&[u8]>, authorized: Auth) -> Result<String> {
388        self.handle.url(&format!("{}/api/v1{}", self.host, path))?;
389        let mut headers = List::new();
390        headers.append("Accept: application/json")?;
391        if body.is_some() {
392            headers.append("Content-Type: application/json")?;
393        }
394
395        if self.auth_required || authorized == Auth::Authorized {
396            headers.append(&format!("Authorization: {}", self.token()?))?;
397        }
398        self.handle.http_headers(headers)?;
399        match body {
400            Some(mut body) => {
401                self.handle.upload(true)?;
402                self.handle.in_filesize(body.len() as u64)?;
403                self.handle(&mut |buf| body.read(buf).unwrap_or(0))
404                    .map_err(|e| e.into())
405            }
406            None => self.handle(&mut |_| 0).map_err(|e| e.into()),
407        }
408    }
409
410    fn handle(&mut self, read: &mut dyn FnMut(&mut [u8]) -> usize) -> Result<String> {
411        let mut headers = Vec::new();
412        let mut body = Vec::new();
413        {
414            let mut handle = self.handle.transfer();
415            handle.read_function(|buf| Ok(read(buf)))?;
416            handle.write_function(|data| {
417                body.extend_from_slice(data);
418                Ok(data.len())
419            })?;
420            handle.header_function(|data| {
421                // Headers contain trailing \r\n, trim them to make it easier
422                // to work with.
423                let s = String::from_utf8_lossy(data).trim().to_string();
424                // Don't let server sneak extra lines anywhere.
425                if s.contains('\n') {
426                    return true;
427                }
428                headers.push(s);
429                true
430            })?;
431            handle.perform()?;
432        }
433
434        let body = String::from_utf8(body)?;
435        let errors = serde_json::from_str::<ApiErrorList>(&body)
436            .ok()
437            .map(|s| s.errors.into_iter().map(|s| s.detail).collect::<Vec<_>>());
438
439        match (self.handle.response_code()?, errors) {
440            (0, None) => Ok(body),
441            (code, None) if is_success(code) => Ok(body),
442            (code, Some(errors)) => Err(Error::Api {
443                code,
444                headers,
445                errors,
446            }),
447            (code, None) => Err(Error::Code {
448                code,
449                headers,
450                body,
451            }),
452        }
453    }
454}
455
/// Returns `true` when `code` lies in the 2xx range (200 through 299).
fn is_success(code: u32) -> bool {
    (200..300).contains(&code)
}
459
460fn status(code: u32) -> String {
461    if is_success(code) {
462        String::new()
463    } else {
464        let reason = reason(code);
465        format!(" (status {code} {reason})")
466    }
467}
468
/// Returns the reason phrase for an HTTP status `code`, or `"<unknown>"`
/// when the code is not in the table below.
fn reason(code: u32) -> &'static str {
    // Taken from https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
    //
    // The phrases for 101, 300 and 416 use the registered wording
    // ("Switching Protocols", "Multiple Choices", "Range Not Satisfiable").
    match code {
        100 => "Continue",
        101 => "Switching Protocols",
        103 => "Early Hints",
        200 => "OK",
        201 => "Created",
        202 => "Accepted",
        203 => "Non-Authoritative Information",
        204 => "No Content",
        205 => "Reset Content",
        206 => "Partial Content",
        300 => "Multiple Choices",
        301 => "Moved Permanently",
        302 => "Found",
        303 => "See Other",
        304 => "Not Modified",
        307 => "Temporary Redirect",
        308 => "Permanent Redirect",
        400 => "Bad Request",
        401 => "Unauthorized",
        402 => "Payment Required",
        403 => "Forbidden",
        404 => "Not Found",
        405 => "Method Not Allowed",
        406 => "Not Acceptable",
        407 => "Proxy Authentication Required",
        408 => "Request Timeout",
        409 => "Conflict",
        410 => "Gone",
        411 => "Length Required",
        412 => "Precondition Failed",
        413 => "Payload Too Large",
        414 => "URI Too Long",
        415 => "Unsupported Media Type",
        416 => "Range Not Satisfiable",
        417 => "Expectation Failed",
        429 => "Too Many Requests",
        431 => "Request Header Fields Too Large",
        500 => "Internal Server Error",
        501 => "Not Implemented",
        502 => "Bad Gateway",
        503 => "Service Unavailable",
        504 => "Gateway Timeout",
        _ => "<unknown>",
    }
}
517
518/// Returns `true` if the host of the given URL is "crates.io".
519pub fn is_url_crates_io(url: &str) -> bool {
520    Url::parse(url)
521        .map(|u| u.host_str() == Some("crates.io"))
522        .unwrap_or(false)
523}
524
525/// Checks if a token is valid or malformed.
526///
527/// This check is necessary to prevent sending tokens which create an invalid HTTP request.
528/// It would be easier to check just for alphanumeric tokens, but we can't be sure that all
529/// registries only create tokens in that format so that is as less restricted as possible.
530pub fn check_token(token: &str) -> Result<()> {
531    if token.is_empty() {
532        return Err(Error::InvalidToken("please provide a non-empty token"));
533    }
534    if token.bytes().all(|b| {
535        // This is essentially the US-ASCII limitation of
536        // https://www.rfc-editor.org/rfc/rfc9110#name-field-values. That is,
537        // visible ASCII characters (0x21-0x7e), space, and tab. We want to be
538        // able to pass this in an HTTP header without encoding.
539        b >= 32 && b < 127 || b == b'\t'
540    }) {
541        Ok(())
542    } else {
543        Err(Error::InvalidToken(
544            "token contains invalid characters.\nOnly printable ISO-8859-1 characters \
545             are allowed as it is sent in a HTTPS header.",
546        ))
547    }
548}