commit: | e8d6a8657abf902bfcf0c2c9c2886c4676eb36bd |
author: | Trevor Bentley |
committer: | Trevor Bentley |
date: | Fri Jul 5 14:28:27 2019 +0200 |
parents: |
diff --git a/Cargo.toml b/Cargo.toml line changes: +13/-0 index 0000000..5d2140f --- /dev/null +++ b/Cargo.toml
@@ -0,0 +1,13 @@ +[package] +name = "crate_dep_analyzer" +version = "0.1.0" +authors = ["Trevor Bentley"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +crates_io_api = { version = "0.5", path = "deps/crates_io_api" } +statistical = "1.0" +serde = "1.0" +serde_json = "1.0"
diff --git a/deps/crates_io_api/.circleci/config.yml b/deps/crates_io_api/.circleci/config.yml line changes: +62/-0 index 0000000..f9863ea --- /dev/null +++ b/deps/crates_io_api/.circleci/config.yml
@@ -0,0 +1,62 @@ +version: 2.0 + +jobs: + build_stable: + docker: + - image: debian + steps: + - restore_cache: + key: rust + - checkout + - run: ~/.circleci/run-tests.sh + - save_cache: + key: rustup + paths: + - ~/.rustup + - ~/.cargo + + build_beta: + docker: + - image: debian + steps: + - restore_cache: + key: rust + - checkout + - run: ~/.circleci/run-tests.sh + - save_cache: + key: rustup + paths: + - ~/.rustup + - ~/.cargo + + build_nightly: + docker: + - image: debian + steps: + - restore_cache: + key: rust + - checkout + - run: ~/.circleci/run-tests.sh + - save_cache: + key: rustup + paths: + - ~/.rustup + - ~/.cargo + +workflows: + version: 2 + build: + jobs: + build_stable: + filters: + tags: + ignore: /*/ + build_beta: + filters: + tags: + ignore: /*/ + build_nightly: + filters: + tags: + ignore: /*/ +
diff --git a/deps/crates_io_api/.circleci/run-tests.sh b/deps/crates_io_api/.circleci/run-tests.sh line changes: +23/-0 index 0000000..b37dd67 --- /dev/null +++ b/deps/crates_io_api/.circleci/run-tests.sh
@@ -0,0 +1,23 @@ +#! /bin/sh + +set -eo pipefail + +TOOLCHAIN=$1 + +if [ -z "$TOOLCHAIN" ]; then + echo "Usage: ./run-tests.sh TOOLCHAIN" + exit 1 +fi + +apt update +apt install -y curl build-essential openssl libssl-dev + +if [ ! -f ~/.cargo/bin/rustup ]; then + curl https://sh.rustup.rs -sSf | bash -s -- -y --default-toolchain=none +fi + +~/.cargo/bin/rustup install $TOOLCHAIN +~/.cargo/bin/rustup default $TOOLCHAIN + +~/.cargo/bin/cargo test +
diff --git a/deps/crates_io_api/.travis.yml b/deps/crates_io_api/.travis.yml line changes: +6/-0 index 0000000..b36c3ee --- /dev/null +++ b/deps/crates_io_api/.travis.yml
@@ -0,0 +1,6 @@ +language: rust +rust: + - stable + - beta + - nightly +cache: cargo
diff --git a/deps/crates_io_api/CHANGELOG.md b/deps/crates_io_api/CHANGELOG.md line changes: +42/-0 index 0000000..6fa3dc1 --- /dev/null +++ b/deps/crates_io_api/CHANGELOG.md
@@ -0,0 +1,42 @@ +# Changelog + +## 0.5.0 - 2019/06/22 + +* Add 7 missing type fields for: + * Crate {recent_downloads, exact_match} + * CrateResponse {versions, keywords, categories} + * Version {crate_size, published_by} +* Make field optional: User {kind} +* Fix getting the reverse dependencies. + * Rearrange the received data for simpler manipulation. + * Add 3 new types: + * ReverseDependenciesAsReceived {dependencies, versions, meta} + * ReverseDependencies {dependencies, meta} + * ReverseDependency {crate_version, dependency} + +## 0.4.1 - 2019/03/09 + +* Fixed errors for version information due to the `id` field being removed from the API. [PR #11](https://github.com/theduke/crates_io_api/pull/11) + +## 0.4.0 - 2019/03/01 + +* Added `with_user_agent` method to client +* Switch to 2018 edition, requiring rustc 1.31+ + +## 0.3.0 - 2018/10/09 + +* Upgrade reqwest to 0.9 +* Upgrade to tokio instead of tokio_core + +## 0.2.0 - 2018/04/29 + +* Add AsyncClient +* Switch from error_chain to failure +* Remove unused time dependency and loosen dependency constraints + +## 0.1.0 - 2018/02/10 + +* Add some newly introduced fields in the API +* Fix URL for the /summary endpoint +* Upgrade dependencies +* Add a simple test
diff --git a/deps/crates_io_api/Cargo.toml b/deps/crates_io_api/Cargo.toml line changes: +25/-0 index 0000000..f0a670d --- /dev/null +++ b/deps/crates_io_api/Cargo.toml
@@ -0,0 +1,25 @@ +[package] +authors = ["theduke <chris@theduke.at>"] +name = "crates_io_api" +description = "API client for crates.io" +license = "MIT/Apache-2.0" +repository = "https://github.com/theduke/crates_io_api" +documentation = "https://docs.rs/crates_io_api" +readme = "README.md" +keywords = [ "crates", "api" ] +categories = [ "web-programming", "web-programming::http-client" ] +edition = "2018" + +version = "0.5.0" + +[dependencies] +chrono = { version = "0.4.6", features = ["serde"] } +reqwest = { version = "0.9" } +serde = "1.0.79" +serde_derive = "1.0.79" +serde_json = "1.0.32" +url = "1.7.1" +log = "0.4.5" +failure = "0.1.2" +futures = "0.1.25" +tokio = "0.1"
diff --git a/deps/crates_io_api/LICENSE-APACHE b/deps/crates_io_api/LICENSE-APACHE line changes: +201/-0 index 0000000..9ade67e --- /dev/null +++ b/deps/crates_io_api/LICENSE-APACHE
@@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright 2017 Christoph Herzog <chris@theduke.at> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
diff --git a/deps/crates_io_api/LICENSE-MIT b/deps/crates_io_api/LICENSE-MIT line changes: +25/-0 index 0000000..8cf3658 --- /dev/null +++ b/deps/crates_io_api/LICENSE-MIT
@@ -0,0 +1,25 @@ +Copyright (c) 2017 Christoph Herzog <chris@theduke.at> + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE.
diff --git a/deps/crates_io_api/README.md b/deps/crates_io_api/README.md line changes: +13/-0 index 0000000..d3e0f22 --- /dev/null +++ b/deps/crates_io_api/README.md
@@ -0,0 +1,13 @@ +# crates_io_api + +A Rust client for the [crates.io](https://crates.io) API. + +This crate aims to provide an easy to use and complete client for retrieving +detailed information about Rust's crate ecosystem. + +Both a synchronous and a Tokio/futures based async client are provided. + +## How to use + +For usage information and examples, check out the +[Documentation](https://docs.rs/crates_io_api).
diff --git a/deps/crates_io_api/src/async_client.rs b/deps/crates_io_api/src/async_client.rs line changes: +340/-0 index 0000000..348a1bb --- /dev/null +++ b/deps/crates_io_api/src/async_client.rs
@@ -0,0 +1,340 @@ +use futures::{future, stream, Future, Stream}; +use log::trace; +use reqwest::{header, r#async, StatusCode, Url}; +use serde::de::DeserializeOwned; + +use super::Error; +use crate::types::*; + +/// Asynchronous client for the crates.io API. +#[derive(Clone)] +pub struct Client { + client: r#async::Client, + base_url: Url, +} + +impl Client { + /// Instantiate a new client. + /// + /// This will fail if the underlying http client could not be created. + pub fn new() -> Self { + Self { + client: r#async::Client::new(), + base_url: Url::parse("https://crates.io/api/v1/").unwrap(), + } + } + + pub fn with_user_agent(user_agent: &str) -> Self { + let mut headers = header::HeaderMap::new(); + headers.insert( + header::USER_AGENT, + header::HeaderValue::from_str(user_agent).unwrap(), + ); + Self { + client: r#async::Client::builder() + .default_headers(headers) + .build() + .unwrap(), + base_url: Url::parse("https://crates.io/api/v1/").unwrap(), + } + } + + fn get<T: DeserializeOwned>(&self, url: &Url) -> impl Future<Item = T, Error = Error> { + trace!("GET {}", url); + + self.client + .get(url.clone()) + .send() + .map_err(Error::from) + .and_then(|res| { + if res.status() == StatusCode::NOT_FOUND { + return Err(Error::NotFound); + } + let res = res.error_for_status()?; + Ok(res) + }) + .and_then(|mut res| res.json().map_err(Error::from)) + } + + /// Retrieve a summary containing crates.io wide information. + pub fn summary(&self) -> impl Future<Item = Summary, Error = Error> { + let url = self.base_url.join("summary").unwrap(); + self.get(&url) + } + + /// Retrieve information of a crate. + /// + /// If you require detailed information, consider using [full_crate](). + pub fn get_crate(&self, name: &str) -> impl Future<Item = CrateResponse, Error = Error> { + let url = self.base_url.join("crates/").unwrap().join(name).unwrap(); + self.get(&url) + } + + /// Retrieve download stats for a crate. 
+ pub fn crate_downloads(&self, name: &str) -> impl Future<Item = Downloads, Error = Error> { + let url = self + .base_url + .join(&format!("crates/{}/downloads", name)) + .unwrap(); + self.get(&url) + } + + /// Retrieve the owners of a crate. + pub fn crate_owners(&self, name: &str) -> impl Future<Item = Vec<User>, Error = Error> { + let url = self + .base_url + .join(&format!("crates/{}/owners", name)) + .unwrap(); + self.get::<Owners>(&url).map(|data| data.users) + } + + /// Load all reverse dependencies of a crate. + /// + /// Note: Since the reverse dependency endpoint requires pagination, this + /// will result in multiple requests if the crate has more than 100 reverse + /// dependencies. + pub fn crate_reverse_dependencies(&self, name: &str) + -> impl Future<Item = ReverseDependencies, Error = Error> { + + fn fetch_page(c: Client, name: String, mut tidy_rdeps: ReverseDependencies, page: u64) + -> impl Future<Item = ReverseDependencies, Error = Error> + Send { + + let url = c.base_url.join(&format!( + "crates/{0}/reverse_dependencies?per_page=100&page={1}", name, page + )).unwrap(); + + c.get::<ReverseDependenciesAsReceived>(&url).and_then(move |rdeps| + -> Box<dyn Future<Item = ReverseDependencies, Error = Error> + Send> { + + tidy_rdeps.from_received(&rdeps); + + if !rdeps.dependencies.is_empty() { + tidy_rdeps.meta = rdeps.meta; + Box::new(fetch_page(c, name, tidy_rdeps, page + 1)) + } else { + Box::new(::futures::future::ok(tidy_rdeps)) + } + }) + } + + fetch_page(self.clone(), name.to_string(), ReverseDependencies { + dependencies: Vec::new(), meta:Meta{total:0} }, 1) + } + + /// Retrieve the authors for a crate version. 
+ pub fn crate_authors( + &self, + name: &str, + version: &str, + ) -> impl Future<Item = Authors, Error = Error> { + let url = self + .base_url + .join(&format!("crates/{}/{}/authors", name, version)) + .unwrap(); + self.get::<AuthorsResponse>(&url).map(|res| Authors { + names: res.meta.names, + users: res.users, + }) + } + + /// Retrieve the dependencies of a crate version. + pub fn crate_dependencies( + &self, + name: &str, + version: &str, + ) -> impl Future<Item = Vec<Dependency>, Error = Error> { + let url = self + .base_url + .join(&format!("crates/{}/{}/dependencies", name, version)) + .unwrap(); + self.get::<Dependencies>(&url).map(|res| res.dependencies) + } + + fn full_version(&self, version: Version) -> impl Future<Item = FullVersion, Error = Error> { + let authors = self.crate_authors(&version.crate_name, &version.num); + let deps = self.crate_dependencies(&version.crate_name, &version.num); + + authors.join(deps).map(|(authors, deps)| FullVersion { + created_at: version.created_at, + updated_at: version.updated_at, + dl_path: version.dl_path, + downloads: version.downloads, + features: version.features, + id: version.id, + num: version.num, + yanked: version.yanked, + license: version.license, + links: version.links, + readme_path: version.readme_path, + + author_names: authors.names, + authors: authors.users, + dependencies: deps, + }) + } + + /// Retrieve all available information for a crate, including download + /// stats, owners and reverse dependencies. + /// + /// The `all_versions` argument controls the retrieval of detailed version + /// information. + /// If false, only the data for the latest version will be fetched, if true, + /// detailed information for all versions will be available. + /// Note: Each version requires two extra requests. 
+ pub fn full_crate( + &self, + name: &str, + all_versions: bool, + ) -> impl Future<Item = FullCrate, Error = Error> { + let c = self.clone(); + let crate_and_versions = self.get_crate(name).and_then( + move |info| -> Box< + Future<Item = (CrateResponse, Vec<FullVersion>), Error = Error> + Send, + > { + if !all_versions { + Box::new( + c.full_version(info.versions[0].clone()) + .map(|v| (info, vec![v])), + ) + } else { + Box::new( + ::futures::future::join_all( + info.versions + .clone() + .into_iter() + .map(|v| c.full_version(v)) + .collect::<Vec<_>>(), + ) + .map(|versions| (info, versions)), + ) + } + }, + ); + + let dls = self.crate_downloads(name); + let owners = self.crate_owners(name); + let reverse_dependencies = self.crate_reverse_dependencies(name); + + crate_and_versions + .join4(dls, owners, reverse_dependencies) + .map(|((resp, versions), dls, owners, reverse_dependencies)| { + let data = resp.crate_data; + FullCrate { + id: data.id, + name: data.name, + description: data.description, + license: resp.versions[0].license.clone(), + documentation: data.documentation, + homepage: data.homepage, + repository: data.repository, + total_downloads: data.downloads, + max_version: data.max_version, + created_at: data.created_at, + updated_at: data.updated_at, + + categories: resp.categories, + keywords: resp.keywords, + downloads: dls, + owners, + reverse_dependencies, + versions, + } + }) + } + + /// Retrieve a page of crates, optionally constrained by a query. + /// + /// If you want to get all results without worrying about paging, + /// use [all_crates](). 
+ /// + /// ``` + pub fn crates(&self, spec: ListOptions) -> impl Future<Item = CratesResponse, Error = Error> { + let mut url = self.base_url.join("crates").unwrap(); + { + let mut q = url.query_pairs_mut(); + q.append_pair("page", &spec.page.to_string()); + q.append_pair("per_page", &spec.per_page.to_string()); + q.append_pair("sort", spec.sort.to_str()); + if let Some(query) = spec.query { + q.append_pair("q", &query); + } + if let Some(category) = spec.category { + q.append_pair("category", &category); + } + } + self.get(&url) + } + + /// Retrieve all crates, optionally constrained by a query. + /// + /// Note: This method fetches all pages of the result. + /// This can result in a lot of queries (100 results per query). + pub fn all_crates(&self, query: Option<String>) -> impl Stream<Item = Crate, Error = Error> { + let opts = ListOptions { + query, + sort: Sort::Alphabetical, + per_page: 100, + page: 1, + category: None, + }; + + let c = self.clone(); + self.crates(opts.clone()) + .and_then(move |res| { + let pages = (res.meta.total as f64 / 100.0).ceil() as u64; + let streams_futures = (1..pages) + .map(|page| { + let opts = ListOptions { + page, + ..opts.clone() + }; + c.crates(opts) + .and_then(|res| future::ok(stream::iter_ok(res.crates))) + }) + .collect::<Vec<_>>(); + let stream = stream::futures_ordered(streams_futures).flatten(); + future::ok(stream) + }) + .flatten_stream() + } + + /// Retrieve all crates with all available extra information. + /// + /// Note: This method fetches not only all crates, but does multiple requests for each crate + /// to retrieve extra information. + /// + /// This can result in A LOT of queries. 
+ pub fn all_crates_full( + &self, + query: Option<String>, + all_versions: bool, + ) -> impl Stream<Item = FullCrate, Error = Error> { + let c = self.clone(); + self.all_crates(query) + .and_then(move |cr| c.full_crate(&cr.name, all_versions)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_client() { + let mut rt = ::tokio::runtime::Runtime::new().unwrap(); + + let client = Client::new(); + + let summary = rt.block_on(client.summary()).unwrap(); + assert!(summary.most_downloaded.len() > 0); + + for item in &summary.most_downloaded[0..3] { + let _ = rt.block_on(client.full_crate(&item.name, false)).unwrap(); + } + + let crates = rt + .block_on(client.all_crates(None).take(3).collect()) + .unwrap(); + println!("{:?}", crates); + } +}
diff --git a/deps/crates_io_api/src/lib.rs b/deps/crates_io_api/src/lib.rs line changes: +75/-0 index 0000000..f1caf9f --- /dev/null +++ b/deps/crates_io_api/src/lib.rs
@@ -0,0 +1,75 @@ +//! API client for [crates.io](https://crates.io). +//! +//! It aims to provide an easy to use and complete client for retrieving +//! information about Rust's crate ecosystem. +//! +//! **Note:** Both a synchronous client (`SyncClient`) and an asynchronous +//! client (`AsyncClient`, based on Tokio and futures) are available. +//! The example below uses the synchronous client. +//! +//! # Examples +//! +//! Print the most downloaded crates and their non-optional dependencies: +//! +//! ``` +//! use crates_io_api::{SyncClient, Error}; +//! +//! fn list_top_dependencies() -> Result<(), Error> { +//! // Instantiate the client. +//! let client = SyncClient::new(); +//! // Retrieve summary data. +//! let summary = client.summary()?; +//! for c in summary.most_downloaded { +//! println!("{}:", c.id); +//! for dep in client.crate_dependencies(&c.id, &c.max_version)? { +//! // Ignore optional dependencies. +//! if !dep.optional { +//! println!(" * {} - {}", dep.id, dep.version_id); +//! } +//! } +//! } +//! Ok(()) +//! } +//! ``` + +#![recursion_limit = "128"] + +use failure::Fail; + +mod async_client; +mod sync_client; +mod types; + +pub use crate::async_client::Client as AsyncClient; +pub use crate::sync_client::SyncClient; +pub use crate::types::*; + +#[derive(Fail, Debug)] +pub enum Error { + #[fail(display = "{}", _0)] + Http(reqwest::Error), + #[fail(display = "{}", _0)] + Url(url::ParseError), + #[fail(display = "{}", _0)] + InvalidHeader(reqwest::header::InvalidHeaderValue), + #[fail(display = "Not found")] + NotFound, +} + +impl From<reqwest::Error> for Error { + fn from(e: reqwest::Error) -> Self { + Error::Http(e) + } +} + +impl From<url::ParseError> for Error { + fn from(e: url::ParseError) -> Self { + Error::Url(e) + } +} + +impl From<reqwest::header::InvalidHeaderValue> for Error { + fn from(e: reqwest::header::InvalidHeaderValue) -> Self { + Error::InvalidHeader(e) + } +}
diff --git a/deps/crates_io_api/src/sync_client.rs b/deps/crates_io_api/src/sync_client.rs line changes: +293/-0 index 0000000..48fa9ac --- /dev/null +++ b/deps/crates_io_api/src/sync_client.rs
@@ -0,0 +1,293 @@ +use super::*; +use std::iter::Extend; + +use log::trace; +use reqwest::{header, StatusCode, Url}; +use serde::de::DeserializeOwned; + +use crate::types::*; + +/// A synchronous client for the crates.io API. +pub struct SyncClient { + client: reqwest::Client, + base_url: Url, +} + +impl SyncClient { + /// Instantiate a new synchronous API client. + /// + /// This will fail if the underlying http client could not be created. + pub fn new() -> Self { + Self { + client: reqwest::Client::new(), + base_url: Url::parse("https://crates.io/api/v1/").unwrap(), + } + } + + pub fn with_user_agent(user_agent: &str) -> Self { + let mut headers = header::HeaderMap::new(); + headers.insert( + header::USER_AGENT, + header::HeaderValue::from_str(user_agent).unwrap(), + ); + Self { + client: reqwest::Client::builder() + .default_headers(headers) + .build() + .unwrap(), + base_url: Url::parse("https://crates.io/api/v1/").unwrap(), + } + } + + fn get<T: DeserializeOwned>(&self, url: Url) -> Result<T, Error> { + trace!("GET {}", url); + let mut res = { + let res = self.client.get(url).send()?; + + if res.status() == StatusCode::NOT_FOUND { + return Err(Error::NotFound); + } + res.error_for_status()? + }; + + let data: T = res.json()?; + Ok(data) + } + + /// Retrieve a summary containing crates.io wide information. + pub fn summary(&self) -> Result<Summary, Error> { + let url = self.base_url.join("summary").unwrap(); + self.get(url) + } + + /// Retrieve information of a crate. + /// + /// If you require detailed information, consider using [full_crate](). + pub fn get_crate(&self, name: &str) -> Result<CrateResponse, Error> { + let url = self.base_url.join("crates/")?.join(name)?; + self.get(url) + } + + /// Retrieve download stats for a crate. + pub fn crate_downloads(&self, name: &str) -> Result<Downloads, Error> { + let url = self.base_url.join(&format!("crates/{}/downloads", name))?; + self.get(url) + } + + /// Retrieve the owners of a crate. 
+ pub fn crate_owners(&self, name: &str) -> Result<Vec<User>, Error> { + let url = self.base_url.join(&format!("crates/{}/owners", name))?; + let resp: Owners = self.get(url)?; + Ok(resp.users) + } + + /// Load all reverse dependencies of a crate. + /// + /// Note: Since the reverse dependency endpoint requires pagination, this + /// will result in multiple requests if the crate has more than 100 reverse + /// dependencies. + pub fn crate_reverse_dependencies(&self, name: &str) -> Result<ReverseDependencies, Error> { + let mut page = 1; + let mut rdeps: ReverseDependenciesAsReceived; + let mut tidy_rdeps = ReverseDependencies {dependencies: Vec::new(), meta: Meta {total:0}}; + + loop { + let url = self.base_url.join(&format!( + "crates/{}/reverse_dependencies?per_page=100&page={}", + name, page + ))?; + + rdeps = self.get(url)?; + + tidy_rdeps.from_received(&rdeps); + + if !rdeps.dependencies.is_empty() { + tidy_rdeps.meta = rdeps.meta; + page += 1; + } else { + break; + } + } + Ok(tidy_rdeps) + } + + /// Retrieve the authors for a crate version. + pub fn crate_authors(&self, name: &str, version: &str) -> Result<Authors, Error> { + let url = self + .base_url + .join(&format!("crates/{}/{}/authors", name, version))?; + let res: AuthorsResponse = self.get(url)?; + Ok(Authors { + names: res.meta.names, + users: res.users, + }) + } + + /// Retrieve the dependencies of a crate version. 
+ pub fn crate_dependencies(&self, name: &str, version: &str) -> Result<Vec<Dependency>, Error> { + let url = self + .base_url + .join(&format!("crates/{}/{}/dependencies", name, version))?; + let resp: Dependencies = self.get(url)?; + Ok(resp.dependencies) + } + + fn full_version(&self, version: Version) -> Result<FullVersion, Error> { + let authors = self.crate_authors(&version.crate_name, &version.num)?; + let deps = self.crate_dependencies(&version.crate_name, &version.num)?; + + let v = FullVersion { + created_at: version.created_at, + updated_at: version.updated_at, + dl_path: version.dl_path, + downloads: version.downloads, + features: version.features, + id: version.id, + num: version.num, + yanked: version.yanked, + license: version.license, + links: version.links, + readme_path: version.readme_path, + + author_names: authors.names, + authors: authors.users, + dependencies: deps, + }; + Ok(v) + } + + /// Retrieve all available information for a crate, including download + /// stats, owners and reverse dependencies. + /// + /// The `all_versions` argument controls the retrieval of detailed version + /// information. + /// If false, only the data for the latest version will be fetched, if true, + /// detailed information for all versions will be available. + /// Note: Each version requires two extra requests. + pub fn full_crate(&self, name: &str, all_versions: bool) -> Result<FullCrate, Error> { + let resp = self.get_crate(name)?; + let data = resp.crate_data; + + let dls = self.crate_downloads(name)?; + let owners = self.crate_owners(name)?; + let reverse_dependencies = self.crate_reverse_dependencies(name)?; + + let versions = if resp.versions.is_empty() { + vec![] + } else if all_versions { + //let versions_res: Result<Vec<FullVersion>> = resp.versions + resp.versions + .into_iter() + .map(|v| self.full_version(v)) + .collect::<Result<Vec<FullVersion>, Error>>()? 
+ } else { + let v = self.full_version(resp.versions[0].clone())?; + vec![v] + }; + + let full = FullCrate { + id: data.id, + name: data.name, + description: data.description, + license: versions[0].license.clone(), + documentation: data.documentation, + homepage: data.homepage, + repository: data.repository, + total_downloads: data.downloads, + max_version: data.max_version, + created_at: data.created_at, + updated_at: data.updated_at, + + categories: resp.categories, + keywords: resp.keywords, + downloads: dls, + owners, + reverse_dependencies, + versions, + }; + Ok(full) + } + + /// Retrieve a page of crates, optionally constrained by a query. + /// + /// If you want to get all results without worrying about paging, + /// use [all_crates](). + /// + /// # Examples + /// + /// Retrieve the first page of results for the query "api", with 100 items + /// per page and sorted alphabetically. + /// + /// ``` + /// # use crates_io_api::{SyncClient, ListOptions, Sort, Error}; + /// + /// # fn f() -> Result<(), Error> { + /// let client = SyncClient::new(); + /// client.crates(ListOptions{ + /// sort: Sort::Alphabetical, + /// per_page: 100, + /// page: 1, + /// query: Some("api".to_string()), + /// })?; + /// # Ok(()) + /// # } + /// ``` + /// + pub fn crates(&self, spec: ListOptions) -> Result<CratesResponse, Error> { + let mut url = self.base_url.join("crates")?; + { + let mut q = url.query_pairs_mut(); + q.append_pair("page", &spec.page.to_string()); + q.append_pair("per_page", &spec.per_page.to_string()); + q.append_pair("sort", spec.sort.to_str()); + if let Some(query) = spec.query { + q.append_pair("q", &query); + } + if let Some(category) = spec.category { + q.append_pair("category", &category); + } + } + self.get(url) + } + + /// Retrieve all crates, optionally constrained by a query. + /// + /// Note: This method fetches all pages of the result. + /// This can result in a lot queries (100 results per query). 
+ pub fn all_crates(&self, query: Option<String>) -> Result<Vec<Crate>, Error> { + let mut page = 1; + let mut crates = Vec::new(); + loop { + let res = self.crates(ListOptions { + query: query.clone(), + sort: Sort::Alphabetical, + per_page: 100, + page, + category: None, + })?; + if res.crates.is_empty() { + crates.extend(res.crates); + page += 1; + } else { + break; + } + } + Ok(crates) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_client() { + let client = SyncClient::new(); + let summary = client.summary().unwrap(); + assert!(summary.most_downloaded.len() > 0); + + for item in &summary.most_downloaded[0..3] { + let _ = client.full_crate(&item.name, false).unwrap(); + } + } +}
diff --git a/deps/crates_io_api/src/types.rs b/deps/crates_io_api/src/types.rs line changes: +316/-0 index 0000000..5086991 --- /dev/null +++ b/deps/crates_io_api/src/types.rs
@@ -0,0 +1,316 @@ +//! Types for the data that is available via the API. + +use chrono::{DateTime, NaiveDate, Utc}; +use serde_derive::*; +use std::collections::HashMap; + +/// Used to specify the sort behaviour of the `Client::crates()` method. +#[derive(Debug, Clone)] +pub enum Sort { + /// Sort alphabetically. + Alphabetical, + /// Sort by relevance (meaningless if used without a query). + Relevance, + /// Sort by downloads. + Downloads, +} + +impl Sort { + pub(crate) fn to_str(&self) -> &str { + use self::Sort::*; + match *self { + Alphabetical => "alpha", + Relevance => "", + Downloads => "downloads", + } + } +} + +/// Options for the [crates]() method of the client. +/// +/// Used to specify pagination, sorting and a query. +#[derive(Clone, Debug)] +pub struct ListOptions { + pub sort: Sort, + pub per_page: u64, + pub page: u64, + pub query: Option<String>, + pub category: Option<String>, +} + +/// Pagination information. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Meta { + /// The total amount of results. + pub total: u64, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct CrateLinks { + pub owner_team: String, + pub owner_user: String, + pub owners: String, + pub reverse_dependencies: String, + pub version_downloads: String, + pub versions: Option<String>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Crate { + pub id: String, + pub name: String, + pub description: Option<String>, + pub license: Option<String>, + pub documentation: Option<String>, + pub homepage: Option<String>, + pub repository: Option<String>, + // TODO: determine badge format. 
+ // pub badges: Vec<??>, + pub downloads: u64, + pub recent_downloads: Option<u64>, + pub categories: Option<Vec<String>>, + pub keywords: Option<Vec<String>>, + pub versions: Option<Vec<u64>>, + pub max_version: String, + pub links: CrateLinks, + pub created_at: DateTime<Utc>, + pub updated_at: DateTime<Utc>, + pub exact_match: Option<bool>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct CratesResponse { + pub crates: Vec<Crate>, + #[serde(default)] + pub versions: Vec<Version>, + #[serde(default)] + pub keywords: Vec<Keyword>, + #[serde(default)] + pub categories: Vec<Category>, + pub meta: Meta, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct VersionLinks { + pub authors: String, + pub dependencies: String, + pub version_downloads: String, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Version { + #[serde(rename = "crate")] + pub crate_name: String, + pub created_at: DateTime<Utc>, + pub updated_at: DateTime<Utc>, + pub dl_path: String, + pub downloads: u64, + pub features: HashMap<String, Vec<String>>, + pub id: u64, + pub num: String, + pub yanked: bool, + pub license: Option<String>, + pub readme_path: Option<String>, + pub links: VersionLinks, + pub crate_size: Option<u64>, + pub published_by: Option<User>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Category { + pub category: String, + pub crates_cnt: u64, + pub created_at: DateTime<Utc>, + pub description: String, + pub id: String, + pub slug: String, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Keyword { + pub id: String, + pub keyword: String, + pub crates_cnt: u64, + pub created_at: DateTime<Utc>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct CrateResponse { + pub categories: Vec<Category>, + #[serde(rename = "crate")] + pub crate_data: Crate, + pub keywords: Vec<Keyword>, + pub versions: Vec<Version>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Summary { + pub 
just_updated: Vec<Crate>, + pub most_downloaded: Vec<Crate>, + pub new_crates: Vec<Crate>, + pub most_recently_downloaded: Vec<Crate>, + pub num_crates: u64, + pub num_downloads: u64, + pub popular_categories: Vec<Category>, + pub popular_keywords: Vec<Keyword>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct VersionDownloads { + pub date: NaiveDate, + pub downloads: u64, + pub version: u64, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ExtraDownloads { + pub date: NaiveDate, + pub downloads: u64, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct DownloadsMeta { + pub extra_downloads: Vec<ExtraDownloads>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Downloads { + pub version_downloads: Vec<VersionDownloads>, + pub meta: DownloadsMeta, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct User { + pub avatar: Option<String>, + pub email: Option<String>, + pub id: u64, + pub kind: Option<String>, + pub login: String, + pub name: Option<String>, + pub url: String, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct AuthorsMeta { + pub names: Vec<String>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct AuthorsResponse { + pub meta: AuthorsMeta, + pub users: Vec<User>, +} + +pub struct Authors { + pub names: Vec<String>, + pub users: Vec<User>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Owners { + pub users: Vec<User>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Dependency { + pub crate_id: String, + pub default_features: bool, + pub downloads: u64, + pub features: Vec<String>, + pub id: u64, + pub kind: String, + pub optional: bool, + pub req: String, + pub target: Option<String>, + pub version_id: u64, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Dependencies { + pub dependencies: Vec<Dependency>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ReverseDependency { + 
pub crate_version: Version, + pub dependency: Dependency, +} + +// This is how reverse dependencies are received +#[derive(Serialize, Deserialize, Debug, Clone)] +pub(super) struct ReverseDependenciesAsReceived { + pub dependencies: Vec<Dependency>, + pub versions: Vec<Version>, + pub meta: Meta +} + +// This is how reverse dependencies are presented +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ReverseDependencies { + pub dependencies: Vec<ReverseDependency>, + pub meta: Meta +} + +impl ReverseDependencies { + + /// Fills the dependencies field from a ReverseDependenciesAsReceived struct. + pub(crate) fn from_received(&mut self, rdeps: &ReverseDependenciesAsReceived) { + + for d in rdeps.dependencies.iter() { + for v in rdeps.versions.iter() { + if v.id == d.version_id { + // Right now it iterates over the full vector for each vector element. + // For large vectors, it may be faster to remove each matched element + // using the drain_filter() method once it's stabilized: + // https://doc.rust-lang.org/nightly/std/vec/struct.Vec.html#method.drain_filter + self.dependencies.push( + ReverseDependency {crate_version: v.clone(), dependency: d.clone()} + ); + } + } + } + } +} + + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct FullVersion { + pub created_at: DateTime<Utc>, + pub updated_at: DateTime<Utc>, + pub dl_path: String, + pub downloads: u64, + pub features: HashMap<String, Vec<String>>, + pub id: u64, + pub num: String, + pub yanked: bool, + pub license: Option<String>, + pub readme_path: Option<String>, + pub links: VersionLinks, + + pub author_names: Vec<String>, + pub authors: Vec<User>, + pub dependencies: Vec<Dependency>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct FullCrate { + pub id: String, + pub name: String, + pub description: Option<String>, + pub license: Option<String>, + pub documentation: Option<String>, + pub homepage: Option<String>, + pub repository: Option<String>, + pub total_downloads: u64, 
+ pub max_version: String, + pub created_at: DateTime<Utc>, + pub updated_at: DateTime<Utc>, + + pub categories: Vec<Category>, + pub keywords: Vec<Keyword>, + pub downloads: Downloads, + pub owners: Vec<User>, + pub reverse_dependencies: ReverseDependencies, + + pub versions: Vec<FullVersion>, +}
diff --git a/src/main.rs b/src/main.rs line changes: +516/-0 index 0000000..80c7786 --- /dev/null +++ b/src/main.rs
@@ -0,0 +1,516 @@ +use crates_io_api::{SyncClient, Crate, Error, Sort, ListOptions}; +use serde::{Deserialize}; +use serde_json::{Deserializer}; +use std::process::{Command, Stdio}; +use std::io::Write; + +#[derive(Deserialize, Debug)] +struct BuildMetadataTarget { + kind: Vec<String>, + crate_types: Vec<String>, + name: String, +} + +#[derive(Deserialize, Debug)] +struct BuildMetadata { + reason: String, + package_id: String, + target: Option<BuildMetadataTarget>, + filenames: Option<Vec<String>>, + executable: Option<String> +} + +#[derive(Debug, PartialEq)] +enum ArtifactType { + Binary, + RustLibrary, +} + +#[derive(Debug)] +struct Artifact { + filename: String, + kind: ArtifactType, + size: u64, +} + +#[derive(Debug)] +struct CrateResult { + deps: usize, + artifacts: Vec<Artifact>, +} +impl std::fmt::Display for CrateResult { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let libs: Vec<&Artifact> = self.artifacts.iter().filter(|a| a.kind == ArtifactType::RustLibrary).collect(); + let bins: Vec<&Artifact> = self.artifacts.iter().filter(|a| a.kind == ArtifactType::Binary).collect(); + let lib = libs.first().map(|a| a.size).unwrap_or(0); + let bin = bins.first().map(|a| a.size).unwrap_or(0); + let lib_str = match lib { + 0 => " ".to_string(), + _ => format!("{:>8.2}", lib as f64 / 1024. / 1024.), + }; + let bin_str = match bin { + 0 => " ".to_string(), + _ => format!("{:>8.2}", bin as f64 / 1024. 
/ 1024.), + }; + let res = write!(f, " {:>6} \t {} \t {} ", self.deps, lib_str, bin_str); + if libs.len() > 1 { + let _ = write!(f, " WARNING: >1 lib"); + } + res + } +} + +#[derive(Debug)] +struct PkgId { + name: String, + version: String, +} + +fn top_crates(count: usize, category: Option<String>, filter_fn: Option<fn(&Crate) -> bool>) -> Result<Vec<Crate>, Error> { + let mut remaining = count; + let mut page = 1; + let mut all_crates = Vec::<Crate>::with_capacity(count); + while remaining > 0 { + let client = SyncClient::new(); + let options = ListOptions { + sort: Sort::Downloads, + per_page: 100, + page: page, + query: None, + category: category.clone(), + }; + let mut crates = client.crates(options)?; + + crates.crates.retain(|c| match filter_fn { + Some(f) => f(&c), + None => true, + }); + all_crates.append(&mut crates.crates); + remaining -= match remaining { + x if x > 100 => 100, + x => x, + }; + page += 1; + } + Ok(all_crates) +} + +#[allow(dead_code)] +fn search_crates(query: &str) -> Result<Vec<Crate>, Error> { + let client = SyncClient::new(); + let options = ListOptions { + sort: Sort::Downloads, + per_page: 100, + page: 1, + query: Some(query.to_string()), + category: None, + }; + let crates = client.crates(options)?; + Ok(crates.crates) +} + +fn pkgid(crt: &Crate) -> Result<PkgId, std::io::Error> { + let dir = format!("clone_{}", crt.id); + let output = Command::new("cargo") + .args(&["pkgid"]) + .current_dir(&dir) + .output() + .expect("pkgid failed"); + let output = String::from_utf8(output.stdout).expect("Unreadable pkgid output"); + let substr = output.split("#").nth(1).expect("pkgid missing #"); + let mut bits = substr.split(":"); + + Ok(PkgId { + name: bits.next().expect("pkgid missing crate").trim().into(), + version: bits.next().expect("pkgid missing version").trim().into(), + }) +} + +fn artifacts(_crt: &Crate, metadata: &str, pkgid: &PkgId) -> Result<Vec<Artifact>, std::io::Error> { + let mut artifacts: Vec<Artifact> = vec!(); + let 
pkgid_str = format!("{} {}", pkgid.name, pkgid.version); + let stream = Deserializer::from_str(metadata).into_iter::<BuildMetadata>(); + let mut meta_objs: Vec<BuildMetadata> = vec!(); + for value in stream { + let m: BuildMetadata = value.expect("Fail parsing metadata json"); + if m.package_id.starts_with(&pkgid_str) && + m.reason == "compiler-artifact" && + m.target.is_some() { + meta_objs.push(m); + } + } + for m in &meta_objs { + let target = m.target.as_ref().unwrap(); + if target.kind.iter().any(|x| x == "bin") { + if let Some(exe) = &m.executable { + artifacts.push(Artifact { + filename: exe.clone(), + kind: ArtifactType::Binary, + size: 0, + }); + } + } + if target.kind.iter().any(|x| x == "lib" || x == "rlib") { + if let Some(filenames) = &m.filenames { + for file in filenames { + if file.ends_with(".rlib") { + artifacts.push(Artifact { + filename: file.clone(), + kind: ArtifactType::RustLibrary, + size: 0, + }); + } + } + } + } + } + for mut arty in &mut artifacts { + if let Ok(file) = std::fs::File::open(&arty.filename) { + if let Ok(stat) = file.metadata() { + arty.size = stat.len(); + } + } + } + Ok(artifacts) +} + +fn analyze_crate(crt: &Crate) -> Result<CrateResult, std::io::Error> { + let dir = format!("clone_{}", crt.id); + let repo = crt.repository.as_ref().ok_or(std::io::ErrorKind::NotFound)?; + // Always provide a username/password so git fails fast if one is required. 
+ let repo = repo.replace("https://", "https://dummy_user:dummy_password@"); + let _result = Command::new("git") + .args(&["clone", "--recursive", "--quiet", &repo, &dir]) + .stdin(Stdio::null()) + .stderr(Stdio::null()) + .status() + .expect("clone failed"); + if !std::path::Path::new(&dir).exists() { + return Err(std::io::ErrorKind::Other.into()); + } + + let cargo_toml_path = format!("{}/Cargo.toml", dir); + if !std::path::Path::new(&cargo_toml_path).exists() { + return Err(std::io::ErrorKind::Other.into()); + } + + let result = Command::new("cargo") + .args(&["build", "--release", "--message-format=json"]) + .current_dir(&dir) + .stderr(Stdio::null()) + .output() + .expect("build failed"); + if !result.status.success() { + return Err(std::io::ErrorKind::Other.into()); + } + let metadata = String::from_utf8(result.stdout).expect("Unreadable pkgid output"); + + // $ cargo tree --no-indent -a |sort |uniq -c |sort -nr |wc -l + let mut cargo_result = Command::new("cargo") + .current_dir(&dir) + .args(&["tree", "--no-indent", "--no-dev-dependencies", "-a"]) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + .expect("tree failed"); + let cargo_out = cargo_result.stdout.take().expect("Cargo tree stdout failed"); + let mut sort_result = Command::new("sort") + .current_dir(&dir) + .stdin(Stdio::from(cargo_out)) + .stdout(Stdio::piped()) + .spawn() + .expect("sort failed"); + let sort_out = sort_result.stdout.take().expect("sort stdout failed"); + let mut awk_result = Command::new("awk") + .current_dir(&dir) + .args(&["{print $1}"]) + .stdin(Stdio::from(sort_out)) + .stdout(Stdio::piped()) + .spawn() + .expect("awk failed"); + let awk_out = awk_result.stdout.take().expect("awk stdout failed"); + let mut uniq_result = Command::new("uniq") + .current_dir(&dir) + .args(&["-c"]) + .stdin(Stdio::from(awk_out)) + .stdout(Stdio::piped()) + .spawn() + .expect("uniq failed"); + let uniq_out = uniq_result.stdout.take().expect("uniq stdout 
failed"); + let sort2_result = Command::new("sort") + .current_dir(&dir) + .args(&["-nr"]) + .stdin(Stdio::from(uniq_out)) + .stdout(Stdio::piped()) + .spawn() + .expect("sort failed"); + let output = sort2_result.wait_with_output().expect("sort failed"); + let _ = cargo_result.wait(); + let _ = sort_result.wait(); + let _ = awk_result.wait(); + let _ = uniq_result.wait(); + let output = String::from_utf8(output.stdout).expect("Unreadable output"); + + // Subtract 1 for the root crate. + let dep_count = match output.lines().count() { + e if e > 0 => e - 1, + _ => return Err(std::io::ErrorKind::Other.into()), + }; + + let artifacts = artifacts(crt, &metadata, &pkgid(crt)?)?; + + Ok(CrateResult { + deps: dep_count, + artifacts: artifacts, + }) +} + +#[derive(Debug, Default)] +struct Statistics { + count: usize, + mean: f64, + median: f64, + stddev: f64, + max: usize, +} + +#[derive(Debug)] +struct BatchStatistics { + deps: Statistics, + libs: Statistics, + bins: Statistics, +} + +fn statistics(crates: &Vec<CrateResult>) -> Result<BatchStatistics, std::io::Error> { + let deps: Vec<usize> = crates.iter().filter_map(|c| match c.deps { + 0 => None, + c => Some(c), + }).collect(); + let deps_f64: Vec<f64> = deps.iter().map(|v| *v as f64).collect(); + + let libs: Vec<u64> = crates.iter().filter_map(|c| { + c.artifacts.iter().filter_map(|a| { + match a.kind { + ArtifactType::RustLibrary => Some(a.size), + _ => None, + } + }).next() + }).collect(); + let libs_f64: Vec<f64> = libs.iter().map(|v| *v as f64).collect(); + + let bins: Vec<u64> = crates.iter().filter_map(|c| { + c.artifacts.iter().filter_map(|a| { + match a.kind { + ArtifactType::Binary => Some(a.size), + _ => None, + } + }).next() + }).collect(); + let bins_f64: Vec<f64> = bins.iter().map(|v| *v as f64).collect(); + + Ok(BatchStatistics { + deps: Statistics { + count: deps.len(), + mean: statistical::mean(deps_f64.as_slice()), + median: statistical::median(deps_f64.as_slice()), + stddev: 
statistical::standard_deviation(deps_f64.as_slice(), None), + max: *deps.iter().max().unwrap_or(&0), + }, + libs: Statistics { + count: libs.len(), + mean: statistical::mean(libs_f64.as_slice()), + median: statistical::median(libs_f64.as_slice()), + stddev: statistical::standard_deviation(libs_f64.as_slice(), None), + max: *libs.iter().max().unwrap_or(&0) as usize, + }, + bins: Statistics { + count: bins.len(), + mean: statistical::mean(bins_f64.as_slice()), + median: statistical::median(bins_f64.as_slice()), + stddev: statistical::standard_deviation(bins_f64.as_slice(), None), + max: *bins.iter().max().unwrap_or(&0) as usize, + }, + }) +} + +fn analyze(crates: Vec<Crate>) { + let blacklist = [ + "rustc-ap-rustc_cratesio_shim", // all of rust compiler + "rustc-ap-rustc_target", + "rustc-ap-serialize", + "rustc-ap-rustc_data_structures", + "rustc-ap-syntax_pos", + "rustc-ap-syntax", + "rustc-ap-rustc_errors", + // these are identical to rls-analysis + "rls-data", + "rls-span", + "rls-vfs", + // these are identical to actix-http + "actix-files", + "actix-http-test", + "actix-web", + "actix-web-httpauth", + // identical to winapi + "winapi-build", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", + // identical to rand + "rand_xorshift", + "rand_pcg", + "rand_os", + "rand_jitter", + "rand_isaac", + "rand_hc", + "rand_core", + "rand_chacha", + // identical to wayland-client + "wayland-commons", + "wayland-kbd", + "wayland-protocols", + "wayland-scanner", + "wayland-server", + "wayland-window", + // identical to clone_tokio + "clone_tokio-codec", + "clone_tokio-core", + "clone_tokio-curl", + "clone_tokio-current-thread", + "clone_tokio-executor", + "clone_tokio-fs", + "clone_tokio-io", + "clone_tokio-proto", + "clone_tokio-reactor", + "clone_tokio-service", + "clone_tokio-signal", + "clone_tokio-sync", + "clone_tokio-tcp", + "clone_tokio-threadpool", + "clone_tokio-timer", + "clone_tokio-tls", + "clone_tokio-trace-core", + "clone_tokio-tungstenite", + 
"clone_tokio-udp", + "clone_tokio-uds", + ]; + + // Buckets of 1, up to 20 + let mut buckets: [u8; 22] = [ + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0, + 0, + ]; + + let mut results: Vec<CrateResult> = vec!(); + println!(""); + println!("{:<32}: {:>6} \t {:>7} \t {:>7} ", "CRATE", "DEPS", "LIB (MB)", "BIN (MB)"); + println!("{}", std::iter::repeat("-").take(73).collect::<String>()); + let crate_count = crates.len(); + for (idx,c) in crates.iter().enumerate() { + if !blacklist.contains(&c.id.as_str()) { + let progress = format!("[{:>3}/{}]", idx, crate_count); + print!("{:<10} {:<21}: ", progress, c.id.chars().take(21).collect::<String>()); + let _ = std::io::stdout().flush(); + match analyze_crate(c) { + Err(_) => { + println!(""); + }, + Ok(res) => { + println!("{}", res); + match res.deps { + e if e <= 20 => buckets[e] += 1, + _ => buckets[21] += 1, + } + results.push(res); + }, + } + } + } + + let stats = statistics(&results).expect("failed to generate statistics"); + + println!(""); + println!("Number of crates analyzed: {}", results.len()); + println!(""); + println!("Dependencies:"); + println!(" count: {}", stats.deps.count); + println!(" mean: {:.2} +/- {:.2}", stats.deps.mean, stats.deps.stddev); + println!(" median: {:.2}", stats.deps.median); + println!(" maximum: {}", stats.deps.max); + println!(""); + println!("Library size:"); + println!(" count: {}", stats.libs.count); + println!(" mean: {:.2} +/- {:.2} [{:.2} MB + / {:.2} MB]", + stats.libs.mean, stats.libs.stddev, + stats.libs.mean / 1024. / 1024., stats.libs.stddev / 1024. / 1024.); + println!(" median: {:.2} [{:.2} MB]", stats.libs.median, stats.libs.median / 1024. / 1024.); + println!(" maximum: {} [{:.2} MB]", stats.libs.max, stats.libs.max as f64 / 1024. / 1024.); + println!(""); + println!("Binary size:"); + println!(" count: {}", stats.bins.count); + println!(" mean: {:.2} +/- {:.2} [{:.2} MB + / {:.2} MB]", + stats.bins.mean, stats.bins.stddev, + stats.bins.mean / 1024. 
/ 1024., stats.bins.stddev / 1024. / 1024.); + println!(" median: {:.2} [{:.2} MB]", stats.bins.median, stats.bins.median / 1024. / 1024.); + println!(" maximum: {} [{:.2} MB]", stats.bins.max, stats.bins.max as f64 / 1024. / 1024.); + + println!(""); + println!("Dependency count histogram (buckets 0-20 by 1, 20+):"); + for (i, count) in buckets.iter().enumerate() { + let idx = match i { + 21 => "> 20".to_string(), + _ => format!("{:>4}", i), + }; + print!("{} ({:>5.1}%): ", idx, 100.0 * (*count as f64) / results.len() as f64); + println!("{}", ['*'].iter().cycle().take(*count as usize).collect::<String>()); + } + + // Buckets of 10, up to 200 + let mut buckets: [u8; 21] = [ + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0, + ]; + for res in &results { + match res.deps { + e if e < 200 => buckets[e / 10] += 1, + _ => buckets[20] += 1, + } + } + + println!(""); + println!("Dependency count histogram (buckets 0-200 by 10, 200+):"); + for (i, count) in buckets.iter().enumerate() { + let idx = match i { + 20 => " > 200".to_string(), + _ => format!("{:>3} - {:>3}", 10*i, 10*(i+1)), + }; + print!("{} ({:>5.1}%): ", idx, 100.0 * (*count as f64) / results.len() as f64); + println!("{}", ['*'].iter().cycle().take(std::cmp::min(50, *count as usize)).collect::<String>()); + } + println!(""); +} + +fn main() { + println!("========== 200 command-line-utilities crates =========="); + let crates = top_crates(200, Some("command-line-utilities".into()), None).unwrap(); + analyze(crates); + + println!("========== 100 graphics crates =========="); + let crates = top_crates(100, Some("graphics".into()), None).unwrap(); + analyze(crates); + + println!("========== 100 gui crates =========="); + let crates = top_crates(100, Some("gui".into()), None).unwrap(); + analyze(crates); + + println!("========== 100 web-programming crates =========="); + let crates = top_crates(100, Some("web-programming".into()), None).unwrap(); + analyze(crates); + + println!("========== Top 400 crates 
=========="); + let crates = top_crates(400, None, None).unwrap(); + analyze(crates); +}