commit: | d3a2e9432b0649ba8654fb6436c2d6531eac1e05 |
author: | Trevor Bentley |
committer: | Trevor Bentley |
date: | Thu Jan 12 16:01:45 2023 +0100 |
parents: | ad79db18ac06f66bca4ac6c220e08a85908214eb |
diff --git a/Cargo.lock b/Cargo.lock line changes: +97/-0 index 77326d1..b3618d5 --- a/Cargo.lock +++ b/Cargo.lock
@@ -217,6 +217,49 @@ dependencies = [ ] [[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" +dependencies = [ + "cfg-if", +] + +[[package]] name = "crypto-common" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -287,6 +330,12 @@ dependencies = [ ] [[package]] +name = "either" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" + +[[package]] name = "errno" version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -535,6 +584,7 @@ dependencies = [ "clap", "git2", "pulldown-cmark", + "rayon", "serde", "syntect", "tera",
@@ -657,6 +707,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + +[[package]] name = "miniz_oxide" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -685,6 +744,16 @@ dependencies = [ ] [[package]] +name = "num_cpus" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] name = "once_cell" version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -946,6 +1015,28 @@ dependencies = [ ] [[package]] +name = "rayon" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] name = "regex" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -998,6 +1089,12 @@ dependencies = [ ] [[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] name = "scratch" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml line changes: +1/-0 index 2263456..57c26cc --- a/Cargo.toml +++ b/Cargo.toml
@@ -22,6 +22,7 @@ chrono = { version = "0.4.23", features=["clock"] } clap = { version="4.0.32", features=["derive"] } git2 = "0.15.0" pulldown-cmark = { version = "0.9.2", optional = true } +rayon = "1.6.1" serde = { version = "1.0.152", features = ["derive"] } syntect = { version = "5.0.0", default-features = false, optional = true } tera = "1.17.1"
diff --git a/src/main.rs b/src/main.rs line changes: +33/-20 index fce007c..d21f36c --- a/src/main.rs +++ b/src/main.rs
@@ -5,6 +5,7 @@ use chrono::{ }; use clap::Parser; use git2::{DiffOptions, Repository, Error}; +use rayon::prelude::*; use serde::{Serialize, Deserialize}; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap, HashSet};
@@ -1029,12 +1030,14 @@ fn main() { tera.register_function("ts_to_git_timestamp", TsTimestampFn{}); macro_rules! size_check { - ($settings:ident, $cur:ident, $total:expr) => { - if $cur > $settings.limit_repo_size.unwrap_or(usize::MAX) { - break; + ($settings:ident, $cur:expr, $total:expr, $action:expr) => { + let cur: usize = $cur; + if cur > $settings.limit_repo_size.unwrap_or(usize::MAX) { + $action; } - if $total + $cur > $settings.limit_total_size.unwrap_or(usize::MAX) { - break; + let total: usize = $total; + if total.saturating_add($cur) > $settings.limit_total_size.unwrap_or(usize::MAX) { + $action; } } }
@@ -1184,7 +1187,7 @@ fn main() { } for branch in &summary.branches { - size_check!(repo_desc, repo_bytes, total_bytes); + size_check!(repo_desc, repo_bytes, total_bytes, break); local_ctx.insert("branch", branch); if let Some(templ_file) = settings.templates.branch.as_deref() { match tera.render(templ_file, &local_ctx) {
@@ -1200,7 +1203,7 @@ fn main() { } for tag in &summary.tags { - size_check!(repo_desc, repo_bytes, total_bytes); + size_check!(repo_desc, repo_bytes, total_bytes, break); local_ctx.insert("tag", tag); if let Some(tagged_id) = tag.tagged_id.as_ref() { if let Some(commit) = summary.commits.get(tagged_id) {
@@ -1222,7 +1225,7 @@ fn main() { } for (_id, commit) in &summary.commits { - size_check!(repo_desc, repo_bytes, total_bytes); + size_check!(repo_desc, repo_bytes, total_bytes, break); local_ctx.try_insert("commit", &commit).expect("Failed to add commit to template engine."); if let Some(templ_file) = settings.templates.commit.as_deref() { match tera.render(templ_file, &local_ctx) {
@@ -1237,11 +1240,6 @@ fn main() { local_ctx.remove("commit"); } - // TODO: most of these generation blocks can be done in - // parallel. This one is particularly costly, especially with - // markdown+highlighting, and would probably benefit from it. - // A potential drawback is that each parallel run needs a - // clone of the Tera context. #[cfg(any(feature = "highlight", feature = "highlight_fast"))] if settings.templates.file.is_some() { let ts = ThemeSet::load_defaults();
@@ -1252,17 +1250,29 @@ fn main() { repo_bytes += write_rendered(&settings.outputs.syntax_css(Some(&summary), None), css.as_str()); } - for file in summary.all_files.iter().filter(|x| x.kind == "file") { - size_check!(repo_desc, repo_bytes, total_bytes); + + // TODO: parallelize the rest of the processing steps. This one is + // done first because syntax highlighting is very slow. + let files: Vec<&GitFile> = summary.all_files.iter().filter(|x| x.kind == "file").collect(); + let atomic_bytes: AtomicUsize = AtomicUsize::new(repo_bytes); + let _ = files.par_iter().fold(|| Some(0), |acc, file| { + // These two have to be recreated. Cloning the Tera context is expensive. + let repo = Repository::open(&repo_path).expect("Unable to find git repository."); + let mut local_ctx = local_ctx.clone(); + + let mut local_bytes = 0; + let cur_repo_bytes = atomic_bytes.load(Ordering::Relaxed); + size_check!(repo_desc, cur_repo_bytes, total_bytes, return None); let file = match file.size < repo_desc.limit_file_size.unwrap_or(usize::MAX) { true => fill_file_contents(&repo, &file, &repo_desc).expect("Failed to parse file."), - false => file.clone(), + false => (*file).clone(), }; local_ctx.try_insert("file", &file).expect("Failed to add file to template engine."); if let Some(templ_file) = settings.templates.file.as_deref() { match tera.render(templ_file, &local_ctx) { Ok(rendered) => { - repo_bytes += write_rendered(&settings.outputs.file(Some(&summary), Some(&file)), &rendered); + local_bytes = write_rendered(&settings.outputs.file(Some(&summary), Some(&file)), &rendered); + atomic_bytes.fetch_add(local_bytes, Ordering::Relaxed); }, Err(x) => match x.kind { _ => error!("ERROR: {:?}", x),
@@ -1270,10 +1280,13 @@ fn main() { } } local_ctx.remove("file"); - } + Some(acc.unwrap() + local_bytes)}) + .while_some() // allow short-circuiting if size limit is reached + .sum::<usize>(); + repo_bytes = atomic_bytes.load(Ordering::Relaxed); for dir in summary.all_files.iter().filter(|x| x.kind == "dir") { - size_check!(repo_desc, repo_bytes, total_bytes); + size_check!(repo_desc, repo_bytes, total_bytes, break); if dir.tree_depth >= repo_desc.limit_tree_depth.unwrap_or(usize::MAX) - 1 { continue; }
@@ -1317,7 +1330,7 @@ fn main() { }, start_repo.elapsed().as_secs_f32(), repo_bytes); total_bytes += repo_bytes; - size_check!(repo_desc, repo_bytes, total_bytes); + size_check!(repo_desc, 0, total_bytes, break); // break if total is exceeded } let start_global = Instant::now();