};
macro_rules! size_check {
- ($settings:ident, $cur:expr, $total:expr, $action:expr) => {
+ ($settings:expr, $cur:expr, $total:expr, $action:expr) => {
let cur: usize = $cur;
if cur > $settings.limit_repo_size.unwrap_or(usize::MAX) {
$action;
};
}
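+// Like size_check!, but reads the running byte counts from AtomicUsize
+// counters so it can be used from rayon worker closures. $action fires if
+// either the per-repo limit or the total site size limit is exceeded.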
+macro_rules! size_check_atomic {
+ ($settings:expr, $cur:expr, $total:expr, $action:expr) => {
+ let cur: usize = $cur.load(Ordering::SeqCst);
+ if cur > $settings.limit_repo_size.unwrap_or(usize::MAX) {
+ $action;
+ }
+ let total: usize = $total.load(Ordering::SeqCst);
+ if total.saturating_add(cur) > $settings.limit_total_size.unwrap_or(usize::MAX) {
+ $action;
+ }
+ };
+}
+
pub struct GitsyGenerator {
cli: GitsyCli,
settings: GitsySettings,
Ok(global_bytes)
}
- pub fn gen_summary(&self, ctx: &Context, parsed_repo: &GitRepo, _repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_summary(&self, ctx: &Context, atomic_bytes: &AtomicUsize,
+ parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
for (templ_path, out_path) in self.settings.outputs.summary::<GitRepo>(Some(parsed_repo), None) {
let out_path = out_path.to_str().expect(&format!("ERROR: a summary output path is invalid: {}", out_path.display()));
match tera.render(templ_path, &ctx) {
Ok(rendered) => {
- repo_bytes +=
- self.write_rendered(&out_path, &rendered);
+ let bytes = self.write_rendered(&out_path, &rendered);
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
},
}
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
}
Ok(repo_bytes)
}
- pub fn gen_history(&self, ctx: &Context, parsed_repo: &GitRepo, _repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_history(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
- let mut repo_bytes = 0;
+ let repo_bytes = AtomicUsize::new(0);
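+ // Atomic rather than `mut`: the pages below are rendered from a rayon
+ // closure, which cannot mutably borrow a local counter.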
for (templ_path, out_path) in self.settings.outputs.history::<GitRepo>(Some(parsed_repo), None) {
let templ_path = templ_path.to_str().expect(&format!("ERROR: a summary template path is invalid: {}", templ_path.display()));
let out_path = out_path.to_str().expect(&format!("ERROR: a summary output path is invalid: {}", out_path.display()));
- let mut paged_ctx = ctx.clone();
- paged_ctx.remove("history");
let pages = parsed_repo.history.chunks(self.settings.paginate_history());
let page_count = pages.len();
- for (idx, page) in pages.enumerate() {
+ parsed_repo.history.par_chunks(self.settings.paginate_history()).enumerate().try_for_each(|(idx, page)| {
+ let mut paged_ctx = ctx.clone();
let pagination = Pagination::new(
idx + 1,
page_count,
);
paged_ctx.insert("page", &pagination.with_relative_paths());
paged_ctx.insert("history", &page);
- match tera.render(templ_path, &paged_ctx) {
- Ok(rendered) => {
- repo_bytes += self.write_rendered(&pagination.cur_page, &rendered);
- }
- Err(x) => match x.kind {
- _ => error!("ERROR: {:?}", x),
- },
- }
+ let rendered = tera.render(templ_path, &paged_ctx)?;
+ let bytes = self.write_rendered(&pagination.cur_page, &rendered);
+ repo_bytes.fetch_add(bytes, Ordering::SeqCst);
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
paged_ctx.remove("page");
paged_ctx.remove("history");
- }
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
+ Ok::<(), GitsyError>(())
+ })?;
}
- Ok(repo_bytes)
+ Ok(repo_bytes.load(Ordering::SeqCst))
}
- pub fn gen_commit(&self, ctx: &Context, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_commit(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
let mut ctx = ctx.clone();
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
for (_id, commit) in &parsed_repo.commits {
- size_check!(repo_desc, repo_bytes, self.total_bytes.load(Ordering::Relaxed), break);
ctx
.try_insert("commit", &commit)
.expect("Failed to add commit to template engine.");
let out_path = out_path.to_str().expect(&format!("ERROR: a summary output path is invalid: {}", out_path.display()));
match tera.render(templ_path, &ctx) {
Ok(rendered) => {
- repo_bytes += self
- .write_rendered(&out_path, &rendered);
+ let bytes = self.write_rendered(&out_path, &rendered);
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
}
}
ctx.remove("commit");
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
}
Ok(repo_bytes)
}
- pub fn gen_branches(&self, ctx: &Context, parsed_repo: &GitRepo, _repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_branches(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
for (templ_path, out_path) in self.settings.outputs.branches::<GitRepo>(Some(parsed_repo), None) {
paged_ctx.insert("branches", &page);
match tera.render(templ_path, &paged_ctx) {
Ok(rendered) => {
- repo_bytes += self.write_rendered(&pagination.cur_page, &rendered);
+ let bytes = self.write_rendered(&pagination.cur_page, &rendered);
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
paged_ctx.remove("page");
paged_ctx.remove("branches");
}
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
}
Ok(repo_bytes)
}
- pub fn gen_branch(&self, ctx: &Context, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_branch(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
let mut ctx = ctx.clone();
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
for branch in &parsed_repo.branches {
- size_check!(repo_desc, repo_bytes, self.total_bytes.load(Ordering::Relaxed), break);
ctx.insert("branch", branch);
for (templ_path, out_path) in self.settings.outputs.branch(Some(parsed_repo), Some(branch)) {
let templ_path = templ_path.to_str().expect(&format!("ERROR: a summary template path is invalid: {}", templ_path.display()));
let out_path = out_path.to_str().expect(&format!("ERROR: a summary output path is invalid: {}", out_path.display()));
match tera.render(templ_path, &ctx) {
Ok(rendered) => {
- repo_bytes += self
+ let bytes = self
.write_rendered(&out_path, &rendered);
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
}
}
ctx.remove("branch");
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
}
Ok(repo_bytes)
}
- pub fn gen_tags(&self, ctx: &Context, parsed_repo: &GitRepo, _repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_tags(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
for (templ_path, out_path) in self.settings.outputs.tags::<GitRepo>(Some(parsed_repo), None) {
paged_ctx.insert("tags", &page);
match tera.render(templ_path, &paged_ctx) {
Ok(rendered) => {
- repo_bytes += self.write_rendered(&pagination.cur_page, &rendered);
+ let bytes = self.write_rendered(&pagination.cur_page, &rendered);
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
}
paged_ctx.remove("page");
paged_ctx.remove("tags");
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
}
}
Ok(repo_bytes)
}
- pub fn gen_tag(&self, ctx: &Context, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_tag(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
let mut ctx = ctx.clone();
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
for tag in &parsed_repo.tags {
- size_check!(repo_desc, repo_bytes, self.total_bytes.load(Ordering::Relaxed), break);
ctx.insert("tag", tag);
if let Some(tagged_id) = tag.tagged_id.as_ref() {
if let Some(commit) = parsed_repo.commits.get(tagged_id) {
let out_path = out_path.to_str().expect(&format!("ERROR: a summary output path is invalid: {}", out_path.display()));
match tera.render(templ_path, &ctx) {
Ok(rendered) => {
- repo_bytes +=
+ let bytes =
self.write_rendered(&out_path, &rendered);
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
}
ctx.remove("tag");
ctx.remove("commit");
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
}
Ok(repo_bytes)
}
- pub fn gen_files(&self, ctx: &Context, parsed_repo: &GitRepo, _repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_files(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, _repo: &Repository) -> Result<usize, GitsyError> {
let mut ctx = ctx.clone();
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
ctx.insert("all_files", &parsed_repo.all_files);
match tera.render(templ_path, &ctx) {
Ok(rendered) => {
- repo_bytes +=
+ let bytes =
self.write_rendered(&out_path, &rendered);
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
},
}
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
}
Ok(repo_bytes)
}
- pub fn gen_file(&self, ctx: &Context, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_file(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, repo: &Repository) -> Result<usize, GitsyError> {
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
.expect("Invalid syntax highlighting theme specified.");
let css: String = css_for_theme_with_class_style(theme, syntect::html::ClassStyle::Spaced)
.expect("Invalid syntax highlighting theme specified.");
- repo_bytes +=
+ let bytes =
self.write_rendered(&self.settings.outputs.syntax_css::<GitFile>(Some(&parsed_repo), None), css.as_str());
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
// TODO: parallelize the rest of the processing steps. This one is
// done first because syntax highlighting is very slow.
let files: Vec<&GitFile> = parsed_repo.all_files.iter().filter(|x| x.kind == "file").collect();
- let atomic_bytes: AtomicUsize = AtomicUsize::new(repo_bytes);
+ let atomic_repo_bytes: AtomicUsize = AtomicUsize::new(repo_bytes);
let repo_path = repo.path().to_str().expect("ERROR: unable to determine path to local repository");
let _ = files
.par_iter()
let mut ctx = ctx.clone();
let mut local_bytes = 0;
- let cur_repo_bytes = atomic_bytes.load(Ordering::Relaxed);
- size_check!(repo_desc, cur_repo_bytes, self.total_bytes.load(Ordering::Relaxed), return None);
+ let cur_repo_bytes = atomic_repo_bytes.load(Ordering::SeqCst);
+ size_check!(repo_desc, cur_repo_bytes, self.total_bytes.load(Ordering::SeqCst), return None);
let file = match file.size < repo_desc.limit_file_size.unwrap_or(usize::MAX) {
true => GitsyGenerator::fill_file_contents(&repo, &file, &repo_desc)
.expect("Failed to parse file."),
match tera.render(templ_path, &ctx) {
Ok(rendered) => {
local_bytes = self.write_rendered(&out_path, &rendered,);
- atomic_bytes.fetch_add(local_bytes, Ordering::Relaxed);
+ atomic_repo_bytes.fetch_add(local_bytes, Ordering::SeqCst);
+ atomic_bytes.fetch_add(local_bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
}
}
ctx.remove("file");
+ if atomic_repo_bytes.load(Ordering::SeqCst) >= repo_desc.limit_repo_size.unwrap_or(usize::MAX) {
+ return None;
+ }
Some(acc.unwrap() + local_bytes)
},
)
.while_some() // allow short-circuiting if size limit is reached
.sum::<usize>();
- repo_bytes = atomic_bytes.load(Ordering::Relaxed);
+ repo_bytes = atomic_repo_bytes.load(Ordering::SeqCst);
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
Ok(repo_bytes)
}
- pub fn gen_dir(&self, ctx: &Context, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, repo: &Repository) -> Result<usize, GitsyError> {
+ pub fn gen_dir(&self, ctx: &Context, atomic_bytes: &AtomicUsize, parsed_repo: &GitRepo, repo_desc: &GitsySettingsRepo, repo: &Repository) -> Result<usize, GitsyError> {
let mut ctx = ctx.clone();
let tera = self.tera.as_ref().expect("ERROR: generate called without a context!?");
let mut repo_bytes = 0;
for dir in parsed_repo.all_files.iter().filter(|x| x.kind == "dir") {
- size_check!(repo_desc, repo_bytes, self.total_bytes.load(Ordering::Relaxed), break);
let listing = dir_listing(&repo, &dir).expect("Failed to parse file.");
ctx.insert("dir", dir);
ctx
let out_path = out_path.to_str().expect(&format!("ERROR: a summary output path is invalid: {}", out_path.display()));
match tera.render(templ_path, &ctx) {
Ok(rendered) => {
- repo_bytes +=
+ let bytes =
self.write_rendered(&out_path, &rendered);
+ repo_bytes += bytes;
+ atomic_bytes.fetch_add(bytes, Ordering::SeqCst);
}
Err(x) => match x.kind {
_ => error!("ERROR: {:?}", x),
}
ctx.remove("files");
ctx.remove("dir");
+ size_check_atomic!(repo_desc, atomic_bytes, self.total_bytes,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
}
Ok(repo_bytes)
}
Ok(bytes)
}
+ pub fn generate_repo(&self, repo_desc: &GitsySettingsRepo, pad_name_len: usize) -> Result<(GitRepo, usize), GitsyError> {
+ loudest!("Repo settings:\n{:#?}", &repo_desc);
+ let start_repo = Instant::now();
+
+ let name = repo_desc.name.as_deref().expect("A configured repository has no name!");
+ if self.settings.threads.unwrap_or(0) == 1 || VERBOSITY.load(Ordering::SeqCst) > 1 {
+ normal_noln!("[{}{}]... ", name, " ".repeat(pad_name_len - name.len()));
+ }
+ let repo_path = self.find_repo(&name, &repo_desc)?;
+ let repo = Repository::open(&repo_path).expect("Unable to find git repository.");
+
+ let metadata = GitsyMetadata {
+ full_name: repo_desc.name.clone(),
+ description: repo_desc.description.clone(),
+ website: repo_desc.website.clone(),
+ clone: repo_desc.clone_url.clone(),
+ attributes: repo_desc.attributes.clone().unwrap_or_default(),
+ };
+ let parsed_repo = parse_repo(&repo, &name, &repo_desc, metadata).expect("Failed to analyze repo HEAD.");
+ let minimized_repo = parsed_repo.minimal_clone(self.settings.limit_context.unwrap_or(usize::MAX));
+ let atomic_bytes = AtomicUsize::new(0);
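+ // Tracks bytes written by the generator functions below; each function's
+ // bytes are drained into self.total_bytes as it completes (see the fold).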
+
+ let mut local_ctx = self.new_context(Some(&minimized_repo))?;
+
+ // Add README file to context, if specified and found
+ if let Some(readmes) = &repo_desc.readme_files {
+ for readme in readmes {
+ if let Some(file) = parsed_repo.root_files.iter().find(|x| &x.name == readme) {
+ louder!(" - found readme file: {}", file.name);
+ let _ = GitsyGenerator::fill_file_contents(&repo, &file, &repo_desc)
+ .expect("Failed to parse file.");
+ local_ctx.insert("readme", &file);
+ break;
+ }
+ }
+ };
+
+ let fns = &[GitsyGenerator::gen_summary,
+ GitsyGenerator::gen_branches,
+ GitsyGenerator::gen_branch,
+ GitsyGenerator::gen_tags,
+ GitsyGenerator::gen_tag,
+ GitsyGenerator::gen_history,
+ GitsyGenerator::gen_commit,
+ GitsyGenerator::gen_file,
+ GitsyGenerator::gen_dir,
+ GitsyGenerator::gen_files,
+ ];
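+ // Run every generator in parallel over the shared context. Each one
+ // reports into atomic_bytes as it writes, so size_check_atomic! can
+ // enforce the limits while its siblings are still running.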
+
+ let repo_bytes: usize = fns.par_iter().try_fold(
+ || 0,
+ |acc, x| {
+ let repo = Repository::open(&repo_path).expect("Unable to find git repository.");
+ let bytes = x(&self, &local_ctx, &atomic_bytes, &parsed_repo, repo_desc, &repo)?;
+ // Move this generator's bytes from the per-repo in-flight counter to the site-wide total.
+ atomic_bytes.fetch_sub(bytes, Ordering::SeqCst);
+ self.total_bytes.fetch_add(bytes, Ordering::SeqCst);
+ Ok::<usize, GitsyError>(acc + bytes)
+ })
+ .try_reduce(|| 0, |acc, x| Ok(acc + x))?;
+
+ size_check!(repo_desc, 0, self.total_bytes.load(Ordering::SeqCst),
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: size limit exceeded"))));
+
+ self.copy_assets(Some(&repo_desc), Some(&parsed_repo), Some(&repo))?;
+
+ normal!(
+ "{}{}done in {:.2}s ({} bytes)",
+ match self.settings.threads.unwrap_or(0) == 1 && VERBOSITY.load(Ordering::SeqCst) <= 1 {
+ true => "".into(),
+ false => format!("[{}{}]... ", name, " ".repeat(pad_name_len - name.len())),
+ },
+ match VERBOSITY.load(Ordering::SeqCst) > 1 {
+ true => " - ",
+ _ => "",
+ },
+ start_repo.elapsed().as_secs_f32(),
+ repo_bytes
+ );
+ Ok((minimized_repo, repo_bytes))
+ }
+
pub fn generate(&mut self) -> Result<(), GitsyError> {
let start_all = Instant::now();
self.tera = Some(self.tera_init()?);
self.generated_dt = chrono::offset::Local::now();
- let mut total_bytes = 0;
- let mut repos: Vec<GitRepo> = vec![];
if self.cli.should_clean {
self.settings.outputs.clean();
loudest!("Global settings:\n{:#?}", &self.settings);
+ let shared_repos = std::sync::Mutex::new(Vec::<GitRepo>::new());
+
// Iterate over each repository, generating outputs
- for repo_desc in &repo_vec {
- loudest!("Repo settings:\n{:#?}", &repo_desc);
- let start_repo = Instant::now();
- let mut repo_bytes = 0;
-
- let name = repo_desc.name.as_deref().expect("A configured repository has no name!");
- normal_noln!("[{}{}]... ", name, " ".repeat(longest_repo_name - name.len()));
- let repo_path = self.find_repo(&name, &repo_desc)?;
- let repo = Repository::open(&repo_path).expect("Unable to find git repository.");
-
- let metadata = GitsyMetadata {
- full_name: repo_desc.name.clone(),
- description: repo_desc.description.clone(),
- website: repo_desc.website.clone(),
- clone: repo_desc.clone_url.clone(),
- attributes: repo_desc.attributes.clone().unwrap_or_default(),
- };
- let parsed_repo = parse_repo(&repo, &name, &repo_desc, metadata).expect("Failed to analyze repo HEAD.");
- let minimized_repo = parsed_repo.minimal_clone(self.settings.limit_context.unwrap_or(usize::MAX));
-
- let mut local_ctx = self.new_context(Some(&minimized_repo))?;
-
- // Add README file to context, if specified and found
- if let Some(readmes) = &repo_desc.readme_files {
- for readme in readmes {
- if let Some(file) = parsed_repo.root_files.iter().filter(|x| &x.name == readme).next() {
- louder!(" - found readme file: {}", file.name);
- let _ = GitsyGenerator::fill_file_contents(&repo, &file, &repo_desc)
- .expect("Failed to parse file.");
- local_ctx.insert("readme", &file);
- break;
- }
+ let mut total_bytes = match self.settings.threads.unwrap_or(0) {
+ 1 => {
+ let mut tb = 0;
+ for repo_desc in &repo_vec {
+ let (minimized_repo, repo_bytes) = self.generate_repo(repo_desc, longest_repo_name)?;
+ size_check!(repo_desc, 0, tb,
+ return Err(GitsyError::kind(GitsyErrorKind::Settings, Some("ERROR: site size limit exceeded"))));
+ shared_repos.lock().unwrap().push(minimized_repo);
+ tb += repo_bytes;
}
- };
+ tb
+ },
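+ // threads == 0 (the default): run on rayon's global thread pool,
+ // which sizes itself to the available cores.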
+ 0 => {
+ let total_bytes: usize = repo_vec.par_iter().try_fold(|| 0, |acc, repo_desc| {
+ let (minimized_repo, repo_bytes) = self.generate_repo(repo_desc, longest_repo_name)?;
+ size_check!(repo_desc, 0, acc + repo_bytes, return Err(GitsyError::kind(GitsyErrorKind::Unknown,
+ Some("ERROR: site size limit exceeded"))));
+ shared_repos.lock().unwrap().push(minimized_repo);
+ Ok::<usize, GitsyError>(acc + repo_bytes)
+ })
+ .try_reduce(|| 0, |acc, x| Ok(acc + x))?;
+ total_bytes
+ },
+ n => {
+ let pool = rayon::ThreadPoolBuilder::new()
+ .num_threads(n)
+ .build().unwrap();
+
+ let total_bytes = pool.install(|| {
+ let total_bytes: usize = repo_vec.par_iter().try_fold(|| 0, |acc, repo_desc| {
+ let (minimized_repo, repo_bytes) = self.generate_repo(repo_desc, longest_repo_name)?;
+ size_check!(repo_desc, 0, acc + repo_bytes, return Err(GitsyError::kind(GitsyErrorKind::Unknown,
+ Some("ERROR: site size limit exceeded"))));
+ shared_repos.lock().unwrap().push(minimized_repo);
+ Ok::<usize, GitsyError>(acc + repo_bytes)
+ })
+ .try_reduce(|| 0, |acc, x| Ok(acc + x))?;
+ Ok::<usize, GitsyError>(total_bytes)
+ })?;
+ total_bytes
+ }
+ };
+ size_check!(self.settings, 0, total_bytes, return Err(GitsyError::kind(GitsyErrorKind::Unknown,
+ Some("ERROR: site size limit exceeded"))));
- repo_bytes += self.gen_summary( &local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_branches(&local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_branch( &local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_tags( &local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_tag( &local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_history( &local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_commit( &local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_file( &local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_dir( &local_ctx, &parsed_repo, repo_desc, &repo)?;
- repo_bytes += self.gen_files( &local_ctx, &parsed_repo, repo_desc, &repo)?;
-
- self.copy_assets(Some(&repo_desc), Some(&parsed_repo), Some(&repo))?;
-
- repos.push(minimized_repo);
- normal!(
- "{}done in {:.2}s ({} bytes)",
- match VERBOSITY.load(Ordering::Relaxed) > 1 {
- true => " - ",
- _ => "",
- },
- start_repo.elapsed().as_secs_f32(),
- repo_bytes
- );
- total_bytes += repo_bytes;
- size_check!(repo_desc, 0, total_bytes, break); // break if total is exceeded
- }
+ let repos = shared_repos.into_inner().expect("Failed to collect repositories from worker threads!");
let start_global = Instant::now();
normal_noln!(
self.copy_assets(None, None, None)?;
total_bytes += global_bytes;
+ size_check!(self.settings, 0, total_bytes, return Err(GitsyError::kind(GitsyErrorKind::Unknown,
+ Some("ERROR: site size limit exceeded"))));
normal!(
"done in {:.2}s ({} bytes)",
start_global.elapsed().as_secs_f32(),
*/
use crate::settings::GitsySettingsRepo;
use crate::util::{sanitize_path_component, SafePathVar, urlify_path};
-use crate::{error, loud, loudest};
+use crate::{error, loud, louder, loudest};
use git2::{DiffOptions, Error, Repository};
+use rayon::prelude::*;
use serde::{Deserialize, Serialize};
+use std::sync::atomic::AtomicUsize;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
Ok(files)
}
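+// Parses every commit reachable from a prepared revwalk, up to the
+// configured history limit. Factored out of parse_repo() so that each
+// rayon job can drive its own revwalk over a distinct commit range.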
+pub fn parse_revwalk(repo: &Repository, mut revwalk: git2::Revwalk, references: &BTreeMap<String, Vec<String>>, settings: &GitsySettingsRepo) -> Result<Vec<GitObject>, Error> {
+ let mut history: Vec<GitObject> = vec![];
+
+ for (idx, oid) in revwalk.by_ref().enumerate() {
+ let oid = oid?;
+ if idx >= settings.limit_history.unwrap_or(usize::MAX) {
+ break;
+ }
+ let parsed = parse_commit(idx, settings, repo, &oid.to_string(), &references)?;
+ loudest!(" + [{}] {} {}", idx, parsed.full_hash,
+ parsed.summary.as_deref().unwrap_or_default());
+ history.push(parsed);
+ }
+ Ok(history)
+}
+
pub fn parse_repo(
repo: &Repository,
name: &str,
settings: &GitsySettingsRepo,
metadata: GitsyMetadata,
) -> Result<GitRepo, Error> {
- let mut history: Vec<GitObject> = vec![];
let mut branches: Vec<GitObject> = vec![];
let mut tags: Vec<GitObject> = vec![];
let mut commits: BTreeMap<String, GitObject> = BTreeMap::new();
- let mut commit_count = 0;
- let mut history_count = 0;
let mut branch_count = 0;
let mut tag_count = 0;
let branch_name = settings.branch.as_deref().unwrap_or("master");
}
loud!(" - parsed {} references", references.len());
- let mut revwalk = repo.revwalk()?;
- // TODO: TOPOLOGICAL might be better, but it's also ungodly slow
- // on large repos. Maybe this should be configurable.
- //
- //revwalk.set_sorting(git2::Sort::TOPOLOGICAL)?;
- revwalk.set_sorting(git2::Sort::NONE)?;
- revwalk.push(branch_obj.id())?;
loudest!(" - Parsing history:");
- for (idx, oid) in revwalk.by_ref().enumerate() {
- let oid = oid?;
- if commit_count >= settings.limit_commits.unwrap_or(usize::MAX)
- || history_count >= settings.limit_history.unwrap_or(usize::MAX)
- {
- break;
- }
- commits.insert(oid.to_string(), parse_commit(idx, settings, repo, &oid.to_string())?);
- commit_count += 1;
- let commit = repo.find_commit(oid)?;
- let obj = repo.revparse_single(&commit.id().to_string())?;
- let full_hash = commit.id().to_string();
- let short_hash = obj.short_id()?.as_str().unwrap_or_default().to_string();
- let mut parents: Vec<String> = vec![];
- let a = match commit.parents().len() {
- x if x == 1 => {
- let parent = commit.parent(0).unwrap();
- parents.push(parent.id().to_string());
- Some(parent.tree()?)
- }
- x if x > 1 => {
- for parent in commit.parents() {
- parents.push(parent.id().to_string());
- }
- let parent = commit.parent(0).unwrap();
- Some(parent.tree()?)
- }
- _ => None,
+ // Figure out how many commits we have, to determine whether we
+ // should parallelize. Unfortunately, git doesn't optimize for
+ // counting commits... this is a heavy operation.
+ let commit_count = {
+ let mut revwalk = repo.revwalk()?;
+ revwalk.set_sorting(git2::Sort::NONE)?;
+ // Using first parent counts the "mainline" commits, rather than
+ // the commits on the merged-in branches. These are also the
+ // commits that are accessible via "HEAD~{N}" references.
+ revwalk.simplify_first_parent()?;
+ revwalk.push(branch_obj.id())?;
+ revwalk.count().min(settings.limit_history.unwrap_or(usize::MAX))
+ };
+
+ // Let's arbitrarily say it's not worth parallelizing unless we
+ // can give all cores at least 1k commits to parse. This could
+ // certainly use some configurability...
+ let thread_jobs = match rayon::current_num_threads() > 1 &&
+ commit_count > 1000 * rayon::current_num_threads() {
+ // Divide the work into even smaller units, so each core runs about
+ // 10 jobs. Smaller jobs make it quicker to detect when the commit
+ // limit is reached and to short-circuit.
+ true => rayon::current_num_threads() * 10,
+ false => 1,
+ };
+
+ // Chunk size is only an estimate, since we used
+ // simplify_first_parent() above, and do not use it below. Each
+ // thread will include `chunk_size` direct parent commits, *plus*
+ // all commits from branches that merged into that range. This
+ // might not be evenly distributed.
+ let chunk_size = ((commit_count as f64) / (thread_jobs as f64)).ceil() as usize;
+ if thread_jobs > 1 {
+ loud!(" - splitting {} commits across {} threads of approximate size {}", commit_count, thread_jobs, chunk_size);
+ }
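+ // Illustrative (assumed) numbers: 100,000 first-parent commits on an
+ // 8-thread pool clears the 8 * 1000 threshold, so thread_jobs = 80 and
+ // chunk_size = ceil(100000 / 80) = 1250 commits per job.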
+
+ let repo_path = repo.path();
+
+ let thread_jobs: Vec<usize> = (0..thread_jobs).rev().collect(); // note the subtle rev() to do this in the right order
+ let atomic_commits = AtomicUsize::new(0);
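+ // Shared counter of parsed commits, letting jobs bail out early once
+ // other jobs have already reached the history limit.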
+ let mut history: Vec<_> = thread_jobs.par_iter().try_fold(|| Vec::<_>::new(), |mut acc, thread| {
+ if atomic_commits.load(Ordering::SeqCst) > settings.limit_history.unwrap_or(usize::MAX) {
+ // TODO: all of the error paths in this function should be converted
+ // to GitsyErrors, differentiating real failures from soft limits.
+ // For now, they all stop processing without raising errors, which is
+ // exactly what we take advantage of here.
+ return Err(git2::Error::from_str("history limit reached"));
+ }
+ let repo = Repository::open(repo_path)?;
+ let mut revwalk = repo.revwalk()?;
+ // TODO: TOPOLOGICAL might be better, but it's also ungodly slow
+ // on large repos. Maybe this should be configurable.
+ //
+ //revwalk.set_sorting(git2::Sort::TOPOLOGICAL)?;
+ revwalk.set_sorting(git2::Sort::NONE)?;
+ let start_commit = match (chunk_size * thread) + 1 > commit_count {
+ true => 1,
+ false => commit_count - 1 - (chunk_size * thread),
+ };
+ let end_commit = match chunk_size > start_commit {
+ true => "".into(),
+ false => format!("~{}", start_commit - chunk_size),
};
- let b = commit.tree()?;
- let mut diffopts = DiffOptions::new();
- let stats = match idx < settings.limit_diffs.unwrap_or(usize::MAX) {
+ let range = format!("{}~{}..{}{}",
+ branch_name, start_commit,
+ branch_name, end_commit);
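+ // e.g. (hypothetical values) branch "master", commit_count 100000,
+ // chunk_size 1250, thread 2: start_commit = 97499 and the range is
+ // "master~97499..master~96249".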
+ loud!(" - Parse range: {} on thread {}", range, thread);
+ match *thread == 0 {
true => {
- let diff = repo.diff_tree_to_tree(a.as_ref(), Some(&b), Some(&mut diffopts))?;
- let stats = diff.stats()?;
- let stats = GitStats {
- files: stats.files_changed(),
- additions: stats.insertions(),
- deletions: stats.deletions(),
- };
- Some(stats)
- },
- false => {
- None
+ // The last chunk gets a single ref instead of a
+ // range, because ranges can't seem to represent the
+ // very first commit in a repository...
+ let end_commit = format!("{}{}", branch_name, end_commit);
+ let branch_obj = repo.revparse_single(&end_commit).unwrap();
+ revwalk.push(branch_obj.id())?
},
- };
-
- let alt_refs: Vec<String> = references
- .get(&commit.id().to_string())
- .map(|x| x.to_owned())
- .unwrap_or_default();
-
- if history_count < settings.limit_history.unwrap_or(usize::MAX) {
- loudest!(" + {} {}", full_hash, first_line(commit.message_bytes()));
- // TODO: this is basically a duplicate of the commit
- // array, and really should be pointers to that array
- // instead. But it's not a quick task to switch to
- // self-referential data structures in Rust.
- history.push(GitObject {
- full_hash,
- short_hash,
- ts_utc: commit.author().when().seconds(),
- ts_offset: (commit.author().when().offset_minutes() as i64) * 60,
- parents,
- ref_name: None,
- alt_refs,
- author: GitAuthor {
- name: commit.author().name().map(|x| x.to_owned()),
- email: commit.author().email().map(|x| x.to_owned()),
- },
- summary: Some(first_line(commit.message_bytes())),
- stats,
- ..Default::default()
- });
- history_count += 1;
+ false => revwalk.push_range(&range)?,
}
+ let res = parse_revwalk(&repo, revwalk, &references, &settings)?;
+ louder!(" - Parsed {} on thread {}", res.len(), thread);
+ atomic_commits.fetch_add(res.len(), Ordering::SeqCst);
+ acc.extend(res);
+ Ok(acc)
+ })
+ .map(|x: Result<Vec<GitObject>, Error>| x.ok())
+ .while_some()
+ .flatten_iter() // concatenate all of the vecs in series
+ .collect();
+ // Have to truncate, because the logic above can overshoot.
+ history.truncate(settings.limit_history.unwrap_or(usize::MAX));
+ let history_count = history.len();
+
+ // TODO: very inefficient memory usage: all commits are cloned.
+ // Also done linearly, so this takes some time for large repos.
+ for commit in &history {
+ let _ = commits.insert(commit.full_hash.clone(), commit.clone());
}
- loud!(" - parsed {} history entries", history_count);
- loud!(" - parsed {} commits", commit_count);
+
+ loud!(" - parsed {} commits", history_count);
loudest!(" - Parsing branches:");
for branch in repo.branches(None)? {
}
pub fn parse_commit(idx: usize, settings: &GitsySettingsRepo,
- repo: &Repository, refr: &str) -> Result<GitObject, Error> {
+ repo: &Repository, refr: &str,
+ references: &BTreeMap<String, Vec<String>>) -> Result<GitObject, Error> {
let obj = repo.revparse_single(refr)?;
let commit = repo.find_commit(obj.id())?;
- let mut parents: Vec<String> = vec![];
+ let alt_refs: Vec<String> = references
+ .get(&commit.id().to_string())
+ .map(|x| x.to_owned())
+ .unwrap_or_default();
+
+ let mut parents: Vec<String> = vec![];
let a = match commit.parents().len() {
x if x == 1 => {
let parent = commit.parent(0).unwrap();
}
_ => None,
};
- let b = commit.tree()?;
- let mut diffopts = DiffOptions::new();
- diffopts.enable_fast_untracked_dirs(true);
- let diff = repo.diff_tree_to_tree(a.as_ref(), Some(&b), Some(&mut diffopts))?;
- let commit_diff: Option<GitDiffCommit> = match idx < settings.limit_diffs.unwrap_or(usize::MAX) {
+
+ let (stats, commit_diff) = match idx < settings.limit_diffs.unwrap_or(usize::MAX) {
+ false => {
+ (None, None)
+ },
true => {
+ let b = commit.tree()?;
+ let mut diffopts = DiffOptions::new();
+ diffopts.enable_fast_untracked_dirs(true);
+ let diff = repo.diff_tree_to_tree(a.as_ref(), Some(&b), Some(&mut diffopts))?;
let stats = diff.stats()?;
-
- Some(GitDiffCommit {
- file_count: stats.files_changed(),
+            // The enclosing match arm already established that idx is below
+            // limit_diffs, so the diff summary is unconditionally built here.
+            let commit_diff = Some(GitDiffCommit {
+                file_count: stats.files_changed(),
+                additions: stats.insertions(),
+                deletions: stats.deletions(),
+                ..Default::default()
+            });
+ let stats = GitStats {
+ files: stats.files_changed(),
additions: stats.insertions(),
deletions: stats.deletions(),
- ..Default::default()
- })
- },
- false => {
- None
- }
- };
+ };
- let commit_diff = match commit_diff {
- None => None,
- Some(mut commit_diff) => {
- let files: Rc<RefCell<Vec<GitDiffFile>>> = Rc::new(RefCell::new(vec![]));
- diff.foreach(
- &mut |file, _progress| {
- let mut file_diff: GitDiffFile = Default::default();
- file_diff.newfile = match file.status() {
- git2::Delta::Deleted => "/dev/null".to_owned(),
- _ => file
- .new_file()
- .path()
- .map(|x| "b/".to_string() + &x.to_string_lossy())
- .unwrap_or("/dev/null".to_string()),
- };
- file_diff.oldfile = match file.status() {
- git2::Delta::Added => "/dev/null".to_owned(),
- _ => file
- .old_file()
- .path()
- .map(|x| "a/".to_string() + &x.to_string_lossy())
- .unwrap_or("/dev/null".to_string()),
- };
- file_diff.basefile = match file.status() {
- git2::Delta::Added => file
- .new_file()
- .path()
- .map(|x| x.to_string_lossy().to_string())
- .unwrap_or("/dev/null".to_string()),
- _ => file
- .old_file()
- .path()
- .map(|x| x.to_string_lossy().to_string())
- .unwrap_or("/dev/null".to_string()),
- };
- file_diff.oldid = file.old_file().id().to_string();
- file_diff.newid = file.new_file().id().to_string();
- files.borrow_mut().push(file_diff);
- true
- },
- None, // TODO: handle binary files?
- Some(&mut |_file, hunk| {
- let mut files = files.borrow_mut();
- let file_diff: &mut GitDiffFile = files.last_mut().expect("Diff hunk not associated with a file!");
- let mut hunk_diff: GitDiffHunk = Default::default();
- hunk_diff.context = String::from_utf8_lossy(hunk.header()).to_string();
- file_diff.hunks.push(hunk_diff);
- true
- }),
- Some(&mut |_file, _hunk, line| {
- let mut files = files.borrow_mut();
- let file_diff: &mut GitDiffFile = files.last_mut().expect("Diff hunk not associated with a file!");
- let hunk_diff: &mut GitDiffHunk = file_diff
- .hunks
- .last_mut()
- .expect("Diff line not associated with a hunk!");
- let (kind, prefix) = match line.origin() {
- ' ' => ("ctx", " "),
- '-' => ("del", "-"),
- '+' => ("add", "+"),
- _ => ("other", " "),
- };
- match line.origin() {
- '-' => file_diff.deletions += 1,
- '+' => file_diff.additions += 1,
- _ => {}
+ let commit_diff = match commit_diff {
+ None => None,
+ Some(mut commit_diff) => {
+ let files: Rc<RefCell<Vec<GitDiffFile>>> = Rc::new(RefCell::new(vec![]));
+ diff.foreach(
+ &mut |file, _progress| {
+ let mut file_diff: GitDiffFile = Default::default();
+ file_diff.newfile = match file.status() {
+ git2::Delta::Deleted => "/dev/null".to_owned(),
+ _ => file
+ .new_file()
+ .path()
+ .map(|x| "b/".to_string() + &x.to_string_lossy())
+ .unwrap_or("/dev/null".to_string()),
+ };
+ file_diff.oldfile = match file.status() {
+ git2::Delta::Added => "/dev/null".to_owned(),
+ _ => file
+ .old_file()
+ .path()
+ .map(|x| "a/".to_string() + &x.to_string_lossy())
+ .unwrap_or("/dev/null".to_string()),
+ };
+ file_diff.basefile = match file.status() {
+ git2::Delta::Added => file
+ .new_file()
+ .path()
+ .map(|x| x.to_string_lossy().to_string())
+ .unwrap_or("/dev/null".to_string()),
+ _ => file
+ .old_file()
+ .path()
+ .map(|x| x.to_string_lossy().to_string())
+ .unwrap_or("/dev/null".to_string()),
+ };
+ file_diff.oldid = file.old_file().id().to_string();
+ file_diff.newid = file.new_file().id().to_string();
+ files.borrow_mut().push(file_diff);
+ true
+ },
+ None, // TODO: handle binary files?
+ Some(&mut |_file, hunk| {
+ let mut files = files.borrow_mut();
+ let file_diff: &mut GitDiffFile = files.last_mut().expect("Diff hunk not associated with a file!");
+ let mut hunk_diff: GitDiffHunk = Default::default();
+ hunk_diff.context = String::from_utf8_lossy(hunk.header()).to_string();
+ file_diff.hunks.push(hunk_diff);
+ true
+ }),
+ Some(&mut |_file, _hunk, line| {
+ let mut files = files.borrow_mut();
+ let file_diff: &mut GitDiffFile = files.last_mut().expect("Diff hunk not associated with a file!");
+ let hunk_diff: &mut GitDiffHunk = file_diff
+ .hunks
+ .last_mut()
+ .expect("Diff line not associated with a hunk!");
+ let (kind, prefix) = match line.origin() {
+ ' ' => ("ctx", " "),
+ '-' => ("del", "-"),
+ '+' => ("add", "+"),
+ _ => ("other", " "),
+ };
+ match line.origin() {
+ '-' => file_diff.deletions += 1,
+ '+' => file_diff.additions += 1,
+ _ => {}
+ }
+ let line_diff = GitDiffLine {
+ text: String::from_utf8_lossy(line.content()).to_string(),
+ kind,
+ prefix,
+ };
+ hunk_diff.lines.push(line_diff);
+ true
+ }),
+ )?;
+
+ match Rc::try_unwrap(files) {
+ Ok(files) => {
+ let files: Vec<GitDiffFile> = files.into_inner();
+ commit_diff.files = files;
+ }
+ Err(_) => {}
}
- let line_diff = GitDiffLine {
- text: String::from_utf8_lossy(line.content()).to_string(),
- kind,
- prefix,
- };
- hunk_diff.lines.push(line_diff);
- true
- }),
- )?;
- match Rc::try_unwrap(files) {
- Ok(files) => {
- let files: Vec<GitDiffFile> = files.into_inner();
- commit_diff.files = files;
+ Some(commit_diff)
}
- Err(_) => {}
- }
- Some(commit_diff)
- }
+ };
+ (Some(stats), commit_diff)
+ },
};
let tree = obj.peel_to_tree()?;
tree_id: Some(tree.id().to_string()),
parents,
ref_name: None,
- alt_refs: vec![],
+ alt_refs,
author: GitAuthor {
name: commit.author().name().map(|x| x.to_string()),
email: commit.author().email().map(|x| x.to_string()),
},
summary: Some(first_line(commit.message_bytes())),
message: commit.message().map(|x| x.to_string()),
- stats: None,
+ stats,
diff: commit_diff,
};
-
Ok(summary)
}
pub paginate_history: Option<usize>,
pub paginate_branches: Option<usize>,
pub paginate_tags: Option<usize>,
+ pub threads: Option<usize>,
pub limit_history: Option<usize>,
pub limit_commits: Option<usize>,
pub limit_branches: Option<usize>,
($($arg:tt)*) => {{ if crate::util::VERBOSITY.load(Ordering::Relaxed) > 3 { println!($($arg)*); } }};
}
+#[derive(Default, Clone)]
#[allow(dead_code)]
pub enum GitsyErrorKind {
+ #[default]
Unknown,
Settings,
Template,
Git,
}
+
+#[derive(Default)]
pub struct GitsyError {
msg: Option<String>,
kind: GitsyErrorKind,
source: Option<Box<dyn std::error::Error>>,
}
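+// SAFETY (assumption): the boxed `source` is only ever produced from error
+// types that are themselves Send (e.g. git2/tera errors), and clone() drops
+// it. A stricter alternative would be requiring
+// `Box<dyn std::error::Error + Send + Sync>` for the source.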
+unsafe impl Send for GitsyError {}
+
+impl Clone for GitsyError {
+ fn clone(&self) -> Self {
+ GitsyError {
+ msg: self.msg.clone(),
+ kind: self.kind.clone(),
+ source: None,
+ }
+ }
+}
+
#[allow(dead_code)]
impl GitsyError {
pub fn kind(kind: GitsyErrorKind, msg: Option<&str>) -> Self {
impl std::fmt::Display for GitsyError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.kind {
+ GitsyErrorKind::Git => write!(f, "gitsy error (git)")?,
+ GitsyErrorKind::Settings => write!(f, "gitsy error (settings)")?,
+ GitsyErrorKind::Template => write!(f, "gitsy error (template)")?,
_ => write!(f, "gitsy error (unknown)")?,
}
write!(f, ": {}", self.msg.as_deref().unwrap_or_default())