summary history branches tags files
commit:c257ba12f00f80d01eba39f8582c8e42379373fc
author:Trevor Bentley
committer:Trevor Bentley
date:Fri Jan 20 02:26:16 2023 +0100
parents:5f0f9978c95410c7fdf47df10034bc1f52904cc6
Add `limit_commit_ids_to_related` setting.

Each template receives `commit_ids`, a list of ALL commit hashes known
to Itsy-Gitsy.  For large repos with hundreds of thousands or millions
of commits, this huge list uses lots of memory and takes a long time
to search.

`limit_commit_ids_to_related` reduces the list to just a handful of
relevant commits for certain templates, which can give a massive
performance boost.
diff --git a/README.md b/README.md
line changes: +1/-1
index 201a1d6..000ad58
--- a/README.md
+++ b/README.md
@@ -183,7 +183,7 @@ Syntax highlighting uses syntect's pure-Rust implementation by default, to avoid
 
 All metadata of all repositories, except for file contents, is held in memory.  Large repositories can easily exhaust memory, and disk usage can also get quite high.  There are several `limit_*` settings available in the configuration for restricting the amount of data held in memory, with the tradeoff of reducing the amount of data available for the generated output.  `limit_context` and `limit_diffs` are particularly important restrictions to set on repositories with thousands of commits.
 
-Small repositories with dozens to hundreds of commits can be generated on the order of a few seconds or less.  Large repositories take *considerably* longer; parsing 1,115,000 commits from the Linux kernel repository with `limit_tree_depth = 3`, `limit_context = 100` and `limit_diffs = 0` took ~25 minutes on a fast laptop, and produced a ~5.6GB website.  Peak memory usage was 8GB.
+Small repositories with dozens to hundreds of commits can be generated on the order of a few seconds or less.  Large repositories take *considerably* longer and use significant amounts of RAM, but aggressively applying limits in the configuration can help; parsing 1,115,000 commits from the Linux kernel repository with syntax highlighting disabled, `limit_commit_ids_to_related = true`, `limit_tree_depth = 3`, `limit_context = 100` and `limit_diffs = 0` took ~7 minutes on a fast laptop, and produced a ~5.7GB website.  Peak memory usage was ~8GB.
 
 ## Other Considerations
 

diff --git a/config.toml b/config.toml
line changes: +21/-0
index cf195d3..7af3374
--- a/config.toml
+++ b/config.toml
@@ -209,6 +209,27 @@ threads = 0
 # This can also be set per-repository.
 #limit_tags       = 500
 
+# Limit the `commit_ids` variable to only related commits
+#
+# Normally the `commit_ids` variable contains a list of *all* git
+# commit hashes that Itsy-Gitsy is parsing.  For large repos, this can
+# be enormous, and take a lot of CPU time to search.
+#
+# Enabling this option limits `commit_ids` to only contain hashes of
+# commits that are referenced by the current object:
+#
+# - in `history` templates, all commits on the current page
+# - in `commit` templates, all the parent commits
+# - in `branch` templates, the commit it references
+# - in `tag` templates, the commit that it tags
+#
+# With the default templates, enabling this provides a performance
+# boost with no tradeoffs.  It is only a restriction for custom
+# templates that need commit hashes beyond the related ones.
+#
+# This can also be set per-repository.
+limit_commit_ids_to_related = true
+
 # Limits directory depth to traverse when parsing files.
 #
 # Limits the number of directories traversed when enumerating files in the

diff --git a/src/generate.rs b/src/generate.rs
line changes: +30/-0
index 97d4bc1..235d610
--- a/src/generate.rs
+++ b/src/generate.rs
@@ -454,6 +454,10 @@ impl GitsyGenerator {
                         })
                         .map_while(|x| x)
                         .collect();
+                    if repo_desc.limit_commit_ids_to_related == Some(true) {
+                        let parent_ids: Vec<String> = commits.keys().cloned().collect();
+                        paged_ctx.insert("commit_ids", &parent_ids);
+                    }
                     paged_ctx.insert("page", &pagination.with_relative_paths());
                     paged_ctx.insert("history", &page);
                     paged_ctx.insert("commits", &commits);
@@ -493,6 +497,15 @@ impl GitsyGenerator {
         for (_id, commit) in &parsed_repo.commits {
             ctx.try_insert("commit", &commit)
                 .expect("Failed to add commit to template engine.");
+            if repo_desc.limit_commit_ids_to_related == Some(true) {
+                let parent_ids: Vec<String> = commit
+                    .parents
+                    .iter()
+                    .filter(|x| parsed_repo.commits.contains_key(*x))
+                    .cloned()
+                    .collect();
+                ctx.insert("commit_ids", &parent_ids);
+            }
             for (templ_path, out_path) in self.settings.outputs.commit(Some(parsed_repo), Some(commit)) {
                 let templ_path = templ_path.to_str().expect(&format!(
                     "ERROR: a summary template path is invalid: {}",
@@ -593,6 +606,14 @@ impl GitsyGenerator {
         let mut repo_bytes = 0;
         for branch in &parsed_repo.branches {
             ctx.insert("branch", branch);
+            if repo_desc.limit_commit_ids_to_related == Some(true) {
+                let parent_ids: Vec<String> = [&branch.full_hash]
+                    .iter()
+                    .filter(|x| parsed_repo.commits.contains_key(**x))
+                    .map(|x| (**x).clone())
+                    .collect();
+                ctx.insert("commit_ids", &parent_ids);
+            }
             for (templ_path, out_path) in self.settings.outputs.branch(Some(parsed_repo), Some(branch)) {
                 let templ_path = templ_path.to_str().expect(&format!(
                     "ERROR: a summary template path is invalid: {}",
@@ -693,6 +714,15 @@ impl GitsyGenerator {
         let mut repo_bytes = 0;
         for tag in &parsed_repo.tags {
             ctx.insert("tag", tag);
+            if repo_desc.limit_commit_ids_to_related == Some(true) {
+                let parent_ids: Vec<String> = [tag.tagged_id.as_deref()]
+                    .iter()
+                    .map_while(|x| *x)
+                    .filter(|x| parsed_repo.commits.contains_key(*x))
+                    .map(|x| x.to_string())
+                    .collect();
+                ctx.insert("commit_ids", &parent_ids);
+            }
             if let Some(tagged_id) = tag.tagged_id.as_ref() {
                 if let Some(commit) = parsed_repo.commits.get(tagged_id) {
                     ctx.insert("commit", &commit);

diff --git a/src/settings.rs b/src/settings.rs
line changes: +5/-0
index dfe7381..f0b7edd
--- a/src/settings.rs
+++ b/src/settings.rs
@@ -372,6 +372,7 @@ pub struct GitsySettingsRepo {
     pub limit_commits: Option<usize>,
     pub limit_branches: Option<usize>,
     pub limit_tags: Option<usize>,
+    pub limit_commit_ids_to_related: Option<bool>,
     pub limit_tree_depth: Option<usize>,
     pub limit_file_size: Option<usize>,
     pub limit_repo_size: Option<usize>,
@@ -413,6 +414,7 @@ pub struct GitsySettings {
     pub limit_commits: Option<usize>,
     pub limit_branches: Option<usize>,
     pub limit_tags: Option<usize>,
+    pub limit_commit_ids_to_related: Option<bool>,
     pub limit_tree_depth: Option<usize>,
     pub limit_file_size: Option<usize>,
     pub limit_repo_size: Option<usize>,
@@ -496,6 +498,7 @@ impl GitsySettings {
                     global_to_repo!(settings, repo, limit_commits);
                     global_to_repo!(settings, repo, limit_branches);
                     global_to_repo!(settings, repo, limit_tags);
+                    global_to_repo!(settings, repo, limit_commit_ids_to_related);
                     global_to_repo!(settings, repo, limit_tree_depth);
                     global_to_repo!(settings, repo, limit_file_size);
                     global_to_repo!(settings, repo, limit_repo_size);
@@ -537,6 +540,7 @@ impl GitsySettings {
                             limit_commits: settings.limit_commits.clone(),
                             limit_branches: settings.limit_branches.clone(),
                             limit_tags: settings.limit_tags.clone(),
+                            limit_commit_ids_to_related: settings.limit_commit_ids_to_related.clone(),
                             limit_tree_depth: settings.limit_tree_depth.clone(),
                             limit_file_size: settings.limit_file_size.clone(),
                             limit_repo_size: settings.limit_repo_size.clone(),
@@ -579,6 +583,7 @@ impl GitsySettings {
                     limit_commits: settings.limit_commits.clone(),
                     limit_branches: settings.limit_branches.clone(),
                     limit_tags: settings.limit_tags.clone(),
+                    limit_commit_ids_to_related: settings.limit_commit_ids_to_related.clone(),
                     limit_tree_depth: settings.limit_tree_depth.clone(),
                     limit_file_size: settings.limit_file_size.clone(),
                     limit_repo_size: settings.limit_repo_size.clone(),