summary history branches tags files
commit:6d301831b96d7e228703ac7a6093687e0862a137
author:Trevor Bentley
committer:Trevor Bentley
date:Sat Jan 14 19:52:11 2023 +0100
parents:d903e4adb92b298d8c8272e2bb27858926692919
config options and behavior tweaks to support large repositories
diff --git a/config.toml b/config.toml
line changes: +40/-0
index e048fd3..da788dd
--- a/config.toml
+++ b/config.toml
@@ -232,6 +232,44 @@ syntax_highlight_theme = "base16-ocean.light"
 # This can also be set per-repository.
 #limit_total_size = 524288000
 
+# Limits number of contextual elements available to templates.
+#
+# By default, if this limit is not set, all repository elements
+# (history, branches, tags, commits) are provided in full to all
+# repository templates, so every page can see all repo metadata.
+#
+# For parallel output generation, one copy of the repo metadata is
+# made per CPU core.  All of this must be loaded into RAM, which means
+# large repositories can be very slow to process, or exhaust memory.
+#
+# This setting limits all elements, *except* those directly relevant
+# to the page being rendered.  For example, all elements except
+# `branches` are limited in the `branches` template, all elements
+# except `tags` are limited in the `tags` template, and so on.
+#
+# This is critically important for very large repositories, to prevent
+# them from consuming all memory and destroying all things.
+#
+# This can also be set per-repository.
+#limit_context = 200
+
+# Limit number of diffs and diff statistics.
+#
+# By default, if this limit is not set, every history item contains
+# statistics (files changed, additions, and deletions), and every
+# commit item contains a diff.  These are all held in memory and
+# passed to each repository template.
+#
+# While parsing history, once this limit is reached, subsequent
+# history and commit items will not have diffs and statistics
+# included.
+#
+# This is critically important for very large repositories, to prevent
+# them from consuming all memory and destroying all things.
+#
+# This can also be set per-repository.
+#limit_diffs = 200
+
 
 
 ###############################################################################
@@ -486,6 +524,8 @@ generated_by = "Itsy-Gitsy"
 #limit_file_size        = 2097152
 #limit_repo_size        = 52428800
 #limit_total_size       = 524288000
+#limit_context          = 200
+#limit_diffs            = 200
 #asset_files            = ["LICENSE"]
 
 # An alternative way to specify the user-defined attributes.

diff --git a/src/generate.rs b/src/generate.rs
line changes: +30/-28
index e62500f..9be5c16
--- a/src/generate.rs
+++ b/src/generate.rs
@@ -221,6 +221,7 @@ impl GitsyGenerator {
             let start_repo = Instant::now();
             let mut repo_bytes = 0;
             let name = repo_desc.name.as_deref().expect("A configured repository has no name!");
+            normal_noln!("[{}{}]... ", name, " ".repeat(longest_repo_name - name.len()));
 
             let repo_path = match &repo_desc.path {
                 url if url.starts_with("https://") || url.to_str().unwrap_or_default().contains("@") => {
@@ -307,6 +308,7 @@ impl GitsyGenerator {
                     dir.to_string_lossy().to_string()
                 }
             };
+
             let repo = Repository::open(&repo_path).expect("Unable to find git repository.");
             let metadata = GitsyMetadata {
                 full_name: repo_desc.name.clone(),
@@ -315,10 +317,10 @@ impl GitsyGenerator {
                 clone: None,
                 attributes: repo_desc.attributes.clone().unwrap_or_default(),
             };
-            normal_noln!("[{}{}]... ", name, " ".repeat(longest_repo_name - name.len()));
-            let summary = parse_repo(&repo, &name, &repo_desc, metadata).expect("Failed to analyze repo HEAD.");
+            let parsed_repo = parse_repo(&repo, &name, &repo_desc, metadata).expect("Failed to analyze repo HEAD.");
+            let minimized_repo = parsed_repo.minimal_clone(self.settings.limit_context.unwrap_or(usize::MAX));
 
-            let mut local_ctx = Context::from_serialize(&summary).unwrap();
+            let mut local_ctx = Context::from_serialize(&minimized_repo).unwrap();
             if let Some(extra) = &self.settings.extra {
                 local_ctx
                     .try_insert("extra", extra)
@@ -342,7 +344,7 @@ impl GitsyGenerator {
                 match tera.render(templ_file, &local_ctx) {
                     Ok(rendered) => {
                         repo_bytes +=
-                            self.write_rendered(&self.settings.outputs.repo_summary(Some(&summary), None), &rendered);
+                            self.write_rendered(&self.settings.outputs.repo_summary(Some(&parsed_repo), None), &rendered);
                     }
                     Err(x) => match x.kind {
                         _ => error!("ERROR: {:?}", x),
@@ -353,13 +355,13 @@ impl GitsyGenerator {
             if let Some(templ_file) = self.settings.templates.branches.as_deref() {
                 let mut paged_ctx = local_ctx.clone();
                 paged_ctx.remove("branches");
-                let pages = summary.branches.chunks(self.settings.paginate_branches());
+                let pages = parsed_repo.branches.chunks(self.settings.paginate_branches());
                 let page_count = pages.len();
                 for (idx, page) in pages.enumerate() {
                     let pagination = Pagination::new(
                         idx + 1,
                         page_count,
-                        &self.settings.outputs.branches(Some(&summary), None),
+                        &self.settings.outputs.branches(Some(&parsed_repo), None),
                     );
                     paged_ctx.insert("page", &pagination.with_relative_paths());
                     paged_ctx.insert("branches", &page);
@@ -376,14 +378,14 @@ impl GitsyGenerator {
                 }
             }
 
-            for branch in &summary.branches {
+            for branch in &parsed_repo.branches {
                 size_check!(repo_desc, repo_bytes, total_bytes, break);
                 local_ctx.insert("branch", branch);
                 if let Some(templ_file) = self.settings.templates.branch.as_deref() {
                     match tera.render(templ_file, &local_ctx) {
                         Ok(rendered) => {
                             repo_bytes += self
-                                .write_rendered(&self.settings.outputs.branch(Some(&summary), Some(branch)), &rendered);
+                                .write_rendered(&self.settings.outputs.branch(Some(&parsed_repo), Some(branch)), &rendered);
                         }
                         Err(x) => match x.kind {
                             _ => error!("ERROR: {:?}", x),
@@ -396,11 +398,11 @@ impl GitsyGenerator {
             if let Some(templ_file) = self.settings.templates.tags.as_deref() {
                 let mut paged_ctx = local_ctx.clone();
                 paged_ctx.remove("tags");
-                let pages = summary.tags.chunks(self.settings.paginate_tags());
+                let pages = parsed_repo.tags.chunks(self.settings.paginate_tags());
                 let page_count = pages.len();
                 for (idx, page) in pages.enumerate() {
                     let pagination =
-                        Pagination::new(idx + 1, page_count, &self.settings.outputs.tags(Some(&summary), None));
+                        Pagination::new(idx + 1, page_count, &self.settings.outputs.tags(Some(&parsed_repo), None));
                     paged_ctx.insert("page", &pagination.with_relative_paths());
                     paged_ctx.insert("tags", &page);
                     match tera.render(templ_file, &paged_ctx) {
@@ -416,11 +418,11 @@ impl GitsyGenerator {
                 }
             }
 
-            for tag in &summary.tags {
+            for tag in &parsed_repo.tags {
                 size_check!(repo_desc, repo_bytes, total_bytes, break);
                 local_ctx.insert("tag", tag);
                 if let Some(tagged_id) = tag.tagged_id.as_ref() {
-                    if let Some(commit) = summary.commits.get(tagged_id) {
+                    if let Some(commit) = parsed_repo.commits.get(tagged_id) {
                         local_ctx.insert("commit", &commit);
                     }
                 }
@@ -428,7 +430,7 @@ impl GitsyGenerator {
                     match tera.render(templ_file, &local_ctx) {
                         Ok(rendered) => {
                             repo_bytes +=
-                                self.write_rendered(&self.settings.outputs.tag(Some(&summary), Some(tag)), &rendered);
+                                self.write_rendered(&self.settings.outputs.tag(Some(&parsed_repo), Some(tag)), &rendered);
                         }
                         Err(x) => match x.kind {
                             _ => error!("ERROR: {:?}", x),
@@ -442,13 +444,13 @@ impl GitsyGenerator {
             if let Some(templ_file) = self.settings.templates.history.as_deref() {
                 let mut paged_ctx = local_ctx.clone();
                 paged_ctx.remove("history");
-                let pages = summary.history.chunks(self.settings.paginate_history());
+                let pages = parsed_repo.history.chunks(self.settings.paginate_history());
                 let page_count = pages.len();
                 for (idx, page) in pages.enumerate() {
                     let pagination = Pagination::new(
                         idx + 1,
                         page_count,
-                        &self.settings.outputs.history(Some(&summary), None),
+                        &self.settings.outputs.history(Some(&parsed_repo), None),
                     );
                     paged_ctx.insert("page", &pagination.with_relative_paths());
                     paged_ctx.insert("history", &page);
@@ -465,7 +467,7 @@ impl GitsyGenerator {
                 }
             }
 
-            for (_id, commit) in &summary.commits {
+            for (_id, commit) in &parsed_repo.commits {
                 size_check!(repo_desc, repo_bytes, total_bytes, break);
                 local_ctx
                     .try_insert("commit", &commit)
@@ -474,7 +476,7 @@ impl GitsyGenerator {
                     match tera.render(templ_file, &local_ctx) {
                         Ok(rendered) => {
                             repo_bytes += self
-                                .write_rendered(&self.settings.outputs.commit(Some(&summary), Some(commit)), &rendered);
+                                .write_rendered(&self.settings.outputs.commit(Some(&parsed_repo), Some(commit)), &rendered);
                         }
                         Err(x) => match x.kind {
                             _ => error!("ERROR: {:?}", x),
@@ -499,12 +501,12 @@ impl GitsyGenerator {
                 let css: String = css_for_theme_with_class_style(theme, syntect::html::ClassStyle::Spaced)
                     .expect("Invalid syntax highlighting theme specified.");
                 repo_bytes +=
-                    self.write_rendered(&self.settings.outputs.syntax_css(Some(&summary), None), css.as_str());
+                    self.write_rendered(&self.settings.outputs.syntax_css(Some(&parsed_repo), None), css.as_str());
             }
 
             // TODO: parallelize the rest of the processing steps.  This one is
             // done first because syntax highlighting is very slow.
-            let files: Vec<&GitFile> = summary.all_files.iter().filter(|x| x.kind == "file").collect();
+            let files: Vec<&GitFile> = parsed_repo.all_files.iter().filter(|x| x.kind == "file").collect();
             let atomic_bytes: AtomicUsize = AtomicUsize::new(repo_bytes);
             let _ = files
                 .par_iter()
@@ -530,7 +532,7 @@ impl GitsyGenerator {
                             match tera.render(templ_file, &local_ctx) {
                                 Ok(rendered) => {
                                     local_bytes = self.write_rendered(
-                                        &self.settings.outputs.file(Some(&summary), Some(&file)),
+                                        &self.settings.outputs.file(Some(&parsed_repo), Some(&file)),
                                         &rendered,
                                     );
                                     atomic_bytes.fetch_add(local_bytes, Ordering::Relaxed);
@@ -548,11 +550,8 @@ impl GitsyGenerator {
                 .sum::<usize>();
             repo_bytes = atomic_bytes.load(Ordering::Relaxed);
 
-            for dir in summary.all_files.iter().filter(|x| x.kind == "dir") {
+            for dir in parsed_repo.all_files.iter().filter(|x| x.kind == "dir") {
                 size_check!(repo_desc, repo_bytes, total_bytes, break);
-                if dir.tree_depth >= repo_desc.limit_tree_depth.unwrap_or(usize::MAX) - 1 {
-                    continue;
-                }
                 let listing = dir_listing(&repo, &dir).expect("Failed to parse file.");
                 local_ctx.insert("dir", dir);
                 local_ctx
@@ -562,7 +561,7 @@ impl GitsyGenerator {
                     match tera.render(templ_file, &local_ctx) {
                         Ok(rendered) => {
                             repo_bytes +=
-                                self.write_rendered(&self.settings.outputs.dir(Some(&summary), Some(dir)), &rendered);
+                                self.write_rendered(&self.settings.outputs.dir(Some(&parsed_repo), Some(dir)), &rendered);
                         }
                         Err(x) => match x.kind {
                             _ => error!("ERROR: {:?}", x),
@@ -574,10 +573,13 @@ impl GitsyGenerator {
             }
 
             if let Some(templ_file) = self.settings.templates.files.as_deref() {
+                let mut local_ctx = local_ctx.clone();
+                local_ctx.insert("root_files", &parsed_repo.root_files);
+                local_ctx.insert("all_files", &parsed_repo.all_files);
                 match tera.render(templ_file, &local_ctx) {
                     Ok(rendered) => {
                         repo_bytes +=
-                            self.write_rendered(&self.settings.outputs.files(Some(&summary), None), &rendered);
+                            self.write_rendered(&self.settings.outputs.files(Some(&parsed_repo), None), &rendered);
                     }
                     Err(x) => match x.kind {
                         _ => error!("ERROR: {:?}", x),
@@ -586,7 +588,7 @@ impl GitsyGenerator {
             }
 
             if repo_desc.asset_files.is_some() {
-                let target_dir = self.settings.outputs.repo_assets(Some(&summary), None);
+                let target_dir = self.settings.outputs.repo_assets(Some(&parsed_repo), None);
                 for src_file in repo_desc.asset_files.as_ref().unwrap() {
                     let src_file = PathBuf::from(repo_path.to_owned() + "/" + src_file);
                     let mut dst_file = PathBuf::from(&target_dir);
@@ -606,7 +608,7 @@ impl GitsyGenerator {
                 }
             }
 
-            repos.push(summary);
+            repos.push(minimized_repo);
             normal!(
                 "{}done in {:.2}s ({} bytes)",
                 match crate::util::VERBOSITY.load(Ordering::Relaxed) > 1 {

diff --git a/src/git.rs b/src/git.rs
line changes: +196/-117
index ff4adba..58b4b4b
--- a/src/git.rs
+++ b/src/git.rs
@@ -12,7 +12,7 @@ fn first_line(msg: &[u8]) -> String {
     message.lines().next().unwrap_or("[no commit message]").to_owned()
 }
 
-#[derive(Serialize)]
+#[derive(Serialize, Default)]
 pub struct GitRepo {
     pub name: String,
     pub metadata: GitsyMetadata,
@@ -22,9 +22,45 @@ pub struct GitRepo {
     pub root_files: Vec<GitFile>,
     pub all_files: Vec<GitFile>,
     pub commits: BTreeMap<String, GitObject>,
+    // TODO: this is duplication that should be handled with
+    // references.  Used so templates can deduce which files have been
+    // generated.
+    pub commit_ids: Vec<String>,
+    pub file_ids: Vec<String>,
 }
 
-#[derive(Serialize, Default)]
+impl GitRepo {
+    pub fn minimal_clone(&self, max_entries: usize) -> Self {
+        let mut new_commits: BTreeMap<String, GitObject> = BTreeMap::new();
+        let new_history: Vec<GitObject> = self.history.iter().cloned().take(max_entries).collect();
+        for entry in &new_history {
+            if self.commits.contains_key(&entry.full_hash) {
+                new_commits.insert(entry.full_hash.clone(),
+                                   self.commits.get(&entry.full_hash).unwrap().clone());
+            }
+        }
+        let all_files: Vec<GitFile> = self.all_files.iter().cloned().take(max_entries).collect();
+        GitRepo {
+            name: self.name.clone(),
+            metadata: self.metadata.clone(),
+            history: new_history,
+            branches: self.branches.iter().cloned().take(max_entries).collect(),
+            tags: self.tags.iter().cloned().take(max_entries).collect(),
+            // Don't minimize the root tree, because that's weird UX
+            // for the summary page.
+            root_files: self.root_files.clone(),
+            all_files,
+            commits: new_commits,
+            // These are not minimized because they're a listing of
+            // which generated files should exist, and are needed for
+            // ensuring valid links on every page.
+            file_ids: self.file_ids.clone(),
+            commit_ids: self.commit_ids.clone(),
+        }
+    }
+}
+
+#[derive(Clone, Serialize, Default)]
 pub struct GitsyMetadata {
     pub full_name: Option<String>,
     pub description: Option<String>,
@@ -33,13 +69,13 @@ pub struct GitsyMetadata {
     pub attributes: BTreeMap<String, toml::Value>,
 }
 
-#[derive(Serialize, Default)]
+#[derive(Clone, Serialize, Default)]
 pub struct GitAuthor {
     pub name: Option<String>,
     pub email: Option<String>,
 }
 
-#[derive(Serialize, Default)]
+#[derive(Clone, Serialize, Default)]
 pub struct GitObject {
     pub full_hash: String,
     pub short_hash: String,
@@ -58,7 +94,7 @@ pub struct GitObject {
     pub diff: Option<GitDiffCommit>,
 }
 
-#[derive(Serialize, Default)]
+#[derive(Clone, Serialize, Default)]
 pub struct GitStats {
     pub files: usize,
     pub additions: usize,
@@ -80,7 +116,7 @@ pub struct GitFile {
     pub contents_preformatted: bool,
 }
 
-#[derive(Serialize, Default)]
+#[derive(Clone, Serialize, Default)]
 pub struct GitDiffCommit {
     pub files: Vec<GitDiffFile>,
     pub file_count: usize,
@@ -88,7 +124,7 @@ pub struct GitDiffCommit {
     pub deletions: usize,
 }
 
-#[derive(Serialize, Default)]
+#[derive(Clone, Serialize, Default)]
 pub struct GitDiffFile {
     pub oldfile: String,
     pub newfile: String,
@@ -101,13 +137,13 @@ pub struct GitDiffFile {
     pub hunks: Vec<GitDiffHunk>,
 }
 
-#[derive(Serialize, Default)]
+#[derive(Clone, Serialize, Default)]
 pub struct GitDiffHunk {
     pub context: String,
     pub lines: Vec<GitDiffLine>,
 }
 
-#[derive(Serialize)]
+#[derive(Clone, Serialize)]
 pub struct GitDiffLine {
     pub kind: &'static str,
     pub prefix: &'static str,
@@ -195,7 +231,10 @@ pub fn parse_repo(
     let branch_name = settings.branch.as_deref().unwrap_or("master");
     let branch_obj = repo.revparse_single(branch_name)?;
 
+    loud!();
+
     // Cache the shortnames of all references
+    loudest!(" - Parsing references");
     let mut references: BTreeMap<String, Vec<String>> = BTreeMap::new();
     for refr in repo.references()? {
         let refr = refr?;
@@ -211,20 +250,24 @@ pub fn parse_repo(
             }
         }
     }
+    loud!(" - parsed {} references", references.len());
 
-    loud!();
     let mut revwalk = repo.revwalk()?;
-    revwalk.set_sorting(git2::Sort::TOPOLOGICAL)?;
+    // TODO: TOPOLOGICAL might be better, but it's also ungodly slow
+    // on large repos.  Maybe this should be configurable.
+    //
+    //revwalk.set_sorting(git2::Sort::TOPOLOGICAL)?;
+    revwalk.set_sorting(git2::Sort::NONE)?;
     revwalk.push(branch_obj.id())?;
     loudest!(" - Parsing history:");
-    for oid in revwalk {
+    for (idx, oid) in revwalk.by_ref().enumerate() {
         let oid = oid?;
         if commit_count >= settings.limit_commits.unwrap_or(usize::MAX)
             || history_count >= settings.limit_history.unwrap_or(usize::MAX)
         {
             break;
         }
-        commits.insert(oid.to_string(), parse_commit(repo, &oid.to_string())?);
+        commits.insert(oid.to_string(), parse_commit(idx, settings, repo, &oid.to_string())?);
         commit_count += 1;
         let commit = repo.find_commit(oid)?;
         let obj = repo.revparse_single(&commit.id().to_string())?;
@@ -232,21 +275,37 @@ pub fn parse_repo(
         let short_hash = obj.short_id()?.as_str().unwrap_or_default().to_string();
 
         let mut parents: Vec<String> = vec![];
-        let a = if commit.parents().len() == 1 {
-            let parent = commit.parent(0)?;
-            parents.push(parent.id().to_string());
-            Some(parent.tree()?)
-        } else {
-            None
+        let a = match commit.parents().len() {
+            x if x == 1 => {
+                let parent = commit.parent(0).unwrap();
+                parents.push(parent.id().to_string());
+                Some(parent.tree()?)
+            }
+            x if x > 1 => {
+                for parent in commit.parents() {
+                    parents.push(parent.id().to_string());
+                }
+                let parent = commit.parent(0).unwrap();
+                Some(parent.tree()?)
+            }
+            _ => None,
         };
         let b = commit.tree()?;
         let mut diffopts = DiffOptions::new();
-        let diff = repo.diff_tree_to_tree(a.as_ref(), Some(&b), Some(&mut diffopts))?;
-        let stats = diff.stats()?;
-        let stats = GitStats {
-            files: stats.files_changed(),
-            additions: stats.insertions(),
-            deletions: stats.deletions(),
+        let stats = match idx < settings.limit_diffs.unwrap_or(usize::MAX) {
+            true => {
+                let diff = repo.diff_tree_to_tree(a.as_ref(), Some(&b), Some(&mut diffopts))?;
+                let stats = diff.stats()?;
+                let stats = GitStats {
+                    files: stats.files_changed(),
+                    additions: stats.insertions(),
+                    deletions: stats.deletions(),
+                };
+                Some(stats)
+            },
+            false => {
+                None
+            },
         };
 
         let alt_refs: Vec<String> = references
@@ -273,7 +332,7 @@ pub fn parse_repo(
                     email: commit.author().email().map(|x| x.to_owned()),
                 },
                 summary: Some(first_line(commit.message_bytes())),
-                stats: Some(stats),
+                stats,
                 ..Default::default()
             });
             history_count += 1;
@@ -325,7 +384,7 @@ pub fn parse_repo(
     loud!(" - parsed {} branches", branch_count);
 
     loudest!(" - Parsing tags:");
-    for tag in repo.tag_names(None)?.iter() {
+    for tag in repo.tag_names(None)?.iter().rev() {
         if tag_count >= settings.limit_tags.unwrap_or(usize::MAX) {
             break;
         }
@@ -389,6 +448,8 @@ pub fn parse_repo(
     }
     loud!(" - parsed {} files", all_files.len());
 
+    let file_ids = all_files.iter().map(|x| x.id.clone()).collect();
+    let commit_ids = commits.keys().cloned().collect();
     Ok(GitRepo {
         name: name.to_string(),
         metadata,
@@ -398,10 +459,13 @@ pub fn parse_repo(
         root_files,
         all_files,
         commits,
+        commit_ids,
+        file_ids,
     })
 }
 
-pub fn parse_commit(repo: &Repository, refr: &str) -> Result<GitObject, Error> {
+pub fn parse_commit(idx: usize, settings: &GitsySettingsRepo,
+                    repo: &Repository, refr: &str) -> Result<GitObject, Error> {
     let obj = repo.revparse_single(refr)?;
     let commit = repo.find_commit(obj.id())?;
     let mut parents: Vec<String> = vec![];
@@ -416,103 +480,118 @@ pub fn parse_commit(repo: &Repository, refr: &str) -> Result<GitObject, Error> {
             for parent in commit.parents() {
                 parents.push(parent.id().to_string());
             }
-            None
+            let parent = commit.parent(0).unwrap();
+            Some(parent.tree()?)
         }
         _ => None,
     };
     let b = commit.tree()?;
     let mut diffopts = DiffOptions::new();
+    diffopts.enable_fast_untracked_dirs(true);
     let diff = repo.diff_tree_to_tree(a.as_ref(), Some(&b), Some(&mut diffopts))?;
-    let stats = diff.stats()?;
-
-    let mut commit_diff: GitDiffCommit = GitDiffCommit {
-        file_count: stats.files_changed(),
-        additions: stats.insertions(),
-        deletions: stats.deletions(),
-        ..Default::default()
-    };
-    let files: Rc<RefCell<Vec<GitDiffFile>>> = Rc::new(RefCell::new(vec![]));
-
-    diff.foreach(
-        &mut |file, _progress| {
-            let mut file_diff: GitDiffFile = Default::default();
-            file_diff.newfile = match file.status() {
-                git2::Delta::Deleted => "/dev/null".to_owned(),
-                _ => file
-                    .new_file()
-                    .path()
-                    .map(|x| "b/".to_string() + &x.to_string_lossy())
-                    .unwrap_or("/dev/null".to_string()),
-            };
-            file_diff.oldfile = match file.status() {
-                git2::Delta::Added => "/dev/null".to_owned(),
-                _ => file
-                    .old_file()
-                    .path()
-                    .map(|x| "a/".to_string() + &x.to_string_lossy())
-                    .unwrap_or("/dev/null".to_string()),
-            };
-            file_diff.basefile = match file.status() {
-                git2::Delta::Added => file
-                    .new_file()
-                    .path()
-                    .map(|x| x.to_string_lossy().to_string())
-                    .unwrap_or("/dev/null".to_string()),
-                _ => file
-                    .old_file()
-                    .path()
-                    .map(|x| x.to_string_lossy().to_string())
-                    .unwrap_or("/dev/null".to_string()),
-            };
-            file_diff.oldid = file.old_file().id().to_string();
-            file_diff.newid = file.new_file().id().to_string();
-            files.borrow_mut().push(file_diff);
-            true
+    let commit_diff: Option<GitDiffCommit> = match idx < settings.limit_diffs.unwrap_or(usize::MAX) {
+        true => {
+            let stats = diff.stats()?;
+
+            Some(GitDiffCommit {
+                file_count: stats.files_changed(),
+                additions: stats.insertions(),
+                deletions: stats.deletions(),
+                ..Default::default()
+            })
         },
-        None, // TODO: handle binary files?
-        Some(&mut |_file, hunk| {
-            let mut files = files.borrow_mut();
-            let file_diff: &mut GitDiffFile = files.last_mut().expect("Diff hunk not associated with a file!");
-            let mut hunk_diff: GitDiffHunk = Default::default();
-            hunk_diff.context = String::from_utf8_lossy(hunk.header()).to_string();
-            file_diff.hunks.push(hunk_diff);
-            true
-        }),
-        Some(&mut |_file, _hunk, line| {
-            let mut files = files.borrow_mut();
-            let file_diff: &mut GitDiffFile = files.last_mut().expect("Diff hunk not associated with a file!");
-            let hunk_diff: &mut GitDiffHunk = file_diff
-                .hunks
-                .last_mut()
-                .expect("Diff line not associated with a hunk!");
-            let (kind, prefix) = match line.origin() {
-                ' ' => ("ctx", " "),
-                '-' => ("del", "-"),
-                '+' => ("add", "+"),
-                _ => ("other", " "),
-            };
-            match line.origin() {
-                '-' => file_diff.deletions += 1,
-                '+' => file_diff.additions += 1,
-                _ => {}
+        false => {
+            None
+        }
+    };
+
+    let commit_diff = match commit_diff {
+        None => None,
+        Some(mut commit_diff) => {
+            let files: Rc<RefCell<Vec<GitDiffFile>>> = Rc::new(RefCell::new(vec![]));
+            diff.foreach(
+                &mut |file, _progress| {
+                    let mut file_diff: GitDiffFile = Default::default();
+                    file_diff.newfile = match file.status() {
+                        git2::Delta::Deleted => "/dev/null".to_owned(),
+                        _ => file
+                            .new_file()
+                            .path()
+                            .map(|x| "b/".to_string() + &x.to_string_lossy())
+                            .unwrap_or("/dev/null".to_string()),
+                    };
+                    file_diff.oldfile = match file.status() {
+                        git2::Delta::Added => "/dev/null".to_owned(),
+                        _ => file
+                            .old_file()
+                            .path()
+                            .map(|x| "a/".to_string() + &x.to_string_lossy())
+                            .unwrap_or("/dev/null".to_string()),
+                    };
+                    file_diff.basefile = match file.status() {
+                        git2::Delta::Added => file
+                            .new_file()
+                            .path()
+                            .map(|x| x.to_string_lossy().to_string())
+                            .unwrap_or("/dev/null".to_string()),
+                        _ => file
+                            .old_file()
+                            .path()
+                            .map(|x| x.to_string_lossy().to_string())
+                            .unwrap_or("/dev/null".to_string()),
+                    };
+                    file_diff.oldid = file.old_file().id().to_string();
+                    file_diff.newid = file.new_file().id().to_string();
+                    files.borrow_mut().push(file_diff);
+                    true
+                },
+                None, // TODO: handle binary files?
+                Some(&mut |_file, hunk| {
+                    let mut files = files.borrow_mut();
+                    let file_diff: &mut GitDiffFile = files.last_mut().expect("Diff hunk not associated with a file!");
+                    let mut hunk_diff: GitDiffHunk = Default::default();
+                    hunk_diff.context = String::from_utf8_lossy(hunk.header()).to_string();
+                    file_diff.hunks.push(hunk_diff);
+                    true
+                }),
+                Some(&mut |_file, _hunk, line| {
+                    let mut files = files.borrow_mut();
+                    let file_diff: &mut GitDiffFile = files.last_mut().expect("Diff hunk not associated with a file!");
+                    let hunk_diff: &mut GitDiffHunk = file_diff
+                        .hunks
+                        .last_mut()
+                        .expect("Diff line not associated with a hunk!");
+                    let (kind, prefix) = match line.origin() {
+                        ' ' => ("ctx", " "),
+                        '-' => ("del", "-"),
+                        '+' => ("add", "+"),
+                        _ => ("other", " "),
+                    };
+                    match line.origin() {
+                        '-' => file_diff.deletions += 1,
+                        '+' => file_diff.additions += 1,
+                        _ => {}
+                    }
+                    let line_diff = GitDiffLine {
+                        text: String::from_utf8_lossy(line.content()).to_string(),
+                        kind,
+                        prefix,
+                    };
+                    hunk_diff.lines.push(line_diff);
+                    true
+                }),
+            )?;
+
+            match Rc::try_unwrap(files) {
+                Ok(files) => {
+                    let files: Vec<GitDiffFile> = files.into_inner();
+                    commit_diff.files = files;
+                }
+                Err(_) => {}
             }
-            let line_diff = GitDiffLine {
-                text: String::from_utf8_lossy(line.content()).to_string(),
-                kind,
-                prefix,
-            };
-            hunk_diff.lines.push(line_diff);
-            true
-        }),
-    )?;
-
-    match Rc::try_unwrap(files) {
-        Ok(files) => {
-            let files: Vec<GitDiffFile> = files.into_inner();
-            commit_diff.files = files;
+            Some(commit_diff)
         }
-        Err(_) => {}
-    }
+    };
 
     let tree = obj.peel_to_tree()?;
     let summary = GitObject {
@@ -536,7 +615,7 @@ pub fn parse_commit(repo: &Repository, refr: &str) -> Result<GitObject, Error> {
         summary: Some(first_line(commit.message_bytes())),
         message: commit.message().map(|x| x.to_string()),
         stats: None,
-        diff: Some(commit_diff),
+        diff: commit_diff,
     };
 
     Ok(summary)

diff --git a/src/settings.rs b/src/settings.rs
line changes: +10/-0
index c026929..a8ef62e
--- a/src/settings.rs
+++ b/src/settings.rs
@@ -227,6 +227,8 @@ pub struct GitsySettingsRepo {
     pub limit_file_size: Option<usize>,
     pub limit_repo_size: Option<usize>,
     pub limit_total_size: Option<usize>,
+    pub limit_context: Option<usize>,
+    pub limit_diffs: Option<usize>,
 }
 
 impl Hash for GitsySettingsRepo {
@@ -267,6 +269,8 @@ pub struct GitsySettings {
     pub limit_file_size: Option<usize>,
     pub limit_repo_size: Option<usize>,
     pub limit_total_size: Option<usize>,
+    pub limit_context: Option<usize>,
+    pub limit_diffs: Option<usize>,
     pub render_markdown: Option<bool>,
     pub syntax_highlight: Option<bool>,
     pub syntax_highlight_theme: Option<String>,
@@ -339,6 +343,8 @@ impl GitsySettings {
                     global_to_repo!(settings, repo, limit_file_size);
                     global_to_repo!(settings, repo, limit_repo_size);
                     global_to_repo!(settings, repo, limit_total_size);
+                    global_to_repo!(settings, repo, limit_context);
+                    global_to_repo!(settings, repo, limit_diffs);
 
                     repo_descriptions.insert(repo);
                 }
@@ -372,6 +378,8 @@ impl GitsySettings {
                             limit_file_size: settings.limit_file_size.clone(),
                             limit_repo_size: settings.limit_repo_size.clone(),
                             limit_total_size: settings.limit_total_size.clone(),
+                            limit_context: settings.limit_context.clone(),
+                            limit_diffs: settings.limit_diffs.clone(),
                             ..Default::default()
                         });
                     }
@@ -404,6 +412,8 @@ impl GitsySettings {
                     limit_file_size: settings.limit_file_size.clone(),
                     limit_repo_size: settings.limit_repo_size.clone(),
                     limit_total_size: settings.limit_total_size.clone(),
+                    limit_context: settings.limit_context.clone(),
+                    limit_diffs: settings.limit_diffs.clone(),
                     ..Default::default()
                 });
             }

diff --git a/templates/commit.html b/templates/commit.html
line changes: +2/-0
index 442ae4a..89982ed
--- a/templates/commit.html
+++ b/templates/commit.html
@@ -14,6 +14,7 @@
   <div class="commit-message">
     <pre style="margin: 0;">{{commit.message}}</pre>
   </div>
+{% if commit.diff -%}
 {% for file in commit.diff.files -%}
   <div class="commit-diff">
     <div class="commit-diff-header">
@@ -32,6 +33,7 @@ index {{file.oldid | truncate(length=7,end="")}}..{{file.newid | truncate(length
 {% endfor -%}
 <br/>
 {% endfor -%}
+{% endif -%}
 </div>
 </div>
 {% endblock content %}

diff --git a/templates/dir.html b/templates/dir.html
line changes: +6/-1
index bdbc262..94f51f3
--- a/templates/dir.html
+++ b/templates/dir.html
@@ -23,7 +23,12 @@
   </tr>
   {% for file in files -%}
   <tr class="file">
-    <td class="name"><a href="{{repo_url | safe}}/{{file.kind}}/{{file.id}}.html">{{file.name}}{% if file.kind == "dir" -%}/{% endif -%}</a></td>
+    {% if file.kind == "dir" -%}
+    {% set file_name = file.name ~ "/" -%}
+    {% else -%}
+    {% set file_name = file.name -%}
+    {% endif -%}
+    <td class="name">{% if file.id in file_ids -%}<a href="{{repo_url | safe}}/{{file.kind}}/{{file.id}}.html">{{file_name}}</a>{% else -%}{{file_name}}{% endif -%}</td>
     <td class="type">{{file.kind}}</td>
     <td class="mode">{{file.mode | mask(mask="0xfff") | oct}}</td>
     <td class="size">{{file.size}}</td>

diff --git a/templates/files.html b/templates/files.html
line changes: +6/-1
index d155880..9bf2b24
--- a/templates/files.html
+++ b/templates/files.html
@@ -19,7 +19,12 @@
   </tr>
   {% for file in root_files -%}
   <tr class="file">
-    <td class="name"><a href="{{file.kind}}/{{file.id}}.html">{{file.name}}{% if file.kind == "dir" -%}/{% endif -%}</a></td>
+    {% if file.kind == "dir" -%}
+    {% set file_name = file.name ~ "/" -%}
+    {% else -%}
+    {% set file_name = file.name -%}
+    {% endif -%}
+    <td class="name">{% if file.id in file_ids -%}<a href="{{repo_url | safe}}/{{file.kind}}/{{file.id}}.html">{{file_name}}</a>{% else -%}{{file_name}}{% endif -%}</td>
     <td class="type nosmall">{{file.kind}}</td>
     <td class="mode nosmall">{{file.mode | mask(mask="0xfff") | oct}}</td>
     <td class="size nosmall">{{file.size}}</td>

diff --git a/templates/history.html b/templates/history.html
line changes: +2/-2
index e61ae59..4920d19
--- a/templates/history.html
+++ b/templates/history.html
@@ -23,11 +23,11 @@
   </tr>
   {% for entry in history -%}
   <tr class="commit">
-    <td class="oid"><a href="commit/{{entry.full_hash}}.html">{{entry.short_hash}}</a></td>
+    <td class="oid">{% if entry.full_hash in commit_ids -%}<a href="commit/{{entry.full_hash}}.html">{{entry.short_hash}}</a>{% else -%}{{entry.short_hash}}{% endif -%}</td>
     <td class="msg sans">{{entry.summary}}</td>
     <td class="author sans">{{entry.author.name}}</td>
     <td class="date">{{ts_to_date(ts=entry.ts_utc, tz=entry.ts_offset)}}</td>
-    <td class="diff nosmall">{{entry.stats.files}} (+{{entry.stats.additions}}/-{{entry.stats.deletions}})</td>
+    <td class="diff nosmall">{% if entry.stats -%}{{entry.stats.files}} (+{{entry.stats.additions}}/-{{entry.stats.deletions}}){% endif -%}</td>
     <td class="refs nosmall">{%- for ref in entry.alt_refs -%}{%- if loop.index0 < 3 -%}<span class="ref">{{ref}}</span>{%- endif -%}{%- endfor -%}{% if entry.alt_refs | length > 3 -%}<span class="ref">...</span>{% endif -%}</td>
   </tr>
 {% endfor -%}

diff --git a/templates/summary.html b/templates/summary.html
line changes: +8/-3
index 429581a..309a6af
--- a/templates/summary.html
+++ b/templates/summary.html
@@ -24,11 +24,11 @@
   {% for entry in history -%}
   {% if loop.index0 < 10  -%}
   <tr class="commit">
-    <td class="oid"><a href="commit/{{entry.full_hash}}.html">{{entry.short_hash}}</a></td>
+    <td class="oid">{% if entry.full_hash in commit_ids -%}<a href="commit/{{entry.full_hash}}.html">{{entry.short_hash}}</a>{% else -%}{{entry.short_hash}}{% endif -%}</td>
     <td class="msg sans">{{entry.summary}}</td>
     <td class="author sans">{{entry.author.name}}</td>
     <td class="date">{{ts_to_date(ts=entry.ts_utc, tz=entry.ts_offset)}}</td>
-    <td class="diff nosmall">{{entry.stats.files}} (+{{entry.stats.additions}}/-{{entry.stats.deletions}})</td>
+    <td class="diff nosmall">{% if entry.stats -%}{{entry.stats.files}} (+{{entry.stats.additions}}/-{{entry.stats.deletions}}){% endif -%}</td>
     <td class="refs nosmall">{%- for ref in entry.alt_refs -%}{%- if loop.index0 < 3 -%}<span class="ref">{{ref}}</span>{%- endif -%}{%- endfor -%}{% if entry.alt_refs | length > 3 -%}<span class="ref">...</span>{% endif -%}</td>
   </tr>
 {% endif -%}
@@ -109,7 +109,12 @@
   </tr>
   {% for file in root_files -%}
   <tr class="file">
-    <td class="name"><a href="{{repo_url | safe}}/{{file.kind}}/{{file.id}}.html">{{file.name}}{% if file.kind == "dir" -%}/{% endif -%}</a></td>
+    {% if file.kind == "dir" -%}
+    {% set file_name = file.name ~ "/" -%}
+    {% else -%}
+    {% set file_name = file.name -%}
+    {% endif -%}
+    <td class="name">{% if file.id in file_ids -%}<a href="{{repo_url | safe}}/{{file.kind}}/{{file.id}}.html">{{file_name}}</a>{% else -%}{{file_name}}{% endif -%}</td>
     <td class="type nosmall">{{file.kind}}</td>
     <td class="mode nosmall">{{file.mode | mask(mask="0xfff") | oct}}</td>
     <td class="size nosmall">{{file.size}}</td>