diff options
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | blame.c | 443 | ||||
-rw-r--r-- | fetch-pack.c | 15 | ||||
-rwxr-xr-x | git-annotate.perl | 321 | ||||
-rwxr-xr-x | git-fetch.sh | 2 | ||||
-rwxr-xr-x | git-push.sh | 4 | ||||
-rw-r--r-- | pack-objects.c | 300 | ||||
-rw-r--r-- | rev-list.c | 34 | ||||
-rw-r--r-- | rev-parse.c | 1 | ||||
-rw-r--r-- | send-pack.c | 10 | ||||
-rw-r--r-- | upload-pack.c | 11 |
11 files changed, 1047 insertions, 97 deletions
@@ -130,6 +130,7 @@ SCRIPT_SH = \ SCRIPT_PERL = \ git-archimport.perl git-cvsimport.perl git-relink.perl \ git-shortlog.perl git-fmt-merge-msg.perl git-rerere.perl \ + git-annotate.perl \ git-svnimport.perl git-mv.perl git-cvsexportcommit.perl SCRIPT_PYTHON = \ @@ -164,7 +165,7 @@ PROGRAMS = \ git-upload-pack$X git-verify-pack$X git-write-tree$X \ git-update-ref$X git-symbolic-ref$X git-check-ref-format$X \ git-name-rev$X git-pack-redundant$X git-repo-config$X git-var$X \ - git-describe$X git-merge-tree$X + git-describe$X git-merge-tree$X git-blame$X # what 'all' will build and 'install' will install, in gitexecdir ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS) diff --git a/blame.c b/blame.c new file mode 100644 index 0000000000..1e655466a8 --- /dev/null +++ b/blame.c @@ -0,0 +1,443 @@ +#include <assert.h> + +#include "cache.h" +#include "refs.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "epoch.h" +#include "diff.h" + +#define DEBUG 0 + +struct commit** blame_lines; +int num_blame_lines; + +struct util_info +{ + int* line_map; + int num_lines; + unsigned char sha1[20]; /* blob sha, not commit! */ + char* buf; + unsigned long size; +// const char* path; +}; + +struct chunk +{ + int off1, len1; // --- + int off2, len2; // +++ +}; + +struct patch +{ + struct chunk* chunks; + int num; +}; + +static void get_blob(struct commit* commit); + +int num_get_patch = 0; +int num_commits = 0; + +struct patch* get_patch(struct commit* commit, struct commit* other) +{ + struct patch* ret = xmalloc(sizeof(struct patch)); + ret->chunks = NULL; + ret->num = 0; + + struct util_info* info_c = (struct util_info*) commit->object.util; + struct util_info* info_o = (struct util_info*) other->object.util; + + if(!memcmp(info_c->sha1, info_o->sha1, 20)) + return ret; + + get_blob(commit); + get_blob(other); + + FILE* fout = fopen("/tmp/git-blame-tmp1", "w"); + if(!fout) + die("fopen tmp1 failed: %s", strerror(errno)); + + if(fwrite(info_c->buf, info_c->size, 1, fout) != 1) + die("fwrite 1 failed: %s", strerror(errno)); + fclose(fout); + + fout = fopen("/tmp/git-blame-tmp2", "w"); + if(!fout) + die("fopen tmp2 failed: %s", strerror(errno)); + + if(fwrite(info_o->buf, info_o->size, 1, fout) != 1) + die("fwrite 2 failed: %s", strerror(errno)); + fclose(fout); + + FILE* fin = popen("diff -u0 /tmp/git-blame-tmp1 /tmp/git-blame-tmp2", "r"); + if(!fin) + die("popen failed: %s", strerror(errno)); + + char buf[1024]; + while(fgets(buf, sizeof(buf), fin)) { + if(buf[0] != '@' || buf[1] != '@') + continue; + + if(DEBUG) + printf("chunk line: %s", buf); + ret->num++; + ret->chunks = xrealloc(ret->chunks, sizeof(struct chunk)*ret->num); + struct chunk* chunk = &ret->chunks[ret->num-1]; + + assert(!strncmp(buf, "@@ -", 4)); + + char* start = buf+4; + char* sp = index(start, ' '); + *sp = '\0'; + if(index(start, ',')) { + int ret = sscanf(start, "%d,%d", &chunk->off1, &chunk->len1); + assert(ret == 2); + } else { + int ret = sscanf(start, "%d", &chunk->off1); + assert(ret == 1); + chunk->len1 = 1; + } + *sp = ' '; + + start = sp+1; + sp = index(start, ' '); + *sp = '\0'; + if(index(start, ',')) { + int ret = sscanf(start, "%d,%d", &chunk->off2, &chunk->len2); + assert(ret == 2); + } else { + int ret = sscanf(start, "%d", &chunk->off2); + assert(ret == 1); + chunk->len2 = 1; + } + *sp = ' '; + + if(chunk->off1 > 0) + chunk->off1 -= 1; + if(chunk->off2 > 0) + chunk->off2 -= 1; + + assert(chunk->off1 >= 0); + assert(chunk->off2 >= 0); + } + fclose(fin); + + num_get_patch++; + return ret; +} + +void free_patch(struct patch* p) +{ + free(p->chunks); + free(p); +} + +static int get_blob_sha1_internal(unsigned char *sha1, const char *base, int baselen, + const char *pathname, unsigned mode, int stage); + + +static unsigned char blob_sha1[20]; +static int get_blob_sha1(struct tree* t, const char* pathname, unsigned char* sha1) +{ + const char *pathspec[2]; + pathspec[0] = pathname; + pathspec[1] = NULL; + memset(blob_sha1, 0, sizeof(blob_sha1)); + read_tree_recursive(t, "", 0, 0, pathspec, get_blob_sha1_internal); + + int i; + for(i = 0; i < 20; i++) { + if(blob_sha1[i] != 0) + break; + } + + if(i == 20) + return -1; + + memcpy(sha1, blob_sha1, 20); + return 0; +} + +static int get_blob_sha1_internal(unsigned char *sha1, const char *base, int baselen, + const char *pathname, unsigned mode, int stage) +{ +// printf("Got blob: %s base: '%s' baselen: %d pathname: '%s' mode: %o stage: %d\n", +// sha1_to_hex(sha1), base, baselen, pathname, mode, stage); + + if(S_ISDIR(mode)) + return READ_TREE_RECURSIVE; + + memcpy(blob_sha1, sha1, 20); + return -1; +} + +static void get_blob(struct commit* commit) +{ + struct util_info* info = commit->object.util; + char type[20]; + + if(info->buf) + return; + + info->buf = read_sha1_file(info->sha1, type, &info->size); + assert(!strcmp(type, "blob")); +} + +void print_patch(struct patch* p) +{ + printf("Num chunks: %d\n", p->num); + int i; + for(i = 0; i < p->num; i++) { + printf("%d,%d %d,%d\n", p->chunks[i].off1, p->chunks[i].len1, p->chunks[i].off2, p->chunks[i].len2); + } +} + + +// p is a patch from commit to other. +void fill_line_map(struct commit* commit, struct commit* other, struct patch* p) +{ + int num_lines = ((struct util_info*) commit->object.util)->num_lines; + int* line_map = ((struct util_info*) commit->object.util)->line_map; + int num_lines2 = ((struct util_info*) other->object.util)->num_lines; + int* line_map2 = ((struct util_info*) other->object.util)->line_map; + int cur_chunk = 0; + int i1, i2; + + if(p->num && DEBUG) + print_patch(p); + + for(i1 = 0; i1 < num_lines; i1++) + line_map[i1] = -1; + + if(DEBUG) + printf("num lines 1: %d num lines 2: %d\n", num_lines, num_lines2); + + for(i1 = 0, i2 = 0; i1 < num_lines; i1++, i2++) { + if(DEBUG > 1) + printf("%d %d\n", i1, i2); + + if(i2 >= num_lines2) + break; + + line_map[i1] = line_map2[i2]; + + struct chunk* chunk = NULL; + if(cur_chunk < p->num) + chunk = &p->chunks[cur_chunk]; + + if(chunk && chunk->off1 == i1) { + i2 = chunk->off2; + + if(chunk->len1 > 0) + i1 += chunk->len1-1; + if(chunk->len2 > 0) + i2 += chunk->len2-1; + cur_chunk++; + } + } +} + +int map_line(struct commit* commit, int line) +{ + struct util_info* info = commit->object.util; + assert(line >= 0 && line < info->num_lines); + return info->line_map[line]; +} + +int fill_util_info(struct commit* commit, const char* path) +{ + if(commit->object.util) + return 0; + + struct util_info* util = xmalloc(sizeof(struct util_info)); + util->buf = NULL; + util->size = 0; + util->num_lines = -1; + util->line_map = NULL; + + commit->object.util = util; + + if(get_blob_sha1(commit->tree, path, util->sha1)) + return -1; + + return 0; +} + +void alloc_line_map(struct commit* commit) +{ + struct util_info* util = commit->object.util; + + if(util->line_map) + return; + + get_blob(commit); + + int i; + util->num_lines = 0; + for(i = 0; i < util->size; i++) { + if(util->buf[i] == '\n') + util->num_lines++; + } + util->line_map = xmalloc(sizeof(int)*util->num_lines); +} + +void copy_line_map(struct commit* dst, struct commit* src) +{ + struct util_info* u_dst = dst->object.util; + struct util_info* u_src = src->object.util; + + u_dst->line_map = u_src->line_map; + u_dst->num_lines = u_src->num_lines; + u_dst->buf = u_src->buf; + u_dst->size = u_src->size; +} + +void process_commits(struct commit_list* list, const char* path) +{ + int i; + + while(list) { + struct commit* commit = pop_commit(&list); + struct commit_list* parents; + struct util_info* info; + + info = commit->object.util; + num_commits++; + if(DEBUG) + printf("\nProcessing commit: %d %s\n", num_commits, sha1_to_hex(commit->object.sha1)); + for(parents = commit->parents; + parents != NULL; parents = parents->next) { + struct commit* parent = parents->item; + + if(parse_commit(parent) < 0) + die("parse_commit error"); + + if(DEBUG) + printf("parent: %s\n", sha1_to_hex(parent->object.sha1)); + + if(fill_util_info(parent, path)) + continue; + + // Temporarily assign everything to the parent. + int num_blame = 0; + for(i = 0; i < num_blame_lines; i++) { + if(blame_lines[i] == commit) { + num_blame++; + blame_lines[i] = parent; + } + } + + if(num_blame == 0) + continue; + + struct patch* patch = get_patch(parent, commit); + if(patch->num == 0) { + copy_line_map(parent, commit); + } else { + alloc_line_map(parent); + fill_line_map(parent, commit, patch); + } + + for(i = 0; i < patch->num; i++) { + int l; + for(l = 0; l < patch->chunks[i].len2; l++) { + int mapped_line = map_line(commit, patch->chunks[i].off2 + l); + if(mapped_line != -1 && blame_lines[mapped_line] == parent) + blame_lines[mapped_line] = commit; + } + } + free_patch(patch); + } + } +} + +#define SEEN 1 +struct commit_list* get_commit_list(struct commit* commit, const char* pathname) +{ + struct commit_list* ret = NULL; + struct commit_list* process = NULL; + unsigned char sha1[20]; + + commit_list_insert(commit, &process); + + while(process) { + struct commit* com = pop_commit(&process); + if(com->object.flags & SEEN) + continue; + + com->object.flags |= SEEN; + commit_list_insert(com, &ret); + struct commit_list* parents; + + parse_commit(com); + + for(parents = com->parents; + parents != NULL; parents = parents->next) { + struct commit* parent = parents->item; + + parse_commit(parent); + + if(!get_blob_sha1(parent->tree, pathname, sha1)) + commit_list_insert(parent, &process); + } + } + + return ret; +} + +int main(int argc, const char **argv) +{ + unsigned char sha1[20]; + struct commit *commit; + const char* filename; + int i; + + setup_git_directory(); + + if (argc != 3) + die("Usage: blame commit-ish file"); + + if (get_sha1(argv[1], sha1)) + die("get_sha1 failed"); + + commit = lookup_commit_reference(sha1); + + filename = argv[2]; + + struct commit_list* list = get_commit_list(commit, filename); + sort_in_topological_order(&list, 1); + + if(fill_util_info(commit, filename)) { + printf("%s not found in %s\n", filename, argv[1]); + return 0; + } + alloc_line_map(commit); + + struct util_info* util = commit->object.util; + num_blame_lines = util->num_lines; + blame_lines = xmalloc(sizeof(struct commit*)*num_blame_lines); + + + for(i = 0; i < num_blame_lines; i++) { + blame_lines[i] = commit; + + ((struct util_info*) commit->object.util)->line_map[i] = i; + } + + process_commits(list, filename); + + for(i = 0; i < num_blame_lines; i++) { + printf("%d %s\n", i+1-1, sha1_to_hex(blame_lines[i]->object.sha1)); +// printf("%d %s\n", i+1-1, find_unique_abbrev(blame_lines[i]->object.sha1, 6)); + } + + if(DEBUG) { + printf("num get patch: %d\n", num_get_patch); + printf("num commits: %d\n", num_commits); + } + + return 0; +} diff --git a/fetch-pack.c b/fetch-pack.c index aa6f42ae1b..09738fee91 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -8,7 +8,7 @@ static int keep_pack; static int quiet; static int verbose; static const char fetch_pack_usage[] = -"git-fetch-pack [-q] [-v] [-k] [--exec=upload-pack] [host:]directory <refs>..."; +"git-fetch-pack [-q] [-v] [-k] [--thin] [--exec=upload-pack] [host:]directory <refs>..."; static const char *exec = "git-upload-pack"; #define COMPLETE (1U << 0) @@ -18,7 +18,7 @@ static const char *exec = "git-upload-pack"; #define POPPED (1U << 4) static struct commit_list *rev_list = NULL; -static int non_common_revs = 0, multi_ack = 0; +static int non_common_revs = 0, multi_ack = 0, use_thin_pack = 0; static void rev_list_push(struct commit *commit, int mark) { @@ -156,8 +156,9 @@ static int find_common(int fd[2], unsigned char *result_sha1, continue; } - packet_write(fd[1], "want %s%s\n", sha1_to_hex(remote), - multi_ack ? " multi_ack" : ""); + packet_write(fd[1], "want %s%s%s\n", sha1_to_hex(remote), + (multi_ack ? " multi_ack" : ""), + (use_thin_pack ? " thin-pack" : "")); fetching++; } packet_flush(fd[1]); @@ -421,6 +422,10 @@ int main(int argc, char **argv) keep_pack = 1; continue; } + if (!strcmp("--thin", arg)) { + use_thin_pack = 1; + continue; + } if (!strcmp("-v", arg)) { verbose = 1; continue; @@ -434,6 +439,8 @@ int main(int argc, char **argv) } if (!dest) usage(fetch_pack_usage); + if (keep_pack) + use_thin_pack = 0; pid = git_connect(fd, dest, exec); if (pid < 0) return 1; diff --git a/git-annotate.perl b/git-annotate.perl new file mode 100755 index 0000000000..8f984318af --- /dev/null +++ b/git-annotate.perl @@ -0,0 +1,321 @@ +#!/usr/bin/perl +# Copyright 2006, Ryan Anderson <ryan@michonline.com> +# +# GPL v2 (See COPYING) +# +# This file is licensed under the GPL v2, or a later version +# at the discretion of Linus Torvalds. + +use warnings; +use strict; + +my $filename = shift @ARGV; + + +my @stack = ( + { + 'rev' => "HEAD", + 'filename' => $filename, + }, +); + +our (@lineoffsets, @pendinglineoffsets); +our @filelines = (); +open(F,"<",$filename) + or die "Failed to open filename: $!"; + +while(<F>) { + chomp; + push @filelines, $_; +} +close(F); +our $leftover_lines = @filelines; +our %revs; +our @revqueue; +our $head; + +my $revsprocessed = 0; +while (my $bound = pop @stack) { + my @revisions = git_rev_list($bound->{'rev'}, $bound->{'filename'}); + foreach my $revinst (@revisions) { + my ($rev, @parents) = @$revinst; + $head ||= $rev; + + $revs{$rev}{'filename'} = $bound->{'filename'}; + if (scalar @parents > 0) { + $revs{$rev}{'parents'} = \@parents; + next; + } + + my $newbound = find_parent_renames($rev, $bound->{'filename'}); + if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) { + push @stack, $newbound; + $revs{$rev}{'parents'} = [$newbound->{'rev'}]; + } + } +} +push @revqueue, $head; +init_claim($head); +$revs{$head}{'lineoffsets'} = {}; +handle_rev(); + + +my $i = 0; +foreach my $l (@filelines) { + my ($output, $rev, $committer, $date); + if (ref $l eq 'ARRAY') { + ($output, $rev, $committer, $date) = @$l; + if (length($rev) > 8) { + $rev = substr($rev,0,8); + } + } else { + $output = $l; + ($rev, $committer, $date) = ('unknown', 'unknown', 'unknown'); + } + + printf("(%8s %10s %10s %d)%s\n", $rev, $committer, $date, $i++, $output); +} + +sub init_claim { + my ($rev) = @_; + my %revinfo = git_commit_info($rev); + for (my $i = 0; $i < @filelines; $i++) { + $filelines[$i] = [ $filelines[$i], '', '', '', 1]; + # line, + # rev, + # author, + # date, + # 1 <-- belongs to the original file. + } + $revs{$rev}{'lines'} = \@filelines; +} + + +sub handle_rev { + my $i = 0; + while (my $rev = shift @revqueue) { + + my %revinfo = git_commit_info($rev); + + foreach my $p (@{$revs{$rev}{'parents'}}) { + + git_diff_parse($p, $rev, %revinfo); + push @revqueue, $p; + } + + + if (scalar @{$revs{$rev}{parents}} == 0) { + # We must be at the initial rev here, so claim everything that is left. + for (my $i = 0; $i < @{$revs{$rev}{lines}}; $i++) { + if (ref ${$revs{$rev}{lines}}[$i] eq '' || ${$revs{$rev}{lines}}[$i][1] eq '') { + claim_line($i, $rev, $revs{$rev}{lines}, %revinfo); + } + } + } + } +} + + +sub git_rev_list { + my ($rev, $file) = @_; + + open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file) + or die "Failed to exec git-rev-list: $!"; + + my @revs; + while(my $line = <P>) { + chomp $line; + my ($rev, @parents) = split /\s+/, $line; + push @revs, [ $rev, @parents ]; + } + close(P); + + printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0); + return @revs; +} + +sub find_parent_renames { + my ($rev, $file) = @_; + + open(P,"-|","git-diff-tree", "-M50", "-r","--name-status", "-z","$rev") + or die "Failed to exec git-diff: $!"; + + local $/ = "\0"; + my %bound; + my $junk = <P>; + while (my $change = <P>) { + chomp $change; + my $filename = <P>; + chomp $filename; + + if ($change =~ m/^[AMD]$/ ) { + next; + } elsif ($change =~ m/^R/ ) { + my $oldfilename = $filename; + $filename = <P>; + chomp $filename; + if ( $file eq $filename ) { + my $parent = git_find_parent($rev, $oldfilename); + @bound{'rev','filename'} = ($parent, $oldfilename); + last; + } + } + } + close(P); + + return \%bound; +} + + +sub git_find_parent { + my ($rev, $filename) = @_; + + open(REVPARENT,"-|","git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename) + or die "Failed to open git-rev-list to find a single parent: $!"; + + my $parentline = <REVPARENT>; + chomp $parentline; + my ($revfound,$parent) = split m/\s+/, $parentline; + + close(REVPARENT); + + return $parent; +} + + +# Get a diff between the current revision and a parent. +# Record the commit information that results. +sub git_diff_parse { + my ($parent, $rev, %revinfo) = @_; + + my ($ri, $pi) = (0,0); + open(DIFF,"-|","git-diff-tree","-M","-p",$rev,$parent,"--", + $revs{$rev}{'filename'}, $revs{$parent}{'filename'}) + or die "Failed to call git-diff for annotation: $!"; + + my $slines = $revs{$rev}{'lines'}; + my @plines; + + my $gotheader = 0; + my ($remstart, $remlength, $addstart, $addlength); + my ($hunk_start, $hunk_index, $hunk_adds); + while(<DIFF>) { + chomp; + if (m/^@@ -(\d+),(\d+) \+(\d+),(\d+)/) { + ($remstart, $remlength, $addstart, $addlength) = ($1, $2, $3, $4); + # Adjust for 0-based arrays + $remstart--; + $addstart--; + # Reinit hunk tracking. + $hunk_start = $remstart; + $hunk_index = 0; + $gotheader = 1; + + for (my $i = $ri; $i < $remstart; $i++) { + $plines[$pi++] = $slines->[$i]; + $ri++; + } + next; + } elsif (!$gotheader) { + next; + } + + if (m/^\+(.*)$/) { + my $line = $1; + $plines[$pi++] = [ $line, '', '', '', 0 ]; + next; + + } elsif (m/^-(.*)$/) { + my $line = $1; + if (get_line($slines, $ri) eq $line) { + # Found a match, claim + claim_line($ri, $rev, $slines, %revinfo); + } else { + die sprintf("Sync error: %d/%d\n|%s\n|%s\n%s => %s\n", + $ri, $hunk_start + $hunk_index, + $line, + get_line($slines, $ri), + $rev, $parent); + } + $ri++; + + } else { + if (substr($_,1) ne get_line($slines,$ri) ) { + die sprintf("Line %d (%d) does not match:\n|%s\n|%s\n%s => %s\n", + $hunk_start + $hunk_index, $ri, + substr($_,1), + get_line($slines,$ri), + $rev, $parent); + } + $plines[$pi++] = $slines->[$ri++]; + } + $hunk_index++; + } + close(DIFF); + for (my $i = $ri; $i < @{$slines} ; $i++) { + push @plines, $slines->[$ri++]; + } + + $revs{$parent}{lines} = \@plines; + return; +} + +sub get_line { + my ($lines, $index) = @_; + + return ref $lines->[$index] ne '' ? $lines->[$index][0] : $lines->[$index]; +} + +sub git_cat_file { + my ($parent, $filename) = @_; + return () unless defined $parent && defined $filename; + my $blobline = `git-ls-tree $parent $filename`; + my ($mode, $type, $blob, $tfilename) = split(/\s+/, $blobline, 4); + + open(C,"-|","git-cat-file", "blob", $blob) + or die "Failed to git-cat-file blob $blob (rev $parent, file $filename): " . $!; + + my @lines; + while(<C>) { + chomp; + push @lines, $_; + } + close(C); + + return @lines; +} + + +sub claim_line { + my ($floffset, $rev, $lines, %revinfo) = @_; + my $oline = get_line($lines, $floffset); + @{$lines->[$floffset]} = ( $oline, $rev, + $revinfo{'author'}, $revinfo{'author_date'} ); + #printf("Claiming line %d with rev %s: '%s'\n", + # $floffset, $rev, $oline) if 1; +} + +sub git_commit_info { + my ($rev) = @_; + open(COMMIT, "-|","git-cat-file", "commit", $rev) + or die "Failed to call git-cat-file: $!"; + + my %info; + while(<COMMIT>) { + chomp; + last if (length $_ == 0); + + if (m/^author (.*) <(.*)> (.*)$/) { + $info{'author'} = $1; + $info{'author_email'} = $2; + $info{'author_date'} = $3; + } elsif (m/^committer (.*) <(.*)> (.*)$/) { + $info{'committer'} = $1; + $info{'committer_email'} = $2; + $info{'committer_date'} = $3; + } + } + close(COMMIT); + + return %info; +} diff --git a/git-fetch.sh b/git-fetch.sh index b4325d9d98..23d965f327 100755 --- a/git-fetch.sh +++ b/git-fetch.sh @@ -320,7 +320,7 @@ fetch_main () { ( : subshell because we muck with IFS IFS=" $LF" ( - git-fetch-pack $exec $keep "$remote" $rref || echo failed "$remote" + git-fetch-pack $exec $keep --thin "$remote" $rref || echo failed "$remote" ) | while read sha1 remote_name do diff --git a/git-push.sh b/git-push.sh index 706db9933e..73dcf067cb 100755 --- a/git-push.sh +++ b/git-push.sh @@ -8,6 +8,7 @@ USAGE='[--all] [--tags] [--force] <repository> [<refspec>...]' has_all= has_force= has_exec= +has_thin= remote= do_tags= @@ -22,6 +23,8 @@ do has_force=--force ;; --exec=*) has_exec="$1" ;; + --thin) + has_thin="$1" ;; -*) usage ;; *) @@ -72,6 +75,7 @@ set x "$remote" "$@"; shift test "$has_all" && set x "$has_all" "$@" && shift test "$has_force" && set x "$has_force" "$@" && shift test "$has_exec" && set x "$has_exec" "$@" && shift +test "$has_thin" && set x "$has_thin" "$@" && shift case "$remote" in http://* | https://*) diff --git a/pack-objects.c b/pack-objects.c index e3946db5ca..4f8814d8ee 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -3,6 +3,7 @@ #include "delta.h" #include "pack.h" #include "csum-file.h" +#include "diff.h" #include <sys/time.h> static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; @@ -26,6 +27,13 @@ struct object_entry { struct object_entry *delta_sibling; /* other deltified objects who * uses the same base as me */ + int preferred_base; /* we do not pack this, but is encouraged to + * be used as the base objectto delta huge + * objects against. + */ + int based_on_preferred; /* current delta candidate is a preferred + * one, or delta against a preferred one. + */ }; /* @@ -48,7 +56,7 @@ static int local = 0; static int incremental = 0; static struct object_entry **sorted_by_sha, **sorted_by_type; static struct object_entry *objects = NULL; -static int nr_objects = 0, nr_alloc = 0; +static int nr_objects = 0, nr_alloc = 0, nr_result = 0; static const char *base_name; static unsigned char pack_file_sha1[20]; static int progress = 1; @@ -229,7 +237,8 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha return n; } -static unsigned long write_object(struct sha1file *f, struct object_entry *entry) +static unsigned long write_object(struct sha1file *f, + struct object_entry *entry) { unsigned long size; char type[10]; @@ -239,6 +248,9 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry enum object_type obj_type; int to_reuse = 0; + if (entry->preferred_base) + return 0; + obj_type = entry->type; if (! entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ @@ -326,10 +338,11 @@ static void write_pack_file(void) if (!base_name) f = sha1fd(1, "<stdout>"); else - f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack"); + f = sha1create("%s-%s.%s", base_name, + sha1_to_hex(object_list_sha1), "pack"); hdr.hdr_signature = htonl(PACK_SIGNATURE); hdr.hdr_version = htonl(PACK_VERSION); - hdr.hdr_entries = htonl(nr_objects); + hdr.hdr_entries = htonl(nr_result); sha1write(f, &hdr, sizeof(hdr)); offset = sizeof(hdr); for (i = 0; i < nr_objects; i++) @@ -341,9 +354,10 @@ static void write_pack_file(void) static void write_index_file(void) { int i; - struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx"); + struct sha1file *f = sha1create("%s-%s.%s", base_name, + sha1_to_hex(object_list_sha1), "idx"); struct object_entry **list = sorted_by_sha; - struct object_entry **last = list + nr_objects; + struct object_entry **last = list + nr_result; unsigned int array[256]; /* @@ -368,7 +382,7 @@ static void write_index_file(void) * Write the actual SHA1 entries.. */ list = sorted_by_sha; - for (i = 0; i < nr_objects; i++) { + for (i = 0; i < nr_result; i++) { struct object_entry *entry = *list++; unsigned int offset = htonl(entry->offset); sha1write(f, &offset, 4); @@ -378,27 +392,87 @@ static void write_index_file(void) sha1close(f, NULL, 1); } -static int add_object_entry(unsigned char *sha1, unsigned int hash) +static int locate_object_entry_hash(const unsigned char *sha1) +{ + int i; + unsigned int ui; + memcpy(&ui, sha1, sizeof(unsigned int)); + i = ui % object_ix_hashsz; + while (0 < object_ix[i]) { + if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20)) + return i; + if (++i == object_ix_hashsz) + i = 0; + } + return -1 - i; +} + +static struct object_entry *locate_object_entry(const unsigned char *sha1) +{ + int i; + + if (!object_ix_hashsz) + return NULL; + + i = locate_object_entry_hash(sha1); + if (0 <= i) + return &objects[object_ix[i]-1]; + return NULL; +} + +static void rehash_objects(void) { + int i; + struct object_entry *oe; + + object_ix_hashsz = nr_objects * 3; + if (object_ix_hashsz < 1024) + object_ix_hashsz = 1024; + object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz); + object_ix = memset(object_ix, 0, sizeof(int) * object_ix_hashsz); + for (i = 0, oe = objects; i < nr_objects; i++, oe++) { + int ix = locate_object_entry_hash(oe->sha1); + if (0 <= ix) + continue; + ix = -1 - ix; + object_ix[ix] = i + 1; + } +} + +static int add_object_entry(const unsigned char *sha1, const char *name, int exclude) +{ + unsigned int hash = 0; unsigned int idx = nr_objects; struct object_entry *entry; struct packed_git *p; unsigned int found_offset = 0; struct packed_git *found_pack = NULL; - - for (p = packed_git; p; p = p->next) { - struct pack_entry e; - if (find_pack_entry_one(sha1, &e, p)) { - if (incremental) - return 0; - if (local && !p->pack_local) - return 0; - if (!found_pack) { - found_offset = e.offset; - found_pack = e.p; + int ix; + + if (!exclude) { + for (p = packed_git; p; p = p->next) { + struct pack_entry e; + if (find_pack_entry_one(sha1, &e, p)) { + if (incremental) + return 0; + if (local && !p->pack_local) + return 0; + if (!found_pack) { + found_offset = e.offset; + found_pack = e.p; + } } } } + if ((entry = locate_object_entry(sha1)) != NULL) + goto already_added; + + while (*name) { + unsigned char c = *name++; + if (isspace(c)) + continue; + hash = hash * 11 + c; + } if (idx >= nr_alloc) { unsigned int needed = (idx + 1024) * 3 / 2; @@ -406,45 +480,79 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash) nr_alloc = needed; } entry = objects + idx; + nr_objects = idx + 1; memset(entry, 0, sizeof(*entry)); memcpy(entry->sha1, sha1, 20); entry->hash = hash; - if (found_pack) { - entry->in_pack = found_pack; - entry->in_pack_offset = found_offset; + + if (object_ix_hashsz * 3 <= nr_objects * 4) + rehash_objects(); + else { + ix = locate_object_entry_hash(entry->sha1); + if (0 <= ix) + die("internal error in object hashing."); + object_ix[-1 - ix] = idx + 1; + } + + already_added: + if (exclude) + entry->preferred_base = 1; + else { + if (found_pack) { + entry->in_pack = found_pack; + entry->in_pack_offset = found_offset; + } } - nr_objects = idx+1; return 1; } -static int locate_object_entry_hash(unsigned char *sha1) +static void add_pbase_tree(struct tree_desc *tree) { - int i; - unsigned int ui; - memcpy(&ui, sha1, sizeof(unsigned int)); - i = ui % object_ix_hashsz; - while (0 < object_ix[i]) { - if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20)) - return i; - if (++i == object_ix_hashsz) - i = 0; + while (tree->size) { + const unsigned char *sha1; + const char *name; + unsigned mode; + unsigned long size; + char type[20]; + + sha1 = tree_entry_extract(tree, &name, &mode); + update_tree_entry(tree); + if (!has_sha1_file(sha1)) + continue; + if (sha1_object_info(sha1, type, &size)) + continue; + add_object_entry(sha1, name, 1); + if (!strcmp(type, "tree")) { + struct tree_desc sub; + void *elem; + elem = read_sha1_file(sha1, type, &sub.size); + sub.buf = elem; + if (sub.buf) { + add_pbase_tree(&sub); + free(elem); + } + } } - return -1 - i; } -static struct object_entry *locate_object_entry(unsigned char *sha1) +static void add_preferred_base(unsigned char *sha1) { - int i = locate_object_entry_hash(sha1); - if (0 <= i) - return &objects[object_ix[i]-1]; - return NULL; + struct tree_desc tree; + void *elem; + elem = read_object_with_reference(sha1, "tree", &tree.size, NULL); + tree.buf = elem; + if (!tree.buf) + return; + add_object_entry(sha1, "", 1); + add_pbase_tree(&tree); + free(elem); } static void check_object(struct object_entry *entry) { char type[20]; - if (entry->in_pack) { + if (entry->in_pack && !entry->preferred_base) { unsigned char base[20]; unsigned long size; struct object_entry *base_entry; @@ -463,7 +571,8 @@ static void check_object(struct object_entry *entry) */ if (!no_reuse_delta && entry->in_pack_type == OBJ_DELTA && - (base_entry = locate_object_entry(base))) { + (base_entry = locate_object_entry(base)) && + (!base_entry->preferred_base)) { /* Depth value does not matter - find_deltas() * will never consider reused delta as the @@ -501,25 +610,6 @@ static void check_object(struct object_entry *entry) sha1_to_hex(entry->sha1), type); } -static void hash_objects(void) -{ - int i; - struct object_entry *oe; - - object_ix_hashsz = nr_objects * 2; - object_ix = xcalloc(sizeof(int), object_ix_hashsz); - for (i = 0, oe = objects; i < nr_objects; i++, oe++) { - int ix = locate_object_entry_hash(oe->sha1); - if (0 <= ix) { - error("the same object '%s' added twice", - sha1_to_hex(oe->sha1)); - continue; - } - ix = -1 - ix; - object_ix[ix] = i + 1; - } -} - static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) { struct object_entry *child = me->delta_child; @@ -538,7 +628,6 @@ static void get_object_details(void) int i; struct object_entry *entry; - hash_objects(); prepare_pack_ix(); for (i = 0, entry = objects; i < nr_objects; i++, entry++) check_object(entry); @@ -576,6 +665,24 @@ static int sha1_sort(const struct object_entry *a, const struct object_entry *b) return memcmp(a->sha1, b->sha1, 20); } +static struct object_entry **create_final_object_list() +{ + struct object_entry **list; + int i, j; + + for (i = nr_result = 0; i < nr_objects; i++) + if (!objects[i].preferred_base) + nr_result++; + list = xmalloc(nr_result * sizeof(struct object_entry *)); + for (i = j = 0; i < nr_objects; i++) { + if (!objects[i].preferred_base) + list[j++] = objects + i; + } + current_sort = sha1_sort; + qsort(list, nr_result, sizeof(struct object_entry *), sort_comparator); + return list; +} + static int type_size_sort(const struct object_entry *a, const struct object_entry *b) { if (a->type < b->type) @@ -586,6 +693,10 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr return -1; if (a->hash > b->hash) return 1; + if (a->preferred_base < b->preferred_base) + return -1; + if (a->preferred_base > b->preferred_base) + return 1; if (a->size < b->size) return -1; if (a->size > b->size) @@ -610,6 +721,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de { struct object_entry *cur_entry = cur->entry; struct object_entry *old_entry = old->entry; + int old_preferred = (old_entry->preferred_base || + old_entry->based_on_preferred); unsigned long size, oldsize, delta_size, sizediff; long max_size; void *delta_buf; @@ -618,9 +731,15 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de if (cur_entry->type != old_entry->type) return -1; - /* If the current object is at edge, take the depth the objects - * that depend on the current object into account -- otherwise - * they would become too deep. + /* We do not compute delta to *create* objects we are not + * going to pack. + */ + if (cur_entry->preferred_base) + return -1; + + /* If the current object is at pack edge, take the depth the + * objects that depend on the current object into account -- + * otherwise they would become too deep. */ if (cur_entry->delta_child) { if (max_depth <= cur_entry->delta_limit) @@ -645,8 +764,27 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de * delete). */ max_size = size / 2 - 20; - if (cur_entry->delta) - max_size = cur_entry->delta_size-1; + if (cur_entry->delta) { + if (cur_entry->based_on_preferred) { + if (old_preferred) + max_size = cur_entry->delta_size-1; + else + /* trying with non-preferred one when we + * already have a delta based on preferred + * one is pointless. + */ + return 0; + } + else if (!old_preferred) + max_size = cur_entry->delta_size-1; + else + /* otherwise... even if delta with a + * preferred one produces a bigger result than + * what we currently have, which is based on a + * non-preferred one, it is OK. + */ + ; + } if (sizediff >= max_size) return -1; delta_buf = diff_delta(old->data, oldsize, @@ -656,6 +794,7 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de cur_entry->delta = old_entry; cur_entry->delta_size = delta_size; cur_entry->depth = old_entry->depth + 1; + cur_entry->based_on_preferred = old_preferred; free(delta_buf); return 0; } @@ -721,7 +860,7 @@ static void find_deltas(struct object_entry **list, int window, int depth) static void prepare_pack(int window, int depth) { if (progress) - fprintf(stderr, "Packing %d objects", nr_objects); + fprintf(stderr, "Packing %d objects", nr_result); get_object_details(); if (progress) fputc('.', stderr); @@ -860,8 +999,6 @@ int main(int argc, char **argv) gettimeofday(&prev_tv, NULL); } while (fgets(line, sizeof(line), stdin) != NULL) { - unsigned int hash; - char *p; unsigned char sha1[20]; if (progress && (eye_candy <= nr_objects)) { @@ -880,31 +1017,32 @@ int main(int argc, char **argv) } eye_candy += eye_candy_incr; } + if (line[0] == '-') { + if (get_sha1_hex(line+1, sha1)) + die("expected edge sha1, got garbage:\n %s", + line+1); + add_preferred_base(sha1); + continue; + } if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); - hash = 0; - p = line+40; - while (*p) { - unsigned char c = *p++; - if (isspace(c)) - continue; - hash = hash * 11 + c; - } - add_object_entry(sha1, hash); + add_object_entry(sha1, line+40, 0); } if (progress) fprintf(stderr, "Done counting %d objects.\n", nr_objects); if (non_empty && !nr_objects) return 0; - sorted_by_sha = create_sorted_list(sha1_sort); + sorted_by_sha = create_final_object_list(); SHA1_Init(&ctx); list = sorted_by_sha; - for (i = 0; i < nr_objects; i++) { + for (i = 0; i < nr_result; i++) { struct object_entry *entry = *list++; SHA1_Update(&ctx, entry->sha1, 20); } SHA1_Final(object_list_sha1, &ctx); + if (progress && (nr_objects != nr_result)) + fprintf(stderr, "Result has %d objects.\n", nr_result); if (reuse_cached_pack(object_list_sha1, pack_to_stdout)) ; @@ -917,6 +1055,6 @@ int main(int argc, char **argv) } if (progress) fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n", - nr_objects, written, written_delta, reused, reused_delta); + nr_result, written, written_delta, reused, reused_delta); return 0; } diff --git a/rev-list.c b/rev-list.c index f2d1105cae..373549e59e 100644 --- a/rev-list.c +++ b/rev-list.c @@ -30,7 +30,7 @@ static const char rev_list_usage[] = " --date-order\n" " formatting output:\n" " --parents\n" -" --objects\n" +" --objects | --objects-edge\n" " --unpacked\n" " --header | --pretty\n" " --abbrev=nr | --no-abbrev\n" @@ -44,6 +44,7 @@ static int bisect_list = 0; static int tag_objects = 0; static int tree_objects = 0; static int blob_objects = 0; +static int edge_hint = 0; static int verbose_header = 0; static int abbrev = DEFAULT_ABBREV; static int show_parents = 0; @@ -430,16 +431,30 @@ static struct commit_list *find_bisection(struct commit_list *list) return best; } +static void mark_edge_parents_uninteresting(struct commit *commit) +{ + struct commit_list *parents; + + for (parents = commit->parents; parents; parents = parents->next) { + struct commit *parent = parents->item; + if (!(parent->object.flags & UNINTERESTING)) + continue; + mark_tree_uninteresting(parent->tree); + if (edge_hint) + printf("-%s\n", sha1_to_hex(parent->object.sha1)); + } +} + static void mark_edges_uninteresting(struct commit_list *list) { for ( ; list; list = list->next) { - struct commit_list *parents = list->item->parents; + struct commit *commit = list->item; - for ( ; parents; parents = parents->next) { - struct commit *commit = parents->item; - if (commit->object.flags & UNINTERESTING) - mark_tree_uninteresting(commit->tree); + if (commit->object.flags & UNINTERESTING) { + mark_tree_uninteresting(commit->tree); + continue; } + mark_edge_parents_uninteresting(commit); } } @@ -843,6 +858,13 @@ int main(int argc, const char **argv) blob_objects = 1; continue; } + if (!strcmp(arg, "--objects-edge")) { + tag_objects = 1; + tree_objects = 1; + blob_objects = 1; + edge_hint = 1; + continue; + } if (!strcmp(arg, "--unpacked")) { unpacked = 1; limited = 1; diff --git a/rev-parse.c b/rev-parse.c index a5fb93c3ca..610eacb35a 100644 --- a/rev-parse.c +++ b/rev-parse.c @@ -43,6 +43,7 @@ static int is_rev_argument(const char *arg) "--min-age=", "--no-merges", "--objects", + "--objects-edge", "--parents", "--pretty", "--show-breaks", diff --git a/send-pack.c b/send-pack.c index 990be3f1a3..ad22da56e9 100644 --- a/send-pack.c +++ b/send-pack.c @@ -12,6 +12,7 @@ static const char *exec = "git-receive-pack"; static int verbose = 0; static int send_all = 0; static int force_update = 0; +static int use_thin_pack = 0; static int is_zero_sha1(const unsigned char *sha1) { @@ -41,7 +42,10 @@ static void exec_rev_list(struct ref *refs) int i = 0; args[i++] = "rev-list"; /* 0 */ - args[i++] = "--objects"; /* 1 */ + if (use_thin_pack) /* 1 */ + args[i++] = "--objects-edge"; + else + args[i++] = "--objects"; while (refs) { char *buf = malloc(100); if (i > 900) @@ -361,6 +365,10 @@ int main(int argc, char **argv) verbose = 1; continue; } + if (!strcmp(arg, "--thin")) { + use_thin_pack = 1; + continue; + } usage(send_pack_usage); } if (!dest) { diff --git a/upload-pack.c b/upload-pack.c index 3606529f61..635abb371d 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -14,6 +14,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n #define MAX_HAS 256 #define MAX_NEEDS 256 static int nr_has = 0, nr_needs = 0, multi_ack = 0, nr_our_refs = 0; +static int use_thin_pack = 0; static unsigned char has_sha1[MAX_HAS][20]; static unsigned char needs_sha1[MAX_NEEDS][20]; static unsigned int timeout = 0; @@ -49,8 +50,10 @@ static void create_pack_file(void) char *buf; char **p; - if (create_full_pack) + if (create_full_pack) { args = 10; + use_thin_pack = 0; /* no point doing it */ + } else args = nr_has + nr_needs + 5; argv = xmalloc(args * sizeof(char *)); @@ -62,7 +65,7 @@ static void create_pack_file(void) close(fd[0]); close(fd[1]); *p++ = "rev-list"; - *p++ = "--objects"; + *p++ = use_thin_pack ? "--objects-edge" : "--objects"; if (create_full_pack || MAX_NEEDS <= nr_needs) *p++ = "--all"; else { @@ -192,6 +195,8 @@ static int receive_needs(void) "expected to get sha, not '%s'", line); if (strstr(line+45, "multi_ack")) multi_ack = 1; + if (strstr(line+45, "thin-pack")) + use_thin_pack = 1; /* We have sent all our refs already, and the other end * should have chosen out of them; otherwise they are @@ -213,7 +218,7 @@ static int receive_needs(void) static int send_ref(const char *refname, const unsigned char *sha1) { - static char *capabilities = "multi_ack"; + static char *capabilities = "multi_ack thin-pack"; struct object *o = parse_object(sha1); if (!o) |