diff options
Diffstat (limited to 'commit.c')
| -rw-r--r-- | commit.c | 218 | 
1 files changed, 177 insertions, 41 deletions
| @@ -8,12 +8,15 @@  #include "notes.h"  #include "gpg-interface.h"  #include "mergesort.h" +#include "commit-slab.h" +#include "prio-queue.h"  static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **);  int save_commit_buffer = 1;  const char *commit_type = "commit"; +static int commit_count;  static struct commit *check_commit(struct object *obj,  				   const unsigned char *sha1, @@ -58,8 +61,11 @@ struct commit *lookup_commit_or_die(const unsigned char *sha1, const char *ref_n  struct commit *lookup_commit(const unsigned char *sha1)  {  	struct object *obj = lookup_object(sha1); -	if (!obj) -		return create_object(sha1, OBJ_COMMIT, alloc_commit_node()); +	if (!obj) { +		struct commit *c = alloc_commit_node(); +		c->index = commit_count++; +		return create_object(sha1, OBJ_COMMIT, c); +	}  	if (!obj->type)  		obj->type = OBJ_COMMIT;  	return check_commit(obj, sha1, 0); @@ -507,32 +513,136 @@ struct commit *pop_commit(struct commit_list **stack)  }  /* + * Topological sort support + */ + +/* count number of children that have not been emitted */ +define_commit_slab(indegree_slab, int); + +/* record author-date for each commit object */ +define_commit_slab(author_date_slab, unsigned long); + +static void record_author_date(struct author_date_slab *author_date, +			       struct commit *commit) +{ +	const char *buf, *line_end; +	char *buffer = NULL; +	struct ident_split ident; +	char *date_end; +	unsigned long date; + +	if (!commit->buffer) { +		unsigned long size; +		enum object_type type; +		buffer = read_sha1_file(commit->object.sha1, &type, &size); +		if (!buffer) +			return; +	} + +	for (buf = commit->buffer ? commit->buffer : buffer; +	     buf; +	     buf = line_end + 1) { +		line_end = strchrnul(buf, '\n'); +		if (prefixcmp(buf, "author ")) { +			if (!line_end[0] || line_end[1] == '\n') +				return; /* end of header */ +			continue; +		} +		if (split_ident_line(&ident, +				     buf + strlen("author "), +				     line_end - (buf + strlen("author "))) || +		    !ident.date_begin || !ident.date_end) +			goto fail_exit; /* malformed "author" line */ +		break; +	} + +	date = strtoul(ident.date_begin, &date_end, 10); +	if (date_end != ident.date_end) +		goto fail_exit; /* malformed date */ +	*(author_date_slab_at(author_date, commit)) = date; + +fail_exit: +	free(buffer); +} + +static int compare_commits_by_author_date(const void *a_, const void *b_, +					  void *cb_data) +{ +	const struct commit *a = a_, *b = b_; +	struct author_date_slab *author_date = cb_data; +	unsigned long a_date = *(author_date_slab_at(author_date, a)); +	unsigned long b_date = *(author_date_slab_at(author_date, b)); + +	/* newer commits with larger date first */ +	if (a_date < b_date) +		return 1; +	else if (a_date > b_date) +		return -1; +	return 0; +} + +int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused) +{ +	const struct commit *a = a_, *b = b_; +	/* newer commits with larger date first */ +	if (a->date < b->date) +		return 1; +	else if (a->date > b->date) +		return -1; +	return 0; +} + +/*   * Performs an in-place topological sort on the list supplied.   */ -void sort_in_topological_order(struct commit_list ** list, int lifo) +void sort_in_topological_order(struct commit_list **list, enum rev_sort_order sort_order)  {  	struct commit_list *next, *orig = *list; -	struct commit_list *work, **insert;  	struct commit_list **pptr; +	struct indegree_slab indegree; +	struct prio_queue queue; +	struct commit *commit; +	struct author_date_slab author_date;  	if (!orig)  		return;  	*list = NULL; +	init_indegree_slab(&indegree); +	memset(&queue, '\0', sizeof(queue)); + +	switch (sort_order) { +	default: /* REV_SORT_IN_GRAPH_ORDER */ +		queue.compare = NULL; +		break; +	case REV_SORT_BY_COMMIT_DATE: +		queue.compare = compare_commits_by_commit_date; +		break; +	case REV_SORT_BY_AUTHOR_DATE: +		init_author_date_slab(&author_date); +		queue.compare = compare_commits_by_author_date; +		queue.cb_data = &author_date; +		break; +	} +  	/* Mark them and clear the indegree */  	for (next = orig; next; next = next->next) {  		struct commit *commit = next->item; -		commit->indegree = 1; +		*(indegree_slab_at(&indegree, commit)) = 1; +		/* also record the author dates, if needed */ +		if (sort_order == REV_SORT_BY_AUTHOR_DATE) +			record_author_date(&author_date, commit);  	}  	/* update the indegree */  	for (next = orig; next; next = next->next) { -		struct commit_list * parents = next->item->parents; +		struct commit_list *parents = next->item->parents;  		while (parents) {  			struct commit *parent = parents->item; +			int *pi = indegree_slab_at(&indegree, parent); -			if (parent->indegree) -				parent->indegree++; +			if (*pi) +				(*pi)++;  			parents = parents->next;  		}  	} @@ -544,34 +654,33 @@ void sort_in_topological_order(struct commit_list ** list, int lifo)  	 *  	 * the tips serve as a starting set for the work queue.  	 */ -	work = NULL; -	insert = &work;  	for (next = orig; next; next = next->next) {  		struct commit *commit = next->item; -		if (commit->indegree == 1) -			insert = &commit_list_insert(commit, insert)->next; +		if (*(indegree_slab_at(&indegree, commit)) == 1) +			prio_queue_put(&queue, commit);  	} -	/* process the list in topological order */ -	if (!lifo) -		commit_list_sort_by_date(&work); +	/* +	 * This is unfortunate; the initial tips need to be shown +	 * in the order given from the revision traversal machinery. +	 */ +	if (sort_order == REV_SORT_IN_GRAPH_ORDER) +		prio_queue_reverse(&queue); + +	/* We no longer need the commit list */ +	free_commit_list(orig);  	pptr = list;  	*list = NULL; -	while (work) { -		struct commit *commit; -		struct commit_list *parents, *work_item; - -		work_item = work; -		work = work_item->next; -		work_item->next = NULL; +	while ((commit = prio_queue_get(&queue)) != NULL) { +		struct commit_list *parents; -		commit = work_item->item;  		for (parents = commit->parents; parents ; parents = parents->next) {  			struct commit *parent = parents->item; +			int *pi = indegree_slab_at(&indegree, parent); -			if (!parent->indegree) +			if (!*pi)  				continue;  			/* @@ -579,21 +688,22 @@ void sort_in_topological_order(struct commit_list ** list, int lifo)  			 * when all their children have been emitted thereby  			 * guaranteeing topological order.  			 */ -			if (--parent->indegree == 1) { -				if (!lifo) -					commit_list_insert_by_date(parent, &work); -				else -					commit_list_insert(parent, &work); -			} +			if (--(*pi) == 1) +				prio_queue_put(&queue, parent);  		}  		/* -		 * work_item is a commit all of whose children -		 * have already been emitted. we can emit it now. +		 * all children of commit have already been +		 * emitted. we can emit it now.  		 */ -		commit->indegree = 0; -		*pptr = work_item; -		pptr = &work_item->next; +		*(indegree_slab_at(&indegree, commit)) = 0; + +		pptr = &commit_list_insert(commit, pptr)->next;  	} + +	clear_indegree_slab(&indegree); +	clear_prio_queue(&queue); +	if (sort_order == REV_SORT_BY_AUTHOR_DATE) +		clear_author_date_slab(&author_date);  }  /* merge-base stuff */ @@ -1240,10 +1350,15 @@ int commit_tree(const struct strbuf *msg, unsigned char *tree,  static int find_invalid_utf8(const char *buf, int len)  {  	int offset = 0; +	static const unsigned int max_codepoint[] = { +		0x7f, 0x7ff, 0xffff, 0x10ffff +	};  	while (len) {  		unsigned char c = *buf++;  		int bytes, bad_offset; +		unsigned int codepoint; +		unsigned int min_val, max_val;  		len--;  		offset++; @@ -1264,24 +1379,48 @@ static int find_invalid_utf8(const char *buf, int len)  			bytes++;  		} -		/* Must be between 1 and 5 more bytes */ -		if (bytes < 1 || bytes > 5) +		/* +		 * Must be between 1 and 3 more bytes.  Longer sequences result in +		 * codepoints beyond U+10FFFF, which are guaranteed never to exist. +		 */ +		if (bytes < 1 || 3 < bytes)  			return bad_offset;  		/* Do we *have* that many bytes? */  		if (len < bytes)  			return bad_offset; +		/* +		 * Place the encoded bits at the bottom of the value and compute the +		 * valid range. +		 */ +		codepoint = (c & 0x7f) >> bytes; +		min_val = max_codepoint[bytes-1] + 1; +		max_val = max_codepoint[bytes]; +  		offset += bytes;  		len -= bytes;  		/* And verify that they are good continuation bytes */  		do { +			codepoint <<= 6; +			codepoint |= *buf & 0x3f;  			if ((*buf++ & 0xc0) != 0x80)  				return bad_offset;  		} while (--bytes); -		/* We could/should check the value and length here too */ +		/* Reject codepoints that are out of range for the sequence length. */ +		if (codepoint < min_val || codepoint > max_val) +			return bad_offset; +		/* Surrogates are only for UTF-16 and cannot be encoded in UTF-8. */ +		if ((codepoint & 0x1ff800) == 0xd800) +			return bad_offset; +		/* U+xxFFFE and U+xxFFFF are guaranteed non-characters. */ +		if ((codepoint & 0xfffe) == 0xfffe) +			return bad_offset; +		/* So are anything in the range U+FDD0..U+FDEF. */ +		if (codepoint >= 0xfdd0 && codepoint <= 0xfdef) +			return bad_offset;  	}  	return -1;  } @@ -1291,9 +1430,6 @@ static int find_invalid_utf8(const char *buf, int len)   *   * If it isn't, it assumes any non-utf8 characters are Latin1,   * and does the conversion. - * - * Fixme: we should probably also disallow overlong forms and - * invalid characters. But we don't do that currently.   */  static int verify_utf8(struct strbuf *buf)  { | 
