diff options
-rw-r--r-- | include/git2/diff.h | 84 | ||||
-rwxr-xr-x | src/amiga/map.c | 11 | ||||
-rw-r--r-- | src/blob.c | 19 | ||||
-rw-r--r-- | src/crlf.c | 2 | ||||
-rw-r--r-- | src/diff.c | 33 | ||||
-rw-r--r-- | src/diff.h | 24 | ||||
-rw-r--r-- | src/diff_output.c | 358 | ||||
-rw-r--r-- | src/filebuf.c | 9 | ||||
-rw-r--r-- | src/fileops.c | 48 | ||||
-rw-r--r-- | src/fileops.h | 1 | ||||
-rw-r--r-- | src/object.c | 98 | ||||
-rw-r--r-- | src/object.h | 39 | ||||
-rw-r--r-- | src/odb.c | 75 | ||||
-rw-r--r-- | src/odb.h | 25 | ||||
-rw-r--r-- | src/repository.h | 25 | ||||
-rw-r--r-- | tests-clar/diff/diff_helpers.c | 11 | ||||
-rw-r--r-- | tests-clar/diff/diffiter.c | 85 | ||||
-rw-r--r-- | tests-clar/status/worktree.c | 44 |
18 files changed, 710 insertions, 281 deletions
diff --git a/include/git2/diff.h b/include/git2/diff.h index 85bb308dd..05825c50d 100644 --- a/include/git2/diff.h +++ b/include/git2/diff.h @@ -56,7 +56,13 @@ enum { * values. Similarly, passing NULL for the options structure will * give the defaults. The default values are marked below. * - * @todo Most of the parameters here are not actually supported at this time. + * - flags: a combination of the GIT_DIFF_... values above + * - context_lines: number of lines of context to show around diffs + * - interhunk_lines: min lines between diff hunks to merge them + * - old_prefix: "directory" to prefix to old file names (default "a") + * - new_prefix: "directory" to prefix to new file names (default "b") + * - pathspec: array of paths / patterns to constrain diff + * - max_size: maximum blob size to diff, above this treated as binary */ typedef struct { uint32_t flags; /**< defaults to GIT_DIFF_NORMAL */ @@ -65,6 +71,7 @@ typedef struct { char *old_prefix; /**< defaults to "a" */ char *new_prefix; /**< defaults to "b" */ git_strarray pathspec; /**< defaults to show all paths */ + git_off_t max_size; /**< defaults to 512Mb */ } git_diff_options; /** @@ -72,13 +79,28 @@ typedef struct { */ typedef struct git_diff_list git_diff_list; +/** + * Flags that can be set for the file on side of a diff. + * + * Most of the flags are just for internal consumption by libgit2, + * but some of them may be interesting to external users. They are: + * + * - VALID_OID - the `oid` value is computed and correct + * - FREE_PATH - the `path` string is separated allocated memory + * - BINARY - this file should be considered binary data + * - NOT_BINARY - this file should be considered text data + * - FREE_DATA - the internal file data is kept in allocated memory + * - UNMAP_DATA - the internal file data is kept in mmap'ed memory + * - NO_DATA - this side of the diff should not be loaded + */ enum { GIT_DIFF_FILE_VALID_OID = (1 << 0), GIT_DIFF_FILE_FREE_PATH = (1 << 1), GIT_DIFF_FILE_BINARY = (1 << 2), GIT_DIFF_FILE_NOT_BINARY = (1 << 3), GIT_DIFF_FILE_FREE_DATA = (1 << 4), - GIT_DIFF_FILE_UNMAP_DATA = (1 << 5) + GIT_DIFF_FILE_UNMAP_DATA = (1 << 5), + GIT_DIFF_FILE_NO_DATA = (1 << 6), }; /** @@ -169,7 +191,7 @@ enum { GIT_DIFF_LINE_CONTEXT = ' ', GIT_DIFF_LINE_ADDITION = '+', GIT_DIFF_LINE_DELETION = '-', - GIT_DIFF_LINE_ADD_EOFNL = '\n', /**< DEPRECATED - will not be returned */ + GIT_DIFF_LINE_ADD_EOFNL = '\n', /**< Removed line w/o LF & added one with */ GIT_DIFF_LINE_DEL_EOFNL = '\0', /**< LF was removed at end of file */ /* The following values will only be sent to a `git_diff_data_fn` when @@ -384,25 +406,16 @@ GIT_EXTERN(int) git_diff_iterator_new( GIT_EXTERN(void) git_diff_iterator_free(git_diff_iterator *iterator); /** - * Return the number of files in the diff. - * - * Note that there is an uncommon scenario where this number might be too - * high -- if a file in the working directory has been "touched" on disk but - * the contents were then reverted, it might have been added to the - * `git_diff_list` as a MODIFIED file along with a note that the status - * needs to be confirmed when the file contents are loaded into memory. In - * that case, when the file is loaded, we will check the contents and might - * switch it back to UNMODIFIED. The loading of the file is deferred until - * as late as possible. As a result, this might return a value what was too - * high in those circumstances. + * Return progress value for traversing the diff. * - * This is true of `git_diff_foreach` as well, but the only implication - * there is that the `progress` value would not advance evenly. + * This returns a value between 0.0 and 1.0 that represents the progress + * through the diff iterator. The value is monotonically increasing and + * will advance gradually as you progress through the iteration. * - * @param iterator The iterator object - * @return The maximum number of files to be iterated over + * @param iterator The diff iterator + * @return Value between 0.0 and 1.0 */ -GIT_EXTERN(int) git_diff_iterator_num_files(git_diff_iterator *iterator); +GIT_EXTERN(float) git_diff_iterator_progress(git_diff_iterator *iterator); /** * Return the number of hunks in the current file @@ -450,16 +463,19 @@ GIT_EXTERN(int) git_diff_iterator_next_file( * It is recommended that you not call this if the file is a binary * file, but it is allowed to do so. * - * Warning! Call this function for the first time on a file is when the + * The `header` text output will contain the standard hunk header that + * would appear in diff output. The header string will be NUL terminated. + * + * WARNING! Call this function for the first time on a file is when the * actual text diff will be computed (it cannot be computed incrementally) * so the first call for a new file is expensive (at least in relative * terms - in reality, it is still pretty darn fast). * - * @param range Pointer where to store the range for the hunk - * @param header Pointer where to store the header for the chunk; - * this string is owned by the library and should not be freed by - * the user - * @param header_len Pointer where to store the length of the returned header + * @param range Output pointer to range of lines covered by the hunk; + * This range object is owned by the library and should not be freed. + * @param header Output pointer to the text of the hunk header + * This string is owned by the library and should not be freed. + * @param header_len Output pointer to store the length of the header text * @param iterator The iterator object * @return 0 on success, GIT_ITEROVER when done with current file, other * value < 0 on error @@ -473,12 +489,18 @@ GIT_EXTERN(int) git_diff_iterator_next_hunk( /** * Return the next line of the current hunk of diffs. * - * @param line_origin Pointer where to store a GIT_DIFF_LINE_ value; - * this value is a single character, not a buffer - * @param content Pointer where to store the content of the line; - * this string is owned by the library and should not be freed by - * the user - * @param Pointer where to store the length of the returned content + * The `line_origin` output will tell you what type of line this is + * (e.g. was it added or removed or is it just context for the diff). + * + * The `content` will be a pointer to the file data that goes in the + * line. IT WILL NOT BE NUL TERMINATED. You have to use the `content_len` + * value and only process that many bytes of data from the content string. + * + * @param line_origin Output pointer to store a GIT_DIFF_LINE value for this + * next chunk of data. The value is a single character, not a buffer. + * @param content Output pointer to store the content of the diff; this + * string is owned by the library and should not be freed. + * @param content_len Output pointer to store the length of the content. * @param iterator The iterator object * @return 0 on success, GIT_ITEROVER when done with current line, other * value < 0 on error diff --git a/src/amiga/map.c b/src/amiga/map.c index 2fb065c8b..c601de724 100755 --- a/src/amiga/map.c +++ b/src/amiga/map.c @@ -24,18 +24,15 @@ int p_mmap(git_map *out, size_t len, int prot, int flags, int fd, git_off_t offs return -1; } - if((out->data = malloc(len))) { - p_lseek(fd, offset, SEEK_SET); - p_read(fd, out->data, len); - } + out->data = malloc(len); + GITERR_CHECK_ALLOC(out->data); - if (!out->data || (out->data == MAP_FAILED)) { - giterr_set(GITERR_OS, "Failed to mmap. Could not write data"); + if (p_lseek(fd, offset, SEEK_SET) < 0 || p_read(fd, out->data, len) != len) + giterr_set(GITERR_OS, "mmap emulation failed"); return -1; } out->len = len; - return 0; } diff --git a/src/blob.c b/src/blob.c index 5a4a26bfa..a5a0b6dde 100644 --- a/src/blob.c +++ b/src/blob.c @@ -68,6 +68,7 @@ static int write_file_stream( int fd, error; char buffer[4096]; git_odb_stream *stream = NULL; + ssize_t read_len, written = 0; if ((error = git_odb_open_wstream( &stream, odb, (size_t)file_size, GIT_OBJ_BLOB)) < 0) @@ -78,20 +79,18 @@ static int write_file_stream( return -1; } - while (!error && file_size > 0) { - ssize_t read_len = p_read(fd, buffer, sizeof(buffer)); - - if (read_len < 0) { - giterr_set( - GITERR_OS, "Failed to create blob. Can't read whole file"); - error = -1; - } - else if (!(error = stream->write(stream, buffer, read_len))) - file_size -= read_len; + while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) { + error = stream->write(stream, buffer, read_len); + written += read_len; } p_close(fd); + if (written != file_size || read_len < 0) { + giterr_set(GITERR_OS, "Failed to read file into stream"); + error = -1; + } + if (!error) error = stream->finalize_write(oid, stream); diff --git a/src/crlf.c b/src/crlf.c index 509e55897..1b6898ba6 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -276,7 +276,7 @@ static int find_and_add_filter(git_vector *filters, git_repository *repo, const /* * Use the core Git logic to see if we should perform CRLF for this file - * based on its attributes & the value of `core.auto_crlf` + * based on its attributes & the value of `core.autocrlf` */ ca.crlf_action = crlf_input_action(&ca); diff --git a/src/diff.c b/src/diff.c index f8a01086c..499b95b44 100644 --- a/src/diff.c +++ b/src/diff.c @@ -11,6 +11,7 @@ #include "fileops.h" #include "config.h" #include "attr_file.h" +#include "filter.h" static char *diff_prefix_from_pathspec(const git_strarray *pathspec) { @@ -63,8 +64,8 @@ static bool diff_path_matches_pathspec(git_diff_list *diff, const char *path) git_vector_foreach(&diff->pathspec, i, match) { int result = strcmp(match->pattern, path) ? FNM_NOMATCH : 0; - - if (((diff->opts.flags & GIT_DIFF_DISABLE_PATHSPEC_MATCH) == 0) && + + if (((diff->opts.flags & GIT_DIFF_DISABLE_PATHSPEC_MATCH) == 0) && result == FNM_NOMATCH) result = p_fnmatch(match->pattern, path, 0); @@ -262,12 +263,14 @@ static int diff_delta__from_two( delta = diff_delta__alloc(diff, status, old_entry->path); GITERR_CHECK_ALLOC(delta); - delta->old_file.mode = old_mode; git_oid_cpy(&delta->old_file.oid, &old_entry->oid); + delta->old_file.size = old_entry->file_size; + delta->old_file.mode = old_mode; delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID; - delta->new_file.mode = new_mode; git_oid_cpy(&delta->new_file.oid, new_oid ? new_oid : &new_entry->oid); + delta->new_file.size = new_entry->file_size; + delta->new_file.mode = new_mode; if (new_oid || !git_oid_iszero(&new_entry->oid)) delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID; @@ -440,14 +443,22 @@ static int oid_for_workdir_item( giterr_set(GITERR_OS, "File size overflow for 32-bit systems"); result = -1; } else { - int fd = git_futils_open_ro(full_path.ptr); - if (fd < 0) - result = fd; - else { - result = git_odb__hashfd( - oid, fd, (size_t)item->file_size, GIT_OBJ_BLOB); - p_close(fd); + git_vector filters = GIT_VECTOR_INIT; + + result = git_filters_load( + &filters, repo, item->path, GIT_FILTER_TO_ODB); + if (result >= 0) { + int fd = git_futils_open_ro(full_path.ptr); + if (fd < 0) + result = fd; + else { + result = git_odb__hashfd_filtered( + oid, fd, (size_t)item->file_size, GIT_OBJ_BLOB, &filters); + p_close(fd); + } } + + git_filters_free(&filters); } git_buf_free(&full_path); diff --git a/src/diff.h b/src/diff.h index 2785fa425..ea38a678f 100644 --- a/src/diff.h +++ b/src/diff.h @@ -25,6 +25,8 @@ enum { GIT_DIFFCAPS_USE_DEV = (1 << 4), /* use st_dev? */ }; +#define MAX_DIFF_FILESIZE 0x20000000 + struct git_diff_list { git_refcount rc; git_repository *repo; @@ -40,5 +42,27 @@ struct git_diff_list { extern void git_diff__cleanup_modes( uint32_t diffcaps, uint32_t *omode, uint32_t *nmode); +/** + * Return the maximum possible number of files in the diff. + * + * NOTE: This number has to be treated as an upper bound on the number of + * files that have changed if the diff is with the working directory. + * + * Why?! For efficiency, we defer loading the file contents as long as + * possible, so if a file has been "touched" in the working directory and + * then reverted to the original content, it may get stored in the diff list + * as MODIFIED along with a flag that the status should be reconfirmed when + * it is actually loaded into memory. When that load happens, it could get + * flipped to UNMODIFIED. If unmodified files are being skipped, then the + * iterator will skip that file and this number may be too high. + * + * This behavior is true of `git_diff_foreach` as well, but the only + * implication there is that the `progress` value would not advance evenly. + * + * @param iterator The iterator object + * @return The maximum number of files to be iterated over + */ +int git_diff_iterator__max_files(git_diff_iterator *iterator); + #endif diff --git a/src/diff_output.c b/src/diff_output.c index 2c64b92ee..ea40c3355 100644 --- a/src/diff_output.c +++ b/src/diff_output.c @@ -22,7 +22,18 @@ * git_diff_foreach() call it is an emphemeral structure that is filled * in to execute each diff. In the case of a git_diff_iterator, it holds * most of the information for the diff in progress. - */ + * + * As each delta is processed, it goes through 3 phases: prep, load, exec. + * + * - In the prep phase, we just set the delta and quickly check the file + * attributes to see if it should be treated as binary. + * - In the load phase, we actually load the file content into memory. + * At this point, if we had deferred calculating OIDs, we might have to + * correct the delta to be UNMODIFIED. + * - In the exec phase, we actually run the diff and execute the callbacks. + * For foreach, this is just a pass-through to the user's callbacks. For + * iterators, we record the hunks and data spans into memory. + */ typedef struct { git_repository *repo; git_diff_options *opts; @@ -140,7 +151,8 @@ static int update_file_is_binary_by_attr( { const char *value; - if (!repo) + /* because of blob diffs, cannot assume path is set */ + if (!file->path || !strlen(file->path)) return 0; if (git_attr_get(&value, repo, 0, file->path, "diff") < 0) @@ -160,9 +172,13 @@ static void update_delta_is_binary(git_diff_delta *delta) if ((delta->old_file.flags & GIT_DIFF_FILE_BINARY) != 0 || (delta->new_file.flags & GIT_DIFF_FILE_BINARY) != 0) delta->binary = 1; - else if ((delta->old_file.flags & GIT_DIFF_FILE_NOT_BINARY) != 0 || - (delta->new_file.flags & GIT_DIFF_FILE_NOT_BINARY) != 0) + +#define NOT_BINARY_FLAGS (GIT_DIFF_FILE_NOT_BINARY|GIT_DIFF_FILE_NO_DATA) + + else if ((delta->old_file.flags & NOT_BINARY_FLAGS) != 0 && + (delta->new_file.flags & NOT_BINARY_FLAGS) != 0) delta->binary = 0; + /* otherwise leave delta->binary value untouched */ } @@ -207,34 +223,46 @@ static int diff_delta_is_binary_by_attr(diff_delta_context *ctxt) return error; } -static int diff_delta_is_binary_by_content(diff_delta_context *ctxt) +static int diff_delta_is_binary_by_content( + diff_delta_context *ctxt, git_diff_file *file, git_map *map) { - git_diff_delta *delta = ctxt->delta; git_buf search; - if ((delta->old_file.flags & BINARY_DIFF_FLAGS) == 0) { - search.ptr = ctxt->old_data.data; - search.size = min(ctxt->old_data.len, 4000); + if ((file->flags & BINARY_DIFF_FLAGS) == 0) { + search.ptr = map->data; + search.size = min(map->len, 4000); if (git_buf_is_binary(&search)) - delta->old_file.flags |= GIT_DIFF_FILE_BINARY; + file->flags |= GIT_DIFF_FILE_BINARY; else - delta->old_file.flags |= GIT_DIFF_FILE_NOT_BINARY; + file->flags |= GIT_DIFF_FILE_NOT_BINARY; } - if ((delta->new_file.flags & BINARY_DIFF_FLAGS) == 0) { - search.ptr = ctxt->new_data.data; - search.size = min(ctxt->new_data.len, 4000); + update_delta_is_binary(ctxt->delta); - if (git_buf_is_binary(&search)) - delta->new_file.flags |= GIT_DIFF_FILE_BINARY; - else - delta->new_file.flags |= GIT_DIFF_FILE_NOT_BINARY; + return 0; +} + +static int diff_delta_is_binary_by_size( + diff_delta_context *ctxt, git_diff_file *file) +{ + git_off_t threshold = MAX_DIFF_FILESIZE; + + if ((file->flags & BINARY_DIFF_FLAGS) != 0) + return 0; + + if (ctxt && ctxt->opts) { + if (ctxt->opts->max_size < 0) + return 0; + + if (ctxt->opts->max_size > 0) + threshold = ctxt->opts->max_size; } - update_delta_is_binary(delta); + if (file->size > threshold) + file->flags |= GIT_DIFF_FILE_BINARY; - /* TODO: if value != NULL, implement diff drivers */ + update_delta_is_binary(ctxt->delta); return 0; } @@ -262,31 +290,65 @@ static void setup_xdiff_options( } static int get_blob_content( - git_repository *repo, - const git_oid *oid, + diff_delta_context *ctxt, + git_diff_file *file, git_map *map, git_blob **blob) { - if (git_oid_iszero(oid)) + int error; + git_odb_object *odb_obj = NULL; + + if (git_oid_iszero(&file->oid)) return 0; - if (git_blob_lookup(blob, repo, oid) < 0) - return -1; + if (!file->size) { + git_odb *odb; + size_t len; + git_otype type; + + /* peek at object header to avoid loading if too large */ + if ((error = git_repository_odb__weakptr(&odb, ctxt->repo)) < 0 || + (error = git_odb__read_header_or_object( + &odb_obj, &len, &type, odb, &file->oid)) < 0) + return error; + + assert(type == GIT_OBJ_BLOB); + + file->size = len; + } + + /* if blob is too large to diff, mark as binary */ + if ((error = diff_delta_is_binary_by_size(ctxt, file)) < 0) + return error; + if (ctxt->delta->binary == 1) + return 0; + + if (odb_obj != NULL) { + error = git_object__from_odb_object( + (git_object **)blob, ctxt->repo, odb_obj, GIT_OBJ_BLOB); + git_odb_object_free(odb_obj); + } else + error = git_blob_lookup(blob, ctxt->repo, &file->oid); + + if (error) + return error; map->data = (void *)git_blob_rawcontent(*blob); map->len = git_blob_rawsize(*blob); - return 0; + + return diff_delta_is_binary_by_content(ctxt, file, map); } static int get_workdir_content( - git_repository *repo, + diff_delta_context *ctxt, git_diff_file *file, git_map *map) { int error = 0; git_buf path = GIT_BUF_INIT; + const char *wd = git_repository_workdir(ctxt->repo); - if (git_buf_joinpath(&path, git_repository_workdir(repo), file->path) < 0) + if (git_buf_joinpath(&path, wd, file->path) < 0) return -1; if (S_ISLNK(file->mode)) { @@ -307,13 +369,68 @@ static int get_workdir_content( if (read_len < 0) { giterr_set(GITERR_OS, "Failed to read symlink '%s'", file->path); error = -1; - } else - map->len = read_len; + goto cleanup; + } + + map->len = read_len; } else { - error = git_futils_mmap_ro_file(map, path.ptr); - file->flags |= GIT_DIFF_FILE_UNMAP_DATA; + git_file fd = git_futils_open_ro(path.ptr); + git_vector filters = GIT_VECTOR_INIT; + + if (fd < 0) { + error = fd; + goto cleanup; + } + + if (!file->size) + file->size = git_futils_filesize(fd); + + if ((error = diff_delta_is_binary_by_size(ctxt, file)) < 0 || + ctxt->delta->binary == 1) + goto close_and_cleanup; + + error = git_filters_load( + &filters, ctxt->repo, file->path, GIT_FILTER_TO_ODB); + if (error < 0) + goto close_and_cleanup; + + if (error == 0) { /* note: git_filters_load returns filter count */ + error = git_futils_mmap_ro(map, fd, 0, (size_t)file->size); + file->flags |= GIT_DIFF_FILE_UNMAP_DATA; + } else { + git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; + + if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)file->size)) && + !(error = git_filters_apply(&filtered, &raw, &filters))) + { + map->len = git_buf_len(&filtered); + map->data = git_buf_detach(&filtered); + + file->flags |= GIT_DIFF_FILE_FREE_DATA; + } + + git_buf_free(&raw); + git_buf_free(&filtered); + } + +close_and_cleanup: + git_filters_free(&filters); + p_close(fd); + } + + /* once data is loaded, update OID if we didn't have it previously */ + if (!error && (file->flags & GIT_DIFF_FILE_VALID_OID) == 0) { + error = git_odb_hash( + &file->oid, map->data, map->len, GIT_OBJ_BLOB); + if (!error) + file->flags |= GIT_DIFF_FILE_VALID_OID; } + + if (!error) + error = diff_delta_is_binary_by_content(ctxt, file, map); + +cleanup: git_buf_free(&path); return error; } @@ -394,6 +511,7 @@ static int diff_delta_load(diff_delta_context *ctxt) { int error = 0; git_diff_delta *delta = ctxt->delta; + bool check_if_unmodified = false; if (ctxt->loaded || !ctxt->delta) return 0; @@ -405,75 +523,93 @@ static int diff_delta_load(diff_delta_context *ctxt) ctxt->old_data.len = 0; ctxt->old_blob = NULL; - if (!error && delta->binary != 1 && - (delta->status == GIT_DELTA_DELETED || - delta->status == GIT_DELTA_MODIFIED)) - { - if (ctxt->old_src == GIT_ITERATOR_WORKDIR) - error = get_workdir_content( - ctxt->repo, &delta->old_file, &ctxt->old_data); - else { - error = get_blob_content( - ctxt->repo, &delta->old_file.oid, - &ctxt->old_data, &ctxt->old_blob); - - if (ctxt->new_src == GIT_ITERATOR_WORKDIR) { - /* TODO: convert crlf of blob content */ - } - } - } - ctxt->new_data.data = ""; ctxt->new_data.len = 0; ctxt->new_blob = NULL; - if (!error && delta->binary != 1 && - (delta->status == GIT_DELTA_ADDED || - delta->status == GIT_DELTA_MODIFIED)) - { - if (ctxt->new_src == GIT_ITERATOR_WORKDIR) - error = get_workdir_content( - ctxt->repo, &delta->new_file, &ctxt->new_data); - else { - error = get_blob_content( - ctxt->repo, &delta->new_file.oid, - &ctxt->new_data, &ctxt->new_blob); - - if (ctxt->old_src == GIT_ITERATOR_WORKDIR) { - /* TODO: convert crlf of blob content */ - } - } + if (delta->binary == 1) + goto cleanup; - if (!error && !(delta->new_file.flags & GIT_DIFF_FILE_VALID_OID)) { - error = git_odb_hash( - &delta->new_file.oid, ctxt->new_data.data, - ctxt->new_data.len, GIT_OBJ_BLOB); - if (error < 0) - goto cleanup; + switch (delta->status) { + case GIT_DELTA_ADDED: + delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA; + break; + case GIT_DELTA_DELETED: + delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA; + break; + case GIT_DELTA_MODIFIED: + break; + default: + delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA; + delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA; + break; + } - delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID; +#define CHECK_UNMODIFIED (GIT_DIFF_FILE_NO_DATA | GIT_DIFF_FILE_VALID_OID) - /* since we did not have the definitive oid, we may have - * incorrect status and need to skip this item. - */ - if (delta->old_file.mode == delta->new_file.mode && - !git_oid_cmp(&delta->old_file.oid, &delta->new_file.oid)) - { - delta->status = GIT_DELTA_UNMODIFIED; + check_if_unmodified = + (delta->old_file.flags & CHECK_UNMODIFIED) == 0 && + (delta->new_file.flags & CHECK_UNMODIFIED) == 0; - if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) - goto cleanup; - } - } + /* Always try to load workdir content first, since it may need to be + * filtered (and hence use 2x memory) and we want to minimize the max + * memory footprint during diff. + */ + + if ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + ctxt->old_src == GIT_ITERATOR_WORKDIR) { + if ((error = get_workdir_content( + ctxt, &delta->old_file, &ctxt->old_data)) < 0) + goto cleanup; + if (delta->binary == 1) + goto cleanup; + } + + if ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + ctxt->new_src == GIT_ITERATOR_WORKDIR) { + if ((error = get_workdir_content( + ctxt, &delta->new_file, &ctxt->new_data)) < 0) + goto cleanup; + if (delta->binary == 1) + goto cleanup; + } + + if ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + ctxt->old_src != GIT_ITERATOR_WORKDIR) { + if ((error = get_blob_content( + ctxt, &delta->old_file, &ctxt->old_data, &ctxt->old_blob)) < 0) + goto cleanup; + if (delta->binary == 1) + goto cleanup; + } + + if ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + ctxt->new_src != GIT_ITERATOR_WORKDIR) { + if ((error = get_blob_content( + ctxt, &delta->new_file, &ctxt->new_data, &ctxt->new_blob)) < 0) + goto cleanup; + if (delta->binary == 1) + goto cleanup; } - /* if we have not already decided whether file is binary, - * check the first 4K for nul bytes to decide... + /* if we did not previously have the definitive oid, we may have + * incorrect status and need to switch this to UNMODIFIED. */ - if (!error && delta->binary == -1) - error = diff_delta_is_binary_by_content(ctxt); + if (check_if_unmodified && + delta->old_file.mode == delta->new_file.mode && + !git_oid_cmp(&delta->old_file.oid, &delta->new_file.oid)) + { + delta->status = GIT_DELTA_UNMODIFIED; + + if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) + goto cleanup; + } cleanup: + /* last change to update binary flag */ + if (delta->binary == -1) + update_delta_is_binary(delta); + ctxt->loaded = !error; /* flag if we would want to diff the contents of these files */ @@ -515,9 +651,10 @@ static int diff_delta_cb(void *priv, mmbuffer_t *bufs, int len) } if (len == 3 && !ctxt->cb_error) { - /* This should only happen if we are adding a line that does not - * have a newline at the end and the old code did. In that case, - * we have a ADD with a DEL_EOFNL as a pair. + /* If we have a '+' and a third buf, then we have added a line + * without a newline and the old code had one, so DEL_EOFNL. + * If we have a '-' and a third buf, then we have removed a line + * with out a newline but added a blank line, so ADD_EOFNL. */ char origin = (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_DEL_EOFNL : @@ -922,6 +1059,7 @@ static void set_data_from_blob( } else { map->data = ""; file->size = map->len = 0; + file->flags |= GIT_DIFF_FILE_NO_DATA; } } @@ -938,6 +1076,7 @@ int git_diff_blobs( diff_delta_context ctxt; git_diff_delta delta; git_blob *new, *old; + git_repository *repo; new = new_blob; old = old_blob; @@ -948,8 +1087,15 @@ int git_diff_blobs( new = swap; } + if (new) + repo = git_object_owner((git_object *)new); + else if (old) + repo = git_object_owner((git_object *)old); + else + repo = NULL; + diff_delta_init_context( - &ctxt, NULL, options, GIT_ITERATOR_TREE, GIT_ITERATOR_TREE); + &ctxt, repo, options, GIT_ITERATOR_TREE, GIT_ITERATOR_TREE); /* populate a "fake" delta record */ @@ -970,9 +1116,13 @@ int git_diff_blobs( if ((error = diff_delta_prep(&ctxt)) < 0) goto cleanup; - if (delta.binary == -1 && - (error = diff_delta_is_binary_by_content(&ctxt)) < 0) - goto cleanup; + if (delta.binary == -1) { + if ((error = diff_delta_is_binary_by_content( + &ctxt, &delta.old_file, &ctxt.old_data)) < 0 || + (error = diff_delta_is_binary_by_content( + &ctxt, &delta.new_file, &ctxt.new_data)) < 0) + goto cleanup; + } ctxt.loaded = 1; ctxt.diffable = (delta.binary != 1 && delta.status != GIT_DELTA_UNMODIFIED); @@ -1016,7 +1166,6 @@ struct git_diff_iterator { diff_delta_context ctxt; size_t file_index; size_t next_index; - size_t file_count; git_pool hunks; size_t hunk_count; diffiter_hunk *hunk_head; @@ -1140,8 +1289,6 @@ int git_diff_iterator_new( git_diff_iterator **iterator_ptr, git_diff_list *diff) { - size_t i; - git_diff_delta *delta; git_diff_iterator *iter; assert(diff && iterator_ptr); @@ -1162,12 +1309,6 @@ int git_diff_iterator_new( git_pool_init(&iter->lines, sizeof(diffiter_line), 0) < 0) goto fail; - git_vector_foreach(&diff->deltas, i, delta) { - if (diff_delta_should_skip(iter->ctxt.opts, delta)) - continue; - iter->file_count++; - } - *iterator_ptr = iter; return 0; @@ -1185,9 +1326,14 @@ void git_diff_iterator_free(git_diff_iterator *iter) git__free(iter); } -int git_diff_iterator_num_files(git_diff_iterator *iter) +float git_diff_iterator_progress(git_diff_iterator *iter) +{ + return (float)iter->next_index / (float)iter->diff->deltas.length; +} + +int git_diff_iterator__max_files(git_diff_iterator *iter) { - return (int)iter->file_count; + return (int)iter->diff->deltas.length; } int git_diff_iterator_num_hunks_in_file(git_diff_iterator *iter) diff --git a/src/filebuf.c b/src/filebuf.c index cfc8528e6..b9b908c8d 100644 --- a/src/filebuf.c +++ b/src/filebuf.c @@ -73,7 +73,7 @@ static int lock_file(git_filebuf *file, int flags) if ((flags & GIT_FILEBUF_APPEND) && git_path_exists(file->path_original) == true) { git_file source; char buffer[2048]; - size_t read_bytes; + ssize_t read_bytes; source = p_open(file->path_original, O_RDONLY); if (source < 0) { @@ -83,13 +83,18 @@ static int lock_file(git_filebuf *file, int flags) return -1; } - while ((read_bytes = p_read(source, buffer, 2048)) > 0) { + while ((read_bytes = p_read(source, buffer, sizeof(buffer))) > 0) { p_write(file->fd, buffer, read_bytes); if (file->digest) git_hash_update(file->digest, buffer, read_bytes); } p_close(source); + + if (read_bytes < 0) { + giterr_set(GITERR_OS, "Failed to read file '%s'", file->path_original); + return -1; + } } return 0; diff --git a/src/fileops.c b/src/fileops.c index 95eacb5f1..8ccf063d5 100644 --- a/src/fileops.c +++ b/src/fileops.c @@ -115,10 +115,33 @@ mode_t git_futils_canonical_mode(mode_t raw_mode) return 0; } -int git_futils_readbuffer_updated(git_buf *buf, const char *path, time_t *mtime, int *updated) +int git_futils_readbuffer_fd(git_buf *buf, git_file fd, size_t len) +{ + ssize_t read_size; + + git_buf_clear(buf); + + if (git_buf_grow(buf, len + 1) < 0) + return -1; + + /* p_read loops internally to read len bytes */ + read_size = p_read(fd, buf->ptr, len); + + if (read_size != (ssize_t)len) { + giterr_set(GITERR_OS, "Failed to read descriptor"); + return -1; + } + + buf->ptr[read_size] = '\0'; + buf->size = read_size; + + return 0; +} + +int git_futils_readbuffer_updated( + git_buf *buf, const char *path, time_t *mtime, int *updated) { git_file fd; - size_t len; struct stat st; assert(buf && path && *path); @@ -147,30 +170,11 @@ int git_futils_readbuffer_updated(git_buf *buf, const char *path, time_t *mtime, if (mtime != NULL) *mtime = st.st_mtime; - len = (size_t) st.st_size; - - git_buf_clear(buf); - - if (git_buf_grow(buf, len + 1) < 0) { + if (git_futils_readbuffer_fd(buf, fd, (size_t)st.st_size) < 0) { p_close(fd); return -1; } - buf->ptr[len] = '\0'; - - while (len > 0) { - ssize_t read_size = p_read(fd, buf->ptr, len); - - if (read_size < 0) { - p_close(fd); - giterr_set(GITERR_OS, "Failed to read descriptor for '%s'", path); - return -1; - } - - len -= read_size; - buf->size += read_size; - } - p_close(fd); if (updated != NULL) diff --git a/src/fileops.h b/src/fileops.h index 5c23ce30b..d2944f460 100644 --- a/src/fileops.h +++ b/src/fileops.h @@ -19,6 +19,7 @@ */ extern int git_futils_readbuffer(git_buf *obj, const char *path); extern int git_futils_readbuffer_updated(git_buf *obj, const char *path, time_t *mtime, int *updated); +extern int git_futils_readbuffer_fd(git_buf *obj, git_file fd, size_t len); /** * File utils diff --git a/src/object.c b/src/object.c index 5130d97ac..2e45eb86a 100644 --- a/src/object.c +++ b/src/object.c @@ -77,6 +77,58 @@ static int create_object(git_object **object_out, git_otype type) return 0; } +int git_object__from_odb_object( + git_object **object_out, + git_repository *repo, + git_odb_object *odb_obj, + git_otype type) +{ + int error; + git_object *object = NULL; + + if (type != GIT_OBJ_ANY && type != odb_obj->raw.type) { + giterr_set(GITERR_ODB, "The requested type does not match the type in the ODB"); + return GIT_ENOTFOUND; + } + + type = odb_obj->raw.type; + + if ((error = create_object(&object, type)) < 0) + return error; + + /* Initialize parent object */ + git_oid_cpy(&object->cached.oid, &odb_obj->cached.oid); + object->repo = repo; + + switch (type) { + case GIT_OBJ_COMMIT: + error = git_commit__parse((git_commit *)object, odb_obj); + break; + + case GIT_OBJ_TREE: + error = git_tree__parse((git_tree *)object, odb_obj); + break; + + case GIT_OBJ_TAG: + error = git_tag__parse((git_tag *)object, odb_obj); + break; + + case GIT_OBJ_BLOB: + error = git_blob__parse((git_blob *)object, odb_obj); + break; + + default: + break; + } + + if (error < 0) + git_object__free(object); + else + *object_out = git_cache_try_store(&repo->objects, object); + + return error; +} + int git_object_lookup_prefix( git_object **object_out, git_repository *repo, @@ -148,53 +200,11 @@ int git_object_lookup_prefix( if (error < 0) return error; - if (type != GIT_OBJ_ANY && type != odb_obj->raw.type) { - git_odb_object_free(odb_obj); - giterr_set(GITERR_ODB, "The given type does not match the type on the ODB"); - return GIT_ENOTFOUND; - } - - type = odb_obj->raw.type; - - if (create_object(&object, type) < 0) { - git_odb_object_free(odb_obj); - return -1; - } - - /* Initialize parent object */ - git_oid_cpy(&object->cached.oid, &odb_obj->cached.oid); - object->repo = repo; - - switch (type) { - case GIT_OBJ_COMMIT: - error = git_commit__parse((git_commit *)object, odb_obj); - break; - - case GIT_OBJ_TREE: - error = git_tree__parse((git_tree *)object, odb_obj); - break; - - case GIT_OBJ_TAG: - error = git_tag__parse((git_tag *)object, odb_obj); - break; - - case GIT_OBJ_BLOB: - error = git_blob__parse((git_blob *)object, odb_obj); - break; - - default: - break; - } + error = git_object__from_odb_object(object_out, repo, odb_obj, type); git_odb_object_free(odb_obj); - if (error < 0) { - git_object__free(object); - return -1; - } - - *object_out = git_cache_try_store(&repo->objects, object); - return 0; + return error; } int git_object_lookup(git_object **object_out, git_repository *repo, const git_oid *id, git_otype type) { diff --git a/src/object.h b/src/object.h new file mode 100644 index 000000000..bc12aad04 --- /dev/null +++ b/src/object.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_object_h__ +#define INCLUDE_object_h__ + +/** Base git object for inheritance */ +struct git_object { + git_cached_obj cached; + git_repository *repo; + git_otype type; +}; + +/* fully free the object; internal method, DO NOT EXPORT */ +void git_object__free(void *object); + +GIT_INLINE(int) git_object__dup(git_object **dest, git_object *source) +{ + git_cached_obj_incref(source); + *dest = source; + return 0; +} + +int git_object__from_odb_object( + git_object **object_out, + git_repository *repo, + git_odb_object *odb_obj, + git_otype type); + +int git_object__resolve_to_type(git_object **obj, git_otype type); + +int git_oid__parse(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header); + +void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid); + +#endif @@ -12,6 +12,7 @@ #include "hash.h" #include "odb.h" #include "delta-apply.h" +#include "filter.h" #include "git2/odb_backend.h" #include "git2/oid.h" @@ -114,32 +115,62 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type) int hdr_len; char hdr[64], buffer[2048]; git_hash_ctx *ctx; + ssize_t read_len; hdr_len = format_object_header(hdr, sizeof(hdr), size, type); ctx = git_hash_new_ctx(); + GITERR_CHECK_ALLOC(ctx); git_hash_update(ctx, hdr, hdr_len); - while (size > 0) { - ssize_t read_len = read(fd, buffer, sizeof(buffer)); - - if (read_len < 0) { - git_hash_free_ctx(ctx); - giterr_set(GITERR_OS, "Error reading file"); - return -1; - } - + while (size > 0 && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) { git_hash_update(ctx, buffer, read_len); size -= read_len; } + /* If p_read returned an error code, the read obviously failed. + * If size is not zero, the file was truncated after we originally + * stat'd it, so we consider this a read failure too */ + if (read_len < 0 || size > 0) { + git_hash_free_ctx(ctx); + giterr_set(GITERR_OS, "Error reading file for hashing"); + return -1; + } + git_hash_final(out, ctx); git_hash_free_ctx(ctx); return 0; } +int git_odb__hashfd_filtered( + git_oid *out, git_file fd, size_t size, git_otype type, git_vector *filters) +{ + int error; + git_buf raw = GIT_BUF_INIT; + git_buf filtered = GIT_BUF_INIT; + + if (!filters || !filters->length) + return git_odb__hashfd(out, fd, size, type); + + /* size of data is used in header, so we have to read the whole file + * into memory to apply filters before beginning to calculate the hash + */ + + if (!(error = git_futils_readbuffer_fd(&raw, fd, size))) + error = git_filters_apply(&filtered, &raw, filters); + + git_buf_free(&raw); + + if (!error) + error = git_odb_hash(out, filtered.ptr, filtered.size, type); + + git_buf_free(&filtered); + + return error; +} + int git_odb__hashlink(git_oid *out, const char *path) { struct stat st; @@ -171,7 +202,7 @@ int git_odb__hashlink(git_oid *out, const char *path) result = git_odb_hash(out, link_data, (size_t)size, GIT_OBJ_BLOB); git__free(link_data); - } else { + } else { int fd = git_futils_open_ro(path); if (fd < 0) return -1; @@ -485,19 +516,36 @@ int git_odb_exists(git_odb *db, const git_oid *id) int git_odb_read_header(size_t *len_p, git_otype *type_p, git_odb *db, const git_oid *id) { + int error; + git_odb_object *object; + + error = git_odb__read_header_or_object(&object, len_p, type_p, db, id); + + if (object) + git_odb_object_free(object); + + return error; +} + +int git_odb__read_header_or_object( + git_odb_object **out, size_t *len_p, git_otype *type_p, + git_odb *db, const git_oid *id) +{ unsigned int i; int error = GIT_ENOTFOUND; git_odb_object *object; - assert(db && id); + assert(db && id && out && len_p && type_p); if ((object = git_cache_get(&db->cache, id)) != NULL) { *len_p = object->raw.len; *type_p = object->raw.type; - git_odb_object_free(object); + *out = object; return 0; } + *out = NULL; + for (i = 0; i < db->backends.length && error < 0; ++i) { backend_internal *internal = git_vector_get(&db->backends, i); git_odb_backend *b = internal->backend; @@ -518,7 +566,8 @@ int git_odb_read_header(size_t *len_p, git_otype *type_p, git_odb *db, const git *len_p = object->raw.len; *type_p = object->raw.type; - git_odb_object_free(object); + *out = object; + return 0; } @@ -58,12 +58,19 @@ int git_odb__hashobj(git_oid *id, git_rawobj *obj); int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type); /* - * Hash a `path`, assuming it could be a POSIX symlink: if the path is a symlink, - * then the raw contents of the symlink will be hashed. Otherwise, this will - * fallback to `git_odb__hashfd`. + * Hash an open file descriptor applying an array of filters + * Acts just like git_odb__hashfd with the addition of filters... + */ +int git_odb__hashfd_filtered( + git_oid *out, git_file fd, size_t len, git_otype type, git_vector *filters); + +/* + * Hash a `path`, assuming it could be a POSIX symlink: if the path is a + * symlink, then the raw contents of the symlink will be hashed. Otherwise, + * this will fallback to `git_odb__hashfd`. * - * The hash type for this call is always `GIT_OBJ_BLOB` because symlinks may only - * point to blobs. + * The hash type for this call is always `GIT_OBJ_BLOB` because symlinks may + * only point to blobs. */ int git_odb__hashlink(git_oid *out, const char *path); @@ -77,4 +84,12 @@ int git_odb__error_notfound(const char *message, const git_oid *oid); */ int git_odb__error_ambiguous(const char *message); +/* + * Attempt to read object header or just return whole object if it could + * not be read. + */ +int git_odb__read_header_or_object( + git_odb_object **out, size_t *len_p, git_otype *type_p, + git_odb *db, const git_oid *id); + #endif diff --git a/src/repository.h b/src/repository.h index 4aa8af292..b919fd814 100644 --- a/src/repository.h +++ b/src/repository.h @@ -18,6 +18,7 @@ #include "refs.h" #include "buffer.h" #include "odb.h" +#include "object.h" #include "attr.h" #include "strmap.h" @@ -75,13 +76,6 @@ enum { GIT_REPOSITORY_INIT__IS_REINIT = (1u << 18), }; -/** Base git object for inheritance */ -struct git_object { - git_cached_obj cached; - git_repository *repo; - git_otype type; -}; - /** Internal structure for repository object */ struct git_repository { git_odb *_odb; @@ -102,21 +96,6 @@ struct git_repository { git_cvar_value cvar_cache[GIT_CVAR_CACHE_MAX]; }; -/* fully free the object; internal method, DO NOT EXPORT */ -void git_object__free(void *object); - -GIT_INLINE(int) git_object__dup(git_object **dest, git_object *source) -{ - git_cached_obj_incref(source); - *dest = source; - return 0; -} - -int git_object__resolve_to_type(git_object **obj, git_otype type); - -int git_oid__parse(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header); -void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid); - GIT_INLINE(git_attr_cache *) git_repository_attr_cache(git_repository *repo) { return &repo->attrcache; @@ -136,7 +115,7 @@ int git_repository_odb__weakptr(git_odb **out, git_repository *repo); int git_repository_index__weakptr(git_index **out, git_repository *repo); /* - * CVAR cache + * CVAR cache * * Efficient access to the most used config variables of a repository. * The cache is cleared everytime the config backend is replaced. diff --git a/tests-clar/diff/diff_helpers.c b/tests-clar/diff/diff_helpers.c index 59e01802c..767b34392 100644 --- a/tests-clar/diff/diff_helpers.c +++ b/tests-clar/diff/diff_helpers.c @@ -89,7 +89,8 @@ int diff_line_fn( e->line_adds++; break; case GIT_DIFF_LINE_ADD_EOFNL: - assert(0); + /* technically not a line add, but we'll count it as such */ + e->line_adds++; break; case GIT_DIFF_LINE_DELETION: e->line_dels++; @@ -111,23 +112,21 @@ int diff_foreach_via_iterator( git_diff_hunk_fn hunk_cb, git_diff_data_fn line_cb) { - int error, curr, total; + int error; git_diff_iterator *iter; git_diff_delta *delta; if ((error = git_diff_iterator_new(&iter, diff)) < 0) return error; - curr = 0; - total = git_diff_iterator_num_files(iter); - while (!(error = git_diff_iterator_next_file(&delta, iter))) { git_diff_range *range; const char *hdr; size_t hdr_len; + float progress = git_diff_iterator_progress(iter); /* call file_cb for this file */ - if (file_cb != NULL && file_cb(data, delta, (float)curr / total) != 0) + if (file_cb != NULL && file_cb(data, delta, progress) != 0) goto abort; if (!hunk_cb && !line_cb) diff --git a/tests-clar/diff/diffiter.c b/tests-clar/diff/diffiter.c index 56c254741..23071e48b 100644 --- a/tests-clar/diff/diffiter.c +++ b/tests-clar/diff/diffiter.c @@ -114,3 +114,88 @@ void test_diff_diffiter__iterate_files_and_hunks(void) git_diff_iterator_free(iter); git_diff_list_free(diff); } + +void test_diff_diffiter__max_size_threshold(void) +{ + git_repository *repo = cl_git_sandbox_init("status"); + git_diff_options opts = {0}; + git_diff_list *diff = NULL; + git_diff_iterator *iter; + git_diff_delta *delta; + int error, file_count = 0, binary_count = 0, hunk_count = 0; + + opts.context_lines = 3; + opts.interhunk_lines = 1; + opts.flags |= GIT_DIFF_INCLUDE_IGNORED | GIT_DIFF_INCLUDE_UNTRACKED; + + cl_git_pass(git_diff_workdir_to_index(repo, &opts, &diff)); + cl_git_pass(git_diff_iterator_new(&iter, diff)); + + while ((error = git_diff_iterator_next_file(&delta, iter)) != GIT_ITEROVER) { + cl_assert_equal_i(0, error); + cl_assert(delta); + + file_count++; + + hunk_count += git_diff_iterator_num_hunks_in_file(iter); + + assert(delta->binary == 0 || delta->binary == 1); + + binary_count += delta->binary; + } + + cl_assert_equal_i(GIT_ITEROVER, error); + cl_assert(delta == NULL); + + cl_assert_equal_i(13, file_count); + cl_assert_equal_i(0, binary_count); + cl_assert_equal_i(8, hunk_count); + + git_diff_iterator_free(iter); + git_diff_list_free(diff); + + /* try again with low file size threshold */ + + file_count = 0; + binary_count = 0; + hunk_count = 0; + + opts.context_lines = 3; + opts.interhunk_lines = 1; + opts.flags |= GIT_DIFF_INCLUDE_IGNORED | GIT_DIFF_INCLUDE_UNTRACKED; + opts.max_size = 50; /* treat anything over 50 bytes as binary! */ + + cl_git_pass(git_diff_workdir_to_index(repo, &opts, &diff)); + cl_git_pass(git_diff_iterator_new(&iter, diff)); + + while ((error = git_diff_iterator_next_file(&delta, iter)) != GIT_ITEROVER) { + cl_assert_equal_i(0, error); + cl_assert(delta); + + file_count++; + + hunk_count += git_diff_iterator_num_hunks_in_file(iter); + + assert(delta->binary == 0 || delta->binary == 1); + + binary_count += delta->binary; + } + + cl_assert_equal_i(GIT_ITEROVER, error); + cl_assert(delta == NULL); + + cl_assert_equal_i(13, file_count); + + /* Three files are over the 50 byte threshold: + * - staged_changes_file_deleted + * - staged_changes_modified_file + * - staged_new_file_modified_file + */ + cl_assert_equal_i(3, binary_count); + + cl_assert_equal_i(5, hunk_count); + + git_diff_iterator_free(iter); + git_diff_list_free(diff); + +} diff --git a/tests-clar/status/worktree.c b/tests-clar/status/worktree.c index 75975c988..05e396e1f 100644 --- a/tests-clar/status/worktree.c +++ b/tests-clar/status/worktree.c @@ -797,3 +797,47 @@ void test_status_worktree__interruptable_foreach(void) cl_assert_equal_i(8, count); } + +void test_status_worktree__new_staged_file_must_handle_crlf(void) +{ + git_repository *repo; + git_index *index; + git_config *config; + unsigned int status; + + cl_git_pass(git_repository_init(&repo, "getting_started", 0)); + + // Ensure that repo has core.autocrlf=true + cl_git_pass(git_repository_config(&config, repo)); + cl_git_pass(git_config_set_bool(config, "core.autocrlf", true)); + + cl_git_mkfile("getting_started/testfile.txt", "content\r\n"); // Content with CRLF + + cl_git_pass(git_repository_index(&index, repo)); + cl_git_pass(git_index_add(index, "testfile.txt", 0)); + cl_git_pass(git_index_write(index)); + + cl_git_pass(git_status_file(&status, repo, "testfile.txt")); + cl_assert_equal_i(GIT_STATUS_INDEX_NEW, status); + + git_config_free(config); + git_index_free(index); + git_repository_free(repo); +} + +void test_status_worktree__line_endings_dont_count_as_changes_with_autocrlf(void) +{ + git_repository *repo = cl_git_sandbox_init("status"); + git_config *config; + unsigned int status; + + cl_git_pass(git_repository_config(&config, repo)); + cl_git_pass(git_config_set_bool(config, "core.autocrlf", true)); + git_config_free(config); + + cl_git_rewritefile("status/current_file", "current_file\r\n"); + + cl_git_pass(git_status_file(&status, repo, "current_file")); + + cl_assert_equal_i(GIT_STATUS_CURRENT, status); +} |