summary | refs | log | tree | commit | diff
path: root/tests-clar/diff/rename.c
diff options
context:
space:
mode:
author: Russell Belfer <rb@github.com>, 2013-07-31 16:40:42 -0700
committer: Russell Belfer <rb@github.com>, 2013-07-31 16:40:42 -0700
commit: d730d3f4f0efb269dd760a3100ae86c460b8ba36 (patch)
tree: f18efb0a929734ca2668b8a0f4762a0661810397 /tests-clar/diff/rename.c
parent: 8dd8aa480ba46863e9c7df40bb9695e88a0286ee (diff)
download: libgit2-d730d3f4f0efb269dd760a3100ae86c460b8ba36.tar.gz
Major rename detection changes
After doing further profiling, I found that a lot of time was being spent attempting to insert hashes into the file hash signature when using the rolling hash, because the rolling hash approach generates a hash per byte of the file instead of one per run/line of data. To optimize this, I decided to convert back to a run-based file signature algorithm, which is more like core Git. After changing this, a number of the existing tests started to fail. In some cases, this appears to have been because the test was coded to be too specific to the particular results of the file similarity metric, and in some cases there appear to have been bugs in the core rename detection code where the expected results were being generated only by the coincidence of the file similarity scoring. This commit also renames all the variables in the core rename detection code to be more consistent and hopefully easier to follow, which made it a bit easier to reason about the behavior of that code and fix the problems that I was seeing. I think it's in better shape now. There are now a couple of tests that attempt to stress test the rename detection code, and they are quite slow. Most of the time is spent setting up the test data on disk and in the index. When we roll out performance improvements for index insertion, it should hopefully speed up these tests as well.
Diffstat (limited to 'tests-clar/diff/rename.c')
-rw-r--r-- tests-clar/diff/rename.c | 99
1 files changed, 73 insertions, 26 deletions
diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c
index 79c89e362..20ee66288 100644
--- a/tests-clar/diff/rename.c
+++ b/tests-clar/diff/rename.c
@@ -236,6 +236,8 @@ void test_diff_rename__not_exact_match(void)
&diff, g_repo, old_tree, new_tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL;
+ opts.break_rewrite_threshold = 70;
+
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
@@ -312,8 +314,8 @@ void test_diff_rename__not_exact_match(void)
/* the default match algorithm is going to find the internal
* whitespace differences in the lines of sixserving.txt to be
- * significant enough that this will decide to split it into
- * an ADD and a DELETE
+ * significant enough that this will decide to split it into an ADD
+ * and a DELETE
*/
memset(&exp, 0, sizeof(exp));
@@ -480,6 +482,7 @@ void test_diff_rename__working_directory_changes(void)
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE;
+ opts.rename_threshold = 70;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
@@ -1123,7 +1126,10 @@ void test_diff_rename__unmodified_can_be_renamed(void)
git_tree_free(tree);
}
-void test_diff_rename__many_files(void)
+#define ANOTHER_POEM \
+"OH, glorious are the guarded heights\nWhere guardian souls abide—\nSelf-exiled from our gross delights—\nAbove, beyond, outside:\nAn ampler arc their spirit swings—\nCommands a juster view—\nWe have their word for all these things,\nNo doubt their words are true.\n\nYet we, the bond slaves of our day,\nWhom dirt and danger press—\nCo-heirs of insolence, delay,\nAnd leagued unfaithfulness—\nSuch is our need must seek indeed\nAnd, having found, engage\nThe men who merely do the work\nFor which they draw the wage.\n\nFrom forge and farm and mine and bench,\nDeck, altar, outpost lone—\nMill, school, battalion, counter, trench,\nRail, senate, sheepfold, throne—\nCreation's cry goes up on high\nFrom age to cheated age:\n\"Send us the men who do the work\n\"For which they draw the wage!\"\n"
+
+static void test_with_many(size_t expected_new)
{
git_index *index;
git_tree *tree, *new_tree;
@@ -1131,9 +1137,6 @@ void test_diff_rename__many_files(void)
diff_expects exp;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
- git_buf b = GIT_BUF_INIT;
- int i, j;
- char tmp[64];
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(
@@ -1142,18 +1145,6 @@ void test_diff_rename__many_files(void)
cl_git_pass(p_rename("renames/ikeepsix.txt", "renames/ikeepsix2.txt"));
cl_git_pass(git_index_remove_bypath(index, "ikeepsix.txt"));
cl_git_pass(git_index_add_bypath(index, "ikeepsix2.txt"));
-
- for (i = 0; i < 100; i += 2) {
- snprintf(tmp, sizeof(tmp), "renames/newfile%03d", i);
-
- for (j = 0; j < i * 128; ++j)
- git_buf_printf(&b, "more content %d\n", i);
-
- cl_git_mkfile(tmp, b.ptr);
- cl_git_pass(git_index_add_bypath(index, tmp + strlen("renames/")));
- }
- git_buf_free(&b);
-
cl_git_pass(git_index_write(index));
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
@@ -1162,8 +1153,8 @@ void test_diff_rename__many_files(void)
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, NULL, NULL, &exp));
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
- cl_assert_equal_i(51, exp.file_status[GIT_DELTA_ADDED]);
- cl_assert_equal_i(52, exp.files);
+ cl_assert_equal_i(expected_new + 1, exp.file_status[GIT_DELTA_ADDED]);
+ cl_assert_equal_i(expected_new + 2, exp.files);
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
@@ -1172,8 +1163,8 @@ void test_diff_rename__many_files(void)
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, NULL, NULL, &exp));
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
- cl_assert_equal_i(50, exp.file_status[GIT_DELTA_ADDED]);
- cl_assert_equal_i(51, exp.files);
+ cl_assert_equal_i(expected_new, exp.file_status[GIT_DELTA_ADDED]);
+ cl_assert_equal_i(expected_new + 1, exp.files);
git_diff_list_free(diff);
@@ -1206,8 +1197,8 @@ void test_diff_rename__many_files(void)
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, NULL, NULL, &exp));
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
- cl_assert_equal_i(51, exp.file_status[GIT_DELTA_ADDED]);
- cl_assert_equal_i(52, exp.files);
+ cl_assert_equal_i(expected_new + 1, exp.file_status[GIT_DELTA_ADDED]);
+ cl_assert_equal_i(expected_new + 2, exp.files);
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
@@ -1216,8 +1207,8 @@ void test_diff_rename__many_files(void)
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, NULL, NULL, &exp));
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
- cl_assert_equal_i(50, exp.file_status[GIT_DELTA_ADDED]);
- cl_assert_equal_i(51, exp.files);
+ cl_assert_equal_i(expected_new, exp.file_status[GIT_DELTA_ADDED]);
+ cl_assert_equal_i(expected_new + 1, exp.files);
git_diff_list_free(diff);
@@ -1225,3 +1216,59 @@ void test_diff_rename__many_files(void)
git_tree_free(tree);
git_index_free(index);
}
+
+void test_diff_rename__many_files(void) /* creates 100 new files, indexes them, then runs the shared test_with_many checks */
+{
+ git_index *index;
+ char tmp[64];
+ int i, j;
+ git_buf b = GIT_BUF_INIT;
+ /* open the index of the fixture repository g_repo */
+ cl_git_pass(git_repository_index(&index, g_repo));
+ /* pass 1: write renames/newfile000 .. renames/newfile099 */
+ for (i = 0; i < 100; i += 1) {
+ snprintf(tmp, sizeof(tmp), "renames/newfile%03d", i);
+ for (j = i * 256; j > 0; --j) /* i * 256 more lines for file i; no iterations when i == 0 */
+ git_buf_printf(&b, "more content %d\n", i);
+ cl_git_mkfile(tmp, b.ptr); /* b is never cleared in this loop; NOTE(review): assuming git_buf_printf appends, each file also carries the lines of all earlier files - confirm this is intended */
+ }
+ /* pass 2: add the same 100 paths to the index */
+ for (i = 0; i < 100; i += 1) {
+ snprintf(tmp, sizeof(tmp), "renames/newfile%03d", i);
+ cl_git_pass(git_index_add_bypath(index, tmp + strlen("renames/"))); /* pointer is advanced past the leading renames/ directory; presumably index paths are workdir-relative - verify */
+ }
+ /* release this function's buffer and index handle before the shared checks */
+ git_buf_free(&b);
+ git_index_free(index);
+ /* 100 is the number of newfileNNN entries added above (the expected_new argument) */
+ test_with_many(100);
+}
+
+void test_diff_rename__again_many_files(void) /* creates 2500 new files that differ only in their first 8 characters, indexes them, then runs the shared test_with_many checks */
+{
+ git_index *index;
+ char tmp[64];
+ int i;
+ git_buf b = GIT_BUF_INIT;
+ /* open the index of the fixture repository g_repo */
+ cl_git_pass(git_repository_index(&index, g_repo));
+ /* template: 8-digit counter line, poem, a second counter line (left at 0 for every file), then the poem twice */
+ git_buf_printf(&b, "%08d\n" ANOTHER_POEM "%08d\n" ANOTHER_POEM ANOTHER_POEM, 0, 0);
+ /* pass 1: stamp i into the first counter and write one file per i */
+ for (i = 0; i < 2500; i += 1) {
+ snprintf(tmp, sizeof(tmp), "renames/newfile%03d", i); /* %03d is a minimum width, so i >= 1000 produces four digits */
+ snprintf(b.ptr, 9, "%08d", i); /* rewrites the first 8 digits in place and stores a NUL at b.ptr[8] */
+ b.ptr[8] = '\n'; /* restore the newline that the NUL terminator just overwrote */
+ cl_git_mkfile(tmp, b.ptr);
+ }
+ git_buf_free(&b);
+ /* pass 2: add the same 2500 paths to the index */
+ for (i = 0; i < 2500; i += 1) {
+ snprintf(tmp, sizeof(tmp), "renames/newfile%03d", i);
+ cl_git_pass(git_index_add_bypath(index, tmp + strlen("renames/")));
+ }
+ /* release this function's index handle before the shared checks */
+ git_index_free(index);
+ /* 2500 is the number of newfile entries added above (the expected_new argument) */
+ test_with_many(2500);
+}