diff options
author | Russell Belfer <rb@github.com> | 2013-02-19 10:25:41 -0800 |
---|---|---|
committer | Russell Belfer <rb@github.com> | 2013-02-20 15:09:41 -0800 |
commit | 9bc8be3d7e5134de1d912c7ef08d6207079bd8c1 (patch) | |
tree | f4840ab516ccee1ce427201624b2deaff3d81c7b /include/git2/diff.h | |
parent | a235e9d355c2188eb35efeac8147b2e8b626caa3 (diff) | |
download | libgit2-9bc8be3d7e5134de1d912c7ef08d6207079bd8c1.tar.gz |
Refine pluggable similarity API
This plugs in the three basic similarity strategies for handling
whitespace via internal use of the pluggable API. In so doing, I
realized that the use of git_buf in the hashsig API was not needed
and actually just made it harder to use, so I tweaked that API as
well.
Note that the similarity metric is still not hooked up in the
find_similarity code; this commit only sets out the function that
will be used.
Diffstat (limited to 'include/git2/diff.h')
-rw-r--r-- | include/git2/diff.h | 30 |
1 file changed, 28 insertions, 2 deletions
```diff
diff --git a/include/git2/diff.h b/include/git2/diff.h
index d90fedfbd..c0f48368e 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -387,20 +387,46 @@ typedef enum {
 
 	/** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */
 	GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4),
+
+	/** measure similarity ignoring leading whitespace (default) */
+	GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
+	/** measure similarity ignoring all whitespace */
+	GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 6),
+	/** measure similarity including all data */
+	GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 7),
 } git_diff_find_t;
 
 /**
  * Pluggable similarity metric
  */
 typedef struct {
-	int (*calc_signature)(void **out, const git_diff_file *file, void *payload);
+	int (*file_signature)(
+		void **out, const git_diff_file *file,
+		const char *fullpath, void *payload);
+	int (*buffer_signature)(
+		void **out, const git_diff_file *file,
+		const char *buf, size_t buflen, void *payload);
 	void (*free_signature)(void *sig, void *payload);
-	int (*calc_similarity)(int *score, void *siga, void *sigb, void *payload);
+	int (*similarity)(int *score, void *siga, void *sigb, void *payload);
 	void *payload;
 } git_diff_similarity_metric;
 
 /**
  * Control behavior of rename and copy detection
+ *
+ * These options mostly mimic parameters that can be passed to git-diff.
+ *
+ * - `rename_threshold` is the same as the -M option with a value
+ * - `copy_threshold` is the same as the -C option with a value
+ * - `rename_from_rewrite_threshold` matches the top of the -B option
+ * - `break_rewrite_threshold` matches the bottom of the -B option
+ * - `target_limit` matches the -l option
+ *
+ * The `metric` option allows you to plug in a custom similarity metric.
+ * Set it to NULL for the default internal metric which is based on sampling
+ * hashes of ranges of data in the file.  The default metric is a pretty
+ * good similarity approximation that should work fairly well for both text
+ * and binary data, and is pretty fast with fixed memory overhead.
  */
 typedef struct {
 	unsigned int version;
```