diff options
| author | Edward Thomson <ethomson@edwardthomson.com> | 2021-05-10 23:04:59 +0100 |
|---|---|---|
| committer | Edward Thomson <ethomson@edwardthomson.com> | 2021-05-11 01:29:22 +0100 |
| commit | d525e063ba4e478cc4afac4cdf60f7acd989dbf2 (patch) | |
| tree | 40cad165fb5324ae430ebccdf7a1cc7de6687472 /src | |
| parent | 4bd172087c30e09e7720a7df11cace47ee002256 (diff) | |
| download | libgit2-d525e063ba4e478cc4afac4cdf60f7acd989dbf2.tar.gz | |
buf: remove internal `git_buf_text` namespace
The `git_buf_text` namespace is unnecessary and strange. Remove it,
just keep the functions prefixed with `git_buf`.
Diffstat (limited to 'src')
| -rw-r--r-- | src/attr_file.c | 3 | ||||
| -rw-r--r-- | src/blob.c | 3 | ||||
| -rw-r--r-- | src/buf_text.c | 316 | ||||
| -rw-r--r-- | src/buf_text.h | 122 | ||||
| -rw-r--r-- | src/buffer.c | 320 | ||||
| -rw-r--r-- | src/buffer.h | 92 | ||||
| -rw-r--r-- | src/checkout.c | 1 | ||||
| -rw-r--r-- | src/config.c | 3 | ||||
| -rw-r--r-- | src/config_parse.c | 4 | ||||
| -rw-r--r-- | src/crlf.c | 9 | ||||
| -rw-r--r-- | src/diff_driver.c | 5 | ||||
| -rw-r--r-- | src/ident.c | 3 | ||||
| -rw-r--r-- | src/pathspec.c | 5 | ||||
| -rw-r--r-- | src/submodule.c | 1 |
14 files changed, 414 insertions, 473 deletions
diff --git a/src/attr_file.c b/src/attr_file.c index d1b90c5ba..6575e5a49 100644 --- a/src/attr_file.c +++ b/src/attr_file.c @@ -10,7 +10,6 @@ #include "repository.h" #include "filebuf.h" #include "attrcache.h" -#include "buf_text.h" #include "git2/blob.h" #include "git2/tree.h" #include "blob.h" @@ -192,7 +191,7 @@ int git_attr_file__load( /* advance over a UTF8 BOM */ content_str = git_buf_cstr(&content); - bom_offset = git_buf_text_detect_bom(&bom, &content); + bom_offset = git_buf_detect_bom(&bom, &content); if (bom == GIT_BOM_UTF8) content_str += bom_offset; diff --git a/src/blob.c b/src/blob.c index ddc2494c8..169e34503 100644 --- a/src/blob.c +++ b/src/blob.c @@ -14,7 +14,6 @@ #include "filebuf.h" #include "filter.h" -#include "buf_text.h" const void *git_blob_rawcontent(const git_blob *blob) { @@ -401,7 +400,7 @@ int git_blob_is_binary(const git_blob *blob) git_buf_attach_notowned(&content, git_blob_rawcontent(blob), (size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL)); - return git_buf_text_is_binary(&content); + return git_buf_is_binary(&content); } int git_blob_filter_options_init( diff --git a/src/buf_text.c b/src/buf_text.c deleted file mode 100644 index 0fd22319f..000000000 --- a/src/buf_text.c +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ -#include "buf_text.h" - -int git_buf_text_puts_escaped( - git_buf *buf, - const char *string, - const char *esc_chars, - const char *esc_with) -{ - const char *scan; - size_t total = 0, esc_len = strlen(esc_with), count, alloclen; - - if (!string) - return 0; - - for (scan = string; *scan; ) { - /* count run of non-escaped characters */ - count = strcspn(scan, esc_chars); - total += count; - scan += count; - /* count run of escaped characters */ - count = strspn(scan, esc_chars); - total += count * (esc_len + 1); - scan += count; - } - - GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, total, 1); - if (git_buf_grow_by(buf, alloclen) < 0) - return -1; - - for (scan = string; *scan; ) { - count = strcspn(scan, esc_chars); - - memmove(buf->ptr + buf->size, scan, count); - scan += count; - buf->size += count; - - for (count = strspn(scan, esc_chars); count > 0; --count) { - /* copy escape sequence */ - memmove(buf->ptr + buf->size, esc_with, esc_len); - buf->size += esc_len; - /* copy character to be escaped */ - buf->ptr[buf->size] = *scan; - buf->size++; - scan++; - } - } - - buf->ptr[buf->size] = '\0'; - - return 0; -} - -void git_buf_text_unescape(git_buf *buf) -{ - buf->size = git__unescape(buf->ptr); -} - -int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) -{ - const char *scan = src->ptr; - const char *scan_end = src->ptr + src->size; - const char *next = memchr(scan, '\r', src->size); - size_t new_size; - char *out; - - GIT_ASSERT(tgt != src); - - if (!next) - return git_buf_set(tgt, src->ptr, src->size); - - /* reduce reallocs while in the loop */ - GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1); - if (git_buf_grow(tgt, new_size) < 0) - return -1; - - out = tgt->ptr; - tgt->size = 0; - - /* Find the next \r and copy whole chunk up to there to tgt */ - for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) { - if (next > scan) { - size_t copylen = (size_t)(next - scan); - memcpy(out, scan, copylen); - out += copylen; - } - - /* Do not drop \r unless it is followed by \n */ - if (next + 1 == scan_end || next[1] != '\n') - *out++ = '\r'; - } - - /* Copy remaining input into dest */ - if (scan < scan_end) { - size_t remaining = (size_t)(scan_end - scan); - memcpy(out, scan, remaining); - out += remaining; - } - - tgt->size = (size_t)(out - tgt->ptr); - tgt->ptr[tgt->size] = '\0'; - - return 0; -} - -int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src) -{ - const char *start = src->ptr; - const char *end = start + src->size; - const char *scan = start; - const char *next = memchr(scan, '\n', src->size); - size_t alloclen; - - GIT_ASSERT(tgt != src); - - if (!next) - return git_buf_set(tgt, src->ptr, src->size); - - /* attempt to reduce reallocs while in the loop */ - GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4); - GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1); - if (git_buf_grow(tgt, alloclen) < 0) - return -1; - tgt->size = 0; - - for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) { - size_t copylen = next - scan; - - /* if we find mixed line endings, carry on */ - if (copylen && next[-1] == '\r') - copylen--; - - GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, copylen, 3); - if (git_buf_grow_by(tgt, alloclen) < 0) - return -1; - - if (copylen) { - memcpy(tgt->ptr + tgt->size, scan, copylen); - tgt->size += copylen; - } - - tgt->ptr[tgt->size++] = '\r'; - tgt->ptr[tgt->size++] = '\n'; - } - - tgt->ptr[tgt->size] = '\0'; - return git_buf_put(tgt, scan, end - scan); -} - -int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings) -{ - size_t i; - const char *str, *pfx; - - git_buf_clear(buf); - - if (!strings || !strings->count) - return 0; - - /* initialize common prefix to first string */ - if (git_buf_sets(buf, strings->strings[0]) < 0) - return -1; - - /* go through the rest of the strings, truncating to shared prefix */ - for (i = 1; i < strings->count; ++i) { - - for (str = strings->strings[i], pfx = buf->ptr; - *str && *str == *pfx; str++, pfx++) - /* scanning */; - - git_buf_truncate(buf, pfx - buf->ptr); - - if (!buf->size) - break; - } - - return 0; -} - -bool git_buf_text_is_binary(const git_buf *buf) -{ - const char *scan = buf->ptr, *end = buf->ptr + buf->size; - git_bom_t bom; - int printable = 0, nonprintable = 0; - - scan += git_buf_text_detect_bom(&bom, buf); - - if (bom > GIT_BOM_UTF8) - return 1; - - while (scan < end) { - unsigned char c = *scan++; - - /* Printable characters are those above SPACE (0x1F) excluding DEL, - * and including BS, ESC and FF. - */ - if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014') - printable++; - else if (c == '\0') - return true; - else if (!git__isspace(c)) - nonprintable++; - } - - return ((printable >> 7) < nonprintable); -} - -bool git_buf_text_contains_nul(const git_buf *buf) -{ - return (memchr(buf->ptr, '\0', buf->size) != NULL); -} - -int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf) -{ - const char *ptr; - size_t len; - - *bom = GIT_BOM_NONE; - /* need at least 2 bytes to look for any BOM */ - if (buf->size < 2) - return 0; - - ptr = buf->ptr; - len = buf->size; - - switch (*ptr++) { - case 0: - if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') { - *bom = GIT_BOM_UTF32_BE; - return 4; - } - break; - case '\xEF': - if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') { - *bom = GIT_BOM_UTF8; - return 3; - } - break; - case '\xFE': - if (*ptr == '\xFF') { - *bom = GIT_BOM_UTF16_BE; - return 2; - } - break; - case '\xFF': - if (*ptr != '\xFE') - break; - if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) { - *bom = GIT_BOM_UTF32_LE; - return 4; - } else { - *bom = GIT_BOM_UTF16_LE; - return 2; - } - break; - default: - break; - } - - return 0; -} - -bool git_buf_text_gather_stats( - git_buf_text_stats *stats, const git_buf *buf, bool skip_bom) -{ - const char *scan = buf->ptr, *end = buf->ptr + buf->size; - int skip; - - memset(stats, 0, sizeof(*stats)); - - /* BOM detection */ - skip = git_buf_text_detect_bom(&stats->bom, buf); - if (skip_bom) - scan += skip; - - /* Ignore EOF character */ - if (buf->size > 0 && end[-1] == '\032') - end--; - - /* Counting loop */ - while (scan < end) { - unsigned char c = *scan++; - - if (c > 0x1F && c != 0x7F) - stats->printable++; - else switch (c) { - case '\0': - stats->nul++; - stats->nonprintable++; - break; - case '\n': - stats->lf++; - break; - case '\r': - stats->cr++; - if (scan < end && *scan == '\n') - stats->crlf++; - break; - case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ - stats->printable++; - break; - default: - stats->nonprintable++; - break; - } - } - - /* Treat files with a bare CR as binary */ - return (stats->cr != stats->crlf || stats->nul > 0 || - ((stats->printable >> 7) < stats->nonprintable)); -} diff --git a/src/buf_text.h b/src/buf_text.h deleted file mode 100644 index 726b0ae7b..000000000 --- a/src/buf_text.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ -#ifndef INCLUDE_buf_text_h__ -#define INCLUDE_buf_text_h__ - -#include "common.h" - -#include "buffer.h" - -typedef enum { - GIT_BOM_NONE = 0, - GIT_BOM_UTF8 = 1, - GIT_BOM_UTF16_LE = 2, - GIT_BOM_UTF16_BE = 3, - GIT_BOM_UTF32_LE = 4, - GIT_BOM_UTF32_BE = 5 -} git_bom_t; - -typedef struct { - git_bom_t bom; /* BOM found at head of text */ - unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */ - unsigned int printable, nonprintable; /* These are just approximations! */ -} git_buf_text_stats; - -/** - * Append string to buffer, prefixing each character from `esc_chars` with - * `esc_with` string. - * - * @param buf Buffer to append data to - * @param string String to escape and append - * @param esc_chars Characters to be escaped - * @param esc_with String to insert in from of each found character - * @return 0 on success, <0 on failure (probably allocation problem) - */ -extern int git_buf_text_puts_escaped( - git_buf *buf, - const char *string, - const char *esc_chars, - const char *esc_with); - -/** - * Append string escaping characters that are regex special - */ -GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string) -{ - return git_buf_text_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\"); -} - -/** - * Unescape all characters in a buffer in place - * - * I.e. remove backslashes - */ -extern void git_buf_text_unescape(git_buf *buf); - -/** - * Replace all \r\n with \n. - * - * @return 0 on success, -1 on memory error - */ -extern int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src); - -/** - * Replace all \n with \r\n. Does not modify existing \r\n. - * - * @return 0 on success, -1 on memory error - */ -extern int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src); - -/** - * Fill buffer with the common prefix of a array of strings - * - * Buffer will be set to empty if there is no common prefix - */ -extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs); - -/** - * Check quickly if buffer looks like it contains binary data - * - * @param buf Buffer to check - * @return true if buffer looks like non-text data - */ -extern bool git_buf_text_is_binary(const git_buf *buf); - -/** - * Check quickly if buffer contains a NUL byte - * - * @param buf Buffer to check - * @return true if buffer contains a NUL byte - */ -extern bool git_buf_text_contains_nul(const git_buf *buf); - -/** - * Check if a buffer begins with a UTF BOM - * - * @param bom Set to the type of BOM detected or GIT_BOM_NONE - * @param buf Buffer in which to check the first bytes for a BOM - * @return Number of bytes of BOM data (or 0 if no BOM found) - */ -extern int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf); - -/** - * Gather stats for a piece of text - * - * Fill the `stats` structure with counts of unreadable characters, carriage - * returns, etc, so it can be used in heuristics. This automatically skips - * a trailing EOF (\032 character). Also it will look for a BOM at the - * start of the text and can be told to skip that as well. - * - * @param stats Structure to be filled in - * @param buf Text to process - * @param skip_bom Exclude leading BOM from stats if true - * @return Does the buffer heuristically look like binary data - */ -extern bool git_buf_text_gather_stats( - git_buf_text_stats *stats, const git_buf *buf, bool skip_bom); - -#endif diff --git a/src/buffer.c b/src/buffer.c index f6ecc6e41..ffce73c68 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -7,7 +7,6 @@ #include "buffer.h" #include "posix.h" #include "git2/buffer.h" -#include "buf_text.h" #include <ctype.h> /* Used as default value for git_buf->ptr so that people can always @@ -187,16 +186,6 @@ int git_buf_set(git_buf *buf, const void *data, size_t len) return 0; } -int git_buf_is_binary(const git_buf *buf) -{ - return git_buf_text_is_binary(buf); -} - -int git_buf_contains_nul(const git_buf *buf) -{ - return git_buf_text_contains_nul(buf); -} - int git_buf_sets(git_buf *buf, const char *string) { return git_buf_set(buf, string, string ? strlen(string) : 0); @@ -1058,3 +1047,312 @@ invalid: git_error_set(GIT_ERROR_INVALID, "invalid quoted line"); return -1; } + +int git_buf_puts_escaped( + git_buf *buf, + const char *string, + const char *esc_chars, + const char *esc_with) +{ + const char *scan; + size_t total = 0, esc_len = strlen(esc_with), count, alloclen; + + if (!string) + return 0; + + for (scan = string; *scan; ) { + /* count run of non-escaped characters */ + count = strcspn(scan, esc_chars); + total += count; + scan += count; + /* count run of escaped characters */ + count = strspn(scan, esc_chars); + total += count * (esc_len + 1); + scan += count; + } + + GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, total, 1); + if (git_buf_grow_by(buf, alloclen) < 0) + return -1; + + for (scan = string; *scan; ) { + count = strcspn(scan, esc_chars); + + memmove(buf->ptr + buf->size, scan, count); + scan += count; + buf->size += count; + + for (count = strspn(scan, esc_chars); count > 0; --count) { + /* copy escape sequence */ + memmove(buf->ptr + buf->size, esc_with, esc_len); + buf->size += esc_len; + /* copy character to be escaped */ + buf->ptr[buf->size] = *scan; + buf->size++; + scan++; + } + } + + buf->ptr[buf->size] = '\0'; + + return 0; +} + +void git_buf_unescape(git_buf *buf) +{ + buf->size = git__unescape(buf->ptr); +} + +int git_buf_crlf_to_lf(git_buf *tgt, const git_buf *src) +{ + const char *scan = src->ptr; + const char *scan_end = src->ptr + src->size; + const char *next = memchr(scan, '\r', src->size); + size_t new_size; + char *out; + + GIT_ASSERT(tgt != src); + + if (!next) + return git_buf_set(tgt, src->ptr, src->size); + + /* reduce reallocs while in the loop */ + GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1); + if (git_buf_grow(tgt, new_size) < 0) + return -1; + + out = tgt->ptr; + tgt->size = 0; + + /* Find the next \r and copy whole chunk up to there to tgt */ + for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) { + if (next > scan) { + size_t copylen = (size_t)(next - scan); + memcpy(out, scan, copylen); + out += copylen; + } + + /* Do not drop \r unless it is followed by \n */ + if (next + 1 == scan_end || next[1] != '\n') + *out++ = '\r'; + } + + /* Copy remaining input into dest */ + if (scan < scan_end) { + size_t remaining = (size_t)(scan_end - scan); + memcpy(out, scan, remaining); + out += remaining; + } + + tgt->size = (size_t)(out - tgt->ptr); + tgt->ptr[tgt->size] = '\0'; + + return 0; +} + +int git_buf_lf_to_crlf(git_buf *tgt, const git_buf *src) +{ + const char *start = src->ptr; + const char *end = start + src->size; + const char *scan = start; + const char *next = memchr(scan, '\n', src->size); + size_t alloclen; + + GIT_ASSERT(tgt != src); + + if (!next) + return git_buf_set(tgt, src->ptr, src->size); + + /* attempt to reduce reallocs while in the loop */ + GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4); + GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1); + if (git_buf_grow(tgt, alloclen) < 0) + return -1; + tgt->size = 0; + + for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) { + size_t copylen = next - scan; + + /* if we find mixed line endings, carry on */ + if (copylen && next[-1] == '\r') + copylen--; + + GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, copylen, 3); + if (git_buf_grow_by(tgt, alloclen) < 0) + return -1; + + if (copylen) { + memcpy(tgt->ptr + tgt->size, scan, copylen); + tgt->size += copylen; + } + + tgt->ptr[tgt->size++] = '\r'; + tgt->ptr[tgt->size++] = '\n'; + } + + tgt->ptr[tgt->size] = '\0'; + return git_buf_put(tgt, scan, end - scan); +} + +int git_buf_common_prefix(git_buf *buf, const git_strarray *strings) +{ + size_t i; + const char *str, *pfx; + + git_buf_clear(buf); + + if (!strings || !strings->count) + return 0; + + /* initialize common prefix to first string */ + if (git_buf_sets(buf, strings->strings[0]) < 0) + return -1; + + /* go through the rest of the strings, truncating to shared prefix */ + for (i = 1; i < strings->count; ++i) { + + for (str = strings->strings[i], pfx = buf->ptr; + *str && *str == *pfx; str++, pfx++) + /* scanning */; + + git_buf_truncate(buf, pfx - buf->ptr); + + if (!buf->size) + break; + } + + return 0; +} + +int git_buf_is_binary(const git_buf *buf) +{ + const char *scan = buf->ptr, *end = buf->ptr + buf->size; + git_bom_t bom; + int printable = 0, nonprintable = 0; + + scan += git_buf_detect_bom(&bom, buf); + + if (bom > GIT_BOM_UTF8) + return 1; + + while (scan < end) { + unsigned char c = *scan++; + + /* Printable characters are those above SPACE (0x1F) excluding DEL, + * and including BS, ESC and FF. + */ + if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014') + printable++; + else if (c == '\0') + return true; + else if (!git__isspace(c)) + nonprintable++; + } + + return ((printable >> 7) < nonprintable); +} + +int git_buf_contains_nul(const git_buf *buf) +{ + return (memchr(buf->ptr, '\0', buf->size) != NULL); +} + +int git_buf_detect_bom(git_bom_t *bom, const git_buf *buf) +{ + const char *ptr; + size_t len; + + *bom = GIT_BOM_NONE; + /* need at least 2 bytes to look for any BOM */ + if (buf->size < 2) + return 0; + + ptr = buf->ptr; + len = buf->size; + + switch (*ptr++) { + case 0: + if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') { + *bom = GIT_BOM_UTF32_BE; + return 4; + } + break; + case '\xEF': + if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') { + *bom = GIT_BOM_UTF8; + return 3; + } + break; + case '\xFE': + if (*ptr == '\xFF') { + *bom = GIT_BOM_UTF16_BE; + return 2; + } + break; + case '\xFF': + if (*ptr != '\xFE') + break; + if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) { + *bom = GIT_BOM_UTF32_LE; + return 4; + } else { + *bom = GIT_BOM_UTF16_LE; + return 2; + } + break; + default: + break; + } + + return 0; +} + +bool git_buf_gather_text_stats( + git_buf_text_stats *stats, const git_buf *buf, bool skip_bom) +{ + const char *scan = buf->ptr, *end = buf->ptr + buf->size; + int skip; + + memset(stats, 0, sizeof(*stats)); + + /* BOM detection */ + skip = git_buf_detect_bom(&stats->bom, buf); + if (skip_bom) + scan += skip; + + /* Ignore EOF character */ + if (buf->size > 0 && end[-1] == '\032') + end--; + + /* Counting loop */ + while (scan < end) { + unsigned char c = *scan++; + + if (c > 0x1F && c != 0x7F) + stats->printable++; + else switch (c) { + case '\0': + stats->nul++; + stats->nonprintable++; + break; + case '\n': + stats->lf++; + break; + case '\r': + stats->cr++; + if (scan < end && *scan == '\n') + stats->crlf++; + break; + case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ + stats->printable++; + break; + default: + stats->nonprintable++; + break; + } + } + + /* Treat files with a bare CR as binary */ + return (stats->cr != stats->crlf || stats->nul > 0 || + ((stats->printable >> 7) < stats->nonprintable)); +} diff --git a/src/buffer.h b/src/buffer.h index 8c2096bce..e75ecc15c 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -17,6 +17,21 @@ * } git_buf; */ +typedef enum { + GIT_BOM_NONE = 0, + GIT_BOM_UTF8 = 1, + GIT_BOM_UTF16_LE = 2, + GIT_BOM_UTF16_BE = 3, + GIT_BOM_UTF32_LE = 4, + GIT_BOM_UTF32_BE = 5 +} git_bom_t; + +typedef struct { + git_bom_t bom; /* BOM found at head of text */ + unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */ + unsigned int printable, nonprintable; /* These are just approximations! */ +} git_buf_text_stats; + extern char git_buf__initbuf[]; extern char git_buf__oom[]; @@ -219,4 +234,81 @@ int git_buf_splice( const char *data, size_t nb_to_insert); +/** + * Append string to buffer, prefixing each character from `esc_chars` with + * `esc_with` string. + * + * @param buf Buffer to append data to + * @param string String to escape and append + * @param esc_chars Characters to be escaped + * @param esc_with String to insert in from of each found character + * @return 0 on success, <0 on failure (probably allocation problem) + */ +extern int git_buf_puts_escaped( + git_buf *buf, + const char *string, + const char *esc_chars, + const char *esc_with); + +/** + * Append string escaping characters that are regex special + */ +GIT_INLINE(int) git_buf_puts_escape_regex(git_buf *buf, const char *string) +{ + return git_buf_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\"); +} + +/** + * Unescape all characters in a buffer in place + * + * I.e. remove backslashes + */ +extern void git_buf_unescape(git_buf *buf); + +/** + * Replace all \r\n with \n. + * + * @return 0 on success, -1 on memory error + */ +extern int git_buf_crlf_to_lf(git_buf *tgt, const git_buf *src); + +/** + * Replace all \n with \r\n. Does not modify existing \r\n. + * + * @return 0 on success, -1 on memory error + */ +extern int git_buf_lf_to_crlf(git_buf *tgt, const git_buf *src); + +/** + * Fill buffer with the common prefix of a array of strings + * + * Buffer will be set to empty if there is no common prefix + */ +extern int git_buf_common_prefix(git_buf *buf, const git_strarray *strs); + +/** + * Check if a buffer begins with a UTF BOM + * + * @param bom Set to the type of BOM detected or GIT_BOM_NONE + * @param buf Buffer in which to check the first bytes for a BOM + * @return Number of bytes of BOM data (or 0 if no BOM found) + */ +extern int git_buf_detect_bom(git_bom_t *bom, const git_buf *buf); + +/** + * Gather stats for a piece of text + * + * Fill the `stats` structure with counts of unreadable characters, carriage + * returns, etc, so it can be used in heuristics. This automatically skips + * a trailing EOF (\032 character). Also it will look for a BOM at the + * start of the text and can be told to skip that as well. + * + * @param stats Structure to be filled in + * @param buf Text to process + * @param skip_bom Exclude leading BOM from stats if true + * @return Does the buffer heuristically look like binary data + */ +extern bool git_buf_gather_text_stats( + git_buf_text_stats *stats, const git_buf *buf, bool skip_bom); + #endif diff --git a/src/checkout.c b/src/checkout.c index cadc4c82d..27bf6fe00 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -26,7 +26,6 @@ #include "diff.h" #include "diff_generate.h" #include "pathspec.h" -#include "buf_text.h" #include "diff_xdiff.h" #include "path.h" #include "attr.h" diff --git a/src/config.c b/src/config.c index a6a8cb246..7a5dadd87 100644 --- a/src/config.c +++ b/src/config.c @@ -10,7 +10,6 @@ #include "git2/config.h" #include "git2/sys/config.h" -#include "buf_text.h" #include "config_backend.h" #include "regexp.h" #include "sysdir.h" @@ -1497,7 +1496,7 @@ int git_config_rename_section( int error = 0; struct rename_data data; - git_buf_text_puts_escape_regex(&pattern, old_section_name); + git_buf_puts_escape_regex(&pattern, old_section_name); if ((error = git_buf_puts(&pattern, "\\..+")) < 0) goto cleanup; diff --git a/src/config_parse.c b/src/config_parse.c index 7da92d3e7..ea32c36ba 100644 --- a/src/config_parse.c +++ b/src/config_parse.c @@ -7,8 +7,6 @@ #include "config_parse.h" -#include "buf_text.h" - #include <ctype.h> const char *git_config_escapes = "ntb\"\\"; @@ -231,7 +229,7 @@ static int skip_bom(git_parse_ctx *parser) { git_buf buf = GIT_BUF_INIT_CONST(parser->content, parser->content_len); git_bom_t bom; - int bom_offset = git_buf_text_detect_bom(&bom, &buf); + int bom_offset = git_buf_detect_bom(&bom, &buf); if (bom == GIT_BOM_UTF8) git_parse_advance_chars(parser, bom_offset); diff --git a/src/crlf.c b/src/crlf.c index 81b5216bc..1de9d8c3b 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -15,7 +15,6 @@ #include "futils.h" #include "hash.h" #include "filter.h" -#include "buf_text.h" #include "repository.h" typedef enum { @@ -219,7 +218,7 @@ static int crlf_apply_to_odb( if (ca->crlf_action == GIT_CRLF_BINARY || !git_buf_len(from)) return GIT_PASSTHROUGH; - is_binary = git_buf_text_gather_stats(&stats, from, false); + is_binary = git_buf_gather_text_stats(&stats, from, false); /* Heuristics to see if we can skip the conversion. * Straight from Core Git. @@ -247,7 +246,7 @@ static int crlf_apply_to_odb( return GIT_PASSTHROUGH; /* Actually drop the carriage returns */ - return git_buf_text_crlf_to_lf(to, from); + return git_buf_crlf_to_lf(to, from); } static int crlf_apply_to_workdir( @@ -262,7 +261,7 @@ static int crlf_apply_to_workdir( if (git_buf_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF) return GIT_PASSTHROUGH; - is_binary = git_buf_text_gather_stats(&stats, from, false); + is_binary = git_buf_gather_text_stats(&stats, from, false); /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */ if (stats.lf == 0 || stats.lf == stats.crlf) @@ -281,7 +280,7 @@ static int crlf_apply_to_workdir( return GIT_PASSTHROUGH; } - return git_buf_text_lf_to_crlf(to, from); + return git_buf_lf_to_crlf(to, from); } static int convert_attrs( diff --git a/src/diff_driver.c b/src/diff_driver.c index 6b19012c7..e9f63cb17 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -13,7 +13,6 @@ #include "diff.h" #include "strmap.h" #include "map.h" -#include "buf_text.h" #include "config.h" #include "regexp.h" #include "repository.h" @@ -428,8 +427,8 @@ int git_diff_driver_content_is_binary( * let's just use the simple NUL-byte detection that core git uses. */ - /* previously was: if (git_buf_text_is_binary(&search)) */ - if (git_buf_text_contains_nul(&search)) + /* previously was: if (git_buf_is_binary(&search)) */ + if (git_buf_contains_nul(&search)) return 1; return 0; diff --git a/src/ident.c b/src/ident.c index 7eccf9a43..ae3ef1b45 100644 --- a/src/ident.c +++ b/src/ident.c @@ -10,7 +10,6 @@ #include "git2/sys/filter.h" #include "filter.h" #include "buffer.h" -#include "buf_text.h" static int ident_find_id( const char **id_start, const char **id_end, const char *start, size_t len) @@ -105,7 +104,7 @@ static int ident_apply( GIT_UNUSED(self); GIT_UNUSED(payload); /* Don't filter binary files */ - if (git_buf_text_is_binary(from)) + if (git_buf_is_binary(from)) return GIT_PASSTHROUGH; if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) diff --git a/src/pathspec.c b/src/pathspec.c index 8b17a6e27..8f1bdf0fa 100644 --- a/src/pathspec.c +++ b/src/pathspec.c @@ -9,7 +9,6 @@ #include "git2/pathspec.h" #include "git2/diff.h" -#include "buf_text.h" #include "attr_file.h" #include "iterator.h" #include "repository.h" @@ -25,7 +24,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec) const char *scan; if (!pathspec || !pathspec->count || - git_buf_text_common_prefix(&prefix, pathspec) < 0) + git_buf_common_prefix(&prefix, pathspec) < 0) return NULL; /* diff prefix will only be leading non-wildcards */ @@ -41,7 +40,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec) return NULL; } - git_buf_text_unescape(&prefix); + git_buf_unescape(&prefix); return git_buf_detach(&prefix); } diff --git a/src/submodule.c b/src/submodule.c index 1d059a043..14869452f 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -12,7 +12,6 @@ #include "git2/types.h" #include "git2/index.h" #include "buffer.h" -#include "buf_text.h" #include "vector.h" #include "posix.h" #include "config_backend.h" |
