Diffstat (limited to 'subversion/libsvn_diff/diff_memory.c')
-rw-r--r--  subversion/libsvn_diff/diff_memory.c | 269
1 file changed, 156 insertions(+), 113 deletions(-)
diff --git a/subversion/libsvn_diff/diff_memory.c b/subversion/libsvn_diff/diff_memory.c
index 91c197a..00f4c7f 100644
--- a/subversion/libsvn_diff/diff_memory.c
+++ b/subversion/libsvn_diff/diff_memory.c
@@ -27,6 +27,8 @@
#include <apr_want.h>
#include <apr_tables.h>
+#include <assert.h>
+
#include "svn_diff.h"
#include "svn_pools.h"
#include "svn_types.h"
@@ -35,6 +37,7 @@
#include "diff.h"
#include "svn_private_config.h"
#include "private/svn_adler32.h"
+#include "private/svn_diff_private.h"
typedef struct source_tokens_t
{
@@ -123,7 +126,7 @@ datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
diff_mem_baton_t *mem_baton = baton;
source_tokens_t *src = &(mem_baton->sources[datasource_to_index(datasource)]);
- if (src->tokens->nelts > src->next_token)
+ if ((apr_size_t)src->tokens->nelts > src->next_token)
{
/* There are actually tokens to be returned */
char *buf = mem_baton->normalization_buf[0];
@@ -341,7 +344,8 @@ typedef enum unified_output_e
{
unified_output_context = 0,
unified_output_delete,
- unified_output_insert
+ unified_output_insert,
+ unified_output_skip
} unified_output_e;
/* Baton for generating unified diffs */
@@ -350,7 +354,7 @@ typedef struct unified_output_baton_t
svn_stream_t *output_stream;
const char *header_encoding;
source_tokens_t sources[2]; /* 0 == original; 1 == modified */
- apr_off_t next_token; /* next token in original source */
+ apr_off_t current_token[2]; /* current token per source */
/* Cached markers, in header_encoding,
indexed using unified_output_e */
@@ -363,6 +367,9 @@ typedef struct unified_output_baton_t
/* The delimiters of the hunk header, '@@' for text hunks and '##' for
* property hunks. */
const char *hunk_delimiter;
+ /* The string to print after a line that does not end with a newline.
+ * It must start with a '\'. Typically "\ No newline at end of file". */
+ const char *no_newline_string;
/* Pool for allocation of temporary memory in the callbacks
Should be cleared on entry of each iteration of a callback */
@@ -378,31 +385,28 @@ static svn_error_t *
output_unified_token_range(output_baton_t *btn,
int tokens,
unified_output_e type,
- apr_off_t first,
- apr_off_t past_last)
+ apr_off_t until)
{
source_tokens_t *source = &btn->sources[tokens];
- apr_off_t idx;
-
- past_last = (past_last > source->tokens->nelts)
- ? source->tokens->nelts : past_last;
- if (tokens == 0)
- /* We get context from the original source, don't expect
- to be asked to output a block which starts before
- what we already have written. */
- first = (first < btn->next_token) ? btn->next_token : first;
+ if (until > source->tokens->nelts)
+ until = source->tokens->nelts;
- if (first >= past_last)
+ if (until <= btn->current_token[tokens])
return SVN_NO_ERROR;
/* Do the loop with prefix and token */
- for (idx = first; idx < past_last; idx++)
+ while (TRUE)
{
- svn_string_t *token
- = APR_ARRAY_IDX(source->tokens, idx, svn_string_t *);
- svn_stringbuf_appendcstr(btn->hunk, btn->prefix_str[type]);
- svn_stringbuf_appendbytes(btn->hunk, token->data, token->len);
+ svn_string_t *token =
+ APR_ARRAY_IDX(source->tokens, btn->current_token[tokens],
+ svn_string_t *);
+
+ if (type != unified_output_skip)
+ {
+ svn_stringbuf_appendcstr(btn->hunk, btn->prefix_str[type]);
+ svn_stringbuf_appendbytes(btn->hunk, token->data, token->len);
+ }
if (type == unified_output_context)
{
@@ -411,25 +415,28 @@ output_unified_token_range(output_baton_t *btn,
}
else if (type == unified_output_delete)
btn->hunk_length[0]++;
- else
+ else if (type == unified_output_insert)
btn->hunk_length[1]++;
+ /* ### TODO: Add skip processing for -p handling? */
+
+ btn->current_token[tokens]++;
+ if (btn->current_token[tokens] == until)
+ break;
}
- if (past_last == source->tokens->nelts && source->ends_without_eol)
+
+ if (btn->current_token[tokens] == source->tokens->nelts
+ && source->ends_without_eol)
{
const char *out_str;
- SVN_ERR(svn_utf_cstring_from_utf8_ex2
- (&out_str,
- /* The string below is intentionally not marked for translation:
- it's vital to correct operation of the diff(1)/patch(1)
- program pair. */
- APR_EOL_STR "\\ No newline at end of file" APR_EOL_STR,
- btn->header_encoding, btn->pool));
+
+ SVN_ERR(svn_utf_cstring_from_utf8_ex2(
+ &out_str, btn->no_newline_string,
+ btn->header_encoding, btn->pool));
svn_stringbuf_appendcstr(btn->hunk, out_str);
}
- if (tokens == 0)
- btn->next_token = past_last;
+
return SVN_NO_ERROR;
}
@@ -444,6 +451,8 @@ output_unified_flush_hunk(output_baton_t *baton,
{
apr_off_t target_token;
apr_size_t hunk_len;
+ apr_off_t old_start;
+ apr_off_t new_start;
if (svn_stringbuf_isempty(baton->hunk))
return SVN_NO_ERROR;
@@ -452,58 +461,40 @@ output_unified_flush_hunk(output_baton_t *baton,
/* Write the trailing context */
target_token = baton->hunk_start[0] + baton->hunk_length[0]
- + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+ + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
SVN_ERR(output_unified_token_range(baton, 0 /*original*/,
unified_output_context,
- baton->next_token, target_token));
+ target_token));
if (hunk_delimiter == NULL)
hunk_delimiter = "@@";
+ old_start = baton->hunk_start[0];
+ new_start = baton->hunk_start[1];
+
+ /* If the file is non-empty, convert the line indexes from
+ zero based to one based */
+ if (baton->hunk_length[0])
+ old_start++;
+ if (baton->hunk_length[1])
+ new_start++;
+
/* Write the hunk header */
- if (baton->hunk_length[0] > 0)
- /* Convert our 0-based line numbers into unidiff 1-based numbers */
- baton->hunk_start[0]++;
- SVN_ERR(svn_stream_printf_from_utf8(
- baton->output_stream, baton->header_encoding,
- baton->pool,
- /* Hunk length 1 is implied, don't show the
- length field if we have a hunk that long */
- (baton->hunk_length[0] == 1)
- ? ("%s -%" APR_OFF_T_FMT)
- : ("%s -%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT),
- hunk_delimiter,
- baton->hunk_start[0], baton->hunk_length[0]));
-
- if (baton->hunk_length[1] > 0)
- /* Convert our 0-based line numbers into unidiff 1-based numbers */
- baton->hunk_start[1]++;
-
-
- /* Hunk length 1 is implied, don't show the
- length field if we have a hunk that long */
- if (baton->hunk_length[1] == 1)
- {
- SVN_ERR(svn_stream_printf_from_utf8(
- baton->output_stream, baton->header_encoding,
- baton->pool,
- " +%" APR_OFF_T_FMT " %s" APR_EOL_STR,
- baton->hunk_start[1], hunk_delimiter));
- }
- else
- {
- SVN_ERR(svn_stream_printf_from_utf8(
- baton->output_stream, baton->header_encoding,
- baton->pool,
- " +%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT " %s" APR_EOL_STR,
- baton->hunk_start[1], baton->hunk_length[1],
- hunk_delimiter));
- }
+ SVN_ERR(svn_diff__unified_write_hunk_header(
+ baton->output_stream, baton->header_encoding, hunk_delimiter,
+ old_start, baton->hunk_length[0],
+ new_start, baton->hunk_length[1],
+ NULL /* hunk_extra_context */,
+ baton->pool));
hunk_len = baton->hunk->len;
SVN_ERR(svn_stream_write(baton->output_stream,
baton->hunk->data, &hunk_len));
- baton->hunk_length[0] = baton->hunk_length[1] = 0;
+ /* Prepare for the next hunk */
+ baton->hunk_length[0] = 0;
+ baton->hunk_length[1] = 0;
+ baton->hunk_start[0] = 0;
+ baton->hunk_start[1] = 0;
svn_stringbuf_setempty(baton->hunk);
return SVN_NO_ERROR;
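
The hunk above replaces the hand-rolled header printing with a call to
svn_diff__unified_write_hunk_header(). The unidiff convention it implements:
hunk starts are converted from the library's 0-based token indexes to 1-based
line numbers (only for non-empty ranges), and a range whose length is exactly
1 may omit the ",length" part. A minimal standalone sketch of that formatting
rule, using plain printf instead of the svn_stream/encoding machinery used in
this file:

    #include <stdio.h>

    /* Illustrative sketch only (not the svn_diff__unified_write_hunk_header
       implementation): print "@@ -start[,len] +start[,len] @@", omitting a
       length field when the range is exactly one line long.  Start values
       are assumed to be 1-based already, or 0 for an empty range. */
    static void
    print_hunk_header(long old_start, long old_len, long new_start, long new_len)
    {
      printf("@@ -%ld", old_start);
      if (old_len != 1)
        printf(",%ld", old_len);
      printf(" +%ld", new_start);
      if (new_len != 1)
        printf(",%ld", new_len);
      printf(" @@\n");
    }

    /* print_hunk_header(12, 3, 12, 5)  =>  "@@ -12,3 +12,5 @@"
       print_hunk_header(7, 1, 7, 1)    =>  "@@ -7 +7 @@"          */
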
@@ -519,38 +510,91 @@ output_unified_diff_modified(void *baton,
apr_off_t latest_start,
apr_off_t latest_length)
{
- output_baton_t *btn = baton;
- apr_off_t targ_orig, targ_mod;
-
- targ_orig = original_start - SVN_DIFF__UNIFIED_CONTEXT_SIZE;
- targ_orig = (targ_orig < 0) ? 0 : targ_orig;
- targ_mod = modified_start;
-
- /* If the changed ranges are far enough apart (no overlapping or
- * connecting context), flush the current hunk. */
- if (btn->next_token + SVN_DIFF__UNIFIED_CONTEXT_SIZE < targ_orig)
- SVN_ERR(output_unified_flush_hunk(btn, btn->hunk_delimiter));
- /* Adjust offset if it's not the first hunk. */
- else if (btn->hunk_length[0] != 0)
- targ_orig = btn->next_token;
-
- if (btn->hunk_length[0] == 0
- && btn->hunk_length[1] == 0)
+ output_baton_t *output_baton = baton;
+ apr_off_t context_prefix_length;
+ apr_off_t prev_context_end;
+ svn_boolean_t init_hunk = FALSE;
+
+ if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
+ context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+ else
+ context_prefix_length = original_start;
+
+ /* Calculate where the previous hunk will end if we would write it now
+ (including the necessary context at the end) */
+ if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
{
- btn->hunk_start[0] = targ_orig;
- btn->hunk_start[1] = targ_mod + targ_orig - original_start;
+ prev_context_end = output_baton->hunk_start[0]
+ + output_baton->hunk_length[0]
+ + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
}
+ else
+ {
+ prev_context_end = -1;
- SVN_ERR(output_unified_token_range(btn, 0/*original*/,
- unified_output_context,
- targ_orig, original_start));
- SVN_ERR(output_unified_token_range(btn, 0/*original*/,
+ if (output_baton->hunk_start[0] == 0
+ && (original_length > 0 || modified_length > 0))
+ init_hunk = TRUE;
+ }
+
+ /* If the changed range is far enough from the previous range, flush the current
+ hunk. */
+ {
+ apr_off_t new_hunk_start = (original_start - context_prefix_length);
+
+ if (output_baton->current_token[0] < new_hunk_start
+ && prev_context_end <= new_hunk_start)
+ {
+ SVN_ERR(output_unified_flush_hunk(output_baton,
+ output_baton->hunk_delimiter));
+ init_hunk = TRUE;
+ }
+ else if (output_baton->hunk_length[0] > 0
+ || output_baton->hunk_length[1] > 0)
+ {
+ /* We extend the current hunk */
+
+ /* Original: Output the context preceding the changed range */
+ SVN_ERR(output_unified_token_range(output_baton, 0 /* original */,
+ unified_output_context,
+ original_start));
+ }
+ }
+
+ /* Original: Skip lines until we are at the beginning of the context we want
+ to display */
+ SVN_ERR(output_unified_token_range(output_baton, 0 /* original */,
+ unified_output_skip,
+ original_start - context_prefix_length));
+
+ if (init_hunk)
+ {
+ SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
+ && output_baton->hunk_length[1] == 0);
+
+ output_baton->hunk_start[0] = original_start - context_prefix_length;
+ output_baton->hunk_start[1] = modified_start - context_prefix_length;
+ }
+
+ /* Modified: Skip lines until we are at the start of the changed range */
+ SVN_ERR(output_unified_token_range(output_baton, 1 /* modified */,
+ unified_output_skip,
+ modified_start));
+
+ /* Original: Output the context preceding the changed range */
+ SVN_ERR(output_unified_token_range(output_baton, 0 /* original */,
+ unified_output_context,
+ original_start));
+
+ /* Both: Output the changed range */
+ SVN_ERR(output_unified_token_range(output_baton, 0 /* original */,
unified_output_delete,
- original_start,
original_start + original_length));
- return output_unified_token_range(btn, 1/*modified*/, unified_output_insert,
- modified_start,
- modified_start + modified_length);
+ SVN_ERR(output_unified_token_range(output_baton, 1 /* modified */,
+ unified_output_insert,
+ modified_start + modified_length));
+
+ return SVN_NO_ERROR;
}
static const svn_diff_output_fns_t mem_output_unified_vtable =
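
The reworked output_unified_diff_modified() above hinges on one decision:
whether the next changed range can be folded into the hunk currently being
accumulated. It is folded in when the pending hunk's trailing context
(hunk start + hunk length + SVN_DIFF__UNIFIED_CONTEXT_SIZE) would reach past
the start of the new range's leading context; otherwise the pending hunk is
flushed first. A self-contained, simplified sketch of that test, with a
hard-coded context size of 3 standing in for SVN_DIFF__UNIFIED_CONTEXT_SIZE:

    #include <stdbool.h>

    #define CONTEXT_SIZE 3   /* stands in for SVN_DIFF__UNIFIED_CONTEXT_SIZE */

    /* Illustrative sketch only: HUNK_START/HUNK_LEN describe the pending hunk
       and NEW_START the next changed range, all as 0-based line indexes in
       the original source.  Returns true when the trailing context of the
       pending hunk would overlap the leading context of the new range, i.e.
       when both ranges belong in the same unified-diff hunk. */
    static bool
    ranges_share_hunk(long hunk_start, long hunk_len, long new_start)
    {
      long prev_context_end = hunk_start + hunk_len + CONTEXT_SIZE;
      long new_hunk_start = new_start > CONTEXT_SIZE ? new_start - CONTEXT_SIZE
                                                     : 0;

      return prev_context_end > new_hunk_start;
    }

    /* ranges_share_hunk(10, 2, 14) == true   (contexts overlap, extend hunk)
       ranges_share_hunk(10, 2, 30) == false  (flush, then start a new hunk) */
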
@@ -584,8 +628,12 @@ svn_diff_mem_string_output_unified2(svn_stream_t *output_stream,
baton.output_stream = output_stream;
baton.pool = svn_pool_create(pool);
baton.header_encoding = header_encoding;
- baton.hunk = svn_stringbuf_create("", pool);
+ baton.hunk = svn_stringbuf_create_empty(pool);
baton.hunk_delimiter = hunk_delimiter;
+ baton.no_newline_string
+ = (hunk_delimiter == NULL || strcmp(hunk_delimiter, "##") != 0)
+ ? APR_EOL_STR SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR
+ : APR_EOL_STR SVN_DIFF__NO_NEWLINE_AT_END_OF_PROPERTY APR_EOL_STR;
SVN_ERR(svn_utf_cstring_from_utf8_ex2
(&(baton.prefix_str[unified_output_context]), " ",
@@ -602,12 +650,9 @@ svn_diff_mem_string_output_unified2(svn_stream_t *output_stream,
if (with_diff_header)
{
- SVN_ERR(svn_stream_printf_from_utf8(output_stream,
- header_encoding, pool,
- "--- %s" APR_EOL_STR
- "+++ %s" APR_EOL_STR,
- original_header,
- modified_header));
+ SVN_ERR(svn_diff__unidiff_write_header(
+ output_stream, header_encoding,
+ original_header, modified_header, pool));
}
SVN_ERR(svn_diff_output(diff, &baton,
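
For orientation, the entry point being modified here is typically driven
roughly as sketched below: diff two in-memory strings, then stream the result
as a unified diff. The prototypes shown are assumptions to be verified against
svn_diff.h and svn_io.h; the sketch uses the older
svn_diff_mem_string_output_unified variant, which is assumed to always emit
the ---/+++ header:

    /* Rough usage sketch; prototypes are assumptions, check svn_diff.h
       and svn_io.h before relying on them. */
    #include "svn_diff.h"
    #include "svn_io.h"
    #include "svn_string.h"

    static svn_error_t *
    example_unified_diff(apr_pool_t *pool)
    {
      const svn_string_t *original = svn_string_create("a\nb\nc\n", pool);
      const svn_string_t *modified = svn_string_create("a\nB\nc\n", pool);
      svn_diff_t *diff;
      svn_stream_t *out;

      SVN_ERR(svn_diff_mem_string_diff(&diff, original, modified,
                                       svn_diff_file_options_create(pool),
                                       pool));
      SVN_ERR(svn_stream_for_stdout(&out, pool));

      /* Expected to print "--- original" / "+++ modified" followed by the
         "@@ ... @@" hunks produced by the code in this file. */
      SVN_ERR(svn_diff_mem_string_output_unified(out, diff,
                                                 "original", "modified",
                                                 "UTF-8",
                                                 original, modified,
                                                 pool));
      return SVN_NO_ERROR;
    }
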
@@ -680,7 +725,7 @@ typedef struct merge_output_baton_t
/* Tokenized source text */
source_tokens_t sources[3];
- apr_off_t next_token;
+ apr_off_t next_token[3];
/* Markers for marking conflicted sections */
const char *markers[4]; /* 0 = original, 1 = modified,
@@ -710,7 +755,7 @@ flush_context_saver(context_saver_t *cs,
int i;
for (i = 0; i < SVN_DIFF__UNIFIED_CONTEXT_SIZE; i++)
{
- int slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
+ apr_size_t slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
if (cs->data[slot])
{
apr_size_t len = cs->len[slot];
@@ -808,15 +853,13 @@ output_merge_token_range(apr_size_t *lines_printed_p,
static svn_error_t *
output_marker_eol(merge_output_baton_t *btn)
{
- apr_size_t len = strlen(btn->marker_eol);
- return svn_stream_write(btn->output_stream, btn->marker_eol, &len);
+ return svn_stream_puts(btn->output_stream, btn->marker_eol);
}
static svn_error_t *
output_merge_marker(merge_output_baton_t *btn, int idx)
{
- apr_size_t len = strlen(btn->markers[idx]);
- SVN_ERR(svn_stream_write(btn->output_stream, btn->markers[idx], &len));
+ SVN_ERR(svn_stream_puts(btn->output_stream, btn->markers[idx]));
return output_marker_eol(btn);
}
@@ -924,7 +967,7 @@ output_conflict_with_context(void *baton,
if (btn->output_stream == btn->context_saver->stream)
{
if (btn->context_saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
- SVN_ERR(svn_stream_printf(btn->real_output_stream, btn->pool, "@@\n"));
+ SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
}