summaryrefslogtreecommitdiff
path: root/src/compress.c
blob: 3ffdd445ed3b024e19ecf0873f90fca54fef884c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
/* -*- c-file-style: "java"; indent-tabs-mode: nil; tab-width: 4; fill-column: 78 -*-
 *
 * distcc -- A simple distributed compiler system
 *
 * Copyright (C) 2003, 2004 by Martin Pool <mbp@sourcefrog.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
 * USA.
 */


                        /* I think that I can safely speak for the
                         * whole troll community when I say "I like
                         * watching train wrecks".  -- AC  */


#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>

#include <sys/stat.h>
#include <sys/types.h>
#include <sys/socket.h>
#ifdef HAVE_SYS_MMAN_H
#  include <sys/mman.h>
#endif

#include <netinet/in.h>
#include <netinet/tcp.h>

#include "distcc.h"
#include "trace.h"
#include "util.h"
#include "exitcode.h"
#include "minilzo.h"


static char work_mem[LZO1X_1_MEM_COMPRESS];

/**
 * @file
 *
 * Compressed bulk data transfer for distcc.
 *
 * lzo doesn't have any detectable magic at the start in the raw form.  (lzop
 * the command-line tool adds some.)  Therefore we indicate in the request
 * header (the protocol version) whether compression is on or off.  If it is
 * on, all bulk data in both directions is compressed.  metadata whether the
 * transfer is compressed or not.
 *
 * It might be nice to unify this code with that in pump.c, which deals with
 * uncompressed files.  There are some parallels between the routines.
 * However the details are rather different, because with compressed files we
 * do not know ahead of time how big the expanded form will be.  This affects
 * sending, where we need to make a large-enough temporary buffer to compress
 * into.  It also affects receipt, where we need to allow extra space for data
 * coming in.  So for the moment they remain separate.
 *
 * We used to use mmap here, but it complicated the code (and caused a bug in
 * 2.14) without being clearly any faster.  So it's out again.
 *
 * The chunk header gives the number of compressed bytes.  The number of
 * plaintext bytes isn't transmitted, and so for decompression we might need
 * to scale up the buffer.
 */


/*
 * Compress from a file to a newly malloc'd block.
 */
int dcc_compress_file_lzo1x(int in_fd,
                            size_t in_len,
                            char **out_buf,
                            size_t *out_len)
{
    char *in_buf = NULL;
    int ret;

    if ((in_buf = malloc(in_len)) == NULL) {
        rs_log_error("allocation of %ld byte buffer failed",
                     (long) in_len);
        ret = EXIT_OUT_OF_MEMORY;
        goto out;
    }

    if ((ret = dcc_readx(in_fd, in_buf, in_len)))
        goto out;

    if ((ret = dcc_compress_lzo1x_alloc(in_buf, in_len, out_buf, out_len)))
        goto out;

    out:
    if (in_buf != NULL) {
        free(in_buf);
    }

    return ret;
}


/**
 * Send LZO-compressed bulk data.
 *
 * The most straighforward method for miniLZO is to just send everything in
 * one big chunk.  So we just read the whole input into a buffer, build the
 * output in a buffer, and send it once its complete.
 **/
int dcc_compress_lzo1x_alloc(const char *in_buf,
                             size_t in_len,
                             char **out_buf_ret,
                             size_t *out_len_ret)
{
    int ret = 0, lzo_ret;
    char *out_buf = NULL;
    size_t out_size;
    lzo_uint out_len;

    /* NOTE: out_size is the buffer size, out_len is the amount of actual
     * data. */

    /* In the unlikely worst case, LZO can cause the input to expand a bit. */
    out_size = in_len + in_len/64 + 16 + 3;
    if ((out_buf = malloc(out_size)) == NULL) {
        rs_log_error("failed to allocate compression buffer");
        return EXIT_OUT_OF_MEMORY;
    }

    out_len = out_size;
    lzo_ret = lzo1x_1_compress((lzo_byte*)in_buf, in_len,
                               (lzo_byte*)out_buf, &out_len,
                               work_mem);
    if (lzo_ret != LZO_E_OK) {
        rs_log_error("LZO1X1 compression failed: %d", lzo_ret);
        free(out_buf);
        return EXIT_IO_ERROR;
    }

    *out_buf_ret = out_buf;
    *out_len_ret = out_len;

    rs_trace("compressed %ld bytes to %ld bytes: %d%%",
             (long) in_len, (long) out_len,
             (int) (in_len ? 100*out_len / in_len : 0));

    return ret;
}



/**
 * Receive @p in_len compressed bytes from @p in_fd, and write the
 * decompressed form to @p out_fd.
 *
 * There's no way for us to know how big the uncompressed form will be, and
 * there is also no way to grow the decompression buffer if it turns out to
 * initially be too small.  So we assume a ratio of 10x.  If it turns out to
 * be too small, we increase the buffer and try again.  Typical compression of
 * source or object is about 2x to 4x.  On modern Unix we should be able to
 * allocate (and not touch) many megabytes at little cost, since it will just
 * turn into an anonymous map.
 *
 * LZO doesn't have any way to decompress part of the input and then break to
 * get more output space, so our buffer needs to be big enough in the first
 * place or we would waste time repeatedly decompressing it.
 **/
int dcc_r_bulk_lzo1x(int out_fd, int in_fd,
                     unsigned in_len)
{
    int ret, lzo_ret;
    char *in_buf = NULL, *out_buf = NULL;
    size_t out_size = 0;
    lzo_uint out_len;

    /* NOTE: out_size is the buffer size, out_len is the amount of actual
     * data. */

    if (in_len == 0)
        return 0;               /* just check */

    if ((in_buf = malloc(in_len)) == NULL) {
        rs_log_error("failed to allocate decompression input");
        ret = EXIT_OUT_OF_MEMORY;
        goto out;
    }

    if ((ret = dcc_readx(in_fd, in_buf, in_len)) != 0)
        goto out;

#if 0
    /* Initial estimate for output buffer.  This is intentionally quite low to
     * exercise the resizing code -- if it works OK then we can scale this
     * up. */
    out_size = 2 * in_len;
#else
    out_size = 8 * in_len;
#endif

    try_again_with_a_bigger_buffer:

    if ((out_buf = malloc(out_size)) == NULL) {
        rs_log_error("failed to allocate decompression buffer");
        ret = EXIT_OUT_OF_MEMORY;
        goto out;
    }

    out_len = out_size;
    lzo_ret = lzo1x_decompress_safe((lzo_byte*)in_buf, in_len,
                                    (lzo_byte*)out_buf, &out_len, work_mem);

    if (lzo_ret == LZO_E_OK) {
        rs_trace("decompressed %ld bytes to %ld bytes: %d%%",
                 (long) in_len, (long) out_len,
                 (int) (out_len ? 100*in_len / out_len : 0));

        ret = dcc_writex(out_fd, out_buf, out_len);

        goto out;
    } else if (lzo_ret == LZO_E_OUTPUT_OVERRUN) {
        free(out_buf);
        out_buf = 0;
        out_size *= 2;
        /* FIXME: Make sure this doesn't overflow memory size? */
        rs_trace("LZO_E_OUTPUT_OVERRUN, trying again with %lu byte buffer",
                 (unsigned long) out_size);
        goto try_again_with_a_bigger_buffer;
    } else {
        rs_log_error("LZO1X1 decompression failed: %d", lzo_ret);
        ret = EXIT_IO_ERROR;
        goto out;
    }

out:
    free(in_buf);
    free(out_buf);

    return ret;
}