Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RZip.cxx
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers. *
3 * All rights reserved. *
4 * *
5 * For the licensing terms see $ROOTSYS/LICENSE. *
6 * For the list of contributors see $ROOTSYS/README/CREDITS. *
7 *************************************************************************/
8
9#include "Compression.h"
10#include "RConfigure.h"
11#include "RZip.h"
12#include "Bits.h"
13#include "ZipLZMA.h"
14#include "ZipLZ4.h"
15#include "ZipZSTD.h"
16
17#include "zlib.h"
18
19#include <cstdio>
20#include <cassert>
21
22// The size of the ROOT block framing headers for compression:
23// - 3 bytes to identify the compression algorithm and version.
24// - 3 bytes to identify the deflated buffer size.
25// - 3 bytes to identify the inflated buffer size.
26#define HDRSIZE 9
27
28/**
29 * Forward decl's
30 */
/* File-local compression back ends. They are declared up front because
   R__zipMultipleAlgorithm and R__unzip call them before their definitions. */
static void R__zipOld(int cxlevel, int *srcsize, const char *src, int *tgtsize, char *tgt, int *irep);
static void R__zipZLIB(int cxlevel, int *srcsize, const char *src, int *tgtsize, char *tgt, int *irep);
static void R__unzipZLIB(int *srcsize, const unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep);
34
35/* ===========================================================================
36 R__ZipMode is used to select the compression algorithm when R__zip is called
37 and when R__zipMultipleAlgorithm is called with its last argument set to 0.
38 R__ZipMode = 1 : ZLIB compression algorithm is used (default)
39 R__ZipMode = 2 : LZMA compression algorithm is used
40 R__ZipMode = 4 : LZ4 compression algorithm is used
41 R__ZipMode = 0 or 3 : a very old compression algorithm is used
42 (the very old algorithm is supported for backward compatibility)
43 The LZMA algorithm requires the external XZ package be installed when linking
44 is done. LZMA typically has significantly higher compression factors, but takes
45 more CPU time and memory resources while compressing.
46
47 The LZ4 algorithm requires the external LZ4 package to be installed when linking
48 is done. LZ4 typically has the worst compression ratios, but much faster decompression
49 speeds - sometimes by an order of magnitude.
50*/
52
53/* ===========================================================================
54 Function to set the ZipMode
55 */
60
61unsigned long R__crc32(unsigned long crc, const unsigned char* buf, unsigned int len)
62{
63 return crc32(crc, buf, len);
64}
65
66/* int cxlevel; compression level */
67/* int *srcsize, *tgtsize, *irep; source and target sizes, reply */
68/* char *tgt, *src; source and target buffers */
69/* compressionAlgorithm 0 = use global setting */
70/* 1 = zlib */
71/* 2 = lzma */
72/* 3 = old */
// Entry point for compression: validates the request and hands the buffers to
// one of the algorithm-specific R__zip* routines.
//
// *irep is cleared first, so every early return below leaves *irep == 0; on the
// dispatch path the selected routine is given irep to fill in.
//
// NOTE(review): listing line 74 is missing from this page, so the parameter list
// below is cut off after "int *irep,". The body uses a variable named
// compressionAlgorithm that is compared against
// ROOT::RCompressionSetting::EAlgorithm values; presumably it is the missing
// final parameter -- confirm against the upstream file.
73void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, const char *src, int *tgtsize, char *tgt, int *irep,
75{
76 *irep = 0;
77
78 // Performance optimization: avoid compressing tiny source buffers.
// With HDRSIZE == 9 this rejects any source shorter than 11 bytes.
79 if (*srcsize < 1 + HDRSIZE + 1) {
80 return;
81 }
82 // Correctness check: we need at least enough bytes to prepend the header!
83 if (*tgtsize <= HDRSIZE) {
84 return;
85 }
86
// A non-positive compression level produces no output at all.
87 if (cxlevel <= 0) {
88 return;
89 }
90
// kUseGlobal defers the choice to the file-level R__ZipMode setting.
91 if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kUseGlobal) {
92 compressionAlgorithm = R__ZipMode;
93 }
94
95 // The LZMA compression algorithm from the XZ package
96 if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kLZMA) {
97 R__zipLZMA(cxlevel, srcsize, src, tgtsize, tgt, irep);
98 } else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kLZ4) {
99 R__zipLZ4(cxlevel, srcsize, src, tgtsize, tgt, irep);
100 } else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kZSTD) {
101 R__zipZSTD(cxlevel, srcsize, src, tgtsize, tgt, irep);
// NOTE(review): listing line 102 is missing here. As shown, the R__zipOld call
// below sits in the ZSTD branch; presumably the lost line was an "else if" that
// selects the old algorithm -- confirm against the upstream file.
103 R__zipOld(cxlevel, srcsize, src, tgtsize, tgt, irep);
104 } else {
105 // 1 is for ZLIB (which is the default), ZLIB is also used for any illegal
106 // algorithm setting. This was a poor historic choice, as poor code may result in
107 // a surprising change in algorithm in a future version of ROOT.
108 R__zipZLIB(cxlevel, srcsize, src, tgtsize, tgt, irep);
109 }
110}
111
112 // The very old algorithm for backward compatibility
113 // 0 for selecting with R__ZipMode in a backward compatible way
114 // 3 for selecting in other cases
// Compress src into tgt with the legacy deflate implementation (R__bi_init /
// R__ct_init / R__lm_init / R__Deflate) and prepend the 9-byte framing header
// with the 'C','S' signature.
//
// On success *irep receives state.out_offset, which starts at HDRSIZE and so
// counts the header as well. On any failure the function returns with
// *irep == 0.
115static void R__zipOld(int cxlevel, int *srcsize, const char *src, int *tgtsize, char *tgt, int *irep)
116{
117 int method = Z_DEFLATED;
// NOTE(review): listing line 118 is missing here. The code below uses a local
// named "state" whose declaration is not visible -- presumably it was declared
// on that line; confirm against the upstream file.
119 ush att = (ush)UNKNOWN;
120 ush flags = 0;
// Levels above 9 are clamped; the clamped value is also published in the
// gCompressionLevel global.
121 if (cxlevel > 9) cxlevel = 9;
122 gCompressionLevel = cxlevel;
123
124 *irep = 0;
125 /* error_flag = 0; */
126 if (*tgtsize <= 0) {
127 R__error("target buffer too small");
128 return;
129 }
// The header stores each size in three bytes, hence the 0xffffff ceiling.
130 if (*srcsize > 0xffffff) {
131 R__error("source buffer too big");
132 return;
133 }
134
135#ifdef DYN_ALLOC
136 state.R__window = 0;
137 state.R__prev = 0;
138#endif
139
140 state.in_buf = src;
141 state.in_size = (unsigned) (*srcsize);
142 state.in_offset = 0;
143
144 state.out_buf = tgt;
145 state.out_size = (unsigned) (*tgtsize);
// Output starts after the space reserved for the framing header.
146 state.out_offset = HDRSIZE;
147 state.R__window_size = 0L;
148
149 if (0 != R__bi_init(&state) ) return; /* initialize bit routines */
// NOTE(review): listing line 150 is missing here. state.t_state is read on the
// next line without a visible assignment -- presumably it was set on the lost
// line; confirm against the upstream file.
151 if (0 != R__ct_init(state.t_state,&att, &method)) return; /* initialize tree routines */
152 if (0 != R__lm_init(&state, gCompressionLevel, &flags)) return; /* initialize compression */
153 R__Deflate(&state,&state.error_flag); /* compress data */
154 if (state.error_flag != 0) return;
155
156 tgt[0] = 'C'; /* Signature 'C'-Chernyaev, 'S'-Smirnov */
157 tgt[1] = 'S';
158 tgt[2] = (char) method;
159
// Bytes 3..5: compressed payload size, least-significant byte first.
160 state.out_size = state.out_offset - HDRSIZE; /* compressed size */
161 tgt[3] = (char)(state.out_size & 0xff);
162 tgt[4] = (char)((state.out_size >> 8) & 0xff);
163 tgt[5] = (char)((state.out_size >> 16) & 0xff);
164
// Bytes 6..8: original (uncompressed) size, least-significant byte first.
165 tgt[6] = (char)(state.in_size & 0xff); /* decompressed size */
166 tgt[7] = (char)((state.in_size >> 8) & 0xff);
167 tgt[8] = (char)((state.in_size >> 16) & 0xff);
168
169 *irep = state.out_offset;
170 return;
171}
172
173/**
174 * Compress buffer contents using the venerable zlib algorithm.
175 */
176static void R__zipZLIB(int cxlevel, int *srcsize, const char *src, int *tgtsize, char *tgt, int *irep)
177{
178 int err;
179 int method = Z_DEFLATED;
180
181 z_stream stream;
182 //Don't use the globals but want name similar to help see similarities in code
183 unsigned l_in_size, l_out_size;
184 *irep = 0;
185
186 if (*tgtsize <= 0) {
187 R__error("target buffer too small");
188 return;
189 }
190 if (*srcsize > 0xffffff) {
191 R__error("source buffer too big");
192 return;
193 }
194
195 stream.next_in = (Bytef*)src;
196 stream.avail_in = (uInt)(*srcsize);
197
198 stream.next_out = (Bytef*)(&tgt[HDRSIZE]);
199 stream.avail_out = (uInt)(*tgtsize) - HDRSIZE;
200
201 stream.zalloc = nullptr;
202 stream.zfree = nullptr;
203 stream.opaque = nullptr;
204
205 if (cxlevel > 9) cxlevel = 9;
206 err = deflateInit(&stream, cxlevel);
207 if (err != Z_OK) {
208 printf("error %d in deflateInit (zlib)\n",err);
209 return;
210 }
211
212 while ((err = deflate(&stream, Z_FINISH)) != Z_STREAM_END) {
213 if (err != Z_OK) {
214 deflateEnd(&stream);
215 return;
216 }
217 }
218
219 err = deflateEnd(&stream);
220 if (err != Z_OK)
221 printf("error %d in deflateEnd (zlib)\n",err);
222
223 tgt[0] = 'Z'; /* Signature ZLib */
224 tgt[1] = 'L';
225 tgt[2] = (char) method;
226
227 l_in_size = (unsigned) (*srcsize);
228 l_out_size = stream.total_out; /* compressed size */
229 tgt[3] = (char)(l_out_size & 0xff);
230 tgt[4] = (char)((l_out_size >> 8) & 0xff);
231 tgt[5] = (char)((l_out_size >> 16) & 0xff);
232
233 tgt[6] = (char)(l_in_size & 0xff); /* decompressed size */
234 tgt[7] = (char)((l_in_size >> 8) & 0xff);
235 tgt[8] = (char)((l_in_size >> 16) & 0xff);
236
237 *irep = stream.total_out + HDRSIZE;
238}
239
240/**
241 * Below are the routines for unzipping (inflating) buffers.
242 */
243
244static int is_valid_header_zlib(const unsigned char *src)
245{
246 return src[0] == 'Z' && src[1] == 'L' && src[2] == Z_DEFLATED;
247}
248
249static int is_valid_header_old(const unsigned char *src)
250{
251 return src[0] == 'C' && src[1] == 'S' && src[2] == Z_DEFLATED;
252}
253
/* Nonzero when the first three bytes at src are 'X', 'Z', 0.
   Reads src[0..2] only. */
static int is_valid_header_lzma(const unsigned char *src)
{
   return src[0] == 'X' && src[1] == 'Z' && src[2] == 0;
}
258
/* Nonzero when the first two bytes at src are 'L', '4'. Unlike the other
   signature checks in this file, the third header byte is not inspected. */
static int is_valid_header_lz4(const unsigned char *src)
{
   return src[0] == 'L' && src[1] == '4';
}
263
/* Nonzero when the first three bytes at src are 'Z', 'S', 1.
   Reads src[0..2] only. */
static int is_valid_header_zstd(const unsigned char *src)
{
   return src[0] == 'Z' && src[1] == 'S' && src[2] == '\1';
}
268
// General header check used by R__unzip_header() and R__unzip(): both treat a
// zero result as "error in header" and give up.
//
// NOTE(review): listing lines 271-272 (the body of this function) are missing
// from this page. Presumably it combines the per-algorithm is_valid_header_*
// checks defined just above -- confirm against the upstream file.
269static int is_valid_header(const unsigned char *src)
270{
273}
274
// NOTE(review): this function is only partly present on this page. Its
// signature (listing line 275) and every statement that followed an "if"
// (listing lines 278, 281, 283, 285, 287, 289, 291) are missing. The
// cross-reference index later on this page lists a function
// R__getCompressionAlgorithm(buf, bufsize) returning
// ROOT::RCompressionSetting::EAlgorithm::EValues at listing line 275, which
// presumably is this one -- confirm against the upstream file.
//
// What is visible: a guard on bufsize < 3, followed by signature checks on buf
// in the order ZSTD, ZLIB, LZ4, LZMA, old.
276{
277 if (bufsize < 3)
279
280 if (is_valid_header_zstd(const_cast<unsigned char *>(buf)))
282 if (is_valid_header_zlib(const_cast<unsigned char *>(buf)))
284 if (is_valid_header_lz4(const_cast<unsigned char *>(buf)))
286 if (is_valid_header_lzma(const_cast<unsigned char *>(buf)))
288 if (is_valid_header_old(const_cast<unsigned char *>(buf)))
290
292}
293
294int R__unzip_header(int *srcsize, const uch *src, int *tgtsize)
295{
296 // Reads header envelope, and determines target size.
297 // Returns 0 in case of success.
298
299 *srcsize = 0;
300 *tgtsize = 0;
301
302 /* C H E C K H E A D E R */
303 if (!is_valid_header(src)) {
304 fprintf(stderr, "Error R__unzip_header: error in header. Values: %x%x\n", src[0], src[1]);
305 return 1;
306 }
307
308 *srcsize = HDRSIZE + ((long)src[3] | ((long)src[4] << 8) | ((long)src[5] << 16));
309 *tgtsize = (long)src[6] | ((long)src[7] << 8) | ((long)src[8] << 16);
310
311 return 0;
312}
313
314
315/***********************************************************************
316 * *
317 * Name: R__unzip Date: 20.01.95 *
318 * Author: E.Chernyaev (IHEP/Protvino) Revised: *
319 * *
320 * Function: In memory ZIP decompression. Can be issued from FORTRAN. *
321 * Written for DELPHI collaboration (CERN) *
322 * *
323 * Input: srcsize - size of input buffer *
324 * src - input buffer *
325 * tgtsize - size of target buffer *
326 * *
327 * Output: tgt - target buffer (decompressed) *
328 * irep - size of decompressed data *
329 * 0 - if error *
330 * *
331 ***********************************************************************/
332// N.B. (Brian) - I have kept the original note out of complete awe of the
333// age of the original code...
334void R__unzip(int *srcsize, const uch *src, int *tgtsize, uch *tgt, int *irep)
335{
336 long isize;
337 const uch *ibufptr;
338 uch *obufptr;
339 long ibufcnt, obufcnt;
340
341 *irep = 0L;
342
343 /* C H E C K H E A D E R */
344
345 if (*srcsize < HDRSIZE) {
346 fprintf(stderr, "R__unzip: too small source\n");
347 return;
348 }
349
350 /* C H E C K H E A D E R */
351 if (!is_valid_header(src)) {
352 fprintf(stderr, "Error R__unzip: error in header\n");
353 return;
354 }
355
356 ibufptr = src + HDRSIZE;
357 ibufcnt = (long)src[3] | ((long)src[4] << 8) | ((long)src[5] << 16);
358 isize = (long)src[6] | ((long)src[7] << 8) | ((long)src[8] << 16);
359 obufptr = tgt;
360 obufcnt = *tgtsize;
361
362 if (obufcnt < isize) {
363 fprintf(stderr, "R__unzip: too small target (needed: %ld, given: %ld)\n", isize, obufcnt);
364 return;
365 }
366
367 if (ibufcnt + HDRSIZE != *srcsize) {
368 fprintf(stderr, "R__unzip: discrepancy in source length (expected size: %d, real size: %ld)\n",
369 *srcsize, ibufcnt + HDRSIZE);
370 return;
371 }
372
373 /* ZLIB and other standard compression algorithms */
374 if (is_valid_header_zlib(src)) {
375 R__unzipZLIB(srcsize, src, tgtsize, tgt, irep);
376 return;
377 } else if (is_valid_header_lzma(src)) {
378 R__unzipLZMA(srcsize, src, tgtsize, tgt, irep);
379 return;
380 } else if (is_valid_header_lz4(src)) {
381 R__unzipLZ4(srcsize, src, tgtsize, tgt, irep);
382 return;
383 } else if (is_valid_header_zstd(src)) {
384 R__unzipZSTD(srcsize, src, tgtsize, tgt, irep);
385 return;
386 }
387
388 /* Old zlib format */
389 if (R__Inflate(&ibufptr, &ibufcnt, &obufptr, &obufcnt)) {
390 fprintf(stderr, "R__unzip: error during decompression\n");
391 return;
392 }
393
394 /* if (obufptr - tgt != isize) {
395 There are some rare cases when a few more bytes are required */
396 if (obufptr - tgt > *tgtsize) {
397 fprintf(stderr, "R__unzip: discrepancy (%ld) with initial size: %ld, tgtsize=%d\n", (long)(obufptr - tgt), isize,
398 *tgtsize);
399 *irep = obufptr - tgt;
400 return;
401 }
402
403 *irep = isize;
404}
405
406void R__unzipZLIB(int *srcsize, const unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep)
407{
408 z_stream stream; /* decompression stream */
409 int err = 0;
410
411 stream.next_in = (Bytef *)(&src[HDRSIZE]);
412 stream.avail_in = (uInt)(*srcsize) - HDRSIZE;
413 stream.next_out = (Bytef *)tgt;
414 stream.avail_out = (uInt)(*tgtsize);
415 stream.zalloc = nullptr;
416 stream.zfree = nullptr;
417 stream.opaque = nullptr;
418
419 err = inflateInit(&stream);
420 if (err != Z_OK) {
421 fprintf(stderr, "R__unzip: error %d in inflateInit (zlib)\n", err);
422 return;
423 }
424
425 while ((err = inflate(&stream, Z_FINISH)) != Z_STREAM_END) {
426 if (err != Z_OK) {
427 inflateEnd(&stream);
428 fprintf(stderr, "R__unzip: error %d in inflate (zlib)\n", err);
429 return;
430 }
431 }
432
433 inflateEnd(&stream);
434
435 *irep = stream.total_out;
436 return;
437}
int R__bi_init(bits_internal_state *state)
Definition Bits.c:134
int R__error(char *msg)
Definition Bits.c:125
int gCompressionLevel
Copyright (C) 1990-1993 Mark Adler, Richard B.
Definition Bits.c:77
static int is_valid_header(unsigned char *src)
Definition RZip.cxx:269
static int is_valid_header_zlib(unsigned char *src)
Below are the routines for unzipping (inflating) buffers.
Definition RZip.cxx:244
static int is_valid_header_lz4(unsigned char *src)
Definition RZip.cxx:259
static int is_valid_header_old(unsigned char *src)
Definition RZip.cxx:249
int R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep)
Definition RZip.cxx:334
static int is_valid_header_lzma(unsigned char *src)
Definition RZip.cxx:254
static int R__unzipZLIB(int *srcsize, unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep)
Definition RZip.cxx:406
ROOT::RCompressionSetting::EAlgorithm::EValues R__getCompressionAlgorithm(unsigned char *buf, size_t bufsize)
Definition RZip.cxx:275
ROOT::RCompressionSetting::EAlgorithm::EValues R__ZipMode
Definition RZip.cxx:51
int R__SetZipMode(ROOT::RCompressionSetting::EAlgorithm::EValues mode)
Definition RZip.cxx:56
static int R__zipOld(int cxlevel, int *srcsize, char *src, int *tgtsize, char *tgrt, int *irep)
Forward decl's.
Definition RZip.cxx:115
#define HDRSIZE
Definition RZip.cxx:26
unsigned long R__crc32(unsigned long crc, unsigned char *buf, unsigned int len)
Definition RZip.cxx:61
static int R__zipZLIB(int cxlevel, int *srcsize, char *src, int *tgtsize, char *tgrt, int *irep)
Compress buffer contents using the venerable zlib algorithm.
Definition RZip.cxx:176
int R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize, char *tgt, int *irep, ROOT::RCompressionSetting::EAlgorithm::EValues compressionAlgorithm)
Definition RZip.cxx:73
static int is_valid_header_zstd(unsigned char *src)
Definition RZip.cxx:264
int R__unzip_header(int *srcsize, uch *src, int *tgtsize)
Definition RZip.cxx:294
Double_t err
ulg R__Deflate()
int R__ct_init()
tree_internal_state * R__get_thread_tree_state()
Definition ZTrees.c:293
int R__lm_init()
#define UNKNOWN
Definition ZIP.h:61
unsigned short ush
Definition ZInflate.c:225
int R__Inflate()
unsigned char uch
Definition ZInflate.c:224
void R__unzipLZ4(int *srcsize, const unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep)
Definition ZipLZ4.cxx:87
void R__zipLZ4(int cxlevel, int *srcsize, const char *src, int *tgtsize, char *tgt, int *irep)
Definition ZipLZ4.cxx:33
void R__zipZSTD(int cxlevel, int *srcsize, const char *src, int *tgtsize, char *tgt, int *irep)
Definition ZipZSTD.cxx:25
void R__unzipZSTD(int *srcsize, const unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep)
Definition ZipZSTD.cxx:61
Int_t bufsize
Definition h2root.cxx:268
printf("Client 0: bytes recv = %d, bytes sent = %d\n", s0->GetBytesRecv(), s0->GetBytesSent())
EValues
Note: this is only temporarily a struct and will become a enum class hence the name convention used.
Definition Compression.h:88
@ kUseGlobal
Use the global compression algorithm.
Definition Compression.h:93
@ kOldCompressionAlgo
Use the old compression algorithm.
Definition Compression.h:99
@ kUndefined
Undefined compression algorithm (must be kept the last of the list in case a new algorithm is added).
unsigned out_size
Definition Bits.h:69
tree_internal_state * t_state
Definition Bits.h:175
uch R__window[2L *((unsigned) 32768)]
Definition Bits.h:94
unsigned out_offset
Definition Bits.h:64
unsigned in_size
Definition Bits.h:69
unsigned in_offset
Definition Bits.h:64
Pos R__prev[((unsigned) 32768)]
Definition Bits.h:104
char * in_buf
Definition Bits.h:58
char * out_buf
Definition Bits.h:59