Mercurial > repos > blastem
comparison zlib/crc32.c @ 2690:9ef72ee5c0b0
Update vendored zlib to 1.3.1
author | Michael Pavone <pavone@retrodev.com> |
---|---|
date | Sun, 15 Jun 2025 15:39:33 -0700 |
parents | 00d788dac91a |
children |
comparison
equal
deleted
inserted
replaced
2689:bd6e33de0972 | 2690:9ef72ee5c0b0 |
---|---|
1 /* crc32.c -- compute the CRC-32 of a data stream | 1 /* crc32.c -- compute the CRC-32 of a data stream |
2 * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler | 2 * Copyright (C) 1995-2022 Mark Adler |
3 * For conditions of distribution and use, see copyright notice in zlib.h | 3 * For conditions of distribution and use, see copyright notice in zlib.h |
4 * | 4 * |
5 * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster | 5 * This interleaved implementation of a CRC makes use of pipelined multiple |
6 * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing | 6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to |
7 * tables for updating the shift register in one step with three exclusive-ors | 7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. |
8 * instead of four steps with four exclusive-ors. This results in about a | |
9 * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. | |
10 */ | 8 */ |
11 | 9 |
12 /* @(#) $Id$ */ | 10 /* @(#) $Id$ */ |
13 | 11 |
14 /* | 12 /* |
15 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore | 13 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore |
16 protection on the static variables used to control the first-use generation | 14 protection on the static variables used to control the first-use generation |
17 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should | 15 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should |
18 first call get_crc_table() to initialize the tables before allowing more than | 16 first call get_crc_table() to initialize the tables before allowing more than |
19 one thread to use crc32(). | 17 one thread to use crc32(). |
20 | 18 |
21 DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. | 19 MAKECRCH can be #defined to write out crc32.h. A main() routine is also |
20 produced, so that this one source file can be compiled to an executable. | |
22 */ | 21 */ |
23 | 22 |
24 #ifdef MAKECRCH | 23 #ifdef MAKECRCH |
25 # include <stdio.h> | 24 # include <stdio.h> |
26 # ifndef DYNAMIC_CRC_TABLE | 25 # ifndef DYNAMIC_CRC_TABLE |
27 # define DYNAMIC_CRC_TABLE | 26 # define DYNAMIC_CRC_TABLE |
28 # endif /* !DYNAMIC_CRC_TABLE */ | 27 # endif /* !DYNAMIC_CRC_TABLE */ |
29 #endif /* MAKECRCH */ | 28 #endif /* MAKECRCH */ |
30 | 29 |
31 #include "zutil.h" /* for STDC and FAR definitions */ | 30 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ |
32 | 31 |
33 /* Definitions for doing the crc four data bytes at a time. */ | 32 /* |
34 #if !defined(NOBYFOUR) && defined(Z_U4) | 33 A CRC of a message is computed on N braids of words in the message, where |
35 # define BYFOUR | 34 each word consists of W bytes (4 or 8). If N is 3, for example, then three |
36 #endif | 35 running sparse CRCs are calculated respectively on each braid, at these |
37 #ifdef BYFOUR | 36 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... |
38 local unsigned long crc32_little OF((unsigned long, | 37 This is done starting at a word boundary, and continues until as many blocks |
39 const unsigned char FAR *, z_size_t)); | 38 of N * W bytes as are available have been processed. The results are combined |
40 local unsigned long crc32_big OF((unsigned long, | 39 into a single CRC at the end. For this code, N must be in the range 1..6 and |
41 const unsigned char FAR *, z_size_t)); | 40 W must be 4 or 8. The upper limit on N can be increased if desired by adding |
42 # define TBLS 8 | 41 more #if blocks, extending the patterns apparent in the code. In addition, |
42 crc32.h would need to be regenerated, if the maximum N value is increased. | |
43 | |
44 N and W are chosen empirically by benchmarking the execution time on a given | |
45 processor. The choices for N and W below were based on testing on Intel Kaby | |
46 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 | |
47 Octeon II processors. The Intel, AMD, and ARM processors were all fastest | |
48 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. | |
49 They were all tested with either gcc or clang, all using the -O3 optimization | |
50 level. Your mileage may vary. | |
51 */ | |
52 | |
53 /* Define N */ | |
54 #ifdef Z_TESTN | |
55 # define N Z_TESTN | |
43 #else | 56 #else |
44 # define TBLS 1 | 57 # define N 5 |
45 #endif /* BYFOUR */ | 58 #endif |
46 | 59 #if N < 1 || N > 6 |
47 /* Local functions for crc concatenation */ | 60 # error N must be in 1..6 |
48 local unsigned long gf2_matrix_times OF((unsigned long *mat, | 61 #endif |
49 unsigned long vec)); | 62 |
50 local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); | 63 /* |
51 local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); | 64 z_crc_t must be at least 32 bits. z_word_t must be at least as long as |
52 | 65 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and |
66 that bytes are eight bits. | |
67 */ | |
68 | |
69 /* | |
70 Define W and the associated z_word_t type. If W is not defined, then a | |
71 braided calculation is not used, and the associated tables and code are not | |
72 compiled. | |
73 */ | |
74 #ifdef Z_TESTW | |
75 # if Z_TESTW-1 != -1 | |
76 # define W Z_TESTW | |
77 # endif | |
78 #else | |
79 # ifdef MAKECRCH | |
80 # define W 8 /* required for MAKECRCH */ | |
81 # else | |
82 # if defined(__x86_64__) || defined(__aarch64__) | |
83 # define W 8 | |
84 # else | |
85 # define W 4 | |
86 # endif | |
87 # endif | |
88 #endif | |
89 #ifdef W | |
90 # if W == 8 && defined(Z_U8) | |
91 typedef Z_U8 z_word_t; | |
92 # elif defined(Z_U4) | |
93 # undef W | |
94 # define W 4 | |
95 typedef Z_U4 z_word_t; | |
96 # else | |
97 # undef W | |
98 # endif | |
99 #endif | |
100 | |
101 /* If available, use the ARM processor CRC32 instruction. */ | |
102 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 | |
103 # define ARMCRC32 | |
104 #endif | |
105 | |
106 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) | |
107 /* | |
108 Swap the bytes in a z_word_t to convert between little and big endian. Any | |
109 self-respecting compiler will optimize this to a single machine byte-swap | |
110 instruction, if one is available. This assumes that word_t is either 32 bits | |
111 or 64 bits. | |
112 */ | |
113 local z_word_t byte_swap(z_word_t word) { | |
114 # if W == 8 | |
115 return | |
116 (word & 0xff00000000000000) >> 56 | | |
117 (word & 0xff000000000000) >> 40 | | |
118 (word & 0xff0000000000) >> 24 | | |
119 (word & 0xff00000000) >> 8 | | |
120 (word & 0xff000000) << 8 | | |
121 (word & 0xff0000) << 24 | | |
122 (word & 0xff00) << 40 | | |
123 (word & 0xff) << 56; | |
124 # else /* W == 4 */ | |
125 return | |
126 (word & 0xff000000) >> 24 | | |
127 (word & 0xff0000) >> 8 | | |
128 (word & 0xff00) << 8 | | |
129 (word & 0xff) << 24; | |
130 # endif | |
131 } | |
132 #endif | |
53 | 133 |
54 #ifdef DYNAMIC_CRC_TABLE | 134 #ifdef DYNAMIC_CRC_TABLE |
55 | 135 /* ========================================================================= |
56 local volatile int crc_table_empty = 1; | 136 * Table of powers of x for combining CRC-32s, filled in by make_crc_table() |
57 local z_crc_t FAR crc_table[TBLS][256]; | 137 * below. |
58 local void make_crc_table OF((void)); | 138 */ |
139 local z_crc_t FAR x2n_table[32]; | |
140 #else | |
141 /* ========================================================================= | |
142 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers | |
143 * of x for combining CRC-32s, all made by make_crc_table(). | |
144 */ | |
145 # include "crc32.h" | |
146 #endif | |
147 | |
148 /* CRC polynomial. */ | |
149 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ | |
150 | |
151 /* | |
152 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, | |
153 reflected. For speed, this requires that a not be zero. | |
154 */ | |
155 local z_crc_t multmodp(z_crc_t a, z_crc_t b) { | |
156 z_crc_t m, p; | |
157 | |
158 m = (z_crc_t)1 << 31; | |
159 p = 0; | |
160 for (;;) { | |
161 if (a & m) { | |
162 p ^= b; | |
163 if ((a & (m - 1)) == 0) | |
164 break; | |
165 } | |
166 m >>= 1; | |
167 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; | |
168 } | |
169 return p; | |
170 } | |
171 | |
172 /* | |
173 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been | |
174 initialized. | |
175 */ | |
176 local z_crc_t x2nmodp(z_off64_t n, unsigned k) { | |
177 z_crc_t p; | |
178 | |
179 p = (z_crc_t)1 << 31; /* x^0 == 1 */ | |
180 while (n) { | |
181 if (n & 1) | |
182 p = multmodp(x2n_table[k & 31], p); | |
183 n >>= 1; | |
184 k++; | |
185 } | |
186 return p; | |
187 } | |
188 | |
189 #ifdef DYNAMIC_CRC_TABLE | |
190 /* ========================================================================= | |
191 * Build the tables for byte-wise and braided CRC-32 calculations, and a table | |
192 * of powers of x for combining CRC-32s. | |
193 */ | |
194 local z_crc_t FAR crc_table[256]; | |
195 #ifdef W | |
196 local z_word_t FAR crc_big_table[256]; | |
197 local z_crc_t FAR crc_braid_table[W][256]; | |
198 local z_word_t FAR crc_braid_big_table[W][256]; | |
199 local void braid(z_crc_t [][256], z_word_t [][256], int, int); | |
200 #endif | |
59 #ifdef MAKECRCH | 201 #ifdef MAKECRCH |
60 local void write_table OF((FILE *, const z_crc_t FAR *)); | 202 local void write_table(FILE *, const z_crc_t FAR *, int); |
203 local void write_table32hi(FILE *, const z_word_t FAR *, int); | |
204 local void write_table64(FILE *, const z_word_t FAR *, int); | |
61 #endif /* MAKECRCH */ | 205 #endif /* MAKECRCH */ |
206 | |
207 /* | |
208 Define a once() function depending on the availability of atomics. If this is | |
209 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in | |
210 multiple threads, and if atomics are not available, then get_crc_table() must | |
211 be called to initialize the tables and must return before any threads are | |
212 allowed to compute or combine CRCs. | |
213 */ | |
214 | |
215 /* Definition of once functionality. */ | |
216 typedef struct once_s once_t; | |
217 | |
218 /* Check for the availability of atomics. */ | |
219 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ | |
220 !defined(__STDC_NO_ATOMICS__) | |
221 | |
222 #include <stdatomic.h> | |
223 | |
224 /* Structure for once(), which must be initialized with ONCE_INIT. */ | |
225 struct once_s { | |
226 atomic_flag begun; | |
227 atomic_int done; | |
228 }; | |
229 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} | |
230 | |
231 /* | |
232 Run the provided init() function exactly once, even if multiple threads | |
233 invoke once() at the same time. The state must be a once_t initialized with | |
234 ONCE_INIT. | |
235 */ | |
236 local void once(once_t *state, void (*init)(void)) { | |
237 if (!atomic_load(&state->done)) { | |
238 if (atomic_flag_test_and_set(&state->begun)) | |
239 while (!atomic_load(&state->done)) | |
240 ; | |
241 else { | |
242 init(); | |
243 atomic_store(&state->done, 1); | |
244 } | |
245 } | |
246 } | |
247 | |
248 #else /* no atomics */ | |
249 | |
250 /* Structure for once(), which must be initialized with ONCE_INIT. */ | |
251 struct once_s { | |
252 volatile int begun; | |
253 volatile int done; | |
254 }; | |
255 #define ONCE_INIT {0, 0} | |
256 | |
257 /* Test and set. Alas, not atomic, but tries to minimize the period of | |
258 vulnerability. */ | |
259 local int test_and_set(int volatile *flag) { | |
260 int was; | |
261 | |
262 was = *flag; | |
263 *flag = 1; | |
264 return was; | |
265 } | |
266 | |
267 /* Run the provided init() function once. This is not thread-safe. */ | |
268 local void once(once_t *state, void (*init)(void)) { | |
269 if (!state->done) { | |
270 if (test_and_set(&state->begun)) | |
271 while (!state->done) | |
272 ; | |
273 else { | |
274 init(); | |
275 state->done = 1; | |
276 } | |
277 } | |
278 } | |
279 | |
280 #endif | |
281 | |
282 /* State for once(). */ | |
283 local once_t made = ONCE_INIT; | |
284 | |
62 /* | 285 /* |
63 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: | 286 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: |
64 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. | 287 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. |
65 | 288 |
66 Polynomials over GF(2) are represented in binary, one bit per coefficient, | 289 Polynomials over GF(2) are represented in binary, one bit per coefficient, |
67 with the lowest powers in the most significant bit. Then adding polynomials | 290 with the lowest powers in the most significant bit. Then adding polynomials |
68 is just exclusive-or, and multiplying a polynomial by x is a right shift by | 291 is just exclusive-or, and multiplying a polynomial by x is a right shift by |
69 one. If we call the above polynomial p, and represent a byte as the | 292 one. If we call the above polynomial p, and represent a byte as the |
70 polynomial q, also with the lowest power in the most significant bit (so the | 293 polynomial q, also with the lowest power in the most significant bit (so the |
71 byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, | 294 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, |
72 where a mod b means the remainder after dividing a by b. | 295 where a mod b means the remainder after dividing a by b. |
73 | 296 |
74 This calculation is done using the shift-register method of multiplying and | 297 This calculation is done using the shift-register method of multiplying and |
75 taking the remainder. The register is initialized to zero, and for each | 298 taking the remainder. The register is initialized to zero, and for each |
76 incoming bit, x^32 is added mod p to the register if the bit is a one (where | 299 incoming bit, x^32 is added mod p to the register if the bit is a one (where |
77 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by | 300 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x |
78 x (which is shifting right by one and adding x^32 mod p if the bit shifted | 301 (which is shifting right by one and adding x^32 mod p if the bit shifted out |
79 out is a one). We start with the highest power (least significant bit) of | 302 is a one). We start with the highest power (least significant bit) of q and |
80 q and repeat for all eight bits of q. | 303 repeat for all eight bits of q. |
81 | 304 |
82 The first table is simply the CRC of all possible eight bit values. This is | 305 The table is simply the CRC of all possible eight bit values. This is all the |
83 all the information needed to generate CRCs on data a byte at a time for all | 306 information needed to generate CRCs on data a byte at a time for all |
84 combinations of CRC register values and incoming bytes. The remaining tables | 307 combinations of CRC register values and incoming bytes. |
85 allow for word-at-a-time CRC calculation for both big-endian and little- | 308 */ |
86 endian machines, where a word is four bytes. | 309 |
87 */ | 310 local void make_crc_table(void) { |
88 local void make_crc_table() | 311 unsigned i, j, n; |
89 { | 312 z_crc_t p; |
90 z_crc_t c; | 313 |
91 int n, k; | 314 /* initialize the CRC of bytes tables */ |
92 z_crc_t poly; /* polynomial exclusive-or pattern */ | 315 for (i = 0; i < 256; i++) { |
93 /* terms of polynomial defining this crc (except x^32): */ | 316 p = i; |
94 static volatile int first = 1; /* flag to limit concurrent making */ | 317 for (j = 0; j < 8; j++) |
95 static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; | 318 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; |
96 | 319 crc_table[i] = p; |
97 /* See if another task is already doing this (not thread-safe, but better | 320 #ifdef W |
98 than nothing -- significantly reduces duration of vulnerability in | 321 crc_big_table[i] = byte_swap(p); |
99 case the advice about DYNAMIC_CRC_TABLE is ignored) */ | 322 #endif |
100 if (first) { | 323 } |
101 first = 0; | 324 |
102 | 325 /* initialize the x^2^n mod p(x) table */ |
103 /* make exclusive-or pattern from polynomial (0xedb88320UL) */ | 326 p = (z_crc_t)1 << 30; /* x^1 */ |
104 poly = 0; | 327 x2n_table[0] = p; |
105 for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) | 328 for (n = 1; n < 32; n++) |
106 poly |= (z_crc_t)1 << (31 - p[n]); | 329 x2n_table[n] = p = multmodp(p, p); |
107 | 330 |
108 /* generate a crc for every 8-bit value */ | 331 #ifdef W |
109 for (n = 0; n < 256; n++) { | 332 /* initialize the braiding tables -- needs x2n_table[] */ |
110 c = (z_crc_t)n; | 333 braid(crc_braid_table, crc_braid_big_table, N, W); |
111 for (k = 0; k < 8; k++) | 334 #endif |
112 c = c & 1 ? poly ^ (c >> 1) : c >> 1; | |
113 crc_table[0][n] = c; | |
114 } | |
115 | |
116 #ifdef BYFOUR | |
117 /* generate crc for each value followed by one, two, and three zeros, | |
118 and then the byte reversal of those as well as the first table */ | |
119 for (n = 0; n < 256; n++) { | |
120 c = crc_table[0][n]; | |
121 crc_table[4][n] = ZSWAP32(c); | |
122 for (k = 1; k < 4; k++) { | |
123 c = crc_table[0][c & 0xff] ^ (c >> 8); | |
124 crc_table[k][n] = c; | |
125 crc_table[k + 4][n] = ZSWAP32(c); | |
126 } | |
127 } | |
128 #endif /* BYFOUR */ | |
129 | |
130 crc_table_empty = 0; | |
131 } | |
132 else { /* not first */ | |
133 /* wait for the other guy to finish (not efficient, but rare) */ | |
134 while (crc_table_empty) | |
135 ; | |
136 } | |
137 | 335 |
138 #ifdef MAKECRCH | 336 #ifdef MAKECRCH |
139 /* write out CRC tables to crc32.h */ | |
140 { | 337 { |
338 /* | |
339 The crc32.h header file contains tables for both 32-bit and 64-bit | |
340 z_word_t's, and so requires a 64-bit type be available. In that case, | |
341 z_word_t must be defined to be 64-bits. This code then also generates | |
342 and writes out the tables for the case that z_word_t is 32 bits. | |
343 */ | |
344 #if !defined(W) || W != 8 | |
345 # error Need a 64-bit integer type in order to generate crc32.h. | |
346 #endif | |
141 FILE *out; | 347 FILE *out; |
348 int k, n; | |
349 z_crc_t ltl[8][256]; | |
350 z_word_t big[8][256]; | |
142 | 351 |
143 out = fopen("crc32.h", "w"); | 352 out = fopen("crc32.h", "w"); |
144 if (out == NULL) return; | 353 if (out == NULL) return; |
145 fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); | 354 |
146 fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); | 355 /* write out little-endian CRC table to crc32.h */ |
147 fprintf(out, "local const z_crc_t FAR "); | 356 fprintf(out, |
148 fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); | 357 "/* crc32.h -- tables for rapid CRC calculation\n" |
149 write_table(out, crc_table[0]); | 358 " * Generated automatically by crc32.c\n */\n" |
150 # ifdef BYFOUR | 359 "\n" |
151 fprintf(out, "#ifdef BYFOUR\n"); | 360 "local const z_crc_t FAR crc_table[] = {\n" |
152 for (k = 1; k < 8; k++) { | 361 " "); |
153 fprintf(out, " },\n {\n"); | 362 write_table(out, crc_table, 256); |
154 write_table(out, crc_table[k]); | 363 fprintf(out, |
364 "};\n"); | |
365 | |
366 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ | |
367 fprintf(out, | |
368 "\n" | |
369 "#ifdef W\n" | |
370 "\n" | |
371 "#if W == 8\n" | |
372 "\n" | |
373 "local const z_word_t FAR crc_big_table[] = {\n" | |
374 " "); | |
375 write_table64(out, crc_big_table, 256); | |
376 fprintf(out, | |
377 "};\n"); | |
378 | |
379 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ | |
380 fprintf(out, | |
381 "\n" | |
382 "#else /* W == 4 */\n" | |
383 "\n" | |
384 "local const z_word_t FAR crc_big_table[] = {\n" | |
385 " "); | |
386 write_table32hi(out, crc_big_table, 256); | |
387 fprintf(out, | |
388 "};\n" | |
389 "\n" | |
390 "#endif\n"); | |
391 | |
392 /* write out braid tables for each value of N */ | |
393 for (n = 1; n <= 6; n++) { | |
394 fprintf(out, | |
395 "\n" | |
396 "#if N == %d\n", n); | |
397 | |
398 /* compute braid tables for this N and 64-bit word_t */ | |
399 braid(ltl, big, n, 8); | |
400 | |
401 /* write out braid tables for 64-bit z_word_t to crc32.h */ | |
402 fprintf(out, | |
403 "\n" | |
404 "#if W == 8\n" | |
405 "\n" | |
406 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); | |
407 for (k = 0; k < 8; k++) { | |
408 fprintf(out, " {"); | |
409 write_table(out, ltl[k], 256); | |
410 fprintf(out, "}%s", k < 7 ? ",\n" : ""); | |
411 } | |
412 fprintf(out, | |
413 "};\n" | |
414 "\n" | |
415 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); | |
416 for (k = 0; k < 8; k++) { | |
417 fprintf(out, " {"); | |
418 write_table64(out, big[k], 256); | |
419 fprintf(out, "}%s", k < 7 ? ",\n" : ""); | |
420 } | |
421 fprintf(out, | |
422 "};\n"); | |
423 | |
424 /* compute braid tables for this N and 32-bit word_t */ | |
425 braid(ltl, big, n, 4); | |
426 | |
427 /* write out braid tables for 32-bit z_word_t to crc32.h */ | |
428 fprintf(out, | |
429 "\n" | |
430 "#else /* W == 4 */\n" | |
431 "\n" | |
432 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); | |
433 for (k = 0; k < 4; k++) { | |
434 fprintf(out, " {"); | |
435 write_table(out, ltl[k], 256); | |
436 fprintf(out, "}%s", k < 3 ? ",\n" : ""); | |
437 } | |
438 fprintf(out, | |
439 "};\n" | |
440 "\n" | |
441 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); | |
442 for (k = 0; k < 4; k++) { | |
443 fprintf(out, " {"); | |
444 write_table32hi(out, big[k], 256); | |
445 fprintf(out, "}%s", k < 3 ? ",\n" : ""); | |
446 } | |
447 fprintf(out, | |
448 "};\n" | |
449 "\n" | |
450 "#endif\n" | |
451 "\n" | |
452 "#endif\n"); | |
155 } | 453 } |
156 fprintf(out, "#endif\n"); | 454 fprintf(out, |
157 # endif /* BYFOUR */ | 455 "\n" |
158 fprintf(out, " }\n};\n"); | 456 "#endif\n"); |
457 | |
458 /* write out zeros operator table to crc32.h */ | |
459 fprintf(out, | |
460 "\n" | |
461 "local const z_crc_t FAR x2n_table[] = {\n" | |
462 " "); | |
463 write_table(out, x2n_table, 32); | |
464 fprintf(out, | |
465 "};\n"); | |
159 fclose(out); | 466 fclose(out); |
160 } | 467 } |
161 #endif /* MAKECRCH */ | 468 #endif /* MAKECRCH */ |
162 } | 469 } |
163 | 470 |
164 #ifdef MAKECRCH | 471 #ifdef MAKECRCH |
165 local void write_table(out, table) | 472 |
166 FILE *out; | 473 /* |
167 const z_crc_t FAR *table; | 474 Write the 32-bit values in table[0..k-1] to out, five per line in |
168 { | 475 hexadecimal separated by commas. |
476 */ | |
477 local void write_table(FILE *out, const z_crc_t FAR *table, int k) { | |
169 int n; | 478 int n; |
170 | 479 |
171 for (n = 0; n < 256; n++) | 480 for (n = 0; n < k; n++) |
172 fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", | 481 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", |
173 (unsigned long)(table[n]), | 482 (unsigned long)(table[n]), |
174 n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); | 483 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); |
175 } | 484 } |
485 | |
486 /* | |
487 Write the high 32-bits of each value in table[0..k-1] to out, five per line | |
488 in hexadecimal separated by commas. | |
489 */ | |
490 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) { | |
491 int n; | |
492 | |
493 for (n = 0; n < k; n++) | |
494 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", | |
495 (unsigned long)(table[n] >> 32), | |
496 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); | |
497 } | |
498 | |
499 /* | |
500 Write the 64-bit values in table[0..k-1] to out, three per line in | |
501 hexadecimal separated by commas. This assumes that if there is a 64-bit | |
502 type, then there is also a long long integer type, and it is at least 64 | |
503 bits. If not, then the type cast and format string can be adjusted | |
504 accordingly. | |
505 */ | |
506 local void write_table64(FILE *out, const z_word_t FAR *table, int k) { | |
507 int n; | |
508 | |
509 for (n = 0; n < k; n++) | |
510 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", | |
511 (unsigned long long)(table[n]), | |
512 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); | |
513 } | |
514 | |
515 /* Actually do the deed. */ | |
516 int main(void) { | |
517 make_crc_table(); | |
518 return 0; | |
519 } | |
520 | |
176 #endif /* MAKECRCH */ | 521 #endif /* MAKECRCH */ |
177 | 522 |
178 #else /* !DYNAMIC_CRC_TABLE */ | 523 #ifdef W |
179 /* ======================================================================== | 524 /* |
180 * Tables of CRC-32s of all single-byte values, made by make_crc_table(). | 525 Generate the little and big-endian braid tables for the given n and z_word_t |
181 */ | 526 size w. Each array must have room for w blocks of 256 elements. |
182 #include "crc32.h" | 527 */ |
528 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { | |
529 int k; | |
530 z_crc_t i, p, q; | |
531 for (k = 0; k < w; k++) { | |
532 p = x2nmodp((n * w + 3 - k) << 3, 0); | |
533 ltl[k][0] = 0; | |
534 big[w - 1 - k][0] = 0; | |
535 for (i = 1; i < 256; i++) { | |
536 ltl[k][i] = q = multmodp(i << 24, p); | |
537 big[w - 1 - k][i] = byte_swap(q); | |
538 } | |
539 } | |
540 } | |
541 #endif | |
542 | |
183 #endif /* DYNAMIC_CRC_TABLE */ | 543 #endif /* DYNAMIC_CRC_TABLE */ |
184 | 544 |
185 /* ========================================================================= | 545 /* ========================================================================= |
186 * This function can be used by asm versions of crc32() | 546 * This function can be used by asm versions of crc32(), and to force the |
187 */ | 547 * generation of the CRC tables in a threaded application. |
188 const z_crc_t FAR * ZEXPORT get_crc_table() | 548 */ |
189 { | 549 const z_crc_t FAR * ZEXPORT get_crc_table(void) { |
190 #ifdef DYNAMIC_CRC_TABLE | 550 #ifdef DYNAMIC_CRC_TABLE |
191 if (crc_table_empty) | 551 once(&made, make_crc_table); |
192 make_crc_table(); | |
193 #endif /* DYNAMIC_CRC_TABLE */ | 552 #endif /* DYNAMIC_CRC_TABLE */ |
194 return (const z_crc_t FAR *)crc_table; | 553 return (const z_crc_t FAR *)crc_table; |
195 } | 554 } |
196 | 555 |
556 /* ========================================================================= | |
557 * Use ARM machine instructions if available. This will compute the CRC about | |
558 * ten times faster than the braided calculation. This code does not check for | |
559 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will | |
560 * only be defined if the compilation specifies an ARM processor architecture | |
561 * that has the instructions. For example, compiling with -march=armv8.1-a or | |
562 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 | |
563 * instructions. | |
564 */ | |
565 #ifdef ARMCRC32 | |
566 | |
567 /* | |
568 Constants empirically determined to maximize speed. These values are from | |
569 measurements on a Cortex-A57. Your mileage may vary. | |
570 */ | |
571 #define Z_BATCH 3990 /* number of words in a batch */ | |
572 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ | |
573 #define Z_BATCH_MIN 800 /* fewest words in a final batch */ | |
574 | |
575 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, | |
576 z_size_t len) { | |
577 z_crc_t val; | |
578 z_word_t crc1, crc2; | |
579 const z_word_t *word; | |
580 z_word_t val0, val1, val2; | |
581 z_size_t last, last2, i; | |
582 z_size_t num; | |
583 | |
584 /* Return initial CRC, if requested. */ | |
585 if (buf == Z_NULL) return 0; | |
586 | |
587 #ifdef DYNAMIC_CRC_TABLE | |
588 once(&made, make_crc_table); | |
589 #endif /* DYNAMIC_CRC_TABLE */ | |
590 | |
591 /* Pre-condition the CRC */ | |
592 crc = (~crc) & 0xffffffff; | |
593 | |
594 /* Compute the CRC up to a word boundary. */ | |
595 while (len && ((z_size_t)buf & 7) != 0) { | |
596 len--; | |
597 val = *buf++; | |
598 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); | |
599 } | |
600 | |
601 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ | |
602 word = (z_word_t const *)buf; | |
603 num = len >> 3; | |
604 len &= 7; | |
605 | |
606 /* Do three interleaved CRCs to realize the throughput of one crc32x | |
607 instruction per cycle. Each CRC is calculated on Z_BATCH words. The | |
608 three CRCs are combined into a single CRC after each set of batches. */ | |
609 while (num >= 3 * Z_BATCH) { | |
610 crc1 = 0; | |
611 crc2 = 0; | |
612 for (i = 0; i < Z_BATCH; i++) { | |
613 val0 = word[i]; | |
614 val1 = word[i + Z_BATCH]; | |
615 val2 = word[i + 2 * Z_BATCH]; | |
616 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | |
617 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); | |
618 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); | |
619 } | |
620 word += 3 * Z_BATCH; | |
621 num -= 3 * Z_BATCH; | |
622 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; | |
623 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; | |
624 } | |
625 | |
626 /* Do one last smaller batch with the remaining words, if there are enough | |
627 to pay for the combination of CRCs. */ | |
628 last = num / 3; | |
629 if (last >= Z_BATCH_MIN) { | |
630 last2 = last << 1; | |
631 crc1 = 0; | |
632 crc2 = 0; | |
633 for (i = 0; i < last; i++) { | |
634 val0 = word[i]; | |
635 val1 = word[i + last]; | |
636 val2 = word[i + last2]; | |
637 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | |
638 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); | |
639 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); | |
640 } | |
641 word += 3 * last; | |
642 num -= 3 * last; | |
643 val = x2nmodp(last, 6); | |
644 crc = multmodp(val, crc) ^ crc1; | |
645 crc = multmodp(val, crc) ^ crc2; | |
646 } | |
647 | |
648 /* Compute the CRC on any remaining words. */ | |
649 for (i = 0; i < num; i++) { | |
650 val0 = word[i]; | |
651 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | |
652 } | |
653 word += num; | |
654 | |
655 /* Complete the CRC on any remaining bytes. */ | |
656 buf = (const unsigned char FAR *)word; | |
657 while (len) { | |
658 len--; | |
659 val = *buf++; | |
660 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); | |
661 } | |
662 | |
663 /* Return the CRC, post-conditioned. */ | |
664 return crc ^ 0xffffffff; | |
665 } | |
666 | |
667 #else | |
668 | |
669 #ifdef W | |
670 | |
671 /* | |
672 Return the CRC of the W bytes in the word_t data, taking the | |
673 least-significant byte of the word as the first byte of data, without any pre | |
674 or post conditioning. This is used to combine the CRCs of each braid. | |
675 */ | |
676 local z_crc_t crc_word(z_word_t data) { | |
677 int k; | |
678 for (k = 0; k < W; k++) | |
679 data = (data >> 8) ^ crc_table[data & 0xff]; | |
680 return (z_crc_t)data; | |
681 } | |
682 | |
683 local z_word_t crc_word_big(z_word_t data) { | |
684 int k; | |
685 for (k = 0; k < W; k++) | |
686 data = (data << 8) ^ | |
687 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; | |
688 return data; | |
689 } | |
690 | |
691 #endif | |
692 | |
197 /* ========================================================================= */ | 693 /* ========================================================================= */ |
198 #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) | 694 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, |
199 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 | 695 z_size_t len) { |
696 /* Return initial CRC, if requested. */ | |
697 if (buf == Z_NULL) return 0; | |
698 | |
699 #ifdef DYNAMIC_CRC_TABLE | |
700 once(&made, make_crc_table); | |
701 #endif /* DYNAMIC_CRC_TABLE */ | |
702 | |
703 /* Pre-condition the CRC */ | |
704 crc = (~crc) & 0xffffffff; | |
705 | |
706 #ifdef W | |
707 | |
708 /* If provided enough bytes, do a braided CRC calculation. */ | |
709 if (len >= N * W + W - 1) { | |
710 z_size_t blks; | |
711 z_word_t const *words; | |
712 unsigned endian; | |
713 int k; | |
714 | |
715 /* Compute the CRC up to a z_word_t boundary. */ | |
716 while (len && ((z_size_t)buf & (W - 1)) != 0) { | |
717 len--; | |
718 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
719 } | |
720 | |
721 /* Compute the CRC on as many N z_word_t blocks as are available. */ | |
722 blks = len / (N * W); | |
723 len -= blks * N * W; | |
724 words = (z_word_t const *)buf; | |
725 | |
726 /* Do endian check at execution time instead of compile time, since ARM | |
727 processors can change the endianness at execution time. If the | |
728 compiler knows what the endianness will be, it can optimize out the | |
729 check and the unused branch. */ | |
730 endian = 1; | |
731 if (*(unsigned char *)&endian) { | |
732 /* Little endian. */ | |
733 | |
734 z_crc_t crc0; | |
735 z_word_t word0; | |
736 #if N > 1 | |
737 z_crc_t crc1; | |
738 z_word_t word1; | |
739 #if N > 2 | |
740 z_crc_t crc2; | |
741 z_word_t word2; | |
742 #if N > 3 | |
743 z_crc_t crc3; | |
744 z_word_t word3; | |
745 #if N > 4 | |
746 z_crc_t crc4; | |
747 z_word_t word4; | |
748 #if N > 5 | |
749 z_crc_t crc5; | |
750 z_word_t word5; | |
751 #endif | |
752 #endif | |
753 #endif | |
754 #endif | |
755 #endif | |
756 | |
757 /* Initialize the CRC for each braid. */ | |
758 crc0 = crc; | |
759 #if N > 1 | |
760 crc1 = 0; | |
761 #if N > 2 | |
762 crc2 = 0; | |
763 #if N > 3 | |
764 crc3 = 0; | |
765 #if N > 4 | |
766 crc4 = 0; | |
767 #if N > 5 | |
768 crc5 = 0; | |
769 #endif | |
770 #endif | |
771 #endif | |
772 #endif | |
773 #endif | |
774 | |
775 /* | |
776 Process the first blks-1 blocks, computing the CRCs on each braid | |
777 independently. | |
778 */ | |
779 while (--blks) { | |
780 /* Load the word for each braid into registers. */ | |
781 word0 = crc0 ^ words[0]; | |
782 #if N > 1 | |
783 word1 = crc1 ^ words[1]; | |
784 #if N > 2 | |
785 word2 = crc2 ^ words[2]; | |
786 #if N > 3 | |
787 word3 = crc3 ^ words[3]; | |
788 #if N > 4 | |
789 word4 = crc4 ^ words[4]; | |
790 #if N > 5 | |
791 word5 = crc5 ^ words[5]; | |
792 #endif | |
793 #endif | |
794 #endif | |
795 #endif | |
796 #endif | |
797 words += N; | |
798 | |
799 /* Compute and update the CRC for each word. The loop should | |
800 get unrolled. */ | |
801 crc0 = crc_braid_table[0][word0 & 0xff]; | |
802 #if N > 1 | |
803 crc1 = crc_braid_table[0][word1 & 0xff]; | |
804 #if N > 2 | |
805 crc2 = crc_braid_table[0][word2 & 0xff]; | |
806 #if N > 3 | |
807 crc3 = crc_braid_table[0][word3 & 0xff]; | |
808 #if N > 4 | |
809 crc4 = crc_braid_table[0][word4 & 0xff]; | |
810 #if N > 5 | |
811 crc5 = crc_braid_table[0][word5 & 0xff]; | |
812 #endif | |
813 #endif | |
814 #endif | |
815 #endif | |
816 #endif | |
817 for (k = 1; k < W; k++) { | |
818 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; | |
819 #if N > 1 | |
820 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; | |
821 #if N > 2 | |
822 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; | |
823 #if N > 3 | |
824 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; | |
825 #if N > 4 | |
826 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; | |
827 #if N > 5 | |
828 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; | |
829 #endif | |
830 #endif | |
831 #endif | |
832 #endif | |
833 #endif | |
834 } | |
835 } | |
836 | |
837 /* | |
838 Process the last block, combining the CRCs of the N braids at the | |
839 same time. | |
840 */ | |
841 crc = crc_word(crc0 ^ words[0]); | |
842 #if N > 1 | |
843 crc = crc_word(crc1 ^ words[1] ^ crc); | |
844 #if N > 2 | |
845 crc = crc_word(crc2 ^ words[2] ^ crc); | |
846 #if N > 3 | |
847 crc = crc_word(crc3 ^ words[3] ^ crc); | |
848 #if N > 4 | |
849 crc = crc_word(crc4 ^ words[4] ^ crc); | |
850 #if N > 5 | |
851 crc = crc_word(crc5 ^ words[5] ^ crc); | |
852 #endif | |
853 #endif | |
854 #endif | |
855 #endif | |
856 #endif | |
857 words += N; | |
858 } | |
859 else { | |
860 /* Big endian. */ | |
861 | |
862 z_word_t crc0, word0, comb; | |
863 #if N > 1 | |
864 z_word_t crc1, word1; | |
865 #if N > 2 | |
866 z_word_t crc2, word2; | |
867 #if N > 3 | |
868 z_word_t crc3, word3; | |
869 #if N > 4 | |
870 z_word_t crc4, word4; | |
871 #if N > 5 | |
872 z_word_t crc5, word5; | |
873 #endif | |
874 #endif | |
875 #endif | |
876 #endif | |
877 #endif | |
878 | |
879 /* Initialize the CRC for each braid. */ | |
880 crc0 = byte_swap(crc); | |
881 #if N > 1 | |
882 crc1 = 0; | |
883 #if N > 2 | |
884 crc2 = 0; | |
885 #if N > 3 | |
886 crc3 = 0; | |
887 #if N > 4 | |
888 crc4 = 0; | |
889 #if N > 5 | |
890 crc5 = 0; | |
891 #endif | |
892 #endif | |
893 #endif | |
894 #endif | |
895 #endif | |
896 | |
897 /* | |
898 Process the first blks-1 blocks, computing the CRCs on each braid | |
899 independently. | |
900 */ | |
901 while (--blks) { | |
902 /* Load the word for each braid into registers. */ | |
903 word0 = crc0 ^ words[0]; | |
904 #if N > 1 | |
905 word1 = crc1 ^ words[1]; | |
906 #if N > 2 | |
907 word2 = crc2 ^ words[2]; | |
908 #if N > 3 | |
909 word3 = crc3 ^ words[3]; | |
910 #if N > 4 | |
911 word4 = crc4 ^ words[4]; | |
912 #if N > 5 | |
913 word5 = crc5 ^ words[5]; | |
914 #endif | |
915 #endif | |
916 #endif | |
917 #endif | |
918 #endif | |
919 words += N; | |
920 | |
921 /* Compute and update the CRC for each word. The loop should | |
922 get unrolled. */ | |
923 crc0 = crc_braid_big_table[0][word0 & 0xff]; | |
924 #if N > 1 | |
925 crc1 = crc_braid_big_table[0][word1 & 0xff]; | |
926 #if N > 2 | |
927 crc2 = crc_braid_big_table[0][word2 & 0xff]; | |
928 #if N > 3 | |
929 crc3 = crc_braid_big_table[0][word3 & 0xff]; | |
930 #if N > 4 | |
931 crc4 = crc_braid_big_table[0][word4 & 0xff]; | |
932 #if N > 5 | |
933 crc5 = crc_braid_big_table[0][word5 & 0xff]; | |
934 #endif | |
935 #endif | |
936 #endif | |
937 #endif | |
938 #endif | |
939 for (k = 1; k < W; k++) { | |
940 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; | |
941 #if N > 1 | |
942 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; | |
943 #if N > 2 | |
944 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; | |
945 #if N > 3 | |
946 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; | |
947 #if N > 4 | |
948 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; | |
949 #if N > 5 | |
950 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; | |
951 #endif | |
952 #endif | |
953 #endif | |
954 #endif | |
955 #endif | |
956 } | |
957 } | |
958 | |
959 /* | |
960 Process the last block, combining the CRCs of the N braids at the | |
961 same time. | |
962 */ | |
963 comb = crc_word_big(crc0 ^ words[0]); | |
964 #if N > 1 | |
965 comb = crc_word_big(crc1 ^ words[1] ^ comb); | |
966 #if N > 2 | |
967 comb = crc_word_big(crc2 ^ words[2] ^ comb); | |
968 #if N > 3 | |
969 comb = crc_word_big(crc3 ^ words[3] ^ comb); | |
970 #if N > 4 | |
971 comb = crc_word_big(crc4 ^ words[4] ^ comb); | |
972 #if N > 5 | |
973 comb = crc_word_big(crc5 ^ words[5] ^ comb); | |
974 #endif | |
975 #endif | |
976 #endif | |
977 #endif | |
978 #endif | |
979 words += N; | |
980 crc = byte_swap(comb); | |
981 } | |
982 | |
983 /* | |
984 Update the pointer to the remaining bytes to process. | |
985 */ | |
986 buf = (unsigned char const *)words; | |
987 } | |
988 | |
989 #endif /* W */ | |
990 | |
991 /* Complete the computation of the CRC on any remaining bytes. */ | |
992 while (len >= 8) { | |
993 len -= 8; | |
994 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
995 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
996 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
997 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
998 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
999 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
1000 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
1001 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
1002 } | |
1003 while (len) { | |
1004 len--; | |
1005 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
1006 } | |
1007 | |
1008 /* Return the CRC, post-conditioned. */ | |
1009 return crc ^ 0xffffffff; | |
1010 } | |
1011 | |
1012 #endif | |
200 | 1013 |
201 /* ========================================================================= */ | 1014 /* ========================================================================= */ |
202 unsigned long ZEXPORT crc32_z(crc, buf, len) | 1015 unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, |
203 unsigned long crc; | 1016 uInt len) { |
204 const unsigned char FAR *buf; | 1017 return crc32_z(crc, buf, len); |
205 z_size_t len; | 1018 } |
206 { | 1019 |
207 if (buf == Z_NULL) return 0UL; | 1020 /* ========================================================================= */ |
208 | 1021 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { |
209 #ifdef DYNAMIC_CRC_TABLE | 1022 #ifdef DYNAMIC_CRC_TABLE |
210 if (crc_table_empty) | 1023 once(&made, make_crc_table); |
211 make_crc_table(); | |
212 #endif /* DYNAMIC_CRC_TABLE */ | 1024 #endif /* DYNAMIC_CRC_TABLE */ |
213 | 1025 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); |
214 #ifdef BYFOUR | |
215 if (sizeof(void *) == sizeof(ptrdiff_t)) { | |
216 z_crc_t endian; | |
217 | |
218 endian = 1; | |
219 if (*((unsigned char *)(&endian))) | |
220 return crc32_little(crc, buf, len); | |
221 else | |
222 return crc32_big(crc, buf, len); | |
223 } | |
224 #endif /* BYFOUR */ | |
225 crc = crc ^ 0xffffffffUL; | |
226 while (len >= 8) { | |
227 DO8; | |
228 len -= 8; | |
229 } | |
230 if (len) do { | |
231 DO1; | |
232 } while (--len); | |
233 return crc ^ 0xffffffffUL; | |
234 } | 1026 } |
235 | 1027 |
236 /* ========================================================================= */ | 1028 /* ========================================================================= */ |
237 unsigned long ZEXPORT crc32(crc, buf, len) | 1029 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { |
238 unsigned long crc; | 1030 return crc32_combine64(crc1, crc2, (z_off64_t)len2); |
239 const unsigned char FAR *buf; | 1031 } |
240 uInt len; | |
241 { | |
242 return crc32_z(crc, buf, len); | |
243 } | |
244 | |
245 #ifdef BYFOUR | |
246 | |
247 /* | |
248 This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit | |
249 integer pointer type. This violates the strict aliasing rule, where a | |
250 compiler can assume, for optimization purposes, that two pointers to | |
251 fundamentally different types won't ever point to the same memory. This can | |
252 manifest as a problem only if one of the pointers is written to. This code | |
253 only reads from those pointers. So long as this code remains isolated in | |
254 this compilation unit, there won't be a problem. For this reason, this code | |
255 should not be copied and pasted into a compilation unit in which other code | |
256 writes to the buffer that is passed to these routines. | |
257 */ | |
258 | 1032 |
259 /* ========================================================================= */ | 1033 /* ========================================================================= */ |
260 #define DOLIT4 c ^= *buf4++; \ | 1034 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) { |
261 c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ | 1035 #ifdef DYNAMIC_CRC_TABLE |
262 crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] | 1036 once(&made, make_crc_table); |
263 #define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 | 1037 #endif /* DYNAMIC_CRC_TABLE */ |
1038 return x2nmodp(len2, 3); | |
1039 } | |
264 | 1040 |
265 /* ========================================================================= */ | 1041 /* ========================================================================= */ |
266 local unsigned long crc32_little(crc, buf, len) | 1042 uLong ZEXPORT crc32_combine_gen(z_off_t len2) { |
267 unsigned long crc; | 1043 return crc32_combine_gen64((z_off64_t)len2); |
268 const unsigned char FAR *buf; | |
269 z_size_t len; | |
270 { | |
271 register z_crc_t c; | |
272 register const z_crc_t FAR *buf4; | |
273 | |
274 c = (z_crc_t)crc; | |
275 c = ~c; | |
276 while (len && ((ptrdiff_t)buf & 3)) { | |
277 c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); | |
278 len--; | |
279 } | |
280 | |
281 buf4 = (const z_crc_t FAR *)(const void FAR *)buf; | |
282 while (len >= 32) { | |
283 DOLIT32; | |
284 len -= 32; | |
285 } | |
286 while (len >= 4) { | |
287 DOLIT4; | |
288 len -= 4; | |
289 } | |
290 buf = (const unsigned char FAR *)buf4; | |
291 | |
292 if (len) do { | |
293 c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); | |
294 } while (--len); | |
295 c = ~c; | |
296 return (unsigned long)c; | |
297 } | 1044 } |
298 | 1045 |
299 /* ========================================================================= */ | 1046 /* ========================================================================= */ |
300 #define DOBIG4 c ^= *buf4++; \ | 1047 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) { |
301 c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ | 1048 return multmodp(op, crc1) ^ (crc2 & 0xffffffff); |
302 crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] | 1049 } |
303 #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 | |
304 | |
305 /* ========================================================================= */ | |
306 local unsigned long crc32_big(crc, buf, len) | |
307 unsigned long crc; | |
308 const unsigned char FAR *buf; | |
309 z_size_t len; | |
310 { | |
311 register z_crc_t c; | |
312 register const z_crc_t FAR *buf4; | |
313 | |
314 c = ZSWAP32((z_crc_t)crc); | |
315 c = ~c; | |
316 while (len && ((ptrdiff_t)buf & 3)) { | |
317 c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); | |
318 len--; | |
319 } | |
320 | |
321 buf4 = (const z_crc_t FAR *)(const void FAR *)buf; | |
322 while (len >= 32) { | |
323 DOBIG32; | |
324 len -= 32; | |
325 } | |
326 while (len >= 4) { | |
327 DOBIG4; | |
328 len -= 4; | |
329 } | |
330 buf = (const unsigned char FAR *)buf4; | |
331 | |
332 if (len) do { | |
333 c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); | |
334 } while (--len); | |
335 c = ~c; | |
336 return (unsigned long)(ZSWAP32(c)); | |
337 } | |
338 | |
339 #endif /* BYFOUR */ | |
340 | |
341 #define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ | |
342 | |
343 /* ========================================================================= */ | |
344 local unsigned long gf2_matrix_times(mat, vec) | |
345 unsigned long *mat; | |
346 unsigned long vec; | |
347 { | |
348 unsigned long sum; | |
349 | |
350 sum = 0; | |
351 while (vec) { | |
352 if (vec & 1) | |
353 sum ^= *mat; | |
354 vec >>= 1; | |
355 mat++; | |
356 } | |
357 return sum; | |
358 } | |
359 | |
360 /* ========================================================================= */ | |
361 local void gf2_matrix_square(square, mat) | |
362 unsigned long *square; | |
363 unsigned long *mat; | |
364 { | |
365 int n; | |
366 | |
367 for (n = 0; n < GF2_DIM; n++) | |
368 square[n] = gf2_matrix_times(mat, mat[n]); | |
369 } | |
370 | |
371 /* ========================================================================= */ | |
372 local uLong crc32_combine_(crc1, crc2, len2) | |
373 uLong crc1; | |
374 uLong crc2; | |
375 z_off64_t len2; | |
376 { | |
377 int n; | |
378 unsigned long row; | |
379 unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ | |
380 unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ | |
381 | |
382 /* degenerate case (also disallow negative lengths) */ | |
383 if (len2 <= 0) | |
384 return crc1; | |
385 | |
386 /* put operator for one zero bit in odd */ | |
387 odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ | |
388 row = 1; | |
389 for (n = 1; n < GF2_DIM; n++) { | |
390 odd[n] = row; | |
391 row <<= 1; | |
392 } | |
393 | |
394 /* put operator for two zero bits in even */ | |
395 gf2_matrix_square(even, odd); | |
396 | |
397 /* put operator for four zero bits in odd */ | |
398 gf2_matrix_square(odd, even); | |
399 | |
400 /* apply len2 zeros to crc1 (first square will put the operator for one | |
401 zero byte, eight zero bits, in even) */ | |
402 do { | |
403 /* apply zeros operator for this bit of len2 */ | |
404 gf2_matrix_square(even, odd); | |
405 if (len2 & 1) | |
406 crc1 = gf2_matrix_times(even, crc1); | |
407 len2 >>= 1; | |
408 | |
409 /* if no more bits set, then done */ | |
410 if (len2 == 0) | |
411 break; | |
412 | |
413 /* another iteration of the loop with odd and even swapped */ | |
414 gf2_matrix_square(odd, even); | |
415 if (len2 & 1) | |
416 crc1 = gf2_matrix_times(odd, crc1); | |
417 len2 >>= 1; | |
418 | |
419 /* if no more bits set, then done */ | |
420 } while (len2 != 0); | |
421 | |
422 /* return combined crc */ | |
423 crc1 ^= crc2; | |
424 return crc1; | |
425 } | |
426 | |
427 /* ========================================================================= */ | |
428 uLong ZEXPORT crc32_combine(crc1, crc2, len2) | |
429 uLong crc1; | |
430 uLong crc2; | |
431 z_off_t len2; | |
432 { | |
433 return crc32_combine_(crc1, crc2, len2); | |
434 } | |
435 | |
436 uLong ZEXPORT crc32_combine64(crc1, crc2, len2) | |
437 uLong crc1; | |
438 uLong crc2; | |
439 z_off64_t len2; | |
440 { | |
441 return crc32_combine_(crc1, crc2, len2); | |
442 } |