Fossil SCM
Use intrinsic byte-swapping functions to boost the performance of delta checksums.
Commit
7338b3a9510281e7d49643f55a5c27c24107f083
Parent
ec8394e166117a5…
1 file changed
+60
-27
+60
-27
| --- src/delta.c | ||
| +++ src/delta.c | ||
| @@ -209,43 +209,76 @@ | ||
| 209 | 209 | unsigned int i, x; |
| 210 | 210 | for(i=1, x=64; v>=x; i++, x <<= 6){} |
| 211 | 211 | return i; |
| 212 | 212 | } |
| 213 | 213 | |
| 214 | +#ifdef __GNUC__ | |
| 215 | +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) | |
| 216 | +#else | |
| 217 | +# define GCC_VERSION 0 | |
| 218 | +#endif | |
| 219 | + | |
| 214 | 220 | /* |
| 215 | -** Compute a 32-bit checksum on the N-byte buffer. Return the result. | |
| 221 | +** Compute a 32-bit big-endian checksum on the N-byte buffer. If the | |
| 222 | +** buffer is not a multiple of 4 bytes length, compute the sum that would | |
| 223 | +** have occurred if the buffer was padded with zeros to the next multiple | |
| 224 | +** of four bytes. | |
| 216 | 225 | */ |
| 217 | 226 | static unsigned int checksum(const char *zIn, size_t N){ |
| 227 | + static const int byteOrderTest = 1; | |
| 218 | 228 | const unsigned char *z = (const unsigned char *)zIn; |
| 219 | - unsigned sum0 = 0; | |
| 220 | - unsigned sum1 = 0; | |
| 221 | - unsigned sum2 = 0; | |
| 222 | - unsigned sum3 = 0; | |
| 223 | - while(N >= 16){ | |
| 224 | - sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]); | |
| 225 | - sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]); | |
| 226 | - sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]); | |
| 227 | - sum3 += ((unsigned)z[3] + z[7] + z[11]+ z[15]); | |
| 228 | - z += 16; | |
| 229 | - N -= 16; | |
| 230 | - } | |
| 231 | - while(N >= 4){ | |
| 232 | - sum0 += z[0]; | |
| 233 | - sum1 += z[1]; | |
| 234 | - sum2 += z[2]; | |
| 235 | - sum3 += z[3]; | |
| 236 | - z += 4; | |
| 237 | - N -= 4; | |
| 238 | - } | |
| 239 | - sum3 += (sum2 << 8) + (sum1 << 16) + (sum0 << 24); | |
| 240 | - switch(N){ | |
| 241 | - case 3: sum3 += (z[2] << 8); | |
| 242 | - case 2: sum3 += (z[1] << 16); | |
| 243 | - case 1: sum3 += (z[0] << 24); | |
| 229 | + const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3]; | |
| 230 | + unsigned sum = 0; | |
| 231 | + assert( (3&(sqlite3_uint64)z)==0 ); /* Four-byte alignment */ | |
| 232 | + if( 0==*(char*)&byteOrderTest ){ | |
| 233 | + /* This is a big-endian machine */ | |
| 234 | + while( z<zEnd ){ | |
| 235 | + sum += *(unsigned*)z; | |
| 236 | + z += 4; | |
| 237 | + } | |
| 238 | + }else{ | |
| 239 | + /* A little-endian machine */ | |
| 240 | +#if GCC_VERSION>=4003000 | |
| 241 | + while( z<zEnd ){ | |
| 242 | + sum += __builtin_bswap32(*(unsigned*)z); | |
| 243 | + z += 4; | |
| 244 | + } | |
| 245 | +#elif defined(_MSC_VER) && _MSC_VER>=1300 | |
| 246 | + while( z<zEnd ){ | |
| 247 | + sum += _byteswap_ulong(*(unsigned*)z); | |
| 248 | + z += 4; | |
| 249 | + } | |
| 250 | +#else | |
| 251 | + unsigned sum0 = 0; | |
| 252 | + unsigned sum1 = 0; | |
| 253 | + unsigned sum2 = 0; | |
| 254 | + while(N >= 16){ | |
| 255 | + sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]); | |
| 256 | + sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]); | |
| 257 | + sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]); | |
| 258 | + sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]); | |
| 259 | + z += 16; | |
| 260 | + N -= 16; | |
| 261 | + } | |
| 262 | + while(N >= 4){ | |
| 263 | + sum0 += z[0]; | |
| 264 | + sum1 += z[1]; | |
| 265 | + sum2 += z[2]; | |
| 266 | + sum += z[3]; | |
| 267 | + z += 4; | |
| 268 | + N -= 4; | |
| 269 | + } | |
| 270 | + sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24); | |
| 271 | +#endif | |
| 272 | + } | |
| 273 | + switch(N&3){ | |
| 274 | + case 3: sum += (z[2] << 8); | |
| 275 | + case 2: sum += (z[1] << 16); | |
| 276 | + case 1: sum += (z[0] << 24); | |
| 244 | 277 | default: ; |
| 245 | 278 | } |
| 246 | - return sum3; | |
| 279 | + return sum; | |
| 247 | 280 | } |
| 248 | 281 | |
| 249 | 282 | /* |
| 250 | 283 | ** Create a new delta. |
| 251 | 284 | ** |
| 252 | 285 |
| --- src/delta.c | |
| +++ src/delta.c | |
| @@ -209,43 +209,76 @@ | |
| 209 | unsigned int i, x; |
| 210 | for(i=1, x=64; v>=x; i++, x <<= 6){} |
| 211 | return i; |
| 212 | } |
| 213 | |
/*
** Compute a 32-bit checksum on the N-byte buffer zIn.  Return the result.
**
** The buffer is summed as a sequence of big-endian 32-bit words, modulo
** 2^32.  A trailing fragment of 1 to 3 bytes is added in as if it had been
** padded with zero bytes up to a full word.
*/
static unsigned int checksum(const char *zIn, size_t N){
  const unsigned char *z = (const unsigned char *)zIn;
  unsigned sum0 = 0;   /* Sum of byte 0 (most significant) of every word */
  unsigned sum1 = 0;   /* Sum of byte 1 of every word */
  unsigned sum2 = 0;   /* Sum of byte 2 of every word */
  unsigned sum3 = 0;   /* Sum of byte 3 of every word; also the result */

  /* Process four words per iteration while enough input remains */
  while(N >= 16){
    sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
    sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
    sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
    sum3 += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
    z += 16;
    N -= 16;
  }
  /* Process the remaining whole words one at a time */
  while(N >= 4){
    sum0 += z[0];
    sum1 += z[1];
    sum2 += z[2];
    sum3 += z[3];
    z += 4;
    N -= 4;
  }
  /* Move each per-byte sum to its position within the 32-bit word */
  sum3 += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);

  /* Add the 1 to 3 leftover bytes.  Each byte is widened to unsigned
  ** before shifting: shifting the int-promoted value by 24 would overflow
  ** a signed int for byte values of 0x80 and above. */
  switch(N){
    case 3:   sum3 += ((unsigned)z[2] << 8);   /* fall through */
    case 2:   sum3 += ((unsigned)z[1] << 16);  /* fall through */
    case 1:   sum3 += ((unsigned)z[0] << 24);  /* fall through */
    default:  ;
  }
  return sum3;
}
| 248 | |
| 249 | /* |
| 250 | ** Create a new delta. |
| 251 | ** |
| 252 |
| --- src/delta.c | |
| +++ src/delta.c | |
| @@ -209,43 +209,76 @@ | |
| 209 | unsigned int i, x; |
| 210 | for(i=1, x=64; v>=x; i++, x <<= 6){} |
| 211 | return i; |
| 212 | } |
| 213 | |
/*
** GCC_VERSION packs the compiler's major, minor and patch-level numbers
** into the single integer MAJOR*1000000 + MINOR*1000 + PATCH so that a
** minimum version can be tested with one #if comparison.  It is 0 when
** __GNUC__ is not defined.
*/
#ifdef __GNUC__
# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
#else
# define GCC_VERSION 0
#endif
| 219 | |
/*
** Compute a 32-bit big-endian checksum on the N-byte buffer.  If the
** buffer is not a multiple of 4 bytes length, compute the sum that would
** have occurred if the buffer was padded with zeros to the next multiple
** of four bytes.
**
** zIn must be aligned on a four-byte boundary because whole words are
** loaded directly from the buffer; this is checked with an assert().
** The result is the same on big-endian and little-endian machines.
*/
static unsigned int checksum(const char *zIn, size_t N){
  static const int byteOrderTest = 1;
  const unsigned char *z = (const unsigned char *)zIn;
  /* One past the last complete 4-byte word of the input */
  const unsigned char *zEnd = (const unsigned char*)&zIn[N & ~(size_t)3];
  unsigned sum = 0;
  assert( (3&(size_t)z)==0 );  /* Four-byte alignment */
  if( 0==*(const char*)&byteOrderTest ){
    /* This is a big-endian machine: words can be added as loaded */
    while( z<zEnd ){
      sum += *(const unsigned*)z;
      z += 4;
    }
  }else{
    /* A little-endian machine: each word must be byte-swapped */
#if GCC_VERSION>=4003000
    while( z<zEnd ){
      sum += __builtin_bswap32(*(const unsigned*)z);
      z += 4;
    }
#elif defined(_MSC_VER) && _MSC_VER>=1300
    while( z<zEnd ){
      sum += _byteswap_ulong(*(const unsigned*)z);
      z += 4;
    }
#else
    /* No byte-swap intrinsic: accumulate each byte position separately
    ** and combine the partial sums afterwards. */
    unsigned sum0 = 0;
    unsigned sum1 = 0;
    unsigned sum2 = 0;
    (void)zEnd;  /* Only the word-at-a-time loops use zEnd */
    while(N >= 16){
      sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
      sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
      sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
      sum  += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
      z += 16;
      N -= 16;
    }
    while(N >= 4){
      sum0 += z[0];
      sum1 += z[1];
      sum2 += z[2];
      sum  += z[3];
      z += 4;
      N -= 4;
    }
    sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
#endif
  }
  /* At this point z addresses the 0 to 3 bytes left after the last whole
  ** word.  N&3 gives their count on every path, since the fallback loops
  ** only ever reduce N by multiples of four.  Each byte is widened to
  ** unsigned before shifting: shifting the int-promoted value by 24 would
  ** overflow a signed int for byte values of 0x80 and above. */
  switch(N&3){
    case 3:   sum += ((unsigned)z[2] << 8);   /* fall through */
    case 2:   sum += ((unsigned)z[1] << 16);  /* fall through */
    case 1:   sum += ((unsigned)z[0] << 24);  /* fall through */
    default:  ;
  }
  return sum;
}
| 281 | |
| 282 | /* |
| 283 | ** Create a new delta. |
| 284 | ** |
| 285 |