$treeview $search $mathjax $extrastylesheet
librsync
2.3.1
$projectbrief
|
$projectbrief
|
$searchbox |
00001 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- 00002 * 00003 * librsync -- library for network deltas 00004 * 00005 * Copyright (C) 1999, 2000, 2001 by Martin Pool <mbp@sourcefrog.net> 00006 * Copyright (C) 1999 by Andrew Tridgell 00007 * 00008 * This program is free software; you can redistribute it and/or modify 00009 * it under the terms of the GNU Lesser General Public License as published by 00010 * the Free Software Foundation; either version 2.1 of the License, or 00011 * (at your option) any later version. 00012 * 00013 * This program is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 * GNU Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public License 00019 * along with this program; if not, write to the Free Software 00020 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00021 */ 00022 00023 #include "config.h" 00024 #include <assert.h> 00025 #include <stdlib.h> 00026 #include <string.h> 00027 #include "librsync.h" 00028 #include "sumset.h" 00029 #include "trace.h" 00030 #include "util.h" 00031 00032 static void rs_block_sig_init(rs_block_sig_t *sig, rs_weak_sum_t weak_sum, 00033 rs_strong_sum_t *strong_sum, int strong_len) 00034 { 00035 sig->weak_sum = weak_sum; 00036 if (strong_sum) 00037 memcpy(sig->strong_sum, strong_sum, strong_len); 00038 } 00039 00040 static inline unsigned rs_block_sig_hash(const rs_block_sig_t *sig) 00041 { 00042 return (unsigned)sig->weak_sum; 00043 } 00044 00045 typedef struct rs_block_match { 00046 rs_block_sig_t block_sig; 00047 rs_signature_t *signature; 00048 const void *buf; 00049 size_t len; 00050 } rs_block_match_t; 00051 00052 static void rs_block_match_init(rs_block_match_t *match, rs_signature_t *sig, 00053 rs_weak_sum_t weak_sum, 00054 rs_strong_sum_t *strong_sum, const void *buf, 00055 size_t len) 00056 { 00057 rs_block_sig_init(&match->block_sig, weak_sum, strong_sum, 00058 sig->strong_sum_len); 00059 match->signature = sig; 00060 match->buf = buf; 00061 match->len = len; 00062 } 00063 00064 static inline int rs_block_match_cmp(rs_block_match_t *match, 00065 const rs_block_sig_t *block_sig) 00066 { 00067 /* If buf is not NULL, the strong sum is yet to be calculated. */ 00068 if (match->buf) { 00069 #ifndef HASHTABLE_NSTATS 00070 match->signature->calc_strong_count++; 00071 #endif 00072 rs_signature_calc_strong_sum(match->signature, match->buf, match->len, 00073 &(match->block_sig.strong_sum)); 00074 match->buf = NULL; 00075 } 00076 return memcmp(&match->block_sig.strong_sum, &block_sig->strong_sum, 00077 match->signature->strong_sum_len); 00078 } 00079 00080 /* Disable mix32() in the hashtable because RabinKarp doesn't need it. We 00081 manually apply mix32() to rollsums before using them in the hashtable. */ 00082 #define HASHTABLE_NMIX32 00083 /* Instantiate hashtable for rs_block_sig and rs_block_match. */ 00084 #define ENTRY rs_block_sig 00085 #define MATCH rs_block_match 00086 #define NAME hashtable 00087 #include "hashtable.h" 00088 00089 /* Get the size of a packed rs_block_sig_t. */ 00090 static inline size_t rs_block_sig_size(const rs_signature_t *sig) 00091 { 00092 /* Round up to next multiple of sizeof(weak_sum) to align memory correctly. 00093 */ 00094 return offsetof(rs_block_sig_t, 00095 strong_sum) + ((sig->strong_sum_len + 00096 sizeof(rs_weak_sum_t)- 00097 1) / sizeof(rs_weak_sum_t)) * 00098 sizeof(rs_weak_sum_t); 00099 } 00100 00101 /* Get the pointer to the block_sig_t from a block index. */ 00102 static inline rs_block_sig_t *rs_block_sig_ptr(const rs_signature_t *sig, 00103 int block_idx) 00104 { 00105 return (rs_block_sig_t *)((char *)sig->block_sigs + 00106 block_idx * rs_block_sig_size(sig)); 00107 } 00108 00109 /* Get the index of a block from a block_sig_t pointer. */ 00110 static inline int rs_block_sig_idx(const rs_signature_t *sig, 00111 rs_block_sig_t *block_sig) 00112 { 00113 return ((char *)block_sig - 00114 (char *)sig->block_sigs) / rs_block_sig_size(sig); 00115 } 00116 00117 rs_result rs_sig_args(rs_long_t old_fsize, rs_magic_number * magic, 00118 size_t *block_len, size_t *strong_len) 00119 { 00120 size_t rec_block_len; /* the recomended block_len for the given 00121 old_fsize. */ 00122 size_t min_strong_len; /* the minimum strong_len for the given 00123 old_fsize and block_len. */ 00124 size_t max_strong_len; /* the maximum strong_len for the given magic. */ 00125 00126 /* Check and set default arguments. */ 00127 *magic = *magic ? *magic : RS_RK_BLAKE2_SIG_MAGIC; 00128 switch (*magic) { 00129 case RS_BLAKE2_SIG_MAGIC: 00130 case RS_RK_BLAKE2_SIG_MAGIC: 00131 max_strong_len = RS_BLAKE2_SUM_LENGTH; 00132 break; 00133 case RS_MD4_SIG_MAGIC: 00134 case RS_RK_MD4_SIG_MAGIC: 00135 max_strong_len = RS_MD4_SUM_LENGTH; 00136 break; 00137 default: 00138 rs_error("invalid magic %#x", *magic); 00139 return RS_BAD_MAGIC; 00140 } 00141 /* The recommended block_len is sqrt(old_fsize) with a 256 min size rounded 00142 down to a multiple of the 128 byte blake2b blocksize to give a 00143 reasonable compromise between signature size, delta size, and 00144 performance. If the old_fsize is unknown, we use the default. */ 00145 if (old_fsize < 0) { 00146 rec_block_len = RS_DEFAULT_BLOCK_LEN; 00147 } else { 00148 rec_block_len = 00149 old_fsize <= 256 * 256 ? 256 : rs_long_sqrt(old_fsize) & ~127; 00150 } 00151 if (*block_len == 0) 00152 *block_len = rec_block_len; 00153 /* The recommended strong_len assumes the worst case new_fsize = old_fsize 00154 + 16MB with no matches. This results in comparing a block at every byte 00155 offset against all the blocks in the signature, or new_fsize*block_num 00156 comparisons. With N bits in the blocksig, there is a 1/2^N chance per 00157 comparison of a hash colision. So with 2^N attempts there would be a 00158 fair chance of having a collision. So we want to round up to the next 00159 byte, add an extra 2 bytes (16 bits) in the strongsum, and assume the 00160 weaksum is worth another 16 bits, for at least 32 bits extra, giving a 00161 worst case 1/2^32 chance of having a hash collision per delta. If 00162 old_fsize is unknown we use a conservative default. */ 00163 if (old_fsize < 0) { 00164 min_strong_len = RS_DEFAULT_MIN_STRONG_LEN; 00165 } else { 00166 min_strong_len = 00167 2 + (rs_long_ln2(old_fsize + ((rs_long_t)1 << 24)) + 00168 rs_long_ln2(old_fsize / *block_len + 1) + 7) / 8; 00169 } 00170 if (*strong_len == 0) 00171 *strong_len = max_strong_len; 00172 else if (*strong_len == -1) 00173 *strong_len = min_strong_len; 00174 else if (old_fsize >= 0 && *strong_len < min_strong_len) { 00175 rs_log(RS_LOG_WARNING, 00176 "strong_len=" FMT_SIZE " smaller than recommended minimum " 00177 FMT_SIZE " for old_fsize=" FMT_LONG " with block_len=" FMT_SIZE, 00178 *strong_len, min_strong_len, old_fsize, *block_len); 00179 } else if (*strong_len > max_strong_len) { 00180 rs_error("invalid strong_len=" FMT_SIZE " for magic=%#x", *strong_len, 00181 (int)*magic); 00182 return RS_PARAM_ERROR; 00183 } 00184 rs_sig_args_check(*magic, *block_len, *strong_len); 00185 return RS_DONE; 00186 } 00187 00188 rs_result rs_signature_init(rs_signature_t *sig, rs_magic_number magic, 00189 size_t block_len, size_t strong_len, 00190 rs_long_t sig_fsize) 00191 { 00192 rs_result result; 00193 00194 /* Check and set default arguments, using old_fsize=-1 for unknown. */ 00195 if ((result = rs_sig_args(-1, &magic, &block_len, &strong_len)) != RS_DONE) 00196 return result; 00197 /* Set attributes from args. */ 00198 sig->magic = magic; 00199 sig->block_len = block_len; 00200 sig->strong_sum_len = strong_len; 00201 sig->count = 0; 00202 /* Calculate the number of blocks if we have the signature file size. */ 00203 /* Magic+header is 12 bytes, each block thereafter is 4 bytes 00204 weak_sum+strong_sum_len bytes */ 00205 sig->size = (int)(sig_fsize < 12 ? 0 : (sig_fsize - 12) / (4 + strong_len)); 00206 if (sig->size) 00207 sig->block_sigs = 00208 rs_alloc(sig->size * rs_block_sig_size(sig), 00209 "signature->block_sigs"); 00210 else 00211 sig->block_sigs = NULL; 00212 sig->hashtable = NULL; 00213 #ifndef HASHTABLE_NSTATS 00214 sig->calc_strong_count = 0; 00215 #endif 00216 rs_signature_check(sig); 00217 return RS_DONE; 00218 } 00219 00220 void rs_signature_done(rs_signature_t *sig) 00221 { 00222 hashtable_free(sig->hashtable); 00223 free(sig->block_sigs); 00224 rs_bzero(sig, sizeof(*sig)); 00225 } 00226 00227 rs_block_sig_t *rs_signature_add_block(rs_signature_t *sig, 00228 rs_weak_sum_t weak_sum, 00229 rs_strong_sum_t *strong_sum) 00230 { 00231 rs_signature_check(sig); 00232 /* Apply mix32() to rollsum weaksums to improve their distribution. */ 00233 if (rs_signature_weaksum_kind(sig) == RS_ROLLSUM) 00234 weak_sum = mix32(weak_sum); 00235 /* If block_sigs is full, allocate more space. */ 00236 if (sig->count == sig->size) { 00237 sig->size = sig->size ? sig->size * 2 : 16; 00238 sig->block_sigs = 00239 rs_realloc(sig->block_sigs, sig->size * rs_block_sig_size(sig), 00240 "signature->block_sigs"); 00241 } 00242 rs_block_sig_t *b = rs_block_sig_ptr(sig, sig->count++); 00243 rs_block_sig_init(b, weak_sum, strong_sum, sig->strong_sum_len); 00244 return b; 00245 } 00246 00247 rs_long_t rs_signature_find_match(rs_signature_t *sig, rs_weak_sum_t weak_sum, 00248 void const *buf, size_t len) 00249 { 00250 rs_block_match_t m; 00251 rs_block_sig_t *b; 00252 00253 rs_signature_check(sig); 00254 rs_block_match_init(&m, sig, weak_sum, NULL, buf, len); 00255 if ((b = hashtable_find(sig->hashtable, &m))) { 00256 return (rs_long_t)rs_block_sig_idx(sig, b) * sig->block_len; 00257 } 00258 return -1; 00259 } 00260 00261 void rs_signature_log_stats(rs_signature_t const *sig) 00262 { 00263 #ifndef HASHTABLE_NSTATS 00264 hashtable_t *t = sig->hashtable; 00265 00266 rs_log(RS_LOG_INFO | RS_LOG_NONAME, 00267 "match statistics: signature[%ld searches, %ld (%.3f%%) matches, " 00268 "%ld (%.3fx) weak sum compares, %ld (%.3f%%) strong sum compares, " 00269 "%ld (%.3f%%) strong sum calcs]", t->find_count, t->match_count, 00270 100.0 * (double)t->match_count / t->find_count, t->hashcmp_count, 00271 (double)t->hashcmp_count / t->find_count, t->entrycmp_count, 00272 100.0 * (double)t->entrycmp_count / t->find_count, 00273 sig->calc_strong_count, 00274 100.0 * (double)sig->calc_strong_count / t->find_count); 00275 #endif 00276 } 00277 00278 rs_result rs_build_hash_table(rs_signature_t *sig) 00279 { 00280 rs_block_match_t m; 00281 rs_block_sig_t *b; 00282 int i; 00283 00284 rs_signature_check(sig); 00285 sig->hashtable = hashtable_new(sig->count); 00286 if (!sig->hashtable) 00287 return RS_MEM_ERROR; 00288 for (i = 0; i < sig->count; i++) { 00289 b = rs_block_sig_ptr(sig, i); 00290 rs_block_match_init(&m, sig, b->weak_sum, &b->strong_sum, NULL, 0); 00291 if (!hashtable_find(sig->hashtable, &m)) 00292 hashtable_add(sig->hashtable, b); 00293 } 00294 hashtable_stats_init(sig->hashtable); 00295 return RS_DONE; 00296 } 00297 00298 void rs_free_sumset(rs_signature_t *psums) 00299 { 00300 rs_signature_done(psums); 00301 free(psums); 00302 } 00303 00304 void rs_sumset_dump(rs_signature_t const *sums) 00305 { 00306 int i; 00307 rs_block_sig_t *b; 00308 char strong_hex[RS_MAX_STRONG_SUM_LENGTH * 3]; 00309 00310 rs_log(RS_LOG_INFO | RS_LOG_NONAME, 00311 "sumset info: magic=%#x, block_len=%d, block_num=%d", sums->magic, 00312 sums->block_len, sums->count); 00313 00314 for (i = 0; i < sums->count; i++) { 00315 b = rs_block_sig_ptr(sums, i); 00316 rs_hexify(strong_hex, b->strong_sum, sums->strong_sum_len); 00317 rs_log(RS_LOG_INFO | RS_LOG_NONAME, 00318 "sum %6d: weak=" FMT_WEAKSUM ", strong=%s", i, b->weak_sum, 00319 strong_hex); 00320 } 00321 }