$treeview $search $mathjax $extrastylesheet
librsync
2.3.1
$projectbrief
|
$projectbrief
|
$searchbox |
00001 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- 00002 * 00003 * librsync -- the library for network deltas 00004 * 00005 * Copyright (C) 2000, 2001 by Martin Pool <mbp@sourcefrog.net> 00006 * 00007 * This program is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public License 00009 * as published by the Free Software Foundation; either version 2.1 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * This program is distributed in the hope that it will be useful, but 00013 * WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00020 */ 00021 00022 /*= 00023 | To walk on water you've gotta sink 00024 | in the ice. 00025 | -- Shihad, `The General Electric'. 00026 */ 00027 00028 /** \file scoop.c 00029 * This file deals with readahead from caller-supplied buffers. 00030 * 00031 * Many functions require a certain minimum amount of input to do their 00032 * processing. For example, to calculate a strong checksum of a block we need 00033 * at least a block of input. 00034 * 00035 * Since we put the buffers completely under the control of the caller, we 00036 * can't count on ever getting this much data all in one go. We can't simply 00037 * wait, because the caller might have a smaller buffer than we require and so 00038 * we'll never get it. For the same reason we must always accept all the data 00039 * we're given. 00040 * 00041 * So, stream input data that's required for readahead is put into a special 00042 * buffer, from which the caller can then read. It's essentially like an 00043 * internal pipe, which on any given read request may or may not be able to 00044 * actually supply the data. 00045 * 00046 * As a future optimization, we might try to take data directly from the input 00047 * buffer if there's already enough there. 00048 * 00049 * \todo We probably know a maximum amount of data that can be scooped up, so 00050 * we could just avoid dynamic allocation. However that can't be fixed at 00051 * compile time, because when generating a delta it needs to be large enough to 00052 * hold one full block. Perhaps we can set it up when the job is allocated? It 00053 * would be kind of nice to not do any memory allocation after startup, as 00054 * bzlib does this. */ 00055 00056 #include "config.h" 00057 #include <assert.h> 00058 #include <stdlib.h> 00059 #include <string.h> 00060 #include "librsync.h" 00061 #include "job.h" 00062 #include "stream.h" 00063 #include "trace.h" 00064 #include "util.h" 00065 00066 /** Try to accept a from the input buffer to get LEN bytes in the scoop. */ 00067 void rs_scoop_input(rs_job_t *job, size_t len) 00068 { 00069 rs_buffers_t *stream = job->stream; 00070 size_t tocopy; 00071 00072 assert(len > job->scoop_avail); 00073 00074 if (job->scoop_alloc < len) { 00075 /* Need to allocate a larger scoop. */ 00076 rs_byte_t *newbuf; 00077 size_t newsize; 00078 for (newsize = 64; newsize < len; newsize <<= 1) ; 00079 newbuf = rs_alloc(newsize, "scoop buffer"); 00080 if (job->scoop_avail) 00081 memcpy(newbuf, job->scoop_next, job->scoop_avail); 00082 if (job->scoop_buf) 00083 free(job->scoop_buf); 00084 job->scoop_buf = job->scoop_next = newbuf; 00085 rs_trace("resized scoop buffer to " FMT_SIZE " bytes from " FMT_SIZE "", 00086 newsize, job->scoop_alloc); 00087 job->scoop_alloc = newsize; 00088 } else if (job->scoop_buf != job->scoop_next) { 00089 /* Move existing data to the front of the scoop. */ 00090 rs_trace("moving scoop " FMT_SIZE " bytes to reuse " FMT_SIZE " bytes", 00091 job->scoop_avail, (size_t)(job->scoop_next - job->scoop_buf)); 00092 memmove(job->scoop_buf, job->scoop_next, job->scoop_avail); 00093 job->scoop_next = job->scoop_buf; 00094 } 00095 /* take as much input as is available, to give up to LEN bytes in the 00096 scoop. */ 00097 tocopy = len - job->scoop_avail; 00098 if (tocopy > stream->avail_in) 00099 tocopy = stream->avail_in; 00100 assert(tocopy + job->scoop_avail <= job->scoop_alloc); 00101 00102 memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy); 00103 rs_trace("accepted " FMT_SIZE " bytes from input to scoop", tocopy); 00104 job->scoop_avail += tocopy; 00105 stream->next_in += tocopy; 00106 stream->avail_in -= tocopy; 00107 } 00108 00109 /** Advance the input cursor forward \p len bytes. 00110 * 00111 * This is used after doing readahead, when you decide you want to keep it. \p 00112 * len must be no more than the amount of available data, so you can't cheat. 00113 * 00114 * So when creating a delta, we require one block of readahead. But after 00115 * examining that block, we might decide to advance over all of it (if there is 00116 * a match), or just one byte (if not). */ 00117 void rs_scoop_advance(rs_job_t *job, size_t len) 00118 { 00119 rs_buffers_t *stream = job->stream; 00120 00121 /* It never makes sense to advance over a mixture of bytes from the scoop 00122 and input, because you couldn't possibly have looked at them all at the 00123 same time. */ 00124 if (job->scoop_avail) { 00125 /* reading from the scoop buffer */ 00126 rs_trace("advance over " FMT_SIZE " bytes from scoop", len); 00127 assert(len <= job->scoop_avail); 00128 job->scoop_avail -= len; 00129 job->scoop_next += len; 00130 } else { 00131 rs_trace("advance over " FMT_SIZE " bytes from input buffer", len); 00132 assert(len <= stream->avail_in); 00133 stream->avail_in -= len; 00134 stream->next_in += len; 00135 } 00136 } 00137 00138 /** Read from scoop without advancing. 00139 * 00140 * Ask for LEN bytes of input from the stream. If that much data is available, 00141 * then return a pointer to it in PTR, advance the stream input pointer over 00142 * the data, and return RS_DONE. If there's not enough data, then accept 00143 * whatever is there into a buffer, advance over it, and return RS_BLOCKED. 00144 * 00145 * The data is not actually removed from the input, so this function lets you 00146 * do readahead. If you want to keep any of the data, you should also call 00147 * rs_scoop_advance() to skip over it. */ 00148 rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr) 00149 { 00150 rs_buffers_t *stream = job->stream; 00151 rs_job_check(job); 00152 00153 if (!job->scoop_avail && stream->avail_in >= len) { 00154 /* The scoop is empty and there's enough data in the input. */ 00155 *ptr = stream->next_in; 00156 rs_trace("got " FMT_SIZE " bytes direct from input", len); 00157 return RS_DONE; 00158 } else if (job->scoop_avail < len && stream->avail_in) { 00159 /* There is not enough data in the scoop. */ 00160 rs_trace("scoop has less than " FMT_SIZE " bytes, scooping from " 00161 FMT_SIZE " input bytes", len, stream->avail_in); 00162 rs_scoop_input(job, len); 00163 } 00164 if (job->scoop_avail >= len) { 00165 /* There is enough data in the scoop now. */ 00166 rs_trace("scoop has at least " FMT_SIZE " bytes, this is enough", 00167 job->scoop_avail); 00168 *ptr = job->scoop_next; 00169 return RS_DONE; 00170 } else if (stream->eof_in) { 00171 /* Not enough input data and at EOF. */ 00172 rs_trace("reached end of input stream"); 00173 return RS_INPUT_ENDED; 00174 } else { 00175 /* Not enough input data yet. */ 00176 rs_trace("blocked with insufficient input data"); 00177 return RS_BLOCKED; 00178 } 00179 } 00180 00181 /** Read LEN bytes if possible, and remove them from the input scoop. 00182 * 00183 * \param *job An rs_job_t pointer to the job instance. 00184 * 00185 * \param len The length of the data in the ptr buffer. 00186 * 00187 * \param **ptr will be updated to point to a read-only buffer holding the 00188 * data, if enough is available. 00189 * 00190 * \return RS_DONE if there was enough data, RS_BLOCKED if there was not enough 00191 * data yet, or RS_INPUT_ENDED if there was not enough data and at EOF. */ 00192 rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr) 00193 { 00194 rs_result result; 00195 00196 result = rs_scoop_readahead(job, len, ptr); 00197 if (result == RS_DONE) 00198 rs_scoop_advance(job, len); 00199 return result; 00200 } 00201 00202 /** Read whatever data remains in the input stream. 00203 * 00204 * \param *job The rs_job_t instance the job instance. 00205 * 00206 * \param *len will be updated to the length of the available data. 00207 * 00208 * \param **ptr will point at the available data. 00209 * 00210 * \return RS_DONE if there was data, RS_INPUT_ENDED if there was no data and 00211 * at EOF, RS_BLOCKED if there was no data and not at EOF. */ 00212 rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr) 00213 { 00214 rs_buffers_t *stream = job->stream; 00215 00216 *len = job->scoop_avail + stream->avail_in; 00217 if (*len) 00218 return rs_scoop_read(job, *len, ptr); 00219 else if (stream->eof_in) 00220 return RS_INPUT_ENDED; 00221 else 00222 return RS_BLOCKED; 00223 } 00224 00225 /** Return the total number of bytes available including the scoop and input 00226 * buffer. */ 00227 size_t rs_scoop_total_avail(rs_job_t *job) 00228 { 00229 return job->scoop_avail + job->stream->avail_in; 00230 }