Ticket #798: new-downloader-v8.diff
File new-downloader-v8.diff, 281.7 KB (added by warner, at 2010-05-27T23:59:51Z) |
---|
-
Makefile
diff --git a/Makefile b/Makefile index c7dc647..ae53f6a 100644
a b coverage-output-text: 140 140 141 141 coverage-output: 142 142 rm -rf coverage-html 143 coverage html - d coverage-html $(COVERAGE_OMIT)143 coverage html -i -d coverage-html $(COVERAGE_OMIT) 144 144 cp .coverage coverage-html/coverage.data 145 145 @echo "now point your browser at coverage-html/index.html" 146 146 … … endif 184 184 185 185 pyflakes: 186 186 $(PYTHON) -OOu `which pyflakes` src/allmydata |sort |uniq 187 check-umids: 188 $(PYTHON) misc/check-umids.py `find src/allmydata -name '*.py'` 187 189 188 190 count-lines: 189 191 @echo -n "files: " -
new file misc/check-umids.py
diff --git a/misc/check-umids.py b/misc/check-umids.py new file mode 100755 index 0000000..05e8825
- + 1 #! /usr/bin/python 2 3 # ./rumid.py foo.py 4 5 import sys, re, os 6 7 ok = True 8 umids = {} 9 10 for fn in sys.argv[1:]: 11 fn = os.path.abspath(fn) 12 for lineno,line in enumerate(open(fn, "r").readlines()): 13 lineno = lineno+1 14 if "umid" not in line: 15 continue 16 mo = re.search("umid=[\"\']([^\"\']+)[\"\']", line) 17 if mo: 18 umid = mo.group(1) 19 if umid in umids: 20 oldfn, oldlineno = umids[umid] 21 print "%s:%d: duplicate umid '%s'" % (fn, lineno, umid) 22 print "%s:%d: first used here" % (oldfn, oldlineno) 23 ok = False 24 umids[umid] = (fn,lineno) 25 26 if ok: 27 print "all umids are unique" 28 else: 29 print "some umids were duplicates" 30 sys.exit(1) -
misc/coverage.el
diff --git a/misc/coverage.el b/misc/coverage.el index bad490f..8d69d5d 100644
a b 84 84 'face '(:box "red") 85 85 ) 86 86 ) 87 (message "Added annotations") 87 (message (format "Added annotations: %d uncovered lines" 88 (safe-length uncovered-code-lines))) 88 89 ) 89 90 ) 90 91 (message "unable to find coverage for this file")) -
misc/coverage2el.py
diff --git a/misc/coverage2el.py b/misc/coverage2el.py index ed94bd0..7d03a27 100644
a b 1 1 2 from coverage import coverage, summary 2 from coverage import coverage, summary, misc 3 3 4 4 class ElispReporter(summary.SummaryReporter): 5 5 def report(self): … … class ElispReporter(summary.SummaryReporter): 21 21 out.write("(let ((results (make-hash-table :test 'equal)))\n") 22 22 for cu in self.code_units: 23 23 f = cu.filename 24 (fn, executable, missing, mf) = self.coverage.analysis(cu) 24 try: 25 (fn, executable, missing, mf) = self.coverage.analysis(cu) 26 except misc.NoSource: 27 continue 25 28 code_linenumbers = executable 26 29 uncovered_code = missing 27 30 covered_linenumbers = sorted(set(executable) - set(missing)) -
misc/sizes.py
diff --git a/misc/sizes.py b/misc/sizes.py index d9c230a..7910946 100644
a b class Sizes: 60 60 self.block_arity = 0 61 61 self.block_tree_depth = 0 62 62 self.block_overhead = 0 63 self.bytes_until_some_data = 20+ share_size63 self.bytes_until_some_data = 32 + share_size 64 64 self.share_storage_overhead = 0 65 65 self.share_transmission_overhead = 0 66 66 67 67 elif mode == "beta": 68 68 # k=num_blocks, d=1 69 # each block has a 20-byte hash69 # each block has a 32-byte hash 70 70 self.block_arity = num_blocks 71 71 self.block_tree_depth = 1 72 self.block_overhead = 2072 self.block_overhead = 32 73 73 # the share has a list of hashes, one for each block 74 74 self.share_storage_overhead = (self.block_overhead * 75 75 num_blocks) 76 76 # we can get away with not sending the hash of the share that 77 77 # we're sending in full, once 78 self.share_transmission_overhead = self.share_storage_overhead - 2078 self.share_transmission_overhead = self.share_storage_overhead - 32 79 79 # we must get the whole list (so it can be validated) before 80 80 # any data can be validated 81 81 self.bytes_until_some_data = (self.share_transmission_overhead + … … class Sizes: 89 89 # to make things easier, we make the pessimistic assumption that 90 90 # we have to store hashes for all the empty places in the tree 91 91 # (when the number of shares is not an exact exponent of k) 92 self.block_overhead = 2092 self.block_overhead = 32 93 93 # the block hashes are organized into a k-ary tree, which 94 94 # means storing (and eventually transmitting) more hashes. This 95 95 # count includes all the low-level share hashes and the root. 
… … class Sizes: 98 98 #print "num_leaves", num_leaves 99 99 #print "hash_nodes", hash_nodes 100 100 # the storage overhead is this 101 self.share_storage_overhead = 20* (hash_nodes - 1)101 self.share_storage_overhead = 32 * (hash_nodes - 1) 102 102 # the transmission overhead is smaller: if we actually transmit 103 103 # every block, we don't have to transmit 1/k of the 104 104 # lowest-level block hashes, and we don't have to transmit the 105 105 # root because it was already sent with the share-level hash tree 106 self.share_transmission_overhead = 20* (hash_nodes106 self.share_transmission_overhead = 32 * (hash_nodes 107 107 - 1 # the root 108 108 - num_leaves / k) 109 109 # we must get a full sibling hash chain before we can validate 110 110 # any data 111 111 sibling_length = d * (k-1) 112 self.bytes_until_some_data = 20* sibling_length + block_size112 self.bytes_until_some_data = 32 * sibling_length + block_size 113 113 114 114 115 115 -
misc/storage-overhead.py
diff --git a/misc/storage-overhead.py b/misc/storage-overhead.py index 75a0bf6..a294b8d 100644
a b 1 1 #!/usr/bin/env python 2 2 3 3 import sys, math 4 from allmydata import upload, uri, encode, storage 4 from allmydata import uri, storage 5 from allmydata.immutable import upload 6 from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE 5 7 from allmydata.util import mathutil 6 8 7 9 def roundup(size, blocksize=4096): … … class BigFakeString: 22 24 def tell(self): 23 25 return self.fp 24 26 25 def calc(filesize, params=(3,7,10), segsize= encode.Encoder.MAX_SEGMENT_SIZE):27 def calc(filesize, params=(3,7,10), segsize=DEFAULT_MAX_SEGMENT_SIZE): 26 28 num_shares = params[2] 27 29 if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD: 28 urisize = len(uri. pack_lit("A"*filesize))30 urisize = len(uri.LiteralFileURI("A"*filesize).to_string()) 29 31 sharesize = 0 30 32 sharespace = 0 31 33 else: 32 u = upload.FileUploader(None) 34 u = upload.FileUploader(None) # XXX changed 33 35 u.set_params(params) 34 36 # unfortunately, Encoder doesn't currently lend itself to answering 35 37 # this question without measuring a filesize, so we have to give it a -
src/allmydata/client.py
diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 12e7473..d3ae29b 100644
a b import allmydata 12 12 from allmydata.storage.server import StorageServer 13 13 from allmydata import storage_client 14 14 from allmydata.immutable.upload import Uploader 15 from allmydata.immutable.download import Downloader15 from allmydata.immutable.downloader.util import Terminator 16 16 from allmydata.immutable.offloaded import Helper 17 17 from allmydata.control import ControlServer 18 18 from allmydata.introducer.client import IntroducerClient 19 from allmydata.util import hashutil, base32, pollmixin, cachedir,log19 from allmydata.util import hashutil, base32, pollmixin, log 20 20 from allmydata.util.abbreviate import parse_abbreviated_size 21 21 from allmydata.util.time_format import parse_duration, parse_date 22 22 from allmydata.stats import StatsProvider … … class Client(node.Node, pollmixin.PollMixin): 278 278 279 279 self.init_client_storage_broker() 280 280 self.history = History(self.stats_provider) 281 self.terminator = Terminator() 282 self.terminator.setServiceParent(self) 281 283 self.add_service(Uploader(helper_furl, self.stats_provider)) 282 download_cachedir = os.path.join(self.basedir,283 "private", "cache", "download")284 self.download_cache_dirman = cachedir.CacheDirectoryManager(download_cachedir)285 self.download_cache_dirman.setServiceParent(self)286 self.downloader = Downloader(self.storage_broker, self.stats_provider)287 284 self.init_stub_client() 288 285 self.init_nodemaker() 289 286 … … class Client(node.Node, pollmixin.PollMixin): 342 339 self._secret_holder, 343 340 self.get_history(), 344 341 self.getServiceNamed("uploader"), 345 self.downloader, 346 self.download_cache_dirman, 342 self.terminator, 347 343 self.get_encoding_parameters(), 348 344 self._key_generator) 349 345 -
src/allmydata/immutable/checker.py
diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py index 2f2d8f1..31c70e3 100644
a b class Checker(log.PrefixingLogMixin): 85 85 level = log.WEIRD 86 86 if f.check(DeadReferenceError): 87 87 level = log.UNUSUAL 88 self.log("failure from server on 'get_buckets' the REMOTE failure was:", facility="tahoe.immutable.checker", failure=f, level=level, umid="3uuBUQ") 88 self.log("failure from server on 'get_buckets' the REMOTE failure was:", 89 facility="tahoe.immutable.checker", 90 failure=f, level=level, umid="AX7wZQ") 89 91 return ({}, serverid, False) 90 92 91 93 d.addCallbacks(_wrap_results, _trap_errs) -
new file src/allmydata/immutable/downloader/common.py
diff --git a/src/allmydata/immutable/downloader/__init__.py b/src/allmydata/immutable/downloader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/allmydata/immutable/downloader/common.py b/src/allmydata/immutable/downloader/common.py new file mode 100644 index 0000000..7364b8d
- + 1 2 (AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT, DEAD, BADSEGNUM) = \ 3 ("AVAILABLE", "PENDING", "OVERDUE", "COMPLETE", "CORRUPT", "DEAD", "BADSEGNUM") 4 5 class BadSegmentNumberError(Exception): 6 pass 7 class WrongSegmentError(Exception): 8 pass 9 class BadCiphertextHashError(Exception): 10 pass 11 -
new file src/allmydata/immutable/downloader/fetcher.py
diff --git a/src/allmydata/immutable/downloader/fetcher.py b/src/allmydata/immutable/downloader/fetcher.py new file mode 100644 index 0000000..2fd987b
- + 1 2 from twisted.python.failure import Failure 3 from foolscap.api import eventually 4 from allmydata.interfaces import NotEnoughSharesError, NoSharesError 5 from allmydata.util import log 6 from allmydata.util.dictutil import DictOfSets 7 from common import AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT, DEAD, \ 8 BADSEGNUM, BadSegmentNumberError 9 10 class SegmentFetcher: 11 """I am responsible for acquiring blocks for a single segment. I will use 12 the Share instances passed to my add_shares() method to locate, retrieve, 13 and validate those blocks. I expect my parent node to call my 14 no_more_shares() method when there are no more shares available. I will 15 call my parent's want_more_shares() method when I want more: I expect to 16 see at least one call to add_shares or no_more_shares afterwards. 17 18 When I have enough validated blocks, I will call my parent's 19 process_blocks() method with a dictionary that maps shnum to blockdata. 20 If I am unable to provide enough blocks, I will call my parent's 21 fetch_failed() method with (self, f). After either of these events, I 22 will shut down and do no further work. My parent can also call my stop() 23 method to have me shut down early.""" 24 25 def __init__(self, node, segnum, k): 26 self._node = node # _Node 27 self.segnum = segnum 28 self._k = k 29 self._shares = {} # maps non-dead Share instance to a state, one of 30 # (AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT). 31 # State transition map is: 32 # AVAILABLE -(send-read)-> PENDING 33 # PENDING -(timer)-> OVERDUE 34 # PENDING -(rx)-> COMPLETE, CORRUPT, DEAD, BADSEGNUM 35 # OVERDUE -(rx)-> COMPLETE, CORRUPT, DEAD, BADSEGNUM 36 # If a share becomes DEAD, it is removed from the 37 # dict. If it becomes BADSEGNUM, the whole fetch is 38 # terminated. 
39 self._share_observers = {} # maps Share to Observer2 for active ones 40 self._shnums = DictOfSets() # maps shnum to the shares that provide it 41 self._blocks = {} # maps shnum to validated block data 42 self._no_more_shares = False 43 self._bad_segnum = False 44 self._last_failure = None 45 self._running = True 46 47 def stop(self): 48 log.msg("SegmentFetcher(%s).stop" % self._node._si_prefix, 49 level=log.NOISY, umid="LWyqpg") 50 self._cancel_all_requests() 51 self._running = False 52 self._shares.clear() # let GC work # ??? XXX 53 54 55 # called by our parent _Node 56 57 def add_shares(self, shares): 58 # called when ShareFinder locates a new share, and when a non-initial 59 # segment fetch is started and we already know about shares from the 60 # previous segment 61 for s in shares: 62 self._shares[s] = AVAILABLE 63 self._shnums.add(s._shnum, s) 64 eventually(self.loop) 65 66 def no_more_shares(self): 67 # ShareFinder tells us it's reached the end of its list 68 self._no_more_shares = True 69 eventually(self.loop) 70 71 # internal methods 72 73 def _count_shnums(self, *states): 74 """shnums for which at least one state is in the following list""" 75 shnums = [] 76 for shnum,shares in self._shnums.iteritems(): 77 matches = [s for s in shares if self._shares.get(s) in states] 78 if matches: 79 shnums.append(shnum) 80 return len(shnums) 81 82 def loop(self): 83 try: 84 # if any exception occurs here, kill the download 85 self._do_loop() 86 except BaseException: 87 self._node.fetch_failed(self, Failure()) 88 raise 89 90 def _do_loop(self): 91 k = self._k 92 if not self._running: 93 return 94 if self._bad_segnum: 95 # oops, we were asking for a segment number beyond the end of the 96 # file. This is an error. 97 self.stop() 98 e = BadSegmentNumberError("segnum=%d, numsegs=%d" % 99 (self.segnum, self._node.num_segments)) 100 f = Failure(e) 101 self._node.fetch_failed(self, f) 102 return 103 104 # are we done? 105 if self._count_shnums(COMPLETE) >= k: 106 # yay! 
107 self.stop() 108 self._node.process_blocks(self.segnum, self._blocks) 109 return 110 111 # we may have exhausted everything 112 if (self._no_more_shares and 113 self._count_shnums(AVAILABLE, PENDING, OVERDUE, COMPLETE) < k): 114 # no more new shares are coming, and the remaining hopeful shares 115 # aren't going to be enough. boo! 116 117 log.msg("share states: %r" % (self._shares,), 118 level=log.NOISY, umid="0ThykQ") 119 if self._count_shnums(AVAILABLE, PENDING, OVERDUE, COMPLETE) == 0: 120 format = ("no shares (need %(k)d)." 121 " Last failure: %(last_failure)s") 122 args = { "k": k, 123 "last_failure": self._last_failure } 124 error = NoSharesError 125 else: 126 format = ("ran out of shares: %(complete)d complete," 127 " %(pending)d pending, %(overdue)d overdue," 128 " %(unused)d unused, need %(k)d." 129 " Last failure: %(last_failure)s") 130 args = {"complete": self._count_shnums(COMPLETE), 131 "pending": self._count_shnums(PENDING), 132 "overdue": self._count_shnums(OVERDUE), 133 # 'unused' should be zero 134 "unused": self._count_shnums(AVAILABLE), 135 "k": k, 136 "last_failure": self._last_failure, 137 } 138 error = NotEnoughSharesError 139 log.msg(format=format, level=log.UNUSUAL, umid="1DsnTg", **args) 140 e = error(format % args) 141 f = Failure(e) 142 self.stop() 143 self._node.fetch_failed(self, f) 144 return 145 146 # nope, not done. Are we "block-hungry" (i.e. do we want to send out 147 # more read requests, or do we think we have enough in flight 148 # already?) 149 while self._count_shnums(PENDING, COMPLETE) < k: 150 # we're hungry.. are there any unused shares? 151 sent = self._send_new_request() 152 if not sent: 153 break 154 155 # ok, now are we "share-hungry" (i.e. do we have enough known shares 156 # to make us happy, or should we ask the ShareFinder to get us more?) 
157 if self._count_shnums(AVAILABLE, PENDING, COMPLETE) < k: 158 # we're hungry for more shares 159 self._node.want_more_shares() 160 # that will trigger the ShareFinder to keep looking 161 162 def _find_one(self, shares, state): 163 # TODO could choose fastest 164 for s in shares: 165 if self._shares[s] == state: 166 return s 167 # can never get here, caller has assert in case of code bug 168 169 def _send_new_request(self): 170 for shnum,shares in sorted(self._shnums.iteritems()): 171 states = [self._shares[s] for s in shares] 172 if COMPLETE in states or PENDING in states: 173 # don't send redundant requests 174 continue 175 if AVAILABLE not in states: 176 # no candidates for this shnum, move on 177 continue 178 # here's a candidate. Send a request. 179 s = self._find_one(shares, AVAILABLE) 180 assert s 181 self._shares[s] = PENDING 182 self._share_observers[s] = o = s.get_block(self.segnum) 183 o.subscribe(self._block_request_activity, share=s, shnum=shnum) 184 # TODO: build up a list of candidates, then walk through the 185 # list, sending requests to the most desireable servers, 186 # re-checking our block-hunger each time. For non-initial segment 187 # fetches, this would let us stick with faster servers. 188 return True 189 # nothing was sent: don't call us again until you have more shares to 190 # work with, or one of the existing shares has been declared OVERDUE 191 return False 192 193 def _cancel_all_requests(self): 194 for o in self._share_observers.values(): 195 o.cancel() 196 self._share_observers = {} 197 198 def _block_request_activity(self, share, shnum, state, block=None, f=None): 199 # called by Shares, in response to our s.send_request() calls. 200 if not self._running: 201 return 202 log.msg("SegmentFetcher(%s)._block_request_activity:" 203 " Share(sh%d-on-%s) -> %s" % 204 (self._node._si_prefix, shnum, share._peerid_s, state), 205 level=log.NOISY, umid="vilNWA") 206 # COMPLETE, CORRUPT, DEAD, BADSEGNUM are terminal. 
207 if state in (COMPLETE, CORRUPT, DEAD, BADSEGNUM): 208 self._share_observers.pop(share, None) 209 if state is COMPLETE: 210 # 'block' is fully validated 211 self._shares[share] = COMPLETE 212 self._blocks[shnum] = block 213 elif state is OVERDUE: 214 self._shares[share] = OVERDUE 215 # OVERDUE is not terminal: it will eventually transition to 216 # COMPLETE, CORRUPT, or DEAD. 217 elif state is CORRUPT: 218 self._shares[share] = CORRUPT 219 elif state is DEAD: 220 del self._shares[share] 221 self._shnums[shnum].remove(share) 222 self._last_failure = f 223 elif state is BADSEGNUM: 224 self._shares[share] = BADSEGNUM # ??? 225 self._bad_segnum = True 226 eventually(self.loop) 227 228 -
new file src/allmydata/immutable/downloader/finder.py
diff --git a/src/allmydata/immutable/downloader/finder.py b/src/allmydata/immutable/downloader/finder.py new file mode 100644 index 0000000..7cefefa
- + 1 2 import time 3 now = time.time 4 from foolscap.api import eventually 5 from allmydata.util import base32, log, idlib 6 7 from share import Share, CommonShare 8 from util import incidentally 9 10 class RequestToken: 11 def __init__(self, peerid): 12 self.peerid = peerid 13 14 class ShareFinder: 15 def __init__(self, storage_broker, verifycap, node, download_status, 16 logparent=None, max_outstanding_requests=10): 17 self.running = True # stopped by Share.stop, from Terminator 18 self.verifycap = verifycap 19 self._started = False 20 self._storage_broker = storage_broker 21 self.share_consumer = self.node = node 22 self.max_outstanding_requests = max_outstanding_requests 23 24 self._hungry = False 25 26 self._commonshares = {} # shnum to CommonShare instance 27 self.undelivered_shares = [] 28 self.pending_requests = set() 29 30 self._storage_index = verifycap.storage_index 31 self._si_prefix = base32.b2a_l(self._storage_index[:8], 60) 32 self._node_logparent = logparent 33 self._download_status = download_status 34 self._lp = log.msg(format="ShareFinder[si=%(si)s] starting", 35 si=self._si_prefix, 36 level=log.NOISY, parent=logparent, umid="2xjj2A") 37 38 def start_finding_servers(self): 39 # don't get servers until somebody uses us: creating the 40 # ImmutableFileNode should not cause work to happen yet. 
Test case is 41 # test_dirnode, which creates us with storage_broker=None 42 if not self._started: 43 si = self.verifycap.storage_index 44 s = self._storage_broker.get_servers_for_index(si) 45 self._servers = iter(s) 46 self._started = True 47 48 def log(self, *args, **kwargs): 49 if "parent" not in kwargs: 50 kwargs["parent"] = self._lp 51 return log.msg(*args, **kwargs) 52 53 def stop(self): 54 self.running = False 55 56 # called by our parent CiphertextDownloader 57 def hungry(self): 58 self.log(format="ShareFinder[si=%(si)s] hungry", 59 si=self._si_prefix, level=log.NOISY, umid="NywYaQ") 60 self.start_finding_servers() 61 self._hungry = True 62 eventually(self.loop) 63 64 # internal methods 65 def loop(self): 66 undelivered_s = ",".join(["sh%d@%s" % 67 (s._shnum, idlib.shortnodeid_b2a(s._peerid)) 68 for s in self.undelivered_shares]) 69 pending_s = ",".join([idlib.shortnodeid_b2a(rt.peerid) 70 for rt in self.pending_requests]) # sort? 71 self.log(format="ShareFinder loop: running=%(running)s" 72 " hungry=%(hungry)s, undelivered=%(undelivered)s," 73 " pending=%(pending)s", 74 running=self.running, hungry=self._hungry, 75 undelivered=undelivered_s, pending=pending_s, 76 level=log.NOISY, umid="kRtS4Q") 77 if not self.running: 78 return 79 if not self._hungry: 80 return 81 if self.undelivered_shares: 82 sh = self.undelivered_shares.pop(0) 83 # they will call hungry() again if they want more 84 self._hungry = False 85 self.log(format="delivering Share(shnum=%(shnum)d, server=%(peerid)s)", 86 shnum=sh._shnum, peerid=sh._peerid_s, 87 level=log.NOISY, umid="2n1qQw") 88 eventually(self.share_consumer.got_shares, [sh]) 89 return 90 91 if len(self.pending_requests) >= self.max_outstanding_requests: 92 # cannot send more requests, must wait for some to retire 93 return 94 95 server = None 96 try: 97 if self._servers: 98 server = self._servers.next() 99 except StopIteration: 100 self._servers = None 101 102 if server: 103 self.send_request(server) 104 # we loop again to get 
parallel queries. The check above will 105 # prevent us from looping forever. 106 eventually(self.loop) 107 return 108 109 if self.pending_requests: 110 # no server, but there are still requests in flight: maybe one of 111 # them will make progress 112 return 113 114 self.log(format="ShareFinder.loop: no_more_shares, ever", 115 level=log.UNUSUAL, umid="XjQlzg") 116 # we've run out of servers (so we can't send any more requests), and 117 # we have nothing in flight. No further progress can be made. They 118 # are destined to remain hungry. 119 self.share_consumer.no_more_shares() 120 121 def send_request(self, server): 122 peerid, rref = server 123 req = RequestToken(peerid) 124 self.pending_requests.add(req) 125 lp = self.log(format="sending DYHB to [%(peerid)s]", 126 peerid=idlib.shortnodeid_b2a(peerid), 127 level=log.NOISY, umid="Io7pyg") 128 d_ev = self._download_status.add_dyhb_sent(peerid, now()) 129 d = rref.callRemote("get_buckets", self._storage_index) 130 d.addBoth(incidentally, self.pending_requests.discard, req) 131 d.addCallbacks(self._got_response, self._got_error, 132 callbackArgs=(rref.version, peerid, req, d_ev, lp), 133 errbackArgs=(peerid, req, d_ev, lp)) 134 d.addErrback(log.err, format="error in send_request", 135 level=log.WEIRD, parent=lp, umid="rpdV0w") 136 d.addCallback(incidentally, eventually, self.loop) 137 138 def _got_response(self, buckets, server_version, peerid, req, d_ev, lp): 139 shnums = sorted([shnum for shnum in buckets]) 140 d_ev.finished(shnums, now()) 141 if buckets: 142 shnums_s = ",".join([str(shnum) for shnum in shnums]) 143 self.log(format="got shnums [%(shnums)s] from [%(peerid)s]", 144 shnums=shnums_s, peerid=idlib.shortnodeid_b2a(peerid), 145 level=log.NOISY, parent=lp, umid="0fcEZw") 146 else: 147 self.log(format="no shares from [%(peerid)s]", 148 peerid=idlib.shortnodeid_b2a(peerid), 149 level=log.NOISY, parent=lp, umid="U7d4JA") 150 if self.node.num_segments is None: 151 best_numsegs = self.node.guessed_num_segments 
152 else: 153 best_numsegs = self.node.num_segments 154 for shnum, bucket in buckets.iteritems(): 155 if shnum in self._commonshares: 156 cs = self._commonshares[shnum] 157 else: 158 cs = CommonShare(best_numsegs, self._si_prefix, shnum, 159 self._node_logparent) 160 # Share._get_satisfaction is responsible for updating 161 # CommonShare.set_numsegs after we know the UEB. Alternatives: 162 # 1: d = self.node.get_num_segments() 163 # d.addCallback(cs.got_numsegs) 164 # the problem is that the OneShotObserverList I was using 165 # inserts an eventual-send between _get_satisfaction's 166 # _satisfy_UEB and _satisfy_block_hash_tree, and the 167 # CommonShare didn't get the num_segs message before 168 # being asked to set block hash values. To resolve this 169 # would require an immediate ObserverList instead of 170 # an eventual-send -based one 171 # 2: break _get_satisfaction into Deferred-attached pieces. 172 # Yuck. 173 self._commonshares[shnum] = cs 174 s = Share(bucket, server_version, self.verifycap, cs, self.node, 175 self._download_status, peerid, shnum, 176 self._node_logparent) 177 self.undelivered_shares.append(s) 178 179 def _got_error(self, f, peerid, req, d_ev, lp): 180 d_ev.finished("error", now()) 181 self.log(format="got error from [%(peerid)s]", 182 peerid=idlib.shortnodeid_b2a(peerid), failure=f, 183 level=log.UNUSUAL, parent=lp, umid="zUKdCw") 184 185 -
new file src/allmydata/immutable/downloader/node.py
diff --git a/src/allmydata/immutable/downloader/node.py b/src/allmydata/immutable/downloader/node.py new file mode 100644 index 0000000..2991c9e
- + 1 2 import time 3 now = time.time 4 from twisted.python.failure import Failure 5 from twisted.internet import defer 6 from foolscap.api import eventually 7 from allmydata import uri 8 from allmydata.codec import CRSDecoder 9 from allmydata.util import base32, log, hashutil, mathutil, observer 10 from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE 11 from allmydata.hashtree import IncompleteHashTree, BadHashError, \ 12 NotEnoughHashesError 13 14 # local imports 15 from finder import ShareFinder 16 from fetcher import SegmentFetcher 17 from segmentation import Segmentation 18 from common import BadCiphertextHashError 19 20 class Cancel: 21 def __init__(self, f): 22 self._f = f 23 self.cancelled = False 24 def cancel(self): 25 if not self.cancelled: 26 self.cancelled = True 27 self._f(self) 28 29 class DownloadNode: 30 """Internal class which manages downloads and holds state. External 31 callers use CiphertextFileNode instead.""" 32 33 # Share._node points to me 34 def __init__(self, verifycap, storage_broker, secret_holder, 35 terminator, history, download_status): 36 assert isinstance(verifycap, uri.CHKFileVerifierURI) 37 self._verifycap = verifycap 38 self._storage_broker = storage_broker 39 self._si_prefix = base32.b2a_l(verifycap.storage_index[:8], 60) 40 self.running = True 41 if terminator: 42 terminator.register(self) # calls self.stop() at stopService() 43 # the rules are: 44 # 1: Only send network requests if you're active (self.running is True) 45 # 2: Use TimerService, not reactor.callLater 46 # 3: You can do eventual-sends any time. 47 # These rules should mean that once 48 # stopService()+flushEventualQueue() fires, everything will be done. 
49 self._secret_holder = secret_holder 50 self._history = history 51 self._download_status = download_status 52 53 k, N = self._verifycap.needed_shares, self._verifycap.total_shares 54 self.share_hash_tree = IncompleteHashTree(N) 55 56 # we guess the segment size, so Segmentation can pull non-initial 57 # segments in a single roundtrip. This populates 58 # .guessed_segment_size, .guessed_num_segments, and 59 # .ciphertext_hash_tree (with a dummy, to let us guess which hashes 60 # we'll need) 61 self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE) 62 63 # filled in when we parse a valid UEB 64 self.have_UEB = False 65 self.segment_size = None 66 self.tail_segment_size = None 67 self.tail_segment_padded = None 68 self.num_segments = None 69 self.block_size = None 70 self.tail_block_size = None 71 72 # things to track callers that want data 73 74 # _segment_requests can have duplicates 75 self._segment_requests = [] # (segnum, d, cancel_handle) 76 self._active_segment = None # a SegmentFetcher, with .segnum 77 78 self._segsize_observers = observer.OneShotObserverList() 79 80 # we create one top-level logparent for this _Node, and another one 81 # for each read() call. Segmentation and get_segment() messages are 82 # associated with the read() call, everything else is tied to the 83 # _Node's log entry. 
84 lp = log.msg(format="Immutable _Node(%(si)s) created: size=%(size)d," 85 " guessed_segsize=%(guessed_segsize)d," 86 " guessed_numsegs=%(guessed_numsegs)d", 87 si=self._si_prefix, size=verifycap.size, 88 guessed_segsize=self.guessed_segment_size, 89 guessed_numsegs=self.guessed_num_segments, 90 level=log.OPERATIONAL, umid="uJ0zAQ") 91 self._lp = lp 92 93 self._sharefinder = ShareFinder(storage_broker, verifycap, self, 94 self._download_status, lp) 95 self._shares = set() 96 97 def _build_guessed_tables(self, max_segment_size): 98 size = min(self._verifycap.size, max_segment_size) 99 s = mathutil.next_multiple(size, self._verifycap.needed_shares) 100 self.guessed_segment_size = s 101 r = self._calculate_sizes(self.guessed_segment_size) 102 self.guessed_num_segments = r["num_segments"] 103 # as with CommonShare, our ciphertext_hash_tree is a stub until we 104 # get the real num_segments 105 self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments) 106 107 def __repr__(self): 108 return "Imm_Node(%s)" % (self._si_prefix,) 109 110 def stop(self): 111 # called by the Terminator at shutdown, mostly for tests 112 if self._active_segment: 113 self._active_segment.stop() 114 self._active_segment = None 115 self._sharefinder.stop() 116 117 # things called by outside callers, via CiphertextFileNode. get_segment() 118 # may also be called by Segmentation. 119 120 def read(self, consumer, offset=0, size=None, read_ev=None): 121 """I am the main entry point, from which FileNode.read() can get 122 data. I feed the consumer with the desired range of ciphertext. I 123 return a Deferred that fires (with the consumer) when the read is 124 finished. 
125 126 Note that there is no notion of a 'file pointer': each call to read() 127 uses an independent offset= value.""" 128 # for concurrent operations: each gets its own Segmentation manager 129 if size is None: 130 size = self._verifycap.size 131 # clip size so offset+size does not go past EOF 132 size = min(size, self._verifycap.size-offset) 133 if read_ev is None: 134 read_ev = self._download_status.add_read_event(offset, size, now()) 135 136 lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)", 137 si=base32.b2a(self._verifycap.storage_index)[:8], 138 offset=offset, size=size, 139 level=log.OPERATIONAL, parent=self._lp, umid="l3j3Ww") 140 if self._history: 141 sp = self._history.stats_provider 142 sp.count("downloader.files_downloaded", 1) # really read() calls 143 sp.count("downloader.bytes_downloaded", size) 144 s = Segmentation(self, offset, size, consumer, read_ev, lp) 145 # this raises an interesting question: what segments to fetch? if 146 # offset=0, always fetch the first segment, and then allow 147 # Segmentation to be responsible for pulling the subsequent ones if 148 # the first wasn't large enough. If offset>0, we're going to need an 149 # extra roundtrip to get the UEB (and therefore the segment size) 150 # before we can figure out which segment to get. TODO: allow the 151 # offset-table-guessing code (which starts by guessing the segsize) 152 # to assist the offset>0 process. 153 d = s.start() 154 def _done(res): 155 read_ev.finished(now()) 156 return res 157 d.addBoth(_done) 158 return d 159 160 def get_segment(self, segnum, logparent=None): 161 """Begin downloading a segment. I return a tuple (d, c): 'd' is a 162 Deferred that fires with (offset,data) when the desired segment is 163 available, and c is an object on which c.cancel() can be called to 164 disavow interest in the segment (after which 'd' will never fire). 
165 166 You probably need to know the segment size before calling this, 167 unless you want the first few bytes of the file. If you ask for a 168 segment number which turns out to be too large, the Deferred will 169 errback with BadSegmentNumberError. 170 171 The Deferred fires with the offset of the first byte of the data 172 segment, so that you can call get_segment() before knowing the 173 segment size, and still know which data you received. 174 175 The Deferred can also errback with other fatal problems, such as 176 NotEnoughSharesError, NoSharesError, or BadCiphertextHashError. 177 """ 178 log.msg(format="imm Node(%(si)s).get_segment(%(segnum)d)", 179 si=base32.b2a(self._verifycap.storage_index)[:8], 180 segnum=segnum, 181 level=log.OPERATIONAL, parent=logparent, umid="UKFjDQ") 182 self._download_status.add_segment_request(segnum, now()) 183 d = defer.Deferred() 184 c = Cancel(self._cancel_request) 185 self._segment_requests.append( (segnum, d, c) ) 186 self._start_new_segment() 187 return (d, c) 188 189 def get_segsize(self): 190 """Return a Deferred that fires when we know the real segment size.""" 191 if self.segment_size: 192 return defer.succeed(self.segment_size) 193 # TODO: this downloads (and discards) the first segment of the file. 194 # We could make this more efficient by writing 195 # fetcher.SegmentSizeFetcher, with the job of finding a single valid 196 # share and extracting the UEB. We'd add Share.get_UEB() to request 197 # just the UEB. 
198 (d,c) = self.get_segment(0) 199 # this ensures that an error during get_segment() will errback the 200 # caller, so Repair won't wait forever on completely missing files 201 d.addCallback(lambda ign: self._segsize_observers.when_fired()) 202 return d 203 204 # things called by the Segmentation object used to transform 205 # arbitrary-sized read() calls into quantized segment fetches 206 207 def _start_new_segment(self): 208 if self._active_segment is None and self._segment_requests: 209 segnum = self._segment_requests[0][0] 210 k = self._verifycap.needed_shares 211 log.msg(format="%(node)s._start_new_segment: segnum=%(segnum)d", 212 node=repr(self), segnum=segnum, 213 level=log.NOISY, umid="wAlnHQ") 214 self._active_segment = fetcher = SegmentFetcher(self, segnum, k) 215 active_shares = [s for s in self._shares if s.is_alive()] 216 fetcher.add_shares(active_shares) # this triggers the loop 217 218 219 # called by our child ShareFinder 220 def got_shares(self, shares): 221 self._shares.update(shares) 222 if self._active_segment: 223 self._active_segment.add_shares(shares) 224 def no_more_shares(self): 225 self._no_more_shares = True 226 if self._active_segment: 227 self._active_segment.no_more_shares() 228 229 # things called by our Share instances 230 231 def validate_and_store_UEB(self, UEB_s): 232 log.msg("validate_and_store_UEB", 233 level=log.OPERATIONAL, parent=self._lp, umid="7sTrPw") 234 h = hashutil.uri_extension_hash(UEB_s) 235 if h != self._verifycap.uri_extension_hash: 236 raise BadHashError 237 UEB_dict = uri.unpack_extension(UEB_s) 238 self._parse_and_store_UEB(UEB_dict) # sets self._stuff 239 # TODO: a malformed (but authentic) UEB could throw an assertion in 240 # _parse_and_store_UEB, and we should abandon the download. 241 self.have_UEB = True 242 243 def _parse_and_store_UEB(self, d): 244 # Note: the UEB contains needed_shares and total_shares. These are 245 # redundant and inferior (the filecap contains the authoritative 246 # values). 
However, because it is possible to encode the same file in 247 # multiple ways, and the encoders might choose (poorly) to use the 248 # same key for both (therefore getting the same SI), we might 249 # encounter shares for both types. The UEB hashes will be different, 250 # however, and we'll disregard the "other" encoding's shares as 251 # corrupted. 252 253 # therefore, we ignore d['total_shares'] and d['needed_shares']. 254 255 log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s", 256 ueb=repr(d), vcap=self._verifycap.to_string(), 257 level=log.NOISY, parent=self._lp, umid="cVqZnA") 258 259 k, N = self._verifycap.needed_shares, self._verifycap.total_shares 260 261 self.segment_size = d['segment_size'] 262 self._segsize_observers.fire(self.segment_size) 263 264 r = self._calculate_sizes(self.segment_size) 265 self.tail_segment_size = r["tail_segment_size"] 266 self.tail_segment_padded = r["tail_segment_padded"] 267 self.num_segments = r["num_segments"] 268 self.block_size = r["block_size"] 269 self.tail_block_size = r["tail_block_size"] 270 log.msg("actual sizes: %s" % (r,), 271 level=log.NOISY, parent=self._lp, umid="PY6P5Q") 272 if (self.segment_size == self.guessed_segment_size 273 and self.num_segments == self.guessed_num_segments): 274 log.msg("my guess was right!", 275 level=log.NOISY, parent=self._lp, umid="x340Ow") 276 else: 277 log.msg("my guess was wrong! Extra round trips for me.", 278 level=log.NOISY, parent=self._lp, umid="tb7RJw") 279 280 # zfec.Decode() instantiation is fast, but still, let's use the same 281 # codec instance for all but the last segment. 3-of-10 takes 15us on 282 # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is 283 # 2.5ms, worst-case 254-of-255 is 9.3ms 284 self._codec = CRSDecoder() 285 self._codec.set_params(self.segment_size, k, N) 286 287 288 # Ciphertext hash tree root is mandatory, so that there is at most 289 # one ciphertext that matches this read-cap or verify-cap. 
The 290 # integrity check on the shares is not sufficient to prevent the 291 # original encoder from creating some shares of file A and other 292 # shares of file B. self.ciphertext_hash_tree was a guess before: 293 # this is where we create it for real. 294 self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments) 295 self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']}) 296 297 self.share_hash_tree.set_hashes({0: d['share_root_hash']}) 298 299 # Our job is a fast download, not verification, so we ignore any 300 # redundant fields. The Verifier uses a different code path which 301 # does not ignore them. 302 303 def _calculate_sizes(self, segment_size): 304 # segments of ciphertext 305 size = self._verifycap.size 306 k = self._verifycap.needed_shares 307 308 # this assert matches the one in encode.py:127 inside 309 # Encoded._got_all_encoding_parameters, where the UEB is constructed 310 assert segment_size % k == 0 311 312 # the last segment is usually short. We don't store a whole segsize, 313 # but we do pad the segment up to a multiple of k, because the 314 # encoder requires that. 315 tail_segment_size = size % segment_size 316 if tail_segment_size == 0: 317 tail_segment_size = segment_size 318 padded = mathutil.next_multiple(tail_segment_size, k) 319 tail_segment_padded = padded 320 321 num_segments = mathutil.div_ceil(size, segment_size) 322 323 # each segment is turned into N blocks. All but the last are of size 324 # block_size, and the last is of size tail_block_size 325 block_size = segment_size / k 326 tail_block_size = tail_segment_padded / k 327 328 return { "tail_segment_size": tail_segment_size, 329 "tail_segment_padded": tail_segment_padded, 330 "num_segments": num_segments, 331 "block_size": block_size, 332 "tail_block_size": tail_block_size, 333 } 334 335 336 def process_share_hashes(self, share_hashes): 337 for hashnum in share_hashes: 338 if hashnum >= len(self.share_hash_tree): 339 # "BadHashError" is normally for e.g. 
a corrupt block. We 340 # sort of abuse it here to mean a badly numbered hash (which 341 # indicates corruption in the number bytes, rather than in 342 # the data bytes). 343 raise BadHashError("hashnum %d doesn't fit in hashtree(%d)" 344 % (hashnum, len(self.share_hash_tree))) 345 self.share_hash_tree.set_hashes(share_hashes) 346 347 def get_needed_ciphertext_hashes(self, segnum): 348 cht = self.ciphertext_hash_tree 349 return cht.needed_hashes(segnum, include_leaf=True) 350 def process_ciphertext_hashes(self, hashes): 351 assert self.num_segments is not None 352 # this may raise BadHashError or NotEnoughHashesError 353 self.ciphertext_hash_tree.set_hashes(hashes) 354 355 356 # called by our child SegmentFetcher 357 358 def want_more_shares(self): 359 self._sharefinder.hungry() 360 361 def fetch_failed(self, sf, f): 362 assert sf is self._active_segment 363 self._active_segment = None 364 # deliver error upwards 365 for (d,c) in self._extract_requests(sf.segnum): 366 eventually(self._deliver, d, c, f) 367 368 def process_blocks(self, segnum, blocks): 369 d = defer.maybeDeferred(self._decode_blocks, segnum, blocks) 370 d.addCallback(self._check_ciphertext_hash, segnum) 371 def _deliver(result): 372 ds = self._download_status 373 if isinstance(result, Failure): 374 ds.add_segment_error(segnum, now()) 375 else: 376 (offset, segment, decodetime) = result 377 ds.add_segment_delivery(segnum, now(), 378 offset, len(segment), decodetime) 379 log.msg(format="delivering segment(%(segnum)d)", 380 segnum=segnum, 381 level=log.OPERATIONAL, parent=self._lp, 382 umid="j60Ojg") 383 for (d,c) in self._extract_requests(segnum): 384 eventually(self._deliver, d, c, result) 385 self._active_segment = None 386 self._start_new_segment() 387 d.addBoth(_deliver) 388 d.addErrback(lambda f: 389 log.err("unhandled error during process_blocks", 390 failure=f, level=log.WEIRD, 391 parent=self._lp, umid="MkEsCg")) 392 393 def _decode_blocks(self, segnum, blocks): 394 tail = (segnum == 
self.num_segments-1) 395 codec = self._codec 396 block_size = self.block_size 397 decoded_size = self.segment_size 398 if tail: 399 # account for the padding in the last segment 400 codec = CRSDecoder() 401 k, N = self._verifycap.needed_shares, self._verifycap.total_shares 402 codec.set_params(self.tail_segment_padded, k, N) 403 block_size = self.tail_block_size 404 decoded_size = self.tail_segment_padded 405 406 shares = [] 407 shareids = [] 408 for (shareid, share) in blocks.iteritems(): 409 assert len(share) == block_size 410 shareids.append(shareid) 411 shares.append(share) 412 del blocks 413 414 start = now() 415 d = codec.decode(shares, shareids) # segment 416 del shares 417 def _process(buffers): 418 decodetime = now() - start 419 segment = "".join(buffers) 420 assert len(segment) == decoded_size 421 del buffers 422 if tail: 423 segment = segment[:self.tail_segment_size] 424 return (segment, decodetime) 425 d.addCallback(_process) 426 return d 427 428 def _check_ciphertext_hash(self, (segment, decodetime), segnum): 429 assert self._active_segment.segnum == segnum 430 assert self.segment_size is not None 431 offset = segnum * self.segment_size 432 433 h = hashutil.crypttext_segment_hash(segment) 434 try: 435 self.ciphertext_hash_tree.set_hashes(leaves={segnum: h}) 436 return (offset, segment, decodetime) 437 except (BadHashError, NotEnoughHashesError): 438 format = ("hash failure in ciphertext_hash_tree:" 439 " segnum=%(segnum)d, SI=%(si)s") 440 log.msg(format=format, segnum=segnum, si=self._si_prefix, 441 failure=Failure(), 442 level=log.WEIRD, parent=self._lp, umid="MTwNnw") 443 # this is especially weird, because we made it past the share 444 # hash tree. It implies that we're using the wrong encoding, or 445 # that the uploader deliberately constructed a bad UEB. 
446 msg = format % {"segnum": segnum, "si": self._si_prefix} 447 raise BadCiphertextHashError(msg) 448 449 def _deliver(self, d, c, result): 450 # this method exists to handle cancel() that occurs between 451 # _got_segment and _deliver 452 if not c.cancelled: 453 d.callback(result) # might actually be an errback 454 455 def _extract_requests(self, segnum): 456 """Remove matching requests and return their (d,c) tuples so that the 457 caller can retire them.""" 458 retire = [(d,c) for (segnum0, d, c) in self._segment_requests 459 if segnum0 == segnum] 460 self._segment_requests = [t for t in self._segment_requests 461 if t[0] != segnum] 462 return retire 463 464 def _cancel_request(self, c): 465 self._segment_requests = [t for t in self._segment_requests 466 if t[2] != c] 467 segnums = [segnum for (segnum,d,c) in self._segment_requests] 468 if self._active_segment.segnum not in segnums: 469 self._active_segment.stop() 470 self._active_segment = None 471 self._start_new_segment() -
new file src/allmydata/immutable/downloader/segmentation.py
diff --git a/src/allmydata/immutable/downloader/segmentation.py b/src/allmydata/immutable/downloader/segmentation.py new file mode 100644 index 0000000..adc138e

import time
now = time.time
from zope.interface import implements
from twisted.internet import defer
from twisted.internet.interfaces import IPushProducer
from foolscap.api import eventually
from allmydata.util import log
from allmydata.util.spans import overlap

from common import BadSegmentNumberError, WrongSegmentError

class Segmentation:
    """I am responsible for a single offset+size read of the file. I handle
    segmentation: I figure out which segments are necessary, request them
    (from my CiphertextDownloader) in order, and trim the segments down to
    match the offset+size span. I use the Producer/Consumer interface to only
    request one segment at a time.
    """
    implements(IPushProducer)
    def __init__(self, node, offset, size, consumer, read_ev, logparent=None):
        self._node = node
        # _hungry: the consumer wants more data (cleared by pauseProducing)
        self._hungry = True
        # segnum of the in-flight get_segment() request, or None
        self._active_segnum = None
        self._cancel_segment_request = None
        # these are updated as we deliver data. At any given time, we still
        # want to download file[offset:offset+size]
        self._offset = offset
        self._size = size
        assert offset+size <= node._verifycap.size
        self._consumer = consumer
        # download-status event, fed bytes-delivered and pause-time updates
        self._read_ev = read_ev
        self._start_pause = None
        self._lp = logparent

    def start(self):
        """Register as the consumer's producer and begin fetching. Returns
        a Deferred that fires (with the consumer) when the whole span has
        been delivered, or errbacks on a fatal problem."""
        self._alive = True
        self._deferred = defer.Deferred()
        self._consumer.registerProducer(self, True)
        self._maybe_fetch_next()
        return self._deferred

    def _maybe_fetch_next(self):
        # fetch another segment only if we're alive, the consumer wants
        # more, and no request is already in flight
        if not self._alive or not self._hungry:
            return
        if self._active_segnum is not None:
            return
        self._fetch_next()

    def _fetch_next(self):
        if self._size == 0:
            # done! deliver the consumer itself as the read() result
            self._alive = False
            self._hungry = False
            self._consumer.unregisterProducer()
            self._deferred.callback(self._consumer)
            return
        n = self._node
        have_actual_segment_size = n.segment_size is not None
        guess_s = ""
        if not have_actual_segment_size:
            guess_s = "probably "
        segment_size = n.segment_size or n.guessed_segment_size
        if self._offset == 0:
            # great! we want segment0 for sure
            wanted_segnum = 0
        else:
            # this might be a guess
            wanted_segnum = self._offset // segment_size
        log.msg(format="_fetch_next(offset=%(offset)d) %(guess)swants segnum=%(segnum)d",
                offset=self._offset, guess=guess_s, segnum=wanted_segnum,
                level=log.NOISY, parent=self._lp, umid="5WfN0w")
        self._active_segnum = wanted_segnum
        d,c = n.get_segment(wanted_segnum, self._lp)
        self._cancel_segment_request = c
        # _request_retired must run first (on success or failure) so a
        # retry can issue a fresh request
        d.addBoth(self._request_retired)
        d.addCallback(self._got_segment, wanted_segnum)
        if not have_actual_segment_size:
            # we can retry once: the errback only traps guess-related errors
            d.addErrback(self._retry_bad_segment)
        d.addErrback(self._error)

    def _request_retired(self, res):
        # clear in-flight state; passes the result/failure through unchanged
        self._active_segnum = None
        self._cancel_segment_request = None
        return res

    def _got_segment(self, (segment_start,segment,decodetime), wanted_segnum):
        self._cancel_segment_request = None
        # we got file[segment_start:segment_start+len(segment)]
        # we want file[self._offset:self._offset+self._size]
        log.msg(format="Segmentation got data:"
                " want [%(wantstart)d-%(wantend)d),"
                " given [%(segstart)d-%(segend)d), for segnum=%(segnum)d",
                wantstart=self._offset, wantend=self._offset+self._size,
                segstart=segment_start, segend=segment_start+len(segment),
                segnum=wanted_segnum,
                level=log.OPERATIONAL, parent=self._lp, umid="32dHcg")

        o = overlap(segment_start, len(segment), self._offset, self._size)
        # the overlap is file[o[0]:o[0]+o[1]]
        if not o or o[0] != self._offset:
            # we didn't get the first byte, so we can't use this segment
            log.msg("Segmentation handed wrong data:"
                    " want [%d-%d), given [%d-%d), for segnum=%d,"
                    " for si=%s"
                    % (self._offset, self._offset+self._size,
                       segment_start, segment_start+len(segment),
                       wanted_segnum, self._node._si_prefix),
                    level=log.UNUSUAL, parent=self._lp, umid="STlIiA")
            # we may retry if the segnum we asked was based on a guess
            raise WrongSegmentError("I was given the wrong data.")
        offset_in_segment = self._offset - segment_start
        desired_data = segment[offset_in_segment:offset_in_segment+o[1]]

        self._offset += len(desired_data)
        self._size -= len(desired_data)
        self._consumer.write(desired_data)
        # the consumer might call our .pauseProducing() inside that write()
        # call, setting self._hungry=False
        self._read_ev.update(len(desired_data), 0, 0)
        self._maybe_fetch_next()

    def _retry_bad_segment(self, f):
        f.trap(WrongSegmentError, BadSegmentNumberError)
        # we guessed the segnum wrong: either one that doesn't overlap with
        # the start of our desired region, or one that's beyond the end of
        # the world. Now that we have the right information, we're allowed to
        # retry once.
        assert self._node.segment_size is not None
        return self._maybe_fetch_next()

    def _error(self, f):
        # fatal: tear down the producer relationship and errback the read
        log.msg("Error in Segmentation", failure=f,
                level=log.WEIRD, parent=self._lp, umid="EYlXBg")
        self._alive = False
        self._hungry = False
        self._consumer.unregisterProducer()
        self._deferred.errback(f)

    def stopProducing(self):
        # IPushProducer: the consumer is going away; abandon the read.
        # NOTE(review): self._deferred is deliberately left unfired here.
        self._hungry = False
        self._alive = False
        # cancel any outstanding segment request
        if self._cancel_segment_request:
            self._cancel_segment_request.cancel()
            self._cancel_segment_request = None
    def pauseProducing(self):
        # IPushProducer: stop issuing new segment fetches; track pause time
        self._hungry = False
        self._start_pause = now()
    def resumeProducing(self):
        # IPushProducer: resume fetching (eventually, to avoid re-entrancy
        # if this is called from inside consumer.write())
        self._hungry = True
        eventually(self._maybe_fetch_next)
        if self._start_pause is not None:
            paused = now() - self._start_pause
            self._read_ev.update(0, 0, paused)
            self._start_pause = None
new file src/allmydata/immutable/downloader/share.py
diff --git a/src/allmydata/immutable/downloader/share.py b/src/allmydata/immutable/downloader/share.py new file mode 100644 index 0000000..c4dbd73
- + 1 2 import struct 3 import time 4 now = time.time 5 6 from twisted.python.failure import Failure 7 from foolscap.api import eventually 8 from allmydata.util import base32, log, hashutil, mathutil 9 from allmydata.util.spans import Spans, DataSpans 10 from allmydata.interfaces import HASH_SIZE 11 from allmydata.hashtree import IncompleteHashTree, BadHashError, \ 12 NotEnoughHashesError 13 14 from allmydata.immutable.layout import make_write_bucket_proxy 15 from util import Observer2 16 from common import COMPLETE, CORRUPT, DEAD, BADSEGNUM 17 18 19 class LayoutInvalid(Exception): 20 pass 21 class DataUnavailable(Exception): 22 pass 23 24 class Share: 25 """I represent a single instance of a single share (e.g. I reference the 26 shnum2 for share SI=abcde on server xy12t, not the one on server ab45q). 27 I am associated with a CommonShare that remembers data that is held in 28 common among e.g. SI=abcde/shnum2 across all servers. I am also 29 associated with a CiphertextFileNode for e.g. SI=abcde (all shares, all 30 servers). 31 """ 32 # this is a specific implementation of IShare for tahoe's native storage 33 # servers. A different backend would use a different class. 34 35 def __init__(self, rref, server_version, verifycap, commonshare, node, 36 download_status, peerid, shnum, logparent): 37 self._rref = rref 38 self._server_version = server_version 39 self._node = node # holds share_hash_tree and UEB 40 self.actual_segment_size = node.segment_size # might still be None 41 # XXX change node.guessed_segment_size to 42 # node.best_guess_segment_size(), which should give us the real ones 43 # if known, else its guess. 
44 self._guess_offsets(verifycap, node.guessed_segment_size) 45 self.actual_offsets = None 46 self._UEB_length = None 47 self._commonshare = commonshare # holds block_hash_tree 48 self._download_status = download_status 49 self._peerid = peerid 50 self._peerid_s = base32.b2a(peerid)[:5] 51 self._storage_index = verifycap.storage_index 52 self._si_prefix = base32.b2a(verifycap.storage_index)[:8] 53 self._shnum = shnum 54 # self._alive becomes False upon fatal corruption or server error 55 self._alive = True 56 self._lp = log.msg(format="%(share)s created", share=repr(self), 57 level=log.NOISY, parent=logparent, umid="P7hv2w") 58 59 self._pending = Spans() # request sent but no response received yet 60 self._received = DataSpans() # ACK response received, with data 61 self._unavailable = Spans() # NAK response received, no data 62 63 # any given byte of the share can be in one of four states: 64 # in: _wanted, _requested, _received 65 # FALSE FALSE FALSE : don't care about it at all 66 # TRUE FALSE FALSE : want it, haven't yet asked for it 67 # TRUE TRUE FALSE : request is in-flight 68 # or didn't get it 69 # FALSE TRUE TRUE : got it, haven't used it yet 70 # FALSE TRUE FALSE : got it and used it 71 # FALSE FALSE FALSE : block consumed, ready to ask again 72 # 73 # when we request data and get a NAK, we leave it in _requested 74 # to remind ourself to not ask for it again. We don't explicitly 75 # remove it from anything (maybe this should change). 76 # 77 # We retain the hashtrees in the Node, so we leave those spans in 78 # _requested (and never ask for them again, as long as the Node is 79 # alive). But we don't retain data blocks (too big), so when we 80 # consume a data block, we remove it from _requested, so a later 81 # download can re-fetch it. 
82 83 self._requested_blocks = [] # (segnum, set(observer2..)) 84 ver = server_version["http://allmydata.org/tahoe/protocols/storage/v1"] 85 self._overrun_ok = ver["tolerates-immutable-read-overrun"] 86 # If _overrun_ok and we guess the offsets correctly, we can get 87 # everything in one RTT. If _overrun_ok and we guess wrong, we might 88 # need two RTT (but we could get lucky and do it in one). If overrun 89 # is *not* ok (tahoe-1.3.0 or earlier), we need four RTT: 1=version, 90 # 2=offset table, 3=UEB_length and everything else (hashes, block), 91 # 4=UEB. 92 93 self.had_corruption = False # for unit tests 94 95 def __repr__(self): 96 return "Share(sh%d-on-%s)" % (self._shnum, self._peerid_s) 97 98 def is_alive(self): 99 # XXX: reconsider. If the share sees a single error, should it remain 100 # dead for all time? Or should the next segment try again? This DEAD 101 # state is stored elsewhere too (SegmentFetcher per-share states?) 102 # and needs to be consistent. We clear _alive in self._fail(), which 103 # is called upon a network error, or layout failure, or hash failure 104 # in the UEB or a hash tree. We do not _fail() for a hash failure in 105 # a block, but of course we still tell our callers about 106 # state=CORRUPT so they'll find a different share. 107 return self._alive 108 109 def _guess_offsets(self, verifycap, guessed_segment_size): 110 self.guessed_segment_size = guessed_segment_size 111 size = verifycap.size 112 k = verifycap.needed_shares 113 N = verifycap.total_shares 114 r = self._node._calculate_sizes(guessed_segment_size) 115 # num_segments, block_size/tail_block_size 116 # guessed_segment_size/tail_segment_size/tail_segment_padded 117 share_size = mathutil.div_ceil(size, k) 118 # share_size is the amount of block data that will be put into each 119 # share, summed over all segments. It does not include hashes, the 120 # UEB, or other overhead. 
121 122 # use the upload-side code to get this as accurate as possible 123 ht = IncompleteHashTree(N) 124 num_share_hashes = len(ht.needed_hashes(0, include_leaf=True)) 125 wbp = make_write_bucket_proxy(None, share_size, r["block_size"], 126 r["num_segments"], num_share_hashes, 0, 127 None) 128 self._fieldsize = wbp.fieldsize 129 self._fieldstruct = wbp.fieldstruct 130 self.guessed_offsets = wbp._offsets 131 132 # called by our client, the SegmentFetcher 133 def get_block(self, segnum): 134 """Add a block number to the list of requests. This will eventually 135 result in a fetch of the data necessary to validate the block, then 136 the block itself. The fetch order is generally 137 first-come-first-served, but requests may be answered out-of-order if 138 data becomes available sooner. 139 140 I return an Observer2, which has two uses. The first is to call 141 o.subscribe(), which gives me a place to send state changes and 142 eventually the data block. The second is o.cancel(), which removes 143 the request (if it is still active). 144 145 I will distribute the following events through my Observer2: 146 - state=OVERDUE: ?? I believe I should have had an answer by now. 147 You may want to ask another share instead. 148 - state=BADSEGNUM: the segnum you asked for is too large. 
I must 149 fetch a valid UEB before I can determine this, 150 so the notification is asynchronous 151 - state=COMPLETE, block=data: here is a valid block 152 - state=CORRUPT: this share contains corrupted data 153 - state=DEAD, f=Failure: the server reported an error, this share 154 is unusable 155 """ 156 log.msg("%s.get_block(%d)" % (repr(self), segnum), 157 level=log.NOISY, parent=self._lp, umid="RTo9MQ") 158 assert segnum >= 0 159 o = Observer2() 160 o.set_canceler(self, "_cancel_block_request") 161 for i,(segnum0,observers) in enumerate(self._requested_blocks): 162 if segnum0 == segnum: 163 observers.add(o) 164 break 165 else: 166 self._requested_blocks.append( (segnum, set([o])) ) 167 eventually(self.loop) 168 return o 169 170 def _cancel_block_request(self, o): 171 new_requests = [] 172 for e in self._requested_blocks: 173 (segnum0, observers) = e 174 observers.discard(o) 175 if observers: 176 new_requests.append(e) 177 self._requested_blocks = new_requests 178 179 # internal methods 180 def _active_segnum_and_observers(self): 181 if self._requested_blocks: 182 # we only retrieve information for one segment at a time, to 183 # minimize alacrity (first come, first served) 184 return self._requested_blocks[0] 185 return None, [] 186 187 def loop(self): 188 try: 189 # if any exceptions occur here, kill the download 190 log.msg("%s.loop, reqs=[%s], pending=%s, received=%s," 191 " unavailable=%s" % 192 (repr(self), 193 ",".join([str(req[0]) for req in self._requested_blocks]), 194 self._pending.dump(), self._received.dump(), 195 self._unavailable.dump() ), 196 level=log.NOISY, parent=self._lp, umid="BaL1zw") 197 self._do_loop() 198 # all exception cases call self._fail(), which clears self._alive 199 except (BadHashError, NotEnoughHashesError, LayoutInvalid), e: 200 # Abandon this share. We do this if we see corruption in the 201 # offset table, the UEB, or a hash tree. 
We don't abandon the 202 # whole share if we see corruption in a data block (we abandon 203 # just the one block, and still try to get data from other blocks 204 # on the same server). In theory, we could get good data from a 205 # share with a corrupt UEB (by first getting the UEB from some 206 # other share), or corrupt hash trees, but the logic to decide 207 # when this is safe is non-trivial. So for now, give up at the 208 # first sign of corruption. 209 # 210 # _satisfy_*() code which detects corruption should first call 211 # self._signal_corruption(), and then raise the exception. 212 log.msg(format="corruption detected in %(share)s", 213 share=repr(self), 214 level=log.UNUSUAL, parent=self._lp, umid="gWspVw") 215 self._fail(Failure(e), log.UNUSUAL) 216 except DataUnavailable, e: 217 # Abandon this share. 218 log.msg(format="need data that will never be available" 219 " from %s: pending=%s, received=%s, unavailable=%s" % 220 (repr(self), 221 self._pending.dump(), self._received.dump(), 222 self._unavailable.dump() ), 223 level=log.UNUSUAL, parent=self._lp, umid="F7yJnQ") 224 self._fail(Failure(e), log.UNUSUAL) 225 except BaseException: 226 self._fail(Failure()) 227 raise 228 log.msg("%s.loop done, reqs=[%s], pending=%s, received=%s," 229 " unavailable=%s" % 230 (repr(self), 231 ",".join([str(req[0]) for req in self._requested_blocks]), 232 self._pending.dump(), self._received.dump(), 233 self._unavailable.dump() ), 234 level=log.NOISY, parent=self._lp, umid="9lRaRA") 235 236 def _do_loop(self): 237 # we are (eventually) called after all state transitions: 238 # new segments added to self._requested_blocks 239 # new data received from servers (responses to our read() calls) 240 # impatience timer fires (server appears slow) 241 if not self._alive: 242 return 243 244 # First, consume all of the information that we currently have, for 245 # all the segments people currently want. 
246 while self._get_satisfaction(): 247 pass 248 249 # When we get no satisfaction (from the data we've received so far), 250 # we determine what data we desire (to satisfy more requests). The 251 # number of segments is finite, so I can't get no satisfaction 252 # forever. 253 wanted, needed = self._desire() 254 255 # Finally, send out requests for whatever we need (desire minus 256 # have). You can't always get what you want, but if you try 257 # sometimes, you just might find, you get what you need. 258 self._send_requests(wanted + needed) 259 260 # and sometimes you can't even get what you need 261 disappointment = needed & self._unavailable 262 if len(disappointment): 263 self.had_corruption = True 264 raise DataUnavailable("need %s but will never get it" % 265 disappointment.dump()) 266 267 def _get_satisfaction(self): 268 # return True if we retired a data block, and should therefore be 269 # called again. Return False if we don't retire a data block (even if 270 # we do retire some other data, like hash chains). 271 272 if self.actual_offsets is None: 273 if not self._satisfy_offsets(): 274 # can't even look at anything without the offset table 275 return False 276 277 if not self._node.have_UEB: 278 if not self._satisfy_UEB(): 279 # can't check any hashes without the UEB 280 return False 281 self.actual_segment_size = self._node.segment_size # might be updated 282 assert self.actual_segment_size is not None 283 284 # knowing the UEB means knowing num_segments. Despite the redundancy, 285 # this is the best place to set this. CommonShare.set_numsegs will 286 # ignore duplicate calls. 287 assert self._node.num_segments is not None 288 cs = self._commonshare 289 cs.set_numsegs(self._node.num_segments) 290 291 segnum, observers = self._active_segnum_and_observers() 292 # if segnum is None, we don't really need to do anything (we have no 293 # outstanding readers right now), but we'll fill in the bits that 294 # aren't tied to any particular segment. 
295 296 if segnum is not None and segnum >= self._node.num_segments: 297 for o in observers: 298 o.notify(state=BADSEGNUM) 299 self._requested_blocks.pop(0) 300 return True 301 302 if self._node.share_hash_tree.needed_hashes(self._shnum): 303 if not self._satisfy_share_hash_tree(): 304 # can't check block_hash_tree without a root 305 return False 306 307 if cs.need_block_hash_root(): 308 block_hash_root = self._node.share_hash_tree.get_leaf(self._shnum) 309 cs.set_block_hash_root(block_hash_root) 310 311 if segnum is None: 312 return False # we don't want any particular segment right now 313 314 # block_hash_tree 315 needed_hashes = self._commonshare.get_needed_block_hashes(segnum) 316 if needed_hashes: 317 if not self._satisfy_block_hash_tree(needed_hashes): 318 # can't check block without block_hash_tree 319 return False 320 321 # ciphertext_hash_tree 322 needed_hashes = self._node.get_needed_ciphertext_hashes(segnum) 323 if needed_hashes: 324 if not self._satisfy_ciphertext_hash_tree(needed_hashes): 325 # can't check decoded blocks without ciphertext_hash_tree 326 return False 327 328 # data blocks 329 return self._satisfy_data_block(segnum, observers) 330 331 def _satisfy_offsets(self): 332 version_s = self._received.get(0, 4) 333 if version_s is None: 334 return False 335 (version,) = struct.unpack(">L", version_s) 336 if version == 1: 337 table_start = 0x0c 338 self._fieldsize = 0x4 339 self._fieldstruct = "L" 340 elif version == 2: 341 table_start = 0x14 342 self._fieldsize = 0x8 343 self._fieldstruct = "Q" 344 else: 345 self.had_corruption = True 346 raise LayoutInvalid("unknown version %d (I understand 1 and 2)" 347 % version) 348 offset_table_size = 6 * self._fieldsize 349 table_s = self._received.pop(table_start, offset_table_size) 350 if table_s is None: 351 return False 352 fields = struct.unpack(">"+6*self._fieldstruct, table_s) 353 offsets = {} 354 for i,field in enumerate(['data', 355 'plaintext_hash_tree', # UNUSED 356 'crypttext_hash_tree', 357 
'block_hashes', 358 'share_hashes', 359 'uri_extension', 360 ] ): 361 offsets[field] = fields[i] 362 self.actual_offsets = offsets 363 log.msg("actual offsets: data=%d, plaintext_hash_tree=%d, crypttext_hash_tree=%d, block_hashes=%d, share_hashes=%d, uri_extension=%d" % tuple(fields)) 364 self._received.remove(0, 4) # don't need this anymore 365 366 # validate the offsets a bit 367 share_hashes_size = offsets["uri_extension"] - offsets["share_hashes"] 368 if share_hashes_size < 0 or share_hashes_size % (2+HASH_SIZE) != 0: 369 # the share hash chain is stored as (hashnum,hash) pairs 370 self.had_corruption = True 371 raise LayoutInvalid("share hashes malformed -- should be a" 372 " multiple of %d bytes -- not %d" % 373 (2+HASH_SIZE, share_hashes_size)) 374 block_hashes_size = offsets["share_hashes"] - offsets["block_hashes"] 375 if block_hashes_size < 0 or block_hashes_size % (HASH_SIZE) != 0: 376 # the block hash tree is stored as a list of hashes 377 self.had_corruption = True 378 raise LayoutInvalid("block hashes malformed -- should be a" 379 " multiple of %d bytes -- not %d" % 380 (HASH_SIZE, block_hashes_size)) 381 # we only look at 'crypttext_hash_tree' if the UEB says we're 382 # actually using it. Same with 'plaintext_hash_tree'. This gives us 383 # some wiggle room: a place to stash data for later extensions. 384 385 return True 386 387 def _satisfy_UEB(self): 388 o = self.actual_offsets 389 fsize = self._fieldsize 390 UEB_length_s = self._received.get(o["uri_extension"], fsize) 391 if not UEB_length_s: 392 return False 393 (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s) 394 UEB_s = self._received.pop(o["uri_extension"]+fsize, UEB_length) 395 if not UEB_s: 396 return False 397 self._received.remove(o["uri_extension"], fsize) 398 try: 399 self._node.validate_and_store_UEB(UEB_s) 400 return True 401 except (LayoutInvalid, BadHashError), e: 402 # TODO: if this UEB was bad, we'll keep trying to validate it 403 # over and over again. 
Only log.err on the first one, or better 404 # yet skip all but the first 405 f = Failure(e) 406 self._signal_corruption(f, o["uri_extension"], fsize+UEB_length) 407 self.had_corruption = True 408 raise 409 410 def _satisfy_share_hash_tree(self): 411 # the share hash chain is stored as (hashnum,hash) tuples, so you 412 # can't fetch just the pieces you need, because you don't know 413 # exactly where they are. So fetch everything, and parse the results 414 # later. 415 o = self.actual_offsets 416 hashlen = o["uri_extension"] - o["share_hashes"] 417 assert hashlen % (2+HASH_SIZE) == 0 418 hashdata = self._received.get(o["share_hashes"], hashlen) 419 if not hashdata: 420 return False 421 share_hashes = {} 422 for i in range(0, hashlen, 2+HASH_SIZE): 423 (hashnum,) = struct.unpack(">H", hashdata[i:i+2]) 424 hashvalue = hashdata[i+2:i+2+HASH_SIZE] 425 share_hashes[hashnum] = hashvalue 426 try: 427 self._node.process_share_hashes(share_hashes) 428 # adds to self._node.share_hash_tree 429 except (BadHashError, NotEnoughHashesError), e: 430 f = Failure(e) 431 self._signal_corruption(f, o["share_hashes"], hashlen) 432 self.had_corruption = True 433 raise 434 self._received.remove(o["share_hashes"], hashlen) 435 return True 436 437 def _signal_corruption(self, f, start, offset): 438 # there was corruption somewhere in the given range 439 reason = "corruption in share[%d-%d): %s" % (start, start+offset, 440 str(f.value)) 441 self._rref.callRemoteOnly("advise_corrupt_share", reason) 442 443 def _satisfy_block_hash_tree(self, needed_hashes): 444 o_bh = self.actual_offsets["block_hashes"] 445 block_hashes = {} 446 for hashnum in needed_hashes: 447 hashdata = self._received.get(o_bh+hashnum*HASH_SIZE, HASH_SIZE) 448 if hashdata: 449 block_hashes[hashnum] = hashdata 450 else: 451 return False # missing some hashes 452 # note that we don't submit any hashes to the block_hash_tree until 453 # we've gotten them all, because the hash tree will throw an 454 # exception if we only give 
it a partial set (which it therefore 455 # cannot validate) 456 try: 457 self._commonshare.process_block_hashes(block_hashes) 458 except (BadHashError, NotEnoughHashesError), e: 459 f = Failure(e) 460 hashnums = ",".join([str(n) for n in sorted(block_hashes.keys())]) 461 log.msg(format="hash failure in block_hashes=(%(hashnums)s)," 462 " from %(share)s", 463 hashnums=hashnums, shnum=self._shnum, share=repr(self), 464 failure=f, level=log.WEIRD, parent=self._lp, umid="yNyFdA") 465 hsize = max(0, max(needed_hashes)) * HASH_SIZE 466 self._signal_corruption(f, o_bh, hsize) 467 self.had_corruption = True 468 raise 469 for hashnum in needed_hashes: 470 self._received.remove(o_bh+hashnum*HASH_SIZE, HASH_SIZE) 471 return True 472 473 def _satisfy_ciphertext_hash_tree(self, needed_hashes): 474 start = self.actual_offsets["crypttext_hash_tree"] 475 hashes = {} 476 for hashnum in needed_hashes: 477 hashdata = self._received.get(start+hashnum*HASH_SIZE, HASH_SIZE) 478 if hashdata: 479 hashes[hashnum] = hashdata 480 else: 481 return False # missing some hashes 482 # we don't submit any hashes to the ciphertext_hash_tree until we've 483 # gotten them all 484 try: 485 self._node.process_ciphertext_hashes(hashes) 486 except (BadHashError, NotEnoughHashesError), e: 487 f = Failure(e) 488 hashnums = ",".join([str(n) for n in sorted(hashes.keys())]) 489 log.msg(format="hash failure in ciphertext_hashes=(%(hashnums)s)," 490 " from %(share)s", 491 hashnums=hashnums, share=repr(self), failure=f, 492 level=log.WEIRD, parent=self._lp, umid="iZI0TA") 493 hsize = max(0, max(needed_hashes))*HASH_SIZE 494 self._signal_corruption(f, start, hsize) 495 self.had_corruption = True 496 raise 497 for hashnum in needed_hashes: 498 self._received.remove(start+hashnum*HASH_SIZE, HASH_SIZE) 499 return True 500 501 def _satisfy_data_block(self, segnum, observers): 502 tail = (segnum == self._node.num_segments-1) 503 datastart = self.actual_offsets["data"] 504 blockstart = datastart + segnum * 
self._node.block_size 505 blocklen = self._node.block_size 506 if tail: 507 blocklen = self._node.tail_block_size 508 509 block = self._received.pop(blockstart, blocklen) 510 if not block: 511 log.msg("no data for block %s (want [%d:+%d])" % (repr(self), 512 blockstart, blocklen)) 513 return False 514 log.msg(format="%(share)s._satisfy_data_block [%(start)d:+%(length)d]", 515 share=repr(self), start=blockstart, length=blocklen, 516 level=log.NOISY, parent=self._lp, umid="uTDNZg") 517 # this block is being retired, either as COMPLETE or CORRUPT, since 518 # no further data reads will help 519 assert self._requested_blocks[0][0] == segnum 520 try: 521 self._commonshare.check_block(segnum, block) 522 # hurrah, we have a valid block. Deliver it. 523 for o in observers: 524 # goes to SegmentFetcher._block_request_activity 525 o.notify(state=COMPLETE, block=block) 526 except (BadHashError, NotEnoughHashesError), e: 527 # rats, we have a corrupt block. Notify our clients that they 528 # need to look elsewhere, and advise the server. Unlike 529 # corruption in other parts of the share, this doesn't cause us 530 # to abandon the whole share. 531 f = Failure(e) 532 log.msg(format="hash failure in block %(segnum)d, from %(share)s", 533 segnum=segnum, share=repr(self), failure=f, 534 level=log.WEIRD, parent=self._lp, umid="mZjkqA") 535 for o in observers: 536 o.notify(state=CORRUPT) 537 self._signal_corruption(f, blockstart, blocklen) 538 self.had_corruption = True 539 # in either case, we've retired this block 540 self._requested_blocks.pop(0) 541 # popping the request keeps us from turning around and wanting the 542 # block again right away 543 return True # got satisfaction 544 545 def _desire(self): 546 segnum, observers = self._active_segnum_and_observers() # maybe None 547 548 # 'want_it' is for data we merely want: we know that we don't really 549 # need it. 
This includes speculative reads, like the first 1KB of the 550 # share (for the offset table) and the first 2KB of the UEB. 551 # 552 # 'need_it' is for data that, if we have the real offset table, we'll 553 # need. If we are only guessing at the offset table, it's merely 554 # wanted. (The share is abandoned if we can't get data that we really 555 # need). 556 # 557 # 'gotta_gotta_have_it' is for data that we absolutely need, 558 # independent of whether we're still guessing about the offset table: 559 # the version number and the offset table itself. 560 # 561 # Mr. Popeil, I'm in trouble, need your assistance on the double. Aww.. 562 563 desire = Spans(), Spans(), Spans() 564 (want_it, need_it, gotta_gotta_have_it) = desire 565 566 self.actual_segment_size = self._node.segment_size # might be updated 567 o = self.actual_offsets or self.guessed_offsets 568 segsize = self.actual_segment_size or self.guessed_segment_size 569 r = self._node._calculate_sizes(segsize) 570 571 if not self.actual_offsets: 572 # all _desire functions add bits to the three desire[] spans 573 self._desire_offsets(desire) 574 575 # we can use guessed offsets as long as this server tolerates 576 # overrun. Otherwise, we must wait for the offsets to arrive before 577 # we try to read anything else. 578 if self.actual_offsets or self._overrun_ok: 579 if not self._node.have_UEB: 580 self._desire_UEB(desire, o) 581 # They might ask for a segment that doesn't look right. 582 # _satisfy() will catch+reject bad segnums once we know the UEB 583 # (and therefore segsize and numsegs), so we'll only fail this 584 # test if we're still guessing. We want to avoid asking the 585 # hashtrees for needed_hashes() for bad segnums. So don't enter 586 # _desire_hashes or _desire_data unless the segnum looks 587 # reasonable. 588 if segnum < r["num_segments"]: 589 # XXX somehow we're getting here for sh5. we don't yet know 590 # the actual_segment_size, we're still working off the guess. 
591 # the ciphertext_hash_tree has been corrected, but the 592 # commonshare._block_hash_tree is still in the guessed state. 593 self._desire_share_hashes(desire, o) 594 if segnum is not None: 595 self._desire_block_hashes(desire, o, segnum) 596 self._desire_data(desire, o, r, segnum, segsize) 597 else: 598 log.msg("_desire: segnum(%d) looks wrong (numsegs=%d)" 599 % (segnum, r["num_segments"]), 600 level=log.UNUSUAL, parent=self._lp, umid="tuYRQQ") 601 602 log.msg("end _desire: want_it=%s need_it=%s gotta=%s" 603 % (want_it.dump(), need_it.dump(), gotta_gotta_have_it.dump())) 604 if self.actual_offsets: 605 return (want_it, need_it+gotta_gotta_have_it) 606 else: 607 return (want_it+need_it, gotta_gotta_have_it) 608 609 def _desire_offsets(self, desire): 610 (want_it, need_it, gotta_gotta_have_it) = desire 611 if self._overrun_ok: 612 # easy! this includes version number, sizes, and offsets 613 want_it.add(0, 1024) 614 return 615 616 # v1 has an offset table that lives [0x0,0x24). v2 lives [0x0,0x44). 617 # To be conservative, only request the data that we know lives there, 618 # even if that means more roundtrips. 619 620 gotta_gotta_have_it.add(0, 4) # version number, always safe 621 version_s = self._received.get(0, 4) 622 if not version_s: 623 return 624 (version,) = struct.unpack(">L", version_s) 625 # The code in _satisfy_offsets will have checked this version 626 # already. There is no code path to get this far with version>2. 627 assert 1 <= version <= 2, "can't get here, version=%d" % version 628 if version == 1: 629 table_start = 0x0c 630 fieldsize = 0x4 631 elif version == 2: 632 table_start = 0x14 633 fieldsize = 0x8 634 offset_table_size = 6 * fieldsize 635 gotta_gotta_have_it.add(table_start, offset_table_size) 636 637 def _desire_UEB(self, desire, o): 638 (want_it, need_it, gotta_gotta_have_it) = desire 639 640 # UEB data is stored as (length,data). 641 if self._overrun_ok: 642 # We can pre-fetch 2kb, which should probably cover it. 
If it 643 # turns out to be larger, we'll come back here later with a known 644 # length and fetch the rest. 645 want_it.add(o["uri_extension"], 2048) 646 # now, while that is probably enough to fetch the whole UEB, it 647 # might not be, so we need to do the next few steps as well. In 648 # most cases, the following steps will not actually add anything 649 # to need_it 650 651 need_it.add(o["uri_extension"], self._fieldsize) 652 # only use a length if we're sure it's correct, otherwise we'll 653 # probably fetch a huge number 654 if not self.actual_offsets: 655 return 656 UEB_length_s = self._received.get(o["uri_extension"], self._fieldsize) 657 if UEB_length_s: 658 (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s) 659 # we know the length, so make sure we grab everything 660 need_it.add(o["uri_extension"]+self._fieldsize, UEB_length) 661 662 def _desire_share_hashes(self, desire, o): 663 (want_it, need_it, gotta_gotta_have_it) = desire 664 665 if self._node.share_hash_tree.needed_hashes(self._shnum): 666 hashlen = o["uri_extension"] - o["share_hashes"] 667 need_it.add(o["share_hashes"], hashlen) 668 669 def _desire_block_hashes(self, desire, o, segnum): 670 (want_it, need_it, gotta_gotta_have_it) = desire 671 672 # block hash chain 673 for hashnum in self._commonshare.get_needed_block_hashes(segnum): 674 need_it.add(o["block_hashes"]+hashnum*HASH_SIZE, HASH_SIZE) 675 676 # ciphertext hash chain 677 for hashnum in self._node.get_needed_ciphertext_hashes(segnum): 678 need_it.add(o["crypttext_hash_tree"]+hashnum*HASH_SIZE, HASH_SIZE) 679 680 def _desire_data(self, desire, o, r, segnum, segsize): 681 (want_it, need_it, gotta_gotta_have_it) = desire 682 tail = (segnum == r["num_segments"]-1) 683 datastart = o["data"] 684 blockstart = datastart + segnum * r["block_size"] 685 blocklen = r["block_size"] 686 if tail: 687 blocklen = r["tail_block_size"] 688 need_it.add(blockstart, blocklen) 689 690 def _send_requests(self, desired): 691 ask = desired - 
self._pending - self._received.get_spans() 692 log.msg("%s._send_requests, desired=%s, pending=%s, ask=%s" % 693 (repr(self), desired.dump(), self._pending.dump(), ask.dump()), 694 level=log.NOISY, parent=self._lp, umid="E94CVA") 695 # XXX At one time, this code distinguished between data blocks and 696 # hashes, and made sure to send (small) requests for hashes before 697 # sending (big) requests for blocks. The idea was to make sure that 698 # all hashes arrive before the blocks, so the blocks can be consumed 699 # and released in a single turn. I removed this for simplicity. 700 # Reconsider the removal: maybe bring it back. 701 ds = self._download_status 702 703 for (start, length) in ask: 704 # TODO: quantize to reasonably-large blocks 705 self._pending.add(start, length) 706 lp = log.msg(format="%(share)s._send_request" 707 " [%(start)d:+%(length)d]", 708 share=repr(self), 709 start=start, length=length, 710 level=log.NOISY, parent=self._lp, umid="sgVAyA") 711 req_ev = ds.add_request_sent(self._peerid, self._shnum, 712 start, length, now()) 713 d = self._send_request(start, length) 714 d.addCallback(self._got_data, start, length, req_ev, lp) 715 d.addErrback(self._got_error, start, length, req_ev, lp) 716 d.addCallback(self._trigger_loop) 717 d.addErrback(lambda f: 718 log.err(format="unhandled error during send_request", 719 failure=f, parent=self._lp, 720 level=log.WEIRD, umid="qZu0wg")) 721 722 def _send_request(self, start, length): 723 return self._rref.callRemote("read", start, length) 724 725 def _got_data(self, data, start, length, req_ev, lp): 726 req_ev.finished(len(data), now()) 727 if not self._alive: 728 return 729 log.msg(format="%(share)s._got_data [%(start)d:+%(length)d] -> %(datalen)d", 730 share=repr(self), start=start, length=length, datalen=len(data), 731 level=log.NOISY, parent=lp, umid="5Qn6VQ") 732 self._pending.remove(start, length) 733 self._received.add(start, data) 734 735 # if we ask for [a:c], and we get back [a:b] (b<c), that 
means we're 736 # never going to get [b:c]. If we really need that data, this block 737 # will never complete. The easiest way to get into this situation is 738 # to hit a share with a corrupted offset table, or one that's somehow 739 # been truncated. On the other hand, when overrun_ok is true, we ask 740 # for data beyond the end of the share all the time (it saves some 741 # RTT when we don't know the length of the share ahead of time). So 742 # not every asked-for-but-not-received byte is fatal. 743 if len(data) < length: 744 self._unavailable.add(start+len(data), length-len(data)) 745 746 # XXX if table corruption causes our sections to overlap, then one 747 # consumer (i.e. block hash tree) will pop/remove the data that 748 # another consumer (i.e. block data) mistakenly thinks it needs. It 749 # won't ask for that data again, because the span is in 750 # self._requested. But that span won't be in self._unavailable 751 # because we got it back from the server. TODO: handle this properly 752 # (raise DataUnavailable). Then add sanity-checking 753 # no-overlaps-allowed tests to the offset-table unpacking code to 754 # catch this earlier. XXX 755 756 # accumulate a wanted/needed span (not as self._x, but passed into 757 # desire* functions). manage a pending/in-flight list. when the 758 # requests are sent out, empty/discard the wanted/needed span and 759 # populate/augment the pending list. when the responses come back, 760 # augment either received+data or unavailable. 761 762 # if a corrupt offset table results in double-usage, we'll send 763 # double requests. 764 765 # the wanted/needed span is only "wanted" for the first pass. Once 766 # the offset table arrives, it's all "needed". 
767 768 def _got_error(self, f, start, length, req_ev, lp): 769 req_ev.finished("error", now()) 770 log.msg(format="error requesting %(start)d+%(length)d" 771 " from %(server)s for si %(si)s", 772 start=start, length=length, 773 server=self._peerid_s, si=self._si_prefix, 774 failure=f, parent=lp, level=log.UNUSUAL, umid="BZgAJw") 775 # retire our observers, assuming we won't be able to make any 776 # further progress 777 self._fail(f, log.UNUSUAL) 778 779 def _trigger_loop(self, res): 780 if self._alive: 781 eventually(self.loop) 782 return res 783 784 def _fail(self, f, level=log.WEIRD): 785 log.msg(format="abandoning %(share)s", 786 share=repr(self), failure=f, 787 level=level, parent=self._lp, umid="JKM2Og") 788 self._alive = False 789 for (segnum, observers) in self._requested_blocks: 790 for o in observers: 791 o.notify(state=DEAD, f=f) 792 793 794 class CommonShare: 795 """I hold data that is common across all instances of a single share, 796 like sh2 on both servers A and B. This is just the block hash tree. 797 """ 798 def __init__(self, guessed_numsegs, si_prefix, shnum, logparent): 799 self.si_prefix = si_prefix 800 self.shnum = shnum 801 # in the beginning, before we have the real UEB, we can only guess at 802 # the number of segments. But we want to ask for block hashes early. 803 # So if we're asked for which block hashes are needed before we know 804 # numsegs for sure, we return a guess. 
805 self._block_hash_tree = IncompleteHashTree(guessed_numsegs) 806 self._know_numsegs = False 807 self._logparent = logparent 808 809 def set_numsegs(self, numsegs): 810 if self._know_numsegs: 811 return 812 self._block_hash_tree = IncompleteHashTree(numsegs) 813 self._know_numsegs = True 814 815 def need_block_hash_root(self): 816 return bool(not self._block_hash_tree[0]) 817 818 def set_block_hash_root(self, roothash): 819 assert self._know_numsegs 820 self._block_hash_tree.set_hashes({0: roothash}) 821 822 def get_needed_block_hashes(self, segnum): 823 # XXX: include_leaf=True needs thought: how did the old downloader do 824 # it? I think it grabbed *all* block hashes and set them all at once. 825 # Since we want to fetch less data, we either need to fetch the leaf 826 # too, or wait to set the block hashes until we've also received the 827 # block itself, so we can hash it too, and set the chain+leaf all at 828 # the same time. 829 return self._block_hash_tree.needed_hashes(segnum, include_leaf=True) 830 831 def process_block_hashes(self, block_hashes): 832 assert self._know_numsegs 833 # this may raise BadHashError or NotEnoughHashesError 834 self._block_hash_tree.set_hashes(block_hashes) 835 836 def check_block(self, segnum, block): 837 assert self._know_numsegs 838 h = hashutil.block_hash(block) 839 # this may raise BadHashError or NotEnoughHashesError 840 self._block_hash_tree.set_hashes(leaves={segnum: h}) -
new file src/allmydata/immutable/downloader/status.py
diff --git a/src/allmydata/immutable/downloader/status.py b/src/allmydata/immutable/downloader/status.py new file mode 100644 index 0000000..5d60db0
- + 1 2 import itertools 3 from zope.interface import implements 4 from allmydata.interfaces import IDownloadStatus 5 6 class RequestEvent: 7 def __init__(self, download_status, tag): 8 self._download_status = download_status 9 self._tag = tag 10 def finished(self, received, when): 11 self._download_status.add_request_finished(self._tag, received, when) 12 13 class DYHBEvent: 14 def __init__(self, download_status, tag): 15 self._download_status = download_status 16 self._tag = tag 17 def finished(self, shnums, when): 18 self._download_status.add_dyhb_finished(self._tag, shnums, when) 19 20 class ReadEvent: 21 def __init__(self, download_status, tag): 22 self._download_status = download_status 23 self._tag = tag 24 def update(self, bytes, decrypttime, pausetime): 25 self._download_status.update_read_event(self._tag, bytes, 26 decrypttime, pausetime) 27 def finished(self, finishtime): 28 self._download_status.finish_read_event(self._tag, finishtime) 29 30 class DownloadStatus: 31 # There is one DownloadStatus for each CiphertextFileNode. The status 32 # object will keep track of all activity for that node. 33 implements(IDownloadStatus) 34 statusid_counter = itertools.count(0) 35 36 def __init__(self, storage_index, size): 37 self.storage_index = storage_index 38 self.size = size 39 self.counter = self.statusid_counter.next() 40 self.helper = False 41 self.started = None 42 # self.dyhb_requests tracks "do you have a share" requests and 43 # responses. It maps serverid to a tuple of: 44 # send time 45 # tuple of response shnums (None if response hasn't arrived, "error") 46 # response time (None if response hasn't arrived yet) 47 self.dyhb_requests = {} 48 49 # self.requests tracks share-data requests and responses. 
It maps 50 # serverid to a tuple of: 51 # shnum, 52 # start,length, (of data requested) 53 # send time 54 # response length (None if reponse hasn't arrived yet, or "error") 55 # response time (None if response hasn't arrived) 56 self.requests = {} 57 58 # self.segment_events tracks segment requests and delivery. It is a 59 # list of: 60 # type ("request", "delivery", "error") 61 # segment number 62 # event time 63 # segment start (file offset of first byte, None except in "delivery") 64 # segment length (only in "delivery") 65 # time spent in decode (only in "delivery") 66 self.segment_events = [] 67 68 # self.read_events tracks read() requests. It is a list of: 69 # start,length (of data requested) 70 # request time 71 # finish time (None until finished) 72 # bytes returned (starts at 0, grows as segments are delivered) 73 # time spent in decrypt (None for ciphertext-only reads) 74 # time spent paused 75 self.read_events = [] 76 77 self.known_shares = [] # (serverid, shnum) 78 self.problems = [] 79 80 81 def add_dyhb_sent(self, serverid, when): 82 r = (when, None, None) 83 if serverid not in self.dyhb_requests: 84 self.dyhb_requests[serverid] = [] 85 self.dyhb_requests[serverid].append(r) 86 tag = (serverid, len(self.dyhb_requests[serverid])-1) 87 return DYHBEvent(self, tag) 88 89 def add_dyhb_finished(self, tag, shnums, when): 90 # received="error" on error, else tuple(shnums) 91 (serverid, index) = tag 92 r = self.dyhb_requests[serverid][index] 93 (sent, _, _) = r 94 r = (sent, shnums, when) 95 self.dyhb_requests[serverid][index] = r 96 97 def add_request_sent(self, serverid, shnum, start, length, when): 98 r = (shnum, start, length, when, None, None) 99 if serverid not in self.requests: 100 self.requests[serverid] = [] 101 self.requests[serverid].append(r) 102 tag = (serverid, len(self.requests[serverid])-1) 103 return RequestEvent(self, tag) 104 105 def add_request_finished(self, tag, received, when): 106 # received="error" on error, else len(data) 107 
(serverid, index) = tag 108 r = self.requests[serverid][index] 109 (shnum, start, length, sent, _, _) = r 110 r = (shnum, start, length, sent, received, when) 111 self.requests[serverid][index] = r 112 113 def add_segment_request(self, segnum, when): 114 if self.started is None: 115 self.started = when 116 r = ("request", segnum, when, None, None, None) 117 self.segment_events.append(r) 118 def add_segment_delivery(self, segnum, when, start, length, decodetime): 119 r = ("delivery", segnum, when, start, length, decodetime) 120 self.segment_events.append(r) 121 def add_segment_error(self, segnum, when): 122 r = ("error", segnum, when, None, None, None) 123 self.segment_events.append(r) 124 125 def add_read_event(self, start, length, when): 126 if self.started is None: 127 self.started = when 128 r = (start, length, when, None, 0, 0, 0) 129 self.read_events.append(r) 130 tag = len(self.read_events)-1 131 return ReadEvent(self, tag) 132 def update_read_event(self, tag, bytes_d, decrypt_d, paused_d): 133 r = self.read_events[tag] 134 (start, length, requesttime, finishtime, bytes, decrypt, paused) = r 135 bytes += bytes_d 136 decrypt += decrypt_d 137 paused += paused_d 138 r = (start, length, requesttime, finishtime, bytes, decrypt, paused) 139 self.read_events[tag] = r 140 def finish_read_event(self, tag, finishtime): 141 r = self.read_events[tag] 142 (start, length, requesttime, _, bytes, decrypt, paused) = r 143 r = (start, length, requesttime, finishtime, bytes, decrypt, paused) 144 self.read_events[tag] = r 145 146 def add_known_share(self, serverid, shnum): 147 self.known_shares.append( (serverid, shnum) ) 148 149 def add_problem(self, p): 150 self.problems.append(p) 151 152 # IDownloadStatus methods 153 def get_counter(self): 154 return self.counter 155 def get_storage_index(self): 156 return self.storage_index 157 def get_size(self): 158 return self.size 159 def get_status(self): 160 return "not impl yet" # TODO 161 def get_progress(self): 162 return 0.1 # TODO 
163 def using_helper(self): 164 return False 165 def get_active(self): 166 return False # TODO 167 def get_started(self): 168 return self.started 169 def get_results(self): 170 return None # TODO -
new file src/allmydata/immutable/downloader/util.py
diff --git a/src/allmydata/immutable/downloader/util.py b/src/allmydata/immutable/downloader/util.py new file mode 100644 index 0000000..d45f5cc
- + 1 import weakref 2 3 from twisted.application import service 4 from foolscap.api import eventually 5 6 class Observer2: 7 """A simple class to distribute multiple events to a single subscriber. 8 It accepts arbitrary kwargs, but no posargs.""" 9 def __init__(self): 10 self._watcher = None 11 self._undelivered_results = [] 12 self._canceler = None 13 14 def set_canceler(self, c, methname): 15 """I will call c.METHNAME(self) when somebody cancels me.""" 16 # we use a weakref to avoid creating a cycle between us and the thing 17 # we're observing: they'll be holding a reference to us to compare 18 # against the value we pass to their canceler function. However, 19 # since bound methods are first-class objects (and not kept alive by 20 # the object they're bound to), we can't just stash a weakref to the 21 # bound cancel method. Instead, we must hold a weakref to the actual 22 # object, and obtain its cancel method later. 23 # http://code.activestate.com/recipes/81253-weakmethod/ has an 24 # alternative. 
25 self._canceler = (weakref.ref(c), methname) 26 27 def subscribe(self, observer, **watcher_kwargs): 28 self._watcher = (observer, watcher_kwargs) 29 while self._undelivered_results: 30 self._notify(self._undelivered_results.pop(0)) 31 32 def notify(self, **result_kwargs): 33 if self._watcher: 34 self._notify(result_kwargs) 35 else: 36 self._undelivered_results.append(result_kwargs) 37 38 def _notify(self, result_kwargs): 39 o, watcher_kwargs = self._watcher 40 kwargs = dict(result_kwargs) 41 kwargs.update(watcher_kwargs) 42 eventually(o, **kwargs) 43 44 def cancel(self): 45 wr,methname = self._canceler 46 o = wr() 47 if o: 48 getattr(o,methname)(self) 49 50 51 def incidentally(res, f, *args, **kwargs): 52 """Add me to a Deferred chain like this: 53 d.addBoth(incidentally, func, arg) 54 and I'll behave as if you'd added the following function: 55 def _(res): 56 func(arg) 57 return res 58 This is useful if you want to execute an expression when the Deferred 59 fires, but don't care about its value. 60 """ 61 f(*args, **kwargs) 62 return res 63 64 65 class Terminator(service.Service): 66 def __init__(self): 67 self._clients = weakref.WeakKeyDictionary() 68 def register(self, c): 69 self._clients[c] = None 70 def stopService(self): 71 for c in self._clients: 72 c.stop() 73 return service.Service.stopService(self) -
src/allmydata/immutable/filenode.py
diff --git a/src/allmydata/immutable/filenode.py b/src/allmydata/immutable/filenode.py index 70044a7..1d5be94 100644
a b 1 import copy, os.path, stat 2 from cStringIO import StringIO 1 2 import binascii 3 import copy 4 import time 5 now = time.time 3 6 from zope.interface import implements 4 7 from twisted.internet import defer 5 from twisted.internet.interfaces import IPushProducer 6 from twisted.protocols import basic 7 from foolscap.api import eventually 8 from allmydata.interfaces import IImmutableFileNode, ICheckable, \ 9 IDownloadTarget, IUploadResults 10 from allmydata.util import dictutil, log, base32 11 from allmydata.uri import CHKFileURI, LiteralFileURI 12 from allmydata.immutable.checker import Checker 13 from allmydata.check_results import CheckResults, CheckAndRepairResults 14 from allmydata.immutable.repairer import Repairer 15 from allmydata.immutable import download 16 17 class _ImmutableFileNodeBase(object): 18 implements(IImmutableFileNode, ICheckable) 19 20 def get_write_uri(self): 21 return None 22 23 def get_readonly_uri(self): 24 return self.get_uri() 25 26 def is_mutable(self): 27 return False 28 29 def is_readonly(self): 30 return True 31 32 def is_unknown(self): 33 return False 34 35 def is_allowed_in_immutable_directory(self): 36 return True 37 38 def raise_error(self): 39 pass 40 41 def __hash__(self): 42 return self.u.__hash__() 43 def __eq__(self, other): 44 if isinstance(other, _ImmutableFileNodeBase): 45 return self.u.__eq__(other.u) 46 else: 47 return False 48 def __ne__(self, other): 49 if isinstance(other, _ImmutableFileNodeBase): 50 return self.u.__eq__(other.u) 51 else: 52 return True 53 54 class PortionOfFile: 55 # like a list slice (things[2:14]), but for a file on disk 56 def __init__(self, fn, offset=0, size=None): 57 self.f = open(fn, "rb") 58 self.f.seek(offset) 59 self.bytes_left = size 60 61 def read(self, size=None): 62 # bytes_to_read = min(size, self.bytes_left), but None>anything 63 if size is None: 64 bytes_to_read = self.bytes_left 65 elif self.bytes_left is None: 66 bytes_to_read = size 67 else: 68 bytes_to_read = min(size, 
self.bytes_left) 69 data = self.f.read(bytes_to_read) 70 if self.bytes_left is not None: 71 self.bytes_left -= len(data) 72 return data 73 74 class DownloadCache: 75 implements(IDownloadTarget) 76 77 def __init__(self, filecap, storage_index, downloader, 78 cachedirectorymanager): 79 self._downloader = downloader 80 self._uri = filecap 81 self._storage_index = storage_index 82 self.milestones = set() # of (offset,size,Deferred) 83 self.cachedirectorymanager = cachedirectorymanager 84 self.cachefile = None 85 self.download_in_progress = False 86 # five states: 87 # new ImmutableFileNode, no downloads ever performed 88 # new ImmutableFileNode, leftover file (partial) 89 # new ImmutableFileNode, leftover file (whole) 90 # download in progress, not yet complete 91 # download complete 92 93 def when_range_available(self, offset, size): 94 assert isinstance(offset, (int,long)) 95 assert isinstance(size, (int,long)) 96 97 d = defer.Deferred() 98 self.milestones.add( (offset,size,d) ) 99 self._check_milestones() 100 if self.milestones and not self.download_in_progress: 101 self.download_in_progress = True 102 log.msg(format=("immutable filenode read [%(si)s]: " + 103 "starting download"), 104 si=base32.b2a(self._storage_index), 105 umid="h26Heg", level=log.OPERATIONAL) 106 d2 = self._downloader.download(self._uri, self) 107 d2.addBoth(self._download_done) 108 d2.addErrback(self._download_failed) 109 d2.addErrback(log.err, umid="cQaM9g") 110 return d 111 112 def read(self, consumer, offset, size): 113 assert offset+size <= self.get_filesize() 114 if not self.cachefile: 115 self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index)) 116 f = PortionOfFile(self.cachefile.get_filename(), offset, size) 117 d = basic.FileSender().beginFileTransfer(f, consumer) 118 d.addCallback(lambda lastSent: consumer) 119 return d 120 121 def _download_done(self, res): 122 # clear download_in_progress, so failed downloads can be re-tried 123 self.download_in_progress 
= False 124 return res 125 126 def _download_failed(self, f): 127 # tell anyone who's waiting that we failed 128 for m in self.milestones: 129 (offset,size,d) = m 130 eventually(d.errback, f) 131 self.milestones.clear() 132 133 def _check_milestones(self): 134 current_size = self.get_filesize() 135 for m in list(self.milestones): 136 (offset,size,d) = m 137 if offset+size <= current_size: 138 log.msg(format=("immutable filenode read [%(si)s] " + 139 "%(offset)d+%(size)d vs %(filesize)d: " + 140 "done"), 141 si=base32.b2a(self._storage_index), 142 offset=offset, size=size, filesize=current_size, 143 umid="nuedUg", level=log.NOISY) 144 self.milestones.discard(m) 145 eventually(d.callback, None) 146 else: 147 log.msg(format=("immutable filenode read [%(si)s] " + 148 "%(offset)d+%(size)d vs %(filesize)d: " + 149 "still waiting"), 150 si=base32.b2a(self._storage_index), 151 offset=offset, size=size, filesize=current_size, 152 umid="8PKOhg", level=log.NOISY) 153 154 def get_filesize(self): 155 if not self.cachefile: 156 self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index)) 157 try: 158 filesize = os.stat(self.cachefile.get_filename())[stat.ST_SIZE] 159 except OSError: 160 filesize = 0 161 return filesize 162 163 164 def open(self, size): 165 if not self.cachefile: 166 self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index)) 167 self.f = open(self.cachefile.get_filename(), "wb") 168 169 def write(self, data): 170 self.f.write(data) 171 self._check_milestones() 172 173 def close(self): 174 self.f.close() 175 self._check_milestones() 176 177 def fail(self, why): 178 pass 179 def register_canceller(self, cb): 180 pass 181 def finish(self): 182 return None 183 # The following methods are just because the target might be a 184 # repairer.DownUpConnector, and just because the current CHKUpload object 185 # expects to find the storage index and encoding parameters in its 186 # Uploadable. 
187 def set_storageindex(self, storageindex): 188 pass 189 def set_encodingparams(self, encodingparams): 190 pass 8 from twisted.internet.interfaces import IConsumer 191 9 10 from allmydata.interfaces import IImmutableFileNode, IUploadResults 11 from allmydata import uri 12 from allmydata.check_results import CheckResults, CheckAndRepairResults 13 from allmydata.util.dictutil import DictOfSets 14 from pycryptopp.cipher.aes import AES 192 15 193 class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 194 def __init__(self, filecap, storage_broker, secret_holder, 195 downloader, history, cachedirectorymanager): 196 assert isinstance(filecap, CHKFileURI) 197 self.u = filecap 16 # local imports 17 from allmydata.immutable.checker import Checker 18 from allmydata.immutable.repairer import Repairer 19 from allmydata.immutable.downloader.node import DownloadNode 20 from allmydata.immutable.downloader.status import DownloadStatus 21 22 class CiphertextFileNode: 23 def __init__(self, verifycap, storage_broker, secret_holder, 24 terminator, history, download_status=None): 25 assert isinstance(verifycap, uri.CHKFileVerifierURI) 26 self._verifycap = verifycap 198 27 self._storage_broker = storage_broker 199 28 self._secret_holder = secret_holder 200 self._downloader = downloader 201 self._history = history 202 storage_index = self.get_storage_index() 203 self.download_cache = DownloadCache(filecap, storage_index, downloader, 204 cachedirectorymanager) 205 prefix = self.u.get_verify_cap().to_string() 206 log.PrefixingLogMixin.__init__(self, "allmydata.immutable.filenode", prefix=prefix) 207 self.log("starting", level=log.OPERATIONAL) 29 if download_status is None: 30 ds = DownloadStatus(verifycap.storage_index, verifycap.size) 31 if history: 32 history.add_download(ds) 33 download_status = ds 34 self._node = DownloadNode(verifycap, storage_broker, secret_holder, 35 terminator, history, download_status) 36 37 def read(self, consumer, offset=0, size=None, 
read_ev=None): 38 """I am the main entry point, from which FileNode.read() can get 39 data. I feed the consumer with the desired range of ciphertext. I 40 return a Deferred that fires (with the consumer) when the read is 41 finished.""" 42 return self._node.read(consumer, offset, size, read_ev) 43 44 def get_segment(self, segnum): 45 """Begin downloading a segment. I return a tuple (d, c): 'd' is a 46 Deferred that fires with (offset,data) when the desired segment is 47 available, and c is an object on which c.cancel() can be called to 48 disavow interest in the segment (after which 'd' will never fire). 49 50 You probably need to know the segment size before calling this, 51 unless you want the first few bytes of the file. If you ask for a 52 segment number which turns out to be too large, the Deferred will 53 errback with BadSegmentNumberError. 54 55 The Deferred fires with the offset of the first byte of the data 56 segment, so that you can call get_segment() before knowing the 57 segment size, and still know which data you received. 
58 """ 59 return self._node.get_segment(segnum) 60 61 def get_segment_size(self): 62 # return a Deferred that fires with the file's real segment size 63 return self._node.get_segsize() 208 64 209 def get_size(self): 210 return self.u.get_size() 211 def get_current_size(self): 212 return defer.succeed(self.get_size()) 213 214 def get_cap(self): 215 return self.u 216 def get_readcap(self): 217 return self.u.get_readonly() 65 def get_storage_index(self): 66 return self._verifycap.storage_index 218 67 def get_verify_cap(self): 219 return self.u.get_verify_cap() 220 def get_repair_cap(self): 221 # CHK files can be repaired with just the verifycap 222 return self.u.get_verify_cap() 68 return self._verifycap 69 def get_size(self): 70 return self._verifycap.size 223 71 224 def get_uri(self):225 return self.u.to_string()72 def raise_error(self): 73 pass 226 74 227 def get_storage_index(self):228 return self.u.get_storage_index()229 75 230 76 def check_and_repair(self, monitor, verify=False, add_lease=False): 231 verifycap = self.get_verify_cap() 77 verifycap = self._verifycap 78 storage_index = verifycap.storage_index 232 79 sb = self._storage_broker 233 80 servers = sb.get_all_servers() 234 81 sh = self._secret_holder … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 238 85 monitor=monitor) 239 86 d = c.start() 240 87 def _maybe_repair(cr): 241 crr = CheckAndRepairResults(s elf.u.get_storage_index())88 crr = CheckAndRepairResults(storage_index) 242 89 crr.pre_repair_results = cr 243 90 if cr.is_healthy(): 244 91 crr.post_repair_results = cr … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 248 95 crr.repair_successful = False # until proven successful 249 96 def _gather_repair_results(ur): 250 97 assert IUploadResults.providedBy(ur), ur 251 # clone the cr -- check results to form the basic of the prr -- post-repair results 98 # clone the cr (check results) to form the basis of the 99 # prr (post-repair results) 252 100 
prr = CheckResults(cr.uri, cr.storage_index) 253 101 prr.data = copy.deepcopy(cr.data) 254 102 255 103 sm = prr.data['sharemap'] 256 assert isinstance(sm, dictutil.DictOfSets), sm104 assert isinstance(sm, DictOfSets), sm 257 105 sm.update(ur.sharemap) 258 106 servers_responding = set(prr.data['servers-responding']) 259 107 servers_responding.union(ur.sharemap.iterkeys()) 260 108 prr.data['servers-responding'] = list(servers_responding) 261 109 prr.data['count-shares-good'] = len(sm) 262 110 prr.data['count-good-share-hosts'] = len(sm) 263 is_healthy = bool(len(sm) >= self.u.total_shares)264 is_recoverable = bool(len(sm) >= self.u.needed_shares)111 is_healthy = bool(len(sm) >= verifycap.total_shares) 112 is_recoverable = bool(len(sm) >= verifycap.needed_shares) 265 113 prr.set_healthy(is_healthy) 266 114 prr.set_recoverable(is_recoverable) 267 115 crr.repair_successful = is_healthy 268 prr.set_needs_rebalancing(len(sm) >= self.u.total_shares)116 prr.set_needs_rebalancing(len(sm) >= verifycap.total_shares) 269 117 270 118 crr.post_repair_results = prr 271 119 return crr … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 275 123 crr.repair_successful = False 276 124 crr.repair_failure = f 277 125 return f 278 r = Repairer(s torage_broker=sb, secret_holder=sh,279 verifycap=verifycap,monitor=monitor)126 r = Repairer(self, storage_broker=sb, secret_holder=sh, 127 monitor=monitor) 280 128 d = r.start() 281 129 d.addCallbacks(_gather_repair_results, _repair_error) 282 130 return d … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 285 133 return d 286 134 287 135 def check(self, monitor, verify=False, add_lease=False): 288 verifycap = self. 
get_verify_cap()136 verifycap = self._verifycap 289 137 sb = self._storage_broker 290 138 servers = sb.get_all_servers() 291 139 sh = self._secret_holder … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 295 143 monitor=monitor) 296 144 return v.start() 297 145 146 147 class DecryptingConsumer: 148 """I sit between a CiphertextDownloader (which acts as a Producer) and 149 the real Consumer, decrypting everything that passes by. The real 150 Consumer sees the real Producer, but the Producer sees us instead of the 151 real consumer.""" 152 implements(IConsumer) 153 154 def __init__(self, consumer, readkey, offset, read_event): 155 self._consumer = consumer 156 self._read_event = read_event 157 # TODO: pycryptopp CTR-mode needs random-access operations: I want 158 # either a=AES(readkey, offset) or better yet both of: 159 # a=AES(readkey, offset=0) 160 # a.process(ciphertext, offset=xyz) 161 # For now, we fake it with the existing iv= argument. 162 offset_big = offset // 16 163 offset_small = offset % 16 164 iv = binascii.unhexlify("%032x" % offset_big) 165 self._decryptor = AES(readkey, iv=iv) 166 self._decryptor.process("\x00"*offset_small) 167 168 def registerProducer(self, producer, streaming): 169 # this passes through, so the real consumer can flow-control the real 170 # producer. Therefore we don't need to provide any IPushProducer 171 # methods. We implement all the IConsumer methods as pass-throughs, 172 # and only intercept write() to perform decryption. 
173 self._consumer.registerProducer(producer, streaming) 174 def unregisterProducer(self): 175 self._consumer.unregisterProducer() 176 def write(self, ciphertext): 177 started = now() 178 plaintext = self._decryptor.process(ciphertext) 179 elapsed = now() - started 180 self._read_event.update(0, elapsed, 0) 181 self._consumer.write(plaintext) 182 183 class ImmutableFileNode: 184 implements(IImmutableFileNode) 185 186 # I wrap a CiphertextFileNode with a decryption key 187 def __init__(self, filecap, storage_broker, secret_holder, terminator, 188 history): 189 assert isinstance(filecap, uri.CHKFileURI) 190 verifycap = filecap.get_verify_cap() 191 ds = DownloadStatus(verifycap.storage_index, verifycap.size) 192 if history: 193 history.add_download(ds) 194 self._download_status = ds 195 self._cnode = CiphertextFileNode(verifycap, storage_broker, 196 secret_holder, terminator, history, ds) 197 assert isinstance(filecap, uri.CHKFileURI) 198 self.u = filecap 199 self._readkey = filecap.key 200 201 # TODO: I'm not sure about this.. what's the use case for node==node? 
If 202 # we keep it here, we should also put this on CiphertextFileNode 203 def __hash__(self): 204 return self.u.__hash__() 205 def __eq__(self, other): 206 if isinstance(other, ImmutableFileNode): 207 return self.u.__eq__(other.u) 208 else: 209 return False 210 def __ne__(self, other): 211 if isinstance(other, ImmutableFileNode): 212 return self.u.__eq__(other.u) 213 else: 214 return True 215 298 216 def read(self, consumer, offset=0, size=None): 299 self.log("read", offset=offset, size=size, 300 umid="UPP8FA", level=log.OPERATIONAL) 301 if size is None: 302 size = self.get_size() - offset 303 size = min(size, self.get_size() - offset) 304 305 if offset == 0 and size == self.get_size(): 306 # don't use the cache, just do a normal streaming download 307 self.log("doing normal full download", umid="VRSBwg", level=log.OPERATIONAL) 308 target = download.ConsumerAdapter(consumer) 309 return self._downloader.download(self.get_cap(), target, 310 self._parentmsgid, 311 history=self._history) 312 313 d = self.download_cache.when_range_available(offset, size) 314 d.addCallback(lambda res: 315 self.download_cache.read(consumer, offset, size)) 217 actual_size = size 218 if actual_size == None: 219 actual_size = self.u.size 220 actual_size = actual_size - offset 221 read_ev = self._download_status.add_read_event(offset,actual_size, 222 now()) 223 decryptor = DecryptingConsumer(consumer, self._readkey, offset, read_ev) 224 d = self._cnode.read(decryptor, offset, size, read_ev) 225 d.addCallback(lambda dc: consumer) 316 226 return d 317 227 318 class LiteralProducer: 319 implements(IPushProducer) 320 def resumeProducing(self): 321 pass 322 def stopProducing(self): 228 def raise_error(self): 323 229 pass 324 230 231 def get_write_uri(self): 232 return None 325 233 326 class LiteralFileNode(_ImmutableFileNodeBase): 327 328 def __init__(self, filecap): 329 assert isinstance(filecap, LiteralFileURI) 330 self.u = filecap 331 332 def get_size(self): 333 return len(self.u.data) 334 
def get_current_size(self): 335 return defer.succeed(self.get_size()) 234 def get_readonly_uri(self): 235 return self.get_uri() 336 236 237 def get_uri(self): 238 return self.u.to_string() 337 239 def get_cap(self): 338 240 return self.u 339 241 def get_readcap(self): 340 return self.u 242 return self.u.get_readonly() 341 243 def get_verify_cap(self): 342 return None244 return self.u.get_verify_cap() 343 245 def get_repair_cap(self): 344 return None 345 346 def get_uri(self): 347 return self.u.to_string() 246 # CHK files can be repaired with just the verifycap 247 return self.u.get_verify_cap() 348 248 349 249 def get_storage_index(self): 350 return None250 return self.u.get_storage_index() 351 251 352 def check(self, monitor, verify=False, add_lease=False): 353 return defer.succeed(None) 252 def get_size(self): 253 return self.u.get_size() 254 def get_current_size(self): 255 return defer.succeed(self.get_size()) 354 256 355 def check_and_repair(self, monitor, verify=False, add_lease=False):356 return defer.succeed(None)257 def is_mutable(self): 258 return False 357 259 358 def read(self, consumer, offset=0, size=None): 359 if size is None: 360 data = self.u.data[offset:] 361 else: 362 data = self.u.data[offset:offset+size] 363 364 # We use twisted.protocols.basic.FileSender, which only does 365 # non-streaming, i.e. PullProducer, where the receiver/consumer must 366 # ask explicitly for each chunk of data. There are only two places in 367 # the Twisted codebase that can't handle streaming=False, both of 368 # which are in the upload path for an FTP/SFTP server 369 # (protocols.ftp.FileConsumer and 370 # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is 371 # likely to be used as the target for a Tahoe download. 
372 373 d = basic.FileSender().beginFileTransfer(StringIO(data), consumer) 374 d.addCallback(lambda lastSent: consumer) 375 return d 260 def is_readonly(self): 261 return True 262 263 def is_unknown(self): 264 return False 265 266 def is_allowed_in_immutable_directory(self): 267 return True 268 269 def check_and_repair(self, monitor, verify=False, add_lease=False): 270 return self._cnode.check_and_repair(monitor, verify, add_lease) 271 def check(self, monitor, verify=False, add_lease=False): 272 return self._cnode.check(monitor, verify, add_lease) -
src/allmydata/immutable/layout.py
diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 6e07da7..27fb844 100644
a b limitations described in #346. 74 74 # they are still provided when writing so that older versions of Tahoe can 75 75 # read them. 76 76 77 FORCE_V2 = False # set briefly by unit tests to make small-sized V2 shares 78 77 79 def make_write_bucket_proxy(rref, data_size, block_size, num_segments, 78 80 num_share_hashes, uri_extension_size_max, nodeid): 79 81 # Use layout v1 for small files, so they'll be readable by older versions 80 82 # (<tahoe-1.3.0). Use layout v2 for large files; they'll only be readable 81 83 # by tahoe-1.3.0 or later. 82 84 try: 85 if FORCE_V2: 86 raise FileTooLargeError 83 87 wbp = WriteBucketProxy(rref, data_size, block_size, num_segments, 84 88 num_share_hashes, uri_extension_size_max, nodeid) 85 89 except FileTooLargeError: -
new file src/allmydata/immutable/literal.py
diff --git a/src/allmydata/immutable/literal.py b/src/allmydata/immutable/literal.py new file mode 100644 index 0000000..09466cb
- + 1 from cStringIO import StringIO 2 from zope.interface import implements 3 from twisted.internet import defer 4 from twisted.internet.interfaces import IPushProducer 5 from twisted.protocols import basic 6 from allmydata.interfaces import IImmutableFileNode, ICheckable 7 from allmydata.uri import LiteralFileURI 8 9 class _ImmutableFileNodeBase(object): 10 implements(IImmutableFileNode, ICheckable) 11 12 def get_write_uri(self): 13 return None 14 15 def get_readonly_uri(self): 16 return self.get_uri() 17 18 def is_mutable(self): 19 return False 20 21 def is_readonly(self): 22 return True 23 24 def is_unknown(self): 25 return False 26 27 def is_allowed_in_immutable_directory(self): 28 return True 29 30 def raise_error(self): 31 pass 32 33 def __hash__(self): 34 return self.u.__hash__() 35 def __eq__(self, other): 36 if isinstance(other, _ImmutableFileNodeBase): 37 return self.u.__eq__(other.u) 38 else: 39 return False 40 def __ne__(self, other): 41 if isinstance(other, _ImmutableFileNodeBase): 42 return self.u.__eq__(other.u) 43 else: 44 return True 45 46 47 class LiteralProducer: 48 implements(IPushProducer) 49 def resumeProducing(self): 50 pass 51 def stopProducing(self): 52 pass 53 54 55 class LiteralFileNode(_ImmutableFileNodeBase): 56 57 def __init__(self, filecap): 58 assert isinstance(filecap, LiteralFileURI) 59 self.u = filecap 60 61 def get_size(self): 62 return len(self.u.data) 63 def get_current_size(self): 64 return defer.succeed(self.get_size()) 65 66 def get_cap(self): 67 return self.u 68 def get_readcap(self): 69 return self.u 70 def get_verify_cap(self): 71 return None 72 def get_repair_cap(self): 73 return None 74 75 def get_uri(self): 76 return self.u.to_string() 77 78 def get_storage_index(self): 79 return None 80 81 def check(self, monitor, verify=False, add_lease=False): 82 return defer.succeed(None) 83 84 def check_and_repair(self, monitor, verify=False, add_lease=False): 85 return defer.succeed(None) 86 87 def read(self, consumer, offset=0, 
size=None): 88 if size is None: 89 data = self.u.data[offset:] 90 else: 91 data = self.u.data[offset:offset+size] 92 93 # We use twisted.protocols.basic.FileSender, which only does 94 # non-streaming, i.e. PullProducer, where the receiver/consumer must 95 # ask explicitly for each chunk of data. There are only two places in 96 # the Twisted codebase that can't handle streaming=False, both of 97 # which are in the upload path for an FTP/SFTP server 98 # (protocols.ftp.FileConsumer and 99 # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is 100 # likely to be used as the target for a Tahoe download. 101 102 d = basic.FileSender().beginFileTransfer(StringIO(data), consumer) 103 d.addCallback(lambda lastSent: consumer) 104 return d -
new file src/allmydata/immutable/notes.txt
diff --git a/src/allmydata/immutable/notes.txt b/src/allmydata/immutable/notes.txt new file mode 100644 index 0000000..ad11565
- + 1 2 # TODO: if server1 has all shares, and server2-10 have one each, make the 3 # loop stall slightly before requesting all shares from the first server, to 4 # give it a chance to learn about the other shares and get some diversity. 5 # Or, don't bother, let the first block all come from one server, and take 6 # comfort in the fact that we'll learn about the other servers by the time we 7 # fetch the second block. 8 # 9 # davidsarah points out that we could use sequential (instead of parallel) 10 # fetching of multiple block from a single server: by the time the first 11 # block arrives, we'll hopefully have heard about other shares. This would 12 # induce some RTT delays (i.e. lose pipelining) in the case that this server 13 # has the only shares, but that seems tolerable. We could rig it to only use 14 # sequential requests on the first segment. 15 16 # as a query gets later, we're more willing to duplicate work. 17 18 # should change server read protocol to allow small shares to be fetched in a 19 # single RTT. Instead of get_buckets-then-read, just use read(shnums, readv), 20 # where shnums=[] means all shares, and the return value is a dict of 21 # # shnum->ta (like with mutable files). The DYHB query should also fetch the 22 # offset table, since everything else can be located once we have that. 23 24 25 # ImmutableFileNode 26 # DecryptingConsumer 27 # CiphertextFileNode 28 # Segmentation 29 # ShareFinder 30 # SegmentFetcher[segnum] (one at a time) 31 # CommonShare[shnum] 32 # Share[shnum,server] 33 34 35 # TODO: if offset table is corrupt, attacker could cause us to fetch whole 36 # (large) share. But only from that one server, and they could throw lots of 37 # data at our connection anyways. 38 39 # log budget: when downloading at 1MBps (i.e. 8 segments-per-second), 10 40 # log.OPERATIONAL per second, 100 log.NOISY per second. With k=3, that's 3 41 # log.NOISY per block fetch. 
42 43 44 # test_cli.Error failed for a while: ShareFinder created, used up 45 # (NotEnoughSharesError), started again. The self.running=False is the 46 # problem. 47 # 48 # The second download is hungry, but because ShareFinder.running is false, it 49 # never notifies the SegmentFetcher that there are no more shares coming, so 50 # the download never completes. To trigger this in tests, we need the first 51 # download to want more shares (so it must fail with NotEnoughSharesError, or 52 # we must lose a share/server between downloads). 53 # 54 # fix was to not call self.stop when ShareFinder runs out of shares. stop() 55 # is now only called by the Terminator. 56 57 # TODO: make sure that _signal_corruption(f) isn't sending private local 58 # variables in the CopiedFailure 59 60 # tests to write: 61 # * truncated share, so _satisfy_* doesn't get all it wants 62 # * slow server 63 64 # all classes are also Services, and the rule is that you don't initiate more 65 # work unless self.running 66 67 # GC: decide whether each service is restartable or not. For non-restartable 68 # services, stopService() should delete a lot of attributes to kill reference 69 # cycles. The primary goal is to decref remote storage BucketReaders when a 70 # download is complete. 71 72 ======================================== 73 old stuff from download2_off: 74 75 #! /usr/bin/python 76 77 # known (shnum,Server) pairs are sorted into a list according to 78 # desireability. This sort is picking a winding path through a matrix of 79 # [shnum][server]. The goal is to get diversity of both shnum and server. 80 81 # The initial order is: 82 # find the lowest shnum on the first server, add it 83 # look at the next server, find the lowest shnum that we don't already have 84 # if any 85 # next server, etc, until all known servers are checked 86 # now look at servers that we skipped (because ... 87 88 # Keep track of which block requests are outstanding by (shnum,Server). 
Don't 89 # bother prioritizing "validated" shares: the overhead to pull the share hash 90 # chain is tiny (4 hashes = 128 bytes), and the overhead to pull a new block 91 # hash chain is also tiny (1GB file, 8192 segments of 128KiB each, 13 hashes, 92 # 832 bytes). Each time a block request is sent, also request any necessary 93 # hashes. Don't bother with a "ValidatedShare" class (as distinct from some 94 # other sort of Share). Don't bother avoiding duplicate hash-chain requests. 95 96 # For each outstanding segread, walk the list and send requests (skipping 97 # outstanding shnums) until requests for k distinct shnums are in flight. If 98 # we can't do that, ask for more. If we get impatient on a request, find the 99 # first non-outstanding 100 101 # start with the first Share in the list, and send a request. Then look at 102 # the next one. If we already have a pending request for the same shnum or 103 # server, push that Share down onto the fallback list and try the next one, 104 # etc. If we run out of non-fallback shares, use the fallback ones, 105 # preferring shnums that we don't have outstanding requests for (i.e. assume 106 # that all requests will complete). Do this by having a second fallback list. 107 108 # hell, I'm reviving the Herder. But remember, we're still talking 3 objects 109 # per file, not thousands. 110 111 # actually, don't bother sorting the initial list. Append Shares as the 112 # responses come back, that will put the fastest servers at the front of the 113 # list, and give a tiny preference to servers that are earlier in the 114 # permuted order. 115 116 # more ideas: 117 # sort shares by: 118 # 1: number of roundtrips needed to get some data 119 # 2: share number 120 # 3: ms of RTT delay 121 # maybe measure average time-to-completion of requests, compare completion 122 # time against that, much larger indicates congestion on the server side 123 # or the server's upstream speed is less than our downstream. 
Minimum 124 # time-to-completion indicates min(our-downstream,their-upstream). Could 125 # fetch shares one-at-a-time to measure that better. 126 127 # when should we risk duplicate work and send a new request? 128 129 def walk(self): 130 shares = sorted(list) 131 oldshares = copy(shares) 132 outstanding = list() 133 fallbacks = list() 134 second_fallbacks = list() 135 while len(outstanding.nonlate.shnums) < k: # need more requests 136 while oldshares: 137 s = shares.pop(0) 138 if s.server in outstanding.servers or s.shnum in outstanding.shnums: 139 fallbacks.append(s) 140 continue 141 outstanding.append(s) 142 send_request(s) 143 break #'while need_more_requests' 144 # must use fallback list. Ask for more servers while we're at it. 145 ask_for_more_servers() 146 while fallbacks: 147 s = fallbacks.pop(0) 148 if s.shnum in outstanding.shnums: 149 # assume that the outstanding requests will complete, but 150 # send new requests for other shnums to existing servers 151 second_fallbacks.append(s) 152 continue 153 outstanding.append(s) 154 send_request(s) 155 break #'while need_more_requests' 156 # if we get here, we're being forced to send out multiple queries per 157 # share. We've already asked for more servers, which might help. If 158 # there are no late outstanding queries, then duplicate shares won't 159 # help. Don't send queries for duplicate shares until some of the 160 # queries are late. 161 if outstanding.late: 162 # we're allowed to try any non-outstanding share 163 while second_fallbacks: 164 pass 165 newshares = outstanding + fallbacks + second_fallbacks + oldshares 166 167 168 class Server: 169 """I represent an abstract Storage Server. One day, the StorageBroker 170 will return instances of me. For now, the StorageBroker returns (peerid, 171 RemoteReference) tuples, and this code wraps a Server instance around 172 them. 
173 """ 174 def __init__(self, peerid, ss): 175 self.peerid = peerid 176 self.remote = ss 177 self._remote_buckets = {} # maps shnum to RIBucketReader 178 # TODO: release the bucket references on shares that we no longer 179 # want. OTOH, why would we not want them? Corruption? 180 181 def send_query(self, storage_index): 182 """I return a Deferred that fires with a set of shnums. If the server 183 had shares available, I will retain the RemoteReferences to its 184 buckets, so that get_data(shnum, range) can be called later.""" 185 d = self.remote.callRemote("get_buckets", self.storage_index) 186 d.addCallback(self._got_response) 187 return d 188 189 def _got_response(self, r): 190 self._remote_buckets = r 191 return set(r.keys()) 192 193 class ShareOnAServer: 194 """I represent one instance of a share, known to live on a specific 195 server. I am created every time a server responds affirmatively to a 196 do-you-have-block query.""" 197 198 def __init__(self, shnum, server): 199 self._shnum = shnum 200 self._server = server 201 self._block_hash_tree = None 202 203 def cost(self, segnum): 204 """I return a tuple of (roundtrips, bytes, rtt), indicating how 205 expensive I think it would be to fetch the given segment. Roundtrips 206 indicates how many roundtrips it is likely to take (one to get the 207 data and hashes, plus one to get the offset table and UEB if this is 208 the first segment we've ever fetched). 'bytes' is how many bytes we 209 must fetch (estimated). 'rtt' is estimated round-trip time (float) in 210 seconds for a trivial request. The downloading algorithm will compare 211 costs to decide which shares should be used.""" 212 # the most significant factor here is roundtrips: a Share for which 213 # we already have the offset table is better to than a brand new one 214 215 def max_bandwidth(self): 216 """Return a float, indicating the highest plausible bytes-per-second 217 that I've observed coming from this share. 
This will be based upon 218 the minimum (bytes-per-fetch / time-per-fetch) ever observed. This 219 can we used to estimate the server's upstream bandwidth. Clearly this 220 is only accurate if a share is retrieved with no contention for 221 either the upstream, downstream, or middle of the connection, but it 222 may still serve as a useful metric for deciding which servers to pull 223 from.""" 224 225 def get_segment(self, segnum): 226 """I return a Deferred that will fire with the segment data, or 227 errback.""" 228 229 class NativeShareOnAServer(ShareOnAServer): 230 """For tahoe native (foolscap) servers, I contain a RemoteReference to 231 the RIBucketReader instance.""" 232 def __init__(self, shnum, server, rref): 233 ShareOnAServer.__init__(self, shnum, server) 234 self._rref = rref # RIBucketReader 235 236 class Share: 237 def __init__(self, shnum): 238 self._shnum = shnum 239 # _servers are the Server instances which appear to hold a copy of 240 # this share. It is populated when the ValidShare is first created, 241 # or when we receive a get_buckets() response for a shnum that 242 # already has a ValidShare instance. When we lose the connection to a 243 # server, we remove it. 244 self._servers = set() 245 # offsets, UEB, and share_hash_tree all live in the parent. 246 # block_hash_tree lives here. 247 self._block_hash_tree = None 248 249 self._want 250 251 def get_servers(self): 252 return self._servers 253 254 255 def get_block(self, segnum): 256 # read enough data to obtain a single validated block 257 if not self.have_offsets: 258 # we get the offsets in their own read, since they tell us where 259 # everything else lives. We must fetch offsets for each share 260 # separately, since they aren't directly covered by the UEB. 261 pass 262 if not self.parent.have_ueb: 263 # use _guessed_segsize to make a guess about the layout, so we 264 # can fetch both the offset table and the UEB in the same read. 
265 # This also requires making a guess about the presence or absence 266 # of the plaintext_hash_tree. Oh, and also the version number. Oh 267 # well. 268 pass 269 270 class CiphertextDownloader: 271 """I manage all downloads for a single file. I operate a state machine 272 with input events that are local read() requests, responses to my remote 273 'get_bucket' and 'read_bucket' messages, and connection establishment and 274 loss. My outbound events are connection establishment requests and bucket 275 read requests messages. 276 """ 277 # eventually this will merge into the FileNode 278 ServerClass = Server # for tests to override 279 280 def __init__(self, storage_index, ueb_hash, size, k, N, storage_broker, 281 shutdowner): 282 # values we get from the filecap 283 self._storage_index = si = storage_index 284 self._ueb_hash = ueb_hash 285 self._size = size 286 self._needed_shares = k 287 self._total_shares = N 288 self._share_hash_tree = IncompleteHashTree(self._total_shares) 289 # values we discover when we first fetch the UEB 290 self._ueb = None # is dict after UEB fetch+validate 291 self._segsize = None 292 self._numsegs = None 293 self._blocksize = None 294 self._tail_segsize = None 295 self._ciphertext_hash = None # optional 296 # structures we create when we fetch the UEB, then continue to fill 297 # as we download the file 298 self._share_hash_tree = None # is IncompleteHashTree after UEB fetch 299 self._ciphertext_hash_tree = None 300 301 # values we learn as we download the file 302 self._offsets = {} # (shnum,Server) to offset table (dict) 303 self._block_hash_tree = {} # shnum to IncompleteHashTree 304 # other things which help us 305 self._guessed_segsize = min(128*1024, size) 306 self._active_share_readers = {} # maps shnum to Reader instance 307 self._share_readers = [] # sorted by preference, best first 308 self._readers = set() # set of Reader instances 309 self._recent_horizon = 10 # seconds 310 311 # 'shutdowner' is a MultiService parent used 
to cancel all downloads 312 # when the node is shutting down, to let tests have a clean reactor. 313 314 self._init_available_servers() 315 self._init_find_enough_shares() 316 317 # _available_servers is an iterator that provides us with Server 318 # instances. Each time we pull out a Server, we immediately send it a 319 # query, so we don't need to keep track of who we've sent queries to. 320 321 def _init_available_servers(self): 322 self._available_servers = self._get_available_servers() 323 self._no_more_available_servers = False 324 325 def _get_available_servers(self): 326 """I am a generator of servers to use, sorted by the order in which 327 we should query them. I make sure there are no duplicates in this 328 list.""" 329 # TODO: make StorageBroker responsible for this non-duplication, and 330 # replace this method with a simple iter(get_servers_for_index()), 331 # plus a self._no_more_available_servers=True 332 seen = set() 333 sb = self._storage_broker 334 for (peerid, ss) in sb.get_servers_for_index(self._storage_index): 335 if peerid not in seen: 336 yield self.ServerClass(peerid, ss) # Server(peerid, ss) 337 seen.add(peerid) 338 self._no_more_available_servers = True 339 340 # this block of code is responsible for having enough non-problematic 341 # distinct shares/servers available and ready for download, and for 342 # limiting the number of queries that are outstanding. The idea is that 343 # we'll use the k fastest/best shares, and have the other ones in reserve 344 # in case those servers stop responding or respond too slowly. We keep 345 # track of all known shares, but we also keep track of problematic shares 346 # (ones with hash failures or lost connections), so we can put them at 347 # the bottom of the list. 348 349 def _init_find_enough_shares(self): 350 # _unvalidated_sharemap maps shnum to set of Servers, and remembers 351 # where viable (but not yet validated) shares are located. 
Each 352 # get_bucket() response adds to this map, each act of validation 353 # removes from it. 354 self._sharemap = DictOfSets() 355 356 # _sharemap maps shnum to set of Servers, and remembers where viable 357 # shares are located. Each get_bucket() response adds to this map, 358 # each hash failure or disconnect removes from it. (TODO: if we 359 # disconnect but reconnect later, we should be allowed to re-query). 360 self._sharemap = DictOfSets() 361 362 # _problem_shares is a set of (shnum, Server) tuples, and 363 364 # _queries_in_flight maps a Server to a timestamp, which remembers 365 # which servers we've sent queries to (and when) but have not yet 366 # heard a response. This lets us put a limit on the number of 367 # outstanding queries, to limit the size of the work window (how much 368 # extra work we ask servers to do in the hopes of keeping our own 369 # pipeline filled). We remove a Server from _queries_in_flight when 370 # we get an answer/error or we finally give up. If we ever switch to 371 # a non-connection-oriented protocol (like UDP, or forwarded Chord 372 # queries), we can use this information to retransmit any query that 373 # has gone unanswered for too long. 374 self._queries_in_flight = dict() 375 376 def _count_recent_queries_in_flight(self): 377 now = time.time() 378 recent = now - self._recent_horizon 379 return len([s for (s,when) in self._queries_in_flight.items() 380 if when > recent]) 381 382 def _find_enough_shares(self): 383 # goal: have 2*k distinct not-invalid shares available for reading, 384 # from 2*k distinct servers. Do not have more than 4*k "recent" 385 # queries in flight at a time. 
386 if (len(self._sharemap) >= 2*self._needed_shares 387 and len(self._sharemap.values) >= 2*self._needed_shares): 388 return 389 num = self._count_recent_queries_in_flight() 390 while num < 4*self._needed_shares: 391 try: 392 s = self._available_servers.next() 393 except StopIteration: 394 return # no more progress can be made 395 self._queries_in_flight[s] = time.time() 396 d = s.send_query(self._storage_index) 397 d.addBoth(incidentally, self._queries_in_flight.discard, s) 398 d.addCallbacks(lambda shnums: [self._sharemap.add(shnum, s) 399 for shnum in shnums], 400 lambda f: self._query_error(f, s)) 401 d.addErrback(self._error) 402 d.addCallback(self._reschedule) 403 num += 1 404 405 def _query_error(self, f, s): 406 # a server returned an error, log it gently and ignore 407 level = log.WEIRD 408 if f.check(DeadReferenceError): 409 level = log.UNUSUAL 410 log.msg("Error during get_buckets to server=%(server)s", server=str(s), 411 failure=f, level=level, umid="3uuBUQ") 412 413 # this block is responsible for turning known shares into usable shares, 414 # by fetching enough data to validate their contents. 415 416 # UEB (from any share) 417 # share hash chain, validated (from any share, for given shnum) 418 # block hash (any share, given shnum) 419 420 def _got_ueb(self, ueb_data, share): 421 if self._ueb is not None: 422 return 423 if hashutil.uri_extension_hash(ueb_data) != self._ueb_hash: 424 share.error("UEB hash does not match") 425 return 426 d = uri.unpack_extension(ueb_data) 427 self.share_size = mathutil.div_ceil(self._size, self._needed_shares) 428 429 430 # There are several kinds of things that can be found in a UEB. 431 # First, things that we really need to learn from the UEB in order to 432 # do this download. Next: things which are optional but not redundant 433 # -- if they are present in the UEB they will get used. Next, things 434 # that are optional and redundant. 
These things are required to be 435 # consistent: they don't have to be in the UEB, but if they are in 436 # the UEB then they will be checked for consistency with the 437 # already-known facts, and if they are inconsistent then an exception 438 # will be raised. These things aren't actually used -- they are just 439 # tested for consistency and ignored. Finally: things which are 440 # deprecated -- they ought not be in the UEB at all, and if they are 441 # present then a warning will be logged but they are otherwise 442 # ignored. 443 444 # First, things that we really need to learn from the UEB: 445 # segment_size, crypttext_root_hash, and share_root_hash. 446 self._segsize = d['segment_size'] 447 448 self._blocksize = mathutil.div_ceil(self._segsize, self._needed_shares) 449 self._numsegs = mathutil.div_ceil(self._size, self._segsize) 450 451 self._tail_segsize = self._size % self._segsize 452 if self._tail_segsize == 0: 453 self._tail_segsize = self._segsize 454 # padding for erasure code 455 self._tail_segsize = mathutil.next_multiple(self._tail_segsize, 456 self._needed_shares) 457 458 # Ciphertext hash tree root is mandatory, so that there is at most 459 # one ciphertext that matches this read-cap or verify-cap. The 460 # integrity check on the shares is not sufficient to prevent the 461 # original encoder from creating some shares of file A and other 462 # shares of file B. 
463 self._ciphertext_hash_tree = IncompleteHashTree(self._numsegs) 464 self._ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']}) 465 466 self._share_hash_tree.set_hashes({0: d['share_root_hash']}) 467 468 469 # Next: things that are optional and not redundant: crypttext_hash 470 if 'crypttext_hash' in d: 471 if len(self._ciphertext_hash) == hashutil.CRYPTO_VAL_SIZE: 472 self._ciphertext_hash = d['crypttext_hash'] 473 else: 474 log.msg("ignoring bad-length UEB[crypttext_hash], " 475 "got %d bytes, want %d" % (len(d['crypttext_hash']), 476 hashutil.CRYPTO_VAL_SIZE), 477 umid="oZkGLA", level=log.WEIRD) 478 479 # we ignore all of the redundant fields when downloading. The 480 # Verifier uses a different code path which does not ignore them. 481 482 # finally, set self._ueb as a marker that we don't need to request it 483 # anymore 484 self._ueb = d 485 486 def _got_share_hashes(self, hashes, share): 487 assert isinstance(hashes, dict) 488 try: 489 self._share_hash_tree.set_hashes(hashes) 490 except (IndexError, BadHashError, NotEnoughHashesError), le: 491 share.error("Bad or missing hashes") 492 return 493 494 #def _got_block_hashes( 495 496 def _init_validate_enough_shares(self): 497 # _valid_shares maps shnum to ValidatedShare instances, and is 498 # populated once the block hash root has been fetched and validated 499 # (which requires any valid copy of the UEB, and a valid copy of the 500 # share hash chain for each shnum) 501 self._valid_shares = {} 502 503 # _target_shares is an ordered list of ReadyShare instances, each of 504 # which is a (shnum, server) tuple. It is sorted in order of 505 # preference: we expect to get the fastest response from the 506 # ReadyShares at the front of the list. It is also sorted to 507 # distribute the shnums, so that fetching shares from 508 # _target_shares[:k] is likely (but not guaranteed) to give us k 509 # distinct shares. 
The rule is that we skip over entries for blocks 510 # that we've already received, limit the number of recent queries for 511 # the same block, 512 self._target_shares = [] 513 514 def _validate_enough_shares(self): 515 # my goal is to have at least 2*k distinct validated shares from at 516 # least 2*k distinct servers 517 valid_share_servers = set() 518 for vs in self._valid_shares.values(): 519 valid_share_servers.update(vs.get_servers()) 520 if (len(self._valid_shares) >= 2*self._needed_shares 521 and len(self._valid_share_servers) >= 2*self._needed_shares): 522 return 523 #for 524 525 def _reschedule(self, _ign): 526 # fire the loop again 527 if not self._scheduled: 528 self._scheduled = True 529 eventually(self._loop) 530 531 def _loop(self): 532 self._scheduled = False 533 # what do we need? 534 535 self._find_enough_shares() 536 self._validate_enough_shares() 537 538 if not self._ueb: 539 # we always need a copy of the UEB 540 pass 541 542 def _error(self, f): 543 # this is an unexpected error: a coding bug 544 log.err(f, level=log.UNUSUAL) 545 546 547 548 # using a single packed string (and an offset table) may be an artifact of 549 # our native storage server: other backends might allow cheap multi-part 550 # files (think S3, several buckets per share, one for each section). 551 552 # find new names for: 553 # data_holder 554 # Share / Share2 (ShareInstance / Share? but the first is more useful) 555 556 class IShare(Interface): 557 """I represent a single instance of a single share (e.g. I reference the 558 shnum2 for share SI=abcde on server xy12t, not the one on server ab45q). 559 This interface is used by SegmentFetcher to retrieve validated blocks. 
560 """ 561 def get_block(segnum): 562 """Return an Observer2, which will be notified with the following 563 events: 564 state=COMPLETE, block=data (terminal): validated block data 565 state=OVERDUE (non-terminal): we have reason to believe that the 566 request might have stalled, or we 567 might just be impatient 568 state=CORRUPT (terminal): the data we received was corrupt 569 state=DEAD (terminal): the connection has failed 570 """ 571 572 573 # it'd be nice if we receive the hashes before the block, or just 574 # afterwards, so we aren't stuck holding on to unvalidated blocks 575 # that we can't process. If we guess the offsets right, we can 576 # accomplish this by sending the block request after the metadata 577 # requests (by keeping two separate requestlists), and have a one RTT 578 # pipeline like: 579 # 1a=metadata, 1b=block 580 # 1b->process+deliver : one RTT 581 582 # But if we guess wrong, and fetch the wrong part of the block, we'll 583 # have a pipeline that looks like: 584 # 1a=wrong metadata, 1b=wrong block 585 # 1a->2a=right metadata,2b=right block 586 # 2b->process+deliver 587 # which means two RTT and buffering one block (which, since we'll 588 # guess the segsize wrong for everything, means buffering one 589 # segment) 590 591 # if we start asking for multiple segments, we could get something 592 # worse: 593 # 1a=wrong metadata, 1b=wrong block0, 1c=wrong block1, .. 594 # 1a->2a=right metadata,2b=right block0,2c=right block1, . 595 # 2b->process+deliver 596 597 # which means two RTT but fetching and buffering the whole file 598 # before delivering anything. However, since we don't know when the 599 # other shares are going to arrive, we need to avoid having more than 600 # one block in the pipeline anyways. So we shouldn't be able to get 601 # into this state. 
602 603 # it also means that, instead of handling all of 604 # self._requested_blocks at once, we should only be handling one 605 # block at a time: one of the requested block should be special 606 # (probably FIFO). But retire all we can. 607 608 # this might be better with a Deferred, using COMPLETE as the success 609 # case and CORRUPT/DEAD in an errback, because that would let us hold the 610 # 'share' and 'shnum' arguments locally (instead of roundtripping them 611 # through Share.send_request). But that OVERDUE is not terminal. So I 612 # want a new sort of callback mechanism, with the extra-argument-passing 613 # aspects of Deferred, but without being so one-shot. Is this a job for 614 # Observer? No, it doesn't take extra arguments. So this uses Observer2. 615 616 617 class Reader: 618 """I am responsible for a single offset+size read of the file. I handle 619 segmentation: I figure out which segments are necessary, request them 620 (from my CiphertextDownloader) in order, and trim the segments down to 621 match the offset+size span. I use the Producer/Consumer interface to only 622 request one segment at a time. 623 """ 624 implements(IPushProducer) 625 def __init__(self, consumer, offset, size): 626 self._needed = [] 627 self._consumer = consumer 628 self._hungry = False 629 self._offset = offset 630 self._size = size 631 self._segsize = None 632 def start(self): 633 self._alive = True 634 self._deferred = defer.Deferred() 635 # the process doesn't actually start until set_segment_size() 636 return self._deferred 637 638 def set_segment_size(self, segsize): 639 if self._segsize is not None: 640 return 641 self._segsize = segsize 642 self._compute_segnums() 643 644 def _compute_segnums(self, segsize): 645 # now that we know the file's segsize, what segments (and which 646 # ranges of each) will we need? 
647 size = self._size 648 offset = self._offset 649 while size: 650 assert size >= 0 651 this_seg_num = int(offset / self._segsize) 652 this_seg_offset = offset - (seg_num*self._segsize) 653 this_seg_size = min(size, self._segsize-seg_offset) 654 size -= this_seg_size 655 if size: 656 offset += this_seg_size 657 yield (this_seg_num, this_seg_offset, this_seg_size) 658 659 def get_needed_segments(self): 660 return set([segnum for (segnum, off, size) in self._needed]) 661 662 663 def stopProducing(self): 664 self._hungry = False 665 self._alive = False 666 # TODO: cancel the segment requests 667 def pauseProducing(self): 668 self._hungry = False 669 def resumeProducing(self): 670 self._hungry = True 671 def add_segment(self, segnum, offset, size): 672 self._needed.append( (segnum, offset, size) ) 673 def got_segment(self, segnum, segdata): 674 """Return True if this schedule has more to go, or False if it is 675 done.""" 676 assert self._needed[0][segnum] == segnum 677 (_ign, offset, size) = self._needed.pop(0) 678 data = segdata[offset:offset+size] 679 self._consumer.write(data) 680 if not self._needed: 681 # we're done 682 self._alive = False 683 self._hungry = False 684 self._consumer.unregisterProducer() 685 self._deferred.callback(self._consumer) 686 def error(self, f): 687 self._alive = False 688 self._hungry = False 689 self._consumer.unregisterProducer() 690 self._deferred.errback(f) 691 692 693 694 class x: 695 def OFFread(self, consumer, offset=0, size=None): 696 """I am the main entry point, from which FileNode.read() can get 697 data.""" 698 # tolerate concurrent operations: each gets its own Reader 699 if size is None: 700 size = self._size - offset 701 r = Reader(consumer, offset, size) 702 self._readers.add(r) 703 d = r.start() 704 if self.segment_size is not None: 705 r.set_segment_size(self.segment_size) 706 # TODO: if we can't find any segments, and thus never get a 707 # segsize, tell the Readers to give up 708 return d -
src/allmydata/immutable/repairer.py
diff --git a/src/allmydata/immutable/repairer.py b/src/allmydata/immutable/repairer.py index fa6a604..64fb9a1 100644
a b 1 1 from zope.interface import implements 2 2 from twisted.internet import defer 3 3 from allmydata.storage.server import si_b2a 4 from allmydata.util import log, observer 5 from allmydata.util.assertutil import precondition, _assert 6 from allmydata.uri import CHKFileVerifierURI 7 from allmydata.interfaces import IEncryptedUploadable, IDownloadTarget 8 from twisted.internet.interfaces import IConsumer 4 from allmydata.util import log, consumer 5 from allmydata.util.assertutil import precondition 6 from allmydata.interfaces import IEncryptedUploadable 9 7 10 from allmydata.immutable import download, upload 11 12 import collections 8 from allmydata.immutable import upload 13 9 14 10 class Repairer(log.PrefixingLogMixin): 11 implements(IEncryptedUploadable) 15 12 """I generate any shares which were not available and upload them to 16 13 servers. 17 14 … … class Repairer(log.PrefixingLogMixin): 43 40 cancelled (by invoking its raise_if_cancelled() method). 44 41 """ 45 42 46 def __init__(self, storage_broker, secret_holder, verifycap, monitor): 47 assert precondition(isinstance(verifycap, CHKFileVerifierURI)) 48 49 logprefix = si_b2a(verifycap.get_storage_index())[:5] 43 def __init__(self, filenode, storage_broker, secret_holder, monitor): 44 logprefix = si_b2a(filenode.get_storage_index())[:5] 50 45 log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer", 51 46 prefix=logprefix) 52 47 self._filenode = filenode 53 48 self._storage_broker = storage_broker 54 49 self._secret_holder = secret_holder 55 self._verifycap = verifycap56 50 self._monitor = monitor 51 self._offset = 0 57 52 58 53 def start(self): 59 54 self.log("starting repair") 60 duc = DownUpConnector() 61 dl = download.CiphertextDownloader(self._storage_broker, 62 self._verifycap, target=duc, 63 monitor=self._monitor) 64 ul = upload.CHKUploader(self._storage_broker, self._secret_holder) 65 66 d = defer.Deferred() 67 68 # If the upload or the download fails or is stopped, then the repair 69 # 
failed. 70 def _errb(f): 71 d.errback(f) 72 return None 73 74 # If the upload succeeds, then the repair has succeeded. 75 def _cb(res): 76 d.callback(res) 77 ul.start(duc).addCallbacks(_cb, _errb) 78 79 # If the download fails or is stopped, then the repair failed. 80 d2 = dl.start() 81 d2.addErrback(_errb) 82 83 # We ignore the callback from d2. Is this right? Ugh. 84 55 d = self._filenode.get_segment_size() 56 def _got_segsize(segsize): 57 vcap = self._filenode.get_verify_cap() 58 k = vcap.needed_shares 59 N = vcap.total_shares 60 happy = upload.BaseUploadable.default_encoding_param_happy 61 self._encodingparams = (k, happy, N, segsize) 62 ul = upload.CHKUploader(self._storage_broker, self._secret_holder) 63 return ul.start(self) # I am the IEncryptedUploadable 64 d.addCallback(_got_segsize) 85 65 return d 86 66 87 class DownUpConnector(log.PrefixingLogMixin):88 implements(IEncryptedUploadable, IDownloadTarget, IConsumer)89 """I act like an 'encrypted uploadable' -- something that a local90 uploader can read ciphertext from in order to upload the ciphertext.91 However, unbeknownst to the uploader, I actually download the ciphertext92 from a CiphertextDownloader instance as it is needed.93 94 On the other hand, I act like a 'download target' -- something that a95 local downloader can write ciphertext to as it downloads the ciphertext.96 That downloader doesn't realize, of course, that I'm just turning around97 and giving the ciphertext to the uploader."""98 99 # The theory behind this class is nice: just satisfy two separate100 # interfaces. The implementation is slightly horrible, because of101 # "impedance mismatch" -- the downloader expects to be able to102 # synchronously push data in, and the uploader expects to be able to read103 # data out with a "read(THIS_SPECIFIC_LENGTH)" which returns a deferred.104 # The two interfaces have different APIs for pausing/unpausing. 
The105 # uploader requests metadata like size and encodingparams which the106 # downloader provides either eventually or not at all (okay I just now107 # extended the downloader to provide encodingparams). Most of this108 # slightly horrible code would disappear if CiphertextDownloader just109 # used this object as an IConsumer (plus maybe a couple of other methods)110 # and if the Uploader simply expected to be treated as an IConsumer (plus111 # maybe a couple of other things).112 113 def __init__(self, buflim=2**19):114 """If we're already holding at least buflim bytes, then tell the115 downloader to pause until we have less than buflim bytes."""116 log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer")117 self.buflim = buflim118 self.bufs = collections.deque() # list of strings119 self.bufsiz = 0 # how many bytes total in bufs120 121 # list of deferreds which will fire with the requested ciphertext122 self.next_read_ds = collections.deque()123 124 # how many bytes of ciphertext were requested by each deferred125 self.next_read_lens = collections.deque()126 127 self._size_osol = observer.OneShotObserverList()128 self._encodingparams_osol = observer.OneShotObserverList()129 self._storageindex_osol = observer.OneShotObserverList()130 self._closed_to_pusher = False131 132 # once seg size is available, the following attribute will be created133 # to hold it:134 135 # self.encodingparams # (provided by the object which is pushing data136 # into me, required by the object which is pulling data out of me)137 138 # open() will create the following attribute:139 # self.size # size of the whole file (provided by the object which is140 # pushing data into me, required by the object which is pulling data141 # out of me)142 143 # set_upload_status() will create the following attribute:144 145 # self.upload_status # XXX do we need to actually update this? 
Is146 # anybody watching the results during a repair?147 148 def _satisfy_reads_if_possible(self):149 assert bool(self.next_read_ds) == bool(self.next_read_lens)150 while self.next_read_ds and ((self.bufsiz >= self.next_read_lens[0])151 or self._closed_to_pusher):152 nrd = self.next_read_ds.popleft()153 nrl = self.next_read_lens.popleft()154 155 # Pick out the requested number of bytes from self.bufs, turn it156 # into a string, and callback the deferred with that.157 res = []158 ressize = 0159 while ressize < nrl and self.bufs:160 nextbuf = self.bufs.popleft()161 res.append(nextbuf)162 ressize += len(nextbuf)163 if ressize > nrl:164 extra = ressize - nrl165 self.bufs.appendleft(nextbuf[:-extra])166 res[-1] = nextbuf[:-extra]167 assert _assert(sum(len(x) for x in res) <= nrl, [len(x) for x in res], nrl)168 assert _assert(sum(len(x) for x in res) == nrl or self._closed_to_pusher, [len(x) for x in res], nrl)169 self.bufsiz -= nrl170 if self.bufsiz < self.buflim and self.producer:171 self.producer.resumeProducing()172 nrd.callback(res)173 174 # methods to satisfy the IConsumer and IDownloadTarget interfaces. 
(From175 # the perspective of a downloader I am an IDownloadTarget and an176 # IConsumer.)177 def registerProducer(self, producer, streaming):178 assert streaming # We know how to handle only streaming producers.179 self.producer = producer # the downloader180 def unregisterProducer(self):181 self.producer = None182 def open(self, size):183 self.size = size184 self._size_osol.fire(self.size)185 def set_encodingparams(self, encodingparams):186 self.encodingparams = encodingparams187 self._encodingparams_osol.fire(self.encodingparams)188 def set_storageindex(self, storageindex):189 self.storageindex = storageindex190 self._storageindex_osol.fire(self.storageindex)191 def write(self, data):192 precondition(data) # please don't write empty strings193 self.bufs.append(data)194 self.bufsiz += len(data)195 self._satisfy_reads_if_possible()196 if self.bufsiz >= self.buflim and self.producer:197 self.producer.pauseProducing()198 def finish(self):199 pass200 def close(self):201 self._closed_to_pusher = True202 # Any reads which haven't been satisfied by now are going to203 # have to be satisfied with short reads.204 self._satisfy_reads_if_possible()205 67 206 68 # methods to satisfy the IEncryptedUploader interface 207 69 # (From the perspective of an uploader I am an IEncryptedUploadable.) 208 70 def set_upload_status(self, upload_status): 209 71 self.upload_status = upload_status 210 72 def get_size(self): 211 if hasattr(self, 'size'): # attribute created by self.open() 212 return defer.succeed(self.size) 213 else: 214 return self._size_osol.when_fired() 73 size = self._filenode.get_size() 74 assert size is not None 75 return defer.succeed(size) 215 76 def get_all_encoding_parameters(self): 216 # We have to learn the encoding params from pusher. 
217 if hasattr(self, 'encodingparams'): 218 # attribute created by self.set_encodingparams() 219 return defer.succeed(self.encodingparams) 220 else: 221 return self._encodingparams_osol.when_fired() 77 return defer.succeed(self._encodingparams) 222 78 def read_encrypted(self, length, hash_only): 223 """Returns a deferred which eventually fire dwith the requested224 ciphertext ."""79 """Returns a deferred which eventually fires with the requested 80 ciphertext, as a list of strings.""" 225 81 precondition(length) # please don't ask to read 0 bytes 226 d = defer.Deferred()227 self.next_read_ds.append(d)228 self. next_read_lens.append(length)229 self._satisfy_reads_if_possible()82 mc = consumer.MemoryConsumer() 83 d = self._filenode.read(mc, self._offset, length) 84 self._offset += length 85 d.addCallback(lambda ign: mc.chunks) 230 86 return d 231 87 def get_storage_index(self): 232 # We have to learn the storage index from pusher. 233 if hasattr(self, 'storageindex'): 234 # attribute created by self.set_storageindex() 235 return defer.succeed(self.storageindex) 236 else: 237 return self._storageindex.when_fired() 88 return self._filenode.get_storage_index() 89 def close(self): 90 pass -
src/allmydata/immutable/upload.py
diff --git a/src/allmydata/immutable/upload.py b/src/allmydata/immutable/upload.py index ca7d56b..7ac86c2 100644
a b from allmydata.util.assertutil import precondition 20 20 from allmydata.util.rrefutil import add_version_to_remote_reference 21 21 from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \ 22 22 IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus, \ 23 NoServersError, InsufficientVersionError, UploadUnhappinessError 23 NoServersError, InsufficientVersionError, UploadUnhappinessError, \ 24 DEFAULT_MAX_SEGMENT_SIZE 24 25 from allmydata.immutable import layout 25 26 from pycryptopp.cipher.aes import AES 26 27 … … class AssistedUploader: 1170 1171 return self._upload_status 1171 1172 1172 1173 class BaseUploadable: 1173 default_max_segment_size = 128*KiB # overridden by max_segment_size 1174 # this is overridden by max_segment_size 1175 default_max_segment_size = DEFAULT_MAX_SEGMENT_SIZE 1174 1176 default_encoding_param_k = 3 # overridden by encoding_parameters 1175 1177 default_encoding_param_happy = 7 1176 1178 default_encoding_param_n = 10 -
src/allmydata/interfaces.py
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index f325bb1..75aa51e 100644
a b WriteEnablerSecret = Hash # used to protect mutable bucket modifications 24 24 LeaseRenewSecret = Hash # used to protect bucket lease renewal requests 25 25 LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests 26 26 27 KiB = 1024 28 DEFAULT_MAX_SEGMENT_SIZE = 128*KiB 29 27 30 class RIStubClient(RemoteInterface): 28 31 """Each client publishes a service announcement for a dummy object called 29 32 the StubClient. This object doesn't actually offer any services, but the -
src/allmydata/nodemaker.py
diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py index a30efbf..ef182a4 100644
a b import weakref 2 2 from zope.interface import implements 3 3 from allmydata.util.assertutil import precondition 4 4 from allmydata.interfaces import INodeMaker, MustBeDeepImmutableError 5 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode 5 from allmydata.immutable.literal import LiteralFileNode 6 from allmydata.immutable.filenode import ImmutableFileNode, CiphertextFileNode 6 7 from allmydata.immutable.upload import Data 7 8 from allmydata.mutable.filenode import MutableFileNode 8 9 from allmydata.dirnode import DirectoryNode, pack_children … … class NodeMaker: 17 18 implements(INodeMaker) 18 19 19 20 def __init__(self, storage_broker, secret_holder, history, 20 uploader, downloader, download_cache_dirman,21 uploader, terminator, 21 22 default_encoding_parameters, key_generator): 22 23 self.storage_broker = storage_broker 23 24 self.secret_holder = secret_holder 24 25 self.history = history 25 26 self.uploader = uploader 26 self.downloader = downloader 27 self.download_cache_dirman = download_cache_dirman 27 self.terminator = terminator 28 28 self.default_encoding_parameters = default_encoding_parameters 29 29 self.key_generator = key_generator 30 30 … … class NodeMaker: 34 34 return LiteralFileNode(cap) 35 35 def _create_immutable(self, cap): 36 36 return ImmutableFileNode(cap, self.storage_broker, self.secret_holder, 37 self.downloader, self.history, 38 self.download_cache_dirman) 37 self.terminator, self.history) 38 def _create_immutable_verifier(self, cap): 39 return CiphertextFileNode(cap, self.storage_broker, self.secret_holder, 40 self.terminator, self.history) 39 41 def _create_mutable(self, cap): 40 42 n = MutableFileNode(self.storage_broker, self.secret_holder, 41 43 self.default_encoding_parameters, … … class NodeMaker: 48 50 # this returns synchronously. It starts with a "cap string". 
49 51 assert isinstance(writecap, (str, type(None))), type(writecap) 50 52 assert isinstance(readcap, (str, type(None))), type(readcap) 51 53 52 54 bigcap = writecap or readcap 53 55 if not bigcap: 54 56 # maybe the writecap was hidden because we're in a readonly … … class NodeMaker: 78 80 return self._create_lit(cap) 79 81 if isinstance(cap, uri.CHKFileURI): 80 82 return self._create_immutable(cap) 83 if isinstance(cap, uri.CHKFileVerifierURI): 84 return self._create_immutable_verifier(cap) 81 85 if isinstance(cap, (uri.ReadonlySSKFileURI, uri.WriteableSSKFileURI)): 82 86 return self._create_mutable(cap) 83 87 if isinstance(cap, (uri.DirectoryURI, -
src/allmydata/test/test_cli.py
diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py index c65474f..3566960 100644
a b class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): 2040 2040 self.delete_shares_numbered(ur.uri, range(1,10)) 2041 2041 d.addCallback(_stash_bad) 2042 2042 2043 # the download is abandoned as soon as it's clear that we won't get 2044 # enough shares. The one remaining share might be in either the 2045 # COMPLETE or the PENDING state. 2046 in_complete_msg = "ran out of shares: 1 complete, 0 pending, 0 overdue, 0 unused, need 3" 2047 in_pending_msg = "ran out of shares: 0 complete, 1 pending, 0 overdue, 0 unused, need 3" 2048 2043 2049 d.addCallback(lambda ign: self.do_cli("get", self.uri_1share)) 2044 2050 def _check1((rc, out, err)): 2045 2051 self.failIfEqual(rc, 0) 2046 2052 self.failUnless("410 Gone" in err, err) 2047 2053 self.failUnlessIn("NotEnoughSharesError: ", err) 2048 self.failUnlessIn("Failed to get enough shareholders: have 1, need 3", err) 2054 self.failUnless(in_complete_msg in err or in_pending_msg in err, 2055 err) 2049 2056 d.addCallback(_check1) 2050 2057 2051 2058 targetf = os.path.join(self.basedir, "output") … … class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): 2054 2061 self.failIfEqual(rc, 0) 2055 2062 self.failUnless("410 Gone" in err, err) 2056 2063 self.failUnlessIn("NotEnoughSharesError: ", err) 2057 self.failUnlessIn("Failed to get enough shareholders: have 1, need 3", err) 2064 self.failUnless(in_complete_msg in err or in_pending_msg in err, 2065 err) 2058 2066 self.failIf(os.path.exists(targetf)) 2059 2067 d.addCallback(_check2) 2060 2068 -
src/allmydata/test/test_dirnode.py
diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index e6aaf77..3779327 100644
a b class Packing(unittest.TestCase): 1106 1106 def test_unpack_and_pack_behavior(self): 1107 1107 known_tree = b32decode(self.known_tree) 1108 1108 nodemaker = NodeMaker(None, None, None, 1109 None, None, None,1109 None, None, 1110 1110 {"k": 3, "n": 10}, None) 1111 1111 write_uri = "URI:SSK-RO:e3mdrzfwhoq42hy5ubcz6rp3o4:ybyibhnp3vvwuq2vaw2ckjmesgkklfs6ghxleztqidihjyofgw7q" 1112 1112 filenode = nodemaker.create_from_cap(write_uri) … … class Packing(unittest.TestCase): 1168 1168 return kids 1169 1169 1170 1170 def test_deep_immutable(self): 1171 nm = NodeMaker(None, None, None, None, None, None, {"k": 3, "n": 10}, 1172 None) 1171 nm = NodeMaker(None, None, None, None, None, {"k": 3, "n": 10}, None) 1173 1172 fn = MinimalFakeMutableFile() 1174 1173 1175 1174 kids = self._make_kids(nm, ["imm", "lit", "write", "read", … … class FakeNodeMaker(NodeMaker): 1263 1262 class FakeClient2(Client): 1264 1263 def __init__(self): 1265 1264 self.nodemaker = FakeNodeMaker(None, None, None, 1266 None, None, None,1265 None, None, 1267 1266 {"k":3,"n":10}, None) 1268 1267 def create_node_from_uri(self, rwcap, rocap): 1269 1268 return self.nodemaker.create_from_cap(rwcap, rocap) … … class Deleter(GridTestMixin, unittest.TestCase): 1547 1546 def _do_delete(ignored): 1548 1547 nm = UCWEingNodeMaker(c0.storage_broker, c0._secret_holder, 1549 1548 c0.get_history(), c0.getServiceNamed("uploader"), 1550 c0.downloader, 1551 c0.download_cache_dirman, 1549 c0.terminator, 1552 1550 c0.get_encoding_parameters(), 1553 1551 c0._key_generator) 1554 1552 n = nm.create_from_cap(self.root_uri) -
src/allmydata/test/test_download.py
diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py index b54bf01..95fae2f 100644
a b 5 5 6 6 import os 7 7 from twisted.trial import unittest 8 from twisted.internet import defer 8 9 from allmydata import uri 9 10 from allmydata.storage.server import storage_index_to_dir 10 from allmydata.util import base32, fileutil 11 from allmydata.util.consumer import download_to_data 12 from allmydata.immutable import upload 11 from allmydata.util import base32, fileutil, spans, log 12 from allmydata.util.consumer import download_to_data, MemoryConsumer 13 from allmydata.immutable import upload, layout 13 14 from allmydata.test.no_network import GridTestMixin 15 from allmydata.test.common import ShouldFailMixin 16 from allmydata.interfaces import NotEnoughSharesError, NoSharesError 17 from allmydata.immutable.downloader.common import BadSegmentNumberError, \ 18 BadCiphertextHashError 19 from allmydata.codec import CRSDecoder 20 from foolscap.eventual import fireEventually, flushEventualQueue 14 21 15 22 plaintext = "This is a moderate-sized file.\n" * 10 16 23 mutable_plaintext = "This is a moderate-sized mutable file.\n" * 10 … … mutable_shares = { 68 75 } 69 76 #--------- END stored_shares.py ---------------- 70 77 71 class DownloadTest(GridTestMixin, unittest.TestCase): 72 timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box. 
73 def test_download(self): 74 self.basedir = self.mktemp() 75 self.set_up_grid() 76 self.c0 = self.g.clients[0] 77 78 # do this to create the shares 79 #return self.create_shares() 80 81 self.load_shares() 82 d = self.download_immutable() 83 d.addCallback(self.download_mutable) 84 return d 78 class _Base(GridTestMixin, ShouldFailMixin): 85 79 86 80 def create_shares(self, ignored=None): 87 81 u = upload.Data(plaintext, None) … … class DownloadTest(GridTestMixin, unittest.TestCase): 178 172 def _got_data(data): 179 173 self.failUnlessEqual(data, plaintext) 180 174 d.addCallback(_got_data) 175 # make sure we can use the same node twice 176 d.addCallback(lambda ign: download_to_data(n)) 177 d.addCallback(_got_data) 181 178 return d 182 179 183 180 def download_mutable(self, ignored=None): … … class DownloadTest(GridTestMixin, unittest.TestCase): 188 185 d.addCallback(_got_data) 189 186 return d 190 187 188 class DownloadTest(_Base, unittest.TestCase): 189 timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box. 190 def test_download(self): 191 self.basedir = self.mktemp() 192 self.set_up_grid() 193 self.c0 = self.g.clients[0] 194 195 # do this to create the shares 196 #return self.create_shares() 197 198 self.load_shares() 199 d = self.download_immutable() 200 d.addCallback(self.download_mutable) 201 return d 202 203 def test_download_failover(self): 204 self.basedir = self.mktemp() 205 self.set_up_grid() 206 self.c0 = self.g.clients[0] 207 208 self.load_shares() 209 si = uri.from_string(immutable_uri).get_storage_index() 210 si_dir = storage_index_to_dir(si) 211 212 n = self.c0.create_node_from_uri(immutable_uri) 213 d = download_to_data(n) 214 def _got_data(data): 215 self.failUnlessEqual(data, plaintext) 216 d.addCallback(_got_data) 217 218 def _clobber_some_shares(ign): 219 # find the three shares that were used, and delete them. 
Then 220 # download again, forcing the downloader to fail over to other 221 # shares 222 for s in n._cnode._node._shares: 223 for clientnum in immutable_shares: 224 for shnum in immutable_shares[clientnum]: 225 if s._shnum == shnum: 226 fn = os.path.join(self.get_serverdir(clientnum), 227 "shares", si_dir, str(shnum)) 228 os.unlink(fn) 229 d.addCallback(_clobber_some_shares) 230 d.addCallback(lambda ign: download_to_data(n)) 231 d.addCallback(_got_data) 232 233 def _clobber_most_shares(ign): 234 # delete all but one of the shares that are still alive 235 live_shares = [s for s in n._cnode._node._shares if s.is_alive()] 236 save_me = live_shares[0]._shnum 237 for clientnum in immutable_shares: 238 for shnum in immutable_shares[clientnum]: 239 if shnum == save_me: 240 continue 241 fn = os.path.join(self.get_serverdir(clientnum), 242 "shares", si_dir, str(shnum)) 243 if os.path.exists(fn): 244 os.unlink(fn) 245 # now the download should fail with NotEnoughSharesError 246 return self.shouldFail(NotEnoughSharesError, "1shares", None, 247 download_to_data, n) 248 d.addCallback(_clobber_most_shares) 249 250 def _clobber_all_shares(ign): 251 # delete the last remaining share 252 for clientnum in immutable_shares: 253 for shnum in immutable_shares[clientnum]: 254 fn = os.path.join(self.get_serverdir(clientnum), 255 "shares", si_dir, str(shnum)) 256 if os.path.exists(fn): 257 os.unlink(fn) 258 # now a new download should fail with NoSharesError. We want a 259 # new ImmutableFileNode so it will forget about the old shares. 260 # If we merely called create_node_from_uri() without first 261 # dereferencing the original node, the NodeMaker's _node_cache 262 # would give us back the old one. 
263 n = None 264 n = self.c0.create_node_from_uri(immutable_uri) 265 return self.shouldFail(NoSharesError, "0shares", None, 266 download_to_data, n) 267 d.addCallback(_clobber_all_shares) 268 return d 269 270 def test_badguess(self): 271 self.basedir = self.mktemp() 272 self.set_up_grid() 273 self.c0 = self.g.clients[0] 274 self.load_shares() 275 n = self.c0.create_node_from_uri(immutable_uri) 276 277 # Cause the downloader to guess a segsize that's too low, so it will 278 # ask for a segment number that's too high (beyond the end of the 279 # real list, causing BadSegmentNumberError), to exercise 280 # Segmentation._retry_bad_segment 281 282 con1 = MemoryConsumer() 283 n._cnode._node._build_guessed_tables(90) 284 # plaintext size of 310 bytes, wrong-segsize of 90 bytes, will make 285 # us think that file[180:200] is in the third segment (segnum=2), but 286 # really there's only one segment 287 d = n.read(con1, 180, 20) 288 def _done(res): 289 self.failUnlessEqual("".join(con1.chunks), plaintext[180:200]) 290 d.addCallback(_done) 291 return d 292 293 def test_simultaneous_badguess(self): 294 self.basedir = self.mktemp() 295 self.set_up_grid() 296 self.c0 = self.g.clients[0] 297 298 # upload a file with multiple segments, and a non-default segsize, to 299 # exercise the offset-guessing code. Because we don't tell the 300 # downloader about the unusual segsize, it will guess wrong, and have 301 # to do extra roundtrips to get the correct data. 
302 u = upload.Data(plaintext, None) 303 u.max_segment_size = 70 # 5 segs, 8-wide hashtree 304 con1 = MemoryConsumer() 305 con2 = MemoryConsumer() 306 d = self.c0.upload(u) 307 def _uploaded(ur): 308 n = self.c0.create_node_from_uri(ur.uri) 309 d1 = n.read(con1, 70, 20) 310 d2 = n.read(con2, 140, 20) 311 return defer.gatherResults([d1,d2]) 312 d.addCallback(_uploaded) 313 def _done(res): 314 self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) 315 self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) 316 d.addCallback(_done) 317 return d 318 319 def test_simultaneous_goodguess(self): 320 self.basedir = self.mktemp() 321 self.set_up_grid() 322 self.c0 = self.g.clients[0] 323 324 # upload a file with multiple segments, and a non-default segsize, to 325 # exercise the offset-guessing code. This time we *do* tell the 326 # downloader about the unusual segsize, so it can guess right. 327 u = upload.Data(plaintext, None) 328 u.max_segment_size = 70 # 5 segs, 8-wide hashtree 329 con1 = MemoryConsumer() 330 con2 = MemoryConsumer() 331 d = self.c0.upload(u) 332 def _uploaded(ur): 333 n = self.c0.create_node_from_uri(ur.uri) 334 n._cnode._node._build_guessed_tables(u.max_segment_size) 335 d1 = n.read(con1, 70, 20) 336 #d2 = n.read(con2, 140, 20) # XXX 337 d2 = defer.succeed(None) 338 return defer.gatherResults([d1,d2]) 339 d.addCallback(_uploaded) 340 def _done(res): 341 self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) 342 self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) 343 #d.addCallback(_done) 344 return d 345 346 def test_sequential_goodguess(self): 347 self.basedir = self.mktemp() 348 self.set_up_grid() 349 self.c0 = self.g.clients[0] 350 data = (plaintext*100)[:30000] # multiple of k 351 352 # upload a file with multiple segments, and a non-default segsize, to 353 # exercise the offset-guessing code. This time we *do* tell the 354 # downloader about the unusual segsize, so it can guess right. 
355 u = upload.Data(data, None) 356 u.max_segment_size = 6000 # 5 segs, 8-wide hashtree 357 con1 = MemoryConsumer() 358 con2 = MemoryConsumer() 359 d = self.c0.upload(u) 360 def _uploaded(ur): 361 n = self.c0.create_node_from_uri(ur.uri) 362 n._cnode._node._build_guessed_tables(u.max_segment_size) 363 d = n.read(con1, 12000, 20) 364 def _read1(ign): 365 self.failUnlessEqual("".join(con1.chunks), data[12000:12020]) 366 return n.read(con2, 24000, 20) 367 d.addCallback(_read1) 368 def _read2(ign): 369 self.failUnlessEqual("".join(con2.chunks), data[24000:24020]) 370 d.addCallback(_read2) 371 return d 372 d.addCallback(_uploaded) 373 return d 374 375 376 def test_simultaneous_get_blocks(self): 377 self.basedir = self.mktemp() 378 self.set_up_grid() 379 self.c0 = self.g.clients[0] 380 381 self.load_shares() 382 stay_empty = [] 383 384 n = self.c0.create_node_from_uri(immutable_uri) 385 d = download_to_data(n) 386 def _use_shares(ign): 387 shares = list(n._cnode._node._shares) 388 s0 = shares[0] 389 # make sure .cancel works too 390 o0 = s0.get_block(0) 391 o0.subscribe(lambda **kwargs: stay_empty.append(kwargs)) 392 o1 = s0.get_block(0) 393 o2 = s0.get_block(0) 394 o0.cancel() 395 o3 = s0.get_block(1) # state=BADSEGNUM 396 d1 = defer.Deferred() 397 d2 = defer.Deferred() 398 d3 = defer.Deferred() 399 o1.subscribe(lambda **kwargs: d1.callback(kwargs)) 400 o2.subscribe(lambda **kwargs: d2.callback(kwargs)) 401 o3.subscribe(lambda **kwargs: d3.callback(kwargs)) 402 return defer.gatherResults([d1,d2,d3]) 403 d.addCallback(_use_shares) 404 def _done(res): 405 r1,r2,r3 = res 406 self.failUnlessEqual(r1["state"], "COMPLETE") 407 self.failUnlessEqual(r2["state"], "COMPLETE") 408 self.failUnlessEqual(r3["state"], "BADSEGNUM") 409 self.failUnless("block" in r1) 410 self.failUnless("block" in r2) 411 self.failIf(stay_empty) 412 d.addCallback(_done) 413 return d 414 415 def test_download_no_overrun(self): 416 self.basedir = self.mktemp() 417 self.set_up_grid() 418 self.c0 = 
self.g.clients[0] 419 420 self.load_shares() 421 422 # tweak the client's copies of server-version data, so it believes 423 # that they're old and can't handle reads that overrun the length of 424 # the share. This exercises a different code path. 425 for (peerid, rref) in self.c0.storage_broker.get_all_servers(): 426 v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] 427 v1["tolerates-immutable-read-overrun"] = False 428 429 n = self.c0.create_node_from_uri(immutable_uri) 430 d = download_to_data(n) 431 def _got_data(data): 432 self.failUnlessEqual(data, plaintext) 433 d.addCallback(_got_data) 434 return d 435 436 def test_download_segment(self): 437 self.basedir = self.mktemp() 438 self.set_up_grid() 439 self.c0 = self.g.clients[0] 440 self.load_shares() 441 n = self.c0.create_node_from_uri(immutable_uri) 442 cn = n._cnode 443 (d,c) = cn.get_segment(0) 444 def _got_segment((offset,data,decodetime)): 445 self.failUnlessEqual(offset, 0) 446 self.failUnlessEqual(len(data), len(plaintext)) 447 d.addCallback(_got_segment) 448 return d 449 450 def test_download_segment_cancel(self): 451 self.basedir = self.mktemp() 452 self.set_up_grid() 453 self.c0 = self.g.clients[0] 454 self.load_shares() 455 n = self.c0.create_node_from_uri(immutable_uri) 456 cn = n._cnode 457 (d,c) = cn.get_segment(0) 458 fired = [] 459 d.addCallback(fired.append) 460 c.cancel() 461 d = fireEventually() 462 d.addCallback(flushEventualQueue) 463 def _check(ign): 464 self.failUnlessEqual(fired, []) 465 d.addCallback(_check) 466 return d 467 468 def test_download_bad_segment(self): 469 self.basedir = self.mktemp() 470 self.set_up_grid() 471 self.c0 = self.g.clients[0] 472 self.load_shares() 473 n = self.c0.create_node_from_uri(immutable_uri) 474 cn = n._cnode 475 def _try_download(): 476 (d,c) = cn.get_segment(1) 477 return d 478 d = self.shouldFail(BadSegmentNumberError, "badseg", 479 "segnum=1, numsegs=1", 480 _try_download) 481 return d 482 483 def 
test_download_segment_terminate(self): 484 self.basedir = self.mktemp() 485 self.set_up_grid() 486 self.c0 = self.g.clients[0] 487 self.load_shares() 488 n = self.c0.create_node_from_uri(immutable_uri) 489 cn = n._cnode 490 (d,c) = cn.get_segment(0) 491 fired = [] 492 d.addCallback(fired.append) 493 self.c0.terminator.disownServiceParent() 494 d = fireEventually() 495 d.addCallback(flushEventualQueue) 496 def _check(ign): 497 self.failUnlessEqual(fired, []) 498 d.addCallback(_check) 499 return d 500 501 def test_stop_producing(self): 502 self.basedir = self.mktemp() 503 self.set_up_grid() 504 self.c0 = self.g.clients[0] 505 self.load_shares() 506 n = self.c0.create_node_from_uri(immutable_uri) 507 508 con = MemoryConsumer() 509 d = n.read(con) 510 con.producer.stopProducing() 511 # d should never fire 512 del d 513 514 def test_download_segment_bad_ciphertext_hash(self): 515 # The crypttext_hash_tree asserts the integrity of the decoded 516 # ciphertext, and exists to detect two sorts of problems. The first 517 # is a bug in zfec decode. The second is the "two-sided t-shirt" 518 # attack (found by Christian Grothoff), in which a malicious uploader 519 # creates two sets of shares (one for file A, second for file B), 520 # uploads a combination of them (shares 0-4 of A, 5-9 of B), and then 521 # builds an otherwise normal UEB around those shares: their goal is 522 # to give their victim a filecap which sometimes downloads the good A 523 # contents, and sometimes the bad B contents, depending upon which 524 # servers/shares they can get to. Having a hash of the ciphertext 525 # forces them to commit to exactly one version. (Christian's prize 526 # for finding this problem was a t-shirt with two sides: the shares 527 # of file A on the front, B on the back). 528 529 # creating a set of shares with this property is too hard, although 530 # it'd be nice to do so and confirm our fix. (it requires a lot of 531 # tampering with the uploader). 
So instead, we just damage the 532 # decoder. The tail decoder is rebuilt each time, so we need to use a 533 # file with multiple segments. 534 self.basedir = self.mktemp() 535 self.set_up_grid() 536 self.c0 = self.g.clients[0] 537 538 u = upload.Data(plaintext, None) 539 u.max_segment_size = 60 # 6 segs 540 d = self.c0.upload(u) 541 def _uploaded(ur): 542 n = self.c0.create_node_from_uri(ur.uri) 543 n._cnode._node._build_guessed_tables(u.max_segment_size) 544 545 d = download_to_data(n) 546 def _break_codec(data): 547 # the codec isn't created until the UEB is retrieved 548 node = n._cnode._node 549 vcap = node._verifycap 550 k, N = vcap.needed_shares, vcap.total_shares 551 bad_codec = BrokenDecoder() 552 bad_codec.set_params(node.segment_size, k, N) 553 node._codec = bad_codec 554 d.addCallback(_break_codec) 555 # now try to download it again. The broken codec will provide 556 # ciphertext that fails the hash test. 557 d.addCallback(lambda ign: 558 self.shouldFail(BadCiphertextHashError, "badhash", 559 "hash failure in " 560 "ciphertext_hash_tree: segnum=0", 561 download_to_data, n)) 562 return d 563 d.addCallback(_uploaded) 564 return d 565 566 def OFFtest_download_segment_XXX(self): 567 self.basedir = self.mktemp() 568 self.set_up_grid() 569 self.c0 = self.g.clients[0] 570 571 # upload a file with multiple segments, and a non-default segsize, to 572 # exercise the offset-guessing code. This time we *do* tell the 573 # downloader about the unusual segsize, so it can guess right. 
574 u = upload.Data(plaintext, None) 575 u.max_segment_size = 70 # 5 segs, 8-wide hashtree 576 con1 = MemoryConsumer() 577 con2 = MemoryConsumer() 578 d = self.c0.upload(u) 579 def _uploaded(ur): 580 n = self.c0.create_node_from_uri(ur.uri) 581 n._cnode._node._build_guessed_tables(u.max_segment_size) 582 d1 = n.read(con1, 70, 20) 583 #d2 = n.read(con2, 140, 20) 584 d2 = defer.succeed(None) 585 return defer.gatherResults([d1,d2]) 586 d.addCallback(_uploaded) 587 def _done(res): 588 self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) 589 self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) 590 #d.addCallback(_done) 591 return d 592 593 def test_duplicate_shares(self): 594 self.basedir = self.mktemp() 595 self.set_up_grid() 596 self.c0 = self.g.clients[0] 597 598 self.load_shares() 599 # make sure everybody has a copy of sh0. The second server contacted 600 # will report two shares, and the ShareFinder will handle the 601 # duplicate by attaching both to the same CommonShare instance. 
602 si = uri.from_string(immutable_uri).get_storage_index() 603 si_dir = storage_index_to_dir(si) 604 sh0_file = [sharefile 605 for (shnum, serverid, sharefile) 606 in self.find_shares(immutable_uri) 607 if shnum == 0][0] 608 sh0_data = open(sh0_file, "rb").read() 609 for clientnum in immutable_shares: 610 if 0 in immutable_shares[clientnum]: 611 continue 612 cdir = self.get_serverdir(clientnum) 613 target = os.path.join(cdir, "shares", si_dir, "0") 614 outf = open(target, "wb") 615 outf.write(sh0_data) 616 outf.close() 617 618 d = self.download_immutable() 619 return d 620 621 def test_verifycap(self): 622 self.basedir = self.mktemp() 623 self.set_up_grid() 624 self.c0 = self.g.clients[0] 625 self.load_shares() 626 627 n = self.c0.create_node_from_uri(immutable_uri) 628 vcap = n.get_verify_cap().to_string() 629 vn = self.c0.create_node_from_uri(vcap) 630 d = download_to_data(vn) 631 def _got_ciphertext(ciphertext): 632 self.failUnlessEqual(len(ciphertext), len(plaintext)) 633 self.failIfEqual(ciphertext, plaintext) 634 d.addCallback(_got_ciphertext) 635 return d 636 637 class BrokenDecoder(CRSDecoder): 638 def decode(self, shares, shareids): 639 d = CRSDecoder.decode(self, shares, shareids) 640 def _decoded(buffers): 641 def _corruptor(s, which): 642 return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] 643 buffers[0] = _corruptor(buffers[0], 0) # flip lsb of first byte 644 return buffers 645 d.addCallback(_decoded) 646 return d 647 648 class Corruption(_Base, unittest.TestCase): 649 650 def test_each_byte(self): 651 # Setting catalog_detection=True performs an exhaustive test of the 652 # Downloader's response to corruption in the lsb of each byte of the 653 # 2070-byte share, with two goals: make sure we tolerate all forms of 654 # corruption (i.e. don't hang or return bad data), and make a list of 655 # which bytes can be corrupted without influencing the download 656 # (since we don't need every byte of the share). 
That takes 50s to 657 # run on my laptop and doesn't have any actual asserts, so we don't 658 # normally do that. 659 self.catalog_detection = False 660 661 self.basedir = "download/Corruption/each_byte" 662 self.set_up_grid() 663 self.c0 = self.g.clients[0] 664 665 # to exercise the block-hash-tree code properly, we need to have 666 # multiple segments. We don't tell the downloader about the different 667 # segsize, so it guesses wrong and must do extra roundtrips. 668 u = upload.Data(plaintext, None) 669 u.max_segment_size = 120 # 3 segs, 4-wide hashtree 670 671 def _fix_sh0(res): 672 f = open(self.sh0_file, "wb") 673 f.write(self.sh0_orig) 674 f.close() 675 def _corrupt_flip(ign, imm_uri, which): 676 log.msg("corrupt %d" % which) 677 def _corruptor(s, debug=False): 678 return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] 679 self.corrupt_shares_numbered(imm_uri, [0], _corruptor) 680 681 def _corrupt_set(ign, imm_uri, which, newvalue): 682 log.msg("corrupt %d" % which) 683 def _corruptor(s, debug=False): 684 return s[:which] + chr(newvalue) + s[which+1:] 685 self.corrupt_shares_numbered(imm_uri, [0], _corruptor) 686 687 if self.catalog_detection: 688 undetected = spans.Spans() 689 690 def _download(ign, imm_uri, which, expected): 691 n = self.c0.create_node_from_uri(imm_uri) 692 # for this test to work, we need to have a new Node each time. 693 # Make sure the NodeMaker's weakcache hasn't interfered. 
694 assert not n._cnode._node._shares 695 d = download_to_data(n) 696 def _got_data(data): 697 self.failUnlessEqual(data, plaintext) 698 shnums = sorted([s._shnum for s in n._cnode._node._shares]) 699 no_sh0 = bool(0 not in shnums) 700 sh0 = [s for s in n._cnode._node._shares if s._shnum == 0] 701 sh0_had_corruption = False 702 if sh0 and sh0[0].had_corruption: 703 sh0_had_corruption = True 704 num_needed = len(n._cnode._node._shares) 705 if self.catalog_detection: 706 detected = no_sh0 or sh0_had_corruption or (num_needed!=3) 707 if not detected: 708 undetected.add(which, 1) 709 if expected == "no-sh0": 710 self.failIfIn(0, shnums) 711 elif expected == "0bad-need-3": 712 self.failIf(no_sh0) 713 self.failUnless(sh0[0].had_corruption) 714 self.failUnlessEqual(num_needed, 3) 715 elif expected == "need-4th": 716 self.failIf(no_sh0) 717 self.failUnless(sh0[0].had_corruption) 718 self.failIfEqual(num_needed, 3) 719 d.addCallback(_got_data) 720 return d 721 722 723 d = self.c0.upload(u) 724 def _uploaded(ur): 725 imm_uri = ur.uri 726 self.sh0_file = [sharefile 727 for (shnum, serverid, sharefile) 728 in self.find_shares(imm_uri) 729 if shnum == 0][0] 730 self.sh0_orig = open(self.sh0_file, "rb").read() 731 d = defer.succeed(None) 732 # 'victims' is a list of corruption tests to run. Each one flips 733 # the low-order bit of the specified offset in the share file (so 734 # offset=0 is the MSB of the container version, offset=15 is the 735 # LSB of the share version, offset=24 is the MSB of the 736 # data-block-offset, and offset=48 is the first byte of the first 737 # data-block). Each one also specifies what sort of corruption 738 # we're expecting to see. 
739 no_sh0_victims = [0,1,2,3] # container version 740 need3_victims = [ ] # none currently in this category 741 # when the offsets are corrupted, the Share will be unable to 742 # retrieve the data it wants (because it thinks that data lives 743 # off in the weeds somewhere), and Share treats DataUnavailable 744 # as abandon-this-share, so in general we'll be forced to look 745 # for a 4th share. 746 need_4th_victims = [12,13,14,15, # share version 747 24,25,26,27, # offset[data] 748 32,33,34,35, # offset[crypttext_hash_tree] 749 36,37,38,39, # offset[block_hashes] 750 44,45,46,47, # offset[UEB] 751 ] 752 need_4th_victims.append(48) # block data 753 # when corrupting hash trees, we must corrupt a value that isn't 754 # directly set from somewhere else. Since we download data from 755 # seg0, corrupt something on its hash chain, like [2] (the 756 # right-hand child of the root) 757 need_4th_victims.append(600+2*32) # block_hashes[2] 758 # Share.loop is pretty conservative: it abandons the share at the 759 # first sign of corruption. It doesn't strictly need to be this 760 # way: if the UEB were corrupt, we could still get good block 761 # data from that share, as long as there was a good copy of the 762 # UEB elsewhere. If this behavior is relaxed, then corruption in 763 # the following fields (which are present in multiple shares) 764 # should fall into the "need3_victims" case instead of the 765 # "need_4th_victims" case. 
766 need_4th_victims.append(376+2*32) # crypttext_hash_tree[2] 767 need_4th_victims.append(824) # share_hashes 768 need_4th_victims.append(994) # UEB length 769 need_4th_victims.append(998) # UEB 770 corrupt_me = ([(i,"no-sh0") for i in no_sh0_victims] + 771 [(i, "0bad-need-3") for i in need3_victims] + 772 [(i, "need-4th") for i in need_4th_victims]) 773 if self.catalog_detection: 774 corrupt_me = [(i, "") for i in range(len(self.sh0_orig))] 775 for i,expected in corrupt_me: 776 d.addCallback(_corrupt_flip, imm_uri, i) 777 d.addCallback(_download, imm_uri, i, expected) 778 d.addCallback(_fix_sh0) 779 d.addCallback(fireEventually) 780 corrupt_values = [(3, 2, "no-sh0"), 781 (15, 2, "need-4th"), # share looks v2 782 ] 783 for i,newvalue,expected in corrupt_values: 784 d.addCallback(_corrupt_set, imm_uri, i, newvalue) 785 d.addCallback(_download, imm_uri, i, expected) 786 d.addCallback(_fix_sh0) 787 d.addCallback(fireEventually) 788 return d 789 d.addCallback(_uploaded) 790 def _show_results(ign): 791 print 792 print ("of [0:%d], corruption ignored in %s" % 793 (len(self.sh0_orig), undetected.dump())) 794 if self.catalog_detection: 795 d.addCallback(_show_results) 796 # of [0:2070], corruption ignored in len=1133: 797 # [4-11],[16-23],[28-31],[152-439],[600-663],[1309-2069] 798 # [4-11]: container sizes 799 # [16-23]: share block/data sizes 800 # [152-375]: plaintext hash tree 801 # [376-408]: crypttext_hash_tree[0] (root) 802 # [408-439]: crypttext_hash_tree[1] (computed) 803 # [600-631]: block hash tree[0] (root) 804 # [632-663]: block hash tree[1] (computed) 805 # [1309-]: reserved+unused UEB space 806 return d 807 808 809 class DownloadV2(_Base, unittest.TestCase): 810 # tests which exercise v2-share code. They first upload a file with 811 # FORCE_V2 set. 
812 813 def setUp(self): 814 d = defer.maybeDeferred(_Base.setUp, self) 815 def _set_force_v2(ign): 816 self.old_force_v2 = layout.FORCE_V2 817 layout.FORCE_V2 = True 818 d.addCallback(_set_force_v2) 819 return d 820 def tearDown(self): 821 layout.FORCE_V2 = self.old_force_v2 822 return _Base.tearDown(self) 823 824 def test_download(self): 825 self.basedir = self.mktemp() 826 self.set_up_grid() 827 self.c0 = self.g.clients[0] 828 829 # upload a file 830 u = upload.Data(plaintext, None) 831 d = self.c0.upload(u) 832 def _uploaded(ur): 833 imm_uri = ur.uri 834 n = self.c0.create_node_from_uri(imm_uri) 835 return download_to_data(n) 836 d.addCallback(_uploaded) 837 return d 838 839 def test_download_no_overrun(self): 840 self.basedir = self.mktemp() 841 self.set_up_grid() 842 self.c0 = self.g.clients[0] 843 844 # tweak the client's copies of server-version data, so it believes 845 # that they're old and can't handle reads that overrun the length of 846 # the share. This exercises a different code path. 
847 for (peerid, rref) in self.c0.storage_broker.get_all_servers(): 848 v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] 849 v1["tolerates-immutable-read-overrun"] = False 850 851 # upload a file 852 u = upload.Data(plaintext, None) 853 d = self.c0.upload(u) 854 def _uploaded(ur): 855 imm_uri = ur.uri 856 n = self.c0.create_node_from_uri(imm_uri) 857 return download_to_data(n) 858 d.addCallback(_uploaded) 859 return d 860 861 def OFF_test_no_overrun_corrupt_shver(self): # unnecessary 862 self.basedir = self.mktemp() 863 self.set_up_grid() 864 self.c0 = self.g.clients[0] 865 866 for (peerid, rref) in self.c0.storage_broker.get_all_servers(): 867 v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] 868 v1["tolerates-immutable-read-overrun"] = False 869 870 # upload a file 871 u = upload.Data(plaintext, None) 872 d = self.c0.upload(u) 873 def _uploaded(ur): 874 imm_uri = ur.uri 875 def _do_corrupt(which, newvalue): 876 def _corruptor(s, debug=False): 877 return s[:which] + chr(newvalue) + s[which+1:] 878 self.corrupt_shares_numbered(imm_uri, [0], _corruptor) 879 _do_corrupt(12+3, 0x00) 880 n = self.c0.create_node_from_uri(imm_uri) 881 d = download_to_data(n) 882 def _got_data(data): 883 self.failUnlessEqual(data, plaintext) 884 d.addCallback(_got_data) 885 return d 886 d.addCallback(_uploaded) 887 return d -
src/allmydata/test/test_filenode.py
diff --git a/src/allmydata/test/test_filenode.py b/src/allmydata/test/test_filenode.py index 5f3feaa..61bb0e8 100644
a b 2 2 from twisted.trial import unittest 3 3 from allmydata import uri, client 4 4 from allmydata.monitor import Monitor 5 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode 5 from allmydata.immutable.literal import LiteralFileNode 6 from allmydata.immutable.filenode import ImmutableFileNode 6 7 from allmydata.mutable.filenode import MutableFileNode 7 from allmydata.util import hashutil , cachedir8 from allmydata.util import hashutil 8 9 from allmydata.util.consumer import download_to_data 9 10 10 11 class NotANode: … … class Node(unittest.TestCase): 30 31 needed_shares=3, 31 32 total_shares=10, 32 33 size=1000) 33 cf = cachedir.CacheFile("none") 34 fn1 = ImmutableFileNode(u, None, None, None, None, cf) 35 fn2 = ImmutableFileNode(u, None, None, None, None, cf) 34 fn1 = ImmutableFileNode(u, None, None, None, None) 35 fn2 = ImmutableFileNode(u, None, None, None, None) 36 36 self.failUnlessEqual(fn1, fn2) 37 37 self.failIfEqual(fn1, "I am not a filenode") 38 38 self.failIfEqual(fn1, NotANode()) -
src/allmydata/test/test_hung_server.py
diff --git a/src/allmydata/test/test_hung_server.py b/src/allmydata/test/test_hung_server.py index 4aef484..b87658d 100644
a b class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 19 19 # Many of these tests take around 60 seconds on François's ARM buildslave: 20 20 # http://tahoe-lafs.org/buildbot/builders/FranXois%20lenny-armv5tel 21 21 timeout = 120 22 skip="not ready" 22 23 23 24 def _break(self, servers): 24 25 for (id, ss) in servers: … … class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 109 110 stage_4_d = None # currently we aren't doing any tests which require this for mutable files 110 111 else: 111 112 d = download_to_data(n) 112 stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! FIXME 113 #stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! FIXME 114 stage_4_d = None 113 115 return (d, stage_4_d,) 114 116 115 117 def _wait_for_data(self, n): … … class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 137 139 self._download_and_check) 138 140 else: 139 141 return self.shouldFail(NotEnoughSharesError, self.basedir, 140 " Failed to get enough shareholders",142 "ran out of shares", 141 143 self._download_and_check) 142 144 143 145 … … class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 230 232 return d 231 233 232 234 def test_failover_during_stage_4(self): 235 raise unittest.SkipTest("needs rewrite") 233 236 # See #287 234 237 d = defer.succeed(None) 235 238 for mutable in [False]: -
src/allmydata/test/test_immutable.py
diff --git a/src/allmydata/test/test_immutable.py b/src/allmydata/test/test_immutable.py index a430db2..a61c058 100644
a b from twisted.internet import defer 5 5 from twisted.trial import unittest 6 6 import random 7 7 8 class Test(common.ShareManglingMixin, unittest.TestCase):8 class Test(common.ShareManglingMixin, common.ShouldFailMixin, unittest.TestCase): 9 9 def test_test_code(self): 10 10 # The following process of stashing the shares, running 11 11 # replace_shares, and asserting that the new set of shares equals the … … class Test(common.ShareManglingMixin, unittest.TestCase): 18 18 return res 19 19 d.addCallback(_stash_it) 20 20 21 # The following process of deleting 8 of the shares and asserting that you can't 22 # download it is more to test this test code than to test the Tahoe code... 21 # The following process of deleting 8 of the shares and asserting 22 # that you can't download it is more to test this test code than to 23 # test the Tahoe code... 23 24 def _then_delete_8(unused=None): 24 25 self.replace_shares(stash[0], storage_index=self.uri.get_storage_index()) 25 26 for i in range(8): … … class Test(common.ShareManglingMixin, unittest.TestCase): 42 43 return d 43 44 44 45 def test_download(self): 45 """ Basic download. (This functionality is more or less already tested by test code in46 other modules, but this module is also going to test some more specific things about47 immutable download.)46 """ Basic download. (This functionality is more or less already 47 tested by test code in other modules, but this module is also going 48 to test some more specific things about immutable download.) 
48 49 """ 49 50 d = defer.succeed(None) 50 51 before_download_reads = self._count_reads() 51 52 def _after_download(unused=None): 52 53 after_download_reads = self._count_reads() 53 self.failIf(after_download_reads-before_download_reads > 27, (after_download_reads, before_download_reads)) 54 #print before_download_reads, after_download_reads 55 self.failIf(after_download_reads-before_download_reads > 27, 56 (after_download_reads, before_download_reads)) 54 57 d.addCallback(self._download_and_check_plaintext) 55 58 d.addCallback(_after_download) 56 59 return d 57 60 58 61 def test_download_from_only_3_remaining_shares(self): 59 """ Test download after 7 random shares (of the 10) have been removed. """ 62 """ Test download after 7 random shares (of the 10) have been 63 removed.""" 60 64 d = defer.succeed(None) 61 65 def _then_delete_7(unused=None): 62 66 for i in range(7): … … class Test(common.ShareManglingMixin, unittest.TestCase): 65 69 d.addCallback(_then_delete_7) 66 70 def _after_download(unused=None): 67 71 after_download_reads = self._count_reads() 72 #print before_download_reads, after_download_reads 68 73 self.failIf(after_download_reads-before_download_reads > 27, (after_download_reads, before_download_reads)) 69 74 d.addCallback(self._download_and_check_plaintext) 70 75 d.addCallback(_after_download) 71 76 return d 72 77 73 78 def test_download_from_only_3_shares_with_good_crypttext_hash(self): 74 """ Test download after 7 random shares (of the 10) have had their crypttext hash tree corrupted. 
""" 79 """ Test download after 7 random shares (of the 10) have had their 80 crypttext hash tree corrupted.""" 75 81 d = defer.succeed(None) 76 82 def _then_corrupt_7(unused=None): 77 83 shnums = range(10) … … class Test(common.ShareManglingMixin, unittest.TestCase): 84 90 return d 85 91 86 92 def test_download_abort_if_too_many_missing_shares(self): 87 """ Test that download gives up quickly when it realizes there aren't enough shares out 88 there.""" 89 d = defer.succeed(None) 90 def _then_delete_8(unused=None): 91 for i in range(8): 92 self._delete_a_share() 93 d.addCallback(_then_delete_8) 94 95 before_download_reads = self._count_reads() 96 def _attempt_to_download(unused=None): 97 d2 = download_to_data(self.n) 98 99 def _callb(res): 100 self.fail("Should have gotten an error from attempt to download, not %r" % (res,)) 101 def _errb(f): 102 self.failUnless(f.check(NotEnoughSharesError)) 103 d2.addCallbacks(_callb, _errb) 104 return d2 105 106 d.addCallback(_attempt_to_download) 107 108 def _after_attempt(unused=None): 109 after_download_reads = self._count_reads() 110 # To pass this test, you are required to give up before actually trying to read any 111 # share data. 112 self.failIf(after_download_reads-before_download_reads > 0, (after_download_reads, before_download_reads)) 113 d.addCallback(_after_attempt) 93 """ Test that download gives up quickly when it realizes there aren't 94 enough shares out there.""" 95 for i in range(8): 96 self._delete_a_share() 97 d = self.shouldFail(NotEnoughSharesError, "delete 8", None, 98 download_to_data, self.n) 99 # the new downloader pipelines a bunch of read requests in parallel, 100 # so don't bother asserting anything about the number of reads 114 101 return d 115 102 116 103 def test_download_abort_if_too_many_corrupted_shares(self): 117 """ Test that download gives up quickly when it realizes there aren't enough uncorrupted 118 shares out there. 
It should be able to tell because the corruption occurs in the 119 sharedata version number, which it checks first.""" 104 """Test that download gives up quickly when it realizes there aren't 105 enough uncorrupted shares out there. It should be able to tell 106 because the corruption occurs in the sharedata version number, which 107 it checks first.""" 120 108 d = defer.succeed(None) 121 109 def _then_corrupt_8(unused=None): 122 110 shnums = range(10) … … class Test(common.ShareManglingMixin, unittest.TestCase): 140 128 141 129 def _after_attempt(unused=None): 142 130 after_download_reads = self._count_reads() 143 # To pass this test, you are required to give up before reading all of the share 144 # data. Actually, we could give up sooner than 45 reads, but currently our download 145 # code does 45 reads. This test then serves as a "performance regression detector" 146 # -- if you change download code so that it takes *more* reads, then this test will 147 # fail. 148 self.failIf(after_download_reads-before_download_reads > 45, (after_download_reads, before_download_reads)) 131 #print before_download_reads, after_download_reads 132 # To pass this test, you are required to give up before reading 133 # all of the share data. Actually, we could give up sooner than 134 # 45 reads, but currently our download code does 45 reads. This 135 # test then serves as a "performance regression detector" -- if 136 # you change download code so that it takes *more* reads, then 137 # this test will fail. 
138 self.failIf(after_download_reads-before_download_reads > 45, 139 (after_download_reads, before_download_reads)) 149 140 d.addCallback(_after_attempt) 150 141 return d 151 142 152 143 153 # XXX extend these tests to show bad behavior of various kinds from servers: raising exception from each remove_foo() method, for example 144 # XXX extend these tests to show bad behavior of various kinds from servers: 145 # raising exception from each remove_foo() method, for example 154 146 155 147 # XXX test disconnect DeadReferenceError from get_buckets and get_block_whatsit 156 148 149 # TODO: delete this whole file -
src/allmydata/test/test_mutable.py
diff --git a/src/allmydata/test/test_mutable.py b/src/allmydata/test/test_mutable.py index fa29d34..1c3825c 100644
a b def make_nodemaker(s=None, num_peers=10): 197 197 keygen = client.KeyGenerator() 198 198 keygen.set_default_keysize(522) 199 199 nodemaker = NodeMaker(storage_broker, sh, None, 200 None, None, None,200 None, None, 201 201 {"k": 3, "n": 10}, keygen) 202 202 return nodemaker 203 203 -
src/allmydata/test/test_repairer.py
diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py index 91ab704..8075a21 100644
a b from allmydata.test import common 3 3 from allmydata.monitor import Monitor 4 4 from allmydata import check_results 5 5 from allmydata.interfaces import NotEnoughSharesError 6 from allmydata.immutable import repairer,upload6 from allmydata.immutable import upload 7 7 from allmydata.util.consumer import download_to_data 8 8 from twisted.internet import defer 9 9 from twisted.trial import unittest … … WRITE_LEEWAY = 35 363 363 # Optimally, you could repair one of these (small) files in a single write. 364 364 DELTA_WRITES_PER_SHARE = 1 * WRITE_LEEWAY 365 365 366 class DownUpConnector(unittest.TestCase):367 def test_deferred_satisfaction(self):368 duc = repairer.DownUpConnector()369 duc.registerProducer(None, True) # just because you have to call registerProducer first370 # case 1: total data in buf is < requested data at time of request371 duc.write('\x01')372 d = duc.read_encrypted(2, False)373 def _then(data):374 self.failUnlessEqual(len(data), 2)375 self.failUnlessEqual(data[0], '\x01')376 self.failUnlessEqual(data[1], '\x02')377 d.addCallback(_then)378 duc.write('\x02')379 return d380 381 def test_extra(self):382 duc = repairer.DownUpConnector()383 duc.registerProducer(None, True) # just because you have to call registerProducer first384 # case 1: total data in buf is < requested data at time of request385 duc.write('\x01')386 d = duc.read_encrypted(2, False)387 def _then(data):388 self.failUnlessEqual(len(data), 2)389 self.failUnlessEqual(data[0], '\x01')390 self.failUnlessEqual(data[1], '\x02')391 d.addCallback(_then)392 duc.write('\x02\0x03')393 return d394 395 def test_short_reads_1(self):396 # You don't get fewer bytes than you requested -- instead you get no callback at all.397 duc = repairer.DownUpConnector()398 duc.registerProducer(None, True) # just because you have to call registerProducer first399 400 d = duc.read_encrypted(2, False)401 duc.write('\x04')402 403 def _callb(res):404 self.fail("Shouldn't have gotten this callback res: %s" % (res,))405 
d.addCallback(_callb)406 407 # Also in the other order of read-vs-write:408 duc2 = repairer.DownUpConnector()409 duc2.registerProducer(None, True) # just because you have to call registerProducer first410 duc2.write('\x04')411 d = duc2.read_encrypted(2, False)412 413 def _callb2(res):414 self.fail("Shouldn't have gotten this callback res: %s" % (res,))415 d.addCallback(_callb2)416 417 # But once the DUC is closed then you *do* get short reads.418 duc3 = repairer.DownUpConnector()419 duc3.registerProducer(None, True) # just because you have to call registerProducer first420 421 d = duc3.read_encrypted(2, False)422 duc3.write('\x04')423 duc3.close()424 def _callb3(res):425 self.failUnlessEqual(len(res), 1)426 self.failUnlessEqual(res[0], '\x04')427 d.addCallback(_callb3)428 return d429 430 def test_short_reads_2(self):431 # Also in the other order of read-vs-write.432 duc = repairer.DownUpConnector()433 duc.registerProducer(None, True) # just because you have to call registerProducer first434 435 duc.write('\x04')436 d = duc.read_encrypted(2, False)437 duc.close()438 439 def _callb(res):440 self.failUnlessEqual(len(res), 1)441 self.failUnlessEqual(res[0], '\x04')442 d.addCallback(_callb)443 return d444 445 def test_short_reads_3(self):446 # Also if it is closed before the read.447 duc = repairer.DownUpConnector()448 duc.registerProducer(None, True) # just because you have to call registerProducer first449 450 duc.write('\x04')451 duc.close()452 d = duc.read_encrypted(2, False)453 def _callb(res):454 self.failUnlessEqual(len(res), 1)455 self.failUnlessEqual(res[0], '\x04')456 d.addCallback(_callb)457 return d458 459 366 class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, 460 367 common.ShouldFailMixin): 461 368 -
src/allmydata/test/test_system.py
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index c1b1f7f..a9956e3 100644
a b from allmydata import uri 9 9 from allmydata.storage.mutable import MutableShareFile 10 10 from allmydata.storage.server import si_a2b 11 11 from allmydata.immutable import offloaded, upload 12 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode 12 from allmydata.immutable.literal import LiteralFileNode 13 from allmydata.immutable.filenode import ImmutableFileNode 13 14 from allmydata.util import idlib, mathutil 14 15 from allmydata.util import log, base32 15 16 from allmydata.util.consumer import MemoryConsumer, download_to_data -
src/allmydata/test/test_util.py
diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index 0a326b3..2fceee5 100644
a b from twisted.trial import unittest 7 7 from twisted.internet import defer, reactor 8 8 from twisted.python.failure import Failure 9 9 from twisted.python import log 10 from hashlib import md5 10 11 11 12 from allmydata.util import base32, idlib, humanreadable, mathutil, hashutil 12 13 from allmydata.util import assertutil, fileutil, deferredutil, abbreviate 13 14 from allmydata.util import limiter, time_format, pollmixin, cachedir 14 15 from allmydata.util import statistics, dictutil, pipeline 15 16 from allmydata.util import log as tahoe_log 17 from allmydata.util.spans import Spans, overlap, DataSpans 16 18 17 19 class Base32(unittest.TestCase): 18 20 def test_b2a_matches_Pythons(self): … … class Log(unittest.TestCase): 1537 1539 tahoe_log.err(format="intentional sample error", 1538 1540 failure=f, level=tahoe_log.OPERATIONAL, umid="wO9UoQ") 1539 1541 self.flushLoggedErrors(SampleError) 1542 1543 1544 class SimpleSpans: 1545 # this is a simple+inefficient form of util.spans.Spans . We compare the 1546 # behavior of this reference model against the real (efficient) form. 
1547 1548 def __init__(self, _span_or_start=None, length=None): 1549 self._have = set() 1550 if length is not None: 1551 for i in range(_span_or_start, _span_or_start+length): 1552 self._have.add(i) 1553 elif _span_or_start: 1554 for (start,length) in _span_or_start: 1555 self.add(start, length) 1556 1557 def add(self, start, length): 1558 for i in range(start, start+length): 1559 self._have.add(i) 1560 return self 1561 1562 def remove(self, start, length): 1563 for i in range(start, start+length): 1564 self._have.discard(i) 1565 return self 1566 1567 def each(self): 1568 return sorted(self._have) 1569 1570 def __iter__(self): 1571 items = sorted(self._have) 1572 prevstart = None 1573 prevend = None 1574 for i in items: 1575 if prevstart is None: 1576 prevstart = prevend = i 1577 continue 1578 if i == prevend+1: 1579 prevend = i 1580 continue 1581 yield (prevstart, prevend-prevstart+1) 1582 prevstart = prevend = i 1583 if prevstart is not None: 1584 yield (prevstart, prevend-prevstart+1) 1585 1586 def __len__(self): 1587 # this also gets us bool(s) 1588 return len(self._have) 1589 1590 def __add__(self, other): 1591 s = self.__class__(self) 1592 for (start, length) in other: 1593 s.add(start, length) 1594 return s 1595 1596 def __sub__(self, other): 1597 s = self.__class__(self) 1598 for (start, length) in other: 1599 s.remove(start, length) 1600 return s 1601 1602 def __iadd__(self, other): 1603 for (start, length) in other: 1604 self.add(start, length) 1605 return self 1606 1607 def __isub__(self, other): 1608 for (start, length) in other: 1609 self.remove(start, length) 1610 return self 1611 1612 def __and__(self, other): 1613 s = self.__class__() 1614 for i in other.each(): 1615 if i in self._have: 1616 s.add(i, 1) 1617 return s 1618 1619 def __contains__(self, (start,length)): 1620 for i in range(start, start+length): 1621 if i not in self._have: 1622 return False 1623 return True 1624 1625 class ByteSpans(unittest.TestCase): 1626 def test_basic(self): 1627 s 
= Spans() 1628 self.failUnlessEqual(list(s), []) 1629 self.failIf(s) 1630 self.failIf((0,1) in s) 1631 self.failUnlessEqual(len(s), 0) 1632 1633 s1 = Spans(3, 4) # 3,4,5,6 1634 self._check1(s1) 1635 1636 s2 = Spans(s1) 1637 self._check1(s2) 1638 1639 s2.add(10,2) # 10,11 1640 self._check1(s1) 1641 self.failUnless((10,1) in s2) 1642 self.failIf((10,1) in s1) 1643 self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11]) 1644 self.failUnlessEqual(len(s2), 6) 1645 1646 s2.add(15,2).add(20,2) 1647 self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11,15,16,20,21]) 1648 self.failUnlessEqual(len(s2), 10) 1649 1650 s2.remove(4,3).remove(15,1) 1651 self.failUnlessEqual(list(s2.each()), [3,10,11,16,20,21]) 1652 self.failUnlessEqual(len(s2), 6) 1653 1654 s1 = SimpleSpans(3, 4) # 3 4 5 6 1655 s2 = SimpleSpans(5, 4) # 5 6 7 8 1656 i = s1 & s2 1657 self.failUnlessEqual(list(i.each()), [5, 6]) 1658 1659 def _check1(self, s): 1660 self.failUnlessEqual(list(s), [(3,4)]) 1661 self.failUnless(s) 1662 self.failUnlessEqual(len(s), 4) 1663 self.failIf((0,1) in s) 1664 self.failUnless((3,4) in s) 1665 self.failUnless((3,1) in s) 1666 self.failUnless((5,2) in s) 1667 self.failUnless((6,1) in s) 1668 self.failIf((6,2) in s) 1669 self.failIf((7,1) in s) 1670 self.failUnlessEqual(list(s.each()), [3,4,5,6]) 1671 1672 def test_math(self): 1673 s1 = Spans(0, 10) # 0,1,2,3,4,5,6,7,8,9 1674 s2 = Spans(5, 3) # 5,6,7 1675 s3 = Spans(8, 4) # 8,9,10,11 1676 1677 s = s1 - s2 1678 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9]) 1679 s = s1 - s3 1680 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7]) 1681 s = s2 - s3 1682 self.failUnlessEqual(list(s.each()), [5,6,7]) 1683 s = s1 & s2 1684 self.failUnlessEqual(list(s.each()), [5,6,7]) 1685 s = s2 & s1 1686 self.failUnlessEqual(list(s.each()), [5,6,7]) 1687 s = s1 & s3 1688 self.failUnlessEqual(list(s.each()), [8,9]) 1689 s = s3 & s1 1690 self.failUnlessEqual(list(s.each()), [8,9]) 1691 s = s2 & s3 1692 self.failUnlessEqual(list(s.each()), []) 
1693 s = s3 & s2 1694 self.failUnlessEqual(list(s.each()), []) 1695 s = Spans() & s3 1696 self.failUnlessEqual(list(s.each()), []) 1697 s = s3 & Spans() 1698 self.failUnlessEqual(list(s.each()), []) 1699 1700 s = s1 + s2 1701 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9]) 1702 s = s1 + s3 1703 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11]) 1704 s = s2 + s3 1705 self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11]) 1706 1707 s = Spans(s1) 1708 s -= s2 1709 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9]) 1710 s = Spans(s1) 1711 s -= s3 1712 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7]) 1713 s = Spans(s2) 1714 s -= s3 1715 self.failUnlessEqual(list(s.each()), [5,6,7]) 1716 1717 s = Spans(s1) 1718 s += s2 1719 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9]) 1720 s = Spans(s1) 1721 s += s3 1722 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11]) 1723 s = Spans(s2) 1724 s += s3 1725 self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11]) 1726 1727 def test_random(self): 1728 # attempt to increase coverage of corner cases by comparing behavior 1729 # of a simple-but-slow model implementation against the 1730 # complex-but-fast actual implementation, in a large number of random 1731 # operations 1732 S1 = SimpleSpans 1733 S2 = Spans 1734 s1 = S1(); s2 = S2() 1735 seed = "" 1736 def _create(subseed): 1737 ns1 = S1(); ns2 = S2() 1738 for i in range(10): 1739 what = md5(subseed+str(i)).hexdigest() 1740 start = int(what[2:4], 16) 1741 length = max(1,int(what[5:6], 16)) 1742 ns1.add(start, length); ns2.add(start, length) 1743 return ns1, ns2 1744 1745 #print 1746 for i in range(1000): 1747 what = md5(seed+str(i)).hexdigest() 1748 op = what[0] 1749 subop = what[1] 1750 start = int(what[2:4], 16) 1751 length = max(1,int(what[5:6], 16)) 1752 #print what 1753 if op in "0": 1754 if subop in "01234": 1755 s1 = S1(); s2 = S2() 1756 elif subop in "5678": 1757 s1 = S1(start, length); s2 = S2(start, length) 1758 
else: 1759 s1 = S1(s1); s2 = S2(s2) 1760 #print "s2 = %s" % s2.dump() 1761 elif op in "123": 1762 #print "s2.add(%d,%d)" % (start, length) 1763 s1.add(start, length); s2.add(start, length) 1764 elif op in "456": 1765 #print "s2.remove(%d,%d)" % (start, length) 1766 s1.remove(start, length); s2.remove(start, length) 1767 elif op in "78": 1768 ns1, ns2 = _create(what[7:11]) 1769 #print "s2 + %s" % ns2.dump() 1770 s1 = s1 + ns1; s2 = s2 + ns2 1771 elif op in "9a": 1772 ns1, ns2 = _create(what[7:11]) 1773 #print "%s - %s" % (s2.dump(), ns2.dump()) 1774 s1 = s1 - ns1; s2 = s2 - ns2 1775 elif op in "bc": 1776 ns1, ns2 = _create(what[7:11]) 1777 #print "s2 += %s" % ns2.dump() 1778 s1 += ns1; s2 += ns2 1779 elif op in "de": 1780 ns1, ns2 = _create(what[7:11]) 1781 #print "%s -= %s" % (s2.dump(), ns2.dump()) 1782 s1 -= ns1; s2 -= ns2 1783 else: 1784 ns1, ns2 = _create(what[7:11]) 1785 #print "%s &= %s" % (s2.dump(), ns2.dump()) 1786 s1 = s1 & ns1; s2 = s2 & ns2 1787 #print "s2 now %s" % s2.dump() 1788 self.failUnlessEqual(list(s1.each()), list(s2.each())) 1789 self.failUnlessEqual(len(s1), len(s2)) 1790 self.failUnlessEqual(bool(s1), bool(s2)) 1791 self.failUnlessEqual(list(s1), list(s2)) 1792 for j in range(10): 1793 what = md5(what[12:14]+str(j)).hexdigest() 1794 start = int(what[2:4], 16) 1795 length = max(1, int(what[5:6], 16)) 1796 span = (start, length) 1797 self.failUnlessEqual(bool(span in s1), bool(span in s2)) 1798 1799 1800 # s() 1801 # s(start,length) 1802 # s(s0) 1803 # s.add(start,length) : returns s 1804 # s.remove(start,length) 1805 # s.each() -> list of byte offsets, mostly for testing 1806 # list(s) -> list of (start,length) tuples, one per span 1807 # (start,length) in s -> True if (start..start+length-1) are all members 1808 # NOT equivalent to x in list(s) 1809 # len(s) -> number of bytes, for testing, bool(), and accounting/limiting 1810 # bool(s) (__len__) 1811 # s = s1+s2, s1-s2, +=s1, -=s1 1812 1813 def test_overlap(self): 1814 for a in range(20): 
1815 for b in range(10): 1816 for c in range(20): 1817 for d in range(10): 1818 self._test_overlap(a,b,c,d) 1819 1820 def _test_overlap(self, a, b, c, d): 1821 s1 = set(range(a,a+b)) 1822 s2 = set(range(c,c+d)) 1823 #print "---" 1824 #self._show_overlap(s1, "1") 1825 #self._show_overlap(s2, "2") 1826 o = overlap(a,b,c,d) 1827 expected = s1.intersection(s2) 1828 if not expected: 1829 self.failUnlessEqual(o, None) 1830 else: 1831 start,length = o 1832 so = set(range(start,start+length)) 1833 #self._show(so, "o") 1834 self.failUnlessEqual(so, expected) 1835 1836 def _show_overlap(self, s, c): 1837 import sys 1838 out = sys.stdout 1839 if s: 1840 for i in range(max(s)): 1841 if i in s: 1842 out.write(c) 1843 else: 1844 out.write(" ") 1845 out.write("\n") 1846 1847 def extend(s, start, length, fill): 1848 if len(s) >= start+length: 1849 return s 1850 assert len(fill) == 1 1851 return s + fill*(start+length-len(s)) 1852 1853 def replace(s, start, data): 1854 assert len(s) >= start+len(data) 1855 return s[:start] + data + s[start+len(data):] 1856 1857 class SimpleDataSpans: 1858 def __init__(self, other=None): 1859 self.missing = "" # "1" where missing, "0" where found 1860 self.data = "" 1861 if other: 1862 for (start, data) in other.get_chunks(): 1863 self.add(start, data) 1864 1865 def __len__(self): 1866 return len(self.missing.translate(None, "1")) 1867 def _dump(self): 1868 return [i for (i,c) in enumerate(self.missing) if c == "0"] 1869 def _have(self, start, length): 1870 m = self.missing[start:start+length] 1871 if not m or len(m)<length or int(m): 1872 return False 1873 return True 1874 def get_chunks(self): 1875 for i in self._dump(): 1876 yield (i, self.data[i]) 1877 def get_spans(self): 1878 return SimpleSpans([(start,len(data)) 1879 for (start,data) in self.get_chunks()]) 1880 def get(self, start, length): 1881 if self._have(start, length): 1882 return self.data[start:start+length] 1883 return None 1884 def pop(self, start, length): 1885 data = 
self.get(start, length) 1886 if data: 1887 self.remove(start, length) 1888 return data 1889 def remove(self, start, length): 1890 self.missing = replace(extend(self.missing, start, length, "1"), 1891 start, "1"*length) 1892 def add(self, start, data): 1893 self.missing = replace(extend(self.missing, start, len(data), "1"), 1894 start, "0"*len(data)) 1895 self.data = replace(extend(self.data, start, len(data), " "), 1896 start, data) 1897 1898 1899 class StringSpans(unittest.TestCase): 1900 def do_basic(self, klass): 1901 ds = klass() 1902 self.failUnlessEqual(len(ds), 0) 1903 self.failUnlessEqual(list(ds._dump()), []) 1904 self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 0) 1905 s = ds.get_spans() 1906 self.failUnlessEqual(ds.get(0, 4), None) 1907 self.failUnlessEqual(ds.pop(0, 4), None) 1908 ds.remove(0, 4) 1909 1910 ds.add(2, "four") 1911 self.failUnlessEqual(len(ds), 4) 1912 self.failUnlessEqual(list(ds._dump()), [2,3,4,5]) 1913 self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4) 1914 s = ds.get_spans() 1915 self.failUnless((2,2) in s) 1916 self.failUnlessEqual(ds.get(0, 4), None) 1917 self.failUnlessEqual(ds.pop(0, 4), None) 1918 self.failUnlessEqual(ds.get(4, 4), None) 1919 1920 ds2 = klass(ds) 1921 self.failUnlessEqual(len(ds2), 4) 1922 self.failUnlessEqual(list(ds2._dump()), [2,3,4,5]) 1923 self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 4) 1924 self.failUnlessEqual(ds2.get(0, 4), None) 1925 self.failUnlessEqual(ds2.pop(0, 4), None) 1926 self.failUnlessEqual(ds2.pop(2, 3), "fou") 1927 self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 1) 1928 self.failUnlessEqual(ds2.get(2, 3), None) 1929 self.failUnlessEqual(ds2.get(5, 1), "r") 1930 self.failUnlessEqual(ds.get(2, 3), "fou") 1931 self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4) 1932 1933 ds.add(0, "23") 1934 self.failUnlessEqual(len(ds), 6) 1935 self.failUnlessEqual(list(ds._dump()), [0,1,2,3,4,5]) 1936 
self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 6) 1937 self.failUnlessEqual(ds.get(0, 4), "23fo") 1938 self.failUnlessEqual(ds.pop(0, 4), "23fo") 1939 self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 2) 1940 self.failUnlessEqual(ds.get(0, 4), None) 1941 self.failUnlessEqual(ds.pop(0, 4), None) 1942 1943 ds = klass() 1944 ds.add(2, "four") 1945 ds.add(3, "ea") 1946 self.failUnlessEqual(ds.get(2, 4), "fear") 1947 1948 def do_scan(self, klass): 1949 # do a test with gaps and spans of size 1 and 2 1950 # left=(1,11) * right=(1,11) * gapsize=(1,2) 1951 # 111, 112, 121, 122, 211, 212, 221, 222 1952 # 211 1953 # 121 1954 # 112 1955 # 212 1956 # 222 1957 # 221 1958 # 111 1959 # 122 1960 # 11 1 1 11 11 11 1 1 111 1961 # 0123456789012345678901234567 1962 # abcdefghijklmnopqrstuvwxyz-= 1963 pieces = [(1, "bc"), 1964 (4, "e"), 1965 (7, "h"), 1966 (9, "jk"), 1967 (12, "mn"), 1968 (16, "qr"), 1969 (20, "u"), 1970 (22, "w"), 1971 (25, "z-="), 1972 ] 1973 p_elements = set([1,2,4,7,9,10,12,13,16,17,20,22,25,26,27]) 1974 S = "abcdefghijklmnopqrstuvwxyz-=" 1975 # TODO: when adding data, add capital letters, to make sure we aren't 1976 # just leaving the old data in place 1977 l = len(S) 1978 def base(): 1979 ds = klass() 1980 for start, data in pieces: 1981 ds.add(start, data) 1982 return ds 1983 def dump(s): 1984 p = set(s._dump()) 1985 # wow, this is the first time I've ever wanted ?: in python 1986 # note: this requires python2.5 1987 d = "".join([(S[i] if i in p else " ") for i in range(l)]) 1988 assert len(d) == l 1989 return d 1990 DEBUG = False 1991 for start in range(0, l): 1992 for end in range(start+1, l): 1993 # add [start-end) to the baseline 1994 which = "%d-%d" % (start, end-1) 1995 p_added = set(range(start, end)) 1996 b = base() 1997 if DEBUG: 1998 print 1999 print dump(b), which 2000 add = klass(); add.add(start, S[start:end]) 2001 print dump(add) 2002 b.add(start, S[start:end]) 2003 if DEBUG: 2004 print dump(b) 2005 # check that the 
new span is there 2006 d = b.get(start, end-start) 2007 self.failUnlessEqual(d, S[start:end], which) 2008 # check that all the original pieces are still there 2009 for t_start, t_data in pieces: 2010 t_len = len(t_data) 2011 self.failUnlessEqual(b.get(t_start, t_len), 2012 S[t_start:t_start+t_len], 2013 "%s %d+%d" % (which, t_start, t_len)) 2014 # check that a lot of subspans are mostly correct 2015 for t_start in range(l): 2016 for t_len in range(1,4): 2017 d = b.get(t_start, t_len) 2018 if d is not None: 2019 which2 = "%s+(%d-%d)" % (which, t_start, 2020 t_start+t_len-1) 2021 self.failUnlessEqual(d, S[t_start:t_start+t_len], 2022 which2) 2023 # check that removing a subspan gives the right value 2024 b2 = klass(b) 2025 b2.remove(t_start, t_len) 2026 removed = set(range(t_start, t_start+t_len)) 2027 for i in range(l): 2028 exp = (((i in p_elements) or (i in p_added)) 2029 and (i not in removed)) 2030 which2 = "%s-(%d-%d)" % (which, t_start, 2031 t_start+t_len-1) 2032 self.failUnlessEqual(bool(b2.get(i, 1)), exp, 2033 which2+" %d" % i) 2034 2035 def test_test(self): 2036 self.do_basic(SimpleDataSpans) 2037 self.do_scan(SimpleDataSpans) 2038 2039 def test_basic(self): 2040 self.do_basic(DataSpans) 2041 self.do_scan(DataSpans) 2042 2043 def test_random(self): 2044 # attempt to increase coverage of corner cases by comparing behavior 2045 # of a simple-but-slow model implementation against the 2046 # complex-but-fast actual implementation, in a large number of random 2047 # operations 2048 S1 = SimpleDataSpans 2049 S2 = DataSpans 2050 s1 = S1(); s2 = S2() 2051 seed = "" 2052 def _randstr(length, seed): 2053 created = 0 2054 pieces = [] 2055 while created < length: 2056 piece = md5(seed + str(created)).hexdigest() 2057 pieces.append(piece) 2058 created += len(piece) 2059 return "".join(pieces)[:length] 2060 def _create(subseed): 2061 ns1 = S1(); ns2 = S2() 2062 for i in range(10): 2063 what = md5(subseed+str(i)).hexdigest() 2064 start = int(what[2:4], 16) 2065 length = 
max(1,int(what[5:6], 16)) 2066 ns1.add(start, _randstr(length, what[7:9])); 2067 ns2.add(start, _randstr(length, what[7:9])) 2068 return ns1, ns2 2069 2070 #print 2071 for i in range(1000): 2072 what = md5(seed+str(i)).hexdigest() 2073 op = what[0] 2074 subop = what[1] 2075 start = int(what[2:4], 16) 2076 length = max(1,int(what[5:6], 16)) 2077 #print what 2078 if op in "0": 2079 if subop in "0123456": 2080 s1 = S1(); s2 = S2() 2081 else: 2082 s1, s2 = _create(what[7:11]) 2083 #print "s2 = %s" % list(s2._dump()) 2084 elif op in "123456": 2085 #print "s2.add(%d,%d)" % (start, length) 2086 s1.add(start, _randstr(length, what[7:9])); 2087 s2.add(start, _randstr(length, what[7:9])) 2088 elif op in "789abc": 2089 #print "s2.remove(%d,%d)" % (start, length) 2090 s1.remove(start, length); s2.remove(start, length) 2091 else: 2092 #print "s2.pop(%d,%d)" % (start, length) 2093 d1 = s1.pop(start, length); d2 = s2.pop(start, length) 2094 self.failUnlessEqual(d1, d2) 2095 #print "s1 now %s" % list(s1._dump()) 2096 #print "s2 now %s" % list(s2._dump()) 2097 self.failUnlessEqual(len(s1), len(s2)) 2098 self.failUnlessEqual(list(s1._dump()), list(s2._dump())) 2099 for j in range(100): 2100 what = md5(what[12:14]+str(j)).hexdigest() 2101 start = int(what[2:4], 16) 2102 length = max(1, int(what[5:6], 16)) 2103 d1 = s1.get(start, length); d2 = s2.get(start, length) 2104 self.failUnlessEqual(d1, d2, "%d+%d" % (start, length)) -
src/allmydata/test/test_web.py
diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 0df0bbe..e0c8f12 100644
a b class FakeClient(Client): 105 105 self.uploader = FakeUploader() 106 106 self.uploader.setServiceParent(self) 107 107 self.nodemaker = FakeNodeMaker(None, self._secret_holder, None, 108 self.uploader, None, None,108 self.uploader, None, 109 109 None, None) 110 110 111 111 def startService(self): … … class Grid(GridTestMixin, WebErrorMixin, unittest.TestCase, ShouldFailMixin): 4124 4124 "no servers were connected, but it might also indicate " 4125 4125 "severe corruption. You should perform a filecheck on " 4126 4126 "this object to learn more. The full error message is: " 4127 " Failed to get enough shareholders: have 0, need 3")4127 "no shares (need 3). Last failure: None") 4128 4128 self.failUnlessEqual(exp, body) 4129 4129 d.addCallback(_check_zero_shares) 4130 4130 … … class Grid(GridTestMixin, WebErrorMixin, unittest.TestCase, ShouldFailMixin): 4136 4136 def _check_one_share(body): 4137 4137 self.failIf("<html>" in body, body) 4138 4138 body = " ".join(body.strip().split()) 4139 exp= ("NotEnoughSharesError: This indicates that some "4139 msg = ("NotEnoughSharesError: This indicates that some " 4140 4140 "servers were unavailable, or that shares have been " 4141 4141 "lost to server departure, hard drive failure, or disk " 4142 4142 "corruption. You should perform a filecheck on " 4143 4143 "this object to learn more. The full error message is:" 4144 " Failed to get enough shareholders: have 1, need 3") 4145 self.failUnlessEqual(exp, body) 4144 " ran out of shares: %d complete, %d pending, 0 overdue," 4145 " 0 unused, need 3. Last failure: None") 4146 msg1 = msg % (1, 0) 4147 msg2 = msg % (0, 1) 4148 self.failUnless(body == msg1 or body == msg2, body) 4146 4149 d.addCallback(_check_one_share) 4147 4150 4148 4151 d.addCallback(lambda ignored: -
src/allmydata/util/dictutil.py
diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py index 3dc815b..91785ac 100644
a b class DictOfSets(dict): 57 57 if not self[key]: 58 58 del self[key] 59 59 60 def allvalues(self): 61 # return a set that merges all value sets 62 r = set() 63 for key in self: 64 r.update(self[key]) 65 return r 66 60 67 class UtilDict: 61 68 def __init__(self, initialdata={}): 62 69 self.d = {} -
new file src/allmydata/util/spans.py
diff --git a/src/allmydata/util/spans.py b/src/allmydata/util/spans.py new file mode 100755 index 0000000..2a199f0
class Spans:
    """I represent a compressed list of booleans, one per index (an integer).
    Typically, each index represents an offset into a large string, pointing
    to a specific byte of a share. In this context, True means that byte has
    been received, or has been requested.

    Another way to look at this is maintaining a set of integers, optimized
    for operations on spans like 'add range to set' and 'is range in set?'.

    This is a python equivalent of perl's Set::IntSpan module, frequently
    used to represent .newsrc contents.

    Rather than storing an actual (large) list or dictionary, I represent my
    internal state as a sorted list of spans, each with a start and a length.
    My API is presented in terms of start+length pairs. I provide set
    arithmetic operators, to efficiently answer questions like 'I want bytes
    XYZ, I already requested bytes ABC, and I've already received bytes DEF:
    what bytes should I request now?'.

    The new downloader will use it to keep track of which bytes we've
    requested or received already.
    """

    def __init__(self, _span_or_start=None, length=None):
        """Create a Spans: empty (no args), from one span (Spans(start,
        length)), or from an iterable of (start, length) pairs (including
        another Spans)."""
        # _spans is kept sorted, non-overlapping, and non-adjacent
        self._spans = []
        if length is not None:
            self._spans.append((_span_or_start, length))
        elif _span_or_start:
            for (start, length) in _span_or_start:
                self.add(start, length)
        self._check()

    def _check(self):
        """Raise AssertionError unless _spans is sorted, non-overlapping,
        and non-adjacent."""
        assert sorted(self._spans) == self._spans, self.dump()
        prev_end = None
        for (start, length) in self._spans:
            if prev_end is not None:
                # merged spans must leave at least a one-index gap
                assert start > prev_end, self.dump()
            prev_end = start + length

    def add(self, start, length):
        """Add the range [start, start+length) to my set. Returns self."""
        assert start >= 0
        assert length > 0
        # find the contiguous run of existing spans that touch (overlap or
        # abut) the new one; they all collapse into a single merged span
        first_overlap = last_overlap = None
        for i, (s_start, s_length) in enumerate(self._spans):
            if (overlap(s_start, s_length, start, length)
                or adjacent(s_start, s_length, start, length)):
                last_overlap = i
                if first_overlap is None:
                    first_overlap = i
                continue
            if first_overlap is not None:
                # spans are sorted, so once we've passed the touching run,
                # no later span can touch the new one
                break
        if first_overlap is None:
            # no overlap: just insert the span and restore sorted order
            self._spans.append((start, length))
            self._spans.sort()
        else:
            # everything from [first_overlap] to [last_overlap] merges with
            # the new span
            first_start, _ = self._spans[first_overlap]
            last_start, last_length = self._spans[last_overlap]
            newspan_start = min(start, first_start)
            newspan_end = max(start + length, last_start + last_length)
            self._spans[first_overlap:last_overlap+1] = \
                [(newspan_start, newspan_end - newspan_start)]
        self._check()
        return self

    def remove(self, start, length):
        """Remove the range [start, start+length) from my set. Returns
        self."""
        assert start >= 0
        assert length > 0
        first_complete_overlap = last_complete_overlap = None
        for i, (s_start, s_length) in enumerate(self._spans):
            s_end = s_start + s_length
            o = overlap(s_start, s_length, start, length)
            if not o:
                continue
            o_start, o_length = o
            o_end = o_start + o_length
            if o_start == s_start and o_end == s_end:
                # the whole span is removed: remember the run for deletion
                # below (deleting inside the loop would upset enumerate)
                if first_complete_overlap is None:
                    first_complete_overlap = i
                last_complete_overlap = i
            elif o_start == s_start:
                # we only overlap the left side, so trim the start
                #    1111
                # rrrr
                #    oo
                # ->   11
                self._spans[i] = (o_end, s_end - o_end)
            elif o_end == s_end:
                # we only overlap the right side
                # 1111
                #   rrrr
                #   oo
                # -> 11
                self._spans[i] = (s_start, o_start - s_start)
            else:
                # we overlap the middle, so split into two spans. No other
                # span can be affected.
                # 111111
                #   rr
                # LL  RR
                self._spans[i] = (s_start, o_start - s_start)
                self._spans.append((o_end, s_end - o_end))
                self._spans.sort()
                break
        if first_complete_overlap is not None:
            del self._spans[first_complete_overlap:last_complete_overlap+1]
        self._check()
        return self

    def dump(self):
        """Return a short human-readable summary of my contents."""
        return "len=%d: %s" % (len(self),
                               ",".join(["[%d-%d]" % (start, start+l-1)
                                         for (start, l) in self._spans]))

    def each(self):
        """Yield each individual member index, in ascending order."""
        for (start, length) in self._spans:
            for i in range(start, start + length):
                yield i

    def __iter__(self):
        # yields (start, length) pairs
        for s in self._spans:
            yield s

    def __len__(self):
        # number of indices in the set; this also gets us bool(s)
        return sum(length for (start, length) in self._spans)

    def __add__(self, other):
        s = self.__class__(self)
        for (start, length) in other:
            s.add(start, length)
        return s

    def __sub__(self, other):
        s = self.__class__(self)
        for (start, length) in other:
            s.remove(start, length)
        return s

    def __iadd__(self, other):
        for (start, length) in other:
            self.add(start, length)
        return self

    def __isub__(self, other):
        for (start, length) in other:
            self.remove(start, length)
        return self

    def __and__(self, other):
        """Return a new Spans with the intersection of self and other."""
        if not self._spans:
            return self.__class__()
        # self & other == self - (bounding_box - other)
        bounds_start = self._spans[0][0]
        bounds_end = self._spans[-1][0] + self._spans[-1][1]
        bounds = self.__class__(bounds_start, bounds_end - bounds_start)
        not_other = bounds - other
        return self - not_other

    def __contains__(self, start_and_length):
        """True if the entire range [start, start+length) is in my set."""
        # note: parameter is a (start, length) tuple; unpacked here instead
        # of in the signature for py3 compatibility
        (start, length) = start_and_length
        for (span_start, span_length) in self._spans:
            o = overlap(start, length, span_start, span_length)
            # spans are merged, so the whole range must fit in one span
            if o == (start, length):
                return True
        return False

def overlap(start0, length0, start1, length1):
    """Return (start, length) of the overlapping region, or None.

    #  00      00   000   0000    00  00 000  00   00  00      00
    #     11    11   11    11   111    11 11 1111 111 11    11 11
    """
    left = max(start0, start1)
    right = min(start0 + length0, start1 + length1)
    # if there is overlap, 'left' is its start and 'right-1' its last index
    if left < right:
        return (left, right - left)
    return None

def adjacent(start0, length0, start1, length1):
    """True if the two ranges abut exactly (no gap, no overlap)."""
    if (start0 < start1) and start0 + length0 == start1:
        return True
    elif (start1 < start0) and start1 + length1 == start0:
        return True
    return False

class DataSpans:
    """I represent portions of a large string. Equivalently, I can be said
    to maintain a large array of characters (with gaps of empty elements).
    I can be used to manage access to a remote share, where some pieces
    have been retrieved, some have been requested, and others have not been
    read.
    """

    def __init__(self, other=None):
        # (start, data) tuples, sorted, non-overlapping, merged
        self.spans = []
        if other:
            for (start, data) in other.get_chunks():
                self.add(start, data)

    def __len__(self):
        # number of bytes we're holding
        return sum(len(data) for (start, data) in self.spans)

    def _dump(self):
        """Yield every held offset, one per byte, in ascending order."""
        for (start, data) in self.spans:
            for i in range(start, start + len(data)):
                yield i

    def dump(self):
        """Return a short human-readable summary of my contents."""
        return "len=%d: %s" % (len(self),
                               ",".join(["[%d-%d]" % (start, start+len(data)-1)
                                         for (start, data) in self.spans]))

    def get_chunks(self):
        """Return a list of my (start, data) tuples."""
        return list(self.spans)

    def get_spans(self):
        """Return a Spans object with a bit set for each byte I hold."""
        return Spans([(start, len(data)) for (start, data) in self.spans])

    def assert_invariants(self):
        """Raise AssertionError unless my spans are sorted, non-overlapping,
        and merged (no two spans adjacent)."""
        if not self.spans:
            return
        prev_end = self.spans[0][0] + len(self.spans[0][1])
        for (start, data) in self.spans[1:]:
            if start <= prev_end:
                # adjacent or overlapping: bad
                raise AssertionError("spans not merged: %r" % (self.spans,))
            # BUGFIX: advance prev_end so every consecutive pair is checked,
            # not just each span against the first one
            prev_end = start + len(data)

    def get(self, start, length):
        """Return the LENGTH bytes at START, or None unless I hold that
        entire range in one piece."""
        end = start + length
        for (s_start, s_data) in self.spans:
            s_end = s_start + len(s_data)
            if s_start <= start < s_end:
                # we want some data from this span. Because we maintain
                # strictly merged and non-overlapping spans, everything we
                # want must be in this span.
                offset = start - s_start
                if offset + length > len(s_data):
                    return None  # span falls short
                return s_data[offset:offset+length]
            if s_start >= end:
                # we've gone too far: no further spans will overlap
                return None
        return None

    def add(self, start, data):
        """Insert DATA at offset START, overwriting any overlapping bytes."""
        # walk through existing spans, modifying overlapping ones in place
        # and inserting new ones, then merge adjacent spans at the end
        end = start + len(data)
        i = 0
        while len(data):
            if i >= len(self.spans):
                # ran off the end: append the remainder as a last span
                self.spans.append((start, data))
                break
            (s_start, s_data) = self.spans[i]
            # five basic cases:
            #  a: OLD  b:OLDD  c1:OLD  c2:OLD   d1:OLDD  d2:OLD  e: OLLDD
            #    NEW      NEW     NEW     NEWW      NEW     NEW     NEW
            #
            # we handle A by inserting a new segment (with "N") and looping,
            # turning it into B or C. We handle B by replacing a prefix and
            # terminating. We handle C (both c1 and c2) by replacing the
            # segment (and, for c2, looping, turning it into A). We handle D
            # by replacing a suffix (and, for d2, looping, turning it into
            # A). We handle E by replacing the middle and terminating.
            if start < s_start:
                # case A: insert a new span, then loop with the remainder
                s_len = s_start - start
                self.spans.insert(i, (start, data[:s_len]))
                i += 1
                start = s_start
                data = data[s_len:]
                continue
            s_len = len(s_data)
            s_end = s_start + s_len
            if s_start <= start < s_end:
                # we want to modify some data in this span: a prefix, a
                # suffix, or the whole thing
                if s_start == start:
                    if s_end <= end:
                        # case C: replace this whole segment
                        self.spans[i] = (s_start, data[:s_len])
                        i += 1
                        start += s_len
                        data = data[s_len:]
                        # C2 is where len(data)>0
                        continue
                    # case B: modify the prefix, retain the suffix
                    self.spans[i] = (s_start, data + s_data[len(data):])
                    break
                if start > s_start and end < s_end:
                    # case E: modify the middle, retain both edges
                    prefix_len = start - s_start  # we retain this much
                    suffix_len = s_end - end      # and retain this much
                    newdata = s_data[:prefix_len] + data + s_data[-suffix_len:]
                    self.spans[i] = (s_start, newdata)
                    break
                # case D: retain the prefix, modify the suffix
                prefix_len = start - s_start     # we retain this much
                suffix_len = s_len - prefix_len  # we replace this much
                self.spans[i] = (s_start,
                                 s_data[:prefix_len] + data[:suffix_len])
                i += 1
                start += suffix_len
                data = data[suffix_len:]
                # D2 is where len(data)>0
                continue
            # else we're not there yet
            i += 1
            continue
        # now merge adjacent spans
        newspans = []
        for (s_start, s_data) in self.spans:
            if newspans and adjacent(newspans[-1][0], len(newspans[-1][1]),
                                     s_start, len(s_data)):
                newspans[-1] = (newspans[-1][0], newspans[-1][1] + s_data)
            else:
                newspans.append((s_start, s_data))
        self.spans = newspans
        self.assert_invariants()

    def remove(self, start, length):
        """Discard any bytes I hold in the range [start, start+length)."""
        i = 0
        end = start + length
        while i < len(self.spans):
            (s_start, s_data) = self.spans[i]
            if s_start >= end:
                # this segment is entirely right of the removed region, and
                # all further segments are even further right. We're done.
                break
            s_len = len(s_data)
            s_end = s_start + s_len
            o = overlap(start, length, s_start, s_len)
            if not o:
                i += 1
                continue
            o_start, o_len = o
            o_end = o_start + o_len
            if o_len == s_len:
                # remove the whole segment
                del self.spans[i]
                continue
            if o_start == s_start:
                # remove a prefix, leaving the suffix from o_end to s_end
                prefix_len = o_end - o_start
                self.spans[i] = (o_end, s_data[prefix_len:])
                i += 1
                continue
            elif o_end == s_end:
                # remove a suffix, leaving the prefix from s_start to o_start
                prefix_len = o_start - s_start
                self.spans[i] = (s_start, s_data[:prefix_len])
                i += 1
                continue
            # remove the middle, creating a new segment
            # left is s_start:o_start, right is o_end:s_end
            left = s_data[:o_start - s_start]
            right = s_data[-(s_end - o_end):]
            self.spans[i] = (s_start, left)
            self.spans.insert(i+1, (o_end, right))
            break

    def pop(self, start, length):
        """Return and discard the LENGTH bytes at START; None if absent."""
        data = self.get(start, length)
        if data:
            self.remove(start, length)
        return data
src/allmydata/web/download-status.xhtml
diff --git a/src/allmydata/web/download-status.xhtml b/src/allmydata/web/download-status.xhtml index da029e6..5d43f69 100644
a b 18 18 <li>Status: <span n:render="status"/></li> 19 19 </ul> 20 20 21 <div n:render="events"></div> 21 22 22 23 <div n:render="results"> 23 24 <h2>Download Results</h2> -
src/allmydata/web/status.py
diff --git a/src/allmydata/web/status.py b/src/allmydata/web/status.py index e4241a3..c3a55d7 100644
a b class DownloadStatusPage(DownloadResultsRendererMixin, rend.Page): 358 358 def download_results(self): 359 359 return defer.maybeDeferred(self.download_status.get_results) 360 360 361 def relative_time(self, t): 362 if t is None: 363 return t 364 if self.download_status.started is not None: 365 return t - self.download_status.started 366 return t 367 def short_relative_time(self, t): 368 t = self.relative_time(t) 369 if t is None: 370 return "" 371 return "+%.6fs" % t 372 373 def renderHTTP(self, ctx): 374 req = inevow.IRequest(ctx) 375 t = get_arg(req, "t") 376 if t == "json": 377 return self.json(req) 378 return rend.Page.renderHTTP(self, ctx) 379 380 def json(self, req): 381 req.setHeader("content-type", "text/plain") 382 data = {} 383 dyhb_events = [] 384 for serverid,requests in self.download_status.dyhb_requests.iteritems(): 385 for req in requests: 386 dyhb_events.append( (base32.b2a(serverid),) + req ) 387 dyhb_events.sort(key=lambda req: req[1]) 388 data["dyhb"] = dyhb_events 389 request_events = [] 390 for serverid,requests in self.download_status.requests.iteritems(): 391 for req in requests: 392 request_events.append( (base32.b2a(serverid),) + req ) 393 request_events.sort(key=lambda req: (req[4],req[1])) 394 data["requests"] = request_events 395 data["segment"] = self.download_status.segment_events 396 data["read"] = self.download_status.read_events 397 return simplejson.dumps(data, indent=1) + "\n" 398 399 def render_events(self, ctx, data): 400 if not self.download_status.storage_index: 401 return 402 srt = self.short_relative_time 403 l = T.ul() 404 405 t = T.table(class_="status-download-events") 406 t[T.tr[T.td["serverid"], T.td["sent"], T.td["received"], 407 T.td["shnums"], T.td["RTT"]]] 408 dyhb_events = [] 409 for serverid,requests in self.download_status.dyhb_requests.iteritems(): 410 for req in requests: 411 dyhb_events.append( (serverid,) + req ) 412 dyhb_events.sort(key=lambda req: req[1]) 413 for d_ev in dyhb_events: 414 (serverid, 
sent, shnums, received) = d_ev 415 serverid_s = idlib.shortnodeid_b2a(serverid) 416 rtt = received - sent 417 t[T.tr(style="background: %s" % self.color(serverid))[ 418 [T.td[serverid_s], T.td[srt(sent)], T.td[srt(received)], 419 T.td[",".join([str(shnum) for shnum in shnums])], 420 T.td[self.render_time(None, rtt)], 421 ]]] 422 l["DYHB Requests:", t] 423 424 t = T.table(class_="status-download-events") 425 t[T.tr[T.td["range"], T.td["start"], T.td["finish"], T.td["got"], 426 T.td["time"], T.td["decrypttime"], T.td["pausedtime"], 427 T.td["speed"]]] 428 for r_ev in self.download_status.read_events: 429 (start, length, requesttime, finishtime, bytes, decrypt, paused) = r_ev 430 print r_ev 431 if finishtime is not None: 432 rtt = finishtime - requesttime - paused 433 speed = self.render_rate(None, 1.0 * bytes / rtt) 434 rtt = self.render_time(None, rtt) 435 decrypt = self.render_time(None, decrypt) 436 paused = self.render_time(None, paused) 437 else: 438 speed, rtt, decrypt, paused = "","","","" 439 t[T.tr[T.td["[%d:+%d]" % (start, length)], 440 T.td[srt(requesttime)], T.td[srt(finishtime)], 441 T.td[bytes], T.td[rtt], T.td[decrypt], T.td[paused], 442 T.td[speed], 443 ]] 444 l["Read Events:", t] 445 446 t = T.table(class_="status-download-events") 447 t[T.tr[T.td["type"], T.td["segnum"], T.td["when"], T.td["range"], 448 T.td["decodetime"], T.td["segtime"], T.td["speed"]]] 449 reqtime = (None, None) 450 for s_ev in self.download_status.segment_events: 451 (etype, segnum, when, segstart, seglen, decodetime) = s_ev 452 if etype == "request": 453 t[T.tr[T.td["request"], T.td["seg%d" % segnum], 454 T.td[srt(when)]]] 455 reqtime = (segnum, when) 456 elif etype == "delivery": 457 if reqtime[0] == segnum: 458 segtime = when - reqtime[1] 459 speed = self.render_rate(None, 1.0 * seglen / segtime) 460 segtime = self.render_time(None, segtime) 461 else: 462 segtime, speed = "", "" 463 t[T.tr[T.td["delivery"], T.td["seg%d" % segnum], 464 T.td[srt(when)], 465 T.td["[%d:+%d]" % 
(segstart, seglen)], 466 T.td[self.render_time(None,decodetime)], 467 T.td[segtime], T.td[speed]]] 468 elif etype == "error": 469 t[T.tr[T.td["error"], T.td["seg%d" % segnum]]] 470 l["Segment Events:", t] 471 472 t = T.table(border="1") 473 t[T.tr[T.td["serverid"], T.td["shnum"], T.td["range"], 474 T.td["txtime"], T.td["rxtime"], T.td["received"], T.td["RTT"]]] 475 reqtime = (None, None) 476 request_events = [] 477 for serverid,requests in self.download_status.requests.iteritems(): 478 for req in requests: 479 request_events.append( (serverid,) + req ) 480 request_events.sort(key=lambda req: (req[4],req[1])) 481 for r_ev in request_events: 482 (peerid, shnum, start, length, sent, receivedlen, received) = r_ev 483 rtt = None 484 if received is not None: 485 rtt = received - sent 486 peerid_s = idlib.shortnodeid_b2a(peerid) 487 t[T.tr(style="background: %s" % self.color(peerid))[ 488 T.td[peerid_s], T.td[shnum], 489 T.td["[%d:+%d]" % (start, length)], 490 T.td[srt(sent)], T.td[srt(received)], T.td[receivedlen], 491 T.td[self.render_time(None, rtt)], 492 ]] 493 l["Requests:", t] 494 495 return l 496 497 def color(self, peerid): 498 def m(c): 499 return min(ord(c) / 2 + 0x80, 0xff) 500 return "#%02x%02x%02x" % (m(peerid[0]), m(peerid[1]), m(peerid[2])) 501 361 502 def render_results(self, ctx, data): 362 503 d = self.download_results() 363 504 def _got_results(results): … … class DownloadStatusPage(DownloadResultsRendererMixin, rend.Page): 371 512 TIME_FORMAT = "%H:%M:%S %d-%b-%Y" 372 513 started_s = time.strftime(TIME_FORMAT, 373 514 time.localtime(data.get_started())) 374 return started_s 515 return started_s + " (%s)" % data.get_started() 375 516 376 517 def render_si(self, ctx, data): 377 518 si_s = base32.b2a_or_none(data.get_storage_index()) -
src/allmydata/web/tahoe.css
diff --git a/src/allmydata/web/tahoe.css b/src/allmydata/web/tahoe.css index 9e0dc2b..a862966 100644
a b table.tahoe-directory { 134 134 display: table-cell; 135 135 text-align: center; 136 136 padding: 0 1em; 137 } 138 No newline at end of file 137 } 138 139 /* recent upload/download status pages */ 140 141 table.status-download-events { 142 border: 1px solid #aaa; 143 } 144 table.status-download-events td { 145 border: 1px solid #a00; 146 padding: 2px 147 }