Ticket #798: new-downloader-v8.diff

File new-downloader-v8.diff, 281.7 KB (added by warner, at 2010-05-27T23:59:51Z)

more integration, refactoring

  • Makefile

    diff --git a/Makefile b/Makefile
    index c7dc647..ae53f6a 100644
    a b coverage-output-text: 
    140140
    141141coverage-output:
    142142        rm -rf coverage-html
    143         coverage html -d coverage-html $(COVERAGE_OMIT)
     143        coverage html -i -d coverage-html $(COVERAGE_OMIT)
    144144        cp .coverage coverage-html/coverage.data
    145145        @echo "now point your browser at coverage-html/index.html"
    146146
    endif 
    184184
    185185pyflakes:
    186186        $(PYTHON) -OOu `which pyflakes` src/allmydata |sort |uniq
     187check-umids:
     188        $(PYTHON) misc/check-umids.py `find src/allmydata -name '*.py'`
    187189
    188190count-lines:
    189191        @echo -n "files: "
  • new file misc/check-umids.py

    diff --git a/misc/check-umids.py b/misc/check-umids.py
    new file mode 100755
    index 0000000..05e8825
    - +  
     1#! /usr/bin/python
     2
     3# ./rumid.py foo.py
     4
     5import sys, re, os
     6
     7ok = True
     8umids = {}
     9
     10for fn in sys.argv[1:]:
     11    fn = os.path.abspath(fn)
     12    for lineno,line in enumerate(open(fn, "r").readlines()):
     13        lineno = lineno+1
     14        if "umid" not in line:
     15            continue
     16        mo = re.search("umid=[\"\']([^\"\']+)[\"\']", line)
     17        if mo:
     18            umid = mo.group(1)
     19            if umid in umids:
     20                oldfn, oldlineno = umids[umid]
     21                print "%s:%d: duplicate umid '%s'" % (fn, lineno, umid)
     22                print "%s:%d: first used here" % (oldfn, oldlineno)
     23                ok = False
     24            umids[umid] = (fn,lineno)
     25
     26if ok:
     27    print "all umids are unique"
     28else:
     29    print "some umids were duplicates"
     30    sys.exit(1)
  • misc/coverage.el

    diff --git a/misc/coverage.el b/misc/coverage.el
    index bad490f..8d69d5d 100644
    a b  
    8484                           'face '(:box "red")
    8585                           )
    8686              )
    87             (message "Added annotations")
     87            (message (format "Added annotations: %d uncovered lines"
     88                             (safe-length uncovered-code-lines)))
    8889            )
    8990          )
    9091      (message "unable to find coverage for this file"))
  • misc/coverage2el.py

    diff --git a/misc/coverage2el.py b/misc/coverage2el.py
    index ed94bd0..7d03a27 100644
    a b  
    11
    2 from coverage import coverage, summary
     2from coverage import coverage, summary, misc
    33
    44class ElispReporter(summary.SummaryReporter):
    55    def report(self):
    class ElispReporter(summary.SummaryReporter): 
    2121        out.write("(let ((results (make-hash-table :test 'equal)))\n")
    2222        for cu in self.code_units:
    2323            f = cu.filename
    24             (fn, executable, missing, mf) = self.coverage.analysis(cu)
     24            try:
     25                (fn, executable, missing, mf) = self.coverage.analysis(cu)
     26            except misc.NoSource:
     27                continue
    2528            code_linenumbers = executable
    2629            uncovered_code = missing
    2730            covered_linenumbers = sorted(set(executable) - set(missing))
  • misc/sizes.py

    diff --git a/misc/sizes.py b/misc/sizes.py
    index d9c230a..7910946 100644
    a b class Sizes: 
    6060            self.block_arity = 0
    6161            self.block_tree_depth = 0
    6262            self.block_overhead = 0
    63             self.bytes_until_some_data = 20 + share_size
     63            self.bytes_until_some_data = 32 + share_size
    6464            self.share_storage_overhead = 0
    6565            self.share_transmission_overhead = 0
    6666
    6767        elif mode == "beta":
    6868            # k=num_blocks, d=1
    69             # each block has a 20-byte hash
     69            # each block has a 32-byte hash
    7070            self.block_arity = num_blocks
    7171            self.block_tree_depth = 1
    72             self.block_overhead = 20
     72            self.block_overhead = 32
    7373            # the share has a list of hashes, one for each block
    7474            self.share_storage_overhead = (self.block_overhead *
    7575                                           num_blocks)
    7676            # we can get away with not sending the hash of the share that
    7777            # we're sending in full, once
    78             self.share_transmission_overhead = self.share_storage_overhead - 20
     78            self.share_transmission_overhead = self.share_storage_overhead - 32
    7979            # we must get the whole list (so it can be validated) before
    8080            # any data can be validated
    8181            self.bytes_until_some_data = (self.share_transmission_overhead +
    class Sizes: 
    8989            # to make things easier, we make the pessimistic assumption that
    9090            # we have to store hashes for all the empty places in the tree
    9191            # (when the number of shares is not an exact exponent of k)
    92             self.block_overhead = 20
     92            self.block_overhead = 32
    9393            # the block hashes are organized into a k-ary tree, which
    9494            # means storing (and eventually transmitting) more hashes. This
    9595            # count includes all the low-level share hashes and the root.
    class Sizes: 
    9898            #print "num_leaves", num_leaves
    9999            #print "hash_nodes", hash_nodes
    100100            # the storage overhead is this
    101             self.share_storage_overhead = 20 * (hash_nodes - 1)
     101            self.share_storage_overhead = 32 * (hash_nodes - 1)
    102102            # the transmission overhead is smaller: if we actually transmit
    103103            # every block, we don't have to transmit 1/k of the
    104104            # lowest-level block hashes, and we don't have to transmit the
    105105            # root because it was already sent with the share-level hash tree
    106             self.share_transmission_overhead = 20 * (hash_nodes
     106            self.share_transmission_overhead = 32 * (hash_nodes
    107107                                                     - 1 # the root
    108108                                                     - num_leaves / k)
    109109            # we must get a full sibling hash chain before we can validate
    110110            # any data
    111111            sibling_length = d * (k-1)
    112             self.bytes_until_some_data = 20 * sibling_length + block_size
     112            self.bytes_until_some_data = 32 * sibling_length + block_size
    113113           
    114114           
    115115
  • misc/storage-overhead.py

    diff --git a/misc/storage-overhead.py b/misc/storage-overhead.py
    index 75a0bf6..a294b8d 100644
    a b  
    11#!/usr/bin/env python
    22
    33import sys, math
    4 from allmydata import upload, uri, encode, storage
     4from allmydata import uri, storage
     5from allmydata.immutable import upload
     6from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE
    57from allmydata.util import mathutil
    68
    79def roundup(size, blocksize=4096):
    class BigFakeString: 
    2224    def tell(self):
    2325        return self.fp
    2426
    25 def calc(filesize, params=(3,7,10), segsize=encode.Encoder.MAX_SEGMENT_SIZE):
     27def calc(filesize, params=(3,7,10), segsize=DEFAULT_MAX_SEGMENT_SIZE):
    2628    num_shares = params[2]
    2729    if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD:
    28         urisize = len(uri.pack_lit("A"*filesize))
     30        urisize = len(uri.LiteralFileURI("A"*filesize).to_string())
    2931        sharesize = 0
    3032        sharespace = 0
    3133    else:
    32         u = upload.FileUploader(None)
     34        u = upload.FileUploader(None) # XXX changed
    3335        u.set_params(params)
    3436        # unfortunately, Encoder doesn't currently lend itself to answering
    3537        # this question without measuring a filesize, so we have to give it a
  • src/allmydata/client.py

    diff --git a/src/allmydata/client.py b/src/allmydata/client.py
    index 12e7473..d3ae29b 100644
    a b import allmydata 
    1212from allmydata.storage.server import StorageServer
    1313from allmydata import storage_client
    1414from allmydata.immutable.upload import Uploader
    15 from allmydata.immutable.download import Downloader
     15from allmydata.immutable.downloader.util import Terminator
    1616from allmydata.immutable.offloaded import Helper
    1717from allmydata.control import ControlServer
    1818from allmydata.introducer.client import IntroducerClient
    19 from allmydata.util import hashutil, base32, pollmixin, cachedir, log
     19from allmydata.util import hashutil, base32, pollmixin, log
    2020from allmydata.util.abbreviate import parse_abbreviated_size
    2121from allmydata.util.time_format import parse_duration, parse_date
    2222from allmydata.stats import StatsProvider
    class Client(node.Node, pollmixin.PollMixin): 
    278278
    279279        self.init_client_storage_broker()
    280280        self.history = History(self.stats_provider)
     281        self.terminator = Terminator()
     282        self.terminator.setServiceParent(self)
    281283        self.add_service(Uploader(helper_furl, self.stats_provider))
    282         download_cachedir = os.path.join(self.basedir,
    283                                          "private", "cache", "download")
    284         self.download_cache_dirman = cachedir.CacheDirectoryManager(download_cachedir)
    285         self.download_cache_dirman.setServiceParent(self)
    286         self.downloader = Downloader(self.storage_broker, self.stats_provider)
    287284        self.init_stub_client()
    288285        self.init_nodemaker()
    289286
    class Client(node.Node, pollmixin.PollMixin): 
    342339                                   self._secret_holder,
    343340                                   self.get_history(),
    344341                                   self.getServiceNamed("uploader"),
    345                                    self.downloader,
    346                                    self.download_cache_dirman,
     342                                   self.terminator,
    347343                                   self.get_encoding_parameters(),
    348344                                   self._key_generator)
    349345
  • src/allmydata/immutable/checker.py

    diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py
    index 2f2d8f1..31c70e3 100644
    a b class Checker(log.PrefixingLogMixin): 
    8585            level = log.WEIRD
    8686            if f.check(DeadReferenceError):
    8787                level = log.UNUSUAL
    88             self.log("failure from server on 'get_buckets' the REMOTE failure was:", facility="tahoe.immutable.checker", failure=f, level=level, umid="3uuBUQ")
     88            self.log("failure from server on 'get_buckets' the REMOTE failure was:",
     89                     facility="tahoe.immutable.checker",
     90                     failure=f, level=level, umid="AX7wZQ")
    8991            return ({}, serverid, False)
    9092
    9193        d.addCallbacks(_wrap_results, _trap_errs)
  • new file src/allmydata/immutable/downloader/common.py

    diff --git a/src/allmydata/immutable/downloader/__init__.py b/src/allmydata/immutable/downloader/__init__.py
    new file mode 100644
    index 0000000..e69de29
    diff --git a/src/allmydata/immutable/downloader/common.py b/src/allmydata/immutable/downloader/common.py
    new file mode 100644
    index 0000000..7364b8d
    - +  
     1
     2(AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT, DEAD, BADSEGNUM) = \
     3 ("AVAILABLE", "PENDING", "OVERDUE", "COMPLETE", "CORRUPT", "DEAD", "BADSEGNUM")
     4
     5class BadSegmentNumberError(Exception):
     6    pass
     7class WrongSegmentError(Exception):
     8    pass
     9class BadCiphertextHashError(Exception):
     10    pass
     11
  • new file src/allmydata/immutable/downloader/fetcher.py

    diff --git a/src/allmydata/immutable/downloader/fetcher.py b/src/allmydata/immutable/downloader/fetcher.py
    new file mode 100644
    index 0000000..2fd987b
    - +  
     1
     2from twisted.python.failure import Failure
     3from foolscap.api import eventually
     4from allmydata.interfaces import NotEnoughSharesError, NoSharesError
     5from allmydata.util import log
     6from allmydata.util.dictutil import DictOfSets
     7from common import AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT, DEAD, \
     8     BADSEGNUM, BadSegmentNumberError
     9
     10class SegmentFetcher:
     11    """I am responsible for acquiring blocks for a single segment. I will use
     12    the Share instances passed to my add_shares() method to locate, retrieve,
     13    and validate those blocks. I expect my parent node to call my
     14    no_more_shares() method when there are no more shares available. I will
     15    call my parent's want_more_shares() method when I want more: I expect to
     16    see at least one call to add_shares or no_more_shares afterwards.
     17
     18    When I have enough validated blocks, I will call my parent's
     19    process_blocks() method with a dictionary that maps shnum to blockdata.
     20    If I am unable to provide enough blocks, I will call my parent's
     21    fetch_failed() method with (self, f). After either of these events, I
     22    will shut down and do no further work. My parent can also call my stop()
     23    method to have me shut down early."""
     24
     25    def __init__(self, node, segnum, k):
     26        self._node = node # _Node
     27        self.segnum = segnum
     28        self._k = k
     29        self._shares = {} # maps non-dead Share instance to a state, one of
     30                          # (AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT).
     31                          # State transition map is:
     32                          #  AVAILABLE -(send-read)-> PENDING
     33                          #  PENDING -(timer)-> OVERDUE
     34                          #  PENDING -(rx)-> COMPLETE, CORRUPT, DEAD, BADSEGNUM
     35                          #  OVERDUE -(rx)-> COMPLETE, CORRUPT, DEAD, BADSEGNUM
     36                          # If a share becomes DEAD, it is removed from the
     37                          # dict. If it becomes BADSEGNUM, the whole fetch is
     38                          # terminated.
     39        self._share_observers = {} # maps Share to Observer2 for active ones
     40        self._shnums = DictOfSets() # maps shnum to the shares that provide it
     41        self._blocks = {} # maps shnum to validated block data
     42        self._no_more_shares = False
     43        self._bad_segnum = False
     44        self._last_failure = None
     45        self._running = True
     46
     47    def stop(self):
     48        log.msg("SegmentFetcher(%s).stop" % self._node._si_prefix,
     49                level=log.NOISY, umid="LWyqpg")
     50        self._cancel_all_requests()
     51        self._running = False
     52        self._shares.clear() # let GC work # ??? XXX
     53
     54
     55    # called by our parent _Node
     56
     57    def add_shares(self, shares):
     58        # called when ShareFinder locates a new share, and when a non-initial
     59        # segment fetch is started and we already know about shares from the
     60        # previous segment
     61        for s in shares:
     62            self._shares[s] = AVAILABLE
     63            self._shnums.add(s._shnum, s)
     64        eventually(self.loop)
     65
     66    def no_more_shares(self):
     67        # ShareFinder tells us it's reached the end of its list
     68        self._no_more_shares = True
     69        eventually(self.loop)
     70
     71    # internal methods
     72
     73    def _count_shnums(self, *states):
     74        """shnums for which at least one state is in the following list"""
     75        shnums = []
     76        for shnum,shares in self._shnums.iteritems():
     77            matches = [s for s in shares if self._shares.get(s) in states]
     78            if matches:
     79                shnums.append(shnum)
     80        return len(shnums)
     81
     82    def loop(self):
     83        try:
     84            # if any exception occurs here, kill the download
     85            self._do_loop()
     86        except BaseException:
     87            self._node.fetch_failed(self, Failure())
     88            raise
     89
     90    def _do_loop(self):
     91        k = self._k
     92        if not self._running:
     93            return
     94        if self._bad_segnum:
     95            # oops, we were asking for a segment number beyond the end of the
     96            # file. This is an error.
     97            self.stop()
     98            e = BadSegmentNumberError("segnum=%d, numsegs=%d" %
     99                                      (self.segnum, self._node.num_segments))
     100            f = Failure(e)
     101            self._node.fetch_failed(self, f)
     102            return
     103
     104        # are we done?
     105        if self._count_shnums(COMPLETE) >= k:
     106            # yay!
     107            self.stop()
     108            self._node.process_blocks(self.segnum, self._blocks)
     109            return
     110
     111        # we may have exhausted everything
     112        if (self._no_more_shares and
     113            self._count_shnums(AVAILABLE, PENDING, OVERDUE, COMPLETE) < k):
     114            # no more new shares are coming, and the remaining hopeful shares
     115            # aren't going to be enough. boo!
     116
     117            log.msg("share states: %r" % (self._shares,),
     118                    level=log.NOISY, umid="0ThykQ")
     119            if self._count_shnums(AVAILABLE, PENDING, OVERDUE, COMPLETE) == 0:
     120                format = ("no shares (need %(k)d)."
     121                          " Last failure: %(last_failure)s")
     122                args = { "k": k,
     123                         "last_failure": self._last_failure }
     124                error = NoSharesError
     125            else:
     126                format = ("ran out of shares: %(complete)d complete,"
     127                          " %(pending)d pending, %(overdue)d overdue,"
     128                          " %(unused)d unused, need %(k)d."
     129                          " Last failure: %(last_failure)s")
     130                args = {"complete": self._count_shnums(COMPLETE),
     131                        "pending": self._count_shnums(PENDING),
     132                        "overdue": self._count_shnums(OVERDUE),
     133                        # 'unused' should be zero
     134                        "unused": self._count_shnums(AVAILABLE),
     135                        "k": k,
     136                        "last_failure": self._last_failure,
     137                        }
     138                error = NotEnoughSharesError
     139            log.msg(format=format, level=log.UNUSUAL, umid="1DsnTg", **args)
     140            e = error(format % args)
     141            f = Failure(e)
     142            self.stop()
     143            self._node.fetch_failed(self, f)
     144            return
     145
     146        # nope, not done. Are we "block-hungry" (i.e. do we want to send out
     147        # more read requests, or do we think we have enough in flight
     148        # already?)
     149        while self._count_shnums(PENDING, COMPLETE) < k:
     150            # we're hungry.. are there any unused shares?
     151            sent = self._send_new_request()
     152            if not sent:
     153                break
     154
     155        # ok, now are we "share-hungry" (i.e. do we have enough known shares
     156        # to make us happy, or should we ask the ShareFinder to get us more?)
     157        if self._count_shnums(AVAILABLE, PENDING, COMPLETE) < k:
     158            # we're hungry for more shares
     159            self._node.want_more_shares()
     160            # that will trigger the ShareFinder to keep looking
     161
     162    def _find_one(self, shares, state):
     163        # TODO could choose fastest
     164        for s in shares:
     165            if self._shares[s] == state:
     166                return s
     167        # can never get here, caller has assert in case of code bug
     168
     169    def _send_new_request(self):
     170        for shnum,shares in sorted(self._shnums.iteritems()):
     171            states = [self._shares[s] for s in shares]
     172            if COMPLETE in states or PENDING in states:
     173                # don't send redundant requests
     174                continue
     175            if AVAILABLE not in states:
     176                # no candidates for this shnum, move on
     177                continue
     178            # here's a candidate. Send a request.
     179            s = self._find_one(shares, AVAILABLE)
     180            assert s
     181            self._shares[s] = PENDING
     182            self._share_observers[s] = o = s.get_block(self.segnum)
     183            o.subscribe(self._block_request_activity, share=s, shnum=shnum)
     184            # TODO: build up a list of candidates, then walk through the
     185            # list, sending requests to the most desireable servers,
     186            # re-checking our block-hunger each time. For non-initial segment
     187            # fetches, this would let us stick with faster servers.
     188            return True
     189        # nothing was sent: don't call us again until you have more shares to
     190        # work with, or one of the existing shares has been declared OVERDUE
     191        return False
     192
     193    def _cancel_all_requests(self):
     194        for o in self._share_observers.values():
     195            o.cancel()
     196        self._share_observers = {}
     197
     198    def _block_request_activity(self, share, shnum, state, block=None, f=None):
     199        # called by Shares, in response to our s.send_request() calls.
     200        if not self._running:
     201            return
     202        log.msg("SegmentFetcher(%s)._block_request_activity:"
     203                " Share(sh%d-on-%s) -> %s" %
     204                (self._node._si_prefix, shnum, share._peerid_s, state),
     205                level=log.NOISY, umid="vilNWA")
     206        # COMPLETE, CORRUPT, DEAD, BADSEGNUM are terminal.
     207        if state in (COMPLETE, CORRUPT, DEAD, BADSEGNUM):
     208            self._share_observers.pop(share, None)
     209        if state is COMPLETE:
     210            # 'block' is fully validated
     211            self._shares[share] = COMPLETE
     212            self._blocks[shnum] = block
     213        elif state is OVERDUE:
     214            self._shares[share] = OVERDUE
     215            # OVERDUE is not terminal: it will eventually transition to
     216            # COMPLETE, CORRUPT, or DEAD.
     217        elif state is CORRUPT:
     218            self._shares[share] = CORRUPT
     219        elif state is DEAD:
     220            del self._shares[share]
     221            self._shnums[shnum].remove(share)
     222            self._last_failure = f
     223        elif state is BADSEGNUM:
     224            self._shares[share] = BADSEGNUM # ???
     225            self._bad_segnum = True
     226        eventually(self.loop)
     227
     228
  • new file src/allmydata/immutable/downloader/finder.py

    diff --git a/src/allmydata/immutable/downloader/finder.py b/src/allmydata/immutable/downloader/finder.py
    new file mode 100644
    index 0000000..7cefefa
    - +  
     1
     2import time
     3now = time.time
     4from foolscap.api import eventually
     5from allmydata.util import base32, log, idlib
     6
     7from share import Share, CommonShare
     8from util import incidentally
     9
     10class RequestToken:
     11    def __init__(self, peerid):
     12        self.peerid = peerid
     13
     14class ShareFinder:
     15    def __init__(self, storage_broker, verifycap, node, download_status,
     16                 logparent=None, max_outstanding_requests=10):
     17        self.running = True # stopped by Share.stop, from Terminator
     18        self.verifycap = verifycap
     19        self._started = False
     20        self._storage_broker = storage_broker
     21        self.share_consumer = self.node = node
     22        self.max_outstanding_requests = max_outstanding_requests
     23
     24        self._hungry = False
     25
     26        self._commonshares = {} # shnum to CommonShare instance
     27        self.undelivered_shares = []
     28        self.pending_requests = set()
     29
     30        self._storage_index = verifycap.storage_index
     31        self._si_prefix = base32.b2a_l(self._storage_index[:8], 60)
     32        self._node_logparent = logparent
     33        self._download_status = download_status
     34        self._lp = log.msg(format="ShareFinder[si=%(si)s] starting",
     35                           si=self._si_prefix,
     36                           level=log.NOISY, parent=logparent, umid="2xjj2A")
     37
     38    def start_finding_servers(self):
     39        # don't get servers until somebody uses us: creating the
     40        # ImmutableFileNode should not cause work to happen yet. Test case is
     41        # test_dirnode, which creates us with storage_broker=None
     42        if not self._started:
     43            si = self.verifycap.storage_index
     44            s = self._storage_broker.get_servers_for_index(si)
     45            self._servers = iter(s)
     46            self._started = True
     47
     48    def log(self, *args, **kwargs):
     49        if "parent" not in kwargs:
     50            kwargs["parent"] = self._lp
     51        return log.msg(*args, **kwargs)
     52
     53    def stop(self):
     54        self.running = False
     55
     56    # called by our parent CiphertextDownloader
     57    def hungry(self):
     58        self.log(format="ShareFinder[si=%(si)s] hungry",
     59                 si=self._si_prefix, level=log.NOISY, umid="NywYaQ")
     60        self.start_finding_servers()
     61        self._hungry = True
     62        eventually(self.loop)
     63
     64    # internal methods
     65    def loop(self):
     66        undelivered_s = ",".join(["sh%d@%s" %
     67                                  (s._shnum, idlib.shortnodeid_b2a(s._peerid))
     68                                  for s in self.undelivered_shares])
     69        pending_s = ",".join([idlib.shortnodeid_b2a(rt.peerid)
     70                              for rt in self.pending_requests]) # sort?
     71        self.log(format="ShareFinder loop: running=%(running)s"
     72                 " hungry=%(hungry)s, undelivered=%(undelivered)s,"
     73                 " pending=%(pending)s",
     74                 running=self.running, hungry=self._hungry,
     75                 undelivered=undelivered_s, pending=pending_s,
     76                 level=log.NOISY, umid="kRtS4Q")
     77        if not self.running:
     78            return
     79        if not self._hungry:
     80            return
     81        if self.undelivered_shares:
     82            sh = self.undelivered_shares.pop(0)
     83            # they will call hungry() again if they want more
     84            self._hungry = False
     85            self.log(format="delivering Share(shnum=%(shnum)d, server=%(peerid)s)",
     86                     shnum=sh._shnum, peerid=sh._peerid_s,
     87                     level=log.NOISY, umid="2n1qQw")
     88            eventually(self.share_consumer.got_shares, [sh])
     89            return
     90
     91        if len(self.pending_requests) >= self.max_outstanding_requests:
     92            # cannot send more requests, must wait for some to retire
     93            return
     94
     95        server = None
     96        try:
     97            if self._servers:
     98                server = self._servers.next()
     99        except StopIteration:
     100            self._servers = None
     101
     102        if server:
     103            self.send_request(server)
     104            # we loop again to get parallel queries. The check above will
     105            # prevent us from looping forever.
     106            eventually(self.loop)
     107            return
     108
     109        if self.pending_requests:
     110            # no server, but there are still requests in flight: maybe one of
     111            # them will make progress
     112            return
     113
     114        self.log(format="ShareFinder.loop: no_more_shares, ever",
     115                 level=log.UNUSUAL, umid="XjQlzg")
     116        # we've run out of servers (so we can't send any more requests), and
     117        # we have nothing in flight. No further progress can be made. They
     118        # are destined to remain hungry.
     119        self.share_consumer.no_more_shares()
     120
     121    def send_request(self, server):
     122        peerid, rref = server
     123        req = RequestToken(peerid)
     124        self.pending_requests.add(req)
     125        lp = self.log(format="sending DYHB to [%(peerid)s]",
     126                      peerid=idlib.shortnodeid_b2a(peerid),
     127                      level=log.NOISY, umid="Io7pyg")
     128        d_ev = self._download_status.add_dyhb_sent(peerid, now())
     129        d = rref.callRemote("get_buckets", self._storage_index)
     130        d.addBoth(incidentally, self.pending_requests.discard, req)
     131        d.addCallbacks(self._got_response, self._got_error,
     132                       callbackArgs=(rref.version, peerid, req, d_ev, lp),
     133                       errbackArgs=(peerid, req, d_ev, lp))
     134        d.addErrback(log.err, format="error in send_request",
     135                     level=log.WEIRD, parent=lp, umid="rpdV0w")
     136        d.addCallback(incidentally, eventually, self.loop)
     137
     138    def _got_response(self, buckets, server_version, peerid, req, d_ev, lp):
     139        shnums = sorted([shnum for shnum in buckets])
     140        d_ev.finished(shnums, now())
     141        if buckets:
     142            shnums_s = ",".join([str(shnum) for shnum in shnums])
     143            self.log(format="got shnums [%(shnums)s] from [%(peerid)s]",
     144                     shnums=shnums_s, peerid=idlib.shortnodeid_b2a(peerid),
     145                     level=log.NOISY, parent=lp, umid="0fcEZw")
     146        else:
     147            self.log(format="no shares from [%(peerid)s]",
     148                     peerid=idlib.shortnodeid_b2a(peerid),
     149                     level=log.NOISY, parent=lp, umid="U7d4JA")
     150        if self.node.num_segments is None:
     151            best_numsegs = self.node.guessed_num_segments
     152        else:
     153            best_numsegs = self.node.num_segments
     154        for shnum, bucket in buckets.iteritems():
     155            if shnum in self._commonshares:
     156                cs = self._commonshares[shnum]
     157            else:
     158                cs = CommonShare(best_numsegs, self._si_prefix, shnum,
     159                                 self._node_logparent)
     160                # Share._get_satisfaction is responsible for updating
     161                # CommonShare.set_numsegs after we know the UEB. Alternatives:
     162                #  1: d = self.node.get_num_segments()
     163                #     d.addCallback(cs.got_numsegs)
     164                #   the problem is that the OneShotObserverList I was using
     165                #   inserts an eventual-send between _get_satisfaction's
     166                #   _satisfy_UEB and _satisfy_block_hash_tree, and the
     167                #   CommonShare didn't get the num_segs message before
     168                #   being asked to set block hash values. To resolve this
     169                #   would require an immediate ObserverList instead of
     170                #   an eventual-send -based one
     171                #  2: break _get_satisfaction into Deferred-attached pieces.
     172                #     Yuck.
     173                self._commonshares[shnum] = cs
     174            s = Share(bucket, server_version, self.verifycap, cs, self.node,
     175                      self._download_status, peerid, shnum,
     176                      self._node_logparent)
     177            self.undelivered_shares.append(s)
     178
     179    def _got_error(self, f, peerid, req, d_ev, lp):
     180        d_ev.finished("error", now())
     181        self.log(format="got error from [%(peerid)s]",
     182                 peerid=idlib.shortnodeid_b2a(peerid), failure=f,
     183                 level=log.UNUSUAL, parent=lp, umid="zUKdCw")
     184
     185
  • new file src/allmydata/immutable/downloader/node.py

    diff --git a/src/allmydata/immutable/downloader/node.py b/src/allmydata/immutable/downloader/node.py
    new file mode 100644
    index 0000000..2991c9e
    - +  
     1
     2import time
     3now = time.time
     4from twisted.python.failure import Failure
     5from twisted.internet import defer
     6from foolscap.api import eventually
     7from allmydata import uri
     8from allmydata.codec import CRSDecoder
     9from allmydata.util import base32, log, hashutil, mathutil, observer
     10from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE
     11from allmydata.hashtree import IncompleteHashTree, BadHashError, \
     12     NotEnoughHashesError
     13
     14# local imports
     15from finder import ShareFinder
     16from fetcher import SegmentFetcher
     17from segmentation import Segmentation
     18from common import BadCiphertextHashError
     19
     20class Cancel:
     21    def __init__(self, f):
     22        self._f = f
     23        self.cancelled = False
     24    def cancel(self):
     25        if not self.cancelled:
     26            self.cancelled = True
     27            self._f(self)
     28
     29class DownloadNode:
     30    """Internal class which manages downloads and holds state. External
     31    callers use CiphertextFileNode instead."""
     32
     33    # Share._node points to me
     34    def __init__(self, verifycap, storage_broker, secret_holder,
     35                 terminator, history, download_status):
     36        assert isinstance(verifycap, uri.CHKFileVerifierURI)
     37        self._verifycap = verifycap
     38        self._storage_broker = storage_broker
     39        self._si_prefix = base32.b2a_l(verifycap.storage_index[:8], 60)
     40        self.running = True
     41        if terminator:
     42            terminator.register(self) # calls self.stop() at stopService()
     43        # the rules are:
     44        # 1: Only send network requests if you're active (self.running is True)
     45        # 2: Use TimerService, not reactor.callLater
     46        # 3: You can do eventual-sends any time.
     47        # These rules should mean that once
     48        # stopService()+flushEventualQueue() fires, everything will be done.
     49        self._secret_holder = secret_holder
     50        self._history = history
     51        self._download_status = download_status
     52
     53        k, N = self._verifycap.needed_shares, self._verifycap.total_shares
     54        self.share_hash_tree = IncompleteHashTree(N)
     55
     56        # we guess the segment size, so Segmentation can pull non-initial
     57        # segments in a single roundtrip. This populates
     58        # .guessed_segment_size, .guessed_num_segments, and
     59        # .ciphertext_hash_tree (with a dummy, to let us guess which hashes
     60        # we'll need)
     61        self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)
     62
     63        # filled in when we parse a valid UEB
     64        self.have_UEB = False
     65        self.segment_size = None
     66        self.tail_segment_size = None
     67        self.tail_segment_padded = None
     68        self.num_segments = None
     69        self.block_size = None
     70        self.tail_block_size = None
     71
     72        # things to track callers that want data
     73
     74        # _segment_requests can have duplicates
     75        self._segment_requests = [] # (segnum, d, cancel_handle)
     76        self._active_segment = None # a SegmentFetcher, with .segnum
     77
     78        self._segsize_observers = observer.OneShotObserverList()
     79
     80        # we create one top-level logparent for this _Node, and another one
     81        # for each read() call. Segmentation and get_segment() messages are
     82        # associated with the read() call, everything else is tied to the
     83        # _Node's log entry.
     84        lp = log.msg(format="Immutable _Node(%(si)s) created: size=%(size)d,"
     85                     " guessed_segsize=%(guessed_segsize)d,"
     86                     " guessed_numsegs=%(guessed_numsegs)d",
     87                     si=self._si_prefix, size=verifycap.size,
     88                     guessed_segsize=self.guessed_segment_size,
     89                     guessed_numsegs=self.guessed_num_segments,
     90                     level=log.OPERATIONAL, umid="uJ0zAQ")
     91        self._lp = lp
     92
     93        self._sharefinder = ShareFinder(storage_broker, verifycap, self,
     94                                        self._download_status, lp)
     95        self._shares = set()
     96
     97    def _build_guessed_tables(self, max_segment_size):
     98        size = min(self._verifycap.size, max_segment_size)
     99        s = mathutil.next_multiple(size, self._verifycap.needed_shares)
     100        self.guessed_segment_size = s
     101        r = self._calculate_sizes(self.guessed_segment_size)
     102        self.guessed_num_segments = r["num_segments"]
     103        # as with CommonShare, our ciphertext_hash_tree is a stub until we
     104        # get the real num_segments
     105        self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments)
     106
     107    def __repr__(self):
     108        return "Imm_Node(%s)" % (self._si_prefix,)
     109
     110    def stop(self):
     111        # called by the Terminator at shutdown, mostly for tests
     112        if self._active_segment:
     113            self._active_segment.stop()
     114            self._active_segment = None
     115        self._sharefinder.stop()
     116
     117    # things called by outside callers, via CiphertextFileNode. get_segment()
     118    # may also be called by Segmentation.
     119
     120    def read(self, consumer, offset=0, size=None, read_ev=None):
     121        """I am the main entry point, from which FileNode.read() can get
     122        data. I feed the consumer with the desired range of ciphertext. I
     123        return a Deferred that fires (with the consumer) when the read is
     124        finished.
     125
     126        Note that there is no notion of a 'file pointer': each call to read()
     127        uses an independent offset= value."""
     128        # for concurrent operations: each gets its own Segmentation manager
     129        if size is None:
     130            size = self._verifycap.size
     131        # clip size so offset+size does not go past EOF
     132        size = min(size, self._verifycap.size-offset)
     133        if read_ev is None:
     134            read_ev = self._download_status.add_read_event(offset, size, now())
     135
     136        lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)",
     137                     si=base32.b2a(self._verifycap.storage_index)[:8],
     138                     offset=offset, size=size,
     139                     level=log.OPERATIONAL, parent=self._lp, umid="l3j3Ww")
     140        if self._history:
     141            sp = self._history.stats_provider
     142            sp.count("downloader.files_downloaded", 1) # really read() calls
     143            sp.count("downloader.bytes_downloaded", size)
     144        s = Segmentation(self, offset, size, consumer, read_ev, lp)
     145        # this raises an interesting question: what segments to fetch? if
     146        # offset=0, always fetch the first segment, and then allow
     147        # Segmentation to be responsible for pulling the subsequent ones if
     148        # the first wasn't large enough. If offset>0, we're going to need an
     149        # extra roundtrip to get the UEB (and therefore the segment size)
     150        # before we can figure out which segment to get. TODO: allow the
     151        # offset-table-guessing code (which starts by guessing the segsize)
     152        # to assist the offset>0 process.
     153        d = s.start()
     154        def _done(res):
     155            read_ev.finished(now())
     156            return res
     157        d.addBoth(_done)
     158        return d
     159
     160    def get_segment(self, segnum, logparent=None):
     161        """Begin downloading a segment. I return a tuple (d, c): 'd' is a
     162        Deferred that fires with (offset,data) when the desired segment is
     163        available, and c is an object on which c.cancel() can be called to
     164        disavow interest in the segment (after which 'd' will never fire).
     165
     166        You probably need to know the segment size before calling this,
     167        unless you want the first few bytes of the file. If you ask for a
     168        segment number which turns out to be too large, the Deferred will
     169        errback with BadSegmentNumberError.
     170
     171        The Deferred fires with the offset of the first byte of the data
     172        segment, so that you can call get_segment() before knowing the
     173        segment size, and still know which data you received.
     174
     175        The Deferred can also errback with other fatal problems, such as
     176        NotEnoughSharesError, NoSharesError, or BadCiphertextHashError.
     177        """
     178        log.msg(format="imm Node(%(si)s).get_segment(%(segnum)d)",
     179                si=base32.b2a(self._verifycap.storage_index)[:8],
     180                segnum=segnum,
     181                level=log.OPERATIONAL, parent=logparent, umid="UKFjDQ")
     182        self._download_status.add_segment_request(segnum, now())
     183        d = defer.Deferred()
     184        c = Cancel(self._cancel_request)
     185        self._segment_requests.append( (segnum, d, c) )
     186        self._start_new_segment()
     187        return (d, c)
     188
     189    def get_segsize(self):
     190        """Return a Deferred that fires when we know the real segment size."""
     191        if self.segment_size:
     192            return defer.succeed(self.segment_size)
     193        # TODO: this downloads (and discards) the first segment of the file.
     194        # We could make this more efficient by writing
     195        # fetcher.SegmentSizeFetcher, with the job of finding a single valid
     196        # share and extracting the UEB. We'd add Share.get_UEB() to request
     197        # just the UEB.
     198        (d,c) = self.get_segment(0)
     199        # this ensures that an error during get_segment() will errback the
     200        # caller, so Repair won't wait forever on completely missing files
     201        d.addCallback(lambda ign: self._segsize_observers.when_fired())
     202        return d
     203
     204    # things called by the Segmentation object used to transform
     205    # arbitrary-sized read() calls into quantized segment fetches
     206
     207    def _start_new_segment(self):
     208        if self._active_segment is None and self._segment_requests:
     209            segnum = self._segment_requests[0][0]
     210            k = self._verifycap.needed_shares
     211            log.msg(format="%(node)s._start_new_segment: segnum=%(segnum)d",
     212                    node=repr(self), segnum=segnum,
     213                    level=log.NOISY, umid="wAlnHQ")
     214            self._active_segment = fetcher = SegmentFetcher(self, segnum, k)
     215            active_shares = [s for s in self._shares if s.is_alive()]
     216            fetcher.add_shares(active_shares) # this triggers the loop
     217
     218
     219    # called by our child ShareFinder
     220    def got_shares(self, shares):
     221        self._shares.update(shares)
     222        if self._active_segment:
     223            self._active_segment.add_shares(shares)
     224    def no_more_shares(self):
     225        self._no_more_shares = True
     226        if self._active_segment:
     227            self._active_segment.no_more_shares()
     228
     229    # things called by our Share instances
     230
     231    def validate_and_store_UEB(self, UEB_s):
     232        log.msg("validate_and_store_UEB",
     233                level=log.OPERATIONAL, parent=self._lp, umid="7sTrPw")
     234        h = hashutil.uri_extension_hash(UEB_s)
     235        if h != self._verifycap.uri_extension_hash:
     236            raise BadHashError
     237        UEB_dict = uri.unpack_extension(UEB_s)
     238        self._parse_and_store_UEB(UEB_dict) # sets self._stuff
     239        # TODO: a malformed (but authentic) UEB could throw an assertion in
     240        # _parse_and_store_UEB, and we should abandon the download.
     241        self.have_UEB = True
     242
     243    def _parse_and_store_UEB(self, d):
     244        # Note: the UEB contains needed_shares and total_shares. These are
     245        # redundant and inferior (the filecap contains the authoritative
     246        # values). However, because it is possible to encode the same file in
     247        # multiple ways, and the encoders might choose (poorly) to use the
     248        # same key for both (therefore getting the same SI), we might
     249        # encounter shares for both types. The UEB hashes will be different,
     250        # however, and we'll disregard the "other" encoding's shares as
     251        # corrupted.
     252
     253        # therefore, we ignore d['total_shares'] and d['needed_shares'].
     254
     255        log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
     256                ueb=repr(d), vcap=self._verifycap.to_string(),
     257                level=log.NOISY, parent=self._lp, umid="cVqZnA")
     258
     259        k, N = self._verifycap.needed_shares, self._verifycap.total_shares
     260
     261        self.segment_size = d['segment_size']
     262        self._segsize_observers.fire(self.segment_size)
     263
     264        r = self._calculate_sizes(self.segment_size)
     265        self.tail_segment_size = r["tail_segment_size"]
     266        self.tail_segment_padded = r["tail_segment_padded"]
     267        self.num_segments = r["num_segments"]
     268        self.block_size = r["block_size"]
     269        self.tail_block_size = r["tail_block_size"]
     270        log.msg("actual sizes: %s" % (r,),
     271                level=log.NOISY, parent=self._lp, umid="PY6P5Q")
     272        if (self.segment_size == self.guessed_segment_size
     273            and self.num_segments == self.guessed_num_segments):
     274            log.msg("my guess was right!",
     275                    level=log.NOISY, parent=self._lp, umid="x340Ow")
     276        else:
     277            log.msg("my guess was wrong! Extra round trips for me.",
     278                    level=log.NOISY, parent=self._lp, umid="tb7RJw")
     279
     280        # zfec.Decode() instantiation is fast, but still, let's use the same
     281        # codec instance for all but the last segment. 3-of-10 takes 15us on
     282        # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
     283        # 2.5ms, worst-case 254-of-255 is 9.3ms
     284        self._codec = CRSDecoder()
     285        self._codec.set_params(self.segment_size, k, N)
     286
     287
     288        # Ciphertext hash tree root is mandatory, so that there is at most
     289        # one ciphertext that matches this read-cap or verify-cap. The
     290        # integrity check on the shares is not sufficient to prevent the
     291        # original encoder from creating some shares of file A and other
     292        # shares of file B. self.ciphertext_hash_tree was a guess before:
     293        # this is where we create it for real.
     294        self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
     295        self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})
     296
     297        self.share_hash_tree.set_hashes({0: d['share_root_hash']})
     298
     299        # Our job is a fast download, not verification, so we ignore any
     300        # redundant fields. The Verifier uses a different code path which
     301        # does not ignore them.
     302
     303    def _calculate_sizes(self, segment_size):
     304        # segments of ciphertext
     305        size = self._verifycap.size
     306        k = self._verifycap.needed_shares
     307
     308        # this assert matches the one in encode.py:127 inside
     309        # Encoded._got_all_encoding_parameters, where the UEB is constructed
     310        assert segment_size % k == 0
     311
     312        # the last segment is usually short. We don't store a whole segsize,
     313        # but we do pad the segment up to a multiple of k, because the
     314        # encoder requires that.
     315        tail_segment_size = size % segment_size
     316        if tail_segment_size == 0:
     317            tail_segment_size = segment_size
     318        padded = mathutil.next_multiple(tail_segment_size, k)
     319        tail_segment_padded = padded
     320
     321        num_segments = mathutil.div_ceil(size, segment_size)
     322
     323        # each segment is turned into N blocks. All but the last are of size
     324        # block_size, and the last is of size tail_block_size
     325        block_size = segment_size / k
     326        tail_block_size = tail_segment_padded / k
     327
     328        return { "tail_segment_size": tail_segment_size,
     329                 "tail_segment_padded": tail_segment_padded,
     330                 "num_segments": num_segments,
     331                 "block_size": block_size,
     332                 "tail_block_size": tail_block_size,
     333                 }
     334
     335
     336    def process_share_hashes(self, share_hashes):
     337        for hashnum in share_hashes:
     338            if hashnum >= len(self.share_hash_tree):
     339                # "BadHashError" is normally for e.g. a corrupt block. We
     340                # sort of abuse it here to mean a badly numbered hash (which
     341                # indicates corruption in the number bytes, rather than in
     342                # the data bytes).
     343                raise BadHashError("hashnum %d doesn't fit in hashtree(%d)"
     344                                   % (hashnum, len(self.share_hash_tree)))
     345        self.share_hash_tree.set_hashes(share_hashes)
     346
     347    def get_needed_ciphertext_hashes(self, segnum):
     348        cht = self.ciphertext_hash_tree
     349        return cht.needed_hashes(segnum, include_leaf=True)
     350    def process_ciphertext_hashes(self, hashes):
     351        assert self.num_segments is not None
     352        # this may raise BadHashError or NotEnoughHashesError
     353        self.ciphertext_hash_tree.set_hashes(hashes)
     354
     355
     356    # called by our child SegmentFetcher
     357
     358    def want_more_shares(self):
     359        self._sharefinder.hungry()
     360
     361    def fetch_failed(self, sf, f):
     362        assert sf is self._active_segment
     363        self._active_segment = None
     364        # deliver error upwards
     365        for (d,c) in self._extract_requests(sf.segnum):
     366            eventually(self._deliver, d, c, f)
     367
     368    def process_blocks(self, segnum, blocks):
     369        d = defer.maybeDeferred(self._decode_blocks, segnum, blocks)
     370        d.addCallback(self._check_ciphertext_hash, segnum)
     371        def _deliver(result):
     372            ds = self._download_status
     373            if isinstance(result, Failure):
     374                ds.add_segment_error(segnum, now())
     375            else:
     376                (offset, segment, decodetime) = result
     377                ds.add_segment_delivery(segnum, now(),
     378                                        offset, len(segment), decodetime)
     379            log.msg(format="delivering segment(%(segnum)d)",
     380                    segnum=segnum,
     381                    level=log.OPERATIONAL, parent=self._lp,
     382                    umid="j60Ojg")
     383            for (d,c) in self._extract_requests(segnum):
     384                eventually(self._deliver, d, c, result)
     385            self._active_segment = None
     386            self._start_new_segment()
     387        d.addBoth(_deliver)
     388        d.addErrback(lambda f:
     389                     log.err("unhandled error during process_blocks",
     390                             failure=f, level=log.WEIRD,
     391                             parent=self._lp, umid="MkEsCg"))
     392
     393    def _decode_blocks(self, segnum, blocks):
     394        tail = (segnum == self.num_segments-1)
     395        codec = self._codec
     396        block_size = self.block_size
     397        decoded_size = self.segment_size
     398        if tail:
     399            # account for the padding in the last segment
     400            codec = CRSDecoder()
     401            k, N = self._verifycap.needed_shares, self._verifycap.total_shares
     402            codec.set_params(self.tail_segment_padded, k, N)
     403            block_size = self.tail_block_size
     404            decoded_size = self.tail_segment_padded
     405
     406        shares = []
     407        shareids = []
     408        for (shareid, share) in blocks.iteritems():
     409            assert len(share) == block_size
     410            shareids.append(shareid)
     411            shares.append(share)
     412        del blocks
     413
     414        start = now()
     415        d = codec.decode(shares, shareids)   # segment
     416        del shares
     417        def _process(buffers):
     418            decodetime = now() - start
     419            segment = "".join(buffers)
     420            assert len(segment) == decoded_size
     421            del buffers
     422            if tail:
     423                segment = segment[:self.tail_segment_size]
     424            return (segment, decodetime)
     425        d.addCallback(_process)
     426        return d
     427
     428    def _check_ciphertext_hash(self, (segment, decodetime), segnum):
     429        assert self._active_segment.segnum == segnum
     430        assert self.segment_size is not None
     431        offset = segnum * self.segment_size
     432
     433        h = hashutil.crypttext_segment_hash(segment)
     434        try:
     435            self.ciphertext_hash_tree.set_hashes(leaves={segnum: h})
     436            return (offset, segment, decodetime)
     437        except (BadHashError, NotEnoughHashesError):
     438            format = ("hash failure in ciphertext_hash_tree:"
     439                      " segnum=%(segnum)d, SI=%(si)s")
     440            log.msg(format=format, segnum=segnum, si=self._si_prefix,
     441                    failure=Failure(),
     442                    level=log.WEIRD, parent=self._lp, umid="MTwNnw")
     443            # this is especially weird, because we made it past the share
     444            # hash tree. It implies that we're using the wrong encoding, or
     445            # that the uploader deliberately constructed a bad UEB.
     446            msg = format % {"segnum": segnum, "si": self._si_prefix}
     447            raise BadCiphertextHashError(msg)
     448
     449    def _deliver(self, d, c, result):
     450        # this method exists to handle cancel() that occurs between
     451        # _got_segment and _deliver
     452        if not c.cancelled:
     453            d.callback(result) # might actually be an errback
     454
     455    def _extract_requests(self, segnum):
     456        """Remove matching requests and return their (d,c) tuples so that the
     457        caller can retire them."""
     458        retire = [(d,c) for (segnum0, d, c) in self._segment_requests
     459                  if segnum0 == segnum]
     460        self._segment_requests = [t for t in self._segment_requests
     461                                  if t[0] != segnum]
     462        return retire
     463
     464    def _cancel_request(self, c):
     465        self._segment_requests = [t for t in self._segment_requests
     466                                  if t[2] != c]
     467        segnums = [segnum for (segnum,d,c) in self._segment_requests]
     468        if self._active_segment.segnum not in segnums:
     469            self._active_segment.stop()
     470            self._active_segment = None
     471            self._start_new_segment()
  • new file src/allmydata/immutable/downloader/segmentation.py

    diff --git a/src/allmydata/immutable/downloader/segmentation.py b/src/allmydata/immutable/downloader/segmentation.py
    new file mode 100644
    index 0000000..adc138e
    - +  
     1
     2import time
     3now = time.time
     4from zope.interface import implements
     5from twisted.internet import defer
     6from twisted.internet.interfaces import IPushProducer
     7from foolscap.api import eventually
     8from allmydata.util import log
     9from allmydata.util.spans import overlap
     10
     11from common import BadSegmentNumberError, WrongSegmentError
     12
     13class Segmentation:
     14    """I am responsible for a single offset+size read of the file. I handle
     15    segmentation: I figure out which segments are necessary, request them
     16    (from my CiphertextDownloader) in order, and trim the segments down to
     17    match the offset+size span. I use the Producer/Consumer interface to only
     18    request one segment at a time.
     19    """
     20    implements(IPushProducer)
     21    def __init__(self, node, offset, size, consumer, read_ev, logparent=None):
     22        self._node = node
     23        self._hungry = True
     24        self._active_segnum = None
     25        self._cancel_segment_request = None
     26        # these are updated as we deliver data. At any given time, we still
     27        # want to download file[offset:offset+size]
     28        self._offset = offset
     29        self._size = size
     30        assert offset+size <= node._verifycap.size
     31        self._consumer = consumer
     32        self._read_ev = read_ev
     33        self._start_pause = None
     34        self._lp = logparent
     35
     36    def start(self):
     37        self._alive = True
     38        self._deferred = defer.Deferred()
     39        self._consumer.registerProducer(self, True)
     40        self._maybe_fetch_next()
     41        return self._deferred
     42
     43    def _maybe_fetch_next(self):
     44        if not self._alive or not self._hungry:
     45            return
     46        if self._active_segnum is not None:
     47            return
     48        self._fetch_next()
     49
     50    def _fetch_next(self):
     51        if self._size == 0:
     52            # done!
     53            self._alive = False
     54            self._hungry = False
     55            self._consumer.unregisterProducer()
     56            self._deferred.callback(self._consumer)
     57            return
     58        n = self._node
     59        have_actual_segment_size = n.segment_size is not None
     60        guess_s = ""
     61        if not have_actual_segment_size:
     62            guess_s = "probably "
     63        segment_size = n.segment_size or n.guessed_segment_size
     64        if self._offset == 0:
     65            # great! we want segment0 for sure
     66            wanted_segnum = 0
     67        else:
     68            # this might be a guess
     69            wanted_segnum = self._offset // segment_size
     70        log.msg(format="_fetch_next(offset=%(offset)d) %(guess)swants segnum=%(segnum)d",
     71                offset=self._offset, guess=guess_s, segnum=wanted_segnum,
     72                level=log.NOISY, parent=self._lp, umid="5WfN0w")
     73        self._active_segnum = wanted_segnum
     74        d,c = n.get_segment(wanted_segnum, self._lp)
     75        self._cancel_segment_request = c
     76        d.addBoth(self._request_retired)
     77        d.addCallback(self._got_segment, wanted_segnum)
     78        if not have_actual_segment_size:
     79            # we can retry once
     80            d.addErrback(self._retry_bad_segment)
     81        d.addErrback(self._error)
     82
     83    def _request_retired(self, res):
     84        self._active_segnum = None
     85        self._cancel_segment_request = None
     86        return res
     87
     88    def _got_segment(self, (segment_start,segment,decodetime), wanted_segnum):
     89        self._cancel_segment_request = None
     90        # we got file[segment_start:segment_start+len(segment)]
     91        # we want file[self._offset:self._offset+self._size]
     92        log.msg(format="Segmentation got data:"
     93                " want [%(wantstart)d-%(wantend)d),"
     94                " given [%(segstart)d-%(segend)d), for segnum=%(segnum)d",
     95                wantstart=self._offset, wantend=self._offset+self._size,
     96                segstart=segment_start, segend=segment_start+len(segment),
     97                segnum=wanted_segnum,
     98                level=log.OPERATIONAL, parent=self._lp, umid="32dHcg")
     99
     100        o = overlap(segment_start, len(segment),  self._offset, self._size)
     101        # the overlap is file[o[0]:o[0]+o[1]]
     102        if not o or o[0] != self._offset:
     103            # we didn't get the first byte, so we can't use this segment
     104            log.msg("Segmentation handed wrong data:"
     105                    " want [%d-%d), given [%d-%d), for segnum=%d,"
     106                    " for si=%s"
     107                    % (self._offset, self._offset+self._size,
     108                       segment_start, segment_start+len(segment),
     109                       wanted_segnum, self._node._si_prefix),
     110                    level=log.UNUSUAL, parent=self._lp, umid="STlIiA")
     111            # we may retry if the segnum we asked was based on a guess
     112            raise WrongSegmentError("I was given the wrong data.")
     113        offset_in_segment = self._offset - segment_start
     114        desired_data = segment[offset_in_segment:offset_in_segment+o[1]]
     115
     116        self._offset += len(desired_data)
     117        self._size -= len(desired_data)
     118        self._consumer.write(desired_data)
     119        # the consumer might call our .pauseProducing() inside that write()
     120        # call, setting self._hungry=False
     121        self._read_ev.update(len(desired_data), 0, 0)
     122        self._maybe_fetch_next()
     123
     124    def _retry_bad_segment(self, f):
     125        f.trap(WrongSegmentError, BadSegmentNumberError)
     126        # we guessed the segnum wrong: either one that doesn't overlap with
     127        # the start of our desired region, or one that's beyond the end of
     128        # the world. Now that we have the right information, we're allowed to
     129        # retry once.
     130        assert self._node.segment_size is not None
     131        return self._maybe_fetch_next()
     132
     133    def _error(self, f):
     134        log.msg("Error in Segmentation", failure=f,
     135                level=log.WEIRD, parent=self._lp, umid="EYlXBg")
     136        self._alive = False
     137        self._hungry = False
     138        self._consumer.unregisterProducer()
     139        self._deferred.errback(f)
     140
     141    def stopProducing(self):
     142        self._hungry = False
     143        self._alive = False
     144        # cancel any outstanding segment request
     145        if self._cancel_segment_request:
     146            self._cancel_segment_request.cancel()
     147            self._cancel_segment_request = None
     148    def pauseProducing(self):
     149        self._hungry = False
     150        self._start_pause = now()
     151    def resumeProducing(self):
     152        self._hungry = True
     153        eventually(self._maybe_fetch_next)
     154        if self._start_pause is not None:
     155            paused = now() - self._start_pause
     156            self._read_ev.update(0, 0, paused)
     157            self._start_pause = None
  • new file src/allmydata/immutable/downloader/share.py

    diff --git a/src/allmydata/immutable/downloader/share.py b/src/allmydata/immutable/downloader/share.py
    new file mode 100644
    index 0000000..c4dbd73
    - +  
     1
     2import struct
     3import time
     4now = time.time
     5
     6from twisted.python.failure import Failure
     7from foolscap.api import eventually
     8from allmydata.util import base32, log, hashutil, mathutil
     9from allmydata.util.spans import Spans, DataSpans
     10from allmydata.interfaces import HASH_SIZE
     11from allmydata.hashtree import IncompleteHashTree, BadHashError, \
     12     NotEnoughHashesError
     13
     14from allmydata.immutable.layout import make_write_bucket_proxy
     15from util import Observer2
     16from common import COMPLETE, CORRUPT, DEAD, BADSEGNUM
     17
     18
     19class LayoutInvalid(Exception):
     20    pass
     21class DataUnavailable(Exception):
     22    pass
     23
     24class Share:
     25    """I represent a single instance of a single share (e.g. I reference the
     26    shnum2 for share SI=abcde on server xy12t, not the one on server ab45q).
     27    I am associated with a CommonShare that remembers data that is held in
     28    common among e.g. SI=abcde/shnum2 across all servers. I am also
     29    associated with a CiphertextFileNode for e.g. SI=abcde (all shares, all
     30    servers).
     31    """
     32    # this is a specific implementation of IShare for tahoe's native storage
     33    # servers. A different backend would use a different class.
     34
     35    def __init__(self, rref, server_version, verifycap, commonshare, node,
     36                 download_status, peerid, shnum, logparent):
     37        self._rref = rref
     38        self._server_version = server_version
     39        self._node = node # holds share_hash_tree and UEB
     40        self.actual_segment_size = node.segment_size # might still be None
     41        # XXX change node.guessed_segment_size to
     42        # node.best_guess_segment_size(), which should give us the real ones
     43        # if known, else its guess.
     44        self._guess_offsets(verifycap, node.guessed_segment_size)
     45        self.actual_offsets = None
     46        self._UEB_length = None
     47        self._commonshare = commonshare # holds block_hash_tree
     48        self._download_status = download_status
     49        self._peerid = peerid
     50        self._peerid_s = base32.b2a(peerid)[:5]
     51        self._storage_index = verifycap.storage_index
     52        self._si_prefix = base32.b2a(verifycap.storage_index)[:8]
     53        self._shnum = shnum
     54        # self._alive becomes False upon fatal corruption or server error
     55        self._alive = True
     56        self._lp = log.msg(format="%(share)s created", share=repr(self),
     57                           level=log.NOISY, parent=logparent, umid="P7hv2w")
     58
     59        self._pending = Spans() # request sent but no response received yet
     60        self._received = DataSpans() # ACK response received, with data
     61        self._unavailable = Spans() # NAK response received, no data
     62
     63        # any given byte of the share can be in one of four states:
     64        #  in: _wanted, _requested, _received
     65        #      FALSE    FALSE       FALSE : don't care about it at all
     66        #      TRUE     FALSE       FALSE : want it, haven't yet asked for it
     67        #      TRUE     TRUE        FALSE : request is in-flight
     68        #                                   or didn't get it
     69        #      FALSE    TRUE        TRUE  : got it, haven't used it yet
     70        #      FALSE    TRUE        FALSE : got it and used it
     71        #      FALSE    FALSE       FALSE : block consumed, ready to ask again
     72        #
     73        # when we request data and get a NAK, we leave it in _requested
     74        # to remind ourself to not ask for it again. We don't explicitly
     75        # remove it from anything (maybe this should change).
     76        #
     77        # We retain the hashtrees in the Node, so we leave those spans in
     78        # _requested (and never ask for them again, as long as the Node is
     79        # alive). But we don't retain data blocks (too big), so when we
     80        # consume a data block, we remove it from _requested, so a later
     81        # download can re-fetch it.
     82
     83        self._requested_blocks = [] # (segnum, set(observer2..))
     84        ver = server_version["http://allmydata.org/tahoe/protocols/storage/v1"]
     85        self._overrun_ok = ver["tolerates-immutable-read-overrun"]
     86        # If _overrun_ok and we guess the offsets correctly, we can get
     87        # everything in one RTT. If _overrun_ok and we guess wrong, we might
     88        # need two RTT (but we could get lucky and do it in one). If overrun
     89        # is *not* ok (tahoe-1.3.0 or earlier), we need four RTT: 1=version,
     90        # 2=offset table, 3=UEB_length and everything else (hashes, block),
     91        # 4=UEB.
     92
     93        self.had_corruption = False # for unit tests
     94
     95    def __repr__(self):
     96        return "Share(sh%d-on-%s)" % (self._shnum, self._peerid_s)
     97
     98    def is_alive(self):
     99        # XXX: reconsider. If the share sees a single error, should it remain
     100        # dead for all time? Or should the next segment try again? This DEAD
     101        # state is stored elsewhere too (SegmentFetcher per-share states?)
     102        # and needs to be consistent. We clear _alive in self._fail(), which
     103        # is called upon a network error, or layout failure, or hash failure
     104        # in the UEB or a hash tree. We do not _fail() for a hash failure in
     105        # a block, but of course we still tell our callers about
     106        # state=CORRUPT so they'll find a different share.
     107        return self._alive
     108
     109    def _guess_offsets(self, verifycap, guessed_segment_size):
     110        self.guessed_segment_size = guessed_segment_size
     111        size = verifycap.size
     112        k = verifycap.needed_shares
     113        N = verifycap.total_shares
     114        r = self._node._calculate_sizes(guessed_segment_size)
     115        # num_segments, block_size/tail_block_size
     116        # guessed_segment_size/tail_segment_size/tail_segment_padded
     117        share_size = mathutil.div_ceil(size, k)
     118        # share_size is the amount of block data that will be put into each
     119        # share, summed over all segments. It does not include hashes, the
     120        # UEB, or other overhead.
     121
     122        # use the upload-side code to get this as accurate as possible
     123        ht = IncompleteHashTree(N)
     124        num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
     125        wbp = make_write_bucket_proxy(None, share_size, r["block_size"],
     126                                      r["num_segments"], num_share_hashes, 0,
     127                                      None)
     128        self._fieldsize = wbp.fieldsize
     129        self._fieldstruct = wbp.fieldstruct
     130        self.guessed_offsets = wbp._offsets
     131
     132    # called by our client, the SegmentFetcher
     133    def get_block(self, segnum):
     134        """Add a block number to the list of requests. This will eventually
     135        result in a fetch of the data necessary to validate the block, then
     136        the block itself. The fetch order is generally
     137        first-come-first-served, but requests may be answered out-of-order if
     138        data becomes available sooner.
     139
     140        I return an Observer2, which has two uses. The first is to call
     141        o.subscribe(), which gives me a place to send state changes and
     142        eventually the data block. The second is o.cancel(), which removes
     143        the request (if it is still active).
     144
     145        I will distribute the following events through my Observer2:
     146         - state=OVERDUE: ?? I believe I should have had an answer by now.
     147                          You may want to ask another share instead.
     148         - state=BADSEGNUM: the segnum you asked for is too large. I must
     149                            fetch a valid UEB before I can determine this,
     150                            so the notification is asynchronous
     151         - state=COMPLETE, block=data: here is a valid block
     152         - state=CORRUPT: this share contains corrupted data
     153         - state=DEAD, f=Failure: the server reported an error, this share
     154                                  is unusable
     155        """
     156        log.msg("%s.get_block(%d)" % (repr(self), segnum),
     157                level=log.NOISY, parent=self._lp, umid="RTo9MQ")
     158        assert segnum >= 0
     159        o = Observer2()
     160        o.set_canceler(self, "_cancel_block_request")
     161        for i,(segnum0,observers) in enumerate(self._requested_blocks):
     162            if segnum0 == segnum:
     163                observers.add(o)
     164                break
     165        else:
     166            self._requested_blocks.append( (segnum, set([o])) )
     167        eventually(self.loop)
     168        return o
     169
     170    def _cancel_block_request(self, o):
     171        new_requests = []
     172        for e in self._requested_blocks:
     173            (segnum0, observers) = e
     174            observers.discard(o)
     175            if observers:
     176                new_requests.append(e)
     177        self._requested_blocks = new_requests
     178
     179    # internal methods
     180    def _active_segnum_and_observers(self):
     181        if self._requested_blocks:
     182            # we only retrieve information for one segment at a time, to
     183            # minimize alacrity (first come, first served)
     184            return self._requested_blocks[0]
     185        return None, []
     186
     187    def loop(self):
     188        try:
     189            # if any exceptions occur here, kill the download
     190            log.msg("%s.loop, reqs=[%s], pending=%s, received=%s,"
     191                    " unavailable=%s" %
     192                    (repr(self),
     193                     ",".join([str(req[0]) for req in self._requested_blocks]),
     194                     self._pending.dump(), self._received.dump(),
     195                     self._unavailable.dump() ),
     196                    level=log.NOISY, parent=self._lp, umid="BaL1zw")
     197            self._do_loop()
     198            # all exception cases call self._fail(), which clears self._alive
     199        except (BadHashError, NotEnoughHashesError, LayoutInvalid), e:
     200            # Abandon this share. We do this if we see corruption in the
     201            # offset table, the UEB, or a hash tree. We don't abandon the
     202            # whole share if we see corruption in a data block (we abandon
     203            # just the one block, and still try to get data from other blocks
     204            # on the same server). In theory, we could get good data from a
     205            # share with a corrupt UEB (by first getting the UEB from some
     206            # other share), or corrupt hash trees, but the logic to decide
     207            # when this is safe is non-trivial. So for now, give up at the
     208            # first sign of corruption.
     209            #
     210            # _satisfy_*() code which detects corruption should first call
     211            # self._signal_corruption(), and then raise the exception.
     212            log.msg(format="corruption detected in %(share)s",
     213                    share=repr(self),
     214                    level=log.UNUSUAL, parent=self._lp, umid="gWspVw")
     215            self._fail(Failure(e), log.UNUSUAL)
     216        except DataUnavailable, e:
     217            # Abandon this share.
     218            log.msg(format="need data that will never be available"
     219                    " from %s: pending=%s, received=%s, unavailable=%s" %
     220                    (repr(self),
     221                     self._pending.dump(), self._received.dump(),
     222                     self._unavailable.dump() ),
     223                    level=log.UNUSUAL, parent=self._lp, umid="F7yJnQ")
     224            self._fail(Failure(e), log.UNUSUAL)
     225        except BaseException:
     226            self._fail(Failure())
     227            raise
     228        log.msg("%s.loop done, reqs=[%s], pending=%s, received=%s,"
     229                " unavailable=%s" %
     230                (repr(self),
     231                 ",".join([str(req[0]) for req in self._requested_blocks]),
     232                 self._pending.dump(), self._received.dump(),
     233                 self._unavailable.dump() ),
     234                level=log.NOISY, parent=self._lp, umid="9lRaRA")
     235
     236    def _do_loop(self):
     237        # we are (eventually) called after all state transitions:
     238        #  new segments added to self._requested_blocks
     239        #  new data received from servers (responses to our read() calls)
     240        #  impatience timer fires (server appears slow)
     241        if not self._alive:
     242            return
     243
     244        # First, consume all of the information that we currently have, for
     245        # all the segments people currently want.
     246        while self._get_satisfaction():
     247            pass
     248
     249        # When we get no satisfaction (from the data we've received so far),
     250        # we determine what data we desire (to satisfy more requests). The
     251        # number of segments is finite, so I can't get no satisfaction
     252        # forever.
     253        wanted, needed = self._desire()
     254
     255        # Finally, send out requests for whatever we need (desire minus
     256        # have). You can't always get what you want, but if you try
     257        # sometimes, you just might find, you get what you need.
     258        self._send_requests(wanted + needed)
     259
     260        # and sometimes you can't even get what you need
     261        disappointment = needed & self._unavailable
     262        if len(disappointment):
     263            self.had_corruption = True
     264            raise DataUnavailable("need %s but will never get it" %
     265                                  disappointment.dump())
     266
     267    def _get_satisfaction(self):
     268        # return True if we retired a data block, and should therefore be
     269        # called again. Return False if we don't retire a data block (even if
     270        # we do retire some other data, like hash chains).
     271
     272        if self.actual_offsets is None:
     273            if not self._satisfy_offsets():
     274                # can't even look at anything without the offset table
     275                return False
     276
     277        if not self._node.have_UEB:
     278            if not self._satisfy_UEB():
     279                # can't check any hashes without the UEB
     280                return False
     281        self.actual_segment_size = self._node.segment_size # might be updated
     282        assert self.actual_segment_size is not None
     283
     284        # knowing the UEB means knowing num_segments. Despite the redundancy,
     285        # this is the best place to set this. CommonShare.set_numsegs will
     286        # ignore duplicate calls.
     287        assert self._node.num_segments is not None
     288        cs = self._commonshare
     289        cs.set_numsegs(self._node.num_segments)
     290
     291        segnum, observers = self._active_segnum_and_observers()
     292        # if segnum is None, we don't really need to do anything (we have no
     293        # outstanding readers right now), but we'll fill in the bits that
     294        # aren't tied to any particular segment.
     295
     296        if segnum is not None and segnum >= self._node.num_segments:
     297            for o in observers:
     298                o.notify(state=BADSEGNUM)
     299            self._requested_blocks.pop(0)
     300            return True
     301
     302        if self._node.share_hash_tree.needed_hashes(self._shnum):
     303            if not self._satisfy_share_hash_tree():
     304                # can't check block_hash_tree without a root
     305                return False
     306
     307        if cs.need_block_hash_root():
     308            block_hash_root = self._node.share_hash_tree.get_leaf(self._shnum)
     309            cs.set_block_hash_root(block_hash_root)
     310
     311        if segnum is None:
     312            return False # we don't want any particular segment right now
     313
     314        # block_hash_tree
     315        needed_hashes = self._commonshare.get_needed_block_hashes(segnum)
     316        if needed_hashes:
     317            if not self._satisfy_block_hash_tree(needed_hashes):
     318                # can't check block without block_hash_tree
     319                return False
     320
     321        # ciphertext_hash_tree
     322        needed_hashes = self._node.get_needed_ciphertext_hashes(segnum)
     323        if needed_hashes:
     324            if not self._satisfy_ciphertext_hash_tree(needed_hashes):
     325                # can't check decoded blocks without ciphertext_hash_tree
     326                return False
     327
     328        # data blocks
     329        return self._satisfy_data_block(segnum, observers)
     330
     331    def _satisfy_offsets(self):
     332        version_s = self._received.get(0, 4)
     333        if version_s is None:
     334            return False
     335        (version,) = struct.unpack(">L", version_s)
     336        if version == 1:
     337            table_start = 0x0c
     338            self._fieldsize = 0x4
     339            self._fieldstruct = "L"
     340        elif version == 2:
     341            table_start = 0x14
     342            self._fieldsize = 0x8
     343            self._fieldstruct = "Q"
     344        else:
     345            self.had_corruption = True
     346            raise LayoutInvalid("unknown version %d (I understand 1 and 2)"
     347                                % version)
     348        offset_table_size = 6 * self._fieldsize
     349        table_s = self._received.pop(table_start, offset_table_size)
     350        if table_s is None:
     351            return False
     352        fields = struct.unpack(">"+6*self._fieldstruct, table_s)
     353        offsets = {}
     354        for i,field in enumerate(['data',
     355                                  'plaintext_hash_tree', # UNUSED
     356                                  'crypttext_hash_tree',
     357                                  'block_hashes',
     358                                  'share_hashes',
     359                                  'uri_extension',
     360                                  ] ):
     361            offsets[field] = fields[i]
     362        self.actual_offsets = offsets
     363        log.msg("actual offsets: data=%d, plaintext_hash_tree=%d, crypttext_hash_tree=%d, block_hashes=%d, share_hashes=%d, uri_extension=%d" % tuple(fields))
     364        self._received.remove(0, 4) # don't need this anymore
     365
     366        # validate the offsets a bit
     367        share_hashes_size = offsets["uri_extension"] - offsets["share_hashes"]
     368        if share_hashes_size < 0 or share_hashes_size % (2+HASH_SIZE) != 0:
     369            # the share hash chain is stored as (hashnum,hash) pairs
     370            self.had_corruption = True
     371            raise LayoutInvalid("share hashes malformed -- should be a"
     372                                " multiple of %d bytes -- not %d" %
     373                                (2+HASH_SIZE, share_hashes_size))
     374        block_hashes_size = offsets["share_hashes"] - offsets["block_hashes"]
     375        if block_hashes_size < 0 or block_hashes_size % (HASH_SIZE) != 0:
     376            # the block hash tree is stored as a list of hashes
     377            self.had_corruption = True
     378            raise LayoutInvalid("block hashes malformed -- should be a"
     379                                " multiple of %d bytes -- not %d" %
     380                                (HASH_SIZE, block_hashes_size))
     381        # we only look at 'crypttext_hash_tree' if the UEB says we're
     382        # actually using it. Same with 'plaintext_hash_tree'. This gives us
     383        # some wiggle room: a place to stash data for later extensions.
     384
     385        return True
     386
     387    def _satisfy_UEB(self):
     388        o = self.actual_offsets
     389        fsize = self._fieldsize
     390        UEB_length_s = self._received.get(o["uri_extension"], fsize)
     391        if not UEB_length_s:
     392            return False
     393        (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s)
     394        UEB_s = self._received.pop(o["uri_extension"]+fsize, UEB_length)
     395        if not UEB_s:
     396            return False
     397        self._received.remove(o["uri_extension"], fsize)
     398        try:
     399            self._node.validate_and_store_UEB(UEB_s)
     400            return True
     401        except (LayoutInvalid, BadHashError), e:
     402            # TODO: if this UEB was bad, we'll keep trying to validate it
     403            # over and over again. Only log.err on the first one, or better
     404            # yet skip all but the first
     405            f = Failure(e)
     406            self._signal_corruption(f, o["uri_extension"], fsize+UEB_length)
     407            self.had_corruption = True
     408            raise
     409
     410    def _satisfy_share_hash_tree(self):
     411        # the share hash chain is stored as (hashnum,hash) tuples, so you
     412        # can't fetch just the pieces you need, because you don't know
     413        # exactly where they are. So fetch everything, and parse the results
     414        # later.
     415        o = self.actual_offsets
     416        hashlen = o["uri_extension"] - o["share_hashes"]
     417        assert hashlen % (2+HASH_SIZE) == 0
     418        hashdata = self._received.get(o["share_hashes"], hashlen)
     419        if not hashdata:
     420            return False
     421        share_hashes = {}
     422        for i in range(0, hashlen, 2+HASH_SIZE):
     423            (hashnum,) = struct.unpack(">H", hashdata[i:i+2])
     424            hashvalue = hashdata[i+2:i+2+HASH_SIZE]
     425            share_hashes[hashnum] = hashvalue
     426        try:
     427            self._node.process_share_hashes(share_hashes)
     428            # adds to self._node.share_hash_tree
     429        except (BadHashError, NotEnoughHashesError), e:
     430            f = Failure(e)
     431            self._signal_corruption(f, o["share_hashes"], hashlen)
     432            self.had_corruption = True
     433            raise
     434        self._received.remove(o["share_hashes"], hashlen)
     435        return True
     436
     437    def _signal_corruption(self, f, start, offset):
     438        # there was corruption somewhere in the given range
     439        reason = "corruption in share[%d-%d): %s" % (start, start+offset,
     440                                                     str(f.value))
     441        self._rref.callRemoteOnly("advise_corrupt_share", reason)
     442
     443    def _satisfy_block_hash_tree(self, needed_hashes):
     444        o_bh = self.actual_offsets["block_hashes"]
     445        block_hashes = {}
     446        for hashnum in needed_hashes:
     447            hashdata = self._received.get(o_bh+hashnum*HASH_SIZE, HASH_SIZE)
     448            if hashdata:
     449                block_hashes[hashnum] = hashdata
     450            else:
     451                return False # missing some hashes
     452        # note that we don't submit any hashes to the block_hash_tree until
     453        # we've gotten them all, because the hash tree will throw an
     454        # exception if we only give it a partial set (which it therefore
     455        # cannot validate)
     456        try:
     457            self._commonshare.process_block_hashes(block_hashes)
     458        except (BadHashError, NotEnoughHashesError), e:
     459            f = Failure(e)
     460            hashnums = ",".join([str(n) for n in sorted(block_hashes.keys())])
     461            log.msg(format="hash failure in block_hashes=(%(hashnums)s),"
     462                    " from %(share)s",
     463                    hashnums=hashnums, shnum=self._shnum, share=repr(self),
     464                    failure=f, level=log.WEIRD, parent=self._lp, umid="yNyFdA")
     465            hsize = max(0, max(needed_hashes)) * HASH_SIZE
     466            self._signal_corruption(f, o_bh, hsize)
     467            self.had_corruption = True
     468            raise
     469        for hashnum in needed_hashes:
     470            self._received.remove(o_bh+hashnum*HASH_SIZE, HASH_SIZE)
     471        return True
     472
     473    def _satisfy_ciphertext_hash_tree(self, needed_hashes):
     474        start = self.actual_offsets["crypttext_hash_tree"]
     475        hashes = {}
     476        for hashnum in needed_hashes:
     477            hashdata = self._received.get(start+hashnum*HASH_SIZE, HASH_SIZE)
     478            if hashdata:
     479                hashes[hashnum] = hashdata
     480            else:
     481                return False # missing some hashes
     482        # we don't submit any hashes to the ciphertext_hash_tree until we've
     483        # gotten them all
     484        try:
     485            self._node.process_ciphertext_hashes(hashes)
     486        except (BadHashError, NotEnoughHashesError), e:
     487            f = Failure(e)
     488            hashnums = ",".join([str(n) for n in sorted(hashes.keys())])
     489            log.msg(format="hash failure in ciphertext_hashes=(%(hashnums)s),"
     490                    " from %(share)s",
     491                    hashnums=hashnums, share=repr(self), failure=f,
     492                    level=log.WEIRD, parent=self._lp, umid="iZI0TA")
     493            hsize = max(0, max(needed_hashes))*HASH_SIZE
     494            self._signal_corruption(f, start, hsize)
     495            self.had_corruption = True
     496            raise
     497        for hashnum in needed_hashes:
     498            self._received.remove(start+hashnum*HASH_SIZE, HASH_SIZE)
     499        return True
     500
     501    def _satisfy_data_block(self, segnum, observers):
     502        tail = (segnum == self._node.num_segments-1)
     503        datastart = self.actual_offsets["data"]
     504        blockstart = datastart + segnum * self._node.block_size
     505        blocklen = self._node.block_size
     506        if tail:
     507            blocklen = self._node.tail_block_size
     508
     509        block = self._received.pop(blockstart, blocklen)
     510        if not block:
     511            log.msg("no data for block %s (want [%d:+%d])" % (repr(self),
     512                                                              blockstart, blocklen))
     513            return False
     514        log.msg(format="%(share)s._satisfy_data_block [%(start)d:+%(length)d]",
     515                share=repr(self), start=blockstart, length=blocklen,
     516                level=log.NOISY, parent=self._lp, umid="uTDNZg")
     517        # this block is being retired, either as COMPLETE or CORRUPT, since
     518        # no further data reads will help
     519        assert self._requested_blocks[0][0] == segnum
     520        try:
     521            self._commonshare.check_block(segnum, block)
     522            # hurrah, we have a valid block. Deliver it.
     523            for o in observers:
     524                # goes to SegmentFetcher._block_request_activity
     525                o.notify(state=COMPLETE, block=block)
     526        except (BadHashError, NotEnoughHashesError), e:
     527            # rats, we have a corrupt block. Notify our clients that they
     528            # need to look elsewhere, and advise the server. Unlike
     529            # corruption in other parts of the share, this doesn't cause us
     530            # to abandon the whole share.
     531            f = Failure(e)
     532            log.msg(format="hash failure in block %(segnum)d, from %(share)s",
     533                    segnum=segnum, share=repr(self), failure=f,
     534                    level=log.WEIRD, parent=self._lp, umid="mZjkqA")
     535            for o in observers:
     536                o.notify(state=CORRUPT)
     537            self._signal_corruption(f, blockstart, blocklen)
     538            self.had_corruption = True
     539        # in either case, we've retired this block
     540        self._requested_blocks.pop(0)
     541        # popping the request keeps us from turning around and wanting the
     542        # block again right away
     543        return True # got satisfaction
     544
     545    def _desire(self):
     546        segnum, observers = self._active_segnum_and_observers() # maybe None
     547
     548        # 'want_it' is for data we merely want: we know that we don't really
     549        # need it. This includes speculative reads, like the first 1KB of the
     550        # share (for the offset table) and the first 2KB of the UEB.
     551        #
     552        # 'need_it' is for data that, if we have the real offset table, we'll
     553        # need. If we are only guessing at the offset table, it's merely
     554        # wanted. (The share is abandoned if we can't get data that we really
     555        # need).
     556        #
     557        # 'gotta_gotta_have_it' is for data that we absolutely need,
     558        # independent of whether we're still guessing about the offset table:
     559        # the version number and the offset table itself.
     560        #
     561        # Mr. Popeil, I'm in trouble, need your assistance on the double. Aww..
     562
     563        desire = Spans(), Spans(), Spans()
     564        (want_it, need_it, gotta_gotta_have_it) = desire
     565
     566        self.actual_segment_size = self._node.segment_size # might be updated
     567        o = self.actual_offsets or self.guessed_offsets
     568        segsize = self.actual_segment_size or self.guessed_segment_size
     569        r = self._node._calculate_sizes(segsize)
     570
     571        if not self.actual_offsets:
     572            # all _desire functions add bits to the three desire[] spans
     573            self._desire_offsets(desire)
     574
     575        # we can use guessed offsets as long as this server tolerates
     576        # overrun. Otherwise, we must wait for the offsets to arrive before
     577        # we try to read anything else.
     578        if self.actual_offsets or self._overrun_ok:
     579            if not self._node.have_UEB:
     580                self._desire_UEB(desire, o)
     581            # They might ask for a segment that doesn't look right.
     582            # _satisfy() will catch+reject bad segnums once we know the UEB
     583            # (and therefore segsize and numsegs), so we'll only fail this
     584            # test if we're still guessing. We want to avoid asking the
     585            # hashtrees for needed_hashes() for bad segnums. So don't enter
     586            # _desire_hashes or _desire_data unless the segnum looks
     587            # reasonable.
     588            if segnum < r["num_segments"]:
     589                # XXX somehow we're getting here for sh5. we don't yet know
     590                # the actual_segment_size, we're still working off the guess.
     591                # the ciphertext_hash_tree has been corrected, but the
     592                # commonshare._block_hash_tree is still in the guessed state.
     593                self._desire_share_hashes(desire, o)
     594                if segnum is not None:
     595                    self._desire_block_hashes(desire, o, segnum)
     596                    self._desire_data(desire, o, r, segnum, segsize)
     597            else:
     598                log.msg("_desire: segnum(%d) looks wrong (numsegs=%d)"
     599                        % (segnum, r["num_segments"]),
     600                        level=log.UNUSUAL, parent=self._lp, umid="tuYRQQ")
     601
     602        log.msg("end _desire: want_it=%s need_it=%s gotta=%s"
     603                % (want_it.dump(), need_it.dump(), gotta_gotta_have_it.dump()))
     604        if self.actual_offsets:
     605            return (want_it, need_it+gotta_gotta_have_it)
     606        else:
     607            return (want_it+need_it, gotta_gotta_have_it)
     608
     609    def _desire_offsets(self, desire):
     610        (want_it, need_it, gotta_gotta_have_it) = desire
     611        if self._overrun_ok:
     612            # easy! this includes version number, sizes, and offsets
     613            want_it.add(0, 1024)
     614            return
     615
     616        # v1 has an offset table that lives [0x0,0x24). v2 lives [0x0,0x44).
     617        # To be conservative, only request the data that we know lives there,
     618        # even if that means more roundtrips.
     619
     620        gotta_gotta_have_it.add(0, 4)  # version number, always safe
     621        version_s = self._received.get(0, 4)
     622        if not version_s:
     623            return
     624        (version,) = struct.unpack(">L", version_s)
     625        # The code in _satisfy_offsets will have checked this version
     626        # already. There is no code path to get this far with version>2.
     627        assert 1 <= version <= 2, "can't get here, version=%d" % version
     628        if version == 1:
     629            table_start = 0x0c
     630            fieldsize = 0x4
     631        elif version == 2:
     632            table_start = 0x14
     633            fieldsize = 0x8
     634        offset_table_size = 6 * fieldsize
     635        gotta_gotta_have_it.add(table_start, offset_table_size)
     636
     637    def _desire_UEB(self, desire, o):
     638        (want_it, need_it, gotta_gotta_have_it) = desire
     639
     640        # UEB data is stored as (length,data).
     641        if self._overrun_ok:
     642            # We can pre-fetch 2kb, which should probably cover it. If it
     643            # turns out to be larger, we'll come back here later with a known
     644            # length and fetch the rest.
     645            want_it.add(o["uri_extension"], 2048)
     646            # now, while that is probably enough to fetch the whole UEB, it
     647            # might not be, so we need to do the next few steps as well. In
     648            # most cases, the following steps will not actually add anything
     649            # to need_it
     650
     651        need_it.add(o["uri_extension"], self._fieldsize)
     652        # only use a length if we're sure it's correct, otherwise we'll
     653        # probably fetch a huge number
     654        if not self.actual_offsets:
     655            return
     656        UEB_length_s = self._received.get(o["uri_extension"], self._fieldsize)
     657        if UEB_length_s:
     658            (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s)
     659            # we know the length, so make sure we grab everything
     660            need_it.add(o["uri_extension"]+self._fieldsize, UEB_length)
     661
     662    def _desire_share_hashes(self, desire, o):
     663        (want_it, need_it, gotta_gotta_have_it) = desire
     664
     665        if self._node.share_hash_tree.needed_hashes(self._shnum):
     666            hashlen = o["uri_extension"] - o["share_hashes"]
     667            need_it.add(o["share_hashes"], hashlen)
     668
     669    def _desire_block_hashes(self, desire, o, segnum):
     670        (want_it, need_it, gotta_gotta_have_it) = desire
     671
     672        # block hash chain
     673        for hashnum in self._commonshare.get_needed_block_hashes(segnum):
     674            need_it.add(o["block_hashes"]+hashnum*HASH_SIZE, HASH_SIZE)
     675
     676        # ciphertext hash chain
     677        for hashnum in self._node.get_needed_ciphertext_hashes(segnum):
     678            need_it.add(o["crypttext_hash_tree"]+hashnum*HASH_SIZE, HASH_SIZE)
     679
     680    def _desire_data(self, desire, o, r, segnum, segsize):
     681        (want_it, need_it, gotta_gotta_have_it) = desire
     682        tail = (segnum == r["num_segments"]-1)
     683        datastart = o["data"]
     684        blockstart = datastart + segnum * r["block_size"]
     685        blocklen = r["block_size"]
     686        if tail:
     687            blocklen = r["tail_block_size"]
     688        need_it.add(blockstart, blocklen)
     689
     690    def _send_requests(self, desired):
     691        ask = desired - self._pending - self._received.get_spans()
     692        log.msg("%s._send_requests, desired=%s, pending=%s, ask=%s" %
     693                (repr(self), desired.dump(), self._pending.dump(), ask.dump()),
     694                level=log.NOISY, parent=self._lp, umid="E94CVA")
     695        # XXX At one time, this code distinguished between data blocks and
     696        # hashes, and made sure to send (small) requests for hashes before
     697        # sending (big) requests for blocks. The idea was to make sure that
     698        # all hashes arrive before the blocks, so the blocks can be consumed
     699        # and released in a single turn. I removed this for simplicity.
     700        # Reconsider the removal: maybe bring it back.
     701        ds = self._download_status
     702
     703        for (start, length) in ask:
     704            # TODO: quantize to reasonably-large blocks
     705            self._pending.add(start, length)
     706            lp = log.msg(format="%(share)s._send_request"
     707                         " [%(start)d:+%(length)d]",
     708                         share=repr(self),
     709                         start=start, length=length,
     710                         level=log.NOISY, parent=self._lp, umid="sgVAyA")
     711            req_ev = ds.add_request_sent(self._peerid, self._shnum,
     712                                         start, length, now())
     713            d = self._send_request(start, length)
     714            d.addCallback(self._got_data, start, length, req_ev, lp)
     715            d.addErrback(self._got_error, start, length, req_ev, lp)
     716            d.addCallback(self._trigger_loop)
     717            d.addErrback(lambda f:
     718                         log.err(format="unhandled error during send_request",
     719                                 failure=f, parent=self._lp,
     720                                 level=log.WEIRD, umid="qZu0wg"))
     721
     722    def _send_request(self, start, length):
     723        return self._rref.callRemote("read", start, length)
     724
     725    def _got_data(self, data, start, length, req_ev, lp):
     726        req_ev.finished(len(data), now())
     727        if not self._alive:
     728            return
     729        log.msg(format="%(share)s._got_data [%(start)d:+%(length)d] -> %(datalen)d",
     730                share=repr(self), start=start, length=length, datalen=len(data),
     731                level=log.NOISY, parent=lp, umid="5Qn6VQ")
     732        self._pending.remove(start, length)
     733        self._received.add(start, data)
     734
     735        # if we ask for [a:c], and we get back [a:b] (b<c), that means we're
     736        # never going to get [b:c]. If we really need that data, this block
     737        # will never complete. The easiest way to get into this situation is
     738        # to hit a share with a corrupted offset table, or one that's somehow
     739        # been truncated. On the other hand, when overrun_ok is true, we ask
     740        # for data beyond the end of the share all the time (it saves some
     741        # RTT when we don't know the length of the share ahead of time). So
     742        # not every asked-for-but-not-received byte is fatal.
     743        if len(data) < length:
     744            self._unavailable.add(start+len(data), length-len(data))
     745
     746        # XXX if table corruption causes our sections to overlap, then one
     747        # consumer (i.e. block hash tree) will pop/remove the data that
     748        # another consumer (i.e. block data) mistakenly thinks it needs. It
     749        # won't ask for that data again, because the span is in
     750        # self._requested. But that span won't be in self._unavailable
     751        # because we got it back from the server. TODO: handle this properly
     752        # (raise DataUnavailable). Then add sanity-checking
     753        # no-overlaps-allowed tests to the offset-table unpacking code to
     754        # catch this earlier. XXX
     755
     756        # accumulate a wanted/needed span (not as self._x, but passed into
     757        # desire* functions). manage a pending/in-flight list. when the
     758        # requests are sent out, empty/discard the wanted/needed span and
     759        # populate/augment the pending list. when the responses come back,
     760        # augment either received+data or unavailable.
     761
     762        # if a corrupt offset table results in double-usage, we'll send
     763        # double requests.
     764
     765        # the wanted/needed span is only "wanted" for the first pass. Once
     766        # the offset table arrives, it's all "needed".
     767
     768    def _got_error(self, f, start, length, req_ev, lp):
     769        req_ev.finished("error", now())
     770        log.msg(format="error requesting %(start)d+%(length)d"
     771                " from %(server)s for si %(si)s",
     772                start=start, length=length,
     773                server=self._peerid_s, si=self._si_prefix,
     774                failure=f, parent=lp, level=log.UNUSUAL, umid="BZgAJw")
     775        # retire our observers, assuming we won't be able to make any
     776        # further progress
     777        self._fail(f, log.UNUSUAL)
     778
     779    def _trigger_loop(self, res):
     780        if self._alive:
     781            eventually(self.loop)
     782        return res
     783
     784    def _fail(self, f, level=log.WEIRD):
     785        log.msg(format="abandoning %(share)s",
     786                share=repr(self), failure=f,
     787                level=level, parent=self._lp, umid="JKM2Og")
     788        self._alive = False
     789        for (segnum, observers) in self._requested_blocks:
     790            for o in observers:
     791                o.notify(state=DEAD, f=f)
     792
     793
     794class CommonShare:
     795    """I hold data that is common across all instances of a single share,
     796    like sh2 on both servers A and B. This is just the block hash tree.
     797    """
     798    def __init__(self, guessed_numsegs, si_prefix, shnum, logparent):
     799        self.si_prefix = si_prefix
     800        self.shnum = shnum
     801        # in the beginning, before we have the real UEB, we can only guess at
     802        # the number of segments. But we want to ask for block hashes early.
     803        # So if we're asked for which block hashes are needed before we know
     804        # numsegs for sure, we return a guess.
     805        self._block_hash_tree = IncompleteHashTree(guessed_numsegs)
     806        self._know_numsegs = False
     807        self._logparent = logparent
     808
     809    def set_numsegs(self, numsegs):
     810        if self._know_numsegs:
     811            return
     812        self._block_hash_tree = IncompleteHashTree(numsegs)
     813        self._know_numsegs = True
     814
     815    def need_block_hash_root(self):
     816        return bool(not self._block_hash_tree[0])
     817
     818    def set_block_hash_root(self, roothash):
     819        assert self._know_numsegs
     820        self._block_hash_tree.set_hashes({0: roothash})
     821
     822    def get_needed_block_hashes(self, segnum):
     823        # XXX: include_leaf=True needs thought: how did the old downloader do
     824        # it? I think it grabbed *all* block hashes and set them all at once.
     825        # Since we want to fetch less data, we either need to fetch the leaf
     826        # too, or wait to set the block hashes until we've also received the
     827        # block itself, so we can hash it too, and set the chain+leaf all at
     828        # the same time.
     829        return self._block_hash_tree.needed_hashes(segnum, include_leaf=True)
     830
     831    def process_block_hashes(self, block_hashes):
     832        assert self._know_numsegs
     833        # this may raise BadHashError or NotEnoughHashesError
     834        self._block_hash_tree.set_hashes(block_hashes)
     835
     836    def check_block(self, segnum, block):
     837        assert self._know_numsegs
     838        h = hashutil.block_hash(block)
     839        # this may raise BadHashError or NotEnoughHashesError
     840        self._block_hash_tree.set_hashes(leaves={segnum: h})
  • new file src/allmydata/immutable/downloader/status.py

    diff --git a/src/allmydata/immutable/downloader/status.py b/src/allmydata/immutable/downloader/status.py
    new file mode 100644
    index 0000000..5d60db0
    - +  
     1
     2import itertools
     3from zope.interface import implements
     4from allmydata.interfaces import IDownloadStatus
     5
     6class RequestEvent:
     7    def __init__(self, download_status, tag):
     8        self._download_status = download_status
     9        self._tag = tag
     10    def finished(self, received, when):
     11        self._download_status.add_request_finished(self._tag, received, when)
     12
     13class DYHBEvent:
     14    def __init__(self, download_status, tag):
     15        self._download_status = download_status
     16        self._tag = tag
     17    def finished(self, shnums, when):
     18        self._download_status.add_dyhb_finished(self._tag, shnums, when)
     19
     20class ReadEvent:
     21    def __init__(self, download_status, tag):
     22        self._download_status = download_status
     23        self._tag = tag
     24    def update(self, bytes, decrypttime, pausetime):
     25        self._download_status.update_read_event(self._tag, bytes,
     26                                                decrypttime, pausetime)
     27    def finished(self, finishtime):
     28        self._download_status.finish_read_event(self._tag, finishtime)
     29
     30class DownloadStatus:
     31    # There is one DownloadStatus for each CiphertextFileNode. The status
     32    # object will keep track of all activity for that node.
     33    implements(IDownloadStatus)
     34    statusid_counter = itertools.count(0)
     35
     36    def __init__(self, storage_index, size):
     37        self.storage_index = storage_index
     38        self.size = size
     39        self.counter = self.statusid_counter.next()
     40        self.helper = False
     41        self.started = None
     42        # self.dyhb_requests tracks "do you have a share" requests and
     43        # responses. It maps serverid to a tuple of:
     44        #  send time
     45        #  tuple of response shnums (None if response hasn't arrived, "error")
     46        #  response time (None if response hasn't arrived yet)
     47        self.dyhb_requests = {}
     48
     49        # self.requests tracks share-data requests and responses. It maps
     50        # serverid to a tuple of:
     51        #  shnum,
     52        #  start,length,  (of data requested)
     53        #  send time
     54        #  response length (None if reponse hasn't arrived yet, or "error")
     55        #  response time (None if response hasn't arrived)
     56        self.requests = {}
     57
     58        # self.segment_events tracks segment requests and delivery. It is a
     59        # list of:
     60        #  type ("request", "delivery", "error")
     61        #  segment number
     62        #  event time
     63        #  segment start (file offset of first byte, None except in "delivery")
     64        #  segment length (only in "delivery")
     65        #  time spent in decode (only in "delivery")
     66        self.segment_events = []
     67
     68        # self.read_events tracks read() requests. It is a list of:
     69        #  start,length  (of data requested)
     70        #  request time
     71        #  finish time (None until finished)
     72        #  bytes returned (starts at 0, grows as segments are delivered)
     73        #  time spent in decrypt (None for ciphertext-only reads)
     74        #  time spent paused
     75        self.read_events = []
     76
     77        self.known_shares = [] # (serverid, shnum)
     78        self.problems = []
     79
     80
     81    def add_dyhb_sent(self, serverid, when):
     82        r = (when, None, None)
     83        if serverid not in self.dyhb_requests:
     84            self.dyhb_requests[serverid] = []
     85        self.dyhb_requests[serverid].append(r)
     86        tag = (serverid, len(self.dyhb_requests[serverid])-1)
     87        return DYHBEvent(self, tag)
     88
     89    def add_dyhb_finished(self, tag, shnums, when):
     90        # received="error" on error, else tuple(shnums)
     91        (serverid, index) = tag
     92        r = self.dyhb_requests[serverid][index]
     93        (sent, _, _) = r
     94        r = (sent, shnums, when)
     95        self.dyhb_requests[serverid][index] = r
     96
     97    def add_request_sent(self, serverid, shnum, start, length, when):
     98        r = (shnum, start, length, when, None, None)
     99        if serverid not in self.requests:
     100            self.requests[serverid] = []
     101        self.requests[serverid].append(r)
     102        tag = (serverid, len(self.requests[serverid])-1)
     103        return RequestEvent(self, tag)
     104
     105    def add_request_finished(self, tag, received, when):
     106        # received="error" on error, else len(data)
     107        (serverid, index) = tag
     108        r = self.requests[serverid][index]
     109        (shnum, start, length, sent, _, _) = r
     110        r = (shnum, start, length, sent, received, when)
     111        self.requests[serverid][index] = r
     112
     113    def add_segment_request(self, segnum, when):
     114        if self.started is None:
     115            self.started = when
     116        r = ("request", segnum, when, None, None, None)
     117        self.segment_events.append(r)
     118    def add_segment_delivery(self, segnum, when, start, length, decodetime):
     119        r = ("delivery", segnum, when, start, length, decodetime)
     120        self.segment_events.append(r)
     121    def add_segment_error(self, segnum, when):
     122        r = ("error", segnum, when, None, None, None)
     123        self.segment_events.append(r)
     124
     125    def add_read_event(self, start, length, when):
     126        if self.started is None:
     127            self.started = when
     128        r = (start, length, when, None, 0, 0, 0)
     129        self.read_events.append(r)
     130        tag = len(self.read_events)-1
     131        return ReadEvent(self, tag)
     132    def update_read_event(self, tag, bytes_d, decrypt_d, paused_d):
     133        r = self.read_events[tag]
     134        (start, length, requesttime, finishtime, bytes, decrypt, paused) = r
     135        bytes += bytes_d
     136        decrypt += decrypt_d
     137        paused += paused_d
     138        r = (start, length, requesttime, finishtime, bytes, decrypt, paused)
     139        self.read_events[tag] = r
     140    def finish_read_event(self, tag, finishtime):
     141        r = self.read_events[tag]
     142        (start, length, requesttime, _, bytes, decrypt, paused) = r
     143        r = (start, length, requesttime, finishtime, bytes, decrypt, paused)
     144        self.read_events[tag] = r
     145
     146    def add_known_share(self, serverid, shnum):
     147        self.known_shares.append( (serverid, shnum) )
     148
     149    def add_problem(self, p):
     150        self.problems.append(p)
     151
     152    # IDownloadStatus methods
     153    def get_counter(self):
     154        return self.counter
     155    def get_storage_index(self):
     156        return self.storage_index
     157    def get_size(self):
     158        return self.size
     159    def get_status(self):
     160        return "not impl yet" # TODO
     161    def get_progress(self):
     162        return 0.1 # TODO
     163    def using_helper(self):
     164        return False
     165    def get_active(self):
     166        return False # TODO
     167    def get_started(self):
     168        return self.started
     169    def get_results(self):
     170        return None # TODO
  • new file src/allmydata/immutable/downloader/util.py

    diff --git a/src/allmydata/immutable/downloader/util.py b/src/allmydata/immutable/downloader/util.py
    new file mode 100644
    index 0000000..d45f5cc
    - +  
     1import weakref
     2
     3from twisted.application import service
     4from foolscap.api import eventually
     5
     6class Observer2:
     7    """A simple class to distribute multiple events to a single subscriber.
     8    It accepts arbitrary kwargs, but no posargs."""
     9    def __init__(self):
     10        self._watcher = None
     11        self._undelivered_results = []
     12        self._canceler = None
     13
     14    def set_canceler(self, c, methname):
     15        """I will call c.METHNAME(self) when somebody cancels me."""
     16        # we use a weakref to avoid creating a cycle between us and the thing
     17        # we're observing: they'll be holding a reference to us to compare
     18        # against the value we pass to their canceler function. However,
     19        # since bound methods are first-class objects (and not kept alive by
     20        # the object they're bound to), we can't just stash a weakref to the
     21        # bound cancel method. Instead, we must hold a weakref to the actual
     22        # object, and obtain its cancel method later.
     23        # http://code.activestate.com/recipes/81253-weakmethod/ has an
     24        # alternative.
     25        self._canceler = (weakref.ref(c), methname)
     26
     27    def subscribe(self, observer, **watcher_kwargs):
     28        self._watcher = (observer, watcher_kwargs)
     29        while self._undelivered_results:
     30            self._notify(self._undelivered_results.pop(0))
     31
     32    def notify(self, **result_kwargs):
     33        if self._watcher:
     34            self._notify(result_kwargs)
     35        else:
     36            self._undelivered_results.append(result_kwargs)
     37
     38    def _notify(self, result_kwargs):
     39        o, watcher_kwargs = self._watcher
     40        kwargs = dict(result_kwargs)
     41        kwargs.update(watcher_kwargs)
     42        eventually(o, **kwargs)
     43
     44    def cancel(self):
     45        wr,methname = self._canceler
     46        o = wr()
     47        if o:
     48            getattr(o,methname)(self)
     49
     50
     51def incidentally(res, f, *args, **kwargs):
     52    """Add me to a Deferred chain like this:
     53     d.addBoth(incidentally, func, arg)
     54    and I'll behave as if you'd added the following function:
     55     def _(res):
     56         func(arg)
     57         return res
     58    This is useful if you want to execute an expression when the Deferred
     59    fires, but don't care about its value.
     60    """
     61    f(*args, **kwargs)
     62    return res
     63
     64
     65class Terminator(service.Service):
     66    def __init__(self):
     67        self._clients = weakref.WeakKeyDictionary()
     68    def register(self, c):
     69        self._clients[c] = None
     70    def stopService(self):
     71        for c in self._clients:
     72            c.stop()
     73        return service.Service.stopService(self)
  • src/allmydata/immutable/filenode.py

    diff --git a/src/allmydata/immutable/filenode.py b/src/allmydata/immutable/filenode.py
    index 70044a7..1d5be94 100644
    a b  
    1 import copy, os.path, stat
    2 from cStringIO import StringIO
     1
     2import binascii
     3import copy
     4import time
     5now = time.time
    36from zope.interface import implements
    47from twisted.internet import defer
    5 from twisted.internet.interfaces import IPushProducer
    6 from twisted.protocols import basic
    7 from foolscap.api import eventually
    8 from allmydata.interfaces import IImmutableFileNode, ICheckable, \
    9      IDownloadTarget, IUploadResults
    10 from allmydata.util import dictutil, log, base32
    11 from allmydata.uri import CHKFileURI, LiteralFileURI
    12 from allmydata.immutable.checker import Checker
    13 from allmydata.check_results import CheckResults, CheckAndRepairResults
    14 from allmydata.immutable.repairer import Repairer
    15 from allmydata.immutable import download
    16 
    17 class _ImmutableFileNodeBase(object):
    18     implements(IImmutableFileNode, ICheckable)
    19 
    20     def get_write_uri(self):
    21         return None
    22 
    23     def get_readonly_uri(self):
    24         return self.get_uri()
    25 
    26     def is_mutable(self):
    27         return False
    28 
    29     def is_readonly(self):
    30         return True
    31 
    32     def is_unknown(self):
    33         return False
    34 
    35     def is_allowed_in_immutable_directory(self):
    36         return True
    37 
    38     def raise_error(self):
    39         pass
    40 
    41     def __hash__(self):
    42         return self.u.__hash__()
    43     def __eq__(self, other):
    44         if isinstance(other, _ImmutableFileNodeBase):
    45             return self.u.__eq__(other.u)
    46         else:
    47             return False
    48     def __ne__(self, other):
    49         if isinstance(other, _ImmutableFileNodeBase):
    50             return self.u.__eq__(other.u)
    51         else:
    52             return True
    53 
    54 class PortionOfFile:
    55     # like a list slice (things[2:14]), but for a file on disk
    56     def __init__(self, fn, offset=0, size=None):
    57         self.f = open(fn, "rb")
    58         self.f.seek(offset)
    59         self.bytes_left = size
    60 
    61     def read(self, size=None):
    62         # bytes_to_read = min(size, self.bytes_left), but None>anything
    63         if size is None:
    64             bytes_to_read = self.bytes_left
    65         elif self.bytes_left is None:
    66             bytes_to_read = size
    67         else:
    68             bytes_to_read = min(size, self.bytes_left)
    69         data = self.f.read(bytes_to_read)
    70         if self.bytes_left is not None:
    71             self.bytes_left -= len(data)
    72         return data
    73 
    74 class DownloadCache:
    75     implements(IDownloadTarget)
    76 
    77     def __init__(self, filecap, storage_index, downloader,
    78                  cachedirectorymanager):
    79         self._downloader = downloader
    80         self._uri = filecap
    81         self._storage_index = storage_index
    82         self.milestones = set() # of (offset,size,Deferred)
    83         self.cachedirectorymanager = cachedirectorymanager
    84         self.cachefile = None
    85         self.download_in_progress = False
    86         # five states:
    87         #  new ImmutableFileNode, no downloads ever performed
    88         #  new ImmutableFileNode, leftover file (partial)
    89         #  new ImmutableFileNode, leftover file (whole)
    90         #  download in progress, not yet complete
    91         #  download complete
    92 
    93     def when_range_available(self, offset, size):
    94         assert isinstance(offset, (int,long))
    95         assert isinstance(size, (int,long))
    96 
    97         d = defer.Deferred()
    98         self.milestones.add( (offset,size,d) )
    99         self._check_milestones()
    100         if self.milestones and not self.download_in_progress:
    101             self.download_in_progress = True
    102             log.msg(format=("immutable filenode read [%(si)s]: " +
    103                             "starting download"),
    104                     si=base32.b2a(self._storage_index),
    105                     umid="h26Heg", level=log.OPERATIONAL)
    106             d2 = self._downloader.download(self._uri, self)
    107             d2.addBoth(self._download_done)
    108             d2.addErrback(self._download_failed)
    109             d2.addErrback(log.err, umid="cQaM9g")
    110         return d
    111 
    112     def read(self, consumer, offset, size):
    113         assert offset+size <= self.get_filesize()
    114         if not self.cachefile:
    115             self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index))
    116         f = PortionOfFile(self.cachefile.get_filename(), offset, size)
    117         d = basic.FileSender().beginFileTransfer(f, consumer)
    118         d.addCallback(lambda lastSent: consumer)
    119         return d
    120 
    121     def _download_done(self, res):
    122         # clear download_in_progress, so failed downloads can be re-tried
    123         self.download_in_progress = False
    124         return res
    125 
    126     def _download_failed(self, f):
    127         # tell anyone who's waiting that we failed
    128         for m in self.milestones:
    129             (offset,size,d) = m
    130             eventually(d.errback, f)
    131         self.milestones.clear()
    132 
    133     def _check_milestones(self):
    134         current_size = self.get_filesize()
    135         for m in list(self.milestones):
    136             (offset,size,d) = m
    137             if offset+size <= current_size:
    138                 log.msg(format=("immutable filenode read [%(si)s] " +
    139                                 "%(offset)d+%(size)d vs %(filesize)d: " +
    140                                 "done"),
    141                         si=base32.b2a(self._storage_index),
    142                         offset=offset, size=size, filesize=current_size,
    143                         umid="nuedUg", level=log.NOISY)
    144                 self.milestones.discard(m)
    145                 eventually(d.callback, None)
    146             else:
    147                 log.msg(format=("immutable filenode read [%(si)s] " +
    148                                 "%(offset)d+%(size)d vs %(filesize)d: " +
    149                                 "still waiting"),
    150                         si=base32.b2a(self._storage_index),
    151                         offset=offset, size=size, filesize=current_size,
    152                         umid="8PKOhg", level=log.NOISY)
    153 
    154     def get_filesize(self):
    155         if not self.cachefile:
    156             self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index))
    157         try:
    158             filesize = os.stat(self.cachefile.get_filename())[stat.ST_SIZE]
    159         except OSError:
    160             filesize = 0
    161         return filesize
    162 
    163 
    164     def open(self, size):
    165         if not self.cachefile:
    166             self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index))
    167         self.f = open(self.cachefile.get_filename(), "wb")
    168 
    169     def write(self, data):
    170         self.f.write(data)
    171         self._check_milestones()
    172 
    173     def close(self):
    174         self.f.close()
    175         self._check_milestones()
    176 
    177     def fail(self, why):
    178         pass
    179     def register_canceller(self, cb):
    180         pass
    181     def finish(self):
    182         return None
    183     # The following methods are just because the target might be a
    184     # repairer.DownUpConnector, and just because the current CHKUpload object
    185     # expects to find the storage index and encoding parameters in its
    186     # Uploadable.
    187     def set_storageindex(self, storageindex):
    188         pass
    189     def set_encodingparams(self, encodingparams):
    190         pass
     8from twisted.internet.interfaces import IConsumer
    1919
     10from allmydata.interfaces import IImmutableFileNode, IUploadResults
     11from allmydata import uri
     12from allmydata.check_results import CheckResults, CheckAndRepairResults
     13from allmydata.util.dictutil import DictOfSets
     14from pycryptopp.cipher.aes import AES
    19215
    193 class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin):
    194     def __init__(self, filecap, storage_broker, secret_holder,
    195                  downloader, history, cachedirectorymanager):
    196         assert isinstance(filecap, CHKFileURI)
    197         self.u = filecap
     16# local imports
     17from allmydata.immutable.checker import Checker
     18from allmydata.immutable.repairer import Repairer
     19from allmydata.immutable.downloader.node import DownloadNode
     20from allmydata.immutable.downloader.status import DownloadStatus
     21
     22class CiphertextFileNode:
     23    def __init__(self, verifycap, storage_broker, secret_holder,
     24                 terminator, history, download_status=None):
     25        assert isinstance(verifycap, uri.CHKFileVerifierURI)
     26        self._verifycap = verifycap
    19827        self._storage_broker = storage_broker
    19928        self._secret_holder = secret_holder
    200         self._downloader = downloader
    201         self._history = history
    202         storage_index = self.get_storage_index()
    203         self.download_cache = DownloadCache(filecap, storage_index, downloader,
    204                                             cachedirectorymanager)
    205         prefix = self.u.get_verify_cap().to_string()
    206         log.PrefixingLogMixin.__init__(self, "allmydata.immutable.filenode", prefix=prefix)
    207         self.log("starting", level=log.OPERATIONAL)
     29        if download_status is None:
     30            ds = DownloadStatus(verifycap.storage_index, verifycap.size)
     31            if history:
     32                history.add_download(ds)
     33            download_status = ds
     34        self._node = DownloadNode(verifycap, storage_broker, secret_holder,
     35                                  terminator, history, download_status)
     36
     37    def read(self, consumer, offset=0, size=None, read_ev=None):
     38        """I am the main entry point, from which FileNode.read() can get
     39        data. I feed the consumer with the desired range of ciphertext. I
     40        return a Deferred that fires (with the consumer) when the read is
     41        finished."""
     42        return self._node.read(consumer, offset, size, read_ev)
     43
     44    def get_segment(self, segnum):
     45        """Begin downloading a segment. I return a tuple (d, c): 'd' is a
     46        Deferred that fires with (offset,data) when the desired segment is
     47        available, and c is an object on which c.cancel() can be called to
     48        disavow interest in the segment (after which 'd' will never fire).
     49
     50        You probably need to know the segment size before calling this,
     51        unless you want the first few bytes of the file. If you ask for a
     52        segment number which turns out to be too large, the Deferred will
     53        errback with BadSegmentNumberError.
     54
     55        The Deferred fires with the offset of the first byte of the data
     56        segment, so that you can call get_segment() before knowing the
     57        segment size, and still know which data you received.
     58        """
     59        return self._node.get_segment(segnum)
     60
     61    def get_segment_size(self):
     62        # return a Deferred that fires with the file's real segment size
     63        return self._node.get_segsize()
    20864
    209     def get_size(self):
    210         return self.u.get_size()
    211     def get_current_size(self):
    212         return defer.succeed(self.get_size())
    213 
    214     def get_cap(self):
    215         return self.u
    216     def get_readcap(self):
    217         return self.u.get_readonly()
     65    def get_storage_index(self):
     66        return self._verifycap.storage_index
    21867    def get_verify_cap(self):
    219         return self.u.get_verify_cap()
    220     def get_repair_cap(self):
    221         # CHK files can be repaired with just the verifycap
    222         return self.u.get_verify_cap()
     68        return self._verifycap
     69    def get_size(self):
     70        return self._verifycap.size
    22371
    224     def get_uri(self):
    225         return self.u.to_string()
     72    def raise_error(self):
     73        pass
    22674
    227     def get_storage_index(self):
    228         return self.u.get_storage_index()
    22975
    23076    def check_and_repair(self, monitor, verify=False, add_lease=False):
    231         verifycap = self.get_verify_cap()
     77        verifycap = self._verifycap
     78        storage_index = verifycap.storage_index
    23279        sb = self._storage_broker
    23380        servers = sb.get_all_servers()
    23481        sh = self._secret_holder
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    23885                    monitor=monitor)
    23986        d = c.start()
    24087        def _maybe_repair(cr):
    241             crr = CheckAndRepairResults(self.u.get_storage_index())
     88            crr = CheckAndRepairResults(storage_index)
    24289            crr.pre_repair_results = cr
    24390            if cr.is_healthy():
    24491                crr.post_repair_results = cr
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    24895                crr.repair_successful = False # until proven successful
    24996                def _gather_repair_results(ur):
    25097                    assert IUploadResults.providedBy(ur), ur
    251                     # clone the cr -- check results to form the basic of the prr -- post-repair results
     98                    # clone the cr (check results) to form the basis of the
     99                    # prr (post-repair results)
    252100                    prr = CheckResults(cr.uri, cr.storage_index)
    253101                    prr.data = copy.deepcopy(cr.data)
    254102
    255103                    sm = prr.data['sharemap']
    256                     assert isinstance(sm, dictutil.DictOfSets), sm
     104                    assert isinstance(sm, DictOfSets), sm
    257105                    sm.update(ur.sharemap)
    258106                    servers_responding = set(prr.data['servers-responding'])
    259107                    servers_responding.union(ur.sharemap.iterkeys())
    260108                    prr.data['servers-responding'] = list(servers_responding)
    261109                    prr.data['count-shares-good'] = len(sm)
    262110                    prr.data['count-good-share-hosts'] = len(sm)
    263                     is_healthy = bool(len(sm) >= self.u.total_shares)
    264                     is_recoverable = bool(len(sm) >= self.u.needed_shares)
     111                    is_healthy = bool(len(sm) >= verifycap.total_shares)
     112                    is_recoverable = bool(len(sm) >= verifycap.needed_shares)
    265113                    prr.set_healthy(is_healthy)
    266114                    prr.set_recoverable(is_recoverable)
    267115                    crr.repair_successful = is_healthy
    268                     prr.set_needs_rebalancing(len(sm) >= self.u.total_shares)
     116                    prr.set_needs_rebalancing(len(sm) >= verifycap.total_shares)
    269117
    270118                    crr.post_repair_results = prr
    271119                    return crr
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    275123                    crr.repair_successful = False
    276124                    crr.repair_failure = f
    277125                    return f
    278                 r = Repairer(storage_broker=sb, secret_holder=sh,
    279                              verifycap=verifycap, monitor=monitor)
     126                r = Repairer(self, storage_broker=sb, secret_holder=sh,
     127                             monitor=monitor)
    280128                d = r.start()
    281129                d.addCallbacks(_gather_repair_results, _repair_error)
    282130                return d
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    285133        return d
    286134
    287135    def check(self, monitor, verify=False, add_lease=False):
    288         verifycap = self.get_verify_cap()
     136        verifycap = self._verifycap
    289137        sb = self._storage_broker
    290138        servers = sb.get_all_servers()
    291139        sh = self._secret_holder
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    295143                    monitor=monitor)
    296144        return v.start()
    297145
     146
     147class DecryptingConsumer:
     148    """I sit between a CiphertextDownloader (which acts as a Producer) and
     149    the real Consumer, decrypting everything that passes by. The real
     150    Consumer sees the real Producer, but the Producer sees us instead of the
     151    real consumer."""
     152    implements(IConsumer)
     153
     154    def __init__(self, consumer, readkey, offset, read_event):
     155        self._consumer = consumer
     156        self._read_event = read_event
     157        # TODO: pycryptopp CTR-mode needs random-access operations: I want
     158        # either a=AES(readkey, offset) or better yet both of:
     159        #  a=AES(readkey, offset=0)
     160        #  a.process(ciphertext, offset=xyz)
     161        # For now, we fake it with the existing iv= argument.
     162        offset_big = offset // 16
     163        offset_small = offset % 16
     164        iv = binascii.unhexlify("%032x" % offset_big)
     165        self._decryptor = AES(readkey, iv=iv)
     166        self._decryptor.process("\x00"*offset_small)
     167
     168    def registerProducer(self, producer, streaming):
     169        # this passes through, so the real consumer can flow-control the real
     170        # producer. Therefore we don't need to provide any IPushProducer
     171        # methods. We implement all the IConsumer methods as pass-throughs,
     172        # and only intercept write() to perform decryption.
     173        self._consumer.registerProducer(producer, streaming)
     174    def unregisterProducer(self):
     175        self._consumer.unregisterProducer()
     176    def write(self, ciphertext):
     177        started = now()
     178        plaintext = self._decryptor.process(ciphertext)
     179        elapsed = now() - started
     180        self._read_event.update(0, elapsed, 0)
     181        self._consumer.write(plaintext)
     182
     183class ImmutableFileNode:
     184    implements(IImmutableFileNode)
     185
     186    # I wrap a CiphertextFileNode with a decryption key
     187    def __init__(self, filecap, storage_broker, secret_holder, terminator,
     188                 history):
     189        assert isinstance(filecap, uri.CHKFileURI)
     190        verifycap = filecap.get_verify_cap()
     191        ds = DownloadStatus(verifycap.storage_index, verifycap.size)
     192        if history:
     193            history.add_download(ds)
     194        self._download_status = ds
     195        self._cnode = CiphertextFileNode(verifycap, storage_broker,
     196                                         secret_holder, terminator, history, ds)
     197        assert isinstance(filecap, uri.CHKFileURI)
     198        self.u = filecap
     199        self._readkey = filecap.key
     200
     201    # TODO: I'm not sure about this.. what's the use case for node==node? If
     202    # we keep it here, we should also put this on CiphertextFileNode
     203    def __hash__(self):
     204        return self.u.__hash__()
     205    def __eq__(self, other):
     206        if isinstance(other, ImmutableFileNode):
     207            return self.u.__eq__(other.u)
     208        else:
     209            return False
     210    def __ne__(self, other):
     211        if isinstance(other, ImmutableFileNode):
     212            return self.u.__eq__(other.u)
     213        else:
     214            return True
     215
    298216    def read(self, consumer, offset=0, size=None):
    299         self.log("read", offset=offset, size=size,
    300                  umid="UPP8FA", level=log.OPERATIONAL)
    301         if size is None:
    302             size = self.get_size() - offset
    303         size = min(size, self.get_size() - offset)
    304 
    305         if offset == 0 and size == self.get_size():
    306             # don't use the cache, just do a normal streaming download
    307             self.log("doing normal full download", umid="VRSBwg", level=log.OPERATIONAL)
    308             target = download.ConsumerAdapter(consumer)
    309             return self._downloader.download(self.get_cap(), target,
    310                                              self._parentmsgid,
    311                                              history=self._history)
    312 
    313         d = self.download_cache.when_range_available(offset, size)
    314         d.addCallback(lambda res:
    315                       self.download_cache.read(consumer, offset, size))
     217        actual_size = size
     218        if actual_size == None:
     219            actual_size = self.u.size
     220        actual_size = actual_size - offset
     221        read_ev = self._download_status.add_read_event(offset,actual_size,
     222                                                       now())
     223        decryptor = DecryptingConsumer(consumer, self._readkey, offset, read_ev)
     224        d = self._cnode.read(decryptor, offset, size, read_ev)
     225        d.addCallback(lambda dc: consumer)
    316226        return d
    317227
    318 class LiteralProducer:
    319     implements(IPushProducer)
    320     def resumeProducing(self):
    321         pass
    322     def stopProducing(self):
     228    def raise_error(self):
    323229        pass
    324230
     231    def get_write_uri(self):
     232        return None
    325233
    326 class LiteralFileNode(_ImmutableFileNodeBase):
    327 
    328     def __init__(self, filecap):
    329         assert isinstance(filecap, LiteralFileURI)
    330         self.u = filecap
    331 
    332     def get_size(self):
    333         return len(self.u.data)
    334     def get_current_size(self):
    335         return defer.succeed(self.get_size())
     234    def get_readonly_uri(self):
     235        return self.get_uri()
    336236
     237    def get_uri(self):
     238        return self.u.to_string()
    337239    def get_cap(self):
    338240        return self.u
    339241    def get_readcap(self):
    340         return self.u
     242        return self.u.get_readonly()
    341243    def get_verify_cap(self):
    342         return None
     244        return self.u.get_verify_cap()
    343245    def get_repair_cap(self):
    344         return None
    345 
    346     def get_uri(self):
    347         return self.u.to_string()
     246        # CHK files can be repaired with just the verifycap
     247        return self.u.get_verify_cap()
    348248
    349249    def get_storage_index(self):
    350         return None
     250        return self.u.get_storage_index()
    351251
    352     def check(self, monitor, verify=False, add_lease=False):
    353         return defer.succeed(None)
     252    def get_size(self):
     253        return self.u.get_size()
     254    def get_current_size(self):
     255        return defer.succeed(self.get_size())
    354256
    355     def check_and_repair(self, monitor, verify=False, add_lease=False):
    356         return defer.succeed(None)
     257    def is_mutable(self):
     258        return False
    357259
    358     def read(self, consumer, offset=0, size=None):
    359         if size is None:
    360             data = self.u.data[offset:]
    361         else:
    362             data = self.u.data[offset:offset+size]
    363 
    364         # We use twisted.protocols.basic.FileSender, which only does
    365         # non-streaming, i.e. PullProducer, where the receiver/consumer must
    366         # ask explicitly for each chunk of data. There are only two places in
    367         # the Twisted codebase that can't handle streaming=False, both of
    368         # which are in the upload path for an FTP/SFTP server
    369         # (protocols.ftp.FileConsumer and
    370         # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is
    371         # likely to be used as the target for a Tahoe download.
    372 
    373         d = basic.FileSender().beginFileTransfer(StringIO(data), consumer)
    374         d.addCallback(lambda lastSent: consumer)
    375         return d
     260    def is_readonly(self):
     261        return True
     262
     263    def is_unknown(self):
     264        return False
     265
     266    def is_allowed_in_immutable_directory(self):
     267        return True
     268
     269    def check_and_repair(self, monitor, verify=False, add_lease=False):
     270        return self._cnode.check_and_repair(monitor, verify, add_lease)
     271    def check(self, monitor, verify=False, add_lease=False):
     272        return self._cnode.check(monitor, verify, add_lease)
  • src/allmydata/immutable/layout.py

    diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py
    index 6e07da7..27fb844 100644
    a b limitations described in #346. 
    7474# they are still provided when writing so that older versions of Tahoe can
    7575# read them.
    7676
     77FORCE_V2 = False # set briefly by unit tests to make small-sized V2 shares
     78
    7779def make_write_bucket_proxy(rref, data_size, block_size, num_segments,
    7880                            num_share_hashes, uri_extension_size_max, nodeid):
    7981    # Use layout v1 for small files, so they'll be readable by older versions
    8082    # (<tahoe-1.3.0). Use layout v2 for large files; they'll only be readable
    8183    # by tahoe-1.3.0 or later.
    8284    try:
     85        if FORCE_V2:
     86            raise FileTooLargeError
    8387        wbp = WriteBucketProxy(rref, data_size, block_size, num_segments,
    8488                               num_share_hashes, uri_extension_size_max, nodeid)
    8589    except FileTooLargeError:
  • new file src/allmydata/immutable/literal.py

    diff --git a/src/allmydata/immutable/literal.py b/src/allmydata/immutable/literal.py
    new file mode 100644
    index 0000000..09466cb
    - +  
     1from cStringIO import StringIO
     2from zope.interface import implements
     3from twisted.internet import defer
     4from twisted.internet.interfaces import IPushProducer
     5from twisted.protocols import basic
     6from allmydata.interfaces import IImmutableFileNode, ICheckable
     7from allmydata.uri import LiteralFileURI
     8
     9class _ImmutableFileNodeBase(object):
     10    implements(IImmutableFileNode, ICheckable)
     11
     12    def get_write_uri(self):
     13        return None
     14
     15    def get_readonly_uri(self):
     16        return self.get_uri()
     17
     18    def is_mutable(self):
     19        return False
     20
     21    def is_readonly(self):
     22        return True
     23
     24    def is_unknown(self):
     25        return False
     26
     27    def is_allowed_in_immutable_directory(self):
     28        return True
     29
     30    def raise_error(self):
     31        pass
     32
     33    def __hash__(self):
     34        return self.u.__hash__()
     35    def __eq__(self, other):
     36        if isinstance(other, _ImmutableFileNodeBase):
     37            return self.u.__eq__(other.u)
     38        else:
     39            return False
     40    def __ne__(self, other):
     41        if isinstance(other, _ImmutableFileNodeBase):
     42            return self.u.__eq__(other.u)
     43        else:
     44            return True
     45
     46
     47class LiteralProducer:
     48    implements(IPushProducer)
     49    def resumeProducing(self):
     50        pass
     51    def stopProducing(self):
     52        pass
     53
     54
     55class LiteralFileNode(_ImmutableFileNodeBase):
     56
     57    def __init__(self, filecap):
     58        assert isinstance(filecap, LiteralFileURI)
     59        self.u = filecap
     60
     61    def get_size(self):
     62        return len(self.u.data)
     63    def get_current_size(self):
     64        return defer.succeed(self.get_size())
     65
     66    def get_cap(self):
     67        return self.u
     68    def get_readcap(self):
     69        return self.u
     70    def get_verify_cap(self):
     71        return None
     72    def get_repair_cap(self):
     73        return None
     74
     75    def get_uri(self):
     76        return self.u.to_string()
     77
     78    def get_storage_index(self):
     79        return None
     80
     81    def check(self, monitor, verify=False, add_lease=False):
     82        return defer.succeed(None)
     83
     84    def check_and_repair(self, monitor, verify=False, add_lease=False):
     85        return defer.succeed(None)
     86
     87    def read(self, consumer, offset=0, size=None):
     88        if size is None:
     89            data = self.u.data[offset:]
     90        else:
     91            data = self.u.data[offset:offset+size]
     92
     93        # We use twisted.protocols.basic.FileSender, which only does
     94        # non-streaming, i.e. PullProducer, where the receiver/consumer must
     95        # ask explicitly for each chunk of data. There are only two places in
     96        # the Twisted codebase that can't handle streaming=False, both of
     97        # which are in the upload path for an FTP/SFTP server
     98        # (protocols.ftp.FileConsumer and
     99        # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is
     100        # likely to be used as the target for a Tahoe download.
     101
     102        d = basic.FileSender().beginFileTransfer(StringIO(data), consumer)
     103        d.addCallback(lambda lastSent: consumer)
     104        return d
  • new file src/allmydata/immutable/notes.txt

    diff --git a/src/allmydata/immutable/notes.txt b/src/allmydata/immutable/notes.txt
    new file mode 100644
    index 0000000..ad11565
    - +  
     1
     2# TODO: if server1 has all shares, and server2-10 have one each, make the
     3# loop stall slightly before requesting all shares from the first server, to
     4# give it a chance to learn about the other shares and get some diversity.
     5# Or, don't bother, let the first block all come from one server, and take
     6# comfort in the fact that we'll learn about the other servers by the time we
     7# fetch the second block.
     8#
     9# davidsarah points out that we could use sequential (instead of parallel)
     10# fetching of multiple block from a single server: by the time the first
     11# block arrives, we'll hopefully have heard about other shares. This would
     12# induce some RTT delays (i.e. lose pipelining) in the case that this server
     13# has the only shares, but that seems tolerable. We could rig it to only use
     14# sequential requests on the first segment.
     15
     16# as a query gets later, we're more willing to duplicate work.
     17
     18# should change server read protocol to allow small shares to be fetched in a
     19# single RTT. Instead of get_buckets-then-read, just use read(shnums, readv),
     20# where shnums=[] means all shares, and the return value is a dict of
     21# # shnum->ta (like with mutable files). The DYHB query should also fetch the
     22# offset table, since everything else can be located once we have that.
     23
     24
     25# ImmutableFileNode
     26#    DecryptingConsumer
     27#  CiphertextFileNode
     28#    Segmentation
     29#   ShareFinder
     30#   SegmentFetcher[segnum] (one at a time)
     31#   CommonShare[shnum]
     32#   Share[shnum,server]
     33
     34
     35# TODO: if offset table is corrupt, attacker could cause us to fetch whole
     36# (large) share. But only from that one server, and they could throw lots of
     37# data at our connection anyways.
     38
     39# log budget: when downloading at 1MBps (i.e. 8 segments-per-second), 10
     40# log.OPERATIONAL per second, 100 log.NOISY per second. With k=3, that's 3
     41# log.NOISY per block fetch.
     42
     43
     44# test_cli.Error failed for a while: ShareFinder created, used up
     45# (NotEnoughSharesError), started again. The self.running=False is the
     46# problem.
     47#
     48# The second download is hungry, but because ShareFinder.running is false, it
     49# never notifies the SegmentFetcher that there are no more shares coming, so
     50# the download never completes. To trigger this in tests, we need the first
     51# download to want more shares (so it must fail with NotEnoughSharesError, or
     52# we must lose a share/server between downloads).
     53#
     54# fix was to not call self.stop when ShareFinder runs out of shares. stop()
     55# is now only called by the Terminator.
     56
     57# TODO: make sure that _signal_corruption(f) isn't sending private local
     58# variables in the CopiedFailure
     59
     60# tests to write:
     61# * truncated share, so _satisfy_* doesn't get all it wants
     62# * slow server
     63
     64# all classes are also Services, and the rule is that you don't initiate more
     65# work unless self.running
     66
     67# GC: decide whether each service is restartable or not. For non-restartable
     68# services, stopService() should delete a lot of attributes to kill reference
     69# cycles. The primary goal is to decref remote storage BucketReaders when a
     70# download is complete.
     71
     72========================================
     73old stuff from download2_off:
     74
     75#! /usr/bin/python
     76
     77# known (shnum,Server) pairs are sorted into a list according to
     78# desireability. This sort is picking a winding path through a matrix of
     79# [shnum][server]. The goal is to get diversity of both shnum and server.
     80
     81# The initial order is:
     82#  find the lowest shnum on the first server, add it
     83#  look at the next server, find the lowest shnum that we don't already have
     84#   if any
     85#  next server, etc, until all known servers are checked
     86#  now look at servers that we skipped (because ...
     87
     88# Keep track of which block requests are outstanding by (shnum,Server). Don't
     89# bother prioritizing "validated" shares: the overhead to pull the share hash
     90# chain is tiny (4 hashes = 128 bytes), and the overhead to pull a new block
     91# hash chain is also tiny (1GB file, 8192 segments of 128KiB each, 13 hashes,
     92# 832 bytes). Each time a block request is sent, also request any necessary
     93# hashes. Don't bother with a "ValidatedShare" class (as distinct from some
     94# other sort of Share). Don't bother avoiding duplicate hash-chain requests.
     95
     96# For each outstanding segread, walk the list and send requests (skipping
     97# outstanding shnums) until requests for k distinct shnums are in flight. If
     98# we can't do that, ask for more. If we get impatient on a request, find the
     99# first non-outstanding
     100
     101# start with the first Share in the list, and send a request. Then look at
     102# the next one. If we already have a pending request for the same shnum or
     103# server, push that Share down onto the fallback list and try the next one,
     104# etc. If we run out of non-fallback shares, use the fallback ones,
     105# preferring shnums that we don't have outstanding requests for (i.e. assume
     106# that all requests will complete). Do this by having a second fallback list.
     107
     108# hell, I'm reviving the Herder. But remember, we're still talking 3 objects
     109# per file, not thousands.
     110
     111# actually, don't bother sorting the initial list. Append Shares as the
     112# responses come back, that will put the fastest servers at the front of the
     113# list, and give a tiny preference to servers that are earlier in the
     114# permuted order.
     115
     116# more ideas:
     117#  sort shares by:
     118#   1: number of roundtrips needed to get some data
     119#   2: share number
     120#   3: ms of RTT delay
     121# maybe measure average time-to-completion of requests, compare completion
     122# time against that, much larger indicates congestion on the server side
     123# or the server's upstream speed is less than our downstream. Minimum
     124# time-to-completion indicates min(our-downstream,their-upstream). Could
     125# fetch shares one-at-a-time to measure that better.
     126
     127# when should we risk duplicate work and send a new request?
     128
     129def walk(self):
     130    shares = sorted(list)
     131    oldshares = copy(shares)
     132    outstanding = list()
     133    fallbacks = list()
     134    second_fallbacks = list()
     135    while len(outstanding.nonlate.shnums) < k: # need more requests
     136        while oldshares:
     137            s = shares.pop(0)
     138            if s.server in outstanding.servers or s.shnum in outstanding.shnums:
     139                fallbacks.append(s)
     140                continue
     141            outstanding.append(s)
     142            send_request(s)
     143            break #'while need_more_requests'
     144        # must use fallback list. Ask for more servers while we're at it.
     145        ask_for_more_servers()
     146        while fallbacks:
     147            s = fallbacks.pop(0)
     148            if s.shnum in outstanding.shnums:
     149                # assume that the outstanding requests will complete, but
     150                # send new requests for other shnums to existing servers
     151                second_fallbacks.append(s)
     152                continue
     153            outstanding.append(s)
     154            send_request(s)
     155            break #'while need_more_requests'
     156        # if we get here, we're being forced to send out multiple queries per
     157        # share. We've already asked for more servers, which might help. If
     158        # there are no late outstanding queries, then duplicate shares won't
     159        # help. Don't send queries for duplicate shares until some of the
     160        # queries are late.
     161        if outstanding.late:
     162            # we're allowed to try any non-outstanding share
     163            while second_fallbacks:
     164                pass
     165    newshares = outstanding + fallbacks + second_fallbacks + oldshares
     166       
     167
     168class Server:
     169    """I represent an abstract Storage Server. One day, the StorageBroker
     170    will return instances of me. For now, the StorageBroker returns (peerid,
     171    RemoteReference) tuples, and this code wraps a Server instance around
     172    them.
     173    """
     174    def __init__(self, peerid, ss):
     175        self.peerid = peerid
     176        self.remote = ss
     177        self._remote_buckets = {} # maps shnum to RIBucketReader
     178        # TODO: release the bucket references on shares that we no longer
     179        # want. OTOH, why would we not want them? Corruption?
     180
     181    def send_query(self, storage_index):
     182        """I return a Deferred that fires with a set of shnums. If the server
     183        had shares available, I will retain the RemoteReferences to its
     184        buckets, so that get_data(shnum, range) can be called later."""
     185        d = self.remote.callRemote("get_buckets", self.storage_index)
     186        d.addCallback(self._got_response)
     187        return d
     188
     189    def _got_response(self, r):
     190        self._remote_buckets = r
     191        return set(r.keys())
     192
     193class ShareOnAServer:
     194    """I represent one instance of a share, known to live on a specific
     195    server. I am created every time a server responds affirmatively to a
     196    do-you-have-block query."""
     197
     198    def __init__(self, shnum, server):
     199        self._shnum = shnum
     200        self._server = server
     201        self._block_hash_tree = None
     202
     203    def cost(self, segnum):
     204        """I return a tuple of (roundtrips, bytes, rtt), indicating how
     205        expensive I think it would be to fetch the given segment. Roundtrips
     206        indicates how many roundtrips it is likely to take (one to get the
     207        data and hashes, plus one to get the offset table and UEB if this is
     208        the first segment we've ever fetched). 'bytes' is how many bytes we
     209        must fetch (estimated). 'rtt' is estimated round-trip time (float) in
     210        seconds for a trivial request. The downloading algorithm will compare
     211        costs to decide which shares should be used."""
     212        # the most significant factor here is roundtrips: a Share for which
     213        # we already have the offset table is better to than a brand new one
     214
     215    def max_bandwidth(self):
     216        """Return a float, indicating the highest plausible bytes-per-second
     217        that I've observed coming from this share. This will be based upon
     218        the minimum (bytes-per-fetch / time-per-fetch) ever observed. This
     219        can we used to estimate the server's upstream bandwidth. Clearly this
     220        is only accurate if a share is retrieved with no contention for
     221        either the upstream, downstream, or middle of the connection, but it
     222        may still serve as a useful metric for deciding which servers to pull
     223        from."""
     224
     225    def get_segment(self, segnum):
     226        """I return a Deferred that will fire with the segment data, or
     227        errback."""
     228
     229class NativeShareOnAServer(ShareOnAServer):
     230    """For tahoe native (foolscap) servers, I contain a RemoteReference to
     231    the RIBucketReader instance."""
     232    def __init__(self, shnum, server, rref):
     233        ShareOnAServer.__init__(self, shnum, server)
     234        self._rref = rref # RIBucketReader
     235
     236class Share:
     237    def __init__(self, shnum):
     238        self._shnum = shnum
     239        # _servers are the Server instances which appear to hold a copy of
     240        # this share. It is populated when the ValidShare is first created,
     241        # or when we receive a get_buckets() response for a shnum that
     242        # already has a ValidShare instance. When we lose the connection to a
     243        # server, we remove it.
     244        self._servers = set()
     245        # offsets, UEB, and share_hash_tree all live in the parent.
     246        # block_hash_tree lives here.
     247        self._block_hash_tree = None
     248
     249        self._want
     250
     251    def get_servers(self):
     252        return self._servers
     253
     254
     255    def get_block(self, segnum):
     256        # read enough data to obtain a single validated block
     257        if not self.have_offsets:
     258            # we get the offsets in their own read, since they tell us where
     259            # everything else lives. We must fetch offsets for each share
     260            # separately, since they aren't directly covered by the UEB.
     261            pass
     262        if not self.parent.have_ueb:
     263            # use _guessed_segsize to make a guess about the layout, so we
     264            # can fetch both the offset table and the UEB in the same read.
     265            # This also requires making a guess about the presence or absence
     266            # of the plaintext_hash_tree. Oh, and also the version number. Oh
     267            # well.
     268            pass
     269
     270class CiphertextDownloader:
     271    """I manage all downloads for a single file. I operate a state machine
     272    with input events that are local read() requests, responses to my remote
     273    'get_bucket' and 'read_bucket' messages, and connection establishment and
     274    loss. My outbound events are connection establishment requests and bucket
     275    read requests messages.
     276    """
     277    # eventually this will merge into the FileNode
     278    ServerClass = Server # for tests to override
     279
     280    def __init__(self, storage_index, ueb_hash, size, k, N, storage_broker,
     281                 shutdowner):
     282        # values we get from the filecap
     283        self._storage_index = si = storage_index
     284        self._ueb_hash = ueb_hash
     285        self._size = size
     286        self._needed_shares = k
     287        self._total_shares = N
     288        self._share_hash_tree = IncompleteHashTree(self._total_shares)
     289        # values we discover when we first fetch the UEB
     290        self._ueb = None # is dict after UEB fetch+validate
     291        self._segsize = None
     292        self._numsegs = None
     293        self._blocksize = None
     294        self._tail_segsize = None
     295        self._ciphertext_hash = None # optional
     296        # structures we create when we fetch the UEB, then continue to fill
     297        # as we download the file
     298        self._share_hash_tree = None # is IncompleteHashTree after UEB fetch
     299        self._ciphertext_hash_tree = None
     300
     301        # values we learn as we download the file
     302        self._offsets = {} # (shnum,Server) to offset table (dict)
     303        self._block_hash_tree = {} # shnum to IncompleteHashTree
     304        # other things which help us
     305        self._guessed_segsize = min(128*1024, size)
     306        self._active_share_readers = {} # maps shnum to Reader instance
     307        self._share_readers = [] # sorted by preference, best first
     308        self._readers = set() # set of Reader instances
     309        self._recent_horizon = 10 # seconds
     310
     311        # 'shutdowner' is a MultiService parent used to cancel all downloads
     312        # when the node is shutting down, to let tests have a clean reactor.
     313
     314        self._init_available_servers()
     315        self._init_find_enough_shares()
     316
     317    # _available_servers is an iterator that provides us with Server
     318    # instances. Each time we pull out a Server, we immediately send it a
     319    # query, so we don't need to keep track of who we've sent queries to.
     320
     321    def _init_available_servers(self):
     322        self._available_servers = self._get_available_servers()
     323        self._no_more_available_servers = False
     324
     325    def _get_available_servers(self):
     326        """I am a generator of servers to use, sorted by the order in which
     327        we should query them. I make sure there are no duplicates in this
     328        list."""
     329        # TODO: make StorageBroker responsible for this non-duplication, and
     330        # replace this method with a simple iter(get_servers_for_index()),
     331        # plus a self._no_more_available_servers=True
     332        seen = set()
     333        sb = self._storage_broker
     334        for (peerid, ss) in sb.get_servers_for_index(self._storage_index):
     335            if peerid not in seen:
     336                yield self.ServerClass(peerid, ss) # Server(peerid, ss)
     337                seen.add(peerid)
     338        self._no_more_available_servers = True
     339
     340    # this block of code is responsible for having enough non-problematic
     341    # distinct shares/servers available and ready for download, and for
     342    # limiting the number of queries that are outstanding. The idea is that
     343    # we'll use the k fastest/best shares, and have the other ones in reserve
     344    # in case those servers stop responding or respond too slowly. We keep
     345    # track of all known shares, but we also keep track of problematic shares
     346    # (ones with hash failures or lost connections), so we can put them at
     347    # the bottom of the list.
     348
     349    def _init_find_enough_shares(self):
     350        # _unvalidated_sharemap maps shnum to set of Servers, and remembers
     351        # where viable (but not yet validated) shares are located. Each
     352        # get_bucket() response adds to this map, each act of validation
     353        # removes from it.
     354        self._sharemap = DictOfSets()
     355
     356        # _sharemap maps shnum to set of Servers, and remembers where viable
     357        # shares are located. Each get_bucket() response adds to this map,
     358        # each hash failure or disconnect removes from it. (TODO: if we
     359        # disconnect but reconnect later, we should be allowed to re-query).
     360        self._sharemap = DictOfSets()
     361
     362        # _problem_shares is a set of (shnum, Server) tuples, and
     363
     364        # _queries_in_flight maps a Server to a timestamp, which remembers
     365        # which servers we've sent queries to (and when) but have not yet
     366        # heard a response. This lets us put a limit on the number of
     367        # outstanding queries, to limit the size of the work window (how much
     368        # extra work we ask servers to do in the hopes of keeping our own
     369        # pipeline filled). We remove a Server from _queries_in_flight when
     370        # we get an answer/error or we finally give up. If we ever switch to
     371        # a non-connection-oriented protocol (like UDP, or forwarded Chord
     372        # queries), we can use this information to retransmit any query that
     373        # has gone unanswered for too long.
     374        self._queries_in_flight = dict()
     375
     376    def _count_recent_queries_in_flight(self):
     377        now = time.time()
     378        recent = now - self._recent_horizon
     379        return len([s for (s,when) in self._queries_in_flight.items()
     380                    if when > recent])
     381
     382    def _find_enough_shares(self):
     383        # goal: have 2*k distinct not-invalid shares available for reading,
     384        # from 2*k distinct servers. Do not have more than 4*k "recent"
     385        # queries in flight at a time.
     386        if (len(self._sharemap) >= 2*self._needed_shares
     387            and len(self._sharemap.values) >= 2*self._needed_shares):
     388            return
     389        num = self._count_recent_queries_in_flight()
     390        while num < 4*self._needed_shares:
     391            try:
     392                s = self._available_servers.next()
     393            except StopIteration:
     394                return # no more progress can be made
     395            self._queries_in_flight[s] = time.time()
     396            d = s.send_query(self._storage_index)
     397            d.addBoth(incidentally, self._queries_in_flight.discard, s)
     398            d.addCallbacks(lambda shnums: [self._sharemap.add(shnum, s)
     399                                           for shnum in shnums],
     400                           lambda f: self._query_error(f, s))
     401            d.addErrback(self._error)
     402            d.addCallback(self._reschedule)
     403            num += 1
     404
     405    def _query_error(self, f, s):
     406        # a server returned an error, log it gently and ignore
     407        level = log.WEIRD
     408        if f.check(DeadReferenceError):
     409            level = log.UNUSUAL
     410        log.msg("Error during get_buckets to server=%(server)s", server=str(s),
     411                failure=f, level=level, umid="3uuBUQ")
     412
     413    # this block is responsible for turning known shares into usable shares,
     414    # by fetching enough data to validate their contents.
     415
     416    # UEB (from any share)
     417    # share hash chain, validated (from any share, for given shnum)
     418    # block hash (any share, given shnum)
     419
     420    def _got_ueb(self, ueb_data, share):
     421        if self._ueb is not None:
     422            return
     423        if hashutil.uri_extension_hash(ueb_data) != self._ueb_hash:
     424            share.error("UEB hash does not match")
     425            return
     426        d = uri.unpack_extension(ueb_data)
     427        self.share_size = mathutil.div_ceil(self._size, self._needed_shares)
     428
     429
     430        # There are several kinds of things that can be found in a UEB.
     431        # First, things that we really need to learn from the UEB in order to
     432        # do this download. Next: things which are optional but not redundant
     433        # -- if they are present in the UEB they will get used. Next, things
     434        # that are optional and redundant. These things are required to be
     435        # consistent: they don't have to be in the UEB, but if they are in
     436        # the UEB then they will be checked for consistency with the
     437        # already-known facts, and if they are inconsistent then an exception
     438        # will be raised. These things aren't actually used -- they are just
     439        # tested for consistency and ignored. Finally: things which are
     440        # deprecated -- they ought not be in the UEB at all, and if they are
     441        # present then a warning will be logged but they are otherwise
     442        # ignored.
     443
     444        # First, things that we really need to learn from the UEB:
     445        # segment_size, crypttext_root_hash, and share_root_hash.
     446        self._segsize = d['segment_size']
     447
     448        self._blocksize = mathutil.div_ceil(self._segsize, self._needed_shares)
     449        self._numsegs = mathutil.div_ceil(self._size, self._segsize)
     450
     451        self._tail_segsize = self._size % self._segsize
     452        if self._tail_segsize == 0:
     453            self._tail_segsize = self._segsize
     454        # padding for erasure code
     455        self._tail_segsize = mathutil.next_multiple(self._tail_segsize,
     456                                                    self._needed_shares)
     457
     458        # Ciphertext hash tree root is mandatory, so that there is at most
     459        # one ciphertext that matches this read-cap or verify-cap. The
     460        # integrity check on the shares is not sufficient to prevent the
     461        # original encoder from creating some shares of file A and other
     462        # shares of file B.
     463        self._ciphertext_hash_tree = IncompleteHashTree(self._numsegs)
     464        self._ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})
     465
     466        self._share_hash_tree.set_hashes({0: d['share_root_hash']})
     467
     468
     469        # Next: things that are optional and not redundant: crypttext_hash
     470        if 'crypttext_hash' in d:
     471            if len(self._ciphertext_hash) == hashutil.CRYPTO_VAL_SIZE:
     472                self._ciphertext_hash = d['crypttext_hash']
     473            else:
     474                log.msg("ignoring bad-length UEB[crypttext_hash], "
     475                        "got %d bytes, want %d" % (len(d['crypttext_hash']),
     476                                                   hashutil.CRYPTO_VAL_SIZE),
     477                        umid="oZkGLA", level=log.WEIRD)
     478
     479        # we ignore all of the redundant fields when downloading. The
     480        # Verifier uses a different code path which does not ignore them.
     481
     482        # finally, set self._ueb as a marker that we don't need to request it
     483        # anymore
     484        self._ueb = d
     485
     486    def _got_share_hashes(self, hashes, share):
     487        assert isinstance(hashes, dict)
     488        try:
     489            self._share_hash_tree.set_hashes(hashes)
     490        except (IndexError, BadHashError, NotEnoughHashesError), le:
     491            share.error("Bad or missing hashes")
     492            return
     493
     494    #def _got_block_hashes(
     495
     496    def _init_validate_enough_shares(self):
     497        # _valid_shares maps shnum to ValidatedShare instances, and is
     498        # populated once the block hash root has been fetched and validated
     499        # (which requires any valid copy of the UEB, and a valid copy of the
     500        # share hash chain for each shnum)
     501        self._valid_shares = {}
     502
     503        # _target_shares is an ordered list of ReadyShare instances, each of
     504        # which is a (shnum, server) tuple. It is sorted in order of
     505        # preference: we expect to get the fastest response from the
     506        # ReadyShares at the front of the list. It is also sorted to
     507        # distribute the shnums, so that fetching shares from
     508        # _target_shares[:k] is likely (but not guaranteed) to give us k
     509        # distinct shares. The rule is that we skip over entries for blocks
     510        # that we've already received, limit the number of recent queries for
     511        # the same block,
     512        self._target_shares = []
     513
     514    def _validate_enough_shares(self):
     515        # my goal is to have at least 2*k distinct validated shares from at
     516        # least 2*k distinct servers
     517        valid_share_servers = set()
     518        for vs in self._valid_shares.values():
     519            valid_share_servers.update(vs.get_servers())
     520        if (len(self._valid_shares) >= 2*self._needed_shares
     521            and len(self._valid_share_servers) >= 2*self._needed_shares):
     522            return
     523        #for
     524
     525    def _reschedule(self, _ign):
     526        # fire the loop again
     527        if not self._scheduled:
     528            self._scheduled = True
     529            eventually(self._loop)
     530
     531    def _loop(self):
     532        self._scheduled = False
     533        # what do we need?
     534
     535        self._find_enough_shares()
     536        self._validate_enough_shares()
     537
     538        if not self._ueb:
     539            # we always need a copy of the UEB
     540            pass
     541
     542    def _error(self, f):
     543        # this is an unexpected error: a coding bug
     544        log.err(f, level=log.UNUSUAL)
     545           
     546
     547
     548# using a single packed string (and an offset table) may be an artifact of
     549# our native storage server: other backends might allow cheap multi-part
     550# files (think S3, several buckets per share, one for each section).
     551
     552# find new names for:
     553#  data_holder
     554#  Share / Share2  (ShareInstance / Share? but the first is more useful)
     555
     556class IShare(Interface):
     557    """I represent a single instance of a single share (e.g. I reference the
     558    shnum2 for share SI=abcde on server xy12t, not the one on server ab45q).
     559    This interface is used by SegmentFetcher to retrieve validated blocks.
     560    """
     561    def get_block(segnum):
     562        """Return an Observer2, which will be notified with the following
     563        events:
     564         state=COMPLETE, block=data (terminal): validated block data
     565         state=OVERDUE (non-terminal): we have reason to believe that the
     566                                       request might have stalled, or we
     567                                       might just be impatient
     568         state=CORRUPT (terminal): the data we received was corrupt
     569         state=DEAD (terminal): the connection has failed
     570        """
     571
     572
     573# it'd be nice if we receive the hashes before the block, or just
     574# afterwards, so we aren't stuck holding on to unvalidated blocks
     575# that we can't process. If we guess the offsets right, we can
     576# accomplish this by sending the block request after the metadata
     577# requests (by keeping two separate requestlists), and have a one RTT
     578# pipeline like:
     579#  1a=metadata, 1b=block
     580#  1b->process+deliver : one RTT
     581
     582# But if we guess wrong, and fetch the wrong part of the block, we'll
     583# have a pipeline that looks like:
     584#  1a=wrong metadata, 1b=wrong block
     585#  1a->2a=right metadata,2b=right block
     586#  2b->process+deliver
     587# which means two RTT and buffering one block (which, since we'll
     588# guess the segsize wrong for everything, means buffering one
     589# segment)
     590
     591# if we start asking for multiple segments, we could get something
     592# worse:
     593#  1a=wrong metadata, 1b=wrong block0, 1c=wrong block1, ..
     594#  1a->2a=right metadata,2b=right block0,2c=right block1, .
     595#  2b->process+deliver
     596
     597# which means two RTT but fetching and buffering the whole file
     598# before delivering anything. However, since we don't know when the
     599# other shares are going to arrive, we need to avoid having more than
     600# one block in the pipeline anyways. So we shouldn't be able to get
     601# into this state.
     602
     603# it also means that, instead of handling all of
     604# self._requested_blocks at once, we should only be handling one
     605# block at a time: one of the requested block should be special
     606# (probably FIFO). But retire all we can.
     607
     608    # this might be better with a Deferred, using COMPLETE as the success
     609    # case and CORRUPT/DEAD in an errback, because that would let us hold the
     610    # 'share' and 'shnum' arguments locally (instead of roundtripping them
     611    # through Share.send_request). But that OVERDUE is not terminal. So I
     612    # want a new sort of callback mechanism, with the extra-argument-passing
     613    # aspects of Deferred, but without being so one-shot. Is this a job for
     614    # Observer? No, it doesn't take extra arguments. So this uses Observer2.
     615
     616
     617class Reader:
     618    """I am responsible for a single offset+size read of the file. I handle
     619    segmentation: I figure out which segments are necessary, request them
     620    (from my CiphertextDownloader) in order, and trim the segments down to
     621    match the offset+size span. I use the Producer/Consumer interface to only
     622    request one segment at a time.
     623    """
     624    implements(IPushProducer)
     625    def __init__(self, consumer, offset, size):
     626        self._needed = []
     627        self._consumer = consumer
     628        self._hungry = False
     629        self._offset = offset
     630        self._size = size
     631        self._segsize = None
     632    def start(self):
     633        self._alive = True
     634        self._deferred = defer.Deferred()
     635        # the process doesn't actually start until set_segment_size()
     636        return self._deferred
     637
     638    def set_segment_size(self, segsize):
     639        if self._segsize is not None:
     640            return
     641        self._segsize = segsize
     642        self._compute_segnums()
     643
     644    def _compute_segnums(self, segsize):
     645        # now that we know the file's segsize, what segments (and which
     646        # ranges of each) will we need?
     647        size = self._size
     648        offset = self._offset
     649        while size:
     650            assert size >= 0
     651            this_seg_num = int(offset / self._segsize)
     652            this_seg_offset = offset - (seg_num*self._segsize)
     653            this_seg_size = min(size, self._segsize-seg_offset)
     654            size -= this_seg_size
     655            if size:
     656                offset += this_seg_size
     657            yield (this_seg_num, this_seg_offset, this_seg_size)
     658
     659    def get_needed_segments(self):
     660        return set([segnum for (segnum, off, size) in self._needed])
     661
     662
     663    def stopProducing(self):
     664        self._hungry = False
     665        self._alive = False
     666        # TODO: cancel the segment requests
     667    def pauseProducing(self):
     668        self._hungry = False
     669    def resumeProducing(self):
     670        self._hungry = True
     671    def add_segment(self, segnum, offset, size):
     672        self._needed.append( (segnum, offset, size) )
     673    def got_segment(self, segnum, segdata):
     674        """Return True if this schedule has more to go, or False if it is
     675        done."""
     676        assert self._needed[0][segnum] == segnum
     677        (_ign, offset, size) = self._needed.pop(0)
     678        data = segdata[offset:offset+size]
     679        self._consumer.write(data)
     680        if not self._needed:
     681            # we're done
     682            self._alive = False
     683            self._hungry = False
     684            self._consumer.unregisterProducer()
     685            self._deferred.callback(self._consumer)
     686    def error(self, f):
     687        self._alive = False
     688        self._hungry = False
     689        self._consumer.unregisterProducer()
     690        self._deferred.errback(f)
     691
     692
     693
     694class x:
     695    def OFFread(self, consumer, offset=0, size=None):
     696        """I am the main entry point, from which FileNode.read() can get
     697        data."""
     698        # tolerate concurrent operations: each gets its own Reader
     699        if size is None:
     700            size = self._size - offset
     701        r = Reader(consumer, offset, size)
     702        self._readers.add(r)
     703        d = r.start()
     704        if self.segment_size is not None:
     705            r.set_segment_size(self.segment_size)
     706            # TODO: if we can't find any segments, and thus never get a
     707            # segsize, tell the Readers to give up
     708        return d
  • src/allmydata/immutable/repairer.py

    diff --git a/src/allmydata/immutable/repairer.py b/src/allmydata/immutable/repairer.py
    index fa6a604..64fb9a1 100644
    a b  
    11from zope.interface import implements
    22from twisted.internet import defer
    33from allmydata.storage.server import si_b2a
    4 from allmydata.util import log, observer
    5 from allmydata.util.assertutil import precondition, _assert
    6 from allmydata.uri import CHKFileVerifierURI
    7 from allmydata.interfaces import IEncryptedUploadable, IDownloadTarget
    8 from twisted.internet.interfaces import IConsumer
     4from allmydata.util import log, consumer
     5from allmydata.util.assertutil import precondition
     6from allmydata.interfaces import IEncryptedUploadable
    97
    10 from allmydata.immutable import download, upload
    11 
    12 import collections
     8from allmydata.immutable import upload
    139
    1410class Repairer(log.PrefixingLogMixin):
     11    implements(IEncryptedUploadable)
    1512    """I generate any shares which were not available and upload them to
    1613    servers.
    1714
    class Repairer(log.PrefixingLogMixin): 
    4340    cancelled (by invoking its raise_if_cancelled() method).
    4441    """
    4542
    46     def __init__(self, storage_broker, secret_holder, verifycap, monitor):
    47         assert precondition(isinstance(verifycap, CHKFileVerifierURI))
    48 
    49         logprefix = si_b2a(verifycap.get_storage_index())[:5]
     43    def __init__(self, filenode, storage_broker, secret_holder, monitor):
     44        logprefix = si_b2a(filenode.get_storage_index())[:5]
    5045        log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer",
    5146                                       prefix=logprefix)
    52 
     47        self._filenode = filenode
    5348        self._storage_broker = storage_broker
    5449        self._secret_holder = secret_holder
    55         self._verifycap = verifycap
    5650        self._monitor = monitor
     51        self._offset = 0
    5752
    5853    def start(self):
    5954        self.log("starting repair")
    60         duc = DownUpConnector()
    61         dl = download.CiphertextDownloader(self._storage_broker,
    62                                            self._verifycap, target=duc,
    63                                            monitor=self._monitor)
    64         ul = upload.CHKUploader(self._storage_broker, self._secret_holder)
    65 
    66         d = defer.Deferred()
    67 
    68         # If the upload or the download fails or is stopped, then the repair
    69         # failed.
    70         def _errb(f):
    71             d.errback(f)
    72             return None
    73 
    74         # If the upload succeeds, then the repair has succeeded.
    75         def _cb(res):
    76             d.callback(res)
    77         ul.start(duc).addCallbacks(_cb, _errb)
    78 
    79         # If the download fails or is stopped, then the repair failed.
    80         d2 = dl.start()
    81         d2.addErrback(_errb)
    82 
    83         # We ignore the callback from d2.  Is this right?  Ugh.
    84 
     55        d = self._filenode.get_segment_size()
     56        def _got_segsize(segsize):
     57            vcap = self._filenode.get_verify_cap()
     58            k = vcap.needed_shares
     59            N = vcap.total_shares
     60            happy = upload.BaseUploadable.default_encoding_param_happy
     61            self._encodingparams = (k, happy, N, segsize)
     62            ul = upload.CHKUploader(self._storage_broker, self._secret_holder)
     63            return ul.start(self) # I am the IEncryptedUploadable
     64        d.addCallback(_got_segsize)
    8565        return d
    8666
    87 class DownUpConnector(log.PrefixingLogMixin):
    88     implements(IEncryptedUploadable, IDownloadTarget, IConsumer)
    89     """I act like an 'encrypted uploadable' -- something that a local
    90     uploader can read ciphertext from in order to upload the ciphertext.
    91     However, unbeknownst to the uploader, I actually download the ciphertext
    92     from a CiphertextDownloader instance as it is needed.
    93 
    94     On the other hand, I act like a 'download target' -- something that a
    95     local downloader can write ciphertext to as it downloads the ciphertext.
    96     That downloader doesn't realize, of course, that I'm just turning around
    97     and giving the ciphertext to the uploader."""
    98 
    99     # The theory behind this class is nice: just satisfy two separate
    100     # interfaces. The implementation is slightly horrible, because of
    101     # "impedance mismatch" -- the downloader expects to be able to
    102     # synchronously push data in, and the uploader expects to be able to read
    103     # data out with a "read(THIS_SPECIFIC_LENGTH)" which returns a deferred.
    104     # The two interfaces have different APIs for pausing/unpausing. The
    105     # uploader requests metadata like size and encodingparams which the
    106     # downloader provides either eventually or not at all (okay I just now
    107     # extended the downloader to provide encodingparams). Most of this
    108     # slightly horrible code would disappear if CiphertextDownloader just
    109     # used this object as an IConsumer (plus maybe a couple of other methods)
    110     # and if the Uploader simply expected to be treated as an IConsumer (plus
    111     # maybe a couple of other things).
    112 
    113     def __init__(self, buflim=2**19):
    114         """If we're already holding at least buflim bytes, then tell the
    115         downloader to pause until we have less than buflim bytes."""
    116         log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer")
    117         self.buflim = buflim
    118         self.bufs = collections.deque() # list of strings
    119         self.bufsiz = 0 # how many bytes total in bufs
    120 
    121         # list of deferreds which will fire with the requested ciphertext
    122         self.next_read_ds = collections.deque()
    123 
    124         # how many bytes of ciphertext were requested by each deferred
    125         self.next_read_lens = collections.deque()
    126 
    127         self._size_osol = observer.OneShotObserverList()
    128         self._encodingparams_osol = observer.OneShotObserverList()
    129         self._storageindex_osol = observer.OneShotObserverList()
    130         self._closed_to_pusher = False
    131 
    132         # once seg size is available, the following attribute will be created
    133         # to hold it:
    134 
    135         # self.encodingparams # (provided by the object which is pushing data
    136         # into me, required by the object which is pulling data out of me)
    137 
    138         # open() will create the following attribute:
    139         # self.size # size of the whole file (provided by the object which is
    140         # pushing data into me, required by the object which is pulling data
    141         # out of me)
    142 
    143         # set_upload_status() will create the following attribute:
    144 
    145         # self.upload_status # XXX do we need to actually update this? Is
    146         # anybody watching the results during a repair?
    147 
    148     def _satisfy_reads_if_possible(self):
    149         assert bool(self.next_read_ds) == bool(self.next_read_lens)
    150         while self.next_read_ds and ((self.bufsiz >= self.next_read_lens[0])
    151                                      or self._closed_to_pusher):
    152             nrd = self.next_read_ds.popleft()
    153             nrl = self.next_read_lens.popleft()
    154 
    155             # Pick out the requested number of bytes from self.bufs, turn it
    156             # into a string, and callback the deferred with that.
    157             res = []
    158             ressize = 0
    159             while ressize < nrl and self.bufs:
    160                 nextbuf = self.bufs.popleft()
    161                 res.append(nextbuf)
    162                 ressize += len(nextbuf)
    163                 if ressize > nrl:
    164                     extra = ressize - nrl
    165                     self.bufs.appendleft(nextbuf[:-extra])
    166                     res[-1] = nextbuf[:-extra]
    167             assert _assert(sum(len(x) for x in res) <= nrl, [len(x) for x in res], nrl)
    168             assert _assert(sum(len(x) for x in res) == nrl or self._closed_to_pusher, [len(x) for x in res], nrl)
    169             self.bufsiz -= nrl
    170             if self.bufsiz < self.buflim and self.producer:
    171                 self.producer.resumeProducing()
    172             nrd.callback(res)
    173 
    174     # methods to satisfy the IConsumer and IDownloadTarget interfaces. (From
    175     # the perspective of a downloader I am an IDownloadTarget and an
    176     # IConsumer.)
    177     def registerProducer(self, producer, streaming):
    178         assert streaming # We know how to handle only streaming producers.
    179         self.producer = producer # the downloader
    180     def unregisterProducer(self):
    181         self.producer = None
    182     def open(self, size):
    183         self.size = size
    184         self._size_osol.fire(self.size)
    185     def set_encodingparams(self, encodingparams):
    186         self.encodingparams = encodingparams
    187         self._encodingparams_osol.fire(self.encodingparams)
    188     def set_storageindex(self, storageindex):
    189         self.storageindex = storageindex
    190         self._storageindex_osol.fire(self.storageindex)
    191     def write(self, data):
    192         precondition(data) # please don't write empty strings
    193         self.bufs.append(data)
    194         self.bufsiz += len(data)
    195         self._satisfy_reads_if_possible()
    196         if self.bufsiz >= self.buflim and self.producer:
    197             self.producer.pauseProducing()
    198     def finish(self):
    199         pass
    200     def close(self):
    201         self._closed_to_pusher = True
    202         # Any reads which haven't been satisfied by now are going to
    203         # have to be satisfied with short reads.
    204         self._satisfy_reads_if_possible()
    20567
    20668    # methods to satisfy the IEncryptedUploader interface
    20769    # (From the perspective of an uploader I am an IEncryptedUploadable.)
    20870    def set_upload_status(self, upload_status):
    20971        self.upload_status = upload_status
    21072    def get_size(self):
    211         if hasattr(self, 'size'): # attribute created by self.open()
    212             return defer.succeed(self.size)
    213         else:
    214             return self._size_osol.when_fired()
     73        size = self._filenode.get_size()
     74        assert size is not None
     75        return defer.succeed(size)
    21576    def get_all_encoding_parameters(self):
    216         # We have to learn the encoding params from pusher.
    217         if hasattr(self, 'encodingparams'):
    218             # attribute created by self.set_encodingparams()
    219             return defer.succeed(self.encodingparams)
    220         else:
    221             return self._encodingparams_osol.when_fired()
     77        return defer.succeed(self._encodingparams)
    22278    def read_encrypted(self, length, hash_only):
    223         """Returns a deferred which eventually fired with the requested
    224         ciphertext."""
     79        """Returns a deferred which eventually fires with the requested
     80        ciphertext, as a list of strings."""
    22581        precondition(length) # please don't ask to read 0 bytes
    226         d = defer.Deferred()
    227         self.next_read_ds.append(d)
    228         self.next_read_lens.append(length)
    229         self._satisfy_reads_if_possible()
     82        mc = consumer.MemoryConsumer()
     83        d = self._filenode.read(mc, self._offset, length)
     84        self._offset += length
     85        d.addCallback(lambda ign: mc.chunks)
    23086        return d
    23187    def get_storage_index(self):
    232         # We have to learn the storage index from pusher.
    233         if hasattr(self, 'storageindex'):
    234             # attribute created by self.set_storageindex()
    235             return defer.succeed(self.storageindex)
    236         else:
    237             return self._storageindex.when_fired()
     88        return self._filenode.get_storage_index()
     89    def close(self):
     90        pass
  • src/allmydata/immutable/upload.py

    diff --git a/src/allmydata/immutable/upload.py b/src/allmydata/immutable/upload.py
    index ca7d56b..7ac86c2 100644
    a b from allmydata.util.assertutil import precondition 
    2020from allmydata.util.rrefutil import add_version_to_remote_reference
    2121from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
    2222     IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus, \
    23      NoServersError, InsufficientVersionError, UploadUnhappinessError
     23     NoServersError, InsufficientVersionError, UploadUnhappinessError, \
     24     DEFAULT_MAX_SEGMENT_SIZE
    2425from allmydata.immutable import layout
    2526from pycryptopp.cipher.aes import AES
    2627
    class AssistedUploader: 
    11701171        return self._upload_status
    11711172
    11721173class BaseUploadable:
    1173     default_max_segment_size = 128*KiB # overridden by max_segment_size
     1174    # this is overridden by max_segment_size
     1175    default_max_segment_size = DEFAULT_MAX_SEGMENT_SIZE
    11741176    default_encoding_param_k = 3 # overridden by encoding_parameters
    11751177    default_encoding_param_happy = 7
    11761178    default_encoding_param_n = 10
  • src/allmydata/interfaces.py

    diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py
    index f325bb1..75aa51e 100644
    a b WriteEnablerSecret = Hash # used to protect mutable bucket modifications 
    2424LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
    2525LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests
    2626
     27KiB = 1024
     28DEFAULT_MAX_SEGMENT_SIZE = 128*KiB
     29
    2730class RIStubClient(RemoteInterface):
    2831    """Each client publishes a service announcement for a dummy object called
    2932    the StubClient. This object doesn't actually offer any services, but the
  • src/allmydata/nodemaker.py

    diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py
    index a30efbf..ef182a4 100644
    a b import weakref 
    22from zope.interface import implements
    33from allmydata.util.assertutil import precondition
    44from allmydata.interfaces import INodeMaker, MustBeDeepImmutableError
    5 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode
     5from allmydata.immutable.literal import LiteralFileNode
     6from allmydata.immutable.filenode import ImmutableFileNode, CiphertextFileNode
    67from allmydata.immutable.upload import Data
    78from allmydata.mutable.filenode import MutableFileNode
    89from allmydata.dirnode import DirectoryNode, pack_children
    class NodeMaker: 
    1718    implements(INodeMaker)
    1819
    1920    def __init__(self, storage_broker, secret_holder, history,
    20                  uploader, downloader, download_cache_dirman,
     21                 uploader, terminator,
    2122                 default_encoding_parameters, key_generator):
    2223        self.storage_broker = storage_broker
    2324        self.secret_holder = secret_holder
    2425        self.history = history
    2526        self.uploader = uploader
    26         self.downloader = downloader
    27         self.download_cache_dirman = download_cache_dirman
     27        self.terminator = terminator
    2828        self.default_encoding_parameters = default_encoding_parameters
    2929        self.key_generator = key_generator
    3030
    class NodeMaker: 
    3434        return LiteralFileNode(cap)
    3535    def _create_immutable(self, cap):
    3636        return ImmutableFileNode(cap, self.storage_broker, self.secret_holder,
    37                                  self.downloader, self.history,
    38                                  self.download_cache_dirman)
     37                                 self.terminator, self.history)
     38    def _create_immutable_verifier(self, cap):
     39        return CiphertextFileNode(cap, self.storage_broker, self.secret_holder,
     40                                  self.terminator, self.history)
    3941    def _create_mutable(self, cap):
    4042        n = MutableFileNode(self.storage_broker, self.secret_holder,
    4143                            self.default_encoding_parameters,
    class NodeMaker: 
    4850        # this returns synchronously. It starts with a "cap string".
    4951        assert isinstance(writecap, (str, type(None))), type(writecap)
    5052        assert isinstance(readcap,  (str, type(None))), type(readcap)
    51        
     53
    5254        bigcap = writecap or readcap
    5355        if not bigcap:
    5456            # maybe the writecap was hidden because we're in a readonly
    class NodeMaker: 
    7880            return self._create_lit(cap)
    7981        if isinstance(cap, uri.CHKFileURI):
    8082            return self._create_immutable(cap)
     83        if isinstance(cap, uri.CHKFileVerifierURI):
     84            return self._create_immutable_verifier(cap)
    8185        if isinstance(cap, (uri.ReadonlySSKFileURI, uri.WriteableSSKFileURI)):
    8286            return self._create_mutable(cap)
    8387        if isinstance(cap, (uri.DirectoryURI,
  • src/allmydata/test/test_cli.py

    diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py
    index c65474f..3566960 100644
    a b class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): 
    20402040            self.delete_shares_numbered(ur.uri, range(1,10))
    20412041        d.addCallback(_stash_bad)
    20422042
     2043        # the download is abandoned as soon as it's clear that we won't get
     2044        # enough shares. The one remaining share might be in either the
     2045        # COMPLETE or the PENDING state.
     2046        in_complete_msg = "ran out of shares: 1 complete, 0 pending, 0 overdue, 0 unused, need 3"
     2047        in_pending_msg = "ran out of shares: 0 complete, 1 pending, 0 overdue, 0 unused, need 3"
     2048
    20432049        d.addCallback(lambda ign: self.do_cli("get", self.uri_1share))
    20442050        def _check1((rc, out, err)):
    20452051            self.failIfEqual(rc, 0)
    20462052            self.failUnless("410 Gone" in err, err)
    20472053            self.failUnlessIn("NotEnoughSharesError: ", err)
    2048             self.failUnlessIn("Failed to get enough shareholders: have 1, need 3", err)
     2054            self.failUnless(in_complete_msg in err or in_pending_msg in err,
     2055                            err)
    20492056        d.addCallback(_check1)
    20502057
    20512058        targetf = os.path.join(self.basedir, "output")
    class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): 
    20542061            self.failIfEqual(rc, 0)
    20552062            self.failUnless("410 Gone" in err, err)
    20562063            self.failUnlessIn("NotEnoughSharesError: ", err)
    2057             self.failUnlessIn("Failed to get enough shareholders: have 1, need 3", err)
     2064            self.failUnless(in_complete_msg in err or in_pending_msg in err,
     2065                            err)
    20582066            self.failIf(os.path.exists(targetf))
    20592067        d.addCallback(_check2)
    20602068
  • src/allmydata/test/test_dirnode.py

    diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py
    index e6aaf77..3779327 100644
    a b class Packing(unittest.TestCase): 
    11061106    def test_unpack_and_pack_behavior(self):
    11071107        known_tree = b32decode(self.known_tree)
    11081108        nodemaker = NodeMaker(None, None, None,
    1109                               None, None, None,
     1109                              None, None,
    11101110                              {"k": 3, "n": 10}, None)
    11111111        write_uri = "URI:SSK-RO:e3mdrzfwhoq42hy5ubcz6rp3o4:ybyibhnp3vvwuq2vaw2ckjmesgkklfs6ghxleztqidihjyofgw7q"
    11121112        filenode = nodemaker.create_from_cap(write_uri)
    class Packing(unittest.TestCase): 
    11681168        return kids
    11691169
    11701170    def test_deep_immutable(self):
    1171         nm = NodeMaker(None, None, None, None, None, None, {"k": 3, "n": 10},
    1172                        None)
     1171        nm = NodeMaker(None, None, None, None, None, {"k": 3, "n": 10}, None)
    11731172        fn = MinimalFakeMutableFile()
    11741173
    11751174        kids = self._make_kids(nm, ["imm", "lit", "write", "read",
    class FakeNodeMaker(NodeMaker): 
    12631262class FakeClient2(Client):
    12641263    def __init__(self):
    12651264        self.nodemaker = FakeNodeMaker(None, None, None,
    1266                                        None, None, None,
     1265                                       None, None,
    12671266                                       {"k":3,"n":10}, None)
    12681267    def create_node_from_uri(self, rwcap, rocap):
    12691268        return self.nodemaker.create_from_cap(rwcap, rocap)
    class Deleter(GridTestMixin, unittest.TestCase): 
    15471546        def _do_delete(ignored):
    15481547            nm = UCWEingNodeMaker(c0.storage_broker, c0._secret_holder,
    15491548                                  c0.get_history(), c0.getServiceNamed("uploader"),
    1550                                   c0.downloader,
    1551                                   c0.download_cache_dirman,
     1549                                  c0.terminator,
    15521550                                  c0.get_encoding_parameters(),
    15531551                                  c0._key_generator)
    15541552            n = nm.create_from_cap(self.root_uri)
  • src/allmydata/test/test_download.py

    diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py
    index b54bf01..95fae2f 100644
    a b  
    55
    66import os
    77from twisted.trial import unittest
     8from twisted.internet import defer
    89from allmydata import uri
    910from allmydata.storage.server import storage_index_to_dir
    10 from allmydata.util import base32, fileutil
    11 from allmydata.util.consumer import download_to_data
    12 from allmydata.immutable import upload
     11from allmydata.util import base32, fileutil, spans, log
     12from allmydata.util.consumer import download_to_data, MemoryConsumer
     13from allmydata.immutable import upload, layout
    1314from allmydata.test.no_network import GridTestMixin
     15from allmydata.test.common import ShouldFailMixin
     16from allmydata.interfaces import NotEnoughSharesError, NoSharesError
     17from allmydata.immutable.downloader.common import BadSegmentNumberError, \
     18     BadCiphertextHashError
     19from allmydata.codec import CRSDecoder
     20from foolscap.eventual import fireEventually, flushEventualQueue
    1421
    1522plaintext = "This is a moderate-sized file.\n" * 10
    1623mutable_plaintext = "This is a moderate-sized mutable file.\n" * 10
    mutable_shares = { 
    6875}
    6976#--------- END stored_shares.py ----------------
    7077
    71 class DownloadTest(GridTestMixin, unittest.TestCase):
    72     timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box.
    73     def test_download(self):
    74         self.basedir = self.mktemp()
    75         self.set_up_grid()
    76         self.c0 = self.g.clients[0]
    77 
    78         # do this to create the shares
    79         #return self.create_shares()
    80 
    81         self.load_shares()
    82         d = self.download_immutable()
    83         d.addCallback(self.download_mutable)
    84         return d
     78class _Base(GridTestMixin, ShouldFailMixin):
    8579
    8680    def create_shares(self, ignored=None):
    8781        u = upload.Data(plaintext, None)
    class DownloadTest(GridTestMixin, unittest.TestCase): 
    178172        def _got_data(data):
    179173            self.failUnlessEqual(data, plaintext)
    180174        d.addCallback(_got_data)
     175        # make sure we can use the same node twice
     176        d.addCallback(lambda ign: download_to_data(n))
     177        d.addCallback(_got_data)
    181178        return d
    182179
    183180    def download_mutable(self, ignored=None):
    class DownloadTest(GridTestMixin, unittest.TestCase): 
    188185        d.addCallback(_got_data)
    189186        return d
    190187
     188class DownloadTest(_Base, unittest.TestCase):
     189    timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box.
     190    def test_download(self):
     191        self.basedir = self.mktemp()
     192        self.set_up_grid()
     193        self.c0 = self.g.clients[0]
     194
     195        # do this to create the shares
     196        #return self.create_shares()
     197
     198        self.load_shares()
     199        d = self.download_immutable()
     200        d.addCallback(self.download_mutable)
     201        return d
     202
     203    def test_download_failover(self):
     204        self.basedir = self.mktemp()
     205        self.set_up_grid()
     206        self.c0 = self.g.clients[0]
     207
     208        self.load_shares()
     209        si = uri.from_string(immutable_uri).get_storage_index()
     210        si_dir = storage_index_to_dir(si)
     211
     212        n = self.c0.create_node_from_uri(immutable_uri)
     213        d = download_to_data(n)
     214        def _got_data(data):
     215            self.failUnlessEqual(data, plaintext)
     216        d.addCallback(_got_data)
     217
     218        def _clobber_some_shares(ign):
     219            # find the three shares that were used, and delete them. Then
     220            # download again, forcing the downloader to fail over to other
     221            # shares
     222            for s in n._cnode._node._shares:
     223                for clientnum in immutable_shares:
     224                    for shnum in immutable_shares[clientnum]:
     225                        if s._shnum == shnum:
     226                            fn = os.path.join(self.get_serverdir(clientnum),
     227                                              "shares", si_dir, str(shnum))
     228                            os.unlink(fn)
     229        d.addCallback(_clobber_some_shares)
     230        d.addCallback(lambda ign: download_to_data(n))
     231        d.addCallback(_got_data)
     232
     233        def _clobber_most_shares(ign):
     234            # delete all but one of the shares that are still alive
     235            live_shares = [s for s in n._cnode._node._shares if s.is_alive()]
     236            save_me = live_shares[0]._shnum
     237            for clientnum in immutable_shares:
     238                for shnum in immutable_shares[clientnum]:
     239                    if shnum == save_me:
     240                        continue
     241                    fn = os.path.join(self.get_serverdir(clientnum),
     242                                      "shares", si_dir, str(shnum))
     243                    if os.path.exists(fn):
     244                        os.unlink(fn)
     245            # now the download should fail with NotEnoughSharesError
     246            return self.shouldFail(NotEnoughSharesError, "1shares", None,
     247                                   download_to_data, n)
     248        d.addCallback(_clobber_most_shares)
     249
     250        def _clobber_all_shares(ign):
     251            # delete the last remaining share
     252            for clientnum in immutable_shares:
     253                for shnum in immutable_shares[clientnum]:
     254                    fn = os.path.join(self.get_serverdir(clientnum),
     255                                      "shares", si_dir, str(shnum))
     256                    if os.path.exists(fn):
     257                        os.unlink(fn)
     258            # now a new download should fail with NoSharesError. We want a
     259            # new ImmutableFileNode so it will forget about the old shares.
     260            # If we merely called create_node_from_uri() without first
     261            # dereferencing the original node, the NodeMaker's _node_cache
     262            # would give us back the old one.
     263            n = None
     264            n = self.c0.create_node_from_uri(immutable_uri)
     265            return self.shouldFail(NoSharesError, "0shares", None,
     266                                   download_to_data, n)
     267        d.addCallback(_clobber_all_shares)
     268        return d
     269
     270    def test_badguess(self):
     271        self.basedir = self.mktemp()
     272        self.set_up_grid()
     273        self.c0 = self.g.clients[0]
     274        self.load_shares()
     275        n = self.c0.create_node_from_uri(immutable_uri)
     276
     277        # Cause the downloader to guess a segsize that's too low, so it will
     278        # ask for a segment number that's too high (beyond the end of the
     279        # real list, causing BadSegmentNumberError), to exercise
     280        # Segmentation._retry_bad_segment
     281
     282        con1 = MemoryConsumer()
     283        n._cnode._node._build_guessed_tables(90)
     284        # plaintext size of 310 bytes, wrong-segsize of 90 bytes, will make
     285        # us think that file[180:200] is in the third segment (segnum=2), but
     286        # really there's only one segment
     287        d = n.read(con1, 180, 20)
     288        def _done(res):
     289            self.failUnlessEqual("".join(con1.chunks), plaintext[180:200])
     290        d.addCallback(_done)
     291        return d
     292
     293    def test_simultaneous_badguess(self):
     294        self.basedir = self.mktemp()
     295        self.set_up_grid()
     296        self.c0 = self.g.clients[0]
     297
     298        # upload a file with multiple segments, and a non-default segsize, to
     299        # exercise the offset-guessing code. Because we don't tell the
     300        # downloader about the unusual segsize, it will guess wrong, and have
     301        # to do extra roundtrips to get the correct data.
     302        u = upload.Data(plaintext, None)
     303        u.max_segment_size = 70 # 5 segs, 8-wide hashtree
     304        con1 = MemoryConsumer()
     305        con2 = MemoryConsumer()
     306        d = self.c0.upload(u)
     307        def _uploaded(ur):
     308            n = self.c0.create_node_from_uri(ur.uri)
     309            d1 = n.read(con1, 70, 20)
     310            d2 = n.read(con2, 140, 20)
     311            return defer.gatherResults([d1,d2])
     312        d.addCallback(_uploaded)
     313        def _done(res):
     314            self.failUnlessEqual("".join(con1.chunks), plaintext[70:90])
     315            self.failUnlessEqual("".join(con2.chunks), plaintext[140:160])
     316        d.addCallback(_done)
     317        return d
     318
     319    def test_simultaneous_goodguess(self):
     320        self.basedir = self.mktemp()
     321        self.set_up_grid()
     322        self.c0 = self.g.clients[0]
     323
     324        # upload a file with multiple segments, and a non-default segsize, to
     325        # exercise the offset-guessing code. This time we *do* tell the
     326        # downloader about the unusual segsize, so it can guess right.
     327        u = upload.Data(plaintext, None)
     328        u.max_segment_size = 70 # 5 segs, 8-wide hashtree
     329        con1 = MemoryConsumer()
     330        con2 = MemoryConsumer()
     331        d = self.c0.upload(u)
     332        def _uploaded(ur):
     333            n = self.c0.create_node_from_uri(ur.uri)
     334            n._cnode._node._build_guessed_tables(u.max_segment_size)
     335            d1 = n.read(con1, 70, 20)
     336            #d2 = n.read(con2, 140, 20) # XXX
     337            d2 = defer.succeed(None)
     338            return defer.gatherResults([d1,d2])
     339        d.addCallback(_uploaded)
     340        def _done(res):
     341            self.failUnlessEqual("".join(con1.chunks), plaintext[70:90])
     342            self.failUnlessEqual("".join(con2.chunks), plaintext[140:160])
     343        #d.addCallback(_done)
     344        return d
     345
     346    def test_sequential_goodguess(self):
     347        self.basedir = self.mktemp()
     348        self.set_up_grid()
     349        self.c0 = self.g.clients[0]
     350        data = (plaintext*100)[:30000] # multiple of k
     351
     352        # upload a file with multiple segments, and a non-default segsize, to
     353        # exercise the offset-guessing code. This time we *do* tell the
     354        # downloader about the unusual segsize, so it can guess right.
     355        u = upload.Data(data, None)
     356        u.max_segment_size = 6000 # 5 segs, 8-wide hashtree
     357        con1 = MemoryConsumer()
     358        con2 = MemoryConsumer()
     359        d = self.c0.upload(u)
     360        def _uploaded(ur):
     361            n = self.c0.create_node_from_uri(ur.uri)
     362            n._cnode._node._build_guessed_tables(u.max_segment_size)
     363            d = n.read(con1, 12000, 20)
     364            def _read1(ign):
     365                self.failUnlessEqual("".join(con1.chunks), data[12000:12020])
     366                return n.read(con2, 24000, 20)
     367            d.addCallback(_read1)
     368            def _read2(ign):
     369                self.failUnlessEqual("".join(con2.chunks), data[24000:24020])
     370            d.addCallback(_read2)
     371            return d
     372        d.addCallback(_uploaded)
     373        return d
     374
     375
     376    def test_simultaneous_get_blocks(self):
     377        self.basedir = self.mktemp()
     378        self.set_up_grid()
     379        self.c0 = self.g.clients[0]
     380
     381        self.load_shares()
     382        stay_empty = []
     383
     384        n = self.c0.create_node_from_uri(immutable_uri)
     385        d = download_to_data(n)
     386        def _use_shares(ign):
     387            shares = list(n._cnode._node._shares)
     388            s0 = shares[0]
     389            # make sure .cancel works too
     390            o0 = s0.get_block(0)
     391            o0.subscribe(lambda **kwargs: stay_empty.append(kwargs))
     392            o1 = s0.get_block(0)
     393            o2 = s0.get_block(0)
     394            o0.cancel()
     395            o3 = s0.get_block(1) # state=BADSEGNUM
     396            d1 = defer.Deferred()
     397            d2 = defer.Deferred()
     398            d3 = defer.Deferred()
     399            o1.subscribe(lambda **kwargs: d1.callback(kwargs))
     400            o2.subscribe(lambda **kwargs: d2.callback(kwargs))
     401            o3.subscribe(lambda **kwargs: d3.callback(kwargs))
     402            return defer.gatherResults([d1,d2,d3])
     403        d.addCallback(_use_shares)
     404        def _done(res):
     405            r1,r2,r3 = res
     406            self.failUnlessEqual(r1["state"], "COMPLETE")
     407            self.failUnlessEqual(r2["state"], "COMPLETE")
     408            self.failUnlessEqual(r3["state"], "BADSEGNUM")
     409            self.failUnless("block" in r1)
     410            self.failUnless("block" in r2)
     411            self.failIf(stay_empty)
     412        d.addCallback(_done)
     413        return d
     414
     415    def test_download_no_overrun(self):
     416        self.basedir = self.mktemp()
     417        self.set_up_grid()
     418        self.c0 = self.g.clients[0]
     419
     420        self.load_shares()
     421
     422        # tweak the client's copies of server-version data, so it believes
     423        # that they're old and can't handle reads that overrun the length of
     424        # the share. This exercises a different code path.
     425        for (peerid, rref) in self.c0.storage_broker.get_all_servers():
     426            v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"]
     427            v1["tolerates-immutable-read-overrun"] = False
     428
     429        n = self.c0.create_node_from_uri(immutable_uri)
     430        d = download_to_data(n)
     431        def _got_data(data):
     432            self.failUnlessEqual(data, plaintext)
     433        d.addCallback(_got_data)
     434        return d
     435
     436    def test_download_segment(self):
     437        self.basedir = self.mktemp()
     438        self.set_up_grid()
     439        self.c0 = self.g.clients[0]
     440        self.load_shares()
     441        n = self.c0.create_node_from_uri(immutable_uri)
     442        cn = n._cnode
     443        (d,c) = cn.get_segment(0)
     444        def _got_segment((offset,data,decodetime)):
     445            self.failUnlessEqual(offset, 0)
     446            self.failUnlessEqual(len(data), len(plaintext))
     447        d.addCallback(_got_segment)
     448        return d
     449
     450    def test_download_segment_cancel(self):
     451        self.basedir = self.mktemp()
     452        self.set_up_grid()
     453        self.c0 = self.g.clients[0]
     454        self.load_shares()
     455        n = self.c0.create_node_from_uri(immutable_uri)
     456        cn = n._cnode
     457        (d,c) = cn.get_segment(0)
     458        fired = []
     459        d.addCallback(fired.append)
     460        c.cancel()
     461        d = fireEventually()
     462        d.addCallback(flushEventualQueue)
     463        def _check(ign):
     464            self.failUnlessEqual(fired, [])
     465        d.addCallback(_check)
     466        return d
     467
     468    def test_download_bad_segment(self):
     469        self.basedir = self.mktemp()
     470        self.set_up_grid()
     471        self.c0 = self.g.clients[0]
     472        self.load_shares()
     473        n = self.c0.create_node_from_uri(immutable_uri)
     474        cn = n._cnode
     475        def _try_download():
     476            (d,c) = cn.get_segment(1)
     477            return d
     478        d = self.shouldFail(BadSegmentNumberError, "badseg",
     479                            "segnum=1, numsegs=1",
     480                            _try_download)
     481        return d
     482
     483    def test_download_segment_terminate(self):
     484        self.basedir = self.mktemp()
     485        self.set_up_grid()
     486        self.c0 = self.g.clients[0]
     487        self.load_shares()
     488        n = self.c0.create_node_from_uri(immutable_uri)
     489        cn = n._cnode
     490        (d,c) = cn.get_segment(0)
     491        fired = []
     492        d.addCallback(fired.append)
     493        self.c0.terminator.disownServiceParent()
     494        d = fireEventually()
     495        d.addCallback(flushEventualQueue)
     496        def _check(ign):
     497            self.failUnlessEqual(fired, [])
     498        d.addCallback(_check)
     499        return d
     500
     501    def test_stop_producing(self):
     502        self.basedir = self.mktemp()
     503        self.set_up_grid()
     504        self.c0 = self.g.clients[0]
     505        self.load_shares()
     506        n = self.c0.create_node_from_uri(immutable_uri)
     507
     508        con = MemoryConsumer()
     509        d = n.read(con)
     510        con.producer.stopProducing()
     511        # d should never fire
     512        del d
     513
     514    def test_download_segment_bad_ciphertext_hash(self):
     515        # The crypttext_hash_tree asserts the integrity of the decoded
     516        # ciphertext, and exists to detect two sorts of problems. The first
     517        # is a bug in zfec decode. The second is the "two-sided t-shirt"
     518        # attack (found by Christian Grothoff), in which a malicious uploader
     519        # creates two sets of shares (one for file A, second for file B),
     520        # uploads a combination of them (shares 0-4 of A, 5-9 of B), and then
     521        # builds an otherwise normal UEB around those shares: their goal is
     522        # to give their victim a filecap which sometimes downloads the good A
     523        # contents, and sometimes the bad B contents, depending upon which
     524        # servers/shares they can get to. Having a hash of the ciphertext
     525        # forces them to commit to exactly one version. (Christian's prize
     526        # for finding this problem was a t-shirt with two sides: the shares
     527        # of file A on the front, B on the back).
     528
     529        # creating a set of shares with this property is too hard, although
     530        # it'd be nice to do so and confirm our fix. (it requires a lot of
     531        # tampering with the uploader). So instead, we just damage the
     532        # decoder. The tail decoder is rebuilt each time, so we need to use a
     533        # file with multiple segments.
     534        self.basedir = self.mktemp()
     535        self.set_up_grid()
     536        self.c0 = self.g.clients[0]
     537
     538        u = upload.Data(plaintext, None)
     539        u.max_segment_size = 60 # 6 segs
     540        d = self.c0.upload(u)
     541        def _uploaded(ur):
     542            n = self.c0.create_node_from_uri(ur.uri)
     543            n._cnode._node._build_guessed_tables(u.max_segment_size)
     544
     545            d = download_to_data(n)
     546            def _break_codec(data):
     547                # the codec isn't created until the UEB is retrieved
     548                node = n._cnode._node
     549                vcap = node._verifycap
     550                k, N = vcap.needed_shares, vcap.total_shares
     551                bad_codec = BrokenDecoder()
     552                bad_codec.set_params(node.segment_size, k, N)
     553                node._codec = bad_codec
     554            d.addCallback(_break_codec)
     555            # now try to download it again. The broken codec will provide
     556            # ciphertext that fails the hash test.
     557            d.addCallback(lambda ign:
     558                          self.shouldFail(BadCiphertextHashError, "badhash",
     559                                          "hash failure in "
     560                                          "ciphertext_hash_tree: segnum=0",
     561                                          download_to_data, n))
     562            return d
     563        d.addCallback(_uploaded)
     564        return d
     565
     566    def OFFtest_download_segment_XXX(self):
     567        self.basedir = self.mktemp()
     568        self.set_up_grid()
     569        self.c0 = self.g.clients[0]
     570
     571        # upload a file with multiple segments, and a non-default segsize, to
     572        # exercise the offset-guessing code. This time we *do* tell the
     573        # downloader about the unusual segsize, so it can guess right.
     574        u = upload.Data(plaintext, None)
     575        u.max_segment_size = 70 # 5 segs, 8-wide hashtree
     576        con1 = MemoryConsumer()
     577        con2 = MemoryConsumer()
     578        d = self.c0.upload(u)
     579        def _uploaded(ur):
     580            n = self.c0.create_node_from_uri(ur.uri)
     581            n._cnode._node._build_guessed_tables(u.max_segment_size)
     582            d1 = n.read(con1, 70, 20)
     583            #d2 = n.read(con2, 140, 20)
     584            d2 = defer.succeed(None)
     585            return defer.gatherResults([d1,d2])
     586        d.addCallback(_uploaded)
     587        def _done(res):
     588            self.failUnlessEqual("".join(con1.chunks), plaintext[70:90])
     589            self.failUnlessEqual("".join(con2.chunks), plaintext[140:160])
     590        #d.addCallback(_done)
     591        return d
     592
     593    def test_duplicate_shares(self):
     594        self.basedir = self.mktemp()
     595        self.set_up_grid()
     596        self.c0 = self.g.clients[0]
     597
     598        self.load_shares()
     599        # make sure everybody has a copy of sh0. The second server contacted
     600        # will report two shares, and the ShareFinder will handle the
     601        # duplicate by attaching both to the same CommonShare instance.
     602        si = uri.from_string(immutable_uri).get_storage_index()
     603        si_dir = storage_index_to_dir(si)
     604        sh0_file = [sharefile
     605                    for (shnum, serverid, sharefile)
     606                    in self.find_shares(immutable_uri)
     607                    if shnum == 0][0]
     608        sh0_data = open(sh0_file, "rb").read()
     609        for clientnum in immutable_shares:
     610            if 0 in immutable_shares[clientnum]:
     611                continue
     612            cdir = self.get_serverdir(clientnum)
     613            target = os.path.join(cdir, "shares", si_dir, "0")
     614            outf = open(target, "wb")
     615            outf.write(sh0_data)
     616            outf.close()
     617
     618        d = self.download_immutable()
     619        return d
     620
     621    def test_verifycap(self):
     622        self.basedir = self.mktemp()
     623        self.set_up_grid()
     624        self.c0 = self.g.clients[0]
     625        self.load_shares()
     626
     627        n = self.c0.create_node_from_uri(immutable_uri)
     628        vcap = n.get_verify_cap().to_string()
     629        vn = self.c0.create_node_from_uri(vcap)
     630        d = download_to_data(vn)
     631        def _got_ciphertext(ciphertext):
     632            self.failUnlessEqual(len(ciphertext), len(plaintext))
     633            self.failIfEqual(ciphertext, plaintext)
     634        d.addCallback(_got_ciphertext)
     635        return d
     636
     637class BrokenDecoder(CRSDecoder):
     638    def decode(self, shares, shareids):
     639        d = CRSDecoder.decode(self, shares, shareids)
     640        def _decoded(buffers):
     641            def _corruptor(s, which):
     642                return s[:which] + chr(ord(s[which])^0x01) + s[which+1:]
     643            buffers[0] = _corruptor(buffers[0], 0) # flip lsb of first byte
     644            return buffers
     645        d.addCallback(_decoded)
     646        return d
     647
     648class Corruption(_Base, unittest.TestCase):
     649
     650    def test_each_byte(self):
     651        # Setting catalog_detection=True performs an exhaustive test of the
     652        # Downloader's response to corruption in the lsb of each byte of the
     653        # 2070-byte share, with two goals: make sure we tolerate all forms of
     654        # corruption (i.e. don't hang or return bad data), and make a list of
     655        # which bytes can be corrupted without influencing the download
     656        # (since we don't need every byte of the share). That takes 50s to
     657        # run on my laptop and doesn't have any actual asserts, so we don't
     658        # normally do that.
     659        self.catalog_detection = False
     660
     661        self.basedir = "download/Corruption/each_byte"
     662        self.set_up_grid()
     663        self.c0 = self.g.clients[0]
     664
     665        # to exercise the block-hash-tree code properly, we need to have
     666        # multiple segments. We don't tell the downloader about the different
     667        # segsize, so it guesses wrong and must do extra roundtrips.
     668        u = upload.Data(plaintext, None)
     669        u.max_segment_size = 120 # 3 segs, 4-wide hashtree
     670
     671        def _fix_sh0(res):
     672            f = open(self.sh0_file, "wb")
     673            f.write(self.sh0_orig)
     674            f.close()
     675        def _corrupt_flip(ign, imm_uri, which):
     676            log.msg("corrupt %d" % which)
     677            def _corruptor(s, debug=False):
     678                return s[:which] + chr(ord(s[which])^0x01) + s[which+1:]
     679            self.corrupt_shares_numbered(imm_uri, [0], _corruptor)
     680
     681        def _corrupt_set(ign, imm_uri, which, newvalue):
     682            log.msg("corrupt %d" % which)
     683            def _corruptor(s, debug=False):
     684                return s[:which] + chr(newvalue) + s[which+1:]
     685            self.corrupt_shares_numbered(imm_uri, [0], _corruptor)
     686
     687        if self.catalog_detection:
     688            undetected = spans.Spans()
     689
     690        def _download(ign, imm_uri, which, expected):
     691            n = self.c0.create_node_from_uri(imm_uri)
     692            # for this test to work, we need to have a new Node each time.
     693            # Make sure the NodeMaker's weakcache hasn't interfered.
     694            assert not n._cnode._node._shares
     695            d = download_to_data(n)
     696            def _got_data(data):
     697                self.failUnlessEqual(data, plaintext)
     698                shnums = sorted([s._shnum for s in n._cnode._node._shares])
     699                no_sh0 = bool(0 not in shnums)
     700                sh0 = [s for s in n._cnode._node._shares if s._shnum == 0]
     701                sh0_had_corruption = False
     702                if sh0 and sh0[0].had_corruption:
     703                    sh0_had_corruption = True
     704                num_needed = len(n._cnode._node._shares)
     705                if self.catalog_detection:
     706                    detected = no_sh0 or sh0_had_corruption or (num_needed!=3)
     707                    if not detected:
     708                        undetected.add(which, 1)
     709                if expected == "no-sh0":
     710                    self.failIfIn(0, shnums)
     711                elif expected == "0bad-need-3":
     712                    self.failIf(no_sh0)
     713                    self.failUnless(sh0[0].had_corruption)
     714                    self.failUnlessEqual(num_needed, 3)
     715                elif expected == "need-4th":
     716                    self.failIf(no_sh0)
     717                    self.failUnless(sh0[0].had_corruption)
     718                    self.failIfEqual(num_needed, 3)
     719            d.addCallback(_got_data)
     720            return d
     721
     722
     723        d = self.c0.upload(u)
     724        def _uploaded(ur):
     725            imm_uri = ur.uri
     726            self.sh0_file = [sharefile
     727                             for (shnum, serverid, sharefile)
     728                             in self.find_shares(imm_uri)
     729                             if shnum == 0][0]
     730            self.sh0_orig = open(self.sh0_file, "rb").read()
     731            d = defer.succeed(None)
     732            # 'victims' is a list of corruption tests to run. Each one flips
     733            # the low-order bit of the specified offset in the share file (so
     734            # offset=0 is the MSB of the container version, offset=15 is the
     735            # LSB of the share version, offset=24 is the MSB of the
     736            # data-block-offset, and offset=48 is the first byte of the first
     737            # data-block). Each one also specifies what sort of corruption
     738            # we're expecting to see.
     739            no_sh0_victims = [0,1,2,3] # container version
     740            need3_victims =  [ ] # none currently in this category
     741            # when the offsets are corrupted, the Share will be unable to
     742            # retrieve the data it wants (because it thinks that data lives
     743            # off in the weeds somewhere), and Share treats DataUnavailable
     744            # as abandon-this-share, so in general we'll be forced to look
     745            # for a 4th share.
     746            need_4th_victims = [12,13,14,15, # share version
     747                                24,25,26,27, # offset[data]
     748                                32,33,34,35, # offset[crypttext_hash_tree]
     749                                36,37,38,39, # offset[block_hashes]
     750                                44,45,46,47, # offset[UEB]
     751                                ]
     752            need_4th_victims.append(48) # block data
     753            # when corrupting hash trees, we must corrupt a value that isn't
     754            # directly set from somewhere else. Since we download data from
     755            # seg0, corrupt something on its hash chain, like [2] (the
     756            # right-hand child of the root)
     757            need_4th_victims.append(600+2*32) # block_hashes[2]
     758            # Share.loop is pretty conservative: it abandons the share at the
     759            # first sign of corruption. It doesn't strictly need to be this
     760            # way: if the UEB were corrupt, we could still get good block
     761            # data from that share, as long as there was a good copy of the
     762            # UEB elsewhere. If this behavior is relaxed, then corruption in
     763            # the following fields (which are present in multiple shares)
     764            # should fall into the "need3_victims" case instead of the
     765            # "need_4th_victims" case.
     766            need_4th_victims.append(376+2*32) # crypttext_hash_tree[2]
     767            need_4th_victims.append(824) # share_hashes
     768            need_4th_victims.append(994) # UEB length
     769            need_4th_victims.append(998) # UEB
     770            corrupt_me = ([(i,"no-sh0") for i in no_sh0_victims] +
     771                          [(i, "0bad-need-3") for i in need3_victims] +
     772                          [(i, "need-4th") for i in need_4th_victims])
     773            if self.catalog_detection:
     774                corrupt_me = [(i, "") for i in range(len(self.sh0_orig))]
     775            for i,expected in corrupt_me:
     776                d.addCallback(_corrupt_flip, imm_uri, i)
     777                d.addCallback(_download, imm_uri, i, expected)
     778                d.addCallback(_fix_sh0)
     779                d.addCallback(fireEventually)
     780            corrupt_values = [(3, 2, "no-sh0"),
     781                              (15, 2, "need-4th"), # share looks v2
     782                              ]
     783            for i,newvalue,expected in corrupt_values:
     784                d.addCallback(_corrupt_set, imm_uri, i, newvalue)
     785                d.addCallback(_download, imm_uri, i, expected)
     786                d.addCallback(_fix_sh0)
     787                d.addCallback(fireEventually)
     788            return d
     789        d.addCallback(_uploaded)
     790        def _show_results(ign):
     791            print
     792            print ("of [0:%d], corruption ignored in %s" %
     793                   (len(self.sh0_orig), undetected.dump()))
     794        if self.catalog_detection:
     795            d.addCallback(_show_results)
     796            # of [0:2070], corruption ignored in len=1133:
     797            # [4-11],[16-23],[28-31],[152-439],[600-663],[1309-2069]
     798            #  [4-11]: container sizes
     799            #  [16-23]: share block/data sizes
     800            #  [152-375]: plaintext hash tree
     801            #  [376-408]: crypttext_hash_tree[0] (root)
     802            #  [408-439]: crypttext_hash_tree[1] (computed)
     803            #  [600-631]: block hash tree[0] (root)
     804            #  [632-663]: block hash tree[1] (computed)
     805            #  [1309-]: reserved+unused UEB space
     806        return d
     807
     808
     809class DownloadV2(_Base, unittest.TestCase):
     810    # tests which exercise v2-share code. They first upload a file with
     811    # FORCE_V2 set.
     812
     813    def setUp(self):
     814        d = defer.maybeDeferred(_Base.setUp, self)
     815        def _set_force_v2(ign):
     816            self.old_force_v2 = layout.FORCE_V2
     817            layout.FORCE_V2 = True
     818        d.addCallback(_set_force_v2)
     819        return d
     820    def tearDown(self):
     821        layout.FORCE_V2 = self.old_force_v2
     822        return _Base.tearDown(self)
     823
     824    def test_download(self):
     825        self.basedir = self.mktemp()
     826        self.set_up_grid()
     827        self.c0 = self.g.clients[0]
     828
     829        # upload a file
     830        u = upload.Data(plaintext, None)
     831        d = self.c0.upload(u)
     832        def _uploaded(ur):
     833            imm_uri = ur.uri
     834            n = self.c0.create_node_from_uri(imm_uri)
     835            return download_to_data(n)
     836        d.addCallback(_uploaded)
     837        return d
     838
     839    def test_download_no_overrun(self):
     840        self.basedir = self.mktemp()
     841        self.set_up_grid()
     842        self.c0 = self.g.clients[0]
     843
     844        # tweak the client's copies of server-version data, so it believes
     845        # that they're old and can't handle reads that overrun the length of
     846        # the share. This exercises a different code path.
     847        for (peerid, rref) in self.c0.storage_broker.get_all_servers():
     848            v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"]
     849            v1["tolerates-immutable-read-overrun"] = False
     850
     851        # upload a file
     852        u = upload.Data(plaintext, None)
     853        d = self.c0.upload(u)
     854        def _uploaded(ur):
     855            imm_uri = ur.uri
     856            n = self.c0.create_node_from_uri(imm_uri)
     857            return download_to_data(n)
     858        d.addCallback(_uploaded)
     859        return d
     860
     861    def OFF_test_no_overrun_corrupt_shver(self): # unnecessary
     862        self.basedir = self.mktemp()
     863        self.set_up_grid()
     864        self.c0 = self.g.clients[0]
     865
     866        for (peerid, rref) in self.c0.storage_broker.get_all_servers():
     867            v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"]
     868            v1["tolerates-immutable-read-overrun"] = False
     869
     870        # upload a file
     871        u = upload.Data(plaintext, None)
     872        d = self.c0.upload(u)
     873        def _uploaded(ur):
     874            imm_uri = ur.uri
     875            def _do_corrupt(which, newvalue):
     876                def _corruptor(s, debug=False):
     877                    return s[:which] + chr(newvalue) + s[which+1:]
     878                self.corrupt_shares_numbered(imm_uri, [0], _corruptor)
     879            _do_corrupt(12+3, 0x00)
     880            n = self.c0.create_node_from_uri(imm_uri)
     881            d = download_to_data(n)
     882            def _got_data(data):
     883                self.failUnlessEqual(data, plaintext)
     884            d.addCallback(_got_data)
     885            return d
     886        d.addCallback(_uploaded)
     887        return d
  • src/allmydata/test/test_filenode.py

    diff --git a/src/allmydata/test/test_filenode.py b/src/allmydata/test/test_filenode.py
    index 5f3feaa..61bb0e8 100644
    a b  
    22from twisted.trial import unittest
    33from allmydata import uri, client
    44from allmydata.monitor import Monitor
    5 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode
     5from allmydata.immutable.literal import LiteralFileNode
     6from allmydata.immutable.filenode import ImmutableFileNode
    67from allmydata.mutable.filenode import MutableFileNode
    7 from allmydata.util import hashutil, cachedir
     8from allmydata.util import hashutil
    89from allmydata.util.consumer import download_to_data
    910
    1011class NotANode:
    class Node(unittest.TestCase): 
    3031                           needed_shares=3,
    3132                           total_shares=10,
    3233                           size=1000)
    33         cf = cachedir.CacheFile("none")
    34         fn1 = ImmutableFileNode(u, None, None, None, None, cf)
    35         fn2 = ImmutableFileNode(u, None, None, None, None, cf)
     34        fn1 = ImmutableFileNode(u, None, None, None, None)
     35        fn2 = ImmutableFileNode(u, None, None, None, None)
    3636        self.failUnlessEqual(fn1, fn2)
    3737        self.failIfEqual(fn1, "I am not a filenode")
    3838        self.failIfEqual(fn1, NotANode())
  • src/allmydata/test/test_hung_server.py

    diff --git a/src/allmydata/test/test_hung_server.py b/src/allmydata/test/test_hung_server.py
    index 4aef484..b87658d 100644
    a b class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 
    1919    # Many of these tests take around 60 seconds on François's ARM buildslave:
    2020    # http://tahoe-lafs.org/buildbot/builders/FranXois%20lenny-armv5tel
    2121    timeout = 120
     22    skip="not ready"
    2223
    2324    def _break(self, servers):
    2425        for (id, ss) in servers:
    class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 
    109110            stage_4_d = None # currently we aren't doing any tests which require this for mutable files
    110111        else:
    111112            d = download_to_data(n)
    112             stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! FIXME
     113            #stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! FIXME
     114            stage_4_d = None
    113115        return (d, stage_4_d,)
    114116
    115117    def _wait_for_data(self, n):
    class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 
    137139                                   self._download_and_check)
    138140        else:
    139141            return self.shouldFail(NotEnoughSharesError, self.basedir,
    140                                    "Failed to get enough shareholders",
     142                                   "ran out of shares",
    141143                                   self._download_and_check)
    142144
    143145
    class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 
    230232        return d
    231233
    232234    def test_failover_during_stage_4(self):
     235        raise unittest.SkipTest("needs rewrite")
    233236        # See #287
    234237        d = defer.succeed(None)
    235238        for mutable in [False]:
  • src/allmydata/test/test_immutable.py

    diff --git a/src/allmydata/test/test_immutable.py b/src/allmydata/test/test_immutable.py
    index a430db2..a61c058 100644
    a b from twisted.internet import defer 
    55from twisted.trial import unittest
    66import random
    77
    8 class Test(common.ShareManglingMixin, unittest.TestCase):
     8class Test(common.ShareManglingMixin, common.ShouldFailMixin, unittest.TestCase):
    99    def test_test_code(self):
    1010        # The following process of stashing the shares, running
    1111        # replace_shares, and asserting that the new set of shares equals the
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    1818            return res
    1919        d.addCallback(_stash_it)
    2020
    21         # The following process of deleting 8 of the shares and asserting that you can't
    22         # download it is more to test this test code than to test the Tahoe code...
     21        # The following process of deleting 8 of the shares and asserting
     22        # that you can't download it is more to test this test code than to
     23        # test the Tahoe code...
    2324        def _then_delete_8(unused=None):
    2425            self.replace_shares(stash[0], storage_index=self.uri.get_storage_index())
    2526            for i in range(8):
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    4243        return d
    4344
    4445    def test_download(self):
    45         """ Basic download.  (This functionality is more or less already tested by test code in
    46         other modules, but this module is also going to test some more specific things about
    47         immutable download.)
     46        """ Basic download. (This functionality is more or less already
     47        tested by test code in other modules, but this module is also going
     48        to test some more specific things about immutable download.)
    4849        """
    4950        d = defer.succeed(None)
    5051        before_download_reads = self._count_reads()
    5152        def _after_download(unused=None):
    5253            after_download_reads = self._count_reads()
    53             self.failIf(after_download_reads-before_download_reads > 27, (after_download_reads, before_download_reads))
     54            #print before_download_reads, after_download_reads
     55            self.failIf(after_download_reads-before_download_reads > 27,
     56                        (after_download_reads, before_download_reads))
    5457        d.addCallback(self._download_and_check_plaintext)
    5558        d.addCallback(_after_download)
    5659        return d
    5760
    5861    def test_download_from_only_3_remaining_shares(self):
    59         """ Test download after 7 random shares (of the 10) have been removed. """
     62        """ Test download after 7 random shares (of the 10) have been
     63        removed."""
    6064        d = defer.succeed(None)
    6165        def _then_delete_7(unused=None):
    6266            for i in range(7):
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    6569        d.addCallback(_then_delete_7)
    6670        def _after_download(unused=None):
    6771            after_download_reads = self._count_reads()
     72            #print before_download_reads, after_download_reads
    6873            self.failIf(after_download_reads-before_download_reads > 27, (after_download_reads, before_download_reads))
    6974        d.addCallback(self._download_and_check_plaintext)
    7075        d.addCallback(_after_download)
    7176        return d
    7277
    7378    def test_download_from_only_3_shares_with_good_crypttext_hash(self):
    74         """ Test download after 7 random shares (of the 10) have had their crypttext hash tree corrupted. """
     79        """ Test download after 7 random shares (of the 10) have had their
     80        crypttext hash tree corrupted."""
    7581        d = defer.succeed(None)
    7682        def _then_corrupt_7(unused=None):
    7783            shnums = range(10)
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    8490        return d
    8591
    8692    def test_download_abort_if_too_many_missing_shares(self):
    87         """ Test that download gives up quickly when it realizes there aren't enough shares out
    88         there."""
    89         d = defer.succeed(None)
    90         def _then_delete_8(unused=None):
    91             for i in range(8):
    92                 self._delete_a_share()
    93         d.addCallback(_then_delete_8)
    94 
    95         before_download_reads = self._count_reads()
    96         def _attempt_to_download(unused=None):
    97             d2 = download_to_data(self.n)
    98 
    99             def _callb(res):
    100                 self.fail("Should have gotten an error from attempt to download, not %r" % (res,))
    101             def _errb(f):
    102                 self.failUnless(f.check(NotEnoughSharesError))
    103             d2.addCallbacks(_callb, _errb)
    104             return d2
    105 
    106         d.addCallback(_attempt_to_download)
    107 
    108         def _after_attempt(unused=None):
    109             after_download_reads = self._count_reads()
    110             # To pass this test, you are required to give up before actually trying to read any
    111             # share data.
    112             self.failIf(after_download_reads-before_download_reads > 0, (after_download_reads, before_download_reads))
    113         d.addCallback(_after_attempt)
     93        """ Test that download gives up quickly when it realizes there aren't
     94        enough shares out there."""
     95        for i in range(8):
     96            self._delete_a_share()
     97        d = self.shouldFail(NotEnoughSharesError, "delete 8", None,
     98                            download_to_data, self.n)
     99        # the new downloader pipelines a bunch of read requests in parallel,
     100        # so don't bother asserting anything about the number of reads
    114101        return d
    115102
    116103    def test_download_abort_if_too_many_corrupted_shares(self):
    117         """ Test that download gives up quickly when it realizes there aren't enough uncorrupted
    118         shares out there. It should be able to tell because the corruption occurs in the
    119         sharedata version number, which it checks first."""
     104        """Test that download gives up quickly when it realizes there aren't
     105        enough uncorrupted shares out there. It should be able to tell
     106        because the corruption occurs in the sharedata version number, which
     107        it checks first."""
    120108        d = defer.succeed(None)
    121109        def _then_corrupt_8(unused=None):
    122110            shnums = range(10)
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    140128
    141129        def _after_attempt(unused=None):
    142130            after_download_reads = self._count_reads()
    143             # To pass this test, you are required to give up before reading all of the share
    144             # data.  Actually, we could give up sooner than 45 reads, but currently our download
    145             # code does 45 reads.  This test then serves as a "performance regression detector"
    146             # -- if you change download code so that it takes *more* reads, then this test will
    147             # fail.
    148             self.failIf(after_download_reads-before_download_reads > 45, (after_download_reads, before_download_reads))
     131            #print before_download_reads, after_download_reads
     132            # To pass this test, you are required to give up before reading
     133            # all of the share data. Actually, we could give up sooner than
     134            # 45 reads, but currently our download code does 45 reads. This
     135            # test then serves as a "performance regression detector" -- if
     136            # you change download code so that it takes *more* reads, then
     137            # this test will fail.
     138            self.failIf(after_download_reads-before_download_reads > 45,
     139                        (after_download_reads, before_download_reads))
    149140        d.addCallback(_after_attempt)
    150141        return d
    151142
    152143
    153 # XXX extend these tests to show bad behavior of various kinds from servers: raising exception from each remove_foo() method, for example
     144# XXX extend these tests to show bad behavior of various kinds from servers:
     145# raising exception from each remove_foo() method, for example
    154146
    155147# XXX test disconnect DeadReferenceError from get_buckets and get_block_whatsit
    156148
     149# TODO: delete this whole file
  • src/allmydata/test/test_mutable.py

    diff --git a/src/allmydata/test/test_mutable.py b/src/allmydata/test/test_mutable.py
    index fa29d34..1c3825c 100644
    a b def make_nodemaker(s=None, num_peers=10): 
    197197    keygen = client.KeyGenerator()
    198198    keygen.set_default_keysize(522)
    199199    nodemaker = NodeMaker(storage_broker, sh, None,
    200                           None, None, None,
     200                          None, None,
    201201                          {"k": 3, "n": 10}, keygen)
    202202    return nodemaker
    203203
  • src/allmydata/test/test_repairer.py

    diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py
    index 91ab704..8075a21 100644
    a b from allmydata.test import common 
    33from allmydata.monitor import Monitor
    44from allmydata import check_results
    55from allmydata.interfaces import NotEnoughSharesError
    6 from allmydata.immutable import repairer, upload
     6from allmydata.immutable import upload
    77from allmydata.util.consumer import download_to_data
    88from twisted.internet import defer
    99from twisted.trial import unittest
    WRITE_LEEWAY = 35 
    363363# Optimally, you could repair one of these (small) files in a single write.
    364364DELTA_WRITES_PER_SHARE = 1 * WRITE_LEEWAY
    365365
    366 class DownUpConnector(unittest.TestCase):
    367     def test_deferred_satisfaction(self):
    368         duc = repairer.DownUpConnector()
    369         duc.registerProducer(None, True) # just because you have to call registerProducer first
    370         # case 1: total data in buf is < requested data at time of request
    371         duc.write('\x01')
    372         d = duc.read_encrypted(2, False)
    373         def _then(data):
    374             self.failUnlessEqual(len(data), 2)
    375             self.failUnlessEqual(data[0], '\x01')
    376             self.failUnlessEqual(data[1], '\x02')
    377         d.addCallback(_then)
    378         duc.write('\x02')
    379         return d
    380 
    381     def test_extra(self):
    382         duc = repairer.DownUpConnector()
    383         duc.registerProducer(None, True) # just because you have to call registerProducer first
    384         # case 1: total data in buf is < requested data at time of request
    385         duc.write('\x01')
    386         d = duc.read_encrypted(2, False)
    387         def _then(data):
    388             self.failUnlessEqual(len(data), 2)
    389             self.failUnlessEqual(data[0], '\x01')
    390             self.failUnlessEqual(data[1], '\x02')
    391         d.addCallback(_then)
    392         duc.write('\x02\0x03')
    393         return d
    394 
    395     def test_short_reads_1(self):
    396         # You don't get fewer bytes than you requested -- instead you get no callback at all.
    397         duc = repairer.DownUpConnector()
    398         duc.registerProducer(None, True) # just because you have to call registerProducer first
    399 
    400         d = duc.read_encrypted(2, False)
    401         duc.write('\x04')
    402 
    403         def _callb(res):
    404             self.fail("Shouldn't have gotten this callback res: %s" % (res,))
    405         d.addCallback(_callb)
    406 
    407         # Also in the other order of read-vs-write:
    408         duc2 = repairer.DownUpConnector()
    409         duc2.registerProducer(None, True) # just because you have to call registerProducer first
    410         duc2.write('\x04')
    411         d = duc2.read_encrypted(2, False)
    412 
    413         def _callb2(res):
    414             self.fail("Shouldn't have gotten this callback res: %s" % (res,))
    415         d.addCallback(_callb2)
    416 
    417         # But once the DUC is closed then you *do* get short reads.
    418         duc3 = repairer.DownUpConnector()
    419         duc3.registerProducer(None, True) # just because you have to call registerProducer first
    420 
    421         d = duc3.read_encrypted(2, False)
    422         duc3.write('\x04')
    423         duc3.close()
    424         def _callb3(res):
    425             self.failUnlessEqual(len(res), 1)
    426             self.failUnlessEqual(res[0], '\x04')
    427         d.addCallback(_callb3)
    428         return d
    429 
    430     def test_short_reads_2(self):
    431         # Also in the other order of read-vs-write.
    432         duc = repairer.DownUpConnector()
    433         duc.registerProducer(None, True) # just because you have to call registerProducer first
    434 
    435         duc.write('\x04')
    436         d = duc.read_encrypted(2, False)
    437         duc.close()
    438 
    439         def _callb(res):
    440             self.failUnlessEqual(len(res), 1)
    441             self.failUnlessEqual(res[0], '\x04')
    442         d.addCallback(_callb)
    443         return d
    444 
    445     def test_short_reads_3(self):
    446         # Also if it is closed before the read.
    447         duc = repairer.DownUpConnector()
    448         duc.registerProducer(None, True) # just because you have to call registerProducer first
    449 
    450         duc.write('\x04')
    451         duc.close()
    452         d = duc.read_encrypted(2, False)
    453         def _callb(res):
    454             self.failUnlessEqual(len(res), 1)
    455             self.failUnlessEqual(res[0], '\x04')
    456         d.addCallback(_callb)
    457         return d
    458 
    459366class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin,
    460367               common.ShouldFailMixin):
    461368
  • src/allmydata/test/test_system.py

    diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
    index c1b1f7f..a9956e3 100644
    a b from allmydata import uri 
    99from allmydata.storage.mutable import MutableShareFile
    1010from allmydata.storage.server import si_a2b
    1111from allmydata.immutable import offloaded, upload
    12 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode
     12from allmydata.immutable.literal import LiteralFileNode
     13from allmydata.immutable.filenode import ImmutableFileNode
    1314from allmydata.util import idlib, mathutil
    1415from allmydata.util import log, base32
    1516from allmydata.util.consumer import MemoryConsumer, download_to_data
  • src/allmydata/test/test_util.py

    diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py
    index 0a326b3..2fceee5 100644
    a b from twisted.trial import unittest 
    77from twisted.internet import defer, reactor
    88from twisted.python.failure import Failure
    99from twisted.python import log
     10from hashlib import md5
    1011
    1112from allmydata.util import base32, idlib, humanreadable, mathutil, hashutil
    1213from allmydata.util import assertutil, fileutil, deferredutil, abbreviate
    1314from allmydata.util import limiter, time_format, pollmixin, cachedir
    1415from allmydata.util import statistics, dictutil, pipeline
    1516from allmydata.util import log as tahoe_log
     17from allmydata.util.spans import Spans, overlap, DataSpans
    1618
    1719class Base32(unittest.TestCase):
    1820    def test_b2a_matches_Pythons(self):
    class Log(unittest.TestCase): 
    15371539        tahoe_log.err(format="intentional sample error",
    15381540                      failure=f, level=tahoe_log.OPERATIONAL, umid="wO9UoQ")
    15391541        self.flushLoggedErrors(SampleError)
     1542
     1543
     1544class SimpleSpans:
     1545    # this is a simple+inefficient form of util.spans.Spans . We compare the
     1546    # behavior of this reference model against the real (efficient) form.
     1547
     1548    def __init__(self, _span_or_start=None, length=None):
     1549        self._have = set()
     1550        if length is not None:
     1551            for i in range(_span_or_start, _span_or_start+length):
     1552                self._have.add(i)
     1553        elif _span_or_start:
     1554            for (start,length) in _span_or_start:
     1555                self.add(start, length)
     1556
     1557    def add(self, start, length):
     1558        for i in range(start, start+length):
     1559            self._have.add(i)
     1560        return self
     1561
     1562    def remove(self, start, length):
     1563        for i in range(start, start+length):
     1564            self._have.discard(i)
     1565        return self
     1566
     1567    def each(self):
     1568        return sorted(self._have)
     1569
     1570    def __iter__(self):
     1571        items = sorted(self._have)
     1572        prevstart = None
     1573        prevend = None
     1574        for i in items:
     1575            if prevstart is None:
     1576                prevstart = prevend = i
     1577                continue
     1578            if i == prevend+1:
     1579                prevend = i
     1580                continue
     1581            yield (prevstart, prevend-prevstart+1)
     1582            prevstart = prevend = i
     1583        if prevstart is not None:
     1584            yield (prevstart, prevend-prevstart+1)
     1585
     1586    def __len__(self):
     1587        # this also gets us bool(s)
     1588        return len(self._have)
     1589
     1590    def __add__(self, other):
     1591        s = self.__class__(self)
     1592        for (start, length) in other:
     1593            s.add(start, length)
     1594        return s
     1595
     1596    def __sub__(self, other):
     1597        s = self.__class__(self)
     1598        for (start, length) in other:
     1599            s.remove(start, length)
     1600        return s
     1601
     1602    def __iadd__(self, other):
     1603        for (start, length) in other:
     1604            self.add(start, length)
     1605        return self
     1606
     1607    def __isub__(self, other):
     1608        for (start, length) in other:
     1609            self.remove(start, length)
     1610        return self
     1611
     1612    def __and__(self, other):
     1613        s = self.__class__()
     1614        for i in other.each():
     1615            if i in self._have:
     1616                s.add(i, 1)
     1617        return s
     1618
     1619    def __contains__(self, (start,length)):
     1620        for i in range(start, start+length):
     1621            if i not in self._have:
     1622                return False
     1623        return True
     1624
     1625class ByteSpans(unittest.TestCase):
     1626    def test_basic(self):
     1627        s = Spans()
     1628        self.failUnlessEqual(list(s), [])
     1629        self.failIf(s)
     1630        self.failIf((0,1) in s)
     1631        self.failUnlessEqual(len(s), 0)
     1632
     1633        s1 = Spans(3, 4) # 3,4,5,6
     1634        self._check1(s1)
     1635
     1636        s2 = Spans(s1)
     1637        self._check1(s2)
     1638
     1639        s2.add(10,2) # 10,11
     1640        self._check1(s1)
     1641        self.failUnless((10,1) in s2)
     1642        self.failIf((10,1) in s1)
     1643        self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11])
     1644        self.failUnlessEqual(len(s2), 6)
     1645
     1646        s2.add(15,2).add(20,2)
     1647        self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11,15,16,20,21])
     1648        self.failUnlessEqual(len(s2), 10)
     1649
     1650        s2.remove(4,3).remove(15,1)
     1651        self.failUnlessEqual(list(s2.each()), [3,10,11,16,20,21])
     1652        self.failUnlessEqual(len(s2), 6)
     1653
     1654        s1 = SimpleSpans(3, 4) # 3 4 5 6
     1655        s2 = SimpleSpans(5, 4) # 5 6 7 8
     1656        i = s1 & s2
     1657        self.failUnlessEqual(list(i.each()), [5, 6])
     1658
     1659    def _check1(self, s):
     1660        self.failUnlessEqual(list(s), [(3,4)])
     1661        self.failUnless(s)
     1662        self.failUnlessEqual(len(s), 4)
     1663        self.failIf((0,1) in s)
     1664        self.failUnless((3,4) in s)
     1665        self.failUnless((3,1) in s)
     1666        self.failUnless((5,2) in s)
     1667        self.failUnless((6,1) in s)
     1668        self.failIf((6,2) in s)
     1669        self.failIf((7,1) in s)
     1670        self.failUnlessEqual(list(s.each()), [3,4,5,6])
     1671
     1672    def test_math(self):
     1673        s1 = Spans(0, 10) # 0,1,2,3,4,5,6,7,8,9
     1674        s2 = Spans(5, 3) # 5,6,7
     1675        s3 = Spans(8, 4) # 8,9,10,11
     1676
     1677        s = s1 - s2
     1678        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
     1679        s = s1 - s3
     1680        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
     1681        s = s2 - s3
     1682        self.failUnlessEqual(list(s.each()), [5,6,7])
     1683        s = s1 & s2
     1684        self.failUnlessEqual(list(s.each()), [5,6,7])
     1685        s = s2 & s1
     1686        self.failUnlessEqual(list(s.each()), [5,6,7])
     1687        s = s1 & s3
     1688        self.failUnlessEqual(list(s.each()), [8,9])
     1689        s = s3 & s1
     1690        self.failUnlessEqual(list(s.each()), [8,9])
     1691        s = s2 & s3
     1692        self.failUnlessEqual(list(s.each()), [])
     1693        s = s3 & s2
     1694        self.failUnlessEqual(list(s.each()), [])
     1695        s = Spans() & s3
     1696        self.failUnlessEqual(list(s.each()), [])
     1697        s = s3 & Spans()
     1698        self.failUnlessEqual(list(s.each()), [])
     1699
     1700        s = s1 + s2
     1701        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
     1702        s = s1 + s3
     1703        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
     1704        s = s2 + s3
     1705        self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
     1706
     1707        s = Spans(s1)
     1708        s -= s2
     1709        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
     1710        s = Spans(s1)
     1711        s -= s3
     1712        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
     1713        s = Spans(s2)
     1714        s -= s3
     1715        self.failUnlessEqual(list(s.each()), [5,6,7])
     1716
     1717        s = Spans(s1)
     1718        s += s2
     1719        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
     1720        s = Spans(s1)
     1721        s += s3
     1722        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
     1723        s = Spans(s2)
     1724        s += s3
     1725        self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
     1726
     1727    def test_random(self):
     1728        # attempt to increase coverage of corner cases by comparing behavior
     1729        # of a simple-but-slow model implementation against the
     1730        # complex-but-fast actual implementation, in a large number of random
     1731        # operations
     1732        S1 = SimpleSpans
     1733        S2 = Spans
     1734        s1 = S1(); s2 = S2()
     1735        seed = ""
     1736        def _create(subseed):
     1737            ns1 = S1(); ns2 = S2()
     1738            for i in range(10):
     1739                what = md5(subseed+str(i)).hexdigest()
     1740                start = int(what[2:4], 16)
     1741                length = max(1,int(what[5:6], 16))
     1742                ns1.add(start, length); ns2.add(start, length)
     1743            return ns1, ns2
     1744
     1745        #print
     1746        for i in range(1000):
     1747            what = md5(seed+str(i)).hexdigest()
     1748            op = what[0]
     1749            subop = what[1]
     1750            start = int(what[2:4], 16)
     1751            length = max(1,int(what[5:6], 16))
     1752            #print what
     1753            if op in "0":
     1754                if subop in "01234":
     1755                    s1 = S1(); s2 = S2()
     1756                elif subop in "5678":
     1757                    s1 = S1(start, length); s2 = S2(start, length)
     1758                else:
     1759                    s1 = S1(s1); s2 = S2(s2)
     1760                #print "s2 = %s" % s2.dump()
     1761            elif op in "123":
     1762                #print "s2.add(%d,%d)" % (start, length)
     1763                s1.add(start, length); s2.add(start, length)
     1764            elif op in "456":
     1765                #print "s2.remove(%d,%d)" % (start, length)
     1766                s1.remove(start, length); s2.remove(start, length)
     1767            elif op in "78":
     1768                ns1, ns2 = _create(what[7:11])
     1769                #print "s2 + %s" % ns2.dump()
     1770                s1 = s1 + ns1; s2 = s2 + ns2
     1771            elif op in "9a":
     1772                ns1, ns2 = _create(what[7:11])
     1773                #print "%s - %s" % (s2.dump(), ns2.dump())
     1774                s1 = s1 - ns1; s2 = s2 - ns2
     1775            elif op in "bc":
     1776                ns1, ns2 = _create(what[7:11])
     1777                #print "s2 += %s" % ns2.dump()
     1778                s1 += ns1; s2 += ns2
     1779            elif op in "de":
     1780                ns1, ns2 = _create(what[7:11])
     1781                #print "%s -= %s" % (s2.dump(), ns2.dump())
     1782                s1 -= ns1; s2 -= ns2
     1783            else:
     1784                ns1, ns2 = _create(what[7:11])
     1785                #print "%s &= %s" % (s2.dump(), ns2.dump())
     1786                s1 = s1 & ns1; s2 = s2 & ns2
     1787            #print "s2 now %s" % s2.dump()
     1788            self.failUnlessEqual(list(s1.each()), list(s2.each()))
     1789            self.failUnlessEqual(len(s1), len(s2))
     1790            self.failUnlessEqual(bool(s1), bool(s2))
     1791            self.failUnlessEqual(list(s1), list(s2))
     1792            for j in range(10):
     1793                what = md5(what[12:14]+str(j)).hexdigest()
     1794                start = int(what[2:4], 16)
     1795                length = max(1, int(what[5:6], 16))
     1796                span = (start, length)
     1797                self.failUnlessEqual(bool(span in s1), bool(span in s2))
     1798
     1799
     1800    # s()
     1801    # s(start,length)
     1802    # s(s0)
     1803    # s.add(start,length) : returns s
     1804    # s.remove(start,length)
     1805    # s.each() -> list of byte offsets, mostly for testing
     1806    # list(s) -> list of (start,length) tuples, one per span
     1807    # (start,length) in s -> True if (start..start+length-1) are all members
     1808    #  NOT equivalent to x in list(s)
     1809    # len(s) -> number of bytes, for testing, bool(), and accounting/limiting
     1810    # bool(s)  (__len__)
     1811    # s = s1+s2, s1-s2, +=s1, -=s1
     1812
     1813    def test_overlap(self):
     1814        for a in range(20):
     1815            for b in range(10):
     1816                for c in range(20):
     1817                    for d in range(10):
     1818                        self._test_overlap(a,b,c,d)
     1819
     1820    def _test_overlap(self, a, b, c, d):
     1821        s1 = set(range(a,a+b))
     1822        s2 = set(range(c,c+d))
     1823        #print "---"
     1824        #self._show_overlap(s1, "1")
     1825        #self._show_overlap(s2, "2")
     1826        o = overlap(a,b,c,d)
     1827        expected = s1.intersection(s2)
     1828        if not expected:
     1829            self.failUnlessEqual(o, None)
     1830        else:
     1831            start,length = o
     1832            so = set(range(start,start+length))
     1833            #self._show(so, "o")
     1834            self.failUnlessEqual(so, expected)
     1835
     1836    def _show_overlap(self, s, c):
     1837        import sys
     1838        out = sys.stdout
     1839        if s:
     1840            for i in range(max(s)):
     1841                if i in s:
     1842                    out.write(c)
     1843                else:
     1844                    out.write(" ")
     1845        out.write("\n")
     1846
     1847def extend(s, start, length, fill):
     1848    if len(s) >= start+length:
     1849        return s
     1850    assert len(fill) == 1
     1851    return s + fill*(start+length-len(s))
     1852
     1853def replace(s, start, data):
     1854    assert len(s) >= start+len(data)
     1855    return s[:start] + data + s[start+len(data):]
     1856
     1857class SimpleDataSpans:
     1858    def __init__(self, other=None):
     1859        self.missing = "" # "1" where missing, "0" where found
     1860        self.data = ""
     1861        if other:
     1862            for (start, data) in other.get_chunks():
     1863                self.add(start, data)
     1864
     1865    def __len__(self):
     1866        return len(self.missing.translate(None, "1"))
     1867    def _dump(self):
     1868        return [i for (i,c) in enumerate(self.missing) if c == "0"]
     1869    def _have(self, start, length):
     1870        m = self.missing[start:start+length]
     1871        if not m or len(m)<length or int(m):
     1872            return False
     1873        return True
     1874    def get_chunks(self):
     1875        for i in self._dump():
     1876            yield (i, self.data[i])
     1877    def get_spans(self):
     1878        return SimpleSpans([(start,len(data))
     1879                            for (start,data) in self.get_chunks()])
     1880    def get(self, start, length):
     1881        if self._have(start, length):
     1882            return self.data[start:start+length]
     1883        return None
     1884    def pop(self, start, length):
     1885        data = self.get(start, length)
     1886        if data:
     1887            self.remove(start, length)
     1888        return data
     1889    def remove(self, start, length):
     1890        self.missing = replace(extend(self.missing, start, length, "1"),
     1891                               start, "1"*length)
     1892    def add(self, start, data):
     1893        self.missing = replace(extend(self.missing, start, len(data), "1"),
     1894                               start, "0"*len(data))
     1895        self.data = replace(extend(self.data, start, len(data), " "),
     1896                            start, data)
     1897
     1898
     1899class StringSpans(unittest.TestCase):
     1900    def do_basic(self, klass):
     1901        ds = klass()
     1902        self.failUnlessEqual(len(ds), 0)
     1903        self.failUnlessEqual(list(ds._dump()), [])
     1904        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 0)
     1905        s = ds.get_spans()
     1906        self.failUnlessEqual(ds.get(0, 4), None)
     1907        self.failUnlessEqual(ds.pop(0, 4), None)
     1908        ds.remove(0, 4)
     1909
     1910        ds.add(2, "four")
     1911        self.failUnlessEqual(len(ds), 4)
     1912        self.failUnlessEqual(list(ds._dump()), [2,3,4,5])
     1913        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
     1914        s = ds.get_spans()
     1915        self.failUnless((2,2) in s)
     1916        self.failUnlessEqual(ds.get(0, 4), None)
     1917        self.failUnlessEqual(ds.pop(0, 4), None)
     1918        self.failUnlessEqual(ds.get(4, 4), None)
     1919
     1920        ds2 = klass(ds)
     1921        self.failUnlessEqual(len(ds2), 4)
     1922        self.failUnlessEqual(list(ds2._dump()), [2,3,4,5])
     1923        self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 4)
     1924        self.failUnlessEqual(ds2.get(0, 4), None)
     1925        self.failUnlessEqual(ds2.pop(0, 4), None)
     1926        self.failUnlessEqual(ds2.pop(2, 3), "fou")
     1927        self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 1)
     1928        self.failUnlessEqual(ds2.get(2, 3), None)
     1929        self.failUnlessEqual(ds2.get(5, 1), "r")
     1930        self.failUnlessEqual(ds.get(2, 3), "fou")
     1931        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
     1932
     1933        ds.add(0, "23")
     1934        self.failUnlessEqual(len(ds), 6)
     1935        self.failUnlessEqual(list(ds._dump()), [0,1,2,3,4,5])
     1936        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 6)
     1937        self.failUnlessEqual(ds.get(0, 4), "23fo")
     1938        self.failUnlessEqual(ds.pop(0, 4), "23fo")
     1939        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 2)
     1940        self.failUnlessEqual(ds.get(0, 4), None)
     1941        self.failUnlessEqual(ds.pop(0, 4), None)
     1942
     1943        ds = klass()
     1944        ds.add(2, "four")
     1945        ds.add(3, "ea")
     1946        self.failUnlessEqual(ds.get(2, 4), "fear")
     1947
     1948    def do_scan(self, klass):
     1949        # do a test with gaps and spans of size 1 and 2
     1950        #  left=(1,11) * right=(1,11) * gapsize=(1,2)
     1951        # 111, 112, 121, 122, 211, 212, 221, 222
     1952        #    211
     1953        #      121
     1954        #         112
     1955        #            212
     1956        #               222
     1957        #                   221
     1958        #                      111
     1959        #                        122
     1960        #  11 1  1 11 11  11  1 1  111
     1961        # 0123456789012345678901234567
     1962        # abcdefghijklmnopqrstuvwxyz-=
     1963        pieces = [(1, "bc"),
     1964                  (4, "e"),
     1965                  (7, "h"),
     1966                  (9, "jk"),
     1967                  (12, "mn"),
     1968                  (16, "qr"),
     1969                  (20, "u"),
     1970                  (22, "w"),
     1971                  (25, "z-="),
     1972                  ]
     1973        p_elements = set([1,2,4,7,9,10,12,13,16,17,20,22,25,26,27])
     1974        S = "abcdefghijklmnopqrstuvwxyz-="
     1975        # TODO: when adding data, add capital letters, to make sure we aren't
     1976        # just leaving the old data in place
     1977        l = len(S)
     1978        def base():
     1979            ds = klass()
     1980            for start, data in pieces:
     1981                ds.add(start, data)
     1982            return ds
     1983        def dump(s):
     1984            p = set(s._dump())
     1985            # wow, this is the first time I've ever wanted ?: in python
     1986            # note: this requires python2.5
     1987            d = "".join([(S[i] if i in p else " ") for i in range(l)])
     1988            assert len(d) == l
     1989            return d
     1990        DEBUG = False
     1991        for start in range(0, l):
     1992            for end in range(start+1, l):
     1993                # add [start-end) to the baseline
     1994                which = "%d-%d" % (start, end-1)
     1995                p_added = set(range(start, end))
     1996                b = base()
     1997                if DEBUG:
     1998                    print
     1999                    print dump(b), which
     2000                    add = klass(); add.add(start, S[start:end])
     2001                    print dump(add)
     2002                b.add(start, S[start:end])
     2003                if DEBUG:
     2004                    print dump(b)
     2005                # check that the new span is there
     2006                d = b.get(start, end-start)
     2007                self.failUnlessEqual(d, S[start:end], which)
     2008                # check that all the original pieces are still there
     2009                for t_start, t_data in pieces:
     2010                    t_len = len(t_data)
     2011                    self.failUnlessEqual(b.get(t_start, t_len),
     2012                                         S[t_start:t_start+t_len],
     2013                                         "%s %d+%d" % (which, t_start, t_len))
     2014                # check that a lot of subspans are mostly correct
     2015                for t_start in range(l):
     2016                    for t_len in range(1,4):
     2017                        d = b.get(t_start, t_len)
     2018                        if d is not None:
     2019                            which2 = "%s+(%d-%d)" % (which, t_start,
     2020                                                     t_start+t_len-1)
     2021                            self.failUnlessEqual(d, S[t_start:t_start+t_len],
     2022                                                 which2)
     2023                        # check that removing a subspan gives the right value
     2024                        b2 = klass(b)
     2025                        b2.remove(t_start, t_len)
     2026                        removed = set(range(t_start, t_start+t_len))
     2027                        for i in range(l):
     2028                            exp = (((i in p_elements) or (i in p_added))
     2029                                   and (i not in removed))
     2030                            which2 = "%s-(%d-%d)" % (which, t_start,
     2031                                                     t_start+t_len-1)
     2032                            self.failUnlessEqual(bool(b2.get(i, 1)), exp,
     2033                                                 which2+" %d" % i)
     2034
     2035    def test_test(self):
     2036        self.do_basic(SimpleDataSpans)
     2037        self.do_scan(SimpleDataSpans)
     2038
     2039    def test_basic(self):
     2040        self.do_basic(DataSpans)
     2041        self.do_scan(DataSpans)
     2042
     2043    def test_random(self):
     2044        # attempt to increase coverage of corner cases by comparing behavior
     2045        # of a simple-but-slow model implementation against the
     2046        # complex-but-fast actual implementation, in a large number of random
     2047        # operations
     2048        S1 = SimpleDataSpans
     2049        S2 = DataSpans
     2050        s1 = S1(); s2 = S2()
     2051        seed = ""
     2052        def _randstr(length, seed):
     2053            created = 0
     2054            pieces = []
     2055            while created < length:
     2056                piece = md5(seed + str(created)).hexdigest()
     2057                pieces.append(piece)
     2058                created += len(piece)
     2059            return "".join(pieces)[:length]
     2060        def _create(subseed):
     2061            ns1 = S1(); ns2 = S2()
     2062            for i in range(10):
     2063                what = md5(subseed+str(i)).hexdigest()
     2064                start = int(what[2:4], 16)
     2065                length = max(1,int(what[5:6], 16))
     2066                ns1.add(start, _randstr(length, what[7:9]));
     2067                ns2.add(start, _randstr(length, what[7:9]))
     2068            return ns1, ns2
     2069
     2070        #print
     2071        for i in range(1000):
     2072            what = md5(seed+str(i)).hexdigest()
     2073            op = what[0]
     2074            subop = what[1]
     2075            start = int(what[2:4], 16)
     2076            length = max(1,int(what[5:6], 16))
     2077            #print what
     2078            if op in "0":
     2079                if subop in "0123456":
     2080                    s1 = S1(); s2 = S2()
     2081                else:
     2082                    s1, s2 = _create(what[7:11])
     2083                #print "s2 = %s" % list(s2._dump())
     2084            elif op in "123456":
     2085                #print "s2.add(%d,%d)" % (start, length)
     2086                s1.add(start, _randstr(length, what[7:9]));
     2087                s2.add(start, _randstr(length, what[7:9]))
     2088            elif op in "789abc":
     2089                #print "s2.remove(%d,%d)" % (start, length)
     2090                s1.remove(start, length); s2.remove(start, length)
     2091            else:
     2092                #print "s2.pop(%d,%d)" % (start, length)
     2093                d1 = s1.pop(start, length); d2 = s2.pop(start, length)
     2094                self.failUnlessEqual(d1, d2)
     2095            #print "s1 now %s" % list(s1._dump())
     2096            #print "s2 now %s" % list(s2._dump())
     2097            self.failUnlessEqual(len(s1), len(s2))
     2098            self.failUnlessEqual(list(s1._dump()), list(s2._dump()))
     2099            for j in range(100):
     2100                what = md5(what[12:14]+str(j)).hexdigest()
     2101                start = int(what[2:4], 16)
     2102                length = max(1, int(what[5:6], 16))
     2103                d1 = s1.get(start, length); d2 = s2.get(start, length)
     2104                self.failUnlessEqual(d1, d2, "%d+%d" % (start, length))
  • src/allmydata/test/test_web.py

    diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py
    index 0df0bbe..e0c8f12 100644
    a b class FakeClient(Client): 
    105105        self.uploader = FakeUploader()
    106106        self.uploader.setServiceParent(self)
    107107        self.nodemaker = FakeNodeMaker(None, self._secret_holder, None,
    108                                        self.uploader, None, None,
     108                                       self.uploader, None,
    109109                                       None, None)
    110110
    111111    def startService(self):
    class Grid(GridTestMixin, WebErrorMixin, unittest.TestCase, ShouldFailMixin): 
    41244124                   "no servers were connected, but it might also indicate "
    41254125                   "severe corruption. You should perform a filecheck on "
    41264126                   "this object to learn more. The full error message is: "
    4127                    "Failed to get enough shareholders: have 0, need 3")
     4127                   "no shares (need 3). Last failure: None")
    41284128            self.failUnlessEqual(exp, body)
    41294129        d.addCallback(_check_zero_shares)
    41304130
    class Grid(GridTestMixin, WebErrorMixin, unittest.TestCase, ShouldFailMixin): 
    41364136        def _check_one_share(body):
    41374137            self.failIf("<html>" in body, body)
    41384138            body = " ".join(body.strip().split())
    4139             exp = ("NotEnoughSharesError: This indicates that some "
     4139            msg = ("NotEnoughSharesError: This indicates that some "
    41404140                   "servers were unavailable, or that shares have been "
    41414141                   "lost to server departure, hard drive failure, or disk "
    41424142                   "corruption. You should perform a filecheck on "
    41434143                   "this object to learn more. The full error message is:"
    4144                    " Failed to get enough shareholders: have 1, need 3")
    4145             self.failUnlessEqual(exp, body)
     4144                   " ran out of shares: %d complete, %d pending, 0 overdue,"
     4145                   " 0 unused, need 3. Last failure: None")
     4146            msg1 = msg % (1, 0)
     4147            msg2 = msg % (0, 1)
     4148            self.failUnless(body == msg1 or body == msg2, body)
    41464149        d.addCallback(_check_one_share)
    41474150
    41484151        d.addCallback(lambda ignored:
  • src/allmydata/util/dictutil.py

    diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py
    index 3dc815b..91785ac 100644
    a b class DictOfSets(dict): 
    5757        if not self[key]:
    5858            del self[key]
    5959
     60    def allvalues(self):
     61        # return a set that merges all value sets
     62        r = set()
     63        for key in self:
     64            r.update(self[key])
     65        return r
     66
    6067class UtilDict:
    6168    def __init__(self, initialdata={}):
    6269        self.d = {}
  • new file src/allmydata/util/spans.py

    diff --git a/src/allmydata/util/spans.py b/src/allmydata/util/spans.py
    new file mode 100755
    index 0000000..2a199f0
    - +  
     1
     2class Spans:
     3    """I represent a compressed list of booleans, one per index (an integer).
     4    Typically, each index represents an offset into a large string, pointing
     5    to a specific byte of a share. In this context, True means that byte has
     6    been received, or has been requested.
     7
     8    Another way to look at this is maintaining a set of integers, optimized
     9    for operations on spans like 'add range to set' and 'is range in set?'.
     10
     11    This is a python equivalent of perl's Set::IntSpan module, frequently
     12    used to represent .newsrc contents.
     13
     14    Rather than storing an actual (large) list or dictionary, I represent my
     15    internal state as a sorted list of spans, each with a start and a length.
     16    My API is presented in terms of start+length pairs. I provide set
     17    arithmetic operators, to efficiently answer questions like 'I want bytes
     18    XYZ, I already requested bytes ABC, and I've already received bytes DEF:
     19    what bytes should I request now?'.
     20
     21    The new downloader will use it to keep track of which bytes we've requested
     22    or received already.
     23    """
     24
     25    def __init__(self, _span_or_start=None, length=None):
     26        self._spans = list()
     27        if length is not None:
     28            self._spans.append( (_span_or_start, length) )
     29        elif _span_or_start:
     30            for (start,length) in _span_or_start:
     31                self.add(start, length)
     32        self._check()
     33
     34    def _check(self):
     35        assert sorted(self._spans) == self._spans
     36        prev_end = None
     37        try:
     38            for (start,length) in self._spans:
     39                if prev_end is not None:
     40                    assert start > prev_end
     41                prev_end = start+length
     42        except AssertionError:
     43            print "BAD:", self.dump()
     44            raise
     45
     46    def add(self, start, length):
     47        assert start >= 0
     48        assert length > 0
     49        #print " ADD [%d+%d -%d) to %s" % (start, length, start+length, self.dump())
     50        first_overlap = last_overlap = None
     51        for i,(s_start,s_length) in enumerate(self._spans):
     52            #print "  (%d+%d)-> overlap=%s adjacent=%s" % (s_start,s_length, overlap(s_start, s_length, start, length), adjacent(s_start, s_length, start, length))
     53            if (overlap(s_start, s_length, start, length)
     54                or adjacent(s_start, s_length, start, length)):
     55                last_overlap = i
     56                if first_overlap is None:
     57                    first_overlap = i
     58                continue
     59            # no overlap
     60            if first_overlap is not None:
     61                break
     62        #print "  first_overlap", first_overlap, last_overlap
     63        if first_overlap is None:
     64            # no overlap, so just insert the span and sort by starting
     65            # position.
     66            self._spans.insert(0, (start,length))
     67            self._spans.sort()
     68        else:
     69            # everything from [first_overlap] to [last_overlap] overlapped
     70            first_start,first_length = self._spans[first_overlap]
     71            last_start,last_length = self._spans[last_overlap]
     72            newspan_start = min(start, first_start)
     73            newspan_end = max(start+length, last_start+last_length)
     74            newspan_length = newspan_end - newspan_start
     75            newspan = (newspan_start, newspan_length)
     76            self._spans[first_overlap:last_overlap+1] = [newspan]
     77        #print "  ADD done: %s" % self.dump()
     78        self._check()
     79
     80        return self
     81
     82    def remove(self, start, length):
     83        assert start >= 0
     84        assert length > 0
     85        #print " REMOVE [%d+%d -%d) from %s" % (start, length, start+length, self.dump())
     86        first_complete_overlap = last_complete_overlap = None
     87        for i,(s_start,s_length) in enumerate(self._spans):
     88            s_end = s_start + s_length
     89            o = overlap(s_start, s_length, start, length)
     90            if o:
     91                o_start, o_length = o
     92                o_end = o_start+o_length
     93                if o_start == s_start and o_end == s_end:
     94                    # delete this span altogether
     95                    if first_complete_overlap is None:
     96                        first_complete_overlap = i
     97                    last_complete_overlap = i
     98                elif o_start == s_start:
     99                    # we only overlap the left side, so trim the start
     100                    #    1111
     101                    #  rrrr
     102                    #    oo
     103                    # ->   11
     104                    new_start = o_end
     105                    new_end = s_end
     106                    assert new_start > s_start
     107                    new_length = new_end - new_start
     108                    self._spans[i] = (new_start, new_length)
     109                elif o_end == s_end:
     110                    # we only overlap the right side
     111                    #    1111
     112                    #      rrrr
     113                    #      oo
     114                    # -> 11
     115                    new_start = s_start
     116                    new_end = o_start
     117                    assert new_end < s_end
     118                    new_length = new_end - new_start
     119                    self._spans[i] = (new_start, new_length)
     120                else:
     121                    # we overlap the middle, so create a new span. No need to
     122                    # examine any other spans.
     123                    #    111111
     124                    #      rr
     125                    #    LL  RR
     126                    left_start = s_start
     127                    left_end = o_start
     128                    left_length = left_end - left_start
     129                    right_start = o_end
     130                    right_end = s_end
     131                    right_length = right_end - right_start
     132                    self._spans[i] = (left_start, left_length)
     133                    self._spans.append( (right_start, right_length) )
     134                    self._spans.sort()
     135                    break
     136        if first_complete_overlap is not None:
     137            del self._spans[first_complete_overlap:last_complete_overlap+1]
     138        #print "  REMOVE done: %s" % self.dump()
     139        self._check()
     140        return self
     141
     142    def dump(self):
     143        return "len=%d: %s" % (len(self),
     144                               ",".join(["[%d-%d]" % (start,start+l-1)
     145                                         for (start,l) in self._spans]) )
     146
     147    def each(self):
     148        for start, length in self._spans:
     149            for i in range(start, start+length):
     150                yield i
     151
     152    def __iter__(self):
     153        for s in self._spans:
     154            yield s
     155
     156    def __len__(self):
     157        # this also gets us bool(s)
     158        return sum([length for start,length in self._spans])
     159
     160    def __add__(self, other):
     161        s = self.__class__(self)
     162        for (start, length) in other:
     163            s.add(start, length)
     164        return s
     165
     166    def __sub__(self, other):
     167        s = self.__class__(self)
     168        for (start, length) in other:
     169            s.remove(start, length)
     170        return s
     171
     172    def __iadd__(self, other):
     173        for (start, length) in other:
     174            self.add(start, length)
     175        return self
     176
     177    def __isub__(self, other):
     178        for (start, length) in other:
     179            self.remove(start, length)
     180        return self
     181
     182    def __and__(self, other):
     183        if not self._spans:
     184            return self.__class__()
     185        bounds = self.__class__(self._spans[0][0],
     186                                self._spans[-1][0]+self._spans[-1][1])
     187        not_other = bounds - other
     188        return self - not_other
     189
     190    def __contains__(self, (start,length)):
     191        for span_start,span_length in self._spans:
     192            o = overlap(start, length, span_start, span_length)
     193            if o:
     194                o_start,o_length = o
     195                if o_start == start and o_length == length:
     196                    return True
     197        return False
     198
     199def overlap(start0, length0, start1, length1):
     200    # return start2,length2 of the overlapping region, or None
     201    #  00      00   000   0000  00  00 000  00   00  00      00
     202    #     11    11   11    11   111 11 11  1111 111 11    11
     203    left = max(start0, start1)
     204    right = min(start0+length0, start1+length1)
     205    # if there is overlap, 'left' will be its start, and right-1 will
     206    # be the end'
     207    if left < right:
     208        return (left, right-left)
     209    return None
     210
     211def adjacent(start0, length0, start1, length1):
     212    if (start0 < start1) and start0+length0 == start1:
     213        return True
     214    elif (start1 < start0) and start1+length1 == start0:
     215        return True
     216    return False
     217
     218class DataSpans:
     219    """I represent portions of a large string. Equivalently, I can be said to
     220    maintain a large array of characters (with gaps of empty elements). I can
     221    be used to manage access to a remote share, where some pieces have been
     222    retrieved, some have been requested, and others have not been read.
     223    """
     224
     225    def __init__(self, other=None):
     226        self.spans = [] # (start, data) tuples, non-overlapping, merged
     227        if other:
     228            for (start, data) in other.get_chunks():
     229                self.add(start, data)
     230
     231    def __len__(self):
     232        # return number of bytes we're holding
     233        return sum([len(data) for (start,data) in self.spans])
     234
     235    def _dump(self):
     236        # return iterator of sorted list of offsets, one per byte
     237        for (start,data) in self.spans:
     238            for i in range(start, start+len(data)):
     239                yield i
     240
     241    def dump(self):
     242        return "len=%d: %s" % (len(self),
     243                               ",".join(["[%d-%d]" % (start,start+len(data)-1)
     244                                         for (start,data) in self.spans]) )
     245
     246    def get_chunks(self):
     247        return list(self.spans)
     248
     249    def get_spans(self):
     250        """Return a Spans object with a bit set for each byte I hold"""
     251        return Spans([(start, len(data)) for (start,data) in self.spans])
     252
     253    def assert_invariants(self):
     254        if not self.spans:
     255            return
     256        prev_start = self.spans[0][0]
     257        prev_end = prev_start + len(self.spans[0][1])
     258        for start, data in self.spans[1:]:
     259            if not start > prev_end:
     260                # adjacent or overlapping: bad
     261                print "ASSERTION FAILED", self.spans
     262                raise AssertionError
     263
     264    def get(self, start, length):
     265        # returns a string of LENGTH, or None
     266        #print "get", start, length, self.spans
     267        end = start+length
     268        for (s_start,s_data) in self.spans:
     269            s_end = s_start+len(s_data)
     270            #print " ",s_start,s_end
     271            if s_start <= start < s_end:
     272                # we want some data from this span. Because we maintain
     273                # strictly merged and non-overlapping spans, everything we
     274                # want must be in this span.
     275                offset = start - s_start
     276                if offset + length > len(s_data):
     277                    #print " None, span falls short"
     278                    return None # span falls short
     279                #print " some", s_data[offset:offset+length]
     280                return s_data[offset:offset+length]
     281            if s_start >= end:
     282                # we've gone too far: no further spans will overlap
     283                #print " None, gone too far"
     284                return None
     285        #print " None, ran out of spans"
     286        return None
     287
     288    def add(self, start, data):
     289        # first: walk through existing spans, find overlap, modify-in-place
     290        #  create list of new spans
     291        #  add new spans
     292        #  sort
     293        #  merge adjacent spans
     294        #print "add", start, data, self.spans
     295        end = start + len(data)
     296        i = 0
     297        while len(data):
     298            #print " loop", start, data, i, len(self.spans), self.spans
     299            if i >= len(self.spans):
     300                #print " append and done"
     301                # append a last span
     302                self.spans.append( (start, data) )
     303                break
     304            (s_start,s_data) = self.spans[i]
     305            # five basic cases:
     306            #  a: OLD  b:OLDD  c1:OLD  c2:OLD   d1:OLDD  d2:OLD  e: OLLDD
     307            #    NEW     NEW      NEW     NEWW      NEW      NEW     NEW
     308            #
     309            # we handle A by inserting a new segment (with "N") and looping,
     310            # turning it into B or C. We handle B by replacing a prefix and
     311            # terminating. We handle C (both c1 and c2) by replacing the
     312            # segment (and, for c2, looping, turning it into A). We handle D
     313            # by replacing a suffix (and, for d2, looping, turning it into
     314            # A). We handle E by replacing the middle and terminating.
     315            if start < s_start:
     316                # case A: insert a new span, then loop with the remainder
     317                #print " insert new psan"
     318                s_len = s_start-start
     319                self.spans.insert(i, (start, data[:s_len]))
     320                i += 1
     321                start = s_start
     322                data = data[s_len:]
     323                continue
     324            s_len = len(s_data)
     325            s_end = s_start+s_len
     326            if s_start <= start < s_end:
     327                #print " modify this span", s_start, start, s_end
     328                # we want to modify some data in this span: a prefix, a
     329                # suffix, or the whole thing
     330                if s_start == start:
     331                    if s_end <= end:
     332                        #print " replace whole segment"
     333                        # case C: replace this segment
     334                        self.spans[i] = (s_start, data[:s_len])
     335                        i += 1
     336                        start += s_len
     337                        data = data[s_len:]
     338                        # C2 is where len(data)>0
     339                        continue
     340                    # case B: modify the prefix, retain the suffix
     341                    #print " modify prefix"
     342                    self.spans[i] = (s_start, data + s_data[len(data):])
     343                    break
     344                if start > s_start and end < s_end:
     345                    # case E: modify the middle
     346                    #print " modify middle"
     347                    prefix_len = start - s_start # we retain this much
     348                    suffix_len = s_end - end # and retain this much
     349                    newdata = s_data[:prefix_len] + data + s_data[-suffix_len:]
     350                    self.spans[i] = (s_start, newdata)
     351                    break
     352                # case D: retain the prefix, modify the suffix
     353                #print " modify suffix"
     354                prefix_len = start - s_start # we retain this much
     355                suffix_len = s_len - prefix_len # we replace this much
     356                #print "  ", s_data, prefix_len, suffix_len, s_len, data
     357                self.spans[i] = (s_start,
     358                                 s_data[:prefix_len] + data[:suffix_len])
     359                i += 1
     360                start += suffix_len
     361                data = data[suffix_len:]
     362                #print "  now", start, data
     363                # D2 is where len(data)>0
     364                continue
     365            # else we're not there yet
     366            #print " still looking"
     367            i += 1
     368            continue
     369        # now merge adjacent spans
     370        #print " merging", self.spans
     371        newspans = []
     372        for (s_start,s_data) in self.spans:
     373            if newspans and adjacent(newspans[-1][0], len(newspans[-1][1]),
     374                                     s_start, len(s_data)):
     375                newspans[-1] = (newspans[-1][0], newspans[-1][1] + s_data)
     376            else:
     377                newspans.append( (s_start, s_data) )
     378        self.spans = newspans
     379        self.assert_invariants()
     380        #print " done", self.spans
     381
     382    def remove(self, start, length):
     383        i = 0
     384        end = start + length
     385        #print "remove", start, length, self.spans
     386        while i < len(self.spans):
     387            (s_start,s_data) = self.spans[i]
     388            if s_start >= end:
     389                # this segment is entirely right of the removed region, and
     390                # all further segments are even further right. We're done.
     391                break
     392            s_len = len(s_data)
     393            s_end = s_start + s_len
     394            o = overlap(start, length, s_start, s_len)
     395            if not o:
     396                i += 1
     397                continue
     398            o_start, o_len = o
     399            o_end = o_start + o_len
     400            if o_len == s_len:
     401                # remove the whole segment
     402                del self.spans[i]
     403                continue
     404            if o_start == s_start:
     405                # remove a prefix, leaving the suffix from o_end to s_end
     406                prefix_len = o_end - o_start
     407                self.spans[i] = (o_end, s_data[prefix_len:])
     408                i += 1
     409                continue
     410            elif o_end == s_end:
     411                # remove a suffix, leaving the prefix from s_start to o_start
     412                prefix_len = o_start - s_start
     413                self.spans[i] = (s_start, s_data[:prefix_len])
     414                i += 1
     415                continue
     416            # remove the middle, creating a new segment
     417            # left is s_start:o_start, right is o_end:s_end
     418            left_len = o_start - s_start
     419            left = s_data[:left_len]
     420            right_len = s_end - o_end
     421            right = s_data[-right_len:]
     422            self.spans[i] = (s_start, left)
     423            self.spans.insert(i+1, (o_end, right))
     424            break
     425        #print " done", self.spans
     426
     427    def pop(self, start, length):
     428        data = self.get(start, length)
     429        if data:
     430            self.remove(start, length)
     431        return data
  • src/allmydata/web/download-status.xhtml

    diff --git a/src/allmydata/web/download-status.xhtml b/src/allmydata/web/download-status.xhtml
    index da029e6..5d43f69 100644
    a b  
    1818  <li>Status: <span n:render="status"/></li>
    1919</ul>
    2020
     21<div n:render="events"></div>
    2122
    2223<div n:render="results">
    2324  <h2>Download Results</h2>
  • src/allmydata/web/status.py

    diff --git a/src/allmydata/web/status.py b/src/allmydata/web/status.py
    index e4241a3..c3a55d7 100644
    a b class DownloadStatusPage(DownloadResultsRendererMixin, rend.Page): 
    358358    def download_results(self):
    359359        return defer.maybeDeferred(self.download_status.get_results)
    360360
     361    def relative_time(self, t):
     362        if t is None:
     363            return t
     364        if self.download_status.started is not None:
     365            return t - self.download_status.started
     366        return t
     367    def short_relative_time(self, t):
     368        t = self.relative_time(t)
     369        if t is None:
     370            return ""
     371        return "+%.6fs" % t
     372
     373    def renderHTTP(self, ctx):
     374        req = inevow.IRequest(ctx)
     375        t = get_arg(req, "t")
     376        if t == "json":
     377            return self.json(req)
     378        return rend.Page.renderHTTP(self, ctx)
     379
     380    def json(self, req):
     381        req.setHeader("content-type", "text/plain")
     382        data = {}
     383        dyhb_events = []
     384        for serverid,requests in self.download_status.dyhb_requests.iteritems():
     385            for req in requests:
     386                dyhb_events.append( (base32.b2a(serverid),) + req )
     387        dyhb_events.sort(key=lambda req: req[1])
     388        data["dyhb"] = dyhb_events
     389        request_events = []
     390        for serverid,requests in self.download_status.requests.iteritems():
     391            for req in requests:
     392                request_events.append( (base32.b2a(serverid),) + req )
     393        request_events.sort(key=lambda req: (req[4],req[1]))
     394        data["requests"] = request_events
     395        data["segment"] = self.download_status.segment_events
     396        data["read"] = self.download_status.read_events
     397        return simplejson.dumps(data, indent=1) + "\n"
     398
     399    def render_events(self, ctx, data):
     400        if not self.download_status.storage_index:
     401            return
     402        srt = self.short_relative_time
     403        l = T.ul()
     404
     405        t = T.table(class_="status-download-events")
     406        t[T.tr[T.td["serverid"], T.td["sent"], T.td["received"],
     407               T.td["shnums"], T.td["RTT"]]]
     408        dyhb_events = []
     409        for serverid,requests in self.download_status.dyhb_requests.iteritems():
     410            for req in requests:
     411                dyhb_events.append( (serverid,) + req )
     412        dyhb_events.sort(key=lambda req: req[1])
     413        for d_ev in dyhb_events:
     414            (serverid, sent, shnums, received) = d_ev
     415            serverid_s = idlib.shortnodeid_b2a(serverid)
     416            rtt = received - sent
     417            t[T.tr(style="background: %s" % self.color(serverid))[
     418                [T.td[serverid_s], T.td[srt(sent)], T.td[srt(received)],
     419                 T.td[",".join([str(shnum) for shnum in shnums])],
     420                 T.td[self.render_time(None, rtt)],
     421                 ]]]
     422        l["DYHB Requests:", t]
     423
     424        t = T.table(class_="status-download-events")
     425        t[T.tr[T.td["range"], T.td["start"], T.td["finish"], T.td["got"],
     426               T.td["time"], T.td["decrypttime"], T.td["pausedtime"],
     427               T.td["speed"]]]
     428        for r_ev in self.download_status.read_events:
     429            (start, length, requesttime, finishtime, bytes, decrypt, paused) = r_ev
     430            print r_ev
     431            if finishtime is not None:
     432                rtt = finishtime - requesttime - paused
     433                speed = self.render_rate(None, 1.0 * bytes / rtt)
     434                rtt = self.render_time(None, rtt)
     435                decrypt = self.render_time(None, decrypt)
     436                paused = self.render_time(None, paused)
     437            else:
     438                speed, rtt, decrypt, paused = "","","",""
     439            t[T.tr[T.td["[%d:+%d]" % (start, length)],
     440                   T.td[srt(requesttime)], T.td[srt(finishtime)],
     441                   T.td[bytes], T.td[rtt], T.td[decrypt], T.td[paused],
     442                   T.td[speed],
     443                   ]]
     444        l["Read Events:", t]
     445
     446        t = T.table(class_="status-download-events")
     447        t[T.tr[T.td["type"], T.td["segnum"], T.td["when"], T.td["range"],
     448               T.td["decodetime"], T.td["segtime"], T.td["speed"]]]
     449        reqtime = (None, None)
     450        for s_ev in self.download_status.segment_events:
     451            (etype, segnum, when, segstart, seglen, decodetime) = s_ev
     452            if etype == "request":
     453                t[T.tr[T.td["request"], T.td["seg%d" % segnum],
     454                       T.td[srt(when)]]]
     455                reqtime = (segnum, when)
     456            elif etype == "delivery":
     457                if reqtime[0] == segnum:
     458                    segtime = when - reqtime[1]
     459                    speed = self.render_rate(None, 1.0 * seglen / segtime)
     460                    segtime = self.render_time(None, segtime)
     461                else:
     462                    segtime, speed = "", ""
     463                t[T.tr[T.td["delivery"], T.td["seg%d" % segnum],
     464                       T.td[srt(when)],
     465                       T.td["[%d:+%d]" % (segstart, seglen)],
     466                       T.td[self.render_time(None,decodetime)],
     467                       T.td[segtime], T.td[speed]]]
     468            elif etype == "error":
     469                t[T.tr[T.td["error"], T.td["seg%d" % segnum]]]
     470        l["Segment Events:", t]
     471
     472        t = T.table(border="1")
     473        t[T.tr[T.td["serverid"], T.td["shnum"], T.td["range"],
     474               T.td["txtime"], T.td["rxtime"], T.td["received"], T.td["RTT"]]]
     475        reqtime = (None, None)
     476        request_events = []
     477        for serverid,requests in self.download_status.requests.iteritems():
     478            for req in requests:
     479                request_events.append( (serverid,) + req )
     480        request_events.sort(key=lambda req: (req[4],req[1]))
     481        for r_ev in request_events:
     482            (peerid, shnum, start, length, sent, receivedlen, received) = r_ev
     483            rtt = None
     484            if received is not None:
     485                rtt = received - sent
     486            peerid_s = idlib.shortnodeid_b2a(peerid)
     487            t[T.tr(style="background: %s" % self.color(peerid))[
     488                T.td[peerid_s], T.td[shnum],
     489                T.td["[%d:+%d]" % (start, length)],
     490                T.td[srt(sent)], T.td[srt(received)], T.td[receivedlen],
     491                T.td[self.render_time(None, rtt)],
     492                ]]
     493        l["Requests:", t]
     494
     495        return l
     496
     497    def color(self, peerid):
     498        def m(c):
     499            return min(ord(c) / 2 + 0x80, 0xff)
     500        return "#%02x%02x%02x" % (m(peerid[0]), m(peerid[1]), m(peerid[2]))
     501
    361502    def render_results(self, ctx, data):
    362503        d = self.download_results()
    363504        def _got_results(results):
    class DownloadStatusPage(DownloadResultsRendererMixin, rend.Page): 
    371512        TIME_FORMAT = "%H:%M:%S %d-%b-%Y"
    372513        started_s = time.strftime(TIME_FORMAT,
    373514                                  time.localtime(data.get_started()))
    374         return started_s
     515        return started_s + " (%s)" % data.get_started()
    375516
    376517    def render_si(self, ctx, data):
    377518        si_s = base32.b2a_or_none(data.get_storage_index())
  • src/allmydata/web/tahoe.css

    diff --git a/src/allmydata/web/tahoe.css b/src/allmydata/web/tahoe.css
    index 9e0dc2b..a862966 100644
    a b table.tahoe-directory { 
    134134  display: table-cell;
    135135  text-align: center;
    136136  padding: 0 1em;
    137 }
    138  No newline at end of file
     137}
     138
     139/* recent upload/download status pages */
     140
     141table.status-download-events {
     142  border: 1px solid #aaa;
     143}
     144table.status-download-events td {
     145  border: 1px solid #a00;
     146  padding: 2px
     147}