Ticket #393: more-tests.diff.patch

File more-tests.diff.patch, 19.5 KB (added by zooko at 2011-04-01T23:14:27Z)
  • src/allmydata/mutable/filenode.py

    diff -rN -u old-ticket393/src/allmydata/mutable/filenode.py new-ticket393/src/allmydata/mutable/filenode.py
    @@ -986,7 +986,7 @@
             power-of-two boundary, this operation will use roughly
             O(data.get_size()) memory/bandwidth/CPU to perform the update.
             Otherwise, it must download, re-encode, and upload the entire
    -        file again, which will use O(filesize) resources.
    +        file again, which will use O(filesize) resources. XXX no, it needs to download, modify, and re-upload only the hash tree, not the entire file contents
             """
             return self._do_serialized(self._update, data, offset)
     
    @@ -994,7 +994,7 @@
         def _update(self, data, offset):
             """
             I update the mutable file version represented by this particular
    -        IMutableVersion by inserting the data in data at the offset
    +        IMutableVersion by inserting XXX overwriting    the data in data at the offset
             offset. I return a Deferred that fires when this has been
             completed.
             """
    @@ -1054,9 +1054,10 @@
             # what we'll do later.
             start_segment = offset // segsize
     
    -        # We only need the end segment if the data we append does not go
    -        # beyond the current end-of-file.
    +        # We only need the end segment if the data we write does not
    +        # reach the end of the file.
             end_segment = start_segment
    +        # print "offset: %s, spansize: %s, end=%s, filesize: %s" % (offset, data.get_size(), offset+data.get_size(), self.get_size())
             if offset + data.get_size() < self.get_size():
                 end_data = offset + data.get_size()
                 end_segment = end_data // segsize
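
The `_update` method above overwrites `data.get_size()` bytes in place at `offset`, and the XXX note argues that only the touched segments plus the hash tree need re-uploading. The segment arithmetic in the last hunk can be sanity-checked with a standalone sketch (illustrative only, not code from the patch; the helper name and sample numbers are made up):

    # Which segments does a write of datalen bytes at offset touch?
    def touched_segments(offset, datalen, segsize, filesize):
        start_segment = offset // segsize
        end_segment = start_segment
        # The tail segment matters only if the write stops short of the
        # current end-of-file; a write that reaches EOF replaces
        # everything after its starting segment anyway.
        if offset + datalen < filesize:
            end_data = offset + datalen
            end_segment = end_data // segsize
        return start_segment, end_segment

    # A 300-byte overwrite at offset 200 in a 1000-byte file with
    # 128-byte segments touches segments 1 through 3:
    assert touched_segments(200, 300, 128, 1000) == (1, 3)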
  • src/allmydata/mutable/layout.py

    diff -rN -u old-ticket393/src/allmydata/mutable/layout.py new-ticket393/src/allmydata/mutable/layout.py
    @@ -15,7 +15,7 @@
     #  PREFIX:
     #    >: Big-endian byte order; the most significant byte is first (leftmost).
     #    B: The version information; an 8 bit version identifier. Stored as
    -#       an unsigned char. This is currently 00 00 00 00; our modifications
    +#       an unsigned char. This is currently 00 00 00 00; our modifications XXX what modifications?
     #       will turn it into 00 00 00 01.
     #    Q: The sequence number; this is sort of like a revision history for
     #       mutable files; they start at 1 and increase as they are changed after
    @@ -244,6 +244,7 @@
     
             self._block_size = self._segment_size / self._required_shares
     
    +        # XXX if the caller calls set_* to give us SDMF contents in multiple separate calls, could we incrementally upload them to the server even though they will end up stored and hashed as a single segment there?
             # This is meant to mimic how SDMF files were built before MDMF
             # entered the picture: we generate each share in its entirety,
             # then push it off to the storage server in one write. When
    @@ -427,7 +428,7 @@
             The verinfo tuple for MDMF files contains:
                 - seqnum
                 - root hash
    -            - a blank (nothing)
    +            - a blank (nothing) # XXX salts?
                 - segsize
                 - datalen
                 - k
    @@ -556,29 +557,36 @@
         shares.
         """
         # Expected layout, MDMF:
    -    # offset:     size:       name:
    +    # offset:     size:       name:                                     pass # to know location: pass # to know contents:
         #-- signed part --
    -    # 0           1           version number (01)
    -    # 1           8           sequence number
    -    # 9           32          share tree root hash
    -    # 41          1           The "k" encoding parameter
    -    # 42          1           The "N" encoding parameter
    -    # 43          8           The segment size of the uploaded file
    -    # 51          8           The data length of the original plaintext
    +    # 0           1           version number (01)                       0
    +    # 1           8           sequence number                           0
    +    # 9           32          share tree root hash                      0
    +    # 41          1           The "k" encoding parameter                0
    +    # 42          1           The "N" encoding parameter                0
    +    # 43          8           The segment size of the uploaded file     0
    +    # 51          8           The data length of the original plaintext 0
         #-- end signed part --
         # 59          8           The offset of the encrypted private key
         # 83          8           The offset of the signature
         # 91          8           The offset of the verification key
    -    # 67          8           The offset of the block hash tree
         # 75          8           The offset of the share hash chain
    -    # 99          8           The offset of the EOF
    +    # 99          8           The offset of the share data
    +    # 67          8           The offset of the block hash tree
         #
    +    # __          1280        encrypted signing key (XXX note: experimentally serialized private keys by pycryptopp's implementation of RSA-PSS-SHA256 always come out to 1216 or 1217 bytes...)
    +    # __          256         verification key
    +    # __          f(N)        share hash chain
    +    # __          256         signature over the first (?) 8 (?) fields
    +    # __          L           share data (with per-block salt)
    +    # __+L        f(L)        block hash tree
    +
         # followed by salts and share data, the encrypted private key, the
         # block hash tree, the share hash chain, a signature over the first
         # eight fields, and a verification key.
         #
         # The checkstring is the first three fields -- the version number,
    -    # sequence number, root hash and root salt hash. This is consistent
    +    # sequence number, root hash and root salt hash. This is consistent # XXX the first *four* fields? Should root salt hash be added to the schema above?
         # in meaning to what we have with SDMF files, except now instead of
         # using the literal salt, we use a value derived from all of the
         # salts -- the share hash root.
    @@ -597,7 +605,11 @@
         #      and where they should go.. We can also figure out where the
         #      encrypted private key should go, because we can figure out how
         #      big the share data will be.
    -    #
    +
    +# XXX it would be a major added feature if we could initialize without knowing the data length. From a cursory inspection, I think this might be possible if the fields whose length are changed by the data length came at the end of the layout. Those fields are block hash tree, and... Oh, there is only one field whose length changes if the length of the file changes, and that is block hash tree. If we put that one at the end of the layout, then we can know where all the other fields go without knowing the data length. --Zooko 2011-03-16
    +
    +# Hm, and also it would be better if we could compute the exact positions of more fields just starting from facts that we get earlier, such as k,  N, segsize. (We get those early facts from cap or CEB.) For example, the offset of the encrypted private key. If the encrypted private key comes before any variable length fields, and therefore if we can compute its offset a priori instead of looking it up in a field, then we might be able to optimize out a round trip on read sometimes, and we definitely get to reduce complexity of the data format.
    +
         #   1: Encrypt, encode, and upload the file in chunks. Do something
         #      like
         #
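
The fixed-position claims in the layout table above can be checked mechanically. A standalone sketch (illustrative only, not part of the patch) confirms that the signed prefix occupies bytes 0 through 58 and that the six 8-byte offset fields end at byte 107, which is what makes Zooko's a-priori-offsets idea plausible for everything except the variable-length fields:

    import struct

    # Signed prefix from the table: version (B), seqnum (Q),
    # root hash (32s), k (B), N (B), segsize (Q), datalen (Q).
    PREFIX = ">BQ32sBBQQ"
    assert struct.calcsize(PREFIX) == 59    # next field starts at offset 59

    # Six 8-byte offset fields follow, so the fixed-size header ends at 107.
    HEADER = PREFIX + "QQQQQQ"
    assert struct.calcsize(HEADER) == 107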
  • src/allmydata/mutable/publish.py

    diff -rN -u old-ticket393/src/allmydata/mutable/publish.py new-ticket393/src/allmydata/mutable/publish.py
    @@ -534,6 +534,7 @@
     
         def setup_encoding_parameters(self, offset=0):
             if self._version == MDMF_VERSION:
    +            # print "WHEEE I'm USING D_M_S_S: ", DEFAULT_MAX_SEGMENT_SIZE
                 segment_size = DEFAULT_MAX_SEGMENT_SIZE # 128 KiB by default
             else:
                 segment_size = self.datalength # SDMF is only one segment
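
This branch is the whole difference between the two formats' segmentation: MDMF caps segments at 128 KiB while SDMF stores the file as a single segment. A quick illustrative sketch of the consequence (`div_ceil` here mirrors `allmydata.util.mathutil.div_ceil`; the file size is made up):

    def div_ceil(n, d):
        # round-up integer division, as in allmydata.util.mathutil
        return (n + d - 1) // d

    DEFAULT_MAX_SEGMENT_SIZE = 128 * 1024
    datalength = 1000000  # a ~1 MB mutable file

    mdmf_segments = div_ceil(datalength, DEFAULT_MAX_SEGMENT_SIZE)
    sdmf_segments = div_ceil(datalength, datalength)
    assert (mdmf_segments, sdmf_segments) == (8, 1)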
  • src/allmydata/mutable/servermap.py

    diff -rN -u old-ticket393/src/allmydata/mutable/servermap.py new-ticket393/src/allmydata/mutable/servermap.py
    @@ -724,7 +724,7 @@
                     # fetch the block hash tree and first + last segment, as
                     # configured earlier.
                     # Then set them in wherever we happen to want to set
    -                # them.
    +                # them.XXX this comment does not help me. :-)
                     ds = []
                     # XXX: We do this above, too. Is there a good way to
                     # make the two routines share the value without
  • src/allmydata/nodemaker.py

    diff -rN -u old-ticket393/src/allmydata/nodemaker.py new-ticket393/src/allmydata/nodemaker.py
    @@ -94,6 +94,7 @@
             n = MutableFileNode(self.storage_broker, self.secret_holder,
                                 self.default_encoding_parameters, self.history)
             n.set_version(version)
    +        print "-> thingie: ", self.key_generator.generate
             d = self.key_generator.generate(keysize)
             d.addCallback(n.create_with_keys, contents)
             d.addCallback(lambda res: n)
  • src/allmydata/storage/crawler.py

    diff -rN -u old-ticket393/src/allmydata/storage/crawler.py new-ticket393/src/allmydata/storage/crawler.py
    @@ -72,8 +72,9 @@
             self.server = server
             self.sharedir = server.sharedir
             self.statefile = statefile
    -        self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2]
    -                         for i in range(2**10)]
    +        # self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2]
    +        #                  for i in range(2**10)]
    +        self.prefixes = []
             self.prefixes.sort()
             self.timer = None
             self.bucket_cache = (None, [])
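
For context on what the disabled list contained: `i << (16-10)` walks every 10-bit pattern through the top bits of a 2-byte value, and the first two base32 characters of the encoding cover exactly those 10 bits, so the list enumerated all 2**10 == 1024 possible two-character share-directory prefixes. An illustrative sketch (this assumes `si_b2a` behaves like lowercase RFC 3548 base32, as `allmydata.util.base32` does):

    import struct, base64

    prefixes = sorted(set(
        base64.b32encode(struct.pack(">H", i << (16 - 10)))[:2].lower()
        for i in range(2 ** 10)))
    assert len(prefixes) == 1024  # every 10-bit pattern gives a distinct prefix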
  • src/allmydata/storage/server.py

    diff -rN -u old-ticket393/src/allmydata/storage/server.py new-ticket393/src/allmydata/storage/server.py
    @@ -94,7 +94,7 @@
                                        expiration_override_lease_duration,
                                        expiration_cutoff_date,
                                        expiration_sharetypes)
    -        self.lease_checker.setServiceParent(self)
    +        # xxx self.lease_checker.setServiceParent(self)
     
         def __repr__(self):
             return "<StorageServer %s>" % (idlib.shortnodeid_b2a(self.my_nodeid),)
    @@ -102,7 +102,7 @@
         def add_bucket_counter(self):
             statefile = os.path.join(self.storedir, "bucket_counter.state")
             self.bucket_counter = BucketCountingCrawler(self, statefile)
    -        self.bucket_counter.setServiceParent(self)
    +        #xxx self.bucket_counter.setServiceParent(self)
     
         def count(self, name, delta=1):
             if self.stats_provider:
  • src/allmydata/test/common_util.py

    diff -rN -u old-ticket393/src/allmydata/test/common_util.py new-ticket393/src/allmydata/test/common_util.py
    @@ -1,11 +1,13 @@
     import os, signal, time
     from random import randrange
     
    +
     from twisted.internet import reactor, defer
     from twisted.python import failure
     
     def insecurerandstr(n):
    -    return ''.join(map(chr, map(randrange, [0]*n, [256]*n)))
    +    # return ''.join(map(chr, map(randrange, [0]*n, [256]*n)))
    +    return os.urandom(n)
     
     def flip_bit(good, which):
         # flip the low-order bit of good[which]
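
Both implementations return `n` arbitrary bytes (neither is cryptographically meaningful here, hence the name), but `os.urandom` does it in one call instead of `n` Python-level `randrange` calls, which matters when the tests below build two 1.5 MB buffers. A minimal illustrative check:

    import os

    def insecurerandstr(n):
        return os.urandom(n)

    s = insecurerandstr(17)
    assert len(s) == 17
    assert all(0 <= ord(c) < 256 for c in s)  # same range as the old randrange loop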
  • src/allmydata/test/test_mutable.py

    diff -rN -u old-ticket393/src/allmydata/test/test_mutable.py new-ticket393/src/allmydata/test/test_mutable.py
    @@ -5,7 +5,7 @@
     from twisted.internet import defer, reactor
     from allmydata import uri, client
     from allmydata.nodemaker import NodeMaker
    -from allmydata.util import base32, consumer
    +from allmydata.util import base32, consumer, mathutil
     from allmydata.util.hashutil import tagged_hash, ssk_writekey_hash, \
          ssk_pubkey_fingerprint_hash
     from allmydata.util.deferredutil import gatherResults
    @@ -13,6 +13,7 @@
          NotEnoughSharesError, SDMF_VERSION, MDMF_VERSION
     from allmydata.monitor import Monitor
     from allmydata.test.common import ShouldFailMixin
    +from allmydata.test.common_util import insecurerandstr
     from allmydata.test.no_network import GridTestMixin
     from foolscap.api import eventually, fireEventually
     from foolscap.logging import log
    @@ -25,14 +26,15 @@
          NotEnoughServersError, CorruptShareError
     from allmydata.mutable.retrieve import Retrieve
     from allmydata.mutable.publish import Publish, MutableFileHandle, \
    -                                      MutableData, \
    -                                      DEFAULT_MAX_SEGMENT_SIZE
    +    DEFAULT_MAX_SEGMENT_SIZE, MutableData
     from allmydata.mutable.servermap import ServerMap, ServermapUpdater
     from allmydata.mutable.layout import unpack_header, MDMFSlotReadProxy
     from allmydata.mutable.repairer import MustForceRepairError
     
     import allmydata.test.common_util as testutil
     
    +import mock
    +
     # this "FakeStorage" exists to put the share data in RAM and avoid using real
     # network connections, both to speed up the tests and to reduce the amount of
     # non-mutable.py code being exercised.
    @@ -3111,23 +3113,6 @@
                 self.failUnlessEqual(results, new_data))
             return d
     
    -    def test_replace_in_last_segment(self):
    -        # The wrapper should know how to handle the tail segment
    -        # appropriately.
    -        replace_offset = len(self.data) - 100
    -        new_data = self.data[:replace_offset] + "replaced"
    -        rest_offset = replace_offset + len("replaced")
    -        new_data += self.data[rest_offset:]
    -        d = self.mdmf_node.get_best_mutable_version()
    -        d.addCallback(lambda mv:
    -            mv.update(MutableData("replaced"), replace_offset))
    -        d.addCallback(lambda ignored:
    -            self.mdmf_node.download_best_version())
    -        d.addCallback(lambda results:
    -            self.failUnlessEqual(results, new_data))
    -        return d
    -
    -
         def test_multiple_segment_replace(self):
             replace_offset = 2 * DEFAULT_MAX_SEGMENT_SIZE
             new_data = self.data[:replace_offset]
    @@ -3145,3 +3130,125 @@
             d.addCallback(lambda results:
                 self.failUnlessEqual(results, new_data))
             return d
    +
    +CONFIGUREDSEGSIZE = 7
    +REALSEGSIZE = mathutil.next_multiple(CONFIGUREDSEGSIZE, 3)
    +SIZE = 15 * 10 ** 5
    +# data = 'A' * filesize
    +filedata = insecurerandstr(SIZE)
    +def get_file_data(size):
    +    assert size <= len(filedata), (size, len(filedata))
    +    return filedata[:size]
    +
    +# spandata = 'B' * size
    +spandata = insecurerandstr(SIZE)
    +def get_span_data(size):
    +    assert size <= len(spandata)
    +    return spandata[:size]
    +
    +class UpdateSpans(GridTestMixin, unittest.TestCase, testutil.ShouldFailMixin):
    +    """ This is like class Update but we moved some tests out of there
    +    and refactored the upload of files to be done on a per-test-method
    +    basis instead of per instance. """
    +
    +    def setUp(self):
    +        self.patcher1 = mock.patch('allmydata.mutable.publish.DEFAULT_MAX_SEGMENT_SIZE', CONFIGUREDSEGSIZE)
    +        self.patcher1.__enter__()
    +
    +        GridTestMixin.setUp(self)
    +        self.basedir = self.mktemp()
    +        self.set_up_grid()
    +        self.c = self.g.clients[0]
    +        self.nm = self.c.nodemaker
    +
    +        # stub out time wasting RSA key generation
    +        #XXXclass FakePrivateKeyjj/Fake
    +
    +        #XXXdef generate_mock(keysize):
    +        #XXX    return defer.succeed((mock.Mock(), mock.Mock()))
    +        #XXXself.nm.key_generator.generate = generate_mock
    +
    +    def tearDown(self):
    +        self.patcher1.__exit__()
    +        return GridTestMixin.tearDown(self)
    +
    +    def do_upload(self, filesize):
    +        data = get_file_data(filesize)
    +        d = self.nm.create_mutable_file(MutableData(data),
    +                                         version=MDMF_VERSION)
    +        print "hello 1 ", d
    +        def _then(n):
    +            assert isinstance(n, MutableFileNode)
    +
    +            self.mdmf_node = n
    +            return n, data
    +        d.addCallback(_then)
    +        return d
    +
    +    def test_edge_cases(self):
    +        d = defer.succeed(None)
    +        EVEN_MULTIPLE = REALSEGSIZE*5
    +        NOT_EVEN_MULTIPLE = EVEN_MULTIPLE + 1
    +
    +        cases = []
    +        ss = REALSEGSIZE
    +        for filesize in (EVEN_MULTIPLE,): # XXX test NOT_EVEN_MULTIPLE
    +            d.addCallback(lambda ignored, filesize=filesize: self.do_upload(filesize))
    +
    +            for startseg in (0,):
    +            # for startseg in (0, 1, 3, 4):
    +                for endseg in range(startseg, 7):
    +                # for endseg in range(startseg, 7):
    +                    for startindex in (0, ):
    +                    # for startindex in (0, 1, 2, ss-1):
    +                        for endindex in (1, ss-3, ):
    +                        # for endindex in (1, ss-3, ss-2, ss-1, ss):
    +                            offset = ss*startseg+startindex
    +                            end = ss*endseg+endindex
    +                            size = end - offset
    +                            if size <= 0:
    +                                continue
    +                            cases.append((offset, end))
    +                            d.addCallback(self._do_replacement_test, offset, size)
    +
    +        # print "list, set ", len(cases), len(set(cases))
    +        return d
    +    test_edge_cases.timeout = 1000
    +
    +    def test_replace_in_last_segment_uneven_file_size(self):
    +        # marked for death
    +        return self._do_replacement_test(900000, 900000-100, 8)
    +
    +    def test_replace_first_segment(self):
    +        return self._do_replacement_test(900000, 0, REALSEGSIZE)
    +
    +    def test_replace_last_segment_even_file_size(self):
    +        filesize = REALSEGSIZE*9
    +        numsegs = mathutil.div_ceil(filesize, REALSEGSIZE)
    +        offset = (numsegs-1)*REALSEGSIZE
    +        size = filesize - offset
    +        return self._do_replacement_test(filesize, offset, size)
    +
    +    def test_replace_last_segment_uneven_file_size(self):
    +        filesize = 900000
    +        numsegs = mathutil.div_ceil(filesize, REALSEGSIZE)
    +        offset = (numsegs-1)*REALSEGSIZE
    +        size = filesize - offset
    +        return self._do_replacement_test(filesize, offset, size)
    +
    +    def _do_replacement_test(self, (node, olddata), offset, size):
    +        # print "hello _do_replacement_test(%s)" % ((node, olddata,),)
    +        # print offset, size
    +        d = node.get_best_mutable_version()
    +
    +        spandata = get_span_data(size)
    +        d.addCallback(lambda mv:
    +                          mv.update(MutableData(spandata), offset))
    +        d.addCallback(lambda ignored:
    +                          node.download_best_version())
    +
    +        newfiledata = olddata[:offset] + spandata + olddata[offset+size:]
    +        d.addCallback(lambda results:
    +                           self.failUnlessEqual(results, newfiledata))
    +        d.addCallback(lambda ignore: (node, newfiledata))
    +        return d
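
The commented-out `#XXX` block in `setUp` above sketches the intended speed-up: replacing real RSA key generation with an instant stub. A possible completion of that sketch (hypothetical; it follows the commented lines in the patch, not any shipped Tahoe code):

    from twisted.internet import defer
    import mock

    def generate_mock(keysize):
        # return an already-fired Deferred instead of doing expensive RSA
        # math; the two Mock objects stand in for the two keys of the pair
        return defer.succeed((mock.Mock(), mock.Mock()))

    # then, inside setUp():
    #     self.nm.key_generator.generate = generate_mock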