Ticket #393: more-tests.diff.patch
File more-tests.diff.patch, 19.5 KB (added by zooko at 2011-04-01T23:14:27Z)
src/allmydata/mutable/filenode.py
diff -rN -u old-ticket393/src/allmydata/mutable/filenode.py new-ticket393/src/allmydata/mutable/filenode.py
--- old-ticket393/src/allmydata/mutable/filenode.py
+++ new-ticket393/src/allmydata/mutable/filenode.py
@@ -986,7 +986,7 @@
         power-of-two boundary, this operation will use roughly
         O(data.get_size()) memory/bandwidth/CPU to perform the update.
         Otherwise, it must download, re-encode, and upload the entire
-        file again, which will use O(filesize) resources.
+        file again, which will use O(filesize) resources. XXX no, it needs to download, modify, and re-upload only the hash tree, not the entire file contents
         """
         return self._do_serialized(self._update, data, offset)

@@ -994,7 +994,7 @@
     def _update(self, data, offset):
         """
         I update the mutable file version represented by this particular
-        IMutableVersion by inserting the data in data at the offset
+        IMutableVersion by inserting XXX overwriting the data in data at the offset
        offset. I return a Deferred that fires when this has been
         completed.
         """
@@ -1054,9 +1054,10 @@
         # what we'll do later.
         start_segment = offset // segsize

-        # We only need the end segment if the data we append does not go
-        # beyond the current end-of-file.
+        # We only need the end segment if the data we write does not
+        # reach the end of the file.
         end_segment = start_segment
+        # print "offset: %s, spansize: %s, end=%s, filesize: %s" % (offset, data.get_size(), offset+data.get_size(), self.get_size())
         if offset + data.get_size() < self.get_size():
             end_data = offset + data.get_size()
             end_segment = end_data // segsize
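To make the start/end segment arithmetic above concrete: a write of data.get_size() bytes at offset touches start_segment through end_segment, and the end segment only has to be fetched and re-encoded when the write stops short of the current EOF (otherwise the tail is simply appended). A minimal standalone sketch of the same logic (the function name and example numbers are illustrative, not from the patch):

    def touched_segments(offset, datalen, filesize, segsize):
        # mirrors the _update() logic above
        start_segment = offset // segsize
        end_segment = start_segment
        if offset + datalen < filesize:
            # the write ends before EOF, so the segment holding the end
            # of the span must also be downloaded and re-encoded
            end_data = offset + datalen
            end_segment = end_data // segsize
        return (start_segment, end_segment)

    # e.g. segsize=9: a 5-byte write at offset 7 of a 30-byte file covers
    # bytes 7..11, so it touches segments 0 and 1:
    assert touched_segments(7, 5, 30, 9) == (0, 1)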
src/allmydata/mutable/layout.py
diff -rN -u old-ticket393/src/allmydata/mutable/layout.py new-ticket393/src/allmydata/mutable/layout.py
--- old-ticket393/src/allmydata/mutable/layout.py
+++ new-ticket393/src/allmydata/mutable/layout.py
@@ -15,7 +15,7 @@
 # PREFIX:
 # >: Big-endian byte order; the most significant byte is first (leftmost).
 # B: The version information; an 8 bit version identifier. Stored as
-#    an unsigned char. This is currently 00 00 00 00; our modifications
+#    an unsigned char. This is currently 00 00 00 00; our modifications XXX what modifications?
 #    will turn it into 00 00 00 01.
 # Q: The sequence number; this is sort of like a revision history for
 #    mutable files; they start at 1 and increase as they are changed after
@@ -244,6 +244,7 @@

         self._block_size = self._segment_size / self._required_shares

+        # XXX if the caller calls set_* to give us SDMF contents in multiple separate calls, could we incrementally upload them to the server even though they will end up stored and hashed as a single segment there?
         # This is meant to mimic how SDMF files were built before MDMF
         # entered the picture: we generate each share in its entirety,
         # then push it off to the storage server in one write. When
@@ -427,7 +428,7 @@
         The verinfo tuple for MDMF files contains:
             - seqnum
             - root hash
-            - a blank (nothing)
+            - a blank (nothing) # XXX salts?
             - segsize
             - datalen
             - k
@@ -556,29 +557,36 @@
         shares.
         """
         # Expected layout, MDMF:
-        # offset:   size:   name:
+        # offset:   size:   name:                                       pass # to know location:   pass # to know contents:
         #-- signed part --
-        # 0         1       version number (01)
-        # 1         8       sequence number
-        # 9         32      share tree root hash
-        # 41        1       The "k" encoding parameter
-        # 42        1       The "N" encoding parameter
-        # 43        8       The segment size of the uploaded file
-        # 51        8       The data length of the original plaintext
+        # 0         1       version number (01)                         0
+        # 1         8       sequence number                             0
+        # 9         32      share tree root hash                        0
+        # 41        1       The "k" encoding parameter                  0
+        # 42        1       The "N" encoding parameter                  0
+        # 43        8       The segment size of the uploaded file       0
+        # 51        8       The data length of the original plaintext   0
        #-- end signed part --
         # 59        8       The offset of the encrypted private key
         # 83        8       The offset of the signature
         # 91        8       The offset of the verification key
-        # 67        8       The offset of the block hash tree
         # 75        8       The offset of the share hash chain
-        # 99        8       The offset of the EOF
+        # 99        8       The offset of the share data
+        # 67        8       The offset of the block hash tree
         #
+        # __        1280    encrypted signing key (XXX note: experimentally serialized private keys by pycryptopp's implementation of RSA-PSS-SHA256 always come out to 1216 or 1217 bytes...)
+        # __        256     verification key
+        # __        f(N)    share hash chain
+        # __        256     signature over the first (?) 8 (?) fields
+        # __        L       share data (with per-block salt)
+        # __+L      f(L)    block hash tree
+
         # followed by salts and share data, the encrypted private key, the
         # block hash tree, the share hash chain, a signature over the first
         # eight fields, and a verification key.
         #
         # The checkstring is the first three fields -- the version number,
-        # sequence number, root hash and root salt hash. This is consistent
+        # sequence number, root hash and root salt hash. This is consistent # XXX the first *four* fields? Should root salt hash be added to the schema above?
         # in meaning to what we have with SDMF files, except now instead of
         # using the literal salt, we use a value derived from all of the
         # salts -- the share hash root.
@@ -597,7 +605,11 @@
         # and where they should go. We can also figure out where the
         # encrypted private key should go, because we can figure out how
         # big the share data will be.
-        #
+
+        # XXX it would be a major added feature if we could initialize without knowing the data length. From a cursory inspection, I think this might be possible if the fields whose length are changed by the data length came at the end of the layout. Those fields are block hash tree, and... Oh, there is only one field whose length changes if the length of the file changes, and that is block hash tree. If we put that one at the end of the layout, then we can know where all the other fields go without knowing the data length. --Zooko 2011-03-16
+
+        # Hm, and also it would be better if we could compute the exact positions of more fields just starting from facts that we get earlier, such as k, N, segsize. (We get those early facts from cap or CEB.) For example, the offset of the encrypted private key. If the encrypted private key comes before any variable length fields, and therefore if we can compute its offset a priori instead of looking it up in a field, then we might be able to optimize out a round trip on read sometimes, and we definitely get to reduce complexity of the data format.
+
         # 1: Encrypt, encode, and upload the file in chunks. Do something
         # like
         #
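One way to see why the signed part of the offsets table ends at 59: the seven fixed-width fields pack to exactly 59 bytes, which is where the first offset field (the encrypted private key's) begins. A minimal sketch of parsing just that prefix (names are assumed for illustration; the patch's real reader is MDMFSlotReadProxy):

    import struct

    # >: big-endian; B: 1 byte; Q: 8 bytes; 32s: 32 raw bytes
    # fields: version, seqnum, root hash, k, N, segsize, datalen
    SIGNED_PREFIX = ">B Q 32s B B Q Q"
    assert struct.calcsize(SIGNED_PREFIX) == 59

    def unpack_signed_prefix(header):
        (version, seqnum, root_hash,
         k, N, segsize, datalen) = struct.unpack(SIGNED_PREFIX, header[:59])
        return version, seqnum, root_hash, k, N, segsize, datalen

This is also why Zooko's second comment is attractive: everything up to byte 59 is knowable from constants, so any field placed at a fixed position right after it could be located with no extra round trip.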
src/allmydata/mutable/publish.py
diff -rN -u old-ticket393/src/allmydata/mutable/publish.py new-ticket393/src/allmydata/mutable/publish.py
--- old-ticket393/src/allmydata/mutable/publish.py
+++ new-ticket393/src/allmydata/mutable/publish.py
@@ -534,6 +534,7 @@

     def setup_encoding_parameters(self, offset=0):
         if self._version == MDMF_VERSION:
+            # print "WHEEE I'm USING D_M_S_S: ", DEFAULT_MAX_SEGMENT_SIZE
             segment_size = DEFAULT_MAX_SEGMENT_SIZE # 128 KiB by default
         else:
             segment_size = self.datalength # SDMF is only one segment
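For context on what this parameter controls: MDMF caps each segment at DEFAULT_MAX_SEGMENT_SIZE and erasure-codes every segment into blocks, while SDMF treats the whole file as a single segment. A minimal sketch of that choice (standalone names, not the real setup_encoding_parameters body):

    DEFAULT_MAX_SEGMENT_SIZE = 128 * 1024  # 128 KiB, as in publish.py

    def choose_segment_size(is_mdmf, datalength):
        if is_mdmf:
            return DEFAULT_MAX_SEGMENT_SIZE
        return datalength  # SDMF is only one segment

    # each segment is then split into blocks (cf. layout.py above):
    # block_size = segment_size / required_shares

The commented-out print is presumably there to confirm that the test suite's mock.patch of DEFAULT_MAX_SEGMENT_SIZE (see test_mutable.py below) is actually taking effect.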
src/allmydata/mutable/servermap.py
diff -rN -u old-ticket393/src/allmydata/mutable/servermap.py new-ticket393/src/allmydata/mutable/servermap.py
--- old-ticket393/src/allmydata/mutable/servermap.py
+++ new-ticket393/src/allmydata/mutable/servermap.py
@@ -724,7 +724,7 @@
             # fetch the block hash tree and first + last segment, as
             # configured earlier.
             # Then set them in wherever we happen to want to set
-            # them.
+            # them.XXX this comment does not help me. :-)
             ds = []
             # XXX: We do this above, too. Is there a good way to
             # make the two routines share the value without
src/allmydata/nodemaker.py
diff -rN -u old-ticket393/src/allmydata/nodemaker.py new-ticket393/src/allmydata/nodemaker.py
--- old-ticket393/src/allmydata/nodemaker.py
+++ new-ticket393/src/allmydata/nodemaker.py
@@ -94,6 +94,7 @@
         n = MutableFileNode(self.storage_broker, self.secret_holder,
                             self.default_encoding_parameters, self.history)
         n.set_version(version)
+        print "-> thingie: ", self.key_generator.generate
         d = self.key_generator.generate(keysize)
         d.addCallback(n.create_with_keys, contents)
         d.addCallback(lambda res: n)
src/allmydata/storage/crawler.py
diff -rN -u old-ticket393/src/allmydata/storage/crawler.py new-ticket393/src/allmydata/storage/crawler.py
--- old-ticket393/src/allmydata/storage/crawler.py
+++ new-ticket393/src/allmydata/storage/crawler.py
@@ -72,8 +72,9 @@
         self.server = server
         self.sharedir = server.sharedir
         self.statefile = statefile
-        self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2]
-                         for i in range(2**10)]
+        # self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2]
+        #                  for i in range(2**10)]
+        self.prefixes = []
         self.prefixes.sort()
         self.timer = None
         self.bucket_cache = (None, [])
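For context on what is being disabled here: si_b2a() base32-encodes a storage index, and two base32 characters carry 10 bits, so the original list is exactly the 1024 possible 2-character share-directory prefixes (the << (16-10) shift left-aligns the 10 bits in a 16-bit value). With self.prefixes = [] the crawler has no buckets to visit, which presumably keeps background crawling out of these tests. A standalone sketch of the same computation (assumes Tahoe's z-base-32 alphabet; this is not the patch's code):

    ZBASE32 = "ybndrfg8ejkmcpqxot1uwisza345h769"

    def two_char_prefix(i):
        value = i << (16 - 10)      # left-align 10 bits in a 16-bit value
        # take the top two 5-bit groups, exactly what si_b2a(...)[:2] keeps
        return ZBASE32[(value >> 11) & 31] + ZBASE32[(value >> 6) & 31]

    prefixes = sorted(two_char_prefix(i) for i in range(2**10))
    assert len(set(prefixes)) == 1024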
src/allmydata/storage/server.py
diff -rN -u old-ticket393/src/allmydata/storage/server.py new-ticket393/src/allmydata/storage/server.py
--- old-ticket393/src/allmydata/storage/server.py
+++ new-ticket393/src/allmydata/storage/server.py
@@ -94,7 +94,7 @@
                                    expiration_override_lease_duration,
                                    expiration_cutoff_date,
                                    expiration_sharetypes)
-        self.lease_checker.setServiceParent(self)
+        # xxx self.lease_checker.setServiceParent(self)

     def __repr__(self):
         return "<StorageServer %s>" % (idlib.shortnodeid_b2a(self.my_nodeid),)
@@ -102,7 +102,7 @@
     def add_bucket_counter(self):
         statefile = os.path.join(self.storedir, "bucket_counter.state")
         self.bucket_counter = BucketCountingCrawler(self, statefile)
-        self.bucket_counter.setServiceParent(self)
+        #xxx self.bucket_counter.setServiceParent(self)

     def count(self, name, delta=1):
         if self.stats_provider:
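Why commenting out setServiceParent() is enough to silence these crawlers: StorageServer is a twisted MultiService, and a child service only starts (and schedules its timers) once it has been parented into a running service tree. A tiny illustration with bare twisted services (not Tahoe code):

    from twisted.application import service

    parent = service.MultiService()
    child = service.Service()
    # child.setServiceParent(parent)   # skipped, as in the patch
    parent.startService()
    assert not child.running           # never parented, so never started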
src/allmydata/test/common_util.py
diff -rN -u old-ticket393/src/allmydata/test/common_util.py new-ticket393/src/allmydata/test/common_util.py
--- old-ticket393/src/allmydata/test/common_util.py
+++ new-ticket393/src/allmydata/test/common_util.py
@@ -1,11 +1,13 @@
 import os, signal, time
 from random import randrange

+
 from twisted.internet import reactor, defer
 from twisted.python import failure

 def insecurerandstr(n):
-    return ''.join(map(chr, map(randrange, [0]*n, [256]*n)))
+    # return ''.join(map(chr, map(randrange, [0]*n, [256]*n)))
+    return os.urandom(n)

 def flip_bit(good, which):
     # flip the low-order bit of good[which]
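Both the old and the new bodies return a length-n string of uniformly random bytes under Python 2; os.urandom() just produces it in one C call instead of n Python-level randrange() round trips, which matters because the new UpdateSpans tests below draw two ~1.5 MB random buffers at import time. A quick equivalence check (illustrative):

    import os
    from random import randrange

    def old_insecurerandstr(n):   # the replaced implementation
        return ''.join(map(chr, map(randrange, [0]*n, [256]*n)))

    assert len(old_insecurerandstr(1000)) == len(os.urandom(1000)) == 1000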
src/allmydata/test/test_mutable.py
diff -rN -u old-ticket393/src/allmydata/test/test_mutable.py new-ticket393/src/allmydata/test/test_mutable.py
--- old-ticket393/src/allmydata/test/test_mutable.py
+++ new-ticket393/src/allmydata/test/test_mutable.py
@@ -5,7 +5,7 @@
 from twisted.internet import defer, reactor
 from allmydata import uri, client
 from allmydata.nodemaker import NodeMaker
-from allmydata.util import base32, consumer
+from allmydata.util import base32, consumer, mathutil
 from allmydata.util.hashutil import tagged_hash, ssk_writekey_hash, \
      ssk_pubkey_fingerprint_hash
 from allmydata.util.deferredutil import gatherResults
@@ -13,6 +13,7 @@
      NotEnoughSharesError, SDMF_VERSION, MDMF_VERSION
 from allmydata.monitor import Monitor
 from allmydata.test.common import ShouldFailMixin
+from allmydata.test.common_util import insecurerandstr
 from allmydata.test.no_network import GridTestMixin
 from foolscap.api import eventually, fireEventually
 from foolscap.logging import log
@@ -25,14 +26,15 @@
      NotEnoughServersError, CorruptShareError
 from allmydata.mutable.retrieve import Retrieve
 from allmydata.mutable.publish import Publish, MutableFileHandle, \
-                                      MutableData, \
-                                      DEFAULT_MAX_SEGMENT_SIZE
+                                      DEFAULT_MAX_SEGMENT_SIZE, MutableData
 from allmydata.mutable.servermap import ServerMap, ServermapUpdater
 from allmydata.mutable.layout import unpack_header, MDMFSlotReadProxy
 from allmydata.mutable.repairer import MustForceRepairError

 import allmydata.test.common_util as testutil

+import mock
+
 # this "FakeStorage" exists to put the share data in RAM and avoid using real
 # network connections, both to speed up the tests and to reduce the amount of
 # non-mutable.py code being exercised.
@@ -3111,23 +3113,6 @@
                       self.failUnlessEqual(results, new_data))
         return d

-    def test_replace_in_last_segment(self):
-        # The wrapper should know how to handle the tail segment
-        # appropriately.
-        replace_offset = len(self.data) - 100
-        new_data = self.data[:replace_offset] + "replaced"
-        rest_offset = replace_offset + len("replaced")
-        new_data += self.data[rest_offset:]
-        d = self.mdmf_node.get_best_mutable_version()
-        d.addCallback(lambda mv:
-            mv.update(MutableData("replaced"), replace_offset))
-        d.addCallback(lambda ignored:
-            self.mdmf_node.download_best_version())
-        d.addCallback(lambda results:
-            self.failUnlessEqual(results, new_data))
-        return d
-
-
     def test_multiple_segment_replace(self):
         replace_offset = 2 * DEFAULT_MAX_SEGMENT_SIZE
         new_data = self.data[:replace_offset]
@@ -3145,3 +3130,125 @@
         d.addCallback(lambda results:
                       self.failUnlessEqual(results, new_data))
         return d
+
+CONFIGUREDSEGSIZE = 7
+REALSEGSIZE = mathutil.next_multiple(CONFIGUREDSEGSIZE, 3)
+SIZE = 15 * 10 ** 5
+# data = 'A' * filesize
+filedata = insecurerandstr(SIZE)
+def get_file_data(size):
+    assert size <= len(filedata), (size, len(filedata))
+    return filedata[:size]
+
+# spandata = 'B' * size
+spandata = insecurerandstr(SIZE)
+def get_span_data(size):
+    assert size <= len(spandata)
+    return spandata[:size]
+
+class UpdateSpans(GridTestMixin, unittest.TestCase, testutil.ShouldFailMixin):
+    """ This is like class Update but we moved some tests out of there
+    and refactored the upload of files to be done on a per-test-method
+    basis instead of per instance.
+    """
+    def setUp(self):
+        self.patcher1 = mock.patch('allmydata.mutable.publish.DEFAULT_MAX_SEGMENT_SIZE', CONFIGUREDSEGSIZE)
+        self.patcher1.__enter__()
+
+        GridTestMixin.setUp(self)
+        self.basedir = self.mktemp()
+        self.set_up_grid()
+        self.c = self.g.clients[0]
+        self.nm = self.c.nodemaker
+
+        # stub out time wasting RSA key generation
+        #XXXclass FakePrivateKeyjj/Fake
+
+        #XXXdef generate_mock(keysize):
+        #XXX    return defer.succeed((mock.Mock(), mock.Mock()))
+        #XXXself.nm.key_generator.generate = generate_mock
+
+    def tearDown(self):
+        self.patcher1.__exit__()
+        return GridTestMixin.tearDown(self)
+
+    def do_upload(self, filesize):
+        data = get_file_data(filesize)
+        d = self.nm.create_mutable_file(MutableData(data),
+                                        version=MDMF_VERSION)
+        print "hello 1 ", d
+        def _then(n):
+            assert isinstance(n, MutableFileNode)
+
+            self.mdmf_node = n
+            return n, data
+        d.addCallback(_then)
+        return d
+
+    def test_edge_cases(self):
+        d = defer.succeed(None)
+        EVEN_MULTIPLE = REALSEGSIZE*5
+        NOT_EVEN_MULTIPLE = EVEN_MULTIPLE + 1
+
+        cases = []
+        ss = REALSEGSIZE
+        for filesize in (EVEN_MULTIPLE,): # XXX test NOT_EVEN_MULTIPLE
+            d.addCallback(lambda ignored, filesize=filesize: self.do_upload(filesize))
+
+            for startseg in (0,):
+            # for startseg in (0, 1, 3, 4):
+                for endseg in range(startseg, 7):
+                # for endseg in range(startseg, 7):
+                    for startindex in (0, ):
+                    # for startindex in (0, 1, 2, ss-1):
+                        for endindex in (1, ss-3, ):
+                        # for endindex in (1, ss-3, ss-2, ss-1, ss):
+                            offset = ss*startseg+startindex
+                            end = ss*endseg+endindex
+                            size = end - offset
+                            if size <= 0:
+                                continue
+                            cases.append((offset, end))
+                            d.addCallback(self._do_replacement_test, offset, size)
+
+        # print "list, set ", len(cases), len(set(cases))
+        return d
+    test_edge_cases.timeout = 1000
+
+    def test_replace_in_last_segment_uneven_file_size(self):
+        # marked for death
+        return self._do_replacement_test(900000, 900000-100, 8)
+
+    def test_replace_first_segment(self):
+        return self._do_replacement_test(900000, 0, REALSEGSIZE)
+
+    def test_replace_last_segment_even_file_size(self):
+        filesize = REALSEGSIZE*9
+        numsegs = mathutil.div_ceil(filesize, REALSEGSIZE)
+        offset = (numsegs-1)*REALSEGSIZE
+        size = filesize - offset
+        return self._do_replacement_test(filesize, offset, size)

+    def test_replace_last_segment_uneven_file_size(self):
+        filesize = 900000
+        numsegs = mathutil.div_ceil(filesize, REALSEGSIZE)
+        offset = (numsegs-1)*REALSEGSIZE
+        size = filesize - offset
+        return self._do_replacement_test(filesize, offset, size)
+
+    def _do_replacement_test(self, (node, olddata), offset, size):
+        # print "hello _do_replacement_test(%s)" % ((node, olddata,),)
+        # print offset, size
+        d = node.get_best_mutable_version()
+
+        spandata = get_span_data(size)
+        d.addCallback(lambda mv:
+            mv.update(MutableData(spandata), offset))
+        d.addCallback(lambda ignored:
+            node.download_best_version())
+
+        newfiledata = olddata[:offset] + spandata + olddata[offset+size:]
+        d.addCallback(lambda results:
+            self.failUnlessEqual(results, newfiledata))
+        d.addCallback(lambda ignore: (node, newfiledata))
+        return d