source: trunk/src/allmydata/mutable/layout.py

Last change on this file was 4da491a, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-03-11T20:37:27Z

remove more usage of "future"

1"""
2Ported to Python 3.
3"""
4
5import struct
6from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError, \
7     BadShareError
8from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
9                                 MDMF_VERSION, IMutableSlotWriter
10from allmydata.util import mathutil
11from twisted.python import failure
12from twisted.internet import defer
13from zope.interface import implementer
14
15
# These strings describe the format of the packed structs they help process.
# Here's what they mean:
#
#  PREFIX:
#    >: Big-endian byte order; the most significant byte is first (leftmost).
#    B: The container version information; stored as an unsigned 8-bit integer.
#       This is currently either SDMF_VERSION or MDMF_VERSION.
#    Q: The sequence number; this is sort of like a revision history for
#       mutable files; they start at 1 and increase as they are changed after
#       being uploaded. Stored as an unsigned 64-bit integer.
#  32s: The root hash of the share hash tree. We use sha-256d, so we use 32
#       bytes to store the value.
#  16s: The salt for the readkey. This is a 16-byte random value.
#
#  SIGNED_PREFIX additions, things that are covered by the signature:
#    B: The "k" encoding parameter. We store this as an unsigned 8-bit
#       integer, since our erasure coding scheme cannot encode to more than
#       255 pieces.
#    B: The "N" encoding parameter. Stored as an unsigned 8-bit integer for
#       the same reason as above.
#    Q: The segment size of the uploaded file. This is an unsigned 64-bit
#       integer, to allow handling large segments and files. For SDMF the
#       segment size is the data length plus padding; for MDMF it can be
#       smaller.
#    Q: The data length of the uploaded file. Like the segment size field,
#       it is an unsigned 64-bit integer.
#
#   HEADER additions:
#     L: The offset of the signature. An unsigned 32-bit integer.
#     L: The offset of the share hash chain. An unsigned 32-bit integer.
#     L: The offset of the block hash tree. An unsigned 32-bit integer.
#     L: The offset of the share data. An unsigned 32-bit integer.
#     Q: The offset of the encrypted private key. An unsigned 64-bit integer,
#        to account for the possibility of a lot of share data.
#     Q: The offset of the EOF. An unsigned 64-bit integer, to account for
#        the possibility of a lot of share data.
#
#  After all of these, we have the following:
#    - The verification key: Occupies the space between the end of the header
#      and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']]).
#    - The signature, which goes from the signature offset to the share hash
#      chain offset.
#    - The share hash chain, which goes from the share hash chain offset to
#      the block hash tree offset.
#    - The block hash tree, which goes from the block hash tree offset to
#      the share data offset.
#    - The share data, which goes from the share data offset to the encrypted
#      private key offset.
#    - The encrypted private key, which goes from the encrypted private key
#      offset to the end of the file.
#
#  The block hash tree in this encoding has only one leaf, so the offset of
#  the share data will be 32 bytes more than the offset of the block hash
#  tree. Given this, we may need to check to see how many bytes a reasonably
#  sized block hash tree will take up.

PREFIX = ">BQ32s16s" # each version may have a different prefix
SIGNED_PREFIX = ">BQ32s16s BBQQ" # this is covered by the signature
SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
HEADER = ">BQ32s16s BBQQ LLLLQQ" # includes offsets
HEADER_LENGTH = struct.calcsize(HEADER)
OFFSETS = ">LLLLQQ"
OFFSETS_LENGTH = struct.calcsize(OFFSETS)
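
# For reference: SIGNED_PREFIX_LENGTH works out to 75 bytes
# (1+8+32+16 + 1+1+8+8) and HEADER_LENGTH to 107 bytes (the 75-byte
# signed prefix plus the 32-byte offsets table).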

MAX_MUTABLE_SHARE_SIZE = 69105*1000*1000*1000*1000 # 69105 TB, kind of arbitrary


# These are still used for some tests of SDMF files.
def unpack_header(data):
    o = {}
    (version,
     seqnum,
     root_hash,
     IV,
     k, N, segsize, datalen,
     o['signature'],
     o['share_hash_chain'],
     o['block_hash_tree'],
     o['share_data'],
     o['enc_privkey'],
     o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH])
    return (version, seqnum, root_hash, IV, k, N, segsize, datalen, o)

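# Illustrative sketch, not part of the original module: a round-trip
# through the HEADER format above. Every field value here is made up
# for demonstration; the six trailing values are dummy offsets.
def _example_unpack_header():
    packed = struct.pack(HEADER,
                         0,             # container version (SDMF)
                         1,             # sequence number
                         b"\x00" * 32,  # root hash of the share hash tree
                         b"\x01" * 16,  # salt/IV for the readkey
                         3, 10,         # k and N encoding parameters
                         36, 36,        # segment size, data length
                         107, 107, 107, 107, 107, 107)  # dummy offsets
    (version, seqnum, root_hash, IV,
     k, N, segsize, datalen, offsets) = unpack_header(packed)
    assert (version, seqnum, k, N) == (0, 1, 3, 10)
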
def unpack_share(data):
    assert len(data) >= HEADER_LENGTH
    o = {}
    (version,
     seqnum,
     root_hash,
     IV,
     k, N, segsize, datalen,
     o['signature'],
     o['share_hash_chain'],
     o['block_hash_tree'],
     o['share_data'],
     o['enc_privkey'],
     o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH])

    if version != 0:
        raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)

    if len(data) < o['EOF']:
        raise NeedMoreDataError(o['EOF'],
                                o['enc_privkey'], o['EOF']-o['enc_privkey'])

    pubkey = data[HEADER_LENGTH:o['signature']]
    signature = data[o['signature']:o['share_hash_chain']]
    share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']]
    share_hash_format = ">H32s"
    hsize = struct.calcsize(share_hash_format)
    if len(share_hash_chain_s) % hsize != 0:
        raise BadShareError("hash chain is %d bytes, not multiple of %d"
                            % (len(share_hash_chain_s), hsize))
    share_hash_chain = []
    for i in range(0, len(share_hash_chain_s), hsize):
        chunk = share_hash_chain_s[i:i+hsize]
        (hid, h) = struct.unpack(share_hash_format, chunk)
        share_hash_chain.append( (hid, h) )
    share_hash_chain = dict(share_hash_chain)
    block_hash_tree_s = data[o['block_hash_tree']:o['share_data']]
    if len(block_hash_tree_s) % 32 != 0:
        raise BadShareError("block_hash_tree is %d bytes, not multiple of %d"
                            % (len(block_hash_tree_s), 32))
    block_hash_tree = []
    for i in range(0, len(block_hash_tree_s), 32):
        block_hash_tree.append(block_hash_tree_s[i:i+32])

    share_data = data[o['share_data']:o['enc_privkey']]
    enc_privkey = data[o['enc_privkey']:o['EOF']]

    return (seqnum, root_hash, IV, k, N, segsize, datalen,
            pubkey, signature, share_hash_chain, block_hash_tree,
            share_data, enc_privkey)

def get_version_from_checkstring(checkstring):
    (t, ) = struct.unpack(">B", checkstring[:1])
    return t

def unpack_sdmf_checkstring(checkstring):
    cs_len = struct.calcsize(PREFIX)
    version, seqnum, root_hash, IV = struct.unpack(PREFIX, checkstring[:cs_len])
    assert version == SDMF_VERSION, version
    return (seqnum, root_hash, IV)

def unpack_mdmf_checkstring(checkstring):
    cs_len = struct.calcsize(MDMFCHECKSTRING)
    version, seqnum, root_hash = struct.unpack(MDMFCHECKSTRING, checkstring[:cs_len])
    assert version == MDMF_VERSION, version
    return (seqnum, root_hash)

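# Illustrative sketch, not part of the original module: dispatching on
# the version byte to pick the right checkstring unpacker. Note that
# MDMFCHECKSTRING is defined further down in this module, which is fine
# because the name is only resolved when the function is called.
def _example_unpack_any_checkstring(checkstring):
    if get_version_from_checkstring(checkstring) == MDMF_VERSION:
        return unpack_mdmf_checkstring(checkstring)
    return unpack_sdmf_checkstring(checkstring)
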
def pack_offsets(verification_key_length, signature_length,
                 share_hash_chain_length, block_hash_tree_length,
                 share_data_length, encprivkey_length):
    post_offset = HEADER_LENGTH
    offsets = {}
    o1 = offsets['signature'] = post_offset + verification_key_length
    o2 = offsets['share_hash_chain'] = o1 + signature_length
    o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length
    o4 = offsets['share_data'] = o3 + block_hash_tree_length
    o5 = offsets['enc_privkey'] = o4 + share_data_length
    offsets['EOF'] = o5 + encprivkey_length

    return struct.pack(">LLLLQQ",
                       offsets['signature'],
                       offsets['share_hash_chain'],
                       offsets['block_hash_tree'],
                       offsets['share_data'],
                       offsets['enc_privkey'],
                       offsets['EOF'])

def pack_share(prefix, verification_key, signature,
               share_hash_chain, block_hash_tree,
               share_data, encprivkey):
    share_hash_chain_s = b"".join([struct.pack(">H32s", i, share_hash_chain[i])
                                   for i in sorted(share_hash_chain.keys())])
    for h in block_hash_tree:
        assert len(h) == 32
    block_hash_tree_s = b"".join(block_hash_tree)

    offsets = pack_offsets(len(verification_key),
                           len(signature),
                           len(share_hash_chain_s),
                           len(block_hash_tree_s),
                           len(share_data),
                           len(encprivkey))
    final_share = b"".join([prefix,
                            offsets,
                            verification_key,
                            signature,
                            share_hash_chain_s,
                            block_hash_tree_s,
                            share_data,
                            encprivkey])
    return final_share

def pack_prefix(seqnum, root_hash, IV,
                required_shares, total_shares,
                segment_size, data_length):
    prefix = struct.pack(SIGNED_PREFIX,
                         0, # version,
                         seqnum,
                         root_hash,
                         IV,
                         required_shares,
                         total_shares,
                         segment_size,
                         data_length,
                         )
    return prefix

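# Illustrative sketch, not part of the original module: building a
# complete SDMF share with pack_prefix/pack_share and reading it back
# with unpack_share. All of the key, signature, and hash values below
# are dummies of plausible lengths.
def _example_sdmf_share_roundtrip():
    prefix = pack_prefix(1, b"\x00" * 32, b"\x01" * 16,
                         3, 10,   # k, N
                         36, 36)  # segment size, data length
    share = pack_share(prefix,
                       b"V" * 292,            # verification key
                       b"S" * 52,             # signature
                       {9: b"\x02" * 32},     # share hash chain
                       [b"\x03" * 32],        # block hash tree (one leaf)
                       b"D" * 12,             # share data (one block)
                       b"P" * 120)            # encrypted private key
    (seqnum, root_hash, IV, k, N, segsize, datalen,
     pubkey, signature, share_hash_chain, block_hash_tree,
     share_data, enc_privkey) = unpack_share(share)
    assert (seqnum, k, N) == (1, 3, 10)
    assert share_hash_chain == {9: b"\x02" * 32}
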

@implementer(IMutableSlotWriter)
class SDMFSlotWriteProxy(object):
    """
    I represent a remote write slot for an SDMF mutable file. I build a
    share in memory, and then write it in one piece to the remote
    server. This mimics how SDMF shares were built before MDMF (and the
    new MDMF uploader), but provides that functionality in a way that
    allows the MDMF uploader to be built without much special-casing for
    file format, which makes the uploader code more readable.
    """
    def __init__(self,
                 shnum,
                 storage_server, # an IStorageServer
                 storage_index,
                 secrets, # (write_enabler, renew_secret, cancel_secret)
                 seqnum, # the sequence number of the mutable file
                 required_shares,
                 total_shares,
                 segment_size,
                 data_length): # the length of the original file
        self.shnum = shnum
        self._storage_server = storage_server
        self._storage_index = storage_index
        self._secrets = secrets
        self._seqnum = seqnum
        self._required_shares = required_shares
        self._total_shares = total_shares
        self._segment_size = segment_size
        self._data_length = data_length

        # This is an SDMF file, so it should have only one segment, so,
        # modulo padding of the data length, the segment size and the
        # data length should be the same.
        expected_segment_size = mathutil.next_multiple(data_length,
                                                       self._required_shares)
        assert expected_segment_size == segment_size

        self._block_size = self._segment_size // self._required_shares

        # This is meant to mimic how SDMF files were built before MDMF
        # entered the picture: we generate each share in its entirety,
        # then push it off to the storage server in one write. When
        # callers call set_*, they are just populating this dict.
        # finish_publishing will stitch these pieces together into a
        # coherent share, and then write the coherent share to the
        # storage server.
        self._share_pieces = {}

        # This tells the write logic what checkstring to use when
        # writing remote shares.
        self._testvs = []

        self._readvs = [(0, struct.calcsize(PREFIX))]


    def set_checkstring(self, checkstring_or_seqnum,
                              root_hash=None,
                              salt=None):
        """
        Set the checkstring that I will pass to the remote server when
        writing.

            @param checkstring_or_seqnum: A packed checkstring to use,
                   or a sequence number. I will treat this as a checkstring.

        Note that implementations can differ in which semantics they
        wish to support for set_checkstring -- they can, for example,
        build the checkstring themselves from its constituents, or
        some other thing.
        """
        if root_hash and salt:
            checkstring = struct.pack(PREFIX,
                                      0,
                                      checkstring_or_seqnum,
                                      root_hash,
                                      salt)
        else:
            checkstring = checkstring_or_seqnum
        self._testvs = [(0, len(checkstring), checkstring)]


    def get_checkstring(self):
        """
        Get the checkstring that I think currently exists on the remote
        server.
        """
        if self._testvs:
            return self._testvs[0][2]
        return b""


    def put_block(self, data, segnum, salt):
        """
        Add a block and salt to the share.
        """
        # SDMF files have only one segment
        assert segnum == 0
        assert len(data) == self._block_size
        assert len(salt) == SALT_SIZE

        self._share_pieces['sharedata'] = data
        self._share_pieces['salt'] = salt

        # TODO: Figure out something intelligent to return.
        return defer.succeed(None)


    def put_encprivkey(self, encprivkey):
        """
        Add the encrypted private key to the share.
        """
        self._share_pieces['encprivkey'] = encprivkey

        return defer.succeed(None)


    def put_blockhashes(self, blockhashes):
        """
        Add the block hash tree to the share.
        """
        assert isinstance(blockhashes, list)
        for h in blockhashes:
            assert len(h) == HASH_SIZE

        # serialize the blockhashes, then set them.
        blockhashes_s = b"".join(blockhashes)
        self._share_pieces['block_hash_tree'] = blockhashes_s

        return defer.succeed(None)


    def put_sharehashes(self, sharehashes):
        """
        Add the share hash chain to the share.
        """
        assert isinstance(sharehashes, dict)
        for h in sharehashes.values():
            assert len(h) == HASH_SIZE

        # serialize the sharehashes, then set them.
        sharehashes_s = b"".join([struct.pack(">H32s", i, sharehashes[i])
                                  for i in sorted(sharehashes.keys())])
        self._share_pieces['share_hash_chain'] = sharehashes_s

        return defer.succeed(None)


    def put_root_hash(self, root_hash):
        """
        Add the root hash to the share.
        """
        assert len(root_hash) == HASH_SIZE

        self._share_pieces['root_hash'] = root_hash

        return defer.succeed(None)


    def put_salt(self, salt):
        """
        Add a salt to an empty SDMF file.
        """
        assert len(salt) == SALT_SIZE

        self._share_pieces['salt'] = salt
        self._share_pieces['sharedata'] = b""


    def get_signable(self):
        """
        Return the part of the share that needs to be signed.

        SDMF writers need to sign the packed representation of the
        first eight fields of the remote share, that is:
            - version number (0)
            - sequence number
            - root of the share hash tree
            - salt
            - k
            - n
            - segsize
            - datalen

        This method is responsible for returning that to callers.
        """
        return struct.pack(SIGNED_PREFIX,
                           0,
                           self._seqnum,
                           self._share_pieces['root_hash'],
                           self._share_pieces['salt'],
                           self._required_shares,
                           self._total_shares,
                           self._segment_size,
                           self._data_length)


    def put_signature(self, signature):
        """
        Add the signature to the share.
        """
        self._share_pieces['signature'] = signature

        return defer.succeed(None)


    def put_verification_key(self, verification_key):
        """
        Add the verification key to the share.
        """
        self._share_pieces['verification_key'] = verification_key

        return defer.succeed(None)


    def get_verinfo(self):
        """
        I return my verinfo tuple. This is used by the ServermapUpdater
        to keep track of versions of mutable files.

        The verinfo tuple for MDMF files contains:
            - seqnum
            - root hash
            - a blank (nothing)
            - segsize
            - datalen
            - k
            - n
            - prefix (the thing that you sign)
            - a tuple of offsets

        We include the nonce in MDMF to simplify processing of version
        information tuples.

        The verinfo tuple for SDMF files is the same, but contains a
        16-byte IV instead of a hash of salts.
        """
        return (self._seqnum,
                self._share_pieces['root_hash'],
                self._share_pieces['salt'],
                self._segment_size,
                self._data_length,
                self._required_shares,
                self._total_shares,
                self.get_signable(),
                self._get_offsets_tuple())

    def _get_offsets_dict(self):
        post_offset = HEADER_LENGTH
        offsets = {}

        verification_key_length = len(self._share_pieces['verification_key'])
        o1 = offsets['signature'] = post_offset + verification_key_length

        signature_length = len(self._share_pieces['signature'])
        o2 = offsets['share_hash_chain'] = o1 + signature_length

        share_hash_chain_length = len(self._share_pieces['share_hash_chain'])
        o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length

        block_hash_tree_length = len(self._share_pieces['block_hash_tree'])
        o4 = offsets['share_data'] = o3 + block_hash_tree_length

        share_data_length = len(self._share_pieces['sharedata'])
        o5 = offsets['enc_privkey'] = o4 + share_data_length

        encprivkey_length = len(self._share_pieces['encprivkey'])
        offsets['EOF'] = o5 + encprivkey_length
        return offsets


    def _get_offsets_tuple(self):
        offsets = self._get_offsets_dict()
        return tuple([(key, value) for key, value in offsets.items()])


    def _pack_offsets(self):
        offsets = self._get_offsets_dict()
        return struct.pack(">LLLLQQ",
                           offsets['signature'],
                           offsets['share_hash_chain'],
                           offsets['block_hash_tree'],
                           offsets['share_data'],
                           offsets['enc_privkey'],
                           offsets['EOF'])


    def finish_publishing(self):
        """
        Do anything necessary to finish writing the share to a remote
        server. I require that no further publishing needs to take place
        after this method has been called.
        """
        for k in ["sharedata", "encprivkey", "signature", "verification_key",
                  "share_hash_chain", "block_hash_tree"]:
            assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys())
        # This is the only method that actually writes something to the
        # remote server.
        # First, we need to pack the share into data that we can write
        # to the remote server in one write.
        offsets = self._pack_offsets()
        prefix = self.get_signable()
        final_share = b"".join([prefix,
                                offsets,
                                self._share_pieces['verification_key'],
                                self._share_pieces['signature'],
                                self._share_pieces['share_hash_chain'],
                                self._share_pieces['block_hash_tree'],
                                self._share_pieces['sharedata'],
                                self._share_pieces['encprivkey']])

        # Our only data vector is going to be writing the final share,
        # in its entirety.
        datavs = [(0, final_share)]

        if not self._testvs:
            # Our caller has not provided us with another checkstring
            # yet, so we assume that we are writing a new share, and set
            # a test vector that will only allow a new share to be written.
            self._testvs = []
            self._testvs.append(tuple([0, 1, b""]))

        tw_vectors = {}
        tw_vectors[self.shnum] = (self._testvs, datavs, None)
        return self._storage_server.slot_testv_and_readv_and_writev(
            self._storage_index,
            self._secrets,
            tw_vectors,
            # TODO is it useful to read something?
            self._readvs,
        )


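# Illustrative sketch, not part of the original module: the SDMF write
# flow end-to-end, driven against a minimal stand-in for the storage
# server. _StubStorageServer is hypothetical and only implements the
# one method the writers call; all keys, hashes, and secrets below are
# dummy values of plausible lengths.
class _StubStorageServer(object):
    def slot_testv_and_readv_and_writev(self, storage_index, secrets,
                                        tw_vectors, readvs):
        # Record the request and pretend the test vectors matched.
        self.last_request = (storage_index, tw_vectors, readvs)
        return defer.succeed((True, {}))

def _example_sdmf_write_flow():
    k, N, datalen = 3, 10, 36
    segsize = mathutil.next_multiple(datalen, k)  # SDMF: one padded segment
    writer = SDMFSlotWriteProxy(0, _StubStorageServer(), b"\x00" * 16,
                                (b"we", b"rs", b"cs"),  # dummy secrets
                                1, k, N, segsize, datalen)
    writer.put_block(b"D" * (segsize // k), 0, b"\x01" * 16)
    writer.put_encprivkey(b"P" * 120)
    writer.put_blockhashes([b"\x02" * 32])
    writer.put_sharehashes({9: b"\x03" * 32})
    writer.put_root_hash(b"\x04" * 32)
    writer.put_signature(b"S" * 52)
    writer.put_verification_key(b"V" * 292)
    return writer.finish_publishing()
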
MDMFHEADER = ">BQ32sBBQQ QQQQQQQQ"
MDMFHEADERWITHOUTOFFSETS = ">BQ32sBBQQ"
MDMFHEADERSIZE = struct.calcsize(MDMFHEADER)
MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
MDMFCHECKSTRING = ">BQ32s"
MDMFSIGNABLEHEADER = ">BQ32sBBQQ"
MDMFOFFSETS = ">QQQQQQQQ"
MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS)

PRIVATE_KEY_SIZE = 1220
SIGNATURE_SIZE = 260
VERIFICATION_KEY_SIZE = 292
# We know we won't have more than 256 shares, and we know that we won't need
# to store more than ln2(256) hash-chain nodes to validate, so that's our
# bound. Each node requires 2 bytes of node-number plus 32 bytes of hash.
SHARE_HASH_CHAIN_SIZE = (2+HASH_SIZE)*mathutil.log_ceil(256, 2)
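# For reference: log_ceil(256, 2) is 8, so SHARE_HASH_CHAIN_SIZE works
# out to (2+32)*8 = 272 bytes; MDMFHEADERSIZE works out to 123 bytes
# (a 59-byte signable header plus eight 8-byte offsets).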

@implementer(IMutableSlotWriter)
class MDMFSlotWriteProxy(object):

    """
    I represent a remote write slot for an MDMF mutable file.

    I abstract away from my caller the details of block and salt
    management, and the implementation of the on-disk format for MDMF
    shares.
    """
    # Expected layout, MDMF:
    # offset:     size:       name:
    #-- signed part --
    # 0           1           version number (01)
    # 1           8           sequence number
    # 9           32          share tree root hash
    # 41          1           The "k" encoding parameter
    # 42          1           The "N" encoding parameter
    # 43          8           The segment size of the uploaded file
    # 51          8           The data length of the original plaintext
    #-- end signed part --
    # 59          8           The offset of the encrypted private key
    # 67          8           The offset of the share hash chain
    # 75          8           The offset of the signature
    # 83          8           The offset of the verification key
    # 91          8           The offset of the end of the v. key.
    # 99          8           The offset of the share data
    # 107         8           The offset of the block hash tree
    # 115         8           The offset of EOF
    # 123         var         encrypted private key
    # var         var         share hash chain
    # var         var         signature
    # var         var         verification key
    # var         large       share data
    # var         var         block hash tree
    #
    # We order the fields that way to make smart downloaders -- downloaders
    # which preemptively read a big part of the share -- possible.
    #
    # The checkstring is the first three fields -- the version number,
    # sequence number, and root hash. This is consistent
    # in meaning to what we have with SDMF files, except now instead of
    # using the literal salt, we use a value derived from all of the
    # salts -- the share hash root.
    #
    # The salt is stored before the block for each segment. The block
    # hash tree is computed over the combination of block and salt for
    # each segment. In this way, we get integrity checking for both
    # block and salt with the current block hash tree arrangement.
    #
    # The ordering of the offsets is different to reflect the dependencies
    # that we'll run into with an MDMF file. The expected write flow is
    # something like this:
    #
    #   0: Initialize with the sequence number, encoding parameters and
    #      data length. From this, we can deduce the number of segments,
    #      and where they should go. We can also figure out where the
    #      encrypted private key should go, because we can figure out how
    #      big the share data will be.
    #
    #   1: Encrypt, encode, and upload the file in chunks. Do something
    #      like
    #
    #       put_block(data, segnum, salt)
    #
    #      to write a block and a salt to the disk. We can do both of
    #      these operations now because we have enough of the offsets to
    #      know where to put them.
    #
    #   2: Put the encrypted private key. Use:
    #
    #        put_encprivkey(encprivkey)
    #
    #      Now that we know the length of the private key, we can fill
    #      in the offset for the block hash tree.
    #
    #   3: We're now in a position to upload the block hash tree for
    #      a share. Put that using something like:
    #
    #        put_blockhashes(block_hash_tree)
    #
    #      Note that block_hash_tree is a list of hashes -- we'll take
    #      care of the details of serializing that appropriately. When
    #      we get the block hash tree, we are also in a position to
    #      calculate the offset for the share hash chain, and fill that
    #      into the offsets table.
    #
    #   4: We're now in a position to upload the share hash chain for
    #      a share. Do that with something like:
    #
    #        put_sharehashes(share_hash_chain)
    #
    #      share_hash_chain should be a dictionary mapping shnums to
    #      32-byte hashes -- the wrapper handles serialization.
    #      We'll know where to put the signature at this point, also.
    #      The root of this tree will be put explicitly in the next
    #      step.
    #
    #   5: Before putting the signature, we must first put the
    #      root_hash. Do this with:
    #
    #        put_root_hash(root_hash).
    #
    #      In terms of knowing where to put this value, it was always
    #      possible to place it, but it makes sense semantically to
    #      place it after the share hash tree, so that's why you do it
    #      in this order.
    #
    #   6: With the root hash put, we can now sign the header. Use:
    #
    #        get_signable()
    #
    #      to get the part of the header that you want to sign, and use:
    #
    #        put_signature(signature)
    #
    #      to write your signature to the remote server.
    #
    #   7: Add the verification key, and finish. Do:
    #
    #        put_verification_key(key)
    #
    #      and
    #
    #        finish_publishing()
    #
    #  (An end-to-end sketch of this flow appears after this class
    #  definition.)
    #
    # Checkstring management:
    #
    # To write to a mutable slot, we have to provide test vectors to ensure
    # that we are writing to the same data that we think we are. These
    # vectors allow us to detect uncoordinated writes; that is, writes
    # where both we and some other shareholder are writing to the
    # mutable slot, and to report those back to the parts of the program
    # doing the writing.
    #
    # With SDMF, this was easy -- all of the share data was written in
    # one go, so it was easy to detect uncoordinated writes, and we only
    # had to do it once. With MDMF, not all of the file is written at
    # once.
    #
    # If a share is new, we write out as much of the header as we can
    # before writing out anything else. This gives other writers a
    # canary that they can use to detect uncoordinated writes, and, if
    # they do the same thing, gives us the same canary. We then update
    # the share. We won't be able to write out two fields of the header
    # -- the share tree hash and the salt hash -- until we finish
    # writing out the share. We only require the writer to provide the
    # initial checkstring, and keep track of what it should be after
    # updates ourselves.
    #
    # If we haven't written anything yet, then on the first write (which
    # will probably be a block + salt of a share), we'll also write out
    # the header. On subsequent passes, we'll expect to see the header.
    # This changes in two places:
    #
    #   - When we write out the salt hash
    #   - When we write out the root of the share hash tree
    #
    # since these values will change the header. It is possible that we
    # can just make those be written in one operation to minimize
    # disruption.
    def __init__(self,
                 shnum,
                 storage_server, # a remote reference to a storage server
                 storage_index,
                 secrets, # (write_enabler, renew_secret, cancel_secret)
                 seqnum, # the sequence number of the mutable file
                 required_shares,
                 total_shares,
                 segment_size,
                 data_length): # the length of the original file
        self.shnum = shnum
        self._storage_server = storage_server
        self._storage_index = storage_index
        self._seqnum = seqnum
        self._required_shares = required_shares
        assert self.shnum >= 0 and self.shnum < total_shares
        self._total_shares = total_shares
        # We build up the offset table as we write things. It is the
        # last thing we write to the remote server.
        self._offsets = {}
        self._testvs = []
        # This is a list of write vectors that will be sent to our
        # remote server once we are directed to write things there.
        self._writevs = []
        self._secrets = secrets
        # The segment size needs to be a multiple of the k parameter --
        # any padding should have been carried out by the publisher
        # already.
        assert segment_size % required_shares == 0
        self._segment_size = segment_size
        self._data_length = data_length

        # These are set later -- we define them here so that we can
        # check for their existence easily

        # This is the root of the share hash tree -- the Merkle tree
        # over the roots of the block hash trees computed for shares in
        # this upload.
        self._root_hash = None

        # We haven't yet written anything to the remote bucket. By
        # setting this, we tell the _write method as much. The write
        # method will then know that it also needs to add a write vector
        # for the checkstring (or what we have of it) to the first write
        # request. We'll then record that value for future use.  If
        # we're expecting something to be there already, we need to call
        # set_checkstring before we write anything to tell the first
        # write about that.
        self._written = False

        # When writing data to the storage servers, we get a read vector
        # for free. We'll read the checkstring, which will help us
        # figure out what's gone wrong if a write fails.
        self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))]

        # We calculate the number of segments because it tells us
        # where the salt part of the file ends/share segment begins,
        # and also because it provides a useful amount of bounds checking.
        self._num_segments = mathutil.div_ceil(self._data_length,
                                               self._segment_size)
        self._block_size = self._segment_size // self._required_shares
        # We also calculate the share size, to help us with block
        # constraints later.
        tail_size = self._data_length % self._segment_size
        if not tail_size:
            self._tail_block_size = self._block_size
        else:
            self._tail_block_size = mathutil.next_multiple(tail_size,
                                                           self._required_shares)
            self._tail_block_size = self._tail_block_size // self._required_shares

        # We already know where the sharedata starts; right after the end
        # of the header (which is defined as the signable part + the offsets)
        # We can also calculate where the encrypted private key begins
        # from what we now know.
        self._actual_block_size = self._block_size + SALT_SIZE
        data_size = self._actual_block_size * (self._num_segments - 1)
        data_size += self._tail_block_size
        data_size += SALT_SIZE
        self._offsets['enc_privkey'] = MDMFHEADERSIZE

        # We don't define offsets for these because we want them to be
        # tightly packed -- this allows us to ignore the responsibility
        # of padding individual values, and of removing that padding
        # later. So nonconstant_start is where we start writing
        # nonconstant data.
        nonconstant_start = self._offsets['enc_privkey']
        nonconstant_start += PRIVATE_KEY_SIZE
        nonconstant_start += SIGNATURE_SIZE
        nonconstant_start += VERIFICATION_KEY_SIZE
        nonconstant_start += SHARE_HASH_CHAIN_SIZE

        self._offsets['share_data'] = nonconstant_start

        # Finally, we know how big the share data will be, so we can
        # figure out where the block hash tree needs to go.
        # XXX: But this will go away if Zooko wants to make it so that
        # you don't need to know the size of the file before you start
        # uploading it.
        self._offsets['block_hash_tree'] = self._offsets['share_data'] + \
                    data_size

        # Done. We can now start writing.


    def set_checkstring(self,
                        seqnum_or_checkstring,
                        root_hash=None,
                        salt=None):
        """
        Set the checkstring for the given shnum.

        This can be invoked in one of two ways.

        With one argument, I assume that you are giving me a literal
        checkstring -- e.g., the output of get_checkstring. I will then
        set that checkstring as it is. This form is used by unit tests.

        With two arguments, I assume that you are giving me a sequence
        number and root hash to make a checkstring from. In that case, I
        will build a checkstring and set it for you. This form is used
        by the publisher.

        By default, I assume that I am writing new shares to the grid.
        If you don't explicitly set your own checkstring, I will use
        one that requires that the remote share not exist. You will want
        to use this method if you are updating a share in-place;
        otherwise, writes will fail.
        """
        # You're allowed to overwrite checkstrings with this method;
        # I assume that users know what they are doing when they call
        # it.
        if root_hash:
            checkstring = struct.pack(MDMFCHECKSTRING,
                                      1,
                                      seqnum_or_checkstring,
                                      root_hash)
        else:
            checkstring = seqnum_or_checkstring

        if checkstring == b"":
            # We special-case this, since len("") = 0, but we need
            # length of 1 for the case of an empty share to work on the
            # storage server, which is what a checkstring that is the
            # empty string means.
            self._testvs = []
        else:
            self._testvs = []
            self._testvs.append((0, len(checkstring), checkstring))


    def __repr__(self):
        return "MDMFSlotWriteProxy for share %d" % self.shnum


    def get_checkstring(self):
        """
        Given a share number, I return a representation of what the
        checkstring for that share on the server will look like.

        I am mostly used for tests.
        """
        if self._root_hash:
            roothash = self._root_hash
        else:
            roothash = b"\x00" * 32
        return struct.pack(MDMFCHECKSTRING,
                           1,
                           self._seqnum,
                           roothash)


    def put_block(self, data, segnum, salt):
        """
        I queue a write vector for the data, salt, and segment number
        provided to me. I return None, as I do not actually cause
        anything to be written yet.
        """
        if segnum >= self._num_segments:
            raise LayoutInvalid("I won't overwrite the block hash tree")
        if len(salt) != SALT_SIZE:
            raise LayoutInvalid("I was given a salt of size %d, but "
                                "I wanted a salt of size %d"
                                % (len(salt), SALT_SIZE))
        if segnum + 1 == self._num_segments:
            if len(data) != self._tail_block_size:
                raise LayoutInvalid("I was given the wrong size block to write")
        elif len(data) != self._block_size:
            raise LayoutInvalid("I was given the wrong size block to write")

        # Write at the share data offset, plus segnum (salt + block) units.
        offset = self._offsets['share_data'] + \
            (self._actual_block_size * segnum)
        data = salt + data

        self._writevs.append(tuple([offset, data]))


    def put_encprivkey(self, encprivkey):
        """
        I queue a write vector for the encrypted private key provided to
        me.
        """
        assert self._offsets
        assert self._offsets['enc_privkey']
        # You shouldn't re-write the encprivkey after the block hash
        # tree is written, since that could cause the private key to run
        # into the block hash tree. Before it writes the block hash
        # tree, the block hash tree writing method writes the offset of
        # the share hash chain. So that's a good indicator of whether or
        # not the block hash tree has been written.
        if "signature" in self._offsets:
            raise LayoutInvalid("You can't put the encrypted private key "
                                "after putting the share hash chain")

        self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \
                len(encprivkey)

        self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey]))


    def put_blockhashes(self, blockhashes):
        """
        I queue a write vector to put the block hash tree in blockhashes
        onto the remote server.

        The encrypted private key must be queued before the block hash
        tree, since we need to know how large it is to know where the
        block hash tree should go. The block hash tree must be put
        before the share hash chain, since its size determines the
        offset of the share hash chain.
        """
        assert self._offsets
        assert "block_hash_tree" in self._offsets

        assert isinstance(blockhashes, list)

        blockhashes_s = b"".join(blockhashes)
        self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s)

        self._writevs.append(tuple([self._offsets['block_hash_tree'],
                                  blockhashes_s]))


    def put_sharehashes(self, sharehashes):
        """
        I queue a write vector to put the share hash chain in my
        argument onto the remote server.

        The block hash tree must be queued before the share hash chain,
        since we need to know where the block hash tree ends before we
        can know where the share hash chain starts. The share hash chain
        must be put before the signature, since the length of the packed
        share hash chain determines the offset of the signature. Also,
        semantically, you must know what the root of the block hash tree
        is before you can generate a valid signature.
        """
        assert isinstance(sharehashes, dict)
        assert self._offsets
        if "share_hash_chain" not in self._offsets:
            raise LayoutInvalid("You must put the block hash tree before "
                                "putting the share hash chain")

        # The signature comes after the share hash chain. If the
        # signature has already been written, we must not write another
        # share hash chain. The signature writes the verification key
        # offset when it gets sent to the remote server, so we look for
        # that.
        if "verification_key" in self._offsets:
            raise LayoutInvalid("You must write the share hash chain "
                                "before you write the signature")
        sharehashes_s = b"".join([struct.pack(">H32s", i, sharehashes[i])
                                  for i in sorted(sharehashes.keys())])
        self._offsets['signature'] = self._offsets['share_hash_chain'] + \
            len(sharehashes_s)
        self._writevs.append(tuple([self._offsets['share_hash_chain'],
                            sharehashes_s]))


    def put_root_hash(self, roothash):
        """
        Put the root hash (the root of the share hash tree) in the
        remote slot.
        """
        # It does not make sense to be able to put the root
        # hash without first putting the share hashes, since you need
        # the share hashes to generate the root hash.
        #
        # Signature is defined by the routine that places the share hash
        # chain, so it's a good thing to look for in finding out whether
        # or not the share hash chain exists on the remote server.
        if len(roothash) != HASH_SIZE:
            raise LayoutInvalid("hashes and salts must be exactly %d bytes"
                                % HASH_SIZE)
        self._root_hash = roothash
        # To write both of these values, we update the checkstring on
        # the remote server, which includes them
        checkstring = self.get_checkstring()
        self._writevs.append(tuple([0, checkstring]))
        # This write, if successful, changes the checkstring, so we need
        # to update our internal checkstring to be consistent with the
        # one on the server.


    def get_signable(self):
        """
        Get the first seven fields of the mutable file; the parts that
        are signed.
        """
        if not self._root_hash:
            raise LayoutInvalid("You need to set the root hash "
                                "before getting something to "
                                "sign")
        return struct.pack(MDMFSIGNABLEHEADER,
                           1,
                           self._seqnum,
                           self._root_hash,
                           self._required_shares,
                           self._total_shares,
                           self._segment_size,
                           self._data_length)


    def put_signature(self, signature):
        """
        I queue a write vector for the signature of the MDMF share.

        I require that the root hash and share hash chain have been put
        to the grid before I will write the signature to the grid.
        """
        # It does not make sense to put a signature without first
        # putting the root hash and the salt hash (since otherwise
        # the signature would be incomplete), so we don't allow that.
        if "signature" not in self._offsets:
            raise LayoutInvalid("You must put the share hash chain "
                                "before putting the signature")
        if not self._root_hash:
            raise LayoutInvalid("You must complete the signed prefix "
                                "before computing a signature")
        # If we put the signature after we put the verification key, we
        # could end up running into the verification key, and will
        # probably screw up the offsets as well. So we don't allow that.
        if "verification_key_end" in self._offsets:
            raise LayoutInvalid("You can't put the signature after the "
                                "verification key")
        # The method that writes the verification key defines the EOF
        # offset before writing the verification key, so look for that.
        self._offsets['verification_key'] = self._offsets['signature'] +\
            len(signature)
        self._writevs.append(tuple([self._offsets['signature'], signature]))


    def put_verification_key(self, verification_key):
        """
        I queue a write vector for the verification key.

        I require that the signature have been written to the storage
        server before I allow the verification key to be written to the
        remote server.
        """
        if "verification_key" not in self._offsets:
            raise LayoutInvalid("You must put the signature before you "
                                "can put the verification key")

        self._offsets['verification_key_end'] = \
            self._offsets['verification_key'] + len(verification_key)
        assert self._offsets['verification_key_end'] <= self._offsets['share_data']
        self._writevs.append(tuple([self._offsets['verification_key'],
                            verification_key]))


    def _get_offsets_tuple(self):
        return tuple([(key, value) for key, value in self._offsets.items()])


    def get_verinfo(self):
        return (self._seqnum,
                self._root_hash,
                None,
                self._segment_size,
                self._data_length,
                self._required_shares,
                self._total_shares,
                self.get_signable(),
                self._get_offsets_tuple())


    def finish_publishing(self):
        """
        I add a write vector for the offsets table, and then cause all
        of the write vectors that I've dealt with so far to be published
        to the remote server, ending the write process.
        """
        if "verification_key_end" not in self._offsets:
            raise LayoutInvalid("You must put the verification key before "
                                "you can publish the offsets")
        offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
        offsets = struct.pack(MDMFOFFSETS,
                              self._offsets['enc_privkey'],
                              self._offsets['share_hash_chain'],
                              self._offsets['signature'],
                              self._offsets['verification_key'],
                              self._offsets['verification_key_end'],
                              self._offsets['share_data'],
                              self._offsets['block_hash_tree'],
                              self._offsets['EOF'])
        self._writevs.append(tuple([offsets_offset, offsets]))
        encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING)
        params = struct.pack(">BBQQ",
                             self._required_shares,
                             self._total_shares,
                             self._segment_size,
                             self._data_length)
        self._writevs.append(tuple([encoding_parameters_offset, params]))
        return self._write(self._writevs)


    def _write(self, datavs, on_failure=None, on_success=None):
        """I write the data vectors in datavs to the remote slot."""
        tw_vectors = {}
        if not self._testvs:
            # Make sure we will only successfully write if the share didn't
            # previously exist.
            self._testvs = []
            self._testvs.append(tuple([0, 1, b""]))
        if not self._written:
            # Write a new checkstring to the share when we write it, so
            # that we have something to check later.
            new_checkstring = self.get_checkstring()
            datavs.append((0, new_checkstring))
            def _first_write():
                self._written = True
                self._testvs = [(0, len(new_checkstring), new_checkstring)]
            on_success = _first_write
        tw_vectors[self.shnum] = (self._testvs, datavs, None)
        d = self._storage_server.slot_testv_and_readv_and_writev(
            self._storage_index,
            self._secrets,
            tw_vectors,
            self._readv,
        )
        def _result(results):
            if isinstance(results, failure.Failure) or not results[0]:
                # Do nothing; the write was unsuccessful.
                if on_failure: on_failure()
            else:
                if on_success: on_success()
            return results
        d.addBoth(_result)
        return d

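# Illustrative sketch, not part of the original module: the MDMF write
# flow described in the comments above, reusing the hypothetical
# _StubStorageServer defined earlier. Note the ordering constraints the
# proxy enforces: encprivkey, then block hashes, then share hashes, then
# root hash, then signature, then verification key, then the offsets.
def _example_mdmf_write_flow():
    k, N, datalen = 3, 10, 36
    segsize = 36  # must be a multiple of k; one segment here
    writer = MDMFSlotWriteProxy(0, _StubStorageServer(), b"\x00" * 16,
                                (b"we", b"rs", b"cs"),  # dummy secrets
                                1, k, N, segsize, datalen)
    writer.put_block(b"D" * (segsize // k), 0, b"\x01" * 16)
    writer.put_encprivkey(b"P" * 120)
    writer.put_blockhashes([b"\x02" * 32])
    writer.put_sharehashes({9: b"\x03" * 32})
    writer.put_root_hash(b"\x04" * 32)
    signable = writer.get_signable()  # a real caller signs this
    writer.put_signature(b"S" * 52)
    writer.put_verification_key(b"V" * 292)
    return writer.finish_publishing()
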
def _handle_bad_struct(f):
    # struct.unpack errors mean the server didn't give us enough data, so
    # this share is bad
    f.trap(struct.error)
    raise BadShareError(f.value.args[0])

class MDMFSlotReadProxy(object):
    """
    I read from a mutable slot filled with data written in the MDMF data
    format (which is described above).

    I can be initialized with some amount of data, which I will use (if
    it is valid) to eliminate some of the need to fetch it from servers.
    """
    def __init__(self,
                 storage_server,
                 storage_index,
                 shnum,
                 data=b"",
                 data_is_everything=False):
        # Start the initialization process.
        self._storage_server = storage_server
        self._storage_index = storage_index
        self.shnum = shnum

        # Before doing anything, the reader is probably going to want to
        # verify that the signature is correct. To do that, they'll need
        # the verification key, and the signature. To get those, we'll
        # need the offset table. So fetch the offset table on the
        # assumption that that will be the first thing that a reader is
        # going to do.

        # The fact that these encoding parameters are None tells us
        # that we haven't yet fetched them from the remote share, so we
        # should. We could just not set them, but the checks will be
        # easier to read if we don't have to use hasattr.
        self._version_number = None
        self._sequence_number = None
        self._root_hash = None
        # Filled in if we're dealing with an SDMF file. Unused
        # otherwise.
        self._salt = None
        self._required_shares = None
        self._total_shares = None
        self._segment_size = None
        self._data_length = None
        self._offsets = None

        # If the user has chosen to initialize us with some data, we'll
        # try to satisfy subsequent data requests with that data before
        # asking the storage server for it.
        self._data = data

        # If the provided data is known to be complete, then we know there's
        # nothing to be gained by querying the server, so we should just
        # partially satisfy requests with what we have.
        self._data_is_everything = data_is_everything

        # The way callers interact with cache in the filenode returns
        # None if there isn't any cached data, but the way we index the
        # cached data requires a string, so convert None to "".
        if self._data is None:
            self._data = b""


    def _maybe_fetch_offsets_and_header(self, force_remote=False):
        """
        I fetch the offset table and the header from the remote slot if
        I don't already have them. If I do have them, I do nothing and
        return an empty Deferred.
        """
        if self._offsets:
            return defer.succeed(None)
        # At this point, we may be either SDMF or MDMF. Fetching 123
        # bytes is enough to get the header and offsets for both SDMF
        # (whose header is 107 bytes) and MDMF (whose header is 123
        # bytes), though for SDMF we'll be left with 16 more bytes than
        # we need. This is probably less expensive than the cost of a
        # second roundtrip.
        readvs = [(0, 123)]
1263        d = self._read(readvs, force_remote)
1264        d.addCallback(self._process_encoding_parameters)
1265        d.addCallback(self._process_offsets)
1266        d.addErrback(_handle_bad_struct)
1267        return d
1268
1269
    def _process_encoding_parameters(self, encoding_parameters):
        if self.shnum not in encoding_parameters:
            raise BadShareError("no data for shnum %d" % self.shnum)
        encoding_parameters = encoding_parameters[self.shnum][0]
        # The first byte is the version number. It will tell us what
        # to do next.
        (verno,) = struct.unpack(">B", encoding_parameters[:1])
        if verno == MDMF_VERSION:
            read_size = MDMFHEADERWITHOUTOFFSETSSIZE
            (verno,
             seqnum,
             root_hash,
             k,
             n,
             segsize,
             datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS,
                                      encoding_parameters[:read_size])
            if segsize == 0 and datalen == 0:
                # Empty file, no segments.
                self._num_segments = 0
            else:
                self._num_segments = mathutil.div_ceil(datalen, segsize)

        elif verno == SDMF_VERSION:
            read_size = SIGNED_PREFIX_LENGTH
            (verno,
             seqnum,
             root_hash,
             salt,
             k,
             n,
             segsize,
             datalen) = struct.unpack(SIGNED_PREFIX,
                                      encoding_parameters[:SIGNED_PREFIX_LENGTH])
            self._salt = salt
            if segsize == 0 and datalen == 0:
                # Empty file, no segments.
                self._num_segments = 0
            else:
                # Non-empty SDMF files have exactly one segment.
                self._num_segments = 1
        else:
            raise UnknownVersionError("You asked me to read mutable file "
                                      "version %d, but I only understand "
                                      "%d and %d" % (verno, SDMF_VERSION,
                                                     MDMF_VERSION))

        self._version_number = verno
        self._sequence_number = seqnum
        self._root_hash = root_hash
        self._required_shares = k
        self._total_shares = n
        self._segment_size = segsize
        self._data_length = datalen

        self._block_size = self._segment_size // self._required_shares
        # We can upload empty files, and need to account for this fact
        # so as to avoid zero-division and zero-modulo errors.
        if datalen > 0:
            tail_size = self._data_length % self._segment_size
        else:
            tail_size = 0
        if not tail_size:
            self._tail_block_size = self._block_size
        else:
            self._tail_block_size = mathutil.next_multiple(tail_size,
                                                           self._required_shares)
            self._tail_block_size = self._tail_block_size // self._required_shares
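        # A worked example of the tail arithmetic (illustrative numbers,
        # not taken from any spec): with k = 3, segsize = 6 and
        # datalen = 10, the tail segment holds 10 % 6 = 4 bytes;
        # next_multiple(4, 3) = 6, so each share's tail block is
        # 6 // 3 = 2 bytes.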

        return encoding_parameters


    def _process_offsets(self, offsets):
        if self._version_number == 0:
            read_size = OFFSETS_LENGTH
            read_offset = SIGNED_PREFIX_LENGTH
            end = read_size + read_offset
            (signature,
             share_hash_chain,
             block_hash_tree,
             share_data,
             enc_privkey,
             EOF) = struct.unpack(">LLLLQQ",
                                  offsets[read_offset:end])
            self._offsets = {}
            self._offsets['signature'] = signature
            self._offsets['share_data'] = share_data
            self._offsets['block_hash_tree'] = block_hash_tree
            self._offsets['share_hash_chain'] = share_hash_chain
            self._offsets['enc_privkey'] = enc_privkey
            self._offsets['EOF'] = EOF

        elif self._version_number == 1:
            read_offset = MDMFHEADERWITHOUTOFFSETSSIZE
            read_length = MDMFOFFSETS_LENGTH
            end = read_offset + read_length
            (encprivkey,
             sharehashes,
             signature,
             verification_key,
             verification_key_end,
             sharedata,
             blockhashes,
             eof) = struct.unpack(MDMFOFFSETS,
                                  offsets[read_offset:end])
            self._offsets = {}
            self._offsets['enc_privkey'] = encprivkey
            self._offsets['block_hash_tree'] = blockhashes
            self._offsets['share_hash_chain'] = sharehashes
            self._offsets['signature'] = signature
            self._offsets['verification_key'] = verification_key
            self._offsets['verification_key_end'] = verification_key_end
            self._offsets['EOF'] = eof
            self._offsets['share_data'] = sharedata
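            # Note that the fields are not laid out on disk in
            # offset-table order. As the length computations in the
            # getters below imply, an MDMF share runs: header, encrypted
            # private key, share hash chain, signature, verification
            # key, share data, block hash tree, EOF.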


    def get_block_and_salt(self, segnum):
        """
        I return (block, salt), where block is the block data and
        salt is the salt used to encrypt that segment.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _then(ignored):
            base_share_offset = self._offsets['share_data']

            if segnum + 1 > self._num_segments:
                raise LayoutInvalid("Not a valid segment number")

            if self._version_number == 0:
                share_offset = base_share_offset + self._block_size * segnum
            else:
                share_offset = base_share_offset + (self._block_size +
                                                    SALT_SIZE) * segnum
            if segnum + 1 == self._num_segments:
                read_length = self._tail_block_size
            else:
                read_length = self._block_size

            if self._version_number == 1:
                # MDMF stores a per-segment salt immediately before each
                # block, so read it too.
                read_length += SALT_SIZE

            readvs = [(share_offset, read_length)]
            return readvs
        d.addCallback(_then)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            if self._version_number == 0:
                # We only read the share data; we already know the salt
                # from when we fetched the header.
                data = results[self.shnum]
                if not data:
                    data = b""
                else:
                    if len(data) != 1:
                        raise BadShareError("got %d vectors, not 1" % len(data))
                    data = data[0]
                salt = self._salt
            else:
                data = results[self.shnum]
                if not data:
                    salt = data = b""
                else:
                    salt_and_data = results[self.shnum][0]
                    salt = salt_and_data[:SALT_SIZE]
                    data = salt_and_data[SALT_SIZE:]
            return data, salt
        d.addCallback(_process_results)
        return d
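    # Example of the per-segment arithmetic above (illustrative
    # numbers): for an MDMF share with a 64-byte block size, segment 2
    # starts at self._offsets['share_data'] + (64 + 16) * 2, and the
    # 16-byte salt precedes the 64 block bytes in the data read back.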


    def get_blockhashes(self, needed=None, force_remote=False):
        """
        I return the block hash tree.

        I take an optional argument, needed, which is a set of indices
        that correspond to hashes that I should fetch. If this argument
        is missing, I will fetch the entire block hash tree; otherwise,
        I may attempt to fetch fewer hashes, based on what needed tells
        me to fetch. Note that I may fetch as many hashes as I want, so
        long as the set of hashes that I do fetch is a superset of the
        ones that I am asked for; callers should be prepared to tolerate
        additional hashes.
        """
        # TODO: Return only the parts of the block hash tree necessary
        # to validate the blocknum provided?
        # This is a good idea, but it is hard to implement correctly. It
        # is bad to fetch any one block hash more than once, so we
        # probably just want to fetch the whole thing at once and then
        # serve it.
        if needed == set():
            return defer.succeed([])
        d = self._maybe_fetch_offsets_and_header()
        def _then(ignored):
            blockhashes_offset = self._offsets['block_hash_tree']
            if self._version_number == 1:
                blockhashes_length = self._offsets['EOF'] - blockhashes_offset
            else:
                blockhashes_length = self._offsets['share_data'] - blockhashes_offset
            readvs = [(blockhashes_offset, blockhashes_length)]
            return readvs
        d.addCallback(_then)
        d.addCallback(lambda readvs:
            self._read(readvs, force_remote=force_remote))
        def _build_block_hash_tree(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)

            rawhashes = results[self.shnum][0]
            results = [rawhashes[i:i+HASH_SIZE]
                       for i in range(0, len(rawhashes), HASH_SIZE)]
            return results
        d.addCallback(_build_block_hash_tree)
        return d
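    # The block hash tree is stored as a flat concatenation of 32-byte
    # hashes, so slicing rawhashes on HASH_SIZE boundaries recovers the
    # list of tree nodes.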


    def get_sharehashes(self, needed=None, force_remote=False):
        """
        I return the part of the share hash chain that is needed to
        validate this share.

        I take an optional argument, needed, which is a set of indices
        that correspond to the hashes that I should fetch. If needed is
        not present, I will fetch and return the entire share hash
        chain. Otherwise, I may fetch and return any part of the share
        hash chain that is a superset of the part that I am asked to
        fetch. Callers should be prepared to deal with more hashes than
        they've asked for.
        """
        if needed == set():
            return defer.succeed([])
        d = self._maybe_fetch_offsets_and_header()

        def _make_readvs(ignored):
            sharehashes_offset = self._offsets['share_hash_chain']
            if self._version_number == 0:
                sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset
            else:
                sharehashes_length = self._offsets['signature'] - sharehashes_offset
            readvs = [(sharehashes_offset, sharehashes_length)]
            return readvs
        d.addCallback(_make_readvs)
        d.addCallback(lambda readvs:
            self._read(readvs, force_remote=force_remote))
        def _build_share_hash_chain(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)

            sharehashes = results[self.shnum][0]
            results = [sharehashes[i:i+(HASH_SIZE + 2)]
                       for i in range(0, len(sharehashes), HASH_SIZE + 2)]
            results = dict([struct.unpack(">H32s", data)
                            for data in results])
            return results
        d.addCallback(_build_share_hash_chain)
        d.addErrback(_handle_bad_struct)
        return d
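    # Each share hash chain entry on the wire is HASH_SIZE + 2 == 34
    # bytes: a 2-byte big-endian node number followed by a 32-byte hash.
    # ">H32s" unpacks one entry into a (node_number, hash) pair, and
    # dict() collects those pairs into a mapping.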


    def get_encprivkey(self):
        """
        I return the encrypted private key.
        """
        d = self._maybe_fetch_offsets_and_header()

        def _make_readvs(ignored):
            privkey_offset = self._offsets['enc_privkey']
            if self._version_number == 0:
                privkey_length = self._offsets['EOF'] - privkey_offset
            else:
                privkey_length = self._offsets['share_hash_chain'] - privkey_offset
            readvs = [(privkey_offset, privkey_length)]
            return readvs
        d.addCallback(_make_readvs)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            privkey = results[self.shnum][0]
            return privkey
        d.addCallback(_process_results)
        return d


    def get_signature(self):
        """
        I return the signature of my share.
        """
        d = self._maybe_fetch_offsets_and_header()

        def _make_readvs(ignored):
            signature_offset = self._offsets['signature']
            if self._version_number == 1:
                signature_length = self._offsets['verification_key'] - signature_offset
            else:
                signature_length = self._offsets['share_hash_chain'] - signature_offset
            readvs = [(signature_offset, signature_length)]
            return readvs
        d.addCallback(_make_readvs)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            signature = results[self.shnum][0]
            return signature
        d.addCallback(_process_results)
        return d


    def get_verification_key(self):
        """
        I return the verification key.
        """
        d = self._maybe_fetch_offsets_and_header()

        def _make_readvs(ignored):
            if self._version_number == 1:
                vk_offset = self._offsets['verification_key']
                vk_length = self._offsets['verification_key_end'] - vk_offset
            else:
                # SDMF stores the verification key between the end of
                # the fixed-size header and the signature.
                vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ")
                vk_length = self._offsets['signature'] - vk_offset
            readvs = [(vk_offset, vk_length)]
            return readvs
        d.addCallback(_make_readvs)
        d.addCallback(lambda readvs: self._read(readvs))
        def _process_results(results):
            if self.shnum not in results:
                raise BadShareError("no data for shnum %d" % self.shnum)
            verification_key = results[self.shnum][0]
            return verification_key
        d.addCallback(_process_results)
        return d


    def get_encoding_parameters(self):
        """
        I return (k, n, segsize, datalen).
        """
        d = self._maybe_fetch_offsets_and_header()
        d.addCallback(lambda ignored:
            (self._required_shares,
             self._total_shares,
             self._segment_size,
             self._data_length))
        return d


    def get_seqnum(self):
        """
        I return the sequence number for this share.
        """
        d = self._maybe_fetch_offsets_and_header()
        d.addCallback(lambda ignored:
            self._sequence_number)
        return d


    def get_root_hash(self):
        """
        I return the root of the share hash tree.
        """
        d = self._maybe_fetch_offsets_and_header()
        d.addCallback(lambda ignored: self._root_hash)
        return d


    def get_checkstring(self):
        """
        I return the packed representation of the following:

            - version number
            - sequence number
            - root hash
            - salt (SDMF only)

        which my users use as a checkstring to detect other writers.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _build_checkstring(ignored):
            if self._salt:
                checkstring = struct.pack(PREFIX,
                                          self._version_number,
                                          self._sequence_number,
                                          self._root_hash,
                                          self._salt)
            else:
                checkstring = struct.pack(MDMFCHECKSTRING,
                                          self._version_number,
                                          self._sequence_number,
                                          self._root_hash)
            return checkstring
        d.addCallback(_build_checkstring)
        return d
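    # For reference, a sketch of the sizes (assuming PREFIX and
    # MDMFCHECKSTRING keep the layouts described at the top of this
    # file): the SDMF checkstring packs to 1 + 8 + 32 + 16 = 57 bytes,
    # the MDMF checkstring to 1 + 8 + 32 = 41 bytes.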


    def get_prefix(self, force_remote):
        d = self._maybe_fetch_offsets_and_header(force_remote)
        d.addCallback(lambda ignored:
            self._build_prefix())
        return d


    def _build_prefix(self):
        # The prefix is another name for the part of the remote share
        # that gets signed. It consists of everything up to and
        # including the datalength, packed by struct.
        if self._version_number == SDMF_VERSION:
            return struct.pack(SIGNED_PREFIX,
                               self._version_number,
                               self._sequence_number,
                               self._root_hash,
                               self._salt,
                               self._required_shares,
                               self._total_shares,
                               self._segment_size,
                               self._data_length)
        else:
            return struct.pack(MDMFSIGNABLEHEADER,
                               self._version_number,
                               self._sequence_number,
                               self._root_hash,
                               self._required_shares,
                               self._total_shares,
                               self._segment_size,
                               self._data_length)
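    # For reference, a sketch of the packed prefix sizes (assuming the
    # format strings match the field descriptions at the top of this
    # file): 75 bytes for SDMF, which includes the 16-byte salt, and
    # 59 bytes for MDMF, which has no salt field.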


    def _get_offsets_tuple(self):
        # The offsets are another component of the version information
        # tuple. Despite this method's name, we return a copy of our
        # offsets dictionary; callers itemize it into a tuple as needed.
        return self._offsets.copy()


    def get_verinfo(self):
        """
        I return my verinfo tuple. This is used by the ServermapUpdater
        to keep track of versions of mutable files.

        The verinfo tuple for MDMF files contains:
            - seqnum
            - root hash
            - a placeholder (None; this is where SDMF carries its salt)
            - segsize
            - datalen
            - k
            - n
            - prefix (the thing that you sign)
            - a tuple of offsets

        We include the placeholder in MDMF so that the tuple has the
        same shape for both formats, which simplifies processing of
        version information tuples.

        The verinfo tuple for SDMF files is the same, but carries the
        16-byte IV (the salt) in that slot instead.
        """
        d = self._maybe_fetch_offsets_and_header()
        def _build_verinfo(ignored):
            if self._version_number == SDMF_VERSION:
                salt_to_use = self._salt
            else:
                salt_to_use = None
            return (self._sequence_number,
                    self._root_hash,
                    salt_to_use,
                    self._segment_size,
                    self._data_length,
                    self._required_shares,
                    self._total_shares,
                    self._build_prefix(),
                    self._get_offsets_tuple())
        d.addCallback(_build_verinfo)
        return d


    def _read(self, readvs, force_remote=False):
        unsatisfiable = [x for x in readvs if x[0] + x[1] > len(self._data)]
        # TODO: It's entirely possible to tweak this so that it just
        # fulfills the requests that it can, and not demand that all
        # requests are satisfiable before running it.

        if not unsatisfiable or self._data_is_everything:
            results = [self._data[offset:offset+length]
                       for (offset, length) in readvs]
            results = {self.shnum: results}
            return defer.succeed(results)
        else:
            return self._storage_server.slot_readv(
                self._storage_index,
                [self.shnum],
                readvs,
            )
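    # An example of the readv shapes handled above (illustrative): with
    # self._data = b"abcdefgh", readvs = [(0, 4), (4, 2)] is fully
    # satisfiable locally and produces {self.shnum: [b"abcd", b"ef"]},
    # while readvs = [(6, 4)] overruns the cached data and falls through
    # to slot_readv unless _data_is_everything is set.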


    def is_sdmf(self):
        """
        I tell my caller whether my remote file is SDMF (True) or MDMF
        (False).
        """
        d = self._maybe_fetch_offsets_and_header()
        d.addCallback(lambda ignored:
            self._version_number == 0)
        return d
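    # A minimal usage sketch (hedged: the constructor signature is
    # assumed from the attributes initialized above, and rref/si are
    # hypothetical caller-supplied values):
    #
    #   proxy = MDMFSlotReadProxy(rref, si, shnum)
    #   d = proxy.is_sdmf()
    #   d.addCallback(lambda sdmf: proxy.get_block_and_salt(0))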


class LayoutInvalid(BadShareError):
    """
    This isn't a valid MDMF mutable file.
    """