source: trunk/src/allmydata/test/test_hashtree.py

Last change on this file was d6b38bc, checked in by Jean-Paul Calderone <exarkun@…>, at 2023-09-13T13:26:35Z

test vectors

  • Property mode set to 100644
File size: 9.5 KB
Line 
1"""
2Tests for allmydata.hashtree.
3"""
4
5from __future__ import annotations
6
7from .common import SyncTestCase
8
9from base64 import b32encode
10from allmydata.util.hashutil import tagged_hash
11from allmydata import hashtree
12
13def make_tree(numleaves):
14    leaves = [b"%d" % i for i in range(numleaves)]
15    leaf_hashes = [tagged_hash(b"tag", leaf) for leaf in leaves]
16    ht = hashtree.HashTree(leaf_hashes)
17    return ht
18
19class Complete(SyncTestCase):
20    def test_create(self):
21        # try out various sizes, since we pad to a power of two
22        ht = make_tree(6)
23        ht = make_tree(9)
24        ht = make_tree(8)
25        root = ht[0]
26        self.failUnlessEqual(len(root), 32)
27        self.failUnlessEqual(ht.get_leaf(0), tagged_hash(b"tag", b"0"))
28        self.failUnlessRaises(IndexError, ht.get_leaf, 8)
29        self.failUnlessEqual(ht.get_leaf_index(0), 7)
30        self.failUnlessRaises(IndexError, ht.parent, 0)
31        self.failUnlessRaises(IndexError, ht.needed_for, -1)
32
33    def test_well_known_tree(self):
34        self.assertEqual(
35            [b32encode(s).strip(b"=").lower() for s in make_tree(3)],
36            [b'vxuqudnucceja4pqkdqy5txapagxubm5moupzqywkbg2jrjkaola',
37             b'weycjri4jlcaunca2jyx2kr7sbtb7qdriog3f26g5jpc5awfeazq',
38             b'5ovy3g2wwjnxoqtja4licckxkbqjef4xsjtclk6gxnsl66kvow6a',
39             b'esd34nbzri75l3j2vwetpk3dvlvsxstkbaktomonrulpks3df3sq',
40             b'jkxbwa2tppyfax35o72tbjecxvaa4xphma6zbyfbkkku3ed2657a',
41             b'wfisavaqgab2raihe7dld2qjps4rtxyiubgfs5enziokey2msjwa',
42             b't3kza5vwx3tlowdemmgdyigp62ju57qduyfh7uulnfkc7mj2ncrq'],
43        )
44
45    def test_needed_hashes(self):
46        ht = make_tree(8)
47        self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
48        self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
49        self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
50        self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1]))
51        self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1]))
52        self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1]))
53
54    def test_dump(self):
55        ht = make_tree(6)
56        expected = [(0,0),
57                    (1,1), (3,2), (7,3), (8,3), (4,2), (9,3), (10,3),
58                    (2,1), (5,2), (11,3), (12,3), (6,2), (13,3), (14,3),
59                    ]
60        self.failUnlessEqual(list(ht.depth_first()), expected)
61        d = "\n" + ht.dump()
62        #print(d)
63        self.failUnless("\n  0:" in d)
64        self.failUnless("\n    1:" in d)
65        self.failUnless("\n      3:" in d)
66        self.failUnless("\n        7:" in d)
67        self.failUnless("\n        8:" in d)
68        self.failUnless("\n      4:" in d)
69
70class Incomplete(SyncTestCase):
71
72    def test_create(self):
73        ht = hashtree.IncompleteHashTree(6)
74        ht = hashtree.IncompleteHashTree(9)
75        ht = hashtree.IncompleteHashTree(8)
76        self.failUnlessEqual(ht[0], None)
77        self.failUnlessEqual(ht.get_leaf(0), None)
78        self.failUnlessRaises(IndexError, ht.get_leaf, 8)
79        self.failUnlessEqual(ht.get_leaf_index(0), 7)
80
81    def test_needed_hashes(self):
82        ht = hashtree.IncompleteHashTree(8)
83        self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
84        self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
85        self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
86        self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1]))
87        self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1]))
88        self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1]))
89        ht = hashtree.IncompleteHashTree(1)
90        self.failUnlessEqual(ht.needed_hashes(0), set([]))
91        ht = hashtree.IncompleteHashTree(6)
92        self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
93        self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
94        self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
95        self.failUnlessEqual(ht.needed_hashes(5), set([11, 6, 1]))
96        self.failUnlessEqual(ht.needed_hashes(5, False), set([11, 6, 1]))
97        self.failUnlessEqual(ht.needed_hashes(5, True), set([12, 11, 6, 1]))
98
99    def test_depth_of(self):
100        hashtree.IncompleteHashTree(8)
101        self.failUnlessEqual(hashtree.depth_of(0), 0)
102        for i in [1,2]:
103            self.failUnlessEqual(hashtree.depth_of(i), 1, "i=%d"%i)
104        for i in [3,4,5,6]:
105            self.failUnlessEqual(hashtree.depth_of(i), 2, "i=%d"%i)
106        for i in [7,8,9,10,11,12,13,14]:
107            self.failUnlessEqual(hashtree.depth_of(i), 3, "i=%d"%i)
108
109    def test_large(self):
110        # IncompleteHashTree.set_hashes() used to take O(N**2). This test is
111        # meant to show that it now takes O(N) or maybe O(N*ln(N)). I wish
112        # there were a good way to assert this (like counting VM operations
113        # or something): the problem was inside list.sort(), so there's no
114        # good way to instrument set_hashes() to count what we care about. On
115        # my laptop, 10k leaves takes 1.1s in this fixed version, and 11.6s
116        # in the old broken version. An 80k-leaf test (corresponding to a
117        # 10GB file with a 128KiB segsize) 10s in the fixed version, and
118        # several hours in the broken version, but 10s on my laptop (plus the
119        # 20s of setup code) probably means 200s on our dapper buildslave,
120        # which is painfully long for a unit test.
121        self.do_test_speed(10000)
122
123    def do_test_speed(self, SIZE):
124        # on my laptop, SIZE=80k (corresponding to a 10GB file with a 128KiB
125        # segsize) takes:
126        #  7s to build the (complete) HashTree
127        #  13s to set up the dictionary
128        #  10s to run set_hashes()
129        ht = make_tree(SIZE)
130        iht = hashtree.IncompleteHashTree(SIZE)
131
132        needed = set()
133        for i in range(SIZE):
134            needed.update(ht.needed_hashes(i, True))
135        all = dict([ (i, ht[i]) for i in needed])
136        iht.set_hashes(hashes=all)
137
138    def test_check(self):
139        # first create a complete hash tree
140        ht = make_tree(6)
141        # then create a corresponding incomplete tree
142        iht = hashtree.IncompleteHashTree(6)
143
144        # suppose we wanted to validate leaf[0]
145        #  leaf[0] is the same as node[7]
146        self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
147        self.failUnlessEqual(iht.needed_hashes(0, True), set([7, 8, 4, 2]))
148        self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
149        iht[0] = ht[0] # set the root
150        self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
151        self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
152        iht[5] = ht[5]
153        self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
154        self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
155
156        # reset
157        iht = hashtree.IncompleteHashTree(6)
158
159        current_hashes = list(iht)
160        # this should fail because there aren't enough hashes known
161        try:
162            iht.set_hashes(leaves={0: tagged_hash(b"tag", b"0")})
163        except hashtree.NotEnoughHashesError:
164            pass
165        else:
166            self.fail("didn't catch not enough hashes")
167
168        # and the set of hashes stored in the tree should still be the same
169        self.failUnlessEqual(list(iht), current_hashes)
170        # and we should still need the same
171        self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
172
173        chain = {0: ht[0], 2: ht[2], 4: ht[4], 8: ht[8]}
174        # this should fail because the leaf hash is just plain wrong
175        try:
176            iht.set_hashes(chain, leaves={0: tagged_hash(b"bad tag", b"0")})
177        except hashtree.BadHashError:
178            pass
179        else:
180            self.fail("didn't catch bad hash")
181
182        # this should fail because we give it conflicting hashes: one as an
183        # internal node, another as a leaf
184        try:
185            iht.set_hashes(chain, leaves={1: tagged_hash(b"bad tag", b"1")})
186        except hashtree.BadHashError:
187            pass
188        else:
189            self.fail("didn't catch bad hash")
190
191        bad_chain = chain.copy()
192        bad_chain[2] = ht[2] + b"BOGUS"
193
194        # this should fail because the internal hash is wrong
195        try:
196            iht.set_hashes(bad_chain, leaves={0: tagged_hash(b"tag", b"0")})
197        except hashtree.BadHashError:
198            pass
199        else:
200            self.fail("didn't catch bad hash")
201
202        # this should succeed
203        try:
204            iht.set_hashes(chain, leaves={0: tagged_hash(b"tag", b"0")})
205        except hashtree.BadHashError as e:
206            self.fail("bad hash: %s" % e)
207
208        self.failUnlessEqual(ht.get_leaf(0), tagged_hash(b"tag", b"0"))
209        self.failUnlessRaises(IndexError, ht.get_leaf, 8)
210
211        # this should succeed too
212        try:
213            iht.set_hashes(leaves={1: tagged_hash(b"tag", b"1")})
214        except hashtree.BadHashError:
215            self.fail("bad hash")
216
217        # this should fail because we give it hashes that conflict with some
218        # that we added successfully before
219        try:
220            iht.set_hashes(leaves={1: tagged_hash(b"bad tag", b"1")})
221        except hashtree.BadHashError:
222            pass
223        else:
224            self.fail("didn't catch bad hash")
225
226        # now that leaves 0 and 1 are known, some of the internal nodes are
227        # known
228        self.failUnlessEqual(iht.needed_hashes(4), set([12, 6]))
229        chain = {6: ht[6], 12: ht[12]}
230
231        # this should succeed
232        try:
233            iht.set_hashes(chain, leaves={4: tagged_hash(b"tag", b"4")})
234        except hashtree.BadHashError as e:
235            self.fail("bad hash: %s" % e)
Note: See TracBrowser for help on using the repository browser.