1 | """ |
---|
2 | Tests for allmydata.hashtree. |
---|
3 | """ |
---|
4 | |
---|
5 | from __future__ import annotations |
---|
6 | |
---|
7 | from .common import SyncTestCase |
---|
8 | |
---|
9 | from base64 import b32encode |
---|
10 | from allmydata.util.hashutil import tagged_hash |
---|
11 | from allmydata import hashtree |
---|
12 | |
---|
13 | def make_tree(numleaves): |
---|
14 | leaves = [b"%d" % i for i in range(numleaves)] |
---|
15 | leaf_hashes = [tagged_hash(b"tag", leaf) for leaf in leaves] |
---|
16 | ht = hashtree.HashTree(leaf_hashes) |
---|
17 | return ht |
---|
18 | |
---|
19 | class Complete(SyncTestCase): |
---|
20 | def test_create(self): |
---|
21 | # try out various sizes, since we pad to a power of two |
---|
22 | ht = make_tree(6) |
---|
23 | ht = make_tree(9) |
---|
24 | ht = make_tree(8) |
---|
25 | root = ht[0] |
---|
26 | self.failUnlessEqual(len(root), 32) |
---|
27 | self.failUnlessEqual(ht.get_leaf(0), tagged_hash(b"tag", b"0")) |
---|
28 | self.failUnlessRaises(IndexError, ht.get_leaf, 8) |
---|
29 | self.failUnlessEqual(ht.get_leaf_index(0), 7) |
---|
30 | self.failUnlessRaises(IndexError, ht.parent, 0) |
---|
31 | self.failUnlessRaises(IndexError, ht.needed_for, -1) |
---|
32 | |
---|
33 | def test_well_known_tree(self): |
---|
34 | self.assertEqual( |
---|
35 | [b32encode(s).strip(b"=").lower() for s in make_tree(3)], |
---|
36 | [b'vxuqudnucceja4pqkdqy5txapagxubm5moupzqywkbg2jrjkaola', |
---|
37 | b'weycjri4jlcaunca2jyx2kr7sbtb7qdriog3f26g5jpc5awfeazq', |
---|
38 | b'5ovy3g2wwjnxoqtja4licckxkbqjef4xsjtclk6gxnsl66kvow6a', |
---|
39 | b'esd34nbzri75l3j2vwetpk3dvlvsxstkbaktomonrulpks3df3sq', |
---|
40 | b'jkxbwa2tppyfax35o72tbjecxvaa4xphma6zbyfbkkku3ed2657a', |
---|
41 | b'wfisavaqgab2raihe7dld2qjps4rtxyiubgfs5enziokey2msjwa', |
---|
42 | b't3kza5vwx3tlowdemmgdyigp62ju57qduyfh7uulnfkc7mj2ncrq'], |
---|
43 | ) |
---|
44 | |
---|
45 | def test_needed_hashes(self): |
---|
46 | ht = make_tree(8) |
---|
47 | self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) |
---|
48 | self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) |
---|
49 | self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) |
---|
50 | self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1])) |
---|
51 | self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1])) |
---|
52 | self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1])) |
---|
53 | |
---|
54 | def test_dump(self): |
---|
55 | ht = make_tree(6) |
---|
56 | expected = [(0,0), |
---|
57 | (1,1), (3,2), (7,3), (8,3), (4,2), (9,3), (10,3), |
---|
58 | (2,1), (5,2), (11,3), (12,3), (6,2), (13,3), (14,3), |
---|
59 | ] |
---|
60 | self.failUnlessEqual(list(ht.depth_first()), expected) |
---|
61 | d = "\n" + ht.dump() |
---|
62 | #print(d) |
---|
63 | self.failUnless("\n 0:" in d) |
---|
64 | self.failUnless("\n 1:" in d) |
---|
65 | self.failUnless("\n 3:" in d) |
---|
66 | self.failUnless("\n 7:" in d) |
---|
67 | self.failUnless("\n 8:" in d) |
---|
68 | self.failUnless("\n 4:" in d) |
---|
69 | |
---|
70 | class Incomplete(SyncTestCase): |
---|
71 | |
---|
72 | def test_create(self): |
---|
73 | ht = hashtree.IncompleteHashTree(6) |
---|
74 | ht = hashtree.IncompleteHashTree(9) |
---|
75 | ht = hashtree.IncompleteHashTree(8) |
---|
76 | self.failUnlessEqual(ht[0], None) |
---|
77 | self.failUnlessEqual(ht.get_leaf(0), None) |
---|
78 | self.failUnlessRaises(IndexError, ht.get_leaf, 8) |
---|
79 | self.failUnlessEqual(ht.get_leaf_index(0), 7) |
---|
80 | |
---|
81 | def test_needed_hashes(self): |
---|
82 | ht = hashtree.IncompleteHashTree(8) |
---|
83 | self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) |
---|
84 | self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) |
---|
85 | self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) |
---|
86 | self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1])) |
---|
87 | self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1])) |
---|
88 | self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1])) |
---|
89 | ht = hashtree.IncompleteHashTree(1) |
---|
90 | self.failUnlessEqual(ht.needed_hashes(0), set([])) |
---|
91 | ht = hashtree.IncompleteHashTree(6) |
---|
92 | self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) |
---|
93 | self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) |
---|
94 | self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) |
---|
95 | self.failUnlessEqual(ht.needed_hashes(5), set([11, 6, 1])) |
---|
96 | self.failUnlessEqual(ht.needed_hashes(5, False), set([11, 6, 1])) |
---|
97 | self.failUnlessEqual(ht.needed_hashes(5, True), set([12, 11, 6, 1])) |
---|
98 | |
---|
99 | def test_depth_of(self): |
---|
100 | hashtree.IncompleteHashTree(8) |
---|
101 | self.failUnlessEqual(hashtree.depth_of(0), 0) |
---|
102 | for i in [1,2]: |
---|
103 | self.failUnlessEqual(hashtree.depth_of(i), 1, "i=%d"%i) |
---|
104 | for i in [3,4,5,6]: |
---|
105 | self.failUnlessEqual(hashtree.depth_of(i), 2, "i=%d"%i) |
---|
106 | for i in [7,8,9,10,11,12,13,14]: |
---|
107 | self.failUnlessEqual(hashtree.depth_of(i), 3, "i=%d"%i) |
---|
108 | |
---|
109 | def test_large(self): |
---|
110 | # IncompleteHashTree.set_hashes() used to take O(N**2). This test is |
---|
111 | # meant to show that it now takes O(N) or maybe O(N*ln(N)). I wish |
---|
112 | # there were a good way to assert this (like counting VM operations |
---|
113 | # or something): the problem was inside list.sort(), so there's no |
---|
114 | # good way to instrument set_hashes() to count what we care about. On |
---|
115 | # my laptop, 10k leaves takes 1.1s in this fixed version, and 11.6s |
---|
116 | # in the old broken version. An 80k-leaf test (corresponding to a |
---|
117 | # 10GB file with a 128KiB segsize) 10s in the fixed version, and |
---|
118 | # several hours in the broken version, but 10s on my laptop (plus the |
---|
119 | # 20s of setup code) probably means 200s on our dapper buildslave, |
---|
120 | # which is painfully long for a unit test. |
---|
121 | self.do_test_speed(10000) |
---|
122 | |
---|
123 | def do_test_speed(self, SIZE): |
---|
124 | # on my laptop, SIZE=80k (corresponding to a 10GB file with a 128KiB |
---|
125 | # segsize) takes: |
---|
126 | # 7s to build the (complete) HashTree |
---|
127 | # 13s to set up the dictionary |
---|
128 | # 10s to run set_hashes() |
---|
129 | ht = make_tree(SIZE) |
---|
130 | iht = hashtree.IncompleteHashTree(SIZE) |
---|
131 | |
---|
132 | needed = set() |
---|
133 | for i in range(SIZE): |
---|
134 | needed.update(ht.needed_hashes(i, True)) |
---|
135 | all = dict([ (i, ht[i]) for i in needed]) |
---|
136 | iht.set_hashes(hashes=all) |
---|
137 | |
---|
138 | def test_check(self): |
---|
139 | # first create a complete hash tree |
---|
140 | ht = make_tree(6) |
---|
141 | # then create a corresponding incomplete tree |
---|
142 | iht = hashtree.IncompleteHashTree(6) |
---|
143 | |
---|
144 | # suppose we wanted to validate leaf[0] |
---|
145 | # leaf[0] is the same as node[7] |
---|
146 | self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) |
---|
147 | self.failUnlessEqual(iht.needed_hashes(0, True), set([7, 8, 4, 2])) |
---|
148 | self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) |
---|
149 | iht[0] = ht[0] # set the root |
---|
150 | self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) |
---|
151 | self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) |
---|
152 | iht[5] = ht[5] |
---|
153 | self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) |
---|
154 | self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) |
---|
155 | |
---|
156 | # reset |
---|
157 | iht = hashtree.IncompleteHashTree(6) |
---|
158 | |
---|
159 | current_hashes = list(iht) |
---|
160 | # this should fail because there aren't enough hashes known |
---|
161 | try: |
---|
162 | iht.set_hashes(leaves={0: tagged_hash(b"tag", b"0")}) |
---|
163 | except hashtree.NotEnoughHashesError: |
---|
164 | pass |
---|
165 | else: |
---|
166 | self.fail("didn't catch not enough hashes") |
---|
167 | |
---|
168 | # and the set of hashes stored in the tree should still be the same |
---|
169 | self.failUnlessEqual(list(iht), current_hashes) |
---|
170 | # and we should still need the same |
---|
171 | self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) |
---|
172 | |
---|
173 | chain = {0: ht[0], 2: ht[2], 4: ht[4], 8: ht[8]} |
---|
174 | # this should fail because the leaf hash is just plain wrong |
---|
175 | try: |
---|
176 | iht.set_hashes(chain, leaves={0: tagged_hash(b"bad tag", b"0")}) |
---|
177 | except hashtree.BadHashError: |
---|
178 | pass |
---|
179 | else: |
---|
180 | self.fail("didn't catch bad hash") |
---|
181 | |
---|
182 | # this should fail because we give it conflicting hashes: one as an |
---|
183 | # internal node, another as a leaf |
---|
184 | try: |
---|
185 | iht.set_hashes(chain, leaves={1: tagged_hash(b"bad tag", b"1")}) |
---|
186 | except hashtree.BadHashError: |
---|
187 | pass |
---|
188 | else: |
---|
189 | self.fail("didn't catch bad hash") |
---|
190 | |
---|
191 | bad_chain = chain.copy() |
---|
192 | bad_chain[2] = ht[2] + b"BOGUS" |
---|
193 | |
---|
194 | # this should fail because the internal hash is wrong |
---|
195 | try: |
---|
196 | iht.set_hashes(bad_chain, leaves={0: tagged_hash(b"tag", b"0")}) |
---|
197 | except hashtree.BadHashError: |
---|
198 | pass |
---|
199 | else: |
---|
200 | self.fail("didn't catch bad hash") |
---|
201 | |
---|
202 | # this should succeed |
---|
203 | try: |
---|
204 | iht.set_hashes(chain, leaves={0: tagged_hash(b"tag", b"0")}) |
---|
205 | except hashtree.BadHashError as e: |
---|
206 | self.fail("bad hash: %s" % e) |
---|
207 | |
---|
208 | self.failUnlessEqual(ht.get_leaf(0), tagged_hash(b"tag", b"0")) |
---|
209 | self.failUnlessRaises(IndexError, ht.get_leaf, 8) |
---|
210 | |
---|
211 | # this should succeed too |
---|
212 | try: |
---|
213 | iht.set_hashes(leaves={1: tagged_hash(b"tag", b"1")}) |
---|
214 | except hashtree.BadHashError: |
---|
215 | self.fail("bad hash") |
---|
216 | |
---|
217 | # this should fail because we give it hashes that conflict with some |
---|
218 | # that we added successfully before |
---|
219 | try: |
---|
220 | iht.set_hashes(leaves={1: tagged_hash(b"bad tag", b"1")}) |
---|
221 | except hashtree.BadHashError: |
---|
222 | pass |
---|
223 | else: |
---|
224 | self.fail("didn't catch bad hash") |
---|
225 | |
---|
226 | # now that leaves 0 and 1 are known, some of the internal nodes are |
---|
227 | # known |
---|
228 | self.failUnlessEqual(iht.needed_hashes(4), set([12, 6])) |
---|
229 | chain = {6: ht[6], 12: ht[12]} |
---|
230 | |
---|
231 | # this should succeed |
---|
232 | try: |
---|
233 | iht.set_hashes(chain, leaves={4: tagged_hash(b"tag", b"4")}) |
---|
234 | except hashtree.BadHashError as e: |
---|
235 | self.fail("bad hash: %s" % e) |
---|