1 | #! /usr/bin/env python |
---|
2 | |
---|
3 | import random, math, re |
---|
4 | from twisted.python import usage |
---|
5 | |
---|
6 | class Args(usage.Options): |
---|
7 | optParameters = [ |
---|
8 | ["mode", "m", "alpha", "validation scheme"], |
---|
9 | ["arity", "k", 2, "k (airty) for hash tree"], |
---|
10 | ] |
---|
11 | def opt_arity(self, option): |
---|
12 | self['arity'] = int(option) |
---|
13 | def parseArgs(self, *args): |
---|
14 | if len(args) > 0: |
---|
15 | self['mode'] = args[0] |
---|
16 | |
---|
17 | |
---|
18 | def charttest(): |
---|
19 | import gdchart |
---|
20 | sizes = [random.randrange(10, 20) for i in range(10)] |
---|
21 | x = gdchart.Line() |
---|
22 | x.width = 250 |
---|
23 | x.height = 250 |
---|
24 | x.xtitle = "sample" |
---|
25 | x.ytitle = "size" |
---|
26 | x.title = "Example Graph" |
---|
27 | #x.ext_color = [ "white", "yellow", "red", "blue", "green"] |
---|
28 | x.setData(sizes) |
---|
29 | #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"]) |
---|
30 | x.draw("simple.png") |
---|
31 | |
---|
32 | KiB=1024 |
---|
33 | MiB=1024*KiB |
---|
34 | GiB=1024*MiB |
---|
35 | TiB=1024*GiB |
---|
36 | PiB=1024*TiB |
---|
37 | |
---|
38 | class Sizes(object): |
---|
39 | def __init__(self, mode, file_size, arity=2): |
---|
40 | MAX_SEGSIZE = 128*KiB |
---|
41 | self.mode = mode |
---|
42 | self.file_size = file_size |
---|
43 | self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size) |
---|
44 | self.num_segs = num_segs = math.ceil(file_size / seg_size) |
---|
45 | self.num_blocks = num_blocks = num_segs |
---|
46 | |
---|
47 | self.num_shares = num_shares = 10 |
---|
48 | self.shares_needed = shares_needed = 3 |
---|
49 | |
---|
50 | self.block_size = block_size = seg_size / shares_needed |
---|
51 | self.share_size = share_size = block_size * num_blocks |
---|
52 | |
---|
53 | # none of this includes the share-level hash chain yet, since that is |
---|
54 | # only a function of the number of shares. All overhead numbers |
---|
55 | # assume that the share-level hash chain has already been sent, |
---|
56 | # including the root of the block-level hash tree. |
---|
57 | |
---|
58 | if mode == "alpha": |
---|
59 | # no hash tree at all |
---|
60 | self.block_arity = 0 |
---|
61 | self.block_tree_depth = 0 |
---|
62 | self.block_overhead = 0 |
---|
63 | self.bytes_until_some_data = 32 + share_size |
---|
64 | self.share_storage_overhead = 0 |
---|
65 | self.share_transmission_overhead = 0 |
---|
66 | |
---|
67 | elif mode == "beta": |
---|
68 | # k=num_blocks, d=1 |
---|
69 | # each block has a 32-byte hash |
---|
70 | self.block_arity = num_blocks |
---|
71 | self.block_tree_depth = 1 |
---|
72 | self.block_overhead = 32 |
---|
73 | # the share has a list of hashes, one for each block |
---|
74 | self.share_storage_overhead = (self.block_overhead * |
---|
75 | num_blocks) |
---|
76 | # we can get away with not sending the hash of the share that |
---|
77 | # we're sending in full, once |
---|
78 | self.share_transmission_overhead = self.share_storage_overhead - 32 |
---|
79 | # we must get the whole list (so it can be validated) before |
---|
80 | # any data can be validated |
---|
81 | self.bytes_until_some_data = (self.share_transmission_overhead + |
---|
82 | block_size) |
---|
83 | |
---|
84 | elif mode == "gamma": |
---|
85 | self.block_arity = k = arity |
---|
86 | d = math.ceil(math.log(num_blocks, k)) |
---|
87 | self.block_tree_depth = d |
---|
88 | num_leaves = k ** d |
---|
89 | # to make things easier, we make the pessimistic assumption that |
---|
90 | # we have to store hashes for all the empty places in the tree |
---|
91 | # (when the number of shares is not an exact exponent of k) |
---|
92 | self.block_overhead = 32 |
---|
93 | # the block hashes are organized into a k-ary tree, which |
---|
94 | # means storing (and eventually transmitting) more hashes. This |
---|
95 | # count includes all the low-level share hashes and the root. |
---|
96 | hash_nodes = (num_leaves*k - 1) / (k - 1) |
---|
97 | #print("hash_depth", d) |
---|
98 | #print("num_leaves", num_leaves) |
---|
99 | #print("hash_nodes", hash_nodes) |
---|
100 | # the storage overhead is this |
---|
101 | self.share_storage_overhead = 32 * (hash_nodes - 1) |
---|
102 | # the transmission overhead is smaller: if we actually transmit |
---|
103 | # every block, we don't have to transmit 1/k of the |
---|
104 | # lowest-level block hashes, and we don't have to transmit the |
---|
105 | # root because it was already sent with the share-level hash tree |
---|
106 | self.share_transmission_overhead = 32 * (hash_nodes |
---|
107 | - 1 # the root |
---|
108 | - num_leaves / k) |
---|
109 | # we must get a full sibling hash chain before we can validate |
---|
110 | # any data |
---|
111 | sibling_length = d * (k-1) |
---|
112 | self.bytes_until_some_data = 32 * sibling_length + block_size |
---|
113 | |
---|
114 | else: |
---|
115 | raise ValueError("unknown mode '%s" % mode) |
---|
116 | |
---|
117 | self.storage_overhead = self.share_storage_overhead * num_shares |
---|
118 | self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size |
---|
119 | |
---|
120 | def dump(self): |
---|
121 | for k in ("mode", "file_size", "seg_size", |
---|
122 | "num_segs", "num_blocks", "num_shares", "shares_needed", |
---|
123 | "block_size", "share_size", |
---|
124 | "block_arity", "block_tree_depth", |
---|
125 | "block_overhead", |
---|
126 | "share_storage_overhead", "share_transmission_overhead", |
---|
127 | "storage_overhead", "storage_overhead_percentage", |
---|
128 | "bytes_until_some_data"): |
---|
129 | print(k, getattr(self, k)) |
---|
130 | |
---|
131 | def fmt(num, trim=False): |
---|
132 | if num < KiB: |
---|
133 | #s = str(num) + "#" |
---|
134 | s = "%.2f#" % num |
---|
135 | elif num < MiB: |
---|
136 | s = "%.2fk" % (num / KiB) |
---|
137 | elif num < GiB: |
---|
138 | s = "%.2fM" % (num / MiB) |
---|
139 | elif num < TiB: |
---|
140 | s = "%.2fG" % (num / GiB) |
---|
141 | elif num < PiB: |
---|
142 | s = "%.2fT" % (num / TiB) |
---|
143 | else: |
---|
144 | s = "big" |
---|
145 | if trim: |
---|
146 | s = re.sub(r'(\.0+)([kMGT#])', |
---|
147 | lambda m: m.group(2), |
---|
148 | s) |
---|
149 | else: |
---|
150 | s = re.sub(r'(\.0+)([kMGT#])', |
---|
151 | lambda m: (" "*len(m.group(1))+m.group(2)), |
---|
152 | s) |
---|
153 | if s.endswith("#"): |
---|
154 | s = s[:-1] + " " |
---|
155 | return s |
---|
156 | |
---|
157 | def text(): |
---|
158 | opts = Args() |
---|
159 | opts.parseOptions() |
---|
160 | mode = opts["mode"] |
---|
161 | arity = opts["arity"] |
---|
162 | # 0123456789012345678901234567890123456789012345678901234567890123456 |
---|
163 | print("mode=%s" % mode, " arity=%d" % arity) |
---|
164 | print(" storage storage") |
---|
165 | print("Size sharesize overhead overhead k d alacrity") |
---|
166 | print(" (bytes) (%)") |
---|
167 | print("------- ------- -------- -------- ---- -- --------") |
---|
168 | #sizes = [2 ** i for i in range(7, 41)] |
---|
169 | #radix = math.sqrt(10); expstep = 2 |
---|
170 | radix = 2; expstep = 2 |
---|
171 | #radix = 10; expstep = 1 |
---|
172 | maxexp = int(math.ceil(math.log(1e12, radix)))+2 |
---|
173 | sizes = [radix ** i for i in range(2,maxexp,expstep)] |
---|
174 | for file_size in sizes: |
---|
175 | s = Sizes(mode, file_size, arity) |
---|
176 | out = "" |
---|
177 | out += "%7s " % fmt(file_size, trim=True) |
---|
178 | out += "%7s " % fmt(s.share_size) |
---|
179 | out += "%8s" % fmt(s.storage_overhead) |
---|
180 | out += "%10.2f " % s.storage_overhead_percentage |
---|
181 | out += " %4d" % int(s.block_arity) |
---|
182 | out += " %2d" % int(s.block_tree_depth) |
---|
183 | out += " %8s" % fmt(s.bytes_until_some_data) |
---|
184 | print(out) |
---|
185 | |
---|
186 | |
---|
187 | def graph(): |
---|
188 | # doesn't work yet |
---|
189 | import Gnuplot |
---|
190 | opts = Args() |
---|
191 | opts.parseOptions() |
---|
192 | mode = opts["mode"] |
---|
193 | arity = opts["arity"] |
---|
194 | g = Gnuplot.Gnuplot(debug=1) |
---|
195 | g.title("overhead / alacrity tradeoffs") |
---|
196 | g.xlabel("file size") |
---|
197 | g.ylabel("stuff") |
---|
198 | sizes = [2 ** i for i in range(7, 32)] |
---|
199 | series = {"overhead": {}, "alacrity": {}} |
---|
200 | for file_size in sizes: |
---|
201 | s = Sizes(mode, file_size, arity) |
---|
202 | series["overhead"][file_size] = s.storage_overhead_percentage |
---|
203 | series["alacrity"][file_size] = s.bytes_until_some_data |
---|
204 | g.plot([ (fs, series["overhead"][fs]) |
---|
205 | for fs in sizes ]) |
---|
206 | input("press return") |
---|
207 | |
---|
208 | |
---|
209 | if __name__ == '__main__': |
---|
210 | text() |
---|
211 | #graph() |
---|