source: trunk/misc/simulators/sizes.py

Last change on this file was 1cfe843d, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-02-22T23:40:25Z

more python2 removal

  • Property mode set to 100644
File size: 7.7 KB
Line 
1#! /usr/bin/env python
2
3import random, math, re
4from twisted.python import usage
5
6class Args(usage.Options):
7    optParameters = [
8        ["mode", "m", "alpha", "validation scheme"],
9        ["arity", "k", 2, "k (airty) for hash tree"],
10        ]
11    def opt_arity(self, option):
12        self['arity'] = int(option)
13    def parseArgs(self, *args):
14        if len(args) > 0:
15            self['mode'] = args[0]
16
17
18def charttest():
19    import gdchart
20    sizes = [random.randrange(10, 20) for i in range(10)]
21    x = gdchart.Line()
22    x.width = 250
23    x.height = 250
24    x.xtitle = "sample"
25    x.ytitle = "size"
26    x.title = "Example Graph"
27    #x.ext_color = [ "white", "yellow", "red", "blue", "green"]
28    x.setData(sizes)
29    #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"])
30    x.draw("simple.png")
31
32KiB=1024
33MiB=1024*KiB
34GiB=1024*MiB
35TiB=1024*GiB
36PiB=1024*TiB
37
38class Sizes(object):
39    def __init__(self, mode, file_size, arity=2):
40        MAX_SEGSIZE = 128*KiB
41        self.mode = mode
42        self.file_size = file_size
43        self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size)
44        self.num_segs = num_segs = math.ceil(file_size / seg_size)
45        self.num_blocks = num_blocks = num_segs
46
47        self.num_shares = num_shares = 10
48        self.shares_needed = shares_needed = 3
49
50        self.block_size = block_size = seg_size / shares_needed
51        self.share_size = share_size = block_size * num_blocks
52
53        # none of this includes the share-level hash chain yet, since that is
54        # only a function of the number of shares. All overhead numbers
55        # assume that the share-level hash chain has already been sent,
56        # including the root of the block-level hash tree.
57
58        if mode == "alpha":
59            # no hash tree at all
60            self.block_arity = 0
61            self.block_tree_depth = 0
62            self.block_overhead = 0
63            self.bytes_until_some_data = 32 + share_size
64            self.share_storage_overhead = 0
65            self.share_transmission_overhead = 0
66
67        elif mode == "beta":
68            # k=num_blocks, d=1
69            # each block has a 32-byte hash
70            self.block_arity = num_blocks
71            self.block_tree_depth = 1
72            self.block_overhead = 32
73            # the share has a list of hashes, one for each block
74            self.share_storage_overhead = (self.block_overhead *
75                                           num_blocks)
76            # we can get away with not sending the hash of the share that
77            # we're sending in full, once
78            self.share_transmission_overhead = self.share_storage_overhead - 32
79            # we must get the whole list (so it can be validated) before
80            # any data can be validated
81            self.bytes_until_some_data = (self.share_transmission_overhead +
82                                          block_size)
83
84        elif mode == "gamma":
85            self.block_arity = k = arity
86            d = math.ceil(math.log(num_blocks, k))
87            self.block_tree_depth = d
88            num_leaves = k ** d
89            # to make things easier, we make the pessimistic assumption that
90            # we have to store hashes for all the empty places in the tree
91            # (when the number of shares is not an exact exponent of k)
92            self.block_overhead = 32
93            # the block hashes are organized into a k-ary tree, which
94            # means storing (and eventually transmitting) more hashes. This
95            # count includes all the low-level share hashes and the root.
96            hash_nodes = (num_leaves*k - 1) / (k - 1)
97            #print("hash_depth", d)
98            #print("num_leaves", num_leaves)
99            #print("hash_nodes", hash_nodes)
100            # the storage overhead is this
101            self.share_storage_overhead = 32 * (hash_nodes - 1)
102            # the transmission overhead is smaller: if we actually transmit
103            # every block, we don't have to transmit 1/k of the
104            # lowest-level block hashes, and we don't have to transmit the
105            # root because it was already sent with the share-level hash tree
106            self.share_transmission_overhead = 32 * (hash_nodes
107                                                     - 1 # the root
108                                                     - num_leaves / k)
109            # we must get a full sibling hash chain before we can validate
110            # any data
111            sibling_length = d * (k-1)
112            self.bytes_until_some_data = 32 * sibling_length + block_size
113
114        else:
115            raise ValueError("unknown mode '%s" % mode)
116
117        self.storage_overhead = self.share_storage_overhead * num_shares
118        self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size
119
120    def dump(self):
121        for k in ("mode", "file_size", "seg_size",
122                  "num_segs", "num_blocks", "num_shares", "shares_needed",
123                  "block_size", "share_size",
124                  "block_arity", "block_tree_depth",
125                  "block_overhead",
126                  "share_storage_overhead", "share_transmission_overhead",
127                  "storage_overhead", "storage_overhead_percentage",
128                  "bytes_until_some_data"):
129            print(k, getattr(self, k))
130
131def fmt(num, trim=False):
132    if num < KiB:
133        #s = str(num) + "#"
134        s = "%.2f#" % num
135    elif num < MiB:
136        s = "%.2fk" % (num / KiB)
137    elif num < GiB:
138        s = "%.2fM" % (num / MiB)
139    elif num < TiB:
140        s = "%.2fG" % (num / GiB)
141    elif num < PiB:
142        s = "%.2fT" % (num / TiB)
143    else:
144        s = "big"
145    if trim:
146        s = re.sub(r'(\.0+)([kMGT#])',
147                   lambda m: m.group(2),
148                   s)
149    else:
150        s = re.sub(r'(\.0+)([kMGT#])',
151                   lambda m: (" "*len(m.group(1))+m.group(2)),
152                   s)
153    if s.endswith("#"):
154        s = s[:-1] + " "
155    return s
156
157def text():
158    opts = Args()
159    opts.parseOptions()
160    mode = opts["mode"]
161    arity = opts["arity"]
162    #      0123456789012345678901234567890123456789012345678901234567890123456
163    print("mode=%s" % mode, " arity=%d" % arity)
164    print("                    storage    storage")
165    print("Size     sharesize  overhead   overhead     k  d  alacrity")
166    print("                    (bytes)      (%)")
167    print("-------  -------    --------   --------  ---- --  --------")
168    #sizes = [2 ** i for i in range(7, 41)]
169    #radix = math.sqrt(10); expstep = 2
170    radix = 2; expstep = 2
171    #radix = 10; expstep = 1
172    maxexp = int(math.ceil(math.log(1e12, radix)))+2
173    sizes = [radix ** i for i in range(2,maxexp,expstep)]
174    for file_size in sizes:
175        s = Sizes(mode, file_size, arity)
176        out = ""
177        out += "%7s  " % fmt(file_size, trim=True)
178        out += "%7s    " % fmt(s.share_size)
179        out += "%8s" % fmt(s.storage_overhead)
180        out += "%10.2f  " % s.storage_overhead_percentage
181        out += " %4d" % int(s.block_arity)
182        out += " %2d" % int(s.block_tree_depth)
183        out += " %8s" % fmt(s.bytes_until_some_data)
184        print(out)
185
186
187def graph():
188    # doesn't work yet
189    import Gnuplot
190    opts = Args()
191    opts.parseOptions()
192    mode = opts["mode"]
193    arity = opts["arity"]
194    g = Gnuplot.Gnuplot(debug=1)
195    g.title("overhead / alacrity tradeoffs")
196    g.xlabel("file size")
197    g.ylabel("stuff")
198    sizes = [2 ** i for i in range(7, 32)]
199    series = {"overhead": {}, "alacrity": {}}
200    for file_size in sizes:
201        s = Sizes(mode, file_size, arity)
202        series["overhead"][file_size] = s.storage_overhead_percentage
203        series["alacrity"][file_size] = s.bytes_until_some_data
204    g.plot([ (fs, series["overhead"][fs])
205             for fs in sizes ])
206    input("press return")
207
208
209if __name__ == '__main__':
210    text()
211    #graph()
Note: See TracBrowser for help on using the repository browser.