source: trunk/misc/simulators/ringsim.py

Last change on this file was b856238, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-02-15T15:53:34Z

remove old Python2 future statements

  • Property mode set to 100644
File size: 8.0 KB
Line 
1#! /usr/bin/python
2
3# used to discuss ticket #302: "stop permuting peerlist?"
4
5# import time
6
7
8import math
9from hashlib import md5  # sha1, sha256
10myhash = md5
11# md5: 1520 "uploads" per second
12# sha1: 1350 ups
13# sha256: 930 ups
14from itertools import count
15from twisted.python import usage
16
17def abbreviate_space(s, SI=True):
18    if s is None:
19        return "unknown"
20    if SI:
21        U = 1000.0
22        isuffix = "B"
23    else:
24        U = 1024.0
25        isuffix = "iB"
26    def r(count, suffix):
27        return "%.2f %s%s" % (count, suffix, isuffix)
28
29    if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode
30        return "%d B" % s
31    if s < U*U:
32        return r(s/U, "k")
33    if s < U*U*U:
34        return r(s/(U*U), "M")
35    if s < U*U*U*U:
36        return r(s/(U*U*U), "G")
37    if s < U*U*U*U*U:
38        return r(s/(U*U*U*U), "T")
39    return r(s/(U*U*U*U*U), "P")
40
41def make_up_a_file_size(seed):
42    h = int(myhash(seed).hexdigest(),16)
43    # exponential distribution
44    e = 8 + (h % (31-8))
45    return 2 ** e
46    # uniform distribution
47    #max=2**31
48    #return h % max # avg 1GB
49
50sizes = [make_up_a_file_size(str(i)) for i in range(10000)]
51avg_filesize = sum(sizes)/len(sizes)
52print("average file size:", abbreviate_space(avg_filesize))
53
54SERVER_CAPACITY = 10**12
55
56class Server(object):
57    def __init__(self, nodeid, capacity):
58        self.nodeid = nodeid
59        self.used = 0
60        self.capacity = capacity
61        self.numshares = 0
62        self.full_at_tick = None
63
64    def upload(self, sharesize):
65        if self.used + sharesize < self.capacity:
66            self.used += sharesize
67            self.numshares += 1
68            return True
69        return False
70
71    def __repr__(self):
72        if self.full_at_tick is not None:
73            return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick)
74        else:
75            return "<%s %s>" % (self.__class__.__name__, self.nodeid)
76
77class Ring(object):
78    SHOW_MINMAX = False
79    def __init__(self, numservers, seed, permute):
80        self.servers = []
81        for i in range(numservers):
82            nodeid = myhash(str(seed)+str(i)).hexdigest()
83            capacity = SERVER_CAPACITY
84            s = Server(nodeid, capacity)
85            self.servers.append(s)
86        self.servers.sort(key=lambda s: s.nodeid)
87        self.permute = permute
88        #self.list_servers()
89
90    def list_servers(self):
91        for i in range(len(self.servers)):
92            s = self.servers[i]
93            next_s = self.servers[(i+1)%len(self.servers)]
94            diff = "%032x" % (int(next_s.nodeid,16) - int(s.nodeid,16))
95            s.next_diff = diff
96            prev_s = self.servers[(i-1)%len(self.servers)]
97            diff = "%032x" % (int(s.nodeid,16) - int(prev_s.nodeid,16))
98            s.prev_diff = diff
99            print(s, s.prev_diff)
100
101        print("sorted by delta")
102        for s in sorted(self.servers, key=lambda s:s.prev_diff):
103            print(s, s.prev_diff)
104
105    def servers_for_si(self, si):
106        if self.permute:
107            def sortkey(s):
108                return myhash(s.nodeid+si).digest()
109            return sorted(self.servers, key=sortkey)
110        for i in range(len(self.servers)):
111            if self.servers[i].nodeid >= si:
112                return self.servers[i:] + self.servers[:i]
113        return list(self.servers)
114
115    def show_servers(self, picked):
116        bits = []
117        for s in self.servers:
118            if s in picked:
119                bits.append("1")
120            else:
121                bits.append("0")
122        #d = [s in picked and "1" or "0" for s in self.servers]
123        return "".join(bits)
124
125    def dump_usage(self, numfiles, avg_space_per_file):
126        print("uploaded", numfiles)
127        # avg_space_per_file measures expected grid-wide ciphertext per file
128        used = list(reversed(sorted([s.used for s in self.servers])))
129        # used is actual per-server ciphertext
130        usedpf = [1.0*u/numfiles for u in used]
131        # usedpf is actual per-server-per-file ciphertext
132        #print("min/max usage: %s/%s" % (abbreviate_space(used[-1]),
133        #                                abbreviate_space(used[0])))
134        avg_usage_per_file = avg_space_per_file/len(self.servers)
135        # avg_usage_per_file is expected per-server-per-file ciphertext
136        spreadpf = usedpf[0] - usedpf[-1]
137        average_usagepf = sum(usedpf) / len(usedpf)
138        variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1)
139        std_deviation = math.sqrt(variance)
140        sd_of_total = std_deviation / avg_usage_per_file
141
142        print("min/max/(exp) usage-pf-ps %s/%s/(%s):" % (
143            abbreviate_space(usedpf[-1]),
144            abbreviate_space(usedpf[0]),
145            abbreviate_space(avg_usage_per_file) ), end=' ')
146        print("spread-pf: %s (%.2f%%)" % (
147            abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file), end=' ')
148        #print("average_usage:", abbreviate_space(average_usagepf))
149        print("stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
150                                       100.0*sd_of_total))
151        if self.SHOW_MINMAX:
152            s2 = sorted(self.servers, key=lambda s: s.used)
153            print("least:", s2[0].nodeid)
154            print("most:", s2[-1].nodeid)
155
156
157class Options(usage.Options):
158    optParameters = [
159        ("k", "k", 3, "required shares", int),
160        ("N", "N", 10, "total shares", int),
161        ("servers", None, 100, "number of servers", int),
162        ("seed", None, None, "seed to use for creating ring"),
163        ("fileseed", None, "blah", "seed to use for creating files"),
164        ("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
165        ]
166    def postOptions(self):
167        assert self["seed"]
168
169
170def do_run(ring, opts):
171    avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
172    fileseed = opts["fileseed"]
173    all_servers_have_room = True
174    no_files_have_wrapped = True
175    for filenum in count(0):
176        #used = list(reversed(sorted([s.used for s in ring.servers])))
177        #used = [s.used for s in ring.servers]
178        #print(used)
179        si = myhash(fileseed+str(filenum)).hexdigest()
180        filesize = make_up_a_file_size(si)
181        sharesize = filesize / opts["k"]
182        if filenum%4000==0 and filenum > 1:
183            ring.dump_usage(filenum, avg_space_per_file)
184        servers = ring.servers_for_si(si)
185        #print(ring.show_servers(servers[:opts["N"]]))
186        remaining_shares = opts["N"]
187        index = 0
188        server_was_full = False
189        file_was_wrapped = False
190        remaining_servers = set(servers)
191        while remaining_shares:
192            if index >= len(servers):
193                index = 0
194                file_was_wrapped = True
195            s = servers[index]
196            accepted = s.upload(sharesize)
197            if not accepted:
198                server_was_full = True
199                remaining_servers.discard(s)
200                if not remaining_servers:
201                    print("-- GRID IS FULL")
202                    ring.dump_usage(filenum, avg_space_per_file)
203                    return filenum
204                index += 1
205                continue
206            remaining_shares -= 1
207            index += 1
208        # file is done being uploaded
209
210        if server_was_full and all_servers_have_room:
211            all_servers_have_room = False
212            print("-- FIRST SERVER FULL")
213            ring.dump_usage(filenum, avg_space_per_file)
214        if file_was_wrapped and no_files_have_wrapped:
215            no_files_have_wrapped = False
216            print("-- FIRST FILE WRAPPED")
217            ring.dump_usage(filenum, avg_space_per_file)
218
219
220def do_ring(opts):
221    total_capacity = opts["servers"]*SERVER_CAPACITY
222    avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
223    avg_files = total_capacity / avg_space_per_file
224    print("expected number of uploads:", avg_files)
225    if opts["permute"]:
226        print(" PERMUTED")
227    else:
228        print(" LINEAR")
229    seed = opts["seed"]
230
231    ring = Ring(opts["servers"], seed, opts["permute"])
232    do_run(ring, opts)
233
234def run(opts):
235    do_ring(opts)
236
237if __name__ == "__main__":
238    opts = Options()
239    opts.parseOptions()
240    run(opts)
Note: See TracBrowser for help on using the repository browser.