1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | |
---|
4 | """ |
---|
5 | Given a list of nodeids and a 'convergence' file, create a bunch of files |
---|
6 | that will (when encoded at k=1,N=1) be uploaded to specific nodeids. |
---|
7 | |
---|
8 | Run this as follows: |
---|
9 | |
---|
10 | make-canary-files.py -c PATH/TO/convergence -n PATH/TO/nodeids -k 1 -N 1 |
---|
11 | |
---|
12 | It will create a directory named 'canaries', with one file per nodeid named |
---|
13 | '$NODEID-$NICKNAME.txt', that contains some random text. |
---|
14 | |
---|
15 | The 'nodeids' file should contain one base32 nodeid per line, followed by the |
---|
16 | optional nickname, like: |
---|
17 | |
---|
18 | --- |
---|
19 | 5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo server12 |
---|
20 | vb7vm2mneyid5jbyvcbk2wb5icdhwtun server13 |
---|
21 | ... |
---|
22 | --- |
---|
23 | |
---|
24 | The resulting 'canaries/5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo-server12.txt' file |
---|
25 | will, when uploaded with the given (convergence,k,N) pair, have its first |
---|
26 | share placed on the 5yyq/server12 storage server. If N>1, the other shares |
---|
27 | will be placed elsewhere, of course. |
---|
28 | |
---|
29 | This tool can be useful to construct a set of 'canary' files, which can then |
---|
30 | be uploaded to storage servers, and later downloaded to test a grid's health. |
---|
31 | If you are able to download the canary for server12 via some tahoe node X, |
---|
32 | then the following properties are known to be true: |
---|
33 | |
---|
34 | node X is running, and has established a connection to server12 |
---|
35 | server12 is running, and returning data for at least the given file |
---|
36 | |
---|
37 | Using k=1/N=1 creates a separate test for each server. The test process is |
---|
38 | then to download the whole directory of files (perhaps with a t=deep-check |
---|
39 | operation). |
---|
40 | |
---|
41 | Alternatively, you could upload with the usual k=3/N=10 and then move/delete |
---|
42 | shares to put all N shares on a single server. |
---|
43 | |
---|
44 | Note that any changes to the nodeid list will affect the placement of shares. |
---|
45 | Shares should be uploaded with the same nodeid list as this tool used when |
---|
46 | constructing the files. |
---|
47 | |
---|
48 | Also note that this tool uses the Tahoe codebase, so it should be run on a |
---|
49 | system where Tahoe is installed, or in a source tree with setup.py like this: |
---|
50 | |
---|
51 | setup.py run_with_pythonpath -p -c 'misc/make-canary-files.py ARGS..' |
---|
52 | """ |
---|
53 | |
---|
54 | from past.builtins import cmp |
---|
55 | |
---|
56 | import os, hashlib |
---|
57 | from twisted.python import usage |
---|
58 | from allmydata.immutable import upload |
---|
59 | from allmydata.util import base32 |
---|
60 | |
---|
class Options(usage.Options):
    # Command-line options for this tool, parsed by twisted.python.usage.
    #
    # optParameters entries are (long-name, short-name, default, description
    # [, coercion]); the trailing `int` coerces -k/-N values to integers.
    optParameters = [
        ("convergence", "c", None, "path to NODEDIR/private/convergence"),
        ("nodeids", "n", None, "path to file with one base32 nodeid per line"),
        ("k", "k", 1, "number of necessary shares, defaults to 1", int),
        ("N", "N", 1, "number of total shares, defaults to 1", int),
    ]
    # optFlags entries are (long-name, short-name, description); flags are
    # boolean-ish (0/1) after parsing.
    optFlags = [
        ("verbose", "v", "Be noisy"),
    ]
---|
71 | |
---|
# Parse the command line; usage.Options.parseOptions() reads sys.argv and
# raises UsageError (printing help) on bad input.
opts = Options()
opts.parseOptions()

# optFlags store 0/1; normalize to a real bool for the helpers below.
verbose = bool(opts["verbose"])
---|
76 | |
---|
# Load the nodeid list: maps binary nodeid -> nickname (or None when the
# line carried no nickname). Blank lines and '#' comments are skipped.
# Uses a context manager so the file handle is closed promptly (the
# original leaked it via open(...).readlines()), and iterates the file
# directly instead of materializing every line up front.
nodes = {}
with open(opts["nodeids"], "r") as nodeids_f:
    for line in nodeids_f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # Each line is "BASE32NODEID [NICKNAME]"; split at most once so a
        # nickname may itself contain whitespace.
        pieces = line.split(None, 1)
        if len(pieces) == 2:
            nodeid_s, nickname = pieces
        else:
            nodeid_s = pieces[0]
            nickname = None
        # NOTE(review): assumes allmydata.util.base32.a2b accepts this
        # str-typed input — confirm against the installed Tahoe version.
        nodeid = base32.a2b(nodeid_s)
        nodes[nodeid] = nickname
---|
90 | |
---|
# Warn when the user picks non-default encoding parameters: the uploading
# node's own k/N (not this tool's) determine real placement, so the two
# must agree, which historically required patching the Tahoe defaults.
if opts["k"] != 3 or opts["N"] != 10:
    print("note: using non-default k/N requires patching the Tahoe code")
    print("src/allmydata/client.py line 55, DEFAULT_ENCODING_PARAMETERS")

# Read the node's convergence secret (base32, possibly with trailing
# newline). The context manager closes the handle promptly — the original
# leaked it via open(...).read().
convergence_file = os.path.expanduser(opts["convergence"])
with open(convergence_file, "rb") as convergence_f:
    convergence_s = convergence_f.read().strip()
convergence = base32.a2b(convergence_s)
---|
98 | |
---|
def get_permuted_peers(key):
    """Return the nodeids from the global 'nodes' dict, ordered by the
    Tahoe permuted-peer rule: each nodeid is ranked by sha1(key + nodeid),
    where 'key' is the storage index. The first element is the server that
    would receive share 0.
    """
    results = []
    for nodeid in nodes:
        permuted = hashlib.sha1(key + nodeid).digest()
        results.append((permuted, nodeid))
    # BUG FIX: list.sort() takes no positional cmp function on Python 3,
    # so the old `results.sort(lambda a,b: cmp(a[0], b[0]))` raised
    # TypeError. Sorting the (permuted, nodeid) tuples directly orders by
    # the permuted hash first, which is equivalent (sha1 digests are
    # effectively collision-free here).
    results.sort()
    return [r[1] for r in results]
---|
106 | |
---|
def find_share_for_target(target):
    """Brute-force random file contents until a file is found whose first
    share would land on 'target' (a binary nodeid from the global 'nodes'
    dict), then write that file into the 'canaries' directory.

    Returns the number of candidate contents tried. Expected attempts is
    roughly len(nodes), since each random storage index is equally likely
    to put any server first.
    """
    target_s = base32.b2a(target)
    prefix = "The first share of this file will be placed on " + target_s + "\n"
    prefix += "This data is random: "
    attempts = 0
    while True:
        attempts += 1
        suffix = base32.b2a(os.urandom(10))
        if verbose: print(" trying", suffix, end=' ')
        data = prefix + suffix + "\n"
        assert len(data) > 55  # no LIT files: tiny files are stored inline, not on servers
        # now, what storage index will this get?
        u = upload.Data(data, convergence)
        eu = upload.EncryptAnUploadable(u)
        d = eu.get_storage_index() # this happens to run synchronously
        def _got_si(si, data=data):
            if verbose: print("SI", base32.b2a(si), end=' ')
            peerlist = get_permuted_peers(si)
            if peerlist[0] == target:
                # great! the first permuted peer is our target server
                if verbose: print(" yay!")
                fn = base32.b2a(target)
                if nodes[target]:
                    # sanitize the nickname so it is filesystem-safe
                    nickname = nodes[target].replace("/", "_")
                    fn += "-" + nickname
                fn += ".txt"
                fn = os.path.join("canaries", fn)
                # BUG FIX: use a context manager so the output handle is
                # closed (and its buffer flushed) instead of being leaked
                # by the old open(fn, "w").write(data).
                with open(fn, "w") as out:
                    out.write(data)
                return True
            # nope, must try again
            if verbose: print(" boo")
            return False
        d.addCallback(_got_si)
        # get sneaky and look inside the Deferred for the synchronous result;
        # get_storage_index() happens to fire immediately, so .result is set.
        if d.result:
            return attempts
---|
143 | |
---|
# Build one canary file per server and report how much brute-forcing it
# took. os.mkdir deliberately fails if 'canaries' already exists, so an
# old run's files are never silently mixed with new ones.
os.mkdir("canaries")
attempts = []
for target in nodes:
    print("working on", base32.b2a(target))
    attempts.append(find_share_for_target(target))
print("done")
total = sum(attempts)
print("%d attempts total, avg %d per target, max %d"
      % (total, 1.0 * total / len(nodes), max(attempts)))
---|
153 | |
---|
154 | |
---|