1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | |
---|
4 | """ |
---|
5 | Given a list of nodeids and a 'convergence' file, create a bunch of files |
---|
6 | that will (when encoded at k=1,N=1) be uploaded to specific nodeids. |
---|
7 | |
---|
8 | Run this as follows: |
---|
9 | |
---|
10 | make-canary-files.py -c PATH/TO/convergence -n PATH/TO/nodeids -k 1 -N 1 |
---|
11 | |
---|
12 | It will create a directory named 'canaries', with one file per nodeid named |
---|
13 | '$NODEID-$NICKNAME.txt', that contains some random text. |
---|
14 | |
---|
15 | The 'nodeids' file should contain one base32 nodeid per line, followed by the |
---|
16 | optional nickname, like: |
---|
17 | |
---|
18 | --- |
---|
19 | 5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo server12 |
---|
20 | vb7vm2mneyid5jbyvcbk2wb5icdhwtun server13 |
---|
21 | ... |
---|
22 | --- |
---|
23 | |
---|
24 | The resulting 'canaries/5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo-server12.txt' file |
---|
25 | will, when uploaded with the given (convergence,k,N) pair, have its first |
---|
26 | share placed on the 5yyq/server12 storage server. If N>1, the other shares |
---|
27 | will be placed elsewhere, of course. |
---|
28 | |
---|
29 | This tool can be useful to construct a set of 'canary' files, which can then |
---|
30 | be uploaded to storage servers, and later downloaded to test a grid's health. |
---|
31 | If you are able to download the canary for server12 via some tahoe node X, |
---|
32 | then the following properties are known to be true: |
---|
33 | |
---|
34 | node X is running, and has established a connection to server12 |
---|
35 | server12 is running, and returning data for at least the given file |
---|
36 | |
---|
37 | Using k=1/N=1 creates a separate test for each server. The test process is |
---|
38 | then to download the whole directory of files (perhaps with a t=deep-check |
---|
39 | operation). |
---|
40 | |
---|
41 | Alternatively, you could upload with the usual k=3/N=10 and then move/delete |
---|
42 | shares to put all N shares on a single server. |
---|
43 | |
---|
44 | Note that any changes to the nodeid list will affect the placement of shares. |
---|
45 | Shares should be uploaded with the same nodeid list as this tool used when |
---|
46 | constructing the files. |
---|
47 | |
---|
48 | Also note that this tool uses the Tahoe codebase, so it should be run on a |
---|
49 | system where Tahoe is installed, or in a source tree with setup.py like this: |
---|
50 | |
---|
51 | setup.py run_with_pythonpath -p -c 'misc/make-canary-files.py ARGS..' |
---|
52 | """ |
---|
53 | |
---|
54 | from past.builtins import cmp |
---|
55 | |
---|
56 | import os, hashlib |
---|
57 | from twisted.python import usage |
---|
58 | from allmydata.immutable import upload |
---|
59 | from allmydata.util import base32 |
---|
60 | |
---|
class Options(usage.Options):
    # Command-line options for this tool, parsed by twisted.python.usage.
    #
    # optParameters entries are (long-name, short-name, default, description
    # [, coercion]); the trailing `int` coerces -k/-N values to integers.
    optParameters = [
        ("convergence", "c", None, "path to NODEDIR/private/convergence"),
        ("nodeids", "n", None, "path to file with one base32 nodeid per line"),
        ("k", "k", 1, "number of necessary shares, defaults to 1", int),
        ("N", "N", 1, "number of total shares, defaults to 1", int),
    ]
    # optFlags entries are (long-name, short-name, description); flags are
    # boolean-ish (0/1) after parsing.
    optFlags = [
        ("verbose", "v", "Be noisy"),
    ]
---|
71 | |
---|
# Parse the command line; usage.Options.parseOptions() reads sys.argv and
# raises UsageError (printing help) on bad input.
opts = Options()
opts.parseOptions()

# optFlags store 0/1; normalize to a real bool for the helpers below.
verbose = bool(opts["verbose"])
---|
76 | |
---|
# Load the nodeid list: maps binary nodeid -> nickname (or None when the
# line carried no nickname). Blank lines and '#' comments are skipped.
# Uses a context manager so the file handle is closed promptly (the
# original leaked it via open(...).readlines()), and iterates the file
# directly instead of materializing every line up front.
nodes = {}
with open(opts["nodeids"], "r") as nodeids_f:
    for line in nodeids_f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # Each line is "BASE32NODEID [NICKNAME]"; split at most once so a
        # nickname may itself contain whitespace.
        pieces = line.split(None, 1)
        if len(pieces) == 2:
            nodeid_s, nickname = pieces
        else:
            nodeid_s = pieces[0]
            nickname = None
        # NOTE(review): assumes allmydata.util.base32.a2b accepts this
        # str-typed input — confirm against the installed Tahoe version.
        nodeid = base32.a2b(nodeid_s)
        nodes[nodeid] = nickname
---|
90 | |
---|
# Warn when the user picks non-default encoding parameters: the uploading
# node's own k/N (not this tool's) determine real placement, so the two
# must agree, which historically required patching the Tahoe defaults.
if opts["k"] != 3 or opts["N"] != 10:
    print("note: using non-default k/N requires patching the Tahoe code")
    print("src/allmydata/client.py line 55, DEFAULT_ENCODING_PARAMETERS")

# Read the node's convergence secret (base32, possibly with trailing
# newline). The context manager closes the handle promptly — the original
# leaked it via open(...).read().
convergence_file = os.path.expanduser(opts["convergence"])
with open(convergence_file, "rb") as convergence_f:
    convergence_s = convergence_f.read().strip()
convergence = base32.a2b(convergence_s)
---|
98 | |
---|
def get_permuted_peers(key):
    """Return the nodeids from the global 'nodes' dict, ordered by the
    Tahoe permuted-peer rule: each nodeid is ranked by sha1(key + nodeid),
    where 'key' is the storage index. The first element is the server that
    would receive share 0.
    """
    results = []
    for nodeid in nodes:
        permuted = hashlib.sha1(key + nodeid).digest()
        results.append((permuted, nodeid))
    # BUG FIX: list.sort() takes no positional cmp function on Python 3,
    # so the old `results.sort(lambda a,b: cmp(a[0], b[0]))` raised
    # TypeError. Sorting the (permuted, nodeid) tuples directly orders by
    # the permuted hash first, which is equivalent (sha1 digests are
    # effectively collision-free here).
    results.sort()
    return [r[1] for r in results]
---|
106 | |
---|
def find_share_for_target(target):
    """Brute-force random file contents until a file is found whose first
    share would land on 'target' (a binary nodeid from the global 'nodes'
    dict), then write that file into the 'canaries' directory.

    Returns the number of candidate contents tried. Expected attempts is
    roughly len(nodes), since each random storage index is equally likely
    to put any server first.
    """
    target_s = base32.b2a(target)
    prefix = "The first share of this file will be placed on " + target_s + "\n"
    prefix += "This data is random: "
    attempts = 0
    while True:
        attempts += 1
        suffix = base32.b2a(os.urandom(10))
        if verbose: print(" trying", suffix, end=' ')
        data = prefix + suffix + "\n"
        assert len(data) > 55  # no LIT files: tiny files are stored inline, not on servers
        # now, what storage index will this get?
        u = upload.Data(data, convergence)
        eu = upload.EncryptAnUploadable(u)
        d = eu.get_storage_index() # this happens to run synchronously
        def _got_si(si, data=data):
            if verbose: print("SI", base32.b2a(si), end=' ')
            peerlist = get_permuted_peers(si)
            if peerlist[0] == target:
                # great! the first permuted peer is our target server
                if verbose: print(" yay!")
                fn = base32.b2a(target)
                if nodes[target]:
                    # sanitize the nickname so it is filesystem-safe
                    nickname = nodes[target].replace("/", "_")
                    fn += "-" + nickname
                fn += ".txt"
                fn = os.path.join("canaries", fn)
                # BUG FIX: use a context manager so the output handle is
                # closed (and its buffer flushed) instead of being leaked
                # by the old open(fn, "w").write(data).
                with open(fn, "w") as out:
                    out.write(data)
                return True
            # nope, must try again
            if verbose: print(" boo")
            return False
        d.addCallback(_got_si)
        # get sneaky and look inside the Deferred for the synchronous result;
        # get_storage_index() happens to fire immediately, so .result is set.
        if d.result:
            return attempts
---|
143 | |
---|
# Build one canary file per server and report how much brute-forcing it
# took. os.mkdir deliberately fails if 'canaries' already exists, so an
# old run's files are never silently mixed with new ones.
os.mkdir("canaries")
attempts = []
for target in nodes:
    print("working on", base32.b2a(target))
    attempts.append(find_share_for_target(target))
print("done")
total = sum(attempts)
print("%d attempts total, avg %d per target, max %d"
      % (total, 1.0 * total / len(nodes), max(attempts)))
---|
153 | |
---|
154 | |
---|