
from nevow import inevow, rend, loaders, tags as T
import math
import util

# factorial and binomial copied from
# http://mail.python.org/pipermail/python-list/2007-April/435718.html

def div_ceil(n, d):
    """
    The smallest integer k such that k*d >= n.
    """
    return (n // d) + (n % d != 0)

def factorial(n):
    """factorial(n): return the factorial of the non-negative integer n.
    factorial(0) = 1
    """
    assert n >= 0
    result = 1
    for i in range(1, n+1):
        result *= i
    return result

def binomial(n, k):
    assert 0 <= k <= n
    if k == 0 or k == n:
        return 1
    # calculate n!/k! as one product, avoiding factors that
    # just get canceled
    P = k+1
    for i in range(k+2, n+1):
        P *= i
    # if you are paranoid:
    # C, rem = divmod(P, factorial(n-k))
    # assert rem == 0
    # return C
    return P//factorial(n-k)
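
# A quick sanity check of the helpers above (illustrative values only):
#   div_ceil(7, 3)  == 3    (three 3-unit drives hold 7 units)
#   factorial(5)    == 120
#   binomial(10, 3) == 120  (ways to choose 3 shares out of 10)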

class ProvisioningTool(rend.Page):
    addSlash = True
    docFactory = loaders.xmlfile(util.sibling("provisioning.xhtml"))

    def render_forms(self, ctx, data):
        req = inevow.IRequest(ctx)

        def getarg(name, astype=int):
            if req.method != b"POST":
                return None
            if name in req.fields:
                return astype(req.fields[name].value)
            return None
        return self.do_forms(getarg)


    def do_forms(self, getarg):
        filled = getarg("filled", bool)

        def get_and_set(name, options, default=None, astype=int):
            current_value = getarg(name, astype)
            i_select = T.select(name=name)
            for (count, description) in options:
                count = astype(count)
                if ((current_value is not None and count == current_value) or
                    (current_value is None and count == default)):
                    o = T.option(value=str(count), selected="true")[description]
                else:
                    o = T.option(value=str(count))[description]
                i_select = i_select[o]
            if current_value is None:
                current_value = default
            return current_value, i_select

        sections = {}
        def add_input(section, text, entry):
            if section not in sections:
                sections[section] = []
            sections[section].extend([T.div[text, ": ", entry], "\n"])

        def add_output(section, entry):
            if section not in sections:
                sections[section] = []
            sections[section].extend([entry, "\n"])

        def build_section(section):
            return T.fieldset[T.legend[section], sections[section]]

        def number(value, suffix=""):
            scaling = 1
            if value < 1:
                fmt = "%1.2g%s"
            elif value < 100:
                fmt = "%.1f%s"
            elif value < 1000:
                fmt = "%d%s"
            elif value < 1e6:
                fmt = "%.2fk%s"; scaling = 1e3
            elif value < 1e9:
                fmt = "%.2fM%s"; scaling = 1e6
            elif value < 1e12:
                fmt = "%.2fG%s"; scaling = 1e9
            elif value < 1e15:
                fmt = "%.2fT%s"; scaling = 1e12
            elif value < 1e18:
                fmt = "%.2fP%s"; scaling = 1e15
            else:
                fmt = "huge! %g%s"
            return fmt % (value / scaling, suffix)

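        # Illustrative outputs from number(): number(1234) == "1.23k" and
        # number(5e9, "B") == "5.00GB", so the figures below read naturally.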
        user_counts = [(5, "5 users"),
                       (50, "50 users"),
                       (200, "200 users"),
                       (1000, "1k users"),
                       (10000, "10k users"),
                       (50000, "50k users"),
                       (100000, "100k users"),
                       (500000, "500k users"),
                       (1000000, "1M users"),
                       ]
        num_users, i_num_users = get_and_set("num_users", user_counts, 50000)
        add_input("Users",
                  "How many users are on this network?", i_num_users)

        files_per_user_counts = [(100, "100 files"),
                                 (1000, "1k files"),
                                 (10000, "10k files"),
                                 (100000, "100k files"),
                                 (1e6, "1M files"),
                                 ]
        files_per_user, i_files_per_user = get_and_set("files_per_user",
                                                       files_per_user_counts,
                                                       1000)
        add_input("Users",
                  "How many files for each user? (avg)",
                  i_files_per_user)

        space_per_user_sizes = [(1e6, "1MB"),
                                (10e6, "10MB"),
                                (100e6, "100MB"),
                                (200e6, "200MB"),
                                (1e9, "1GB"),
                                (2e9, "2GB"),
                                (5e9, "5GB"),
                                (10e9, "10GB"),
                                (100e9, "100GB"),
                                (1e12, "1TB"),
                                (2e12, "2TB"),
                                (5e12, "5TB"),
                                ]
        # Estimate ~5GB per user as a more realistic default
        space_per_user, i_space_per_user = get_and_set("space_per_user",
                                                       space_per_user_sizes,
                                                       5e9)
        add_input("Users",
                  "How much data for each user? (avg)",
                  i_space_per_user)

        sharing_ratios = [(1.0, "1.0x"),
                          (1.1, "1.1x"),
                          (2.0, "2.0x"),
                          ]
        sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio",
                                                     sharing_ratios, 1.0,
                                                     float)
        add_input("Users",
                  "What is the sharing ratio? (1.0x is no-sharing and"
                  " no convergence)", i_sharing_ratio)

        # Encoding parameters
        encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"),
                            ("3-of-10-8", "3.3x (3-of-10, repair below 8)"),
                            ("5-of-10-7", "2x (5-of-10, repair below 7)"),
                            ("8-of-10-9", "1.25x (8-of-10, repair below 9)"),
                            ("27-of-30-28", "1.1x (27-of-30, repair below 28)"),
                            ("25-of-100-50", "4x (25-of-100, repair below 50)"),
                            ]
        encoding_parameters, i_encoding_parameters = \
            get_and_set("encoding_parameters",
                        encoding_choices, "3-of-10-5", str)
        encoding_pieces = encoding_parameters.split("-")
        k = int(encoding_pieces[0])
        assert encoding_pieces[1] == "of"
        n = int(encoding_pieces[2])
        # we repair the file when the number of available shares drops below
        # this value
        repair_threshold = int(encoding_pieces[3])

        add_input("Servers",
                  "What are the default encoding parameters?",
                  i_encoding_parameters)
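        # Worked example (illustrative): "3-of-10-5" parses to k=3, n=10,
        # repair_threshold=5, i.e. any 3 of the 10 shares recover the file
        # (a 10/3 = 3.3x expansion), and repair triggers below 5 shares.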

        # Server info
        num_server_choices = [ (5, "5 servers"),
                               (10, "10 servers"),
                               (15, "15 servers"),
                               (30, "30 servers"),
                               (50, "50 servers"),
                               (100, "100 servers"),
                               (200, "200 servers"),
                               (300, "300 servers"),
                               (500, "500 servers"),
                               (1000, "1k servers"),
                               (2000, "2k servers"),
                               (5000, "5k servers"),
                               (10e3, "10k servers"),
                               (100e3, "100k servers"),
                               (1e6, "1M servers"),
                               ]
        num_servers, i_num_servers = \
            get_and_set("num_servers", num_server_choices, 30, int)
        add_input("Servers",
                  "How many servers are there?", i_num_servers)

        # availability is measured in dBA = -dBF, where dBF = 10*log10(P_failure):
        # 0 dBA is 100% failure, 10 dBA is 10% failure, 20 dBA is 1% failure, etc
        server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"),
                               (13, "95% [13dBA] (1.2hr/day)"),
                               (20, "99% [20dBA] (14min/day or 3.5days/year)"),
                               (23, "99.5% [23dBA] (7min/day or 1.75days/year)"),
                               (30, "99.9% [30dBA] (87sec/day or 9hours/year)"),
                               (40, "99.99% [40dBA] (60sec/week or 53min/year)"),
                               (50, "99.999% [50dBA] (5min per year)"),
                               ]
        server_dBA, i_server_availability = \
            get_and_set("server_availability",
                        server_dBA_choices,
                        20, int)
        add_input("Servers",
                  "What is the server availability?", i_server_availability)
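        # Worked example (illustrative): 20 dBA means P(server down) =
        # 10**(-20/10) = 1%, i.e. each server is available 99% of the time.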

        drive_MTBF_choices = [ (40, "40,000 Hours"),
                               ]
        drive_MTBF, i_drive_MTBF = \
            get_and_set("drive_MTBF", drive_MTBF_choices, 40, int)
        add_input("Drives",
                  "What is the hard drive MTBF?", i_drive_MTBF)
        # http://www.tgdaily.com/content/view/30990/113/
        # http://labs.google.com/papers/disk_failures.pdf
        # google sees:
        #  1.7% of the drives they replaced were 0-1 years old
        #  8% of the drives they replaced were 1-2 years old
        #  8.6% were 2-3 years old
        #  6% were 3-4 years old, about 8% were 4-5 years old

        drive_size_choices = [ (100, "100 GB"),
                               (250, "250 GB"),
                               (500, "500 GB"),
                               (750, "750 GB"),
                               (1000, "1000 GB"),
                               (2000, "2000 GB"),
                               (3000, "3000 GB"),
                               ]
        drive_size, i_drive_size = \
            get_and_set("drive_size", drive_size_choices, 3000, int)
        drive_size = drive_size * 1e9
        add_input("Drives",
                  "What is the capacity of each hard drive?", i_drive_size)
        drive_failure_model_choices = [ ("E", "Exponential"),
                                        ("U", "Uniform"),
                                        ]
        drive_failure_model, i_drive_failure_model = \
            get_and_set("drive_failure_model",
                        drive_failure_model_choices,
                        "E", str)
        add_input("Drives",
                  "How should we model drive failures?", i_drive_failure_model)

        # drive_failure_rate is in failures per second; drive_MTBF is in
        # thousands of hours
        if drive_failure_model == "E":
            drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600)
        else:
            drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600)
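        # Worked example (illustrative): a 40,000-hour MTBF under the
        # exponential model gives 1.0 / (40 * 1000 * 3600) ~= 6.9e-9
        # failures per second, roughly one failure per 4.6 years per drive.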

        # deletion/gc/ownership mode
        ownership_choices = [ ("A", "no deletion, no gc, no owners"),
                              ("B", "deletion, no gc, no owners"),
                              ("C", "deletion, share timers, no owners"),
                              ("D", "deletion, no gc, yes owners"),
                              ("E", "deletion, owner timers"),
                              ]
        ownership_mode, i_ownership_mode = \
            get_and_set("ownership_mode", ownership_choices,
                        "A", str)
        add_input("Servers",
                  "What is the ownership mode?", i_ownership_mode)

        # client access behavior
        access_rates = [ (1, "one file per day"),
                         (10, "10 files per day"),
                         (100, "100 files per day"),
                         (1000, "1k files per day"),
                         (10e3, "10k files per day"),
                         (100e3, "100k files per day"),
                         ]
        download_files_per_day, i_download_rate = \
            get_and_set("download_rate", access_rates,
                        100, int)
        add_input("Users",
                  "How many files are downloaded per day?", i_download_rate)
        download_rate = 1.0 * download_files_per_day / (24*60*60)
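        # e.g. (illustrative) 100 files/day / 86400 s/day ~= 1.2e-3 files per
        # second per client; the per-server rates computed below scale the
        # client rates by num_users/num_servers.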

        upload_files_per_day, i_upload_rate = \
            get_and_set("upload_rate", access_rates,
                        10, int)
        add_input("Users",
                  "How many files are uploaded per day?", i_upload_rate)
        upload_rate = 1.0 * upload_files_per_day / (24*60*60)

        delete_files_per_day, i_delete_rate = \
            get_and_set("delete_rate", access_rates,
                        10, int)
        add_input("Users",
                  "How many files are deleted per day?", i_delete_rate)
        delete_rate = 1.0 * delete_files_per_day / (24*60*60)


        # the lease timer value is in days
        lease_timers = [ (1, "one refresh per day"),
                         (7, "one refresh per week"),
                         ]
        lease_timer, i_lease = \
            get_and_set("lease_timer", lease_timers,
                        7, int)
        add_input("Users",
                  "How frequently do clients refresh files or accounts? "
                  "(if necessary)",
                  i_lease)
        seconds_per_lease = 24*60*60*lease_timer

        check_timer_choices = [ (1, "every week"),
                                (4, "every month"),
                                (8, "every two months"),
                                (16, "every four months"),
                                ]
        check_timer, i_check_timer = \
            get_and_set("check_timer", check_timer_choices, 4, int)
        add_input("Users",
                  "How frequently should we check on each file?",
                  i_check_timer)
        file_check_interval = check_timer * 7 * 24 * 3600


        if filled:
            add_output("Users", T.div["Total users: %s" % number(num_users)])
            add_output("Users",
                       T.div["Files per user: %s" % number(files_per_user)])
            file_size = 1.0 * space_per_user / files_per_user
            add_output("Users",
                       T.div["Average file size: ", number(file_size)])
            total_files = num_users * files_per_user / sharing_ratio

            add_output("Grid",
                       T.div["Total number of files in grid: ",
                             number(total_files)])
            total_space = num_users * space_per_user / sharing_ratio
            add_output("Grid",
                       T.div["Total volume of plaintext in grid: ",
                             number(total_space, "B")])

            total_shares = n * total_files
            add_output("Grid",
                       T.div["Total shares in grid: ", number(total_shares)])
            expansion = float(n) / float(k)

            total_usage = expansion * total_space
            add_output("Grid",
                       T.div["Share data in grid: ", number(total_usage, "B")])
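            # Worked example (illustrative): with 3-of-10 encoding the
            # expansion factor is 10/3 ~= 3.33, so 1PB of plaintext becomes
            # roughly 3.33PB of share data spread across the grid.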

            if n > num_servers:
                # silly configuration, causes Tahoe2 to wrap and put multiple
                # shares on some servers.
                add_output("Servers",
                           T.div["non-ideal: more shares than servers"
                                 " (n=%d, servers=%d)" % (n, num_servers)])
                # every file has at least one share on every server
                buckets_per_server = total_files
                shares_per_server = total_files * ((1.0 * n) / num_servers)
            else:
                # if nobody is full, then no lease requests will be turned
                # down for lack of space, and no two shares for the same file
                # will share a server. Therefore the chance that any given
                # file has a share on any given server is n/num_servers.
                buckets_per_server = total_files * ((1.0 * n) / num_servers)
                # since each such represented file only puts one share on a
                # server, the total number of shares per server is the same.
                shares_per_server = buckets_per_server
            add_output("Servers",
                       T.div["Buckets per server: ",
                             number(buckets_per_server)])
            add_output("Servers",
                       T.div["Shares per server: ",
                             number(shares_per_server)])
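            # Worked example (illustrative): 1M total files at 10 shares each
            # spread over 30 servers puts about 1e6 * 10 / 30 ~= 333k shares
            # (and buckets) on each server.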

            # how much space is used on the storage servers for the shares?
            #  the share data itself
            share_data_per_server = total_usage / num_servers
            add_output("Servers",
                       T.div["Share data per server: ",
                             number(share_data_per_server, "B")])
            # this is determined empirically. H=hashsize=32, for a one-segment
            # file and 3-of-10 encoding
            share_validation_per_server = 266 * shares_per_server
            # this could be 423*buckets_per_server, if we moved the URI
            # extension into a separate file, but that would actually consume
            # *more* space (minimum filesize is 4KiB), unless we moved all
            # shares for a given bucket into a single file.
            share_uri_extension_per_server = 423 * shares_per_server

            # ownership mode adds per-bucket data
            H = 32 # depends upon the desired security of delete/refresh caps
            # bucket_lease_size is the amount of data needed to keep track of
            # the delete/refresh caps for each bucket.
            bucket_lease_size = 0
            client_bucket_refresh_rate = 0
            owner_table_size = 0
            if ownership_mode in ("B", "C", "D", "E"):
                bucket_lease_size = sharing_ratio * 1.0 * H
            if ownership_mode in ("B", "C"):
                # refreshes per second per client
                client_bucket_refresh_rate = (1.0 * n * files_per_user /
                                              seconds_per_lease)
                add_output("Users",
                           T.div["Client share refresh rate (outbound): ",
                                 number(client_bucket_refresh_rate, "Hz")])
                server_bucket_refresh_rate = (client_bucket_refresh_rate *
                                              num_users / num_servers)
                add_output("Servers",
                           T.div["Server share refresh rate (inbound): ",
                                 number(server_bucket_refresh_rate, "Hz")])
            if ownership_mode in ("D", "E"):
                # each server must maintain a bidirectional mapping from
                # buckets to owners. One way to implement this would be to
                # put a list of four-byte owner numbers into each bucket, and
                # a list of four-byte share numbers into each owner (although
                # of course we'd really just throw it into a database and let
                # the experts take care of the details).
                owner_table_size = 2*(buckets_per_server * sharing_ratio * 4)

            if ownership_mode in ("E",):
                # in this mode, clients must refresh one timer per server
                client_account_refresh_rate = (1.0 * num_servers /
                                               seconds_per_lease)
                add_output("Users",
                           T.div["Client account refresh rate (outbound): ",
                                 number(client_account_refresh_rate, "Hz")])
                server_account_refresh_rate = (client_account_refresh_rate *
                                               num_users / num_servers)
                add_output("Servers",
                           T.div["Server account refresh rate (inbound): ",
                                 number(server_account_refresh_rate, "Hz")])

            # TODO: buckets vs shares here is a bit wonky, but in
            # non-wrapping grids it shouldn't matter
            share_lease_per_server = bucket_lease_size * buckets_per_server
            share_ownertable_per_server = owner_table_size

            share_space_per_server = (share_data_per_server +
                                      share_validation_per_server +
                                      share_uri_extension_per_server +
                                      share_lease_per_server +
                                      share_ownertable_per_server)
            add_output("Servers",
                       T.div["Share space per server: ",
                             number(share_space_per_server, "B"),
                             " (data ",
                             number(share_data_per_server, "B"),
                             ", validation ",
                             number(share_validation_per_server, "B"),
                             ", UEB ",
                             number(share_uri_extension_per_server, "B"),
                             ", lease ",
                             number(share_lease_per_server, "B"),
                             ", ownertable ",
                             number(share_ownertable_per_server, "B"),
                             ")",
                             ])


            # rates
            client_download_share_rate = download_rate * k
            client_download_byte_rate = download_rate * file_size
            add_output("Users",
                       T.div["download rate: shares = ",
                             number(client_download_share_rate, "Hz"),
                             " , bytes = ",
                             number(client_download_byte_rate, "Bps"),
                             ])
            total_file_check_rate = 1.0 * total_files / file_check_interval
            client_check_share_rate = total_file_check_rate / num_users
            add_output("Users",
                       T.div["file check rate: shares = ",
                             number(client_check_share_rate, "Hz"),
                             " (interval = %s)" %
                             number(1 / client_check_share_rate, "s"),
                             ])

            client_upload_share_rate = upload_rate * n
            # TODO: doesn't include overhead
            client_upload_byte_rate = upload_rate * file_size * expansion
            add_output("Users",
                       T.div["upload rate: shares = ",
                             number(client_upload_share_rate, "Hz"),
                             " , bytes = ",
                             number(client_upload_byte_rate, "Bps"),
                             ])
            client_delete_share_rate = delete_rate * n

            server_inbound_share_rate = (client_upload_share_rate *
                                         num_users / num_servers)
            server_inbound_byte_rate = (client_upload_byte_rate *
                                        num_users / num_servers)
            add_output("Servers",
                       T.div["upload rate (inbound): shares = ",
                             number(server_inbound_share_rate, "Hz"),
                             " , bytes = ",
                             number(server_inbound_byte_rate, "Bps"),
                             ])
            add_output("Servers",
                       T.div["share check rate (inbound): ",
                             number(total_file_check_rate * n / num_servers,
                                    "Hz"),
                             ])

            server_share_modify_rate = ((client_upload_share_rate +
                                         client_delete_share_rate) *
                                        num_users / num_servers)
            add_output("Servers",
                       T.div["share modify rate: shares = ",
                             number(server_share_modify_rate, "Hz"),
                             ])

            server_outbound_share_rate = (client_download_share_rate *
                                          num_users / num_servers)
            server_outbound_byte_rate = (client_download_byte_rate *
                                         num_users / num_servers)
            add_output("Servers",
                       T.div["download rate (outbound): shares = ",
                             number(server_outbound_share_rate, "Hz"),
                             " , bytes = ",
                             number(server_outbound_byte_rate, "Bps"),
                             ])


            total_share_space = num_servers * share_space_per_server
            add_output("Grid",
                       T.div["Share space consumed: ",
                             number(total_share_space, "B")])
            add_output("Grid",
                       T.div[" %% validation: %.2f%%" %
                             (100.0 * share_validation_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div[" %% uri-extension: %.2f%%" %
                             (100.0 * share_uri_extension_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div[" %% lease data: %.2f%%" %
                             (100.0 * share_lease_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div[" %% owner data: %.2f%%" %
                             (100.0 * share_ownertable_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div[" %% share data: %.2f%%" %
                             (100.0 * share_data_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div["file check rate: ",
                             number(total_file_check_rate,
                                    "Hz")])

            total_drives = max(div_ceil(int(total_share_space),
                                        int(drive_size)),
                               num_servers)
            add_output("Drives",
                       T.div["Total drives: ", number(total_drives), " drives"])
            drives_per_server = div_ceil(total_drives, num_servers)
            add_output("Servers",
                       T.div["Drives per server: ", drives_per_server])

            # costs
            if drive_size == 3000 * 1e9:
                add_output("Servers", T.div["3000GB drive: $250 each"])
                drive_cost = 250
            else:
                add_output("Servers",
                           T.div[T.b["unknown cost per drive, assuming $100"]])
                drive_cost = 100

            if drives_per_server <= 4:
                add_output("Servers", T.div["1U box with <= 4 drives: $1500"])
                server_cost = 1500 # typical 1U box
            elif drives_per_server <= 12:
                add_output("Servers", T.div["2U box with <= 12 drives: $2500"])
                server_cost = 2500 # 2U box
            else:
                add_output("Servers",
                           T.div[T.b["Note: too many drives per server, "
                                     "assuming $3000"]])
                server_cost = 3000

            server_capital_cost = (server_cost + drives_per_server * drive_cost)
            total_server_cost = float(num_servers * server_capital_cost)
            add_output("Servers", T.div["Capital cost per server: $",
                                        server_capital_cost])
            add_output("Grid", T.div["Capital cost for all servers: $",
                                     number(total_server_cost)])
            # $70/Mbps/mo
            # $44/server/mo power+space
            server_bandwidth = max(server_inbound_byte_rate,
                                   server_outbound_byte_rate)
            server_bandwidth_mbps = div_ceil(int(server_bandwidth*8), int(1e6))
            server_monthly_cost = 70*server_bandwidth_mbps + 44
            add_output("Servers", T.div["Monthly cost per server: $",
                                        server_monthly_cost])
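            # Worked example (illustrative): a server moving 2MBps in its
            # busier direction needs 16 Mbps, so its monthly cost comes to
            # 70*16 + 44 = $1164.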
            add_output("Users", T.div["Capital cost per user: $",
                                      number(total_server_cost / num_users)])

            # reliability
            any_drive_failure_rate = total_drives * drive_failure_rate
            any_drive_MTBF = 1 // any_drive_failure_rate # in seconds
            any_drive_MTBF_days = any_drive_MTBF / 86400
            add_output("Drives",
                       T.div["MTBF (any drive): ",
                             number(any_drive_MTBF_days), " days"])
            drive_replacement_monthly_cost = (float(drive_cost)
                                              * any_drive_failure_rate
                                              * 30 * 86400)
            add_output("Grid",
                       T.div["Monthly cost of replacing drives: $",
                             number(drive_replacement_monthly_cost)])

            total_server_monthly_cost = float(num_servers * server_monthly_cost
                                              + drive_replacement_monthly_cost)

            add_output("Grid", T.div["Monthly cost for all servers: $",
                                     number(total_server_monthly_cost)])
            add_output("Users",
                       T.div["Monthly cost per user: $",
                             number(total_server_monthly_cost / num_users)])

            # availability
            file_dBA = self.file_availability(k, n, server_dBA)
            user_files_dBA = self.many_files_availability(file_dBA,
                                                          files_per_user)
            all_files_dBA = self.many_files_availability(file_dBA, total_files)
            add_output("Users",
                       T.div["availability of: ",
                             "arbitrary file = %d dBA, " % file_dBA,
                             "all files of user1 = %d dBA, " % user_files_dBA,
                             "all files in grid = %d dBA" % all_files_dBA,
                             ],
                       )

            time_until_files_lost = (n-k+1) / any_drive_failure_rate
            add_output("Grid",
                       T.div["avg time until files are lost: ",
                             number(time_until_files_lost, "s"), ", ",
                             number(time_until_files_lost/86400, " days"),
                             ])

            share_data_loss_rate = any_drive_failure_rate * drive_size
            add_output("Grid",
                       T.div["share data loss rate: ",
                             number(share_data_loss_rate, "Bps")])

            # the worst-case survival numbers occur when we do a file check
            # and the file is just above the threshold for repair (so we
            # decide to not repair it). The question is then: what is the
            # chance that the file will decay so badly before the next check
            # that we can't recover it? The resulting probability is per
            # check interval.
            # Note that the chances of us getting into this situation are low.
            P_disk_failure_during_interval = (drive_failure_rate *
                                              file_check_interval)
            disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval)
            disk_failure_dBA = -disk_failure_dBF
            file_survives_dBA = self.file_availability(k, repair_threshold,
                                                       disk_failure_dBA)
            user_files_survives_dBA = self.many_files_availability(
                file_survives_dBA, files_per_user)
            all_files_survives_dBA = self.many_files_availability(
                file_survives_dBA, total_files)
            add_output("Users",
                       T.div["survival of: ",
                             "arbitrary file = %d dBA, " % file_survives_dBA,
                             "all files of user1 = %d dBA, " %
                             user_files_survives_dBA,
                             "all files in grid = %d dBA" %
                             all_files_survives_dBA,
                             " (per worst-case check interval)",
                             ])



        all_sections = []
        all_sections.append(build_section("Users"))
        all_sections.append(build_section("Servers"))
        all_sections.append(build_section("Drives"))
        if "Grid" in sections:
            all_sections.append(build_section("Grid"))

        f = T.form(action=".", method="post", enctype="multipart/form-data")

        if filled:
            action = "Recompute"
        else:
            action = "Compute"

        f = f[T.input(type="hidden", name="filled", value="true"),
              T.input(type="submit", value=action),
              all_sections,
              ]

        try:
            from allmydata import reliability
            # we import this just to see whether the reliability page is
            # available
            _hush_pyflakes = reliability
            del _hush_pyflakes
            f = [T.div[T.a(href="../reliability")["Reliability Math"]], f]
        except ImportError:
            pass

        return f

    def file_availability(self, k, n, server_dBA):
        """
        The full formula for the availability of a specific file is::

            1 - sum([choose(N,i) * p**i * (1-p)**(N-i) for i in range(k)])

        where choose(N,i) = N! / ( i! * (N-i)! ). Note that each term of
        this summation is the probability that there are exactly 'i' servers
        available, and what we're doing is adding up the cases where i is too
        low.

        This is a nuisance to calculate at all accurately, especially once N
        gets large, and when p is close to unity. So we make an engineering
        approximation: if (1-p) is very small, then each [i] term is much
        larger than the [i-1] term, and the sum is dominated by the i=k-1
        term. This only works for (1-p) < 10%, and when the choose() function
        doesn't rise fast enough to compensate. For high-expansion encodings
        (3-of-10, 25-of-100), the choose() function is rising at the same
        time as the (1-p)**(N-i) term, so that's not an issue. For
        low-expansion encodings (7-of-10, 75-of-100) the two values are
        moving in opposite directions, so more care must be taken.

        Note that the p**i term has only a minor effect as long as (1-p)*N is
        small, and even then the effect is attenuated by the 1-p term.
        """
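        # Worked example (illustrative): with 3-of-10 encoding and 20 dBA
        # servers, binomial(10, 2) = 45, 10*log10(45) ~= 16.5 dB, and the
        # exponent is 10-3+1 = 8, so file_dBA ~= 20*8 - 16.5 ~= 143 dBA.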

        assert server_dBA > 9 # >=90% availability to use the approximation
        factor = binomial(n, k-1)
        factor_dBA = 10 * math.log10(factor)
        exponent = n - k + 1
        file_dBA = server_dBA * exponent - factor_dBA
        return file_dBA

    def many_files_availability(self, file_dBA, num_files):
        """The probability that 'num_files' independent Bernoulli trials all
        succeed (i.e. we can recover every file in the grid at any given
        moment) is p**num_files. Since p is close to unity, we express p in
        dBA instead, so we can get useful precision on q (=1-p), and then
        the formula becomes::

            P_some_files_unavailable = 1 - (1 - q)**num_files

        That (1-q)**n expands with the usual binomial sequence, 1 - nq +
        Xq**2 ... + Xq**n. We use the same approximation as before, since we
        know q is close to zero, and we get to ignore all the terms past -nq.
        """
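        # Worked example (illustrative): a single file at 143 dBA and 1000
        # files per user gives 143 - 10*log10(1000) = 143 - 30 = 113 dBA for
        # "all of that user's files are available".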

        many_files_dBA = file_dBA - 10 * math.log10(num_files)
        return many_files_dBA