Context Navigation

source: trunk/misc/simulators/hashbasedsig.py

Visit:

Last change on this file was b856238, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-02-15T15:53:34Z
remove old Python2 future statements
Property mode set to `100644`
File size: 13.8 KB

Line
1	#!python
2
3
4	# range of hash output lengths
5	range_L_hash = [128]
6
7	lg_M = 53 # lg(required number of signatures before losing security)
8
9	limit_bytes = 480000 # limit on signature length
10	limit_cost = 500 # limit on Mcycles_Sig + weight_ver*Mcycles_ver
11	weight_ver = 1 # how important verification cost is relative to signature cost
12	# (note: setting this too high will just exclude useful candidates)
13
14	L_block = 512 # bitlength of hash input blocks
15	L_pad = 64 # bitlength of hash padding overhead (for M-D hashes)
16	L_label = 80 # bitlength of hash position label
17	L_prf = 256 # bitlength of hash output when used as a PRF
18	cycles_per_byte = 15.8 # cost of hash
19
20	Mcycles_per_block = cycles_per_byte * L_block / (8 * 1000000.0)
21
22
23	from math import floor, ceil, log, log1p, pow, e
24	from sys import stderr
25	from gc import collect
26
27	def lg(x):
28	return log(x, 2)
29	def ln(x):
30	return log(x, e)
31	def ceil_log(x, B):
32	return int(ceil(log(x, B)))
33	def ceil_div(x, y):
34	return int(ceil(float(x) / float(y)))
35	def floor_div(x, y):
36	return int(floor(float(x) / float(y)))
37
38	# number of compression function evaluations to hash k bits
39	# we assume that there is a label in each block
40	def compressions(k):
41	return ceil_div(k + L_pad, L_block - L_label)
42
43	# sum of power series sum([pow(p, i) for i in range(n)])
44	def sum_powers(p, n):
45	if p == 1: return n
46	return (pow(p, n) - 1)/(p - 1)
47
48
49	def make_candidate(B, K, K1, K2, q, T, T_min, L_hash, lg_N, sig_bytes, c_sign, c_ver, c_ver_pm):
50	Mcycles_sign = c_sign * Mcycles_per_block
51	Mcycles_ver = c_ver * Mcycles_per_block
52	Mcycles_ver_pm = c_ver_pm * Mcycles_per_block
53	cost = Mcycles_sign + weight_ver*Mcycles_ver
54
55	if sig_bytes >= limit_bytes or cost > limit_cost:
56	return []
57
58	return [{
59	'B': B, 'K': K, 'K1': K1, 'K2': K2, 'q': q, 'T': T,
60	'T_min': T_min,
61	'L_hash': L_hash,
62	'lg_N': lg_N,
63	'sig_bytes': sig_bytes,
64	'c_sign': c_sign,
65	'Mcycles_sign': Mcycles_sign,
66	'c_ver': c_ver,
67	'c_ver_pm': c_ver_pm,
68	'Mcycles_ver': Mcycles_ver,
69	'Mcycles_ver_pm': Mcycles_ver_pm,
70	'cost': cost,
71	}]
72
73
74	# K1 = size of root Merkle tree
75	# K = size of middle Merkle trees
76	# K2 = size of leaf Merkle trees
77	# q = number of revealed private keys per signed message
78
79	# Winternitz with B < 4 is never optimal. For example, going from B=4 to B=2 halves the
80	# chain depth, but that is cancelled out by doubling (roughly) the number of digits.
81	range_B = range(4, 33)
82
83	M = pow(2, lg_M)
84
85	def calculate(K, K1, K2, q_max, L_hash, trees):
86	candidates = []
87	lg_K = lg(K)
88	lg_K1 = lg(K1)
89	lg_K2 = lg(K2)
90
91	# We want the optimal combination of q and T. That takes too much time and memory
92	# to search for directly, so we start by calculating the lowest possible value of T
93	# for any q. Then for potential values of T, we calculate the smallest q such that we
94	# will have at least L_hash bits of security against forgery using revealed private keys
95	# (i.e. this method of forgery is no easier than finding a hash preimage), provided
96	# that fewer than 2^lg_S_min messages are signed.
97
98	# min height of certification tree (excluding root and bottom layer)
99	T_min = ceil_div(lg_M - lg_K1, lg_K)
100
101	last_q = None
102	for T in range(T_min, T_min+21):
103	# lg(total number of leaf private keys)
104	lg_S = lg_K1 + lg_K*T
105	lg_N = lg_S + lg_K2
106
107	# Suppose that m signatures have been made. The number of times X that a given bucket has
108	# been chosen follows a binomial distribution B(m, p) where p = 1/S and S is the number of
109	# buckets. I.e. Pr(X = x) = C(m, x) * p^x * (1-p)^(m-x).
110	#
111	# If an attacker picks a random seed and message that falls into a bucket that has been
112	# chosen x times, then at most q*x private values in that bucket have been revealed, so
113	# (ignoring the possibility of guessing private keys, which is negligable) the attacker's
114	# success probability for a forgery using the revealed values is at most min(1, q*x / K2)^q.
115	#
116	# Let j = floor(K2/q). Conditioning on x, we have
117	#
118	# Pr(forgery) = sum_{x = 0..j}(Pr(X = x) * (q*x / K2)^q) + Pr(x > j)
119	# = sum_{x = 1..j}(Pr(X = x) * (q*x / K2)^q) + Pr(x > j)
120	#
121	# We lose nothing by approximating (q*x / K2)^q as 1 for x > 4, i.e. ignoring the resistance
122	# of the HORS scheme to forgery when a bucket has been chosen 5 or more times.
123	#
124	# Pr(forgery) < sum_{x = 1..4}(Pr(X = x) * (q*x / K2)^q) + Pr(x > 4)
125	#
126	# where Pr(x > 4) = 1 - sum_{x = 0..4}(Pr(X = x))
127	#
128	# We use log arithmetic here because values very close to 1 cannot be represented accurately
129	# in floating point, but their logarithms can (provided we use appropriate functions such as
130	# log1p).
131
132	lg_p = -lg_S
133	lg_1_p = log1p(-pow(2, lg_p))/ln(2) # lg(1-p), computed accurately
134	j = 5
135	lg_px = [lg_1_p * M]*j
136
137	# We approximate lg(M-x) as lg(M)
138	lg_px_step = lg_M + lg_p - lg_1_p
139	for x in range(1, j):
140	lg_px[x] = lg_px[x-1] - lg(x) + lg_px_step
141
142	q = None
143	# Find the minimum acceptable value of q.
144	for q_cand in range(1, q_max+1):
145	lg_q = lg(q_cand)
146	lg_pforge = [lg_px[x] + (lg_qx - lg_K2)q_cand for x in range(1, j)]
147	if max(lg_pforge) < -L_hash + lg(j) and lg_px[j-1] + 1.0 < -L_hash:
148	#print("K = %d, K1 = %d, K2 = %d, L_hash = %d, lg_K2 = %.3f, q = %d, lg_pforge_1 = %.3f, lg_pforge_2 = %.3f, lg_pforge_3 = %.3f"
149	# % (K, K1, K2, L_hash, lg_K2, q, lg_pforge_1, lg_pforge_2, lg_pforge_3))
150	q = q_cand
151	break
152
153	if q is None or q == last_q:
154	# if q hasn't decreased, this will be strictly worse than the previous candidate
155	continue
156	last_q = q
157
158	# number of compressions to compute the Merkle hashes
159	(h_M, c_M, _) = trees[K]
160	(h_M1, c_M1, _) = trees[K1]
161	(h_M2, c_M2, (dau, tri)) = trees[K2]
162
163	# B = generalized Winternitz base
164	for B in range_B:
165	# n is the number of digits needed to sign the message representative and checksum.
166	# The representation is base-B, except that we allow the most significant digit
167	# to be up to 2B-1.
168	n_L = ceil_div(L_hash-1, lg(B))
169	firstL_max = floor_div(pow(2, L_hash)-1, pow(B, n_L-1))
170	C_max = firstL_max + (n_L-1)*(B-1)
171	n_C = ceil_log(ceil_div(C_max, 2), B)
172	n = n_L + n_C
173	firstC_max = floor_div(C_max, pow(B, n_C-1))
174
175	# Total depth of Winternitz hash chains. The chains for the most significant
176	# digit of the message representative and of the checksum may be a different
177	# length to those for the other digits.
178	c_D = (n-2)*(B-1) + firstL_max + firstC_max
179
180	# number of compressions to hash a Winternitz public key
181	c_W = compressions(n*L_hash)
182
183	# bitlength of a single Winternitz signature and authentication path
184	L_MW = (n + h_M ) * L_hash
185	L_MW1 = (n + h_M1) * L_hash
186
187	# bitlength of the HORS signature and authentication paths
188	# For all but one of the q authentication paths, one of the sibling elements in
189	# another path is made redundant where they intersect. This cancels out the hash
190	# that would otherwise be needed at the bottom of the path, making the total
191	# length of the signature qh_M2 + 1 hashes, rather than q(h_M2 + 1).
192	L_leaf = (qh_M2 + 1) L_hash
193
194	# length of the overall GMSS+HORS signature and seeds
195	sig_bytes = ceil_div(L_MW1 + T*L_MW + L_leaf + L_prf + ceil(lg_N), 8)
196
197	c_MW = K (c_D + c_W) + c_M + ceil_div(K n*L_hash, L_prf)
198	c_MW1 = K1(c_D + c_W) + c_M1 + ceil_div(K1n*L_hash, L_prf)
199
200	# For simplicity, c_sign and c_ver don't take into account compressions saved
201	# as a result of intersecting authentication paths in the HORS signature, so
202	# are slight overestimates.
203
204	c_sign = c_MW1 + Tc_MW + q(c_M2 + 1) + ceil_div(K2*L_hash, L_prf)
205
206	# expected number of compressions to verify a signature
207	c_ver = c_D/2.0 + c_W + c_M1 + T(c_D/2.0 + c_W + c_M) + q(c_M2 + 1)
208	c_ver_pm = (1 + T)*c_D/2.0
209
210	candidates += make_candidate(B, K, K1, K2, q, T, T_min, L_hash, lg_N, sig_bytes, c_sign, c_ver, c_ver_pm)
211
212	return candidates
213
214	def search():
215	for L_hash in range_L_hash:
216	print("collecting... \r", end=' ', file=stderr)
217	collect()
218
219	print("precomputing... \r", end=' ', file=stderr)
220
221	"""
222	# d/dq (lg(q+1) + L_hash/q) = 1/(ln(2)*(q+1)) - L_hash/q^2
223	# Therefore lg(q+1) + L_hash/q is at a minimum when 1/(ln(2)*(q+1)) = L_hash/q^2.
224	# Let alpha = L_hash*ln(2), then from the quadratic formula, the integer q that
225	# minimizes lg(q+1) + L_hash/q is the floor or ceiling of (alpha + sqrt(alpha^2 - 4*alpha))/2.
226	# (We don't want the other solution near 0.)
227
228	alpha = floor(L_hash*ln(2)) # float
229	q = floor((alpha + sqrt(alpha*(alpha-4)))/2)
230	if lg(q+2) + L_hash/(q+1) < lg(q+1) + L_hash/q:
231	q += 1
232	lg_S_margin = lg(q+1) + L_hash/q
233	q_max = int(q)
234
235	q = floor(L_hash*ln(2)) # float
236	if lg(q+1) + L_hash/(q+1) < lg(q) + L_hash/q:
237	q += 1
238	lg_S_margin = lg(q) + L_hash/q
239	q_max = int(q)
240	"""
241	q_max = 4000
242
243	# find optimal Merkle tree shapes for this L_hash and each K
244	trees = {}
245	K_max = 50
246	c2 = compressions(2*L_hash)
247	c3 = compressions(3*L_hash)
248	for dau in range(0, 10):
249	a = pow(2, dau)
250	for tri in range(0, ceil_log(30-dau, 3)):
251	x = int(a*pow(3, tri))
252	h = dau + 2*tri
253	c_x = int(sum_powers(2, dau)c2 + asum_powers(3, tri)*c3)
254	for y in range(1, x+1):
255	if tri > 0:
256	# If the bottom level has arity 3, then for every 2 nodes by which the tree is
257	# imperfect, we can save c3 compressions by pruning 3 leaves back to their parent.
258	# If the tree is imperfect by an odd number of nodes, we can prune one extra leaf,
259	# possibly saving a compression if c2 < c3.
260	c_y = c_x - floor_div(x-y, 2)c3 - ((x-y) % 2)(c3-c2)
261	else:
262	# If the bottom level has arity 2, then for each node by which the tree is
263	# imperfect, we can save c2 compressions by pruning 2 leaves back to their parent.
264	c_y = c_x - (x-y)*c2
265
266	if y not in trees or (h, c_y, (dau, tri)) < trees[y]:
267	trees[y] = (h, c_y, (dau, tri))
268
269	#for x in range(1, K_max+1):
270	# print(x, trees[x])
271
272	candidates = []
273	progress = 0
274	fuzz = 0
275	complete = (K_max-1)*(2200-200)/100
276	for K in range(2, K_max+1):
277	for K2 in range(200, 2200, 100):
278	for K1 in range(max(2, K-fuzz), min(K_max, K+fuzz)+1):
279	candidates += calculate(K, K1, K2, q_max, L_hash, trees)
280	progress += 1
281	print("searching: %3d %% \r" % (100.0 * progress / complete,), end=' ', file=stderr)
282
283	print("filtering... \r", end=' ', file=stderr)
284	step = 2.0
285	bins = {}
286	limit = floor_div(limit_cost, step)
287	for bin in range(0, limit+2):
288	bins[bin] = []
289
290	for c in candidates:
291	bin = floor_div(c['cost'], step)
292	bins[bin] += [c]
293
294	del candidates
295
296	# For each in a range of signing times, find the best candidate.
297	best = []
298	for bin in range(0, limit):
299	candidates = bins[bin] + bins[bin+1] + bins[bin+2]
300	if len(candidates) > 0:
301	best += [min(candidates, key=lambda c: c['sig_bytes'])]
302
303	def format_candidate(candidate):
304	return ("%(B)3d %(K)3d %(K1)3d %(K2)5d %(q)4d %(T)4d "
305	"%(L_hash)4d %(lg_N)5.1f %(sig_bytes)7d "
306	"%(c_sign)7d (%(Mcycles_sign)7.2f) "
307	"%(c_ver)7d +/-%(c_ver_pm)5d (%(Mcycles_ver)5.2f +/-%(Mcycles_ver_pm)5.2f) "
308	) % candidate
309
310	print(" \r", end=' ', file=stderr)
311	if len(best) > 0:
312	print(" B K K1 K2 q T L_hash lg_N sig_bytes c_sign (Mcycles) c_ver ( Mcycles )")
313	print("---- ---- ---- ------ ---- ---- ------ ------ --------- ------------------ --------------------------------")
314
315	best.sort(key=lambda c: (c['sig_bytes'], c['cost']))
316	last_sign = None
317	last_ver = None
318	for c in best:
319	if last_sign is None or c['c_sign'] < last_sign or c['c_ver'] < last_ver:
320	print(format_candidate(c))
321	last_sign = c['c_sign']
322	last_ver = c['c_ver']
323
324	print()
325	else:
326	print("No candidates found for L_hash = %d or higher." % (L_hash))
327	return
328
329	del bins
330	del best
331
332	print("Maximum signature size: %d bytes" % (limit_bytes,))
333	print("Maximum (signing + %d*verification) cost: %.1f Mcycles" % (weight_ver, limit_cost))
334	print("Hash parameters: %d-bit blocks with %d-bit padding and %d-bit labels, %.2f cycles per byte" \
335	% (L_block, L_pad, L_label, cycles_per_byte))
336	print("PRF output size: %d bits" % (L_prf,))
337	print("Security level given by L_hash is maintained for up to 2^%d signatures.\n" % (lg_M,))
338
339	search()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: