Context Navigation

source: trunk/src/allmydata/util/base32.py

Visit:

Last change on this file was 63549c7, checked in by Itamar Turner-Trauring <itamar@…>, at 2023-03-24T19:18:46Z
Fix lints, remove some Python 2 junk.
Property mode set to `100644`
File size: 4.5 KB

Line
1	"""
2	Base32 encoding.
3
4	Ported to Python 3.
5	"""
def backwardscompat_bytes(b):
    """
    Identity helper: hand back the argument untouched.

    @param b: any object (callers are expected to pass bytes)

    @return: the very same object that was passed in
    """
    return b


# Module-level shorthand for the bytes translation-table builder.
maketrans = bytes.maketrans
9
10	from typing import Optional
11	import base64
12
13	from allmydata.util.assertutil import precondition
14
# RFC3548 standard alphabet (lower case), as used by Gnutella,
# Content-Addressable Web, THEX, Bitzi, Web-Calculus...
rfc3548_alphabet = b"abcdefghijklmnopqrstuvwxyz234567"
chars = rfc3548_alphabet

# The quintet values 0..31; vals[i] is the value encoded by chars[i].
vals = bytes(range(32))

# 256-entry translation tables: character -> value, value -> character, and
# the identity mapping (handy with bytes.translate()'s delete argument).
c2vtranstable = bytes.maketrans(chars, vals)
v2ctranstable = bytes.maketrans(vals, chars)
identitytranstable = bytes.maketrans(b'', b'')
22
def _get_trailing_chars_without_lsbs(N, d):
    """
    Recursive worker for get_trailing_chars_without_lsbs().

    Walks the alphabet with a stride of 2**N, after first recursing with
    N+1 (up to N == 4), so that characters reachable with a coarser stride
    come first in the result.

    @param N: the number of least significant bits that are ignored
    @param d: dict used as a "seen" set of alphabet indexes; it is mutated
        in place so that each index is emitted only once across the recursion

    @return: a list of chars (length-1 bytes) that can legitimately appear in
        the last place when the least significant N bits are ignored.
    """
    found = []
    if N < 4:
        found.extend(_get_trailing_chars_without_lsbs(N + 1, d=d))
    for index in range(0, len(chars), 2 ** N):
        if index in d:
            continue
        d[index] = None
        # Slice (rather than index) so we collect bytes objects, not ints.
        found.append(chars[index:index + 1])
    return found
37
def get_trailing_chars_without_lsbs(N):
    """
    Return the alphabet characters that may end an encoding when the N least
    significant bits of the final quintet are ignored.

    @param N: number of ignored least significant bits; must satisfy
        0 <= N < 5

    @return: bytes containing every permitted final character; for N == 0
        this is the whole alphabet
    """
    # The message now states the range that is actually enforced (it used to
    # claim "> 0 and < len(chars)").
    precondition((N >= 0) and (N < 5), "N is required to be >= 0 and < 5.", N=N)
    if N == 0:
        return chars
    d = {}
    return b''.join(_get_trailing_chars_without_lsbs(N, d=d))
44
# Regular-expression fragments (as bytes) describing base-32 text.
#
# BASE32CHAR matches any alphabet character; BASE32CHAR_<k>bits matches only
# the characters allowed in the final position when just k bits of the last
# quintet carry data (i.e. the remaining 5-k low bits are ignored).
BASE32CHAR = b'[' + get_trailing_chars_without_lsbs(0) + b']'
BASE32CHAR_4bits = b'[' + get_trailing_chars_without_lsbs(1) + b']'
BASE32CHAR_3bits = b'[' + get_trailing_chars_without_lsbs(2) + b']'
BASE32CHAR_2bits = b'[' + get_trailing_chars_without_lsbs(3) + b']'
BASE32CHAR_1bits = b'[' + get_trailing_chars_without_lsbs(4) + b']'

# Encodings of exactly 1, 2, 3 and 4 octets (2, 4, 5 and 7 characters).
BASE32STR_1byte = BASE32CHAR + BASE32CHAR_3bits
BASE32STR_2bytes = BASE32CHAR + b'{3}' + BASE32CHAR_1bits
BASE32STR_3bytes = BASE32CHAR + b'{4}' + BASE32CHAR_4bits
BASE32STR_4bytes = BASE32CHAR + b'{6}' + BASE32CHAR_2bits

# Any number of octets: zero or more full 8-character groups followed by
# either nothing or one of the partial-group forms above.  The alternation
# uses a bare "|": the previous "\|" was an invalid escape sequence in a
# bytes literal and, inside a regex, matches a literal pipe character
# instead of separating the alternatives.
BASE32STR_anybytes = (
    b'((?:%s{8})*' % (BASE32CHAR,)
    + b'(?:|%s|%s|%s|%s))' % (
        BASE32STR_1byte,
        BASE32STR_2bytes,
        BASE32STR_3bytes,
        BASE32STR_4bytes,
    )
)
55
def b2a(os: bytes) -> bytes:
    """
    Encode binary data as unpadded, lower-case base-32.

    @param os the data to be encoded (as bytes)

    @return the contents of os in base-32 encoded form, as bytes
    """
    padded = base64.b32encode(os)
    # The stdlib emits upper case with "=" padding; strip it and fold case.
    unpadded = padded.rstrip(b"=")
    return unpadded.lower()
63
def b2a_or_none(os: Optional[bytes]) -> Optional[bytes]:
    """
    Like b2a(), but pass None straight through.

    @param os the data to be encoded (as bytes), or None

    @return b2a(os), or None when os is None
    """
    return None if os is None else b2a(os)
68
# b2a() emits the smallest number of quintets (characters) able to hold the
# input octets.  Working modulo 40 bits (5 octets == 8 quintets), the
# relation is:
#     num_qs = NUM_OS_TO_NUM_QS[num_os]
NUM_OS_TO_NUM_QS = (0, 2, 4, 5, 7)

# The reverse direction: num_os = NUM_QS_TO_NUM_OS[num_qs].  When
# NUM_QS_LEGIT[num_qs] is false, no octet count produces that many quintets:
# either the encoded string was mangled (truncated), or it was meant to be
# decoded with a2b_l(), i.e. by a caller that knows the real length of the
# encoded data.
NUM_QS_TO_NUM_OS = (0, 1, 1, 2, 2, 3, 3, 4)
NUM_QS_LEGIT = (1, 0, 1, 0, 1, 1, 0, 1)

# Same table expressed in bits rather than octets.
NUM_QS_TO_NUM_BITS = tuple(octets * 8 for octets in NUM_QS_TO_NUM_OS)
84
# A fast way to determine whether a given string could be base-32 encoded
# data, assuming that the original data had 8K bits for a positive integer K.
# For a candidate bytes string s, the truth value of s8[len(s)%8][s[-1]]
# tells whether its final character is reasonable.
def add_check_array(cs, sfmap):
    """
    Append a 256-entry membership table for the byte values in cs to sfmap.

    @param cs: the permitted byte values (as bytes)
    @param sfmap: the list that receives the new table; mutated in place

    @return: None; the appended table is a tuple holding 1 at every index
        that occurs in cs and 0 everywhere else
    """
    permitted = frozenset(bytes(cs))
    sfmap.append(tuple(1 if code in permitted else 0 for code in range(256)))
94
def init_s8():
    """
    Build the final-character lookup table used by could_be_base32_encoded().

    @return: a tuple of eight 256-entry tuples, indexed first by
        len(encoded) % 8 and then by byte value; an entry is 1 when that byte
        is acceptable as the last character of an encoding of that length.
        Lengths that no whole number of octets can produce get an all-zero
        row.
    """
    table = []
    # Remainder 0: the string ends on a full 8-character group, so any
    # alphabet character may come last.
    add_check_array(chars, table)
    for remainder in range(1, 8):
        if not NUM_QS_LEGIT[remainder]:
            add_check_array(b'', table)
            continue
        # NOTE(review): this ignores one bit fewer than the BASE32STR_*
        # regex constants do for the same length, so the table is slightly
        # more permissive -- confirm that the leniency is intended.
        ignored_bits = 4 - (NUM_QS_TO_NUM_BITS[remainder] % 5)
        add_check_array(get_trailing_chars_without_lsbs(ignored_bits), table)
    return tuple(table)


s8 = init_s8()
105
def could_be_base32_encoded(s, s8=s8, tr=bytes.translate, identitytranstable=identitytranstable, chars=chars):
    """
    Cheap plausibility check for base-32 encoded data.

    @param s: the candidate string (as bytes)
    @param s8: lookup table indexed as s8[len(s) % 8][last byte]
    @param tr: the translate function to use (bound as a default)
    @param identitytranstable: identity translation table handed to tr
    @param chars: the base-32 alphabet (as bytes)

    @return: True if s is empty, or if its final character is acceptable for
        its length and it contains only alphabet characters; otherwise False.
        The result is always a bool (previously an int 0 could leak out of
        the table lookup).
    """
    precondition(isinstance(s, bytes), s)
    if s == b'':
        return True
    s = bytes(s)  # normalize bytes subclasses to plain bytes
    if not s8[len(s) % 8][s[-1]]:
        return False
    # Deleting every alphabet character must leave nothing behind; anything
    # that survives is a byte outside the alphabet.
    return not tr(s, identitytranstable, chars)
112
def a2b(cs: bytes) -> bytes:
    """
    Decode unpadded, lower-case base-32 data.

    @param cs the base-32 encoded data (as bytes)

    @return the decoded data, as bytes
    """
    precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs)
    precondition(isinstance(cs, bytes), cs)

    # Python's base64 module wants upper case and "=" padding out to a
    # multiple of 8 characters, so restore both before decoding.
    padding = b"=" * (-len(cs) % 8)
    return base64.b32decode(cs.upper() + padding)
126
127
# Names exported by "from allmydata.util.base32 import *".
__all__ = [
    "b2a",
    "a2b",
    "b2a_or_none",
    "BASE32CHAR_3bits",
    "BASE32CHAR_1bits",
    "BASE32CHAR",
    "BASE32STR_anybytes",
    "could_be_base32_encoded",
]

Note: See TracBrowser for help on using the repository browser.

Download in other formats: