""" Base62 encoding. Ported to Python 3. """ maketrans = bytes.maketrans translate = bytes.translate from past.builtins import chr as byteschr from allmydata.util.mathutil import log_ceil, log_floor chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" BASE62CHAR = b'[' + chars + b']' vals = b''.join([byteschr(i) for i in range(62)]) c2vtranstable = maketrans(chars, vals) v2ctranstable = maketrans(vals, chars) identitytranstable = maketrans(chars, chars) def b2a(os): """ @param os the data to be encoded (as bytes) @return the contents of os in base-62 encoded form, as bytes """ cs = b2a_l(os, len(os)*8) assert num_octets_that_encode_to_this_many_chars(len(cs)) == len(os), "%s != %s, numchars: %s" % (num_octets_that_encode_to_this_many_chars(len(cs)), len(os), len(cs)) return cs def b2a_l(os, lengthinbits): """ @param os the data to be encoded (as bytes) @param lengthinbits the number of bits of data in os to be encoded b2a_l() will generate a base-62 encoded string big enough to encode lengthinbits bits. So for example if os is 3 bytes long and lengthinbits is 17, then b2a_l() will generate a 3-character- long base-62 encoded string (since 3 chars is sufficient to encode more than 2^17 values). If os is 3 bytes long and lengthinbits is 18 (or None), then b2a_l() will generate a 4-character string (since 4 chars are required to hold 2^18 values). Note that if os is 3 bytes long and lengthinbits is 17, the least significant 7 bits of os are ignored. Warning: if you generate a base-62 encoded string with b2a_l(), and then someone else tries to decode it by calling a2b() instead of a2b_l(), then they will (potentially) get a different string than the one you encoded! So use b2a_l() only when you are sure that the encoding and decoding sides know exactly which lengthinbits to use. If you do not have a way for the encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and a2b(). The only drawback to using b2a() over b2a_l() is that when you have a number of bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-62 encoded string that is one or two characters longer than necessary. @return the contents of os in base-62 encoded form, as bytes """ # We call bytes() again for Python 2, to ensure literals are using future's # Python 3-compatible variant. os = [o for o in reversed(bytes(os))] # treat os as big-endian -- and we want to process the least-significant o first value = 0 numvalues = 1 # the number of possible values that value could be for o in os: o *= numvalues value += o numvalues *= 256 chars = [] while numvalues > 0: chars.append(value % 62) value //= 62 numvalues //= 62 return translate(bytes([c for c in reversed(chars)]), v2ctranstable) # make it big-endian def num_octets_that_encode_to_this_many_chars(numcs): return log_floor(62**numcs, 256) def num_chars_that_this_many_octets_encode_to(numos): return log_ceil(256**numos, 62) def a2b(cs): """ @param cs the base-62 encoded data (a string) """ return a2b_l(cs, num_octets_that_encode_to_this_many_chars(len(cs))*8) def a2b_l(cs, lengthinbits): """ @param lengthinbits the number of bits of data in encoded into cs a2b_l() will return a result just big enough to hold lengthinbits bits. So for example if cs is 2 characters long (encoding between 5 and 12 bits worth of data) and lengthinbits is 8, then a2b_l() will return a string of length 1 (since 1 byte is sufficient to store 8 bits), but if lengthinbits is 9, then a2b_l() will return a string of length 2. Please see the warning in the docstring of b2a_l() regarding the use of b2a() versus b2a_l(). @return the data encoded in cs, as bytes """ # We call bytes() again for Python 2, to ensure literals are using future's # Python 3-compatible variant. cs = [c for c in reversed(bytes(translate(cs, c2vtranstable)))] # treat cs as big-endian -- and we want to process the least-significant c first value = 0 numvalues = 1 # the number of possible values that value could be for c in cs: c *= numvalues value += c numvalues *= 62 numvalues = 2**lengthinbits result_bytes = [] while numvalues > 1: result_bytes.append(value % 256) value //= 256 numvalues //= 256 return bytes([b for b in reversed(result_bytes)]) # make it big-endian