source: trunk/src/allmydata/util/base62.py

Last change on this file was 53084f7, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-02-27T23:49:07Z

remove more Python2 compatibility

  • Property mode set to 100644
File size: 4.5 KB
Line 
1"""
2Base62 encoding.
3
4Ported to Python 3.
5"""
6
# Function-style aliases for the bytes helpers, so the rest of this module can
# write maketrans(frm, to) and translate(data, table).
maketrans, translate = bytes.maketrans, bytes.translate
9
10from past.builtins import chr as byteschr
11
12from allmydata.util.mathutil import log_ceil, log_floor
13
# The 62-symbol alphabet, ordered by digit value: b"0" is digit 0, b"A" is
# digit 10, b"a" is digit 36 and b"z" is digit 61.
chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

# The alphabet wrapped in square brackets; presumably meant to be embedded in
# a bytes regular expression as a character class -- confirm against its users.
BASE62CHAR = b'[' + chars + b']'

# The raw digit values 0..61 as one bytes object, parallel to ``chars``.
# Built directly with bytes(range(...)) rather than joining one-byte strings
# produced by a Python 2 compatibility shim.
vals = bytes(range(62))

# 256-entry tables for bytes.translate(); any byte that is not named in the
# first argument of maketrans() maps to itself.
c2vtranstable = bytes.maketrans(chars, vals)  # symbol -> digit value
v2ctranstable = bytes.maketrans(vals, chars)  # digit value -> symbol
identitytranstable = bytes.maketrans(chars, chars)  # every byte maps to itself
22
def b2a(os):
    """
    Encode a whole bytes object as base-62.

    @param os the data to be encoded (as bytes)

    @return the contents of os in base-62 encoded form, as bytes
    """
    nbits = 8 * len(os)
    encoded = b2a_l(os, nbits)
    # Sanity check: a2b() derives the number of octets to return from the
    # number of characters alone, so the encoded length must map back to
    # len(os).
    assert num_octets_that_encode_to_this_many_chars(len(encoded)) == len(os), (
        "%s != %s, numchars: %s" % (
            num_octets_that_encode_to_this_many_chars(len(encoded)),
            len(os),
            len(encoded),
        )
    )
    return encoded
32
def b2a_l(os, lengthinbits):
    """
    Encode bytes as base-62, reading os as one big-endian unsigned integer.

    @param os the data to be encoded (as bytes)
    @param lengthinbits accepted for interface compatibility, but NOT
        consulted by this implementation: every bit of os is always encoded
        and the length of the result depends only on len(os)

    The result has one character for each time 256**len(os) can be
    floor-divided by 62 before reaching zero.  So empty input encodes to the
    single character b"0", 1 octet encodes to 2 characters, 2 octets to 3
    characters and 3 octets to 5 characters.  Leading zero digits are kept,
    which is what makes the length of the output a function of len(os) only.

    Warning: because lengthinbits is ignored here, b2a_l(os, n) returns the
    same bytes as b2a(os) for every n.  The decoding side should therefore use
    a2b(), or a2b_l() with lengthinbits equal to len(os)*8; decoding with a
    smaller lengthinbits returns fewer octets than were encoded.

    @return the contents of os in base-62 encoded form, as bytes
    """
    data = bytes(os)
    value = int.from_bytes(data, "big")
    # The number of distinct values that len(data) octets can hold; it is only
    # used to decide how many base-62 digits to emit.
    numvalues = 256 ** len(data)

    # Peel off base-62 digits, least significant first.  The loop is driven by
    # numvalues rather than value so that leading zero digits are emitted too.
    digits = []
    while numvalues > 0:
        value, digit = divmod(value, 62)
        digits.append(digit)
        numvalues //= 62

    digits.reverse()  # make it big-endian
    return translate(bytes(digits), v2ctranstable)
76
def num_octets_that_encode_to_this_many_chars(numcs):
    """
    @param numcs a length, in characters, of base-62 encoded data

    @return log_floor(62**numcs, 256) -- presumably the largest number of
        octets whose every value fits in numcs base-62 characters (log_floor
        comes from allmydata.util.mathutil; confirm its rounding there)
    """
    num_encodable_values = 62 ** numcs
    return log_floor(num_encodable_values, 256)
79
def num_chars_that_this_many_octets_encode_to(numos):
    """
    @param numos a length, in octets, of data to be encoded

    @return log_ceil(256**numos, 62) -- presumably the smallest number of
        base-62 characters able to represent every value of numos octets
        (log_ceil comes from allmydata.util.mathutil; confirm its rounding
        there)
    """
    num_octet_values = 256 ** numos
    return log_ceil(num_octet_values, 62)
82
def a2b(cs):
    """
    Decode base-62 data, deriving the decoded length from len(cs) alone.

    @param cs the base-62 encoded data (as bytes)

    @return the data encoded in cs, as bytes
    """
    num_octets = num_octets_that_encode_to_this_many_chars(len(cs))
    return a2b_l(cs, num_octets * 8)
88
def a2b_l(cs, lengthinbits):
    """
    Decode base-62 data into just enough octets to hold lengthinbits bits.

    @param cs the base-62 encoded data (as bytes)
    @param lengthinbits the number of bits of data encoded into cs (an int)

    a2b_l() will return a result just big enough to hold lengthinbits bits.  So
    for example if cs is 2 characters long (encoding between 5 and 12 bits worth
    of data) and lengthinbits is 8, then a2b_l() will return a string of length
    1 (since 1 byte is sufficient to store 8 bits), but if lengthinbits is 9,
    then a2b_l() will return a string of length 2.

    cs is read as one big-endian base-62 number and only its low-order
    octets are returned; anything above them is discarded, and the result is
    zero-padded on the left when the number is small.  A lengthinbits of zero
    or less yields b"".

    NOTE: bytes outside the base-62 alphabet are not rejected.  They pass
    through the translation table unchanged and their raw byte value is used
    as the digit value.

    Please see the warning in the docstring of b2a_l() regarding the use of
    b2a() versus b2a_l().

    @return the data encoded in cs, as bytes
    """
    # Map each symbol to its digit value (0..61); iterating bytes yields ints.
    digits = translate(cs, c2vtranstable)

    # Accumulate the big-endian base-62 number, most significant digit first.
    value = 0
    for digit in digits:
        value = value * 62 + digit

    # ceil(lengthinbits / 8) in integer arithmetic, clamped so that a
    # non-positive bit count produces an empty result.
    num_octets = max(0, -(-lengthinbits // 8))

    # Keep only the low-order octets, then serialize them big-endian.
    value %= 256 ** num_octets
    return value.to_bytes(num_octets, "big")
Note: See TracBrowser for help on using the repository browser.