1 | # A wrapper around the Python Standard Library's filename access functions to |
---|
2 | # provide a uniform API for all platforms and to prevent lossy en/de-coding. |
---|
3 | |
---|
class Fname:
    """A filename together with the bookkeeping about how it was decoded.

    Attributes:
        name: the filename itself. The functions in this module check that
            it is a unicode object before using it.
        failed_decode: true when ``name`` was not produced by a successful
            decode with the suggested codec (see fs_to_unicode), in which
            case ``name`` carries the original bytes rather than real
            characters.
        alleged_encoding: the encoding the filesystem claimed to use, or
            None when it was not recorded.
    """

    def __init__(self, name, failed_decode=False, alleged_encoding=None):
        # Plain record: store the three fields unmodified.
        self.alleged_encoding = alleged_encoding
        self.failed_decode = failed_decode
        self.name = name
---|
9 | |
---|
10 | if platform.system() in ('Linux', 'Solaris'): |
---|
11 | # on byte-oriented filesystems, such as Linux and Solaris |
---|
12 | |
---|
def unicode_to_fs(fn):
    """Encode the unicode name of an Fname into bytes for the filesystem.

    Args:
        fn: an Fname whose ``name`` attribute is a unicode object.

    Returns:
        The byte string to hand to the filesystem.

    Raises:
        usage.UsageError: if the name cannot be encoded with the
            filesystem encoding.
    """
    precondition(isinstance(fn, Fname), fn)
    precondition(isinstance(fn.name, unicode), fn.name)

    if fn.failed_decode:
        # This means that the unicode string in .name is not actually the
        # result of a successful decoding with a suggested codec, but is
        # instead the result of stuffing the bytes into a unicode by dint
        # of the utf-8b trick. On a byte-oriented system you shouldn't
        # treat .name as a unicode string containing chars; instead get
        # the original bytes back out of it.
        # NOTE(review): 'utf-8b' and the 'python-replace' handler are not
        # stdlib names; presumably registered elsewhere -- confirm.
        return fn.name.encode('utf-8b', 'python-replace')

    fsencoding = sys.getfilesystemencoding()
    if fsencoding in (None, '', 'ascii', 'utf-8'):
        # Unknown, ASCII and UTF-8 locales are all handled with utf-8b.
        fsencoding = 'utf-8b'
    try:
        # Fixed: this used the undefined name `encoding`.
        return fn.name.encode(fsencoding, 'python-escape')
    except UnicodeEncodeError:
        # Fixed: the message formatted the undefined name `s`.
        raise usage.UsageError(
            "Filename '%s' cannot be encoded using the current encoding "
            "of your filesystem (%s). Please configure your locale "
            "correctly or rename this file."
            % (fn.name, sys.getfilesystemencoding()))
---|
38 | |
---|
def fs_to_unicode(bytesfn):
    """Decode a filename read from the filesystem into an Fname.

    Args:
        bytesfn: the raw filename as a byte string (``str``).

    Returns:
        An Fname wrapping the decoded name. When strict decoding with the
        filesystem encoding fails, the Fname has ``failed_decode=True`` and
        its name is the utf-8b decoding of the bytes. Otherwise the name is
        NFC-normalized, and ``alleged_encoding`` is recorded when the
        encoding used was not UTF-8.
    """
    # Fixed: report the offending value, not the `str` type itself.
    precondition(isinstance(bytesfn, str), bytesfn)

    alleged_encoding = sys.getfilesystemencoding()
    if alleged_encoding in (None, '', 'ascii'):
        # Unknown or ASCII locale: try UTF-8 instead.
        alleged_encoding = 'utf-8'

    try:
        unicodefn = bytesfn.decode(alleged_encoding, 'strict')
    except UnicodeDecodeError:
        # The bytes are not valid in the alleged encoding; keep them
        # recoverable by decoding with utf-8b and flag the result.
        # NOTE(review): 'utf-8b' and 'python-escape' are not stdlib names;
        # presumably registered elsewhere -- confirm.
        unicodefn = bytesfn.decode('utf-8b', 'python-escape')
        return Fname(unicodefn, failed_decode=True)
    else:
        unicodefn = unicodedata.normalize('NFC', unicodefn)
        if alleged_encoding == 'utf-8':
            return Fname(unicodefn)
        # Fixed: the encoding was passed positionally, which landed in the
        # failed_decode parameter and left alleged_encoding as None.
        return Fname(unicodefn, alleged_encoding=alleged_encoding)
---|
58 | |
---|
def listdir(fn):
    """List a directory on a byte-oriented filesystem.

    Args:
        fn: an Fname (with a unicode ``name``) naming the directory.

    Returns:
        A list with one Fname per directory entry, each produced by
        fs_to_unicode from the bytes returned by os.listdir.
    """
    assert isinstance(fn, Fname), fn
    assert isinstance(fn.name, unicode), fn.name
    # Fixed: unicode_to_fs expects the Fname itself, not its .name; passing
    # the bare unicode string tripped its precondition on every call.
    bytesfn = unicode_to_fs(fn)
    # Use a distinct loop variable so the `fn` parameter is not shadowed.
    return [fs_to_unicode(entry) for entry in os.listdir(bytesfn)]
---|
65 | |
---|
66 | else: |
---|
67 | # on unicode-oriented filesystems, such as Mac and Windows |
---|
def listdir(fn):
    """List a directory on a unicode-oriented filesystem.

    Args:
        fn: an Fname (with a unicode ``name``) naming the directory.

    Returns:
        A list of Fname objects, one per entry returned by os.listdir.
    """
    assert isinstance(fn, Fname), fn
    assert isinstance(fn.name, unicode), fn.name
    entries = []
    # The unicode name goes to os.listdir as-is; no byte encoding is done
    # in this branch.
    for entry in os.listdir(fn.name):
        entries.append(Fname(entry))
    return entries
---|