source: trunk/src/allmydata/scripts/tahoe_cp.py

Last change on this file was fec97256, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2025-01-06T21:51:37Z

trim Python2 syntax

  • Property mode set to 100644
File size: 35.7 KB
Line 
1"""
2Ported to Python 3.
3"""
4
5import os.path
6from urllib.parse import quote as url_quote
7from collections import defaultdict
8from io import BytesIO
9
10from twisted.python.failure import Failure
11from allmydata.scripts.common import get_alias, escape_path, \
12                                     DefaultAliasMarker, TahoeError
13from allmydata.scripts.common_http import do_http, HTTPError
14from allmydata import uri
15from allmydata.util import fileutil
16from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath
17from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \
18    quote_local_unicode_path, to_bytes
19from allmydata.util.assertutil import precondition, _assert
20from allmydata.util import jsonbytes as json
21
22
class MissingSourceError(TahoeError):
    """Raised when a copy source (local or on the grid) does not exist."""

    def __init__(self, name, quotefn=quote_output):
        # `quotefn` lets the caller choose how `name` is quoted for display.
        message = "No such file or directory %s" % quotefn(name)
        TahoeError.__init__(self, message)
26
class FilenameWithTrailingSlashError(TahoeError):
    """Raised when a source names a non-directory but ends with a slash."""

    def __init__(self, name, quotefn=quote_output):
        # `quotefn` lets the caller choose how `name` is quoted for display.
        message = "source '%s' is not a directory, but ends with a slash" % quotefn(name)
        TahoeError.__init__(self, message)
30
class WeirdSourceError(TahoeError):
    """Raised when a local source is neither a regular file nor a directory."""

    def __init__(self, absname):
        message = (
            "source '%s' is neither a file nor a directory, I can't handle it"
            % quote_local_unicode_path(absname)
        )
        TahoeError.__init__(self, message)
35
def GET_to_file(url):
    """Issue a GET for `url` and return the response object.

    Raises HTTPError for any status other than 200.
    """
    response = do_http("GET", url)
    if response.status != 200:
        raise HTTPError("Error during GET", response)
    return response
41
def GET_to_string(url):
    """Issue a GET for `url` and return the result of read() on the response.

    Raises HTTPError (via GET_to_file) for any status other than 200.
    """
    return GET_to_file(url).read()
45
def PUT(url, data):
    """PUT `data` to `url` and return the response body.

    Raises HTTPError unless the status is 200 or 201.
    """
    response = do_http("PUT", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during PUT", response)
    return response.read()
51
def POST(url, data):
    """POST `data` to `url` and return the response body.

    Raises HTTPError unless the status is 200 or 201.
    """
    response = do_http("POST", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during POST", response)
    return response.read()
57
def mkdir(targeturl):
    """POST ``?t=mkdir`` to `targeturl` and return the stripped response body.

    Callers in this file use the returned value as the new directory's
    writecap. Raises HTTPError unless the status is 200 or 201.
    """
    response = do_http("POST", targeturl + "?t=mkdir")
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
64
def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
    """Create child directory `name` under the directory `parent_writecap`.

    Sends a ``?t=mkdir`` POST to ``<nodeurl>uri/<parent_writecap>/<name>`` and
    returns the stripped response body (used by callers as the new writecap).
    Raises HTTPError unless the status is 200 or 201.
    """
    segments = [
        "uri",
        url_quote(parent_writecap),
        url_quote(unicode_to_url(name)),
    ]
    url = nodeurl + "/".join(segments) + "?t=mkdir"
    response = do_http("POST", url)
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
74
75
class LocalFileSource:
    """A file on the local filesystem, used as a copy source."""

    def __init__(self, pathname, basename):
        precondition_abspath(pathname)
        self._basename = basename
        self.pathname = pathname

    def basename(self):
        """Return the name given at construction time."""
        return self._basename

    def need_to_copy_bytes(self):
        """Always true for this source type."""
        return True

    def open(self, caps_only):
        """Return the file opened for binary reading; `caps_only` is unused.

        The caller owns the returned handle.
        """
        handle = open(self.pathname, "rb")
        return handle
90
class LocalFileTarget:
    """An existing local file used as a copy destination."""

    def __init__(self, pathname):
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Write the contents of file-like `inf` to this target's path."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
98
class LocalMissingTarget:
    """A local destination path that does not exist yet."""

    def __init__(self, pathname):
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Write the contents of file-like `inf` to this target's path."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
106
class LocalDirectorySource:
    """A local directory used as a copy source.

    `children` stays None until populate() runs; afterwards it maps each
    entry name to a LocalDirectorySource or LocalFileSource.
    """

    def __init__(self, progressfunc, pathname, basename):
        precondition_abspath(pathname)

        self.progressfunc = progressfunc
        self.pathname = pathname
        self.children = None
        self._basename = basename

    def basename(self):
        """Return the name given at construction time."""
        return self._basename

    def populate(self, recurse):
        """Scan the directory once and fill in `self.children`.

        Later calls are no-ops. With `recurse` true, subdirectories are
        populated too.
        """
        if self.children is not None:
            return
        self.children = {}
        entries = listdir_unicode(self.pathname)
        total = len(entries)
        for position, entry in enumerate(entries, start=1):
            self.progressfunc("examining %d of %d" % (position, total))
            full_path = os.path.join(self.pathname, entry)
            if os.path.isdir(full_path):
                subdir = LocalDirectorySource(self.progressfunc, full_path, entry)
                self.children[entry] = subdir
                if recurse:
                    subdir.populate(recurse=True)
                continue
            if os.path.isfile(full_path):
                self.children[entry] = LocalFileSource(full_path, entry)
            # Anything else could be a dangling symlink; probably not
            # copy-able, so it is skipped.
            # TODO: output a warning
138
class LocalDirectoryTarget:
    """An existing local directory used as a copy destination.

    `children` stays None until populate() runs; afterwards it maps each
    entry name to a LocalDirectoryTarget (for directories) or a
    LocalFileTarget (for everything else).
    """

    def __init__(self, progressfunc, pathname):
        precondition_abspath(pathname)

        self.progressfunc = progressfunc
        self.pathname = pathname
        self.children = None

    def populate(self, recurse):
        """Scan the directory once and fill in `self.children`.

        Later calls are no-ops. With `recurse` true, subdirectories are
        populated too.
        """
        if self.children is not None:
            return
        self.children = {}
        children = listdir_unicode(self.pathname)
        for i, n in enumerate(children):
            self.progressfunc("examining %d of %d" % (i+1, len(children)))
            pn = os.path.join(self.pathname, n)
            if os.path.isdir(pn):
                child = LocalDirectoryTarget(self.progressfunc, pn)
                self.children[n] = child
                if recurse:
                    child.populate(recurse=True)
            else:
                # Do not assert os.path.isfile() here: a dangling symlink or
                # special file that merely happens to live in the target
                # directory must not abort the whole copy with an
                # AssertionError. Any non-directory entry is recorded as a
                # file target, which is also what the old code did when
                # assertions were disabled (python -O).
                self.children[n] = LocalFileTarget(pn)

    def get_child_target(self, name):
        """Return the target for child `name`, creating a directory if absent.

        `name` must be a non-empty str. An existing child is returned as-is;
        otherwise the subdirectory is created on disk with os.makedirs().
        """
        precondition(isinstance(name, str), name)
        precondition(len(name), name) # don't want ""
        if self.children is None:
            self.populate(recurse=False)
        if name in self.children:
            return self.children[name]
        pathname = os.path.join(self.pathname, name)
        os.makedirs(pathname)
        child = LocalDirectoryTarget(self.progressfunc, pathname)
        self.children[name] = child
        return child

    def put_file(self, name, inf):
        """Write the contents of file-like `inf` to `name` in this directory."""
        precondition(isinstance(name, str), name)
        pathname = os.path.join(self.pathname, name)
        fileutil.put_file(pathname, inf)

    def set_children(self):
        """No-op; local directories have nothing to commit."""
        pass
184
185
class TahoeFileSource:
    """A file on the Tahoe grid, used as a copy source."""

    def __init__(self, nodeurl, mutable, writecap, readcap, basename):
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.writecap = writecap
        self.readcap = readcap
        self._basename = basename # unicode, or None for raw filecaps

    def basename(self):
        """Return the name given at construction time (may be None)."""
        return self._basename

    def need_to_copy_bytes(self):
        """Return True when the file is mutable, False otherwise."""
        return True if self.mutable else False

    def open(self, caps_only):
        """Return a readable object for this file.

        With `caps_only` true, that is a BytesIO wrapping the readcap;
        otherwise it is the response of a GET on ``uri/<readcap>``.
        """
        if not caps_only:
            return GET_to_file(self.nodeurl + "uri/" + url_quote(self.readcap))
        return BytesIO(self.readcap)

    def bestcap(self):
        """Return the writecap if there is one, else the readcap."""
        return self.writecap or self.readcap
210
211
def seekable(file_like):
    """Return whether the file-like object is seekable.

    An object with no ``seek`` attribute is not seekable. One with ``seek``
    but no ``seekable`` method is taken to be seekable. Otherwise the result
    of its ``seekable()`` call is returned.
    """
    if not hasattr(file_like, "seek"):
        return False
    if not hasattr(file_like, "seekable"):
        return True
    return file_like.seekable()
217
218
class TahoeFileTarget:
    """An existing file on the Tahoe grid, used as a copy destination."""

    def __init__(self, nodeurl, mutable, writecap, readcap, url):
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.writecap = writecap
        self.readcap = readcap
        self.url = url

    def put_file(self, inf):
        """Replace this object in place by PUTting `inf` to `self.url`."""
        assert self.url
        # our do_http() call currently requires a string or a filehandle with
        # a real .seek, so anything else is read into memory first
        body = inf if seekable(inf) else inf.read()
        PUT(self.url, body)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files. ticket #835
238
class TahoeDirectorySource:
    """A directory on the Tahoe grid, used as a copy source.

    After construction, init_from_grid() or init_from_parsed() must be
    called to set the caps and `children_d` before populate() is used.
    """

    def __init__(self, nodeurl, cache, progressfunc, basename):
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc
        self._basename = basename # unicode, or None for raw dircaps

    def basename(self):
        """Return the name given at construction time (may be None)."""
        return self._basename

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's ``?t=json`` description and record it.

        Raises HTTPError unless the status is 200.
        """
        self.writecap = writecap
        self.readcap = readcap
        bestcap = writecap or readcap
        url = self.nodeurl + "uri/%s" % url_quote(bestcap)
        resp = do_http("GET", url + "?t=json")
        if resp.status != 200:
            raise HTTPError("Error examining source directory", resp)
        nodetype, info = json.loads(resp.read())
        assert nodetype == "dirnode"
        self.mutable = info.get("mutable", False) # older nodes don't provide it
        self.children_d = {
            str(name): value for name, value in info["children"].items()
        }
        self.children = None

    def init_from_parsed(self, parsed):
        """Record an already-fetched ``(nodetype, dict)`` description."""
        _nodetype, info = parsed
        self.writecap = to_bytes(info.get("rw_uri"))
        self.readcap = to_bytes(info.get("ro_uri"))
        self.mutable = info.get("mutable", False) # older nodes don't provide it
        self.children_d = {
            str(name): value for name, value in info["children"].items()
        }
        self.children = None

    def populate(self, recurse):
        """Turn `children_d` into source objects stored in `self.children`.

        Later calls are no-ops. Directory children are shared through
        `self.cache`, keyed by writecap and readcap. Children that are
        neither "filenode" nor "dirnode" raise TahoeError.
        """
        if self.children is not None:
            return
        self.children = {}
        total = len(self.children_d)
        for position, (name, data) in enumerate(self.children_d.items(), start=1):
            self.progressfunc("examining %d of %d" % (position, total))
            kind = data[0]
            if kind == "filenode":
                mutable = data[1].get("mutable", False)
                writecap = to_bytes(data[1].get("rw_uri"))
                readcap = to_bytes(data[1].get("ro_uri"))
                self.children[name] = TahoeFileSource(self.nodeurl, mutable,
                                                      writecap, readcap, name)
            elif kind == "dirnode":
                writecap = to_bytes(data[1].get("rw_uri"))
                readcap = to_bytes(data[1].get("ro_uri"))
                if writecap and writecap in self.cache:
                    subdir = self.cache[writecap]
                elif readcap and readcap in self.cache:
                    subdir = self.cache[readcap]
                else:
                    subdir = TahoeDirectorySource(self.nodeurl, self.cache,
                                                  self.progressfunc, name)
                    subdir.init_from_grid(writecap, readcap)
                    if writecap:
                        self.cache[writecap] = subdir
                    if readcap:
                        self.cache[readcap] = subdir
                    if recurse:
                        subdir.populate(recurse=True)
                self.children[name] = subdir
            else:
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")
311
class TahoeMissingTarget:
    """A grid destination URL at which nothing exists yet."""

    def __init__(self, url):
        self.url = url

    def put_file(self, inf):
        """PUT the contents of `inf` to `self.url`."""
        # Non-seekable inputs are read into memory before the PUT.
        body = inf if seekable(inf) else inf.read()
        PUT(self.url, body)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.

    def put_uri(self, filecap):
        """PUT `filecap` to ``<url>?t=uri`` and return the response body."""
        # I'm not sure this will always work
        link_url = self.url + "?t=uri"
        return PUT(link_url, filecap)
328
class TahoeDirectoryTarget:
    """A directory on the Tahoe grid, used as a copy destination.

    After construction, one of init_from_parsed(), init_from_grid() or
    just_created() must be called to set the caps and `children_d`.
    Files added through put_file()/put_uri() are collected in
    `new_children` and linked in by set_children().
    """

    def __init__(self, nodeurl, cache, progressfunc):
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc
        self.new_children = {}

    def init_from_parsed(self, parsed):
        """Record an already-fetched ``(nodetype, dict)`` description."""
        _nodetype, info = parsed
        self.writecap = to_bytes(info.get("rw_uri"))
        self.readcap = to_bytes(info.get("ro_uri"))
        self.mutable = info.get("mutable", False) # older nodes don't provide it
        self.children_d = {
            str(name): value for name, value in info["children"].items()
        }
        self.children = None

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's ``?t=json`` description and record it.

        Raises HTTPError unless the status is 200.
        """
        self.writecap = writecap
        self.readcap = readcap
        bestcap = writecap or readcap
        url = self.nodeurl + "uri/%s" % url_quote(bestcap)
        resp = do_http("GET", url + "?t=json")
        if resp.status != 200:
            raise HTTPError("Error examining target directory", resp)
        nodetype, info = json.loads(resp.read())
        assert nodetype == "dirnode"
        self.mutable = info.get("mutable", False) # older nodes don't provide it
        self.children_d = {
            str(name): value for name, value in info["children"].items()
        }
        self.children = None

    def just_created(self, writecap):
        """Initialise state for a directory that was just made (no children)."""
        # TODO: maybe integrate this with the constructor
        self.writecap = writecap
        self.readcap = uri.from_string(writecap).get_readonly().to_string()
        self.mutable = True
        self.children_d = {}
        self.children = {}

    def populate(self, recurse):
        """Turn `children_d` into target objects stored in `self.children`.

        Later calls are no-ops. Directory children are shared through
        `self.cache`, keyed by writecap and readcap. Children that are
        neither "filenode" nor "dirnode" raise TahoeError.
        """
        if self.children is not None:
            return
        self.children = {}
        total = len(self.children_d)
        for position, (name, data) in enumerate(self.children_d.items(), start=1):
            self.progressfunc("examining %d of %d" % (position, total))
            kind = data[0]
            if kind == "filenode":
                mutable = data[1].get("mutable", False)
                writecap = to_bytes(data[1].get("rw_uri"))
                readcap = to_bytes(data[1].get("ro_uri"))
                # A child URL is only built when this directory has a writecap.
                url = None
                if self.writecap:
                    segments = ["uri",
                                url_quote(self.writecap),
                                url_quote(unicode_to_url(name))]
                    url = self.nodeurl + "/".join(segments)
                self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
                                                      writecap, readcap, url)
            elif kind == "dirnode":
                writecap = to_bytes(data[1].get("rw_uri"))
                readcap = to_bytes(data[1].get("ro_uri"))
                if writecap and writecap in self.cache:
                    subdir = self.cache[writecap]
                elif readcap and readcap in self.cache:
                    subdir = self.cache[readcap]
                else:
                    subdir = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                                  self.progressfunc)
                    subdir.init_from_grid(writecap, readcap)
                    if writecap:
                        self.cache[writecap] = subdir
                    if readcap:
                        self.cache[readcap] = subdir
                    if recurse:
                        subdir.populate(recurse=True)
                self.children[name] = subdir
            else:
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")

    def get_child_target(self, name):
        """Return a target for child `name`, making a new subdirectory if absent."""
        precondition(isinstance(name, str), name)
        if self.children is None:
            self.populate(recurse=False)
        if name in self.children:
            return self.children[name]
        new_writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
        subdir = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                      self.progressfunc)
        subdir.just_created(new_writecap)
        self.children[name] = subdir
        return subdir

    def put_file(self, name, inf):
        """Store the contents of `inf` as child `name`.

        An existing child whose `mutable` attribute is true is overwritten
        in place. Otherwise the data is uploaded and the resulting filecap
        is queued in `new_children` for set_children().
        """
        precondition(isinstance(name, str), name)
        upload_url = self.nodeurl + "uri"
        if not seekable(inf):
            inf = inf.read()

        if self.children is None:
            self.populate(recurse=False)

        # Check to see if we already have a mutable file by this name.
        # If so, overwrite that file in place.
        if name in self.children and self.children[name].mutable:
            self.children[name].put_file(inf)
            return
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.
        self.new_children[name] = PUT(upload_url, inf)

    def put_uri(self, name, filecap):
        """Queue `filecap` to be linked as child `name` by set_children()."""
        precondition(isinstance(name, str), name)
        self.new_children[name] = filecap

    def set_children(self):
        """POST every queued child to ``?t=set_children``; no-op if none."""
        if not self.new_children:
            return
        url = (self.nodeurl + "uri/" + url_quote(self.writecap)
               + "?t=set_children")
        # it just so happens that ?t=set_children will accept both file
        # read-caps and write-caps as ['rw_uri'], and will handle either
        # correctly. So don't bother trying to figure out whether the one
        # we have is read-only or read-write.
        # TODO: think about how this affects forward-compatibility for
        # unknown caps
        set_data = {
            name: ["filenode", {"rw_uri": filecap}]
            for name, filecap in self.new_children.items()
        }
        POST(url, json.dumps_bytes(set_data))
466
# Tuples of related classes, used with isinstance() to classify the source
# and target objects handled by Copier.
FileSources = (LocalFileSource, TahoeFileSource)
DirectorySources = (LocalDirectorySource, TahoeDirectorySource)
FileTargets = (LocalFileTarget, TahoeFileTarget)
DirectoryTargets = (LocalDirectoryTarget, TahoeDirectoryTarget)
# Targets that do not exist yet (local path or grid URL).
MissingTargets = (LocalMissingTarget, TahoeMissingTarget)
472
473class Copier:
474
475    def do_copy(self, options, progressfunc=None):
476        if options['quiet']:
477            verbosity = 0
478        elif options['verbose']:
479            verbosity = 2
480        else:
481            verbosity = 1
482
483        nodeurl = options['node-url']
484        if nodeurl[-1] != "/":
485            nodeurl += "/"
486        self.nodeurl = nodeurl
487        self.progressfunc = progressfunc
488        self.options = options
489        self.aliases = options.aliases
490        self.verbosity = verbosity
491        self.stdout = options.stdout
492        self.stderr = options.stderr
493        if verbosity >= 2 and not self.progressfunc:
494            def progress(message):
495                print(message, file=self.stderr)
496            self.progressfunc = progress
497        self.caps_only = options["caps-only"]
498        self.cache = {}
499        try:
500            status = self.try_copy()
501            return status
502        except TahoeError as te:
503            if verbosity >= 2:
504                Failure().printTraceback(self.stderr)
505                print(file=self.stderr)
506            te.display(self.stderr)
507            return 1
508
509    def try_copy(self):
510        """
511        All usage errors (except for target filename collisions) are caught
512        here, not in a subroutine. This bottoms out in copy_file_to_file() or
513        copy_things_to_directory().
514        """
515        source_specs = self.options.sources
516        destination_spec = self.options.destination
517        recursive = self.options["recursive"]
518
519        target = self.get_target_info(destination_spec)
520        precondition(isinstance(target, FileTargets + DirectoryTargets + MissingTargets), target)
521        target_has_trailing_slash = destination_spec.endswith("/")
522
523        sources = [] # list of source objects
524        for ss in source_specs:
525            try:
526                si = self.get_source_info(ss)
527            except FilenameWithTrailingSlashError as e:
528                self.to_stderr(str(e))
529                return 1
530            precondition(isinstance(si, FileSources + DirectorySources), si)
531            sources.append(si)
532
533        # if any source is a directory, must use -r
534        # if target is missing:
535        #    if source is a single file, target will be a file
536        #    else target will be a directory, so mkdir it
537        # if there are multiple sources, target must be a dir
538        # if target is a file, source must be a single file
539        # if target is directory, sources must be named or a dir
540
541        have_source_dirs = any([isinstance(s, DirectorySources)
542                                for s in sources])
543        if have_source_dirs and not recursive:
544            # 'cp dir target' without -r: error
545            self.to_stderr("cannot copy directories without --recursive")
546            return 1
547        del recursive # -r is only used for signalling errors
548
549        if isinstance(target, FileTargets):
550            target_is_file = True
551        elif isinstance(target, DirectoryTargets):
552            target_is_file = False
553        else: # isinstance(target, MissingTargets)
554            if len(sources) == 1 and isinstance(sources[0], FileSources):
555                target_is_file = True
556            else:
557                target_is_file = False
558
559        if target_is_file and target_has_trailing_slash:
560            self.to_stderr("target is not a directory, but ends with a slash")
561            return 1
562
563        if len(sources) > 1 and target_is_file:
564            self.to_stderr("copying multiple things requires target be a directory")
565            return 1
566
567        if target_is_file:
568            _assert(len(sources) == 1, sources)
569            if not isinstance(sources[0], FileSources):
570                # 'cp -r dir existingfile': error
571                self.to_stderr("cannot copy directory into a file")
572                return 1
573            return self.copy_file_to_file(sources[0], target)
574
575        # else target is a directory, so each source must be one of:
576        # * a named file (copied to a new file under the target)
577        # * a named directory (causes a new directory of the same name to be
578        #   created under the target, then the contents of the source are
579        #   copied into that directory)
580        # * an unnamed directory (the contents of the source are copied into
581        #   the target, without a new directory being made)
582        #
583        # If any source is an unnamed file, throw an error, since we have no
584        # way to name the output file.
585        _assert(isinstance(target, DirectoryTargets + MissingTargets), target)
586
587        for source in sources:
588            if isinstance(source, FileSources) and source.basename() is None:
589                self.to_stderr("when copying into a directory, all source files must have names, but %s is unnamed" % quote_output(source_specs[0]))
590                return 1
591        return self.copy_things_to_directory(sources, target)
592
593    def to_stderr(self, text):
594        print(text, file=self.stderr)
595
596    # FIXME reduce the amount of near-duplicate code between get_target_info
597    # and get_source_info.
598
599    def get_target_info(self, destination_spec):
600        precondition(isinstance(destination_spec, str), destination_spec)
601        rootcap, path_utf8 = get_alias(self.aliases, destination_spec, None)
602        path = path_utf8.decode("utf-8")
603        if rootcap == DefaultAliasMarker:
604            # no alias, so this is a local file
605            pathname = abspath_expanduser_unicode(path)
606            if not os.path.exists(pathname):
607                t = LocalMissingTarget(pathname)
608            elif os.path.isdir(pathname):
609                t = LocalDirectoryTarget(self.progress, pathname)
610            else:
611                # TODO: should this be _assert? what happens if the target is
612                # a special file?
613                assert os.path.isfile(pathname), pathname
614                t = LocalFileTarget(pathname) # non-empty
615        else:
616            # this is a tahoe object
617            url = self.nodeurl + "uri/%s" % url_quote(rootcap)
618            if path:
619                url += "/" + escape_path(path)
620
621            resp = do_http("GET", url + "?t=json")
622            if resp.status == 404:
623                # doesn't exist yet
624                t = TahoeMissingTarget(url)
625            elif resp.status == 200:
626                parsed = json.loads(resp.read())
627                nodetype, d = parsed
628                if nodetype == "dirnode":
629                    t = TahoeDirectoryTarget(self.nodeurl, self.cache,
630                                             self.progress)
631                    t.init_from_parsed(parsed)
632                else:
633                    writecap = to_bytes(d.get("rw_uri"))
634                    readcap = to_bytes(d.get("ro_uri"))
635                    mutable = d.get("mutable", False)
636                    t = TahoeFileTarget(self.nodeurl, mutable,
637                                        writecap, readcap, url)
638            else:
639                raise HTTPError("Error examining target %s"
640                                 % quote_output(destination_spec), resp)
641        return t
642
643    def get_source_info(self, source_spec):
644        """
645        This turns an argv string into a (Local|Tahoe)(File|Directory)Source.
646        """
647        precondition(isinstance(source_spec, str), source_spec)
648        rootcap, path_utf8 = get_alias(self.aliases, source_spec, None)
649        path = path_utf8.decode("utf-8")
650        # any trailing slash is removed in abspath_expanduser_unicode(), so
651        # make a note of it here, to throw an error later
652        had_trailing_slash = path.endswith("/")
653        if rootcap == DefaultAliasMarker:
654            # no alias, so this is a local file
655            pathname = abspath_expanduser_unicode(path)
656            name = os.path.basename(pathname)
657            if not os.path.exists(pathname):
658                raise MissingSourceError(source_spec, quotefn=quote_local_unicode_path)
659            if os.path.isdir(pathname):
660                t = LocalDirectorySource(self.progress, pathname, name)
661            else:
662                if had_trailing_slash:
663                    raise FilenameWithTrailingSlashError(source_spec,
664                                                         quotefn=quote_local_unicode_path)
665                if not os.path.isfile(pathname):
666                    raise WeirdSourceError(pathname)
667                t = LocalFileSource(pathname, name) # non-empty
668        else:
669            # this is a tahoe object
670            url = self.nodeurl + "uri/%s" % url_quote(rootcap)
671            name = None
672            if path:
673                if path.endswith("/"):
674                    path = path[:-1]
675                url += "/" + escape_path(path)
676                last_slash = path.rfind(u"/")
677                name = path
678                if last_slash != -1:
679                    name = path[last_slash+1:]
680
681            resp = do_http("GET", url + "?t=json")
682            if resp.status == 404:
683                raise MissingSourceError(source_spec)
684            elif resp.status != 200:
685                raise HTTPError("Error examining source %s" % quote_output(source_spec),
686                                resp)
687            parsed = json.loads(resp.read())
688            nodetype, d = parsed
689            if nodetype == "dirnode":
690                t = TahoeDirectorySource(self.nodeurl, self.cache,
691                                         self.progress, name)
692                t.init_from_parsed(parsed)
693            else:
694                if had_trailing_slash:
695                    raise FilenameWithTrailingSlashError(source_spec)
696                writecap = to_bytes(d.get("rw_uri"))
697                readcap = to_bytes(d.get("ro_uri"))
698                mutable = d.get("mutable", False) # older nodes don't provide it
699                t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name)
700        return t
701
702
703    def need_to_copy_bytes(self, source, target):
704        # This should likley be a method call! but enabling that triggers
705        # additional bugs. https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3719
706        if source.need_to_copy_bytes:
707            # mutable tahoe files, and local files
708            return True
709        if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
710            return True
711        return False
712
713    def announce_success(self, msg):
714        if self.verbosity >= 1:
715            print("Success: %s" % msg, file=self.stdout)
716        return 0
717
718    def copy_file_to_file(self, source, target):
719        precondition(isinstance(source, FileSources), source)
720        precondition(isinstance(target, FileTargets + MissingTargets), target)
721        if self.need_to_copy_bytes(source, target):
722            # if the target is a local directory, this will just write the
723            # bytes to disk. If it is a tahoe directory, it will upload the
724            # data, and stash the new filecap for a later set_children call.
725            f = source.open(self.caps_only)
726            target.put_file(f)
727            return self.announce_success("file copied")
728        # otherwise we're copying tahoe to tahoe, and using immutable files,
729        # so we can just make a link. TODO: this probably won't always work:
730        # need to enumerate the cases and analyze them.
731        target.put_uri(source.bestcap())
732        return self.announce_success("file linked")
733
734    def copy_things_to_directory(self, sources, target):
735        # step one: if the target is missing, we should mkdir it
736        target = self.maybe_create_target(target)
737        target.populate(recurse=False)
738
739        # step two: scan any source dirs, recursively, to find children
740        for s in sources:
741            if isinstance(s, DirectorySources):
742                s.populate(recurse=True)
743            if isinstance(s, FileSources):
744                # each source must have a name, or be a directory
745                _assert(s.basename() is not None, s)
746
747        # step three: find a target for each source node, creating
748        # directories as necessary. 'targetmap' is a dictionary that uses
749        # target Directory instances as keys, and has values of (name:
750        # sourceobject) dicts for all the files that need to wind up there.
751        targetmap = self.build_targetmap(sources, target)
752
753        # target name collisions are an error
754        collisions = []
755        for target, sources in list(targetmap.items()):
756            target_names = {}
757            for source in sources:
758                name = source.basename()
759                if name in target_names:
760                    collisions.append((target, source, target_names[name]))
761                else:
762                    target_names[name] = source
763        if collisions:
764            self.to_stderr("cannot copy multiple files with the same name into the same target directory")
765            # I'm not sure how to show where the collisions are coming from
766            #for (target, source1, source2) in collisions:
767            #    self.to_stderr(source1.basename())
768            return 1
769
770        # step four: walk through the list of targets. For each one, copy all
771        # the files. If the target is a TahoeDirectory, upload and create
772        # read-caps, then do a set_children to the target directory.
773        self.copy_to_targetmap(targetmap)
774
775        return self.announce_success("files copied")
776
777    def maybe_create_target(self, target):
778        if isinstance(target, LocalMissingTarget):
779            os.makedirs(target.pathname)
780            target = LocalDirectoryTarget(self.progress, target.pathname)
781        elif isinstance(target, TahoeMissingTarget):
782            writecap = mkdir(target.url)
783            target = TahoeDirectoryTarget(self.nodeurl, self.cache,
784                                          self.progress)
785            target.just_created(writecap)
786        # afterwards, or otherwise, it will be a directory
787        precondition(isinstance(target, DirectoryTargets), target)
788        return target
789
790    def build_targetmap(self, sources, target):
791        num_source_files = len([s for s in sources
792                                if isinstance(s, FileSources)])
793        num_source_dirs = len([s for s in sources
794                               if isinstance(s, DirectorySources)])
795        self.progress("attaching sources to targets, "
796                      "%d files / %d dirs in root" %
797                      (num_source_files, num_source_dirs))
798
799        # this maps each target directory to a list of source files that need
800        # to be copied into it. All source files have names.
801        targetmap = defaultdict(list)
802
803        for s in sources:
804            if isinstance(s, FileSources):
805                targetmap[target].append(s)
806            else:
807                _assert(isinstance(s, DirectorySources), s)
808                name = s.basename()
809                if name is not None:
810                    # named sources get a new directory. see #2329
811                    new_target = target.get_child_target(name)
812                else:
813                    # unnamed sources have their contents copied directly
814                    new_target = target
815                self.assign_targets(targetmap, s, new_target)
816
817        self.progress("targets assigned, %s dirs, %s files" %
818                      (len(targetmap), self.count_files_to_copy(targetmap)))
819        return targetmap
820
821    def assign_targets(self, targetmap, source, target):
822        # copy everything in the source into the target
823        precondition(isinstance(source, DirectorySources), source)
824        for name, child in list(source.children.items()):
825            if isinstance(child, DirectorySources):
826                # we will need a target directory for this one
827                subtarget = target.get_child_target(name)
828                self.assign_targets(targetmap, child, subtarget)
829            else:
830                _assert(isinstance(child, FileSources), child)
831                targetmap[target].append(child)
832
833    def copy_to_targetmap(self, targetmap):
834        files_to_copy = self.count_files_to_copy(targetmap)
835        self.progress("starting copy, %d files, %d directories" %
836                      (files_to_copy, len(targetmap)))
837        files_copied = 0
838        targets_finished = 0
839
840        for target, sources in list(targetmap.items()):
841            _assert(isinstance(target, DirectoryTargets), target)
842            for source in sources:
843                _assert(isinstance(source, FileSources), source)
844                self.copy_file_into_dir(source, source.basename(), target)
845                files_copied += 1
846                self.progress("%d/%d files, %d/%d directories" %
847                              (files_copied, files_to_copy,
848                               targets_finished, len(targetmap)))
849            target.set_children()
850            targets_finished += 1
851            self.progress("%d/%d directories" %
852                          (targets_finished, len(targetmap)))
853
854    def count_files_to_copy(self, targetmap):
855        return sum([len(sources) for sources in targetmap.values()])
856
857    def copy_file_into_dir(self, source, name, target):
858        precondition(isinstance(source, FileSources), source)
859        precondition(isinstance(target, DirectoryTargets), target)
860        precondition(isinstance(name, str), name)
861        if self.need_to_copy_bytes(source, target):
862            # if the target is a local directory, this will just write the
863            # bytes to disk. If it is a tahoe directory, it will upload the
864            # data, and stash the new filecap for a later set_children call.
865            f = source.open(self.caps_only)
866            target.put_file(name, f)
867            return
868        # otherwise we're copying tahoe to tahoe, and using immutable files,
869        # so we can just make a link
870        target.put_uri(name, source.bestcap())
871
872
873    def progress(self, message):
874        #print(message)
875        if self.progressfunc:
876            self.progressfunc(message)
877
878
def copy(options):
    """
    Run a copy with a freshly constructed Copier and return the result of
    its do_copy() call for ``options``.
    """
    copier = Copier()
    return copier.do_copy(options)
881
882# error cases that need improvement:
883#  local-file-in-the-way
884#   touch proposed
885#   tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
886#  handling of unknown nodes
887
888# things that maybe should be errors but aren't
889#  local-dir-in-the-way
890#   mkdir denver.txt
891#   tahoe cp -r my:docs/proposed/denver.txt denver.txt
892#   (creates denver.txt/denver.txt)
893
894# error cases that look good:
895#  tahoe cp -r my:docs/missing missing
896#  disconnect servers
897#   tahoe cp -r my:docs/missing missing  -> No JSON object could be decoded
898#  tahoe-file-in-the-way (when we want to make a directory)
899#   tahoe put README my:docs
900#   tahoe cp -r docs/proposed my:docs/proposed
Note: See TracBrowser for help on using the repository browser.