Ticket #1425: blacklist3.diff

File blacklist3.diff, 20.6 KB (added by warner, at 2011-08-24T00:09:02Z)

updated: handle directories properly, allow listing, incorporate recommendations

  • docs/frontends/webapi.rst

    diff --git a/docs/frontends/webapi.rst b/docs/frontends/webapi.rst
    index 8e7b226..81cda38 100644
    a b The Tahoe REST-ful Web API 
    36368.  `Static Files in /public_html`_
    37379.  `Safety and Security Issues -- Names vs. URIs`_
    383810. `Concurrency Issues`_
     3911. `Access Blacklist`_
    3940
    4041
    4142Enabling the web-API port
    For more details, please see the "Consistency vs Availability" and "The Prime 
    19551956Coordination Directive" sections of `mutable.rst <../specifications/mutable.rst>`_.
    19561957
    19571958
     1959Access Blacklist
     1960================
     1961
     1962Gateway nodes may find it necessary to prohibit access to certain files. The
     1963web-API has a facility to block access to filecaps by their storage index,
     1964returning a 403 "Forbidden" error instead of the original file.
     1965
     1966This blacklist is recorded in $NODEDIR/access.blacklist, and contains one
     1967blocked file per line. Blank lines and comment lines (starting with ``#``)
     1968are ignored. Every other line consists of the storage-index (in the usual
     1969base32 format as displayed by the "More Info" page, or by the "tahoe debug
     1970dump-cap" command), followed by whitespace, followed by a reason string, which
     1971will be included in the 403 error message. The reason string could hold a URL
     1972to a page that explains why the file is blocked, for example.
     1973
     1974So for example, if you found a need to block access to a file with filecap
     1975``URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861``,
     1976you could do the following::
     1977
     1978 tahoe debug dump-cap URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861
     1979 -> storage index: whpepioyrnff7orecjolvbudeu
     1980 echo "whpepioyrnff7orecjolvbudeu my puppy told me to" >>$NODEDIR/access.blacklist
     1981 tahoe restart $NODEDIR
     1982 tahoe get URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861
     1983 -> error, 403 Access Prohibited: my puppy told me to
     1984
     1985The ``access.blacklist`` file will be checked each time a file or directory
     1986is accessed: the blacklist file's ``mtime`` is used to decide whether it needs
     1987to be reloaded. Therefore no node restart is necessary when creating the
     1988initial blacklist, nor when adding second, third, or additional entries to the
     1989list. When modifying the file, be careful to update it atomically, otherwise a
     1990request may arrive while the file is only halfway written, and the partial
     1991file may be incorrectly parsed.
     1992
     1993The blacklist is applied to all access paths (including FTP, SFTP, and CLI
     1994operations), not just the web-API. The blacklist also applies to directories.
     1995If a directory is blacklisted, the gateway will refuse access to both that
     1996directory and any child files/directories underneath it, when accessed via
     1997"DIRCAP/SUBDIR/FILENAME"-style URLs. Users who access a child file or
     1998directory directly through its own cap will bypass the blacklist.
     1999
     2000The node will log the storage index (SI) of the file being blocked, and the
     2001reason string, into the ``logs/twistd.log`` file.
     2002
     2003
    19582004.. [1] URLs and HTTP and UTF-8, Oh My
    19592005
    19602006 HTTP does not provide a mechanism to specify the character set used to
  • new file src/allmydata/blacklist.py

    diff --git a/src/allmydata/blacklist.py b/src/allmydata/blacklist.py
    new file mode 100644
    index 0000000..f0cf692
    - +  
     1
     2import os
     3from twisted.python import log as twisted_log
     4from allmydata.util import base32
     5
     6class FileProhibited(Exception):
     7    """This client has been configured to prohibit access to this object."""
     8    def __init__(self, reason):
     9        Exception.__init__(self, reason)
     10        self.reason = reason
     11
     12
     13class Blacklist:
     14    def __init__(self, blacklist_filename):
     15        self.blacklist_filename = blacklist_filename
     16        self.last_mtime = None
     17        self.entries = {}
     18        self.read_blacklist() # sets .last_mtime and .entries
     19
     20    def read_blacklist(self):
     21        try:
     22            current_mtime = os.stat(self.blacklist_filename).st_mtime
     23        except EnvironmentError:
     24            # unreadable blacklist file means no blacklist
     25            self.entries.clear()
     26            return
     27        if self.last_mtime is None or current_mtime > self.last_mtime:
     28            self.entries.clear()
     29            try:
     30                for line in open(self.blacklist_filename, "r").readlines():
     31                    line = line.lstrip()
     32                    if not line or line.startswith("#"):
     33                        continue
     34                    si_s, reason = line.split(None, 1)
     35                    si = base32.a2b(si_s) # must be valid base32
     36                    self.entries[si] = reason
     37                self.last_mtime = current_mtime
     38            except Exception, e:
     39                twisted_log.err(e, "unparseable blacklist file")
     40                raise
     41
     42    def get_readblocker(self, si):
     43        self.read_blacklist()
     44        reason = self.entries.get(si, None)
     45        if reason:
     46            def read_prohibited(*args, **kwargs):
     47                # log this to logs/twistd.log, since web logs go there too
     48                twisted_log.msg("blacklist prohibited access to SI %s: %s" %
     49                                (base32.b2a(si), reason))
     50                raise FileProhibited(reason)
     51            return read_prohibited
     52        return None
  • src/allmydata/client.py

    diff --git a/src/allmydata/client.py b/src/allmydata/client.py
    index 3cb4cd6..2bc0b1d 100644
    a b from allmydata.history import History 
    2525from allmydata.interfaces import IStatsProducer, RIStubClient, \
    2626                                 SDMF_VERSION, MDMF_VERSION
    2727from allmydata.nodemaker import NodeMaker
     28from allmydata.blacklist import Blacklist
    2829
    2930
    3031KiB=1024
    class Client(node.Node, pollmixin.PollMixin): 
    279280        self.terminator.setServiceParent(self)
    280281        self.add_service(Uploader(helper_furl, self.stats_provider))
    281282        self.init_stub_client()
     283        self.init_blacklist()
    282284        self.init_nodemaker()
    283285
    284286    def init_client_storage_broker(self):
    class Client(node.Node, pollmixin.PollMixin): 
    331333        d.addErrback(log.err, facility="tahoe.init",
    332334                     level=log.BAD, umid="OEHq3g")
    333335
    def init_blacklist(self):
        """Create self.blacklist from 'access.blacklist' in the node's basedir."""
        self.blacklist = Blacklist(os.path.join(self.basedir,
                                                "access.blacklist"))
     339
    334340    def init_nodemaker(self):
    335341        self.nodemaker = NodeMaker(self.storage_broker,
    336342                                   self._secret_holder,
    class Client(node.Node, pollmixin.PollMixin): 
    338344                                   self.getServiceNamed("uploader"),
    339345                                   self.terminator,
    340346                                   self.get_encoding_parameters(),
    341                                    self._key_generator)
     347                                   self._key_generator,
     348                                   self.blacklist)
    342349        default = self.get_config("client", "mutable.format", default="sdmf")
    343350        if default == "mdmf":
    344351            self.mutable_file_default = MDMF_VERSION
    class Client(node.Node, pollmixin.PollMixin): 
    485492    # dirnodes. The first takes a URI and produces a filenode or (new-style)
    486493    # dirnode. The other three create brand-new filenodes/dirnodes.
    487494
    488     def create_node_from_uri(self, write_uri, read_uri=None, deep_immutable=False, name="<unknown name>"):
     495    def create_node_from_uri(self, write_uri, read_uri=None,
     496                             deep_immutable=False, name="<unknown name>"):
    489497        # This returns synchronously.
    490         # Note that it does *not* validate the write_uri and read_uri; instead we
    491         # may get an opaque node if there were any problems.
    492         return self.nodemaker.create_from_cap(write_uri, read_uri, deep_immutable=deep_immutable, name=name)
     498        # Note that it does *not* validate the write_uri and read_uri;
     499        # instead we may get an opaque node if there were any problems.
     500        n = self.nodemaker.create_from_cap(write_uri, read_uri,
     501                                           deep_immutable=deep_immutable,
     502                                           name=name)
     503        return n
    493504
    494505    def create_dirnode(self, initial_children={}, version=SDMF_VERSION):
    495506        d = self.nodemaker.create_new_mutable_directory(initial_children, version=version)
  • src/allmydata/nodemaker.py

    diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py
    index fb69ea5..9d1da35 100644
    a b class NodeMaker: 
    1616
    1717    def __init__(self, storage_broker, secret_holder, history,
    1818                 uploader, terminator,
    19                  default_encoding_parameters, key_generator):
     19                 default_encoding_parameters, key_generator,
     20                 blacklist=None):
    2021        self.storage_broker = storage_broker
    2122        self.secret_holder = secret_holder
    2223        self.history = history
    class NodeMaker: 
    2425        self.terminator = terminator
    2526        self.default_encoding_parameters = default_encoding_parameters
    2627        self.key_generator = key_generator
     28        self.blacklist = blacklist
    2729
    2830        self._node_cache = weakref.WeakValueDictionary() # uri -> node
    2931
    class NodeMaker: 
    6264        else:
    6365            memokey = "M" + bigcap
    6466        if memokey in self._node_cache:
    65             return self._node_cache[memokey]
    66         cap = uri.from_string(bigcap, deep_immutable=deep_immutable, name=name)
    67         node = self._create_from_single_cap(cap)
    68         if node:
    69             self._node_cache[memokey] = node  # note: WeakValueDictionary
     67            node = self._node_cache[memokey]
    7068        else:
    71             # don't cache UnknownNode
    72             node = UnknownNode(writecap, readcap, deep_immutable=deep_immutable, name=name)
     69            cap = uri.from_string(bigcap, deep_immutable=deep_immutable,
     70                                  name=name)
     71            node = self._create_from_single_cap(cap)
     72            if node:
     73                self._node_cache[memokey] = node  # note: WeakValueDictionary
     74            else:
     75                # don't cache UnknownNode
     76                node = UnknownNode(writecap, readcap,
     77                                   deep_immutable=deep_immutable, name=name)
     78        self._check_blacklist(node)
    7379        return node
    7480
    7581    def _create_from_single_cap(self, cap):
    class NodeMaker: 
    8995                            uri.MDMFDirectoryURI,
    9096                            uri.ReadonlyMDMFDirectoryURI)):
    9197            filenode = self._create_from_single_cap(cap.get_filenode_cap())
     98            self._check_blacklist(filenode)
    9299            return self._create_dirnode(filenode)
    93100        return None
    94101
    def _check_blacklist(self, node):
        """Replace the read methods of `node` if its storage index is blocked.

        Nothing happens when no blacklist is configured, or when the
        blacklist returns no readblocker for the node's storage index.
        Otherwise each of read/download_version/download_best_version that
        the node has is overridden on the instance with the readblocker.
        """
        if not self.blacklist:
            return
        storage_index = node.get_storage_index()
        readblocker = self.blacklist.get_readblocker(storage_index)
        if not readblocker:
            return
        # calling any of these afterwards will raise a FileProhibited
        # exception
        for method_name in ("read", "download_version",
                            "download_best_version"):
            if hasattr(node, method_name):
                setattr(node, method_name, readblocker)
     114
    95115    def create_mutable_file(self, contents=None, keysize=None,
    96116                            version=SDMF_VERSION):
    97117        n = MutableFileNode(self.storage_broker, self.secret_holder,
  • src/allmydata/test/no_network.py

    diff --git a/src/allmydata/test/no_network.py b/src/allmydata/test/no_network.py
    index f510544..9d11017 100644
    a b class NoNetworkGrid(service.MultiService): 
    208208        self.basedir = basedir
    209209        fileutil.make_dirs(basedir)
    210210
     211        self._client_config_hooks = client_config_hooks
    211212        self.servers_by_number = {} # maps to StorageServer instance
    212213        self.wrappers_by_id = {} # maps to wrapped StorageServer instance
    213214        self.proxies_by_id = {} # maps to IServer on which .rref is a wrapped
    class NoNetworkGrid(service.MultiService): 
    231232            f.write("[storage]\n")
    232233            f.write("enabled = false\n")
    233234            f.close()
    234             c = None
    235             if i in client_config_hooks:
    236                 # this hook can either modify tahoe.cfg, or return an
    237                 # entirely new Client instance
    238                 c = client_config_hooks[i](clientdir)
    239             if not c:
    240                 c = NoNetworkClient(clientdir)
    241                 c.set_default_mutable_keysize(TEST_RSA_KEY_SIZE)
    242             c.nodeid = clientid
    243             c.short_nodeid = b32encode(clientid).lower()[:8]
    244             c._servers = self.all_servers # can be updated later
    245             c.setServiceParent(self)
     235            c = self._create_client(i, clientdir, clientid)
    246236            self.clients.append(c)
    247237
    def _create_client(self, i, clientdir, clientid):
        """Build client number `i`, attach it to this grid, and return it.

        If a config hook is registered for `i`, it is called with `clientdir`
        and may return a ready-made client; when there is no hook, or the
        hook returns nothing truthy, a NoNetworkClient is created instead.
        """
        hooks = self._client_config_hooks
        # this hook can either modify tahoe.cfg, or return an entirely new
        # Client instance
        client = hooks[i](clientdir) if i in hooks else None
        if not client:
            client = NoNetworkClient(clientdir)
            client.set_default_mutable_keysize(TEST_RSA_KEY_SIZE)
        client.nodeid = clientid
        client.short_nodeid = b32encode(clientid).lower()[:8]
        client._servers = self.all_servers # can be updated later
        client.setServiceParent(self)
        return client
     252
    248253    def make_server(self, i, readonly=False):
    249254        serverid = hashutil.tagged_hash("serverid", str(i))[:20]
    250255        serverdir = os.path.join(self.basedir, "servers",
    class NoNetworkGrid(service.MultiService): 
    276281        for c in self.clients:
    277282            c._servers = self.all_servers
    278283
    def restart_client(self, i):
        """Replace client `i` with a fresh one using the same basedir and id.

        Returns a Deferred that fires once the old client has been detached
        and the replacement has been stored in self.clients[i].
        """
        outgoing = self.clients[i]

        def _build_replacement(ign):
            # the old client must be gone before a new one is built with the
            # same id and basedir
            self.clients[i] = self._create_client(i, outgoing.basedir,
                                                  outgoing.nodeid)

        d = defer.maybeDeferred(outgoing.disownServiceParent)
        d.addCallback(_build_replacement)
        return d
     294
    279295    def remove_server(self, serverid):
    280296        # it's enough to remove the server from c._servers (we don't actually
    281297        # have to detach and stopService it)
    class GridTestMixin: 
    338354            ss = self.g.servers_by_number[i]
    339355            yield (i, ss, ss.storedir)
    340356
    def restart_client(self, i=0):
        """Restart client `i` of the grid and refresh its recorded web port/URL.

        Returns the Deferred from the grid's restart_client(), extended with
        a callback that updates self.client_webports[i] and
        self.client_baseurls[i] from the new client's "webish" service.
        """
        def _record_web_endpoints(ign):
            webish = self.g.clients[i].getServiceNamed("webish")
            self.client_webports[i] = webish.getPortnum()
            self.client_baseurls[i] = webish.getURL()

        d = self.g.restart_client(i)
        d.addCallback(_record_web_endpoints)
        return d
     365
    341366    def find_uri_shares(self, uri):
    342367        si = tahoe_uri.from_string(uri).get_storage_index()
    343368        prefixdir = storage_index_to_dir(si)
  • src/allmydata/test/test_web.py

    diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py
    index 109ebdc..1120dad 100644
    a b class FakeClient(Client): 
    170170        self.history = FakeHistory()
    171171        self.uploader = FakeUploader()
    172172        self.uploader.setServiceParent(self)
     173        self.blacklist = None
    173174        self.nodemaker = FakeNodeMaker(None, self._secret_holder, None,
    174175                                       self.uploader, None,
    175176                                       None, None)
    class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi 
    52625263
    52635264        return d
    52645265
    def test_blacklist(self):
        """Blacklisted files and directories must be refused with a 403.

        Uploads a file, checks that it can be fetched, blacklists it and
        expects a 403, empties the blacklist and expects success again, then
        repeats the exercise with a directory containing that file.
        """
        self.basedir = "web/Grid/blacklist"
        self.set_up_grid()
        client0 = self.g.clients[0]
        blacklist_path = os.path.join(client0.basedir, "access.blacklist")
        self.uris = {}
        payload = "off-limits " * 50

        def _write_blacklist(text):
            f = open(blacklist_path, "w")
            f.write(text)
            f.close()

        def _force_reload():
            # the Blacklist object watches mtime to tell when the file has
            # changed, but on windows this test will run faster than the
            # filesystem's mtime resolution. So we edit Blacklist.last_mtime
            # to force a reload.
            self.g.clients[0].blacklist.last_mtime -= 2.0

        def _then_get(url_attr):
            # the attribute is looked up when the callback runs, because the
            # URLs are only stashed on self by earlier callbacks
            d.addCallback(lambda ign: self.GET(getattr(self, url_attr)))

        def _then_expect_403(which, substring, make_url):
            d.addCallback(lambda ign:
                          self.shouldHTTPError(which, 403, "Forbidden",
                                               substring,
                                               self.GET, make_url()))

        # download from a blacklisted URI, get an error
        d = client0.upload(upload.Data(payload, convergence=""))
        def _remember_upload(results):
            self.uri = results.uri
            self.url = "uri/"+self.uri
            filecap = uri.from_string_filenode(self.uri)
            self.si = filecap.get_storage_index()
        d.addCallback(_remember_upload)
        _then_get("url")

        def _block_file(ign):
            # clients should be checking the blacklist each time, so we don't
            # need to restart the client
            _write_blacklist(" # this is a comment\n"
                             " \n"
                             "\n" # also exercise blank lines
                             + "%s %s\n" % (base32.b2a(self.si),
                                            "off-limits to you"))
        d.addCallback(_block_file)
        _then_expect_403("_get_from_blacklisted_uri",
                         "Access Prohibited: off-limits",
                         lambda: "uri/" + self.uri)

        def _unblock_file(ign):
            _write_blacklist("")
            _force_reload()
        d.addCallback(_unblock_file)
        # now a read should work
        _then_get("url")
        # read again to exercise the blacklist-is-unchanged logic
        _then_get("url")

        # now add a blacklisted directory, and make sure files under it are
        # refused too
        def _make_parent_dir(ign):
            child = client0.create_node_from_uri(self.uri, None)
            return client0.create_dirnode({u"child": (child,{}) })
        d.addCallback(_make_parent_dir)
        def _remember_dir(dirnode):
            ro_cap = dirnode.get_readonly_uri()
            self.dir_si_b32 = base32.b2a(dirnode.get_storage_index())
            self.dir_url_rw = "uri/"+dirnode.get_write_uri()+"/?t=json"
            self.dir_url_ro = "uri/"+ro_cap+"/?t=json"
            self.child_url = "uri/"+ro_cap+"/child"
        d.addCallback(_remember_dir)
        for attr in ("dir_url_rw", "dir_url_ro", "child_url"):
            _then_get(attr)

        def _block_dir(ign):
            _write_blacklist("%s %s\n" % (self.dir_si_b32,
                                          "dir-off-limits to you"))
            _force_reload()
        d.addCallback(_block_dir)
        _then_expect_403("_get_from_blacklisted_uri 2",
                         "Access Prohibited: dir-off-limits",
                         lambda: self.dir_url_rw)
        _then_expect_403("_get_from_blacklisted_uri 3",
                         "Access Prohibited: dir-off-limits",
                         lambda: self.dir_url_ro)
        _then_expect_403("_get_from_blacklisted_uri 4",
                         "Access Prohibited: dir-off-limits",
                         lambda: self.child_url)

        return d
     5349
    52655350class CompletelyUnhandledError(Exception):
    52665351    pass
    52675352class ErrorBoom(rend.Page):
  • src/allmydata/web/common.py

    diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py
    index 6e90554..db33496 100644
    a b from zope.interface import Interface 
    66from nevow import loaders, appserver
    77from nevow.inevow import IRequest
    88from nevow.util import resource_filename
     9from allmydata import blacklist
    910from allmydata.interfaces import ExistingChildError, NoSuchChildError, \
    1011     FileTooLargeError, NotEnoughSharesError, NoSharesError, \
    1112     EmptyPathnameComponentError, MustBeDeepImmutableError, \
    def humanize_failure(f): 
    257258             "The cap is being passed in a read slot (ro_uri), or was retrieved "
    258259             "from a read slot as an unknown cap.") % quoted_name
    259260        return (t, http.BAD_REQUEST)
     261    if f.check(blacklist.FileProhibited):
     262        t = "Access Prohibited: %s" % f.value.reason
     263        return (t, http.FORBIDDEN)
    260264    if f.check(WebError):
    261265        return (f.value.text, f.value.code)
    262266    if f.check(FileTooLargeError):