Ticket #1425: blacklist3.diff
File blacklist3.diff, 20.6 KB (added by warner, at 2011-08-24T00:09:02Z) |
---|
-
docs/frontends/webapi.rst
diff --git a/docs/frontends/webapi.rst b/docs/frontends/webapi.rst index 8e7b226..81cda38 100644
a b The Tahoe REST-ful Web API 36 36 8. `Static Files in /public_html`_ 37 37 9. `Safety and Security Issues -- Names vs. URIs`_ 38 38 10. `Concurrency Issues`_ 39 11. `Access Blacklist`_ 39 40 40 41 41 42 Enabling the web-API port … … For more details, please see the "Consistency vs Availability" and "The Prime 1955 1956 Coordination Directive" sections of `mutable.rst <../specifications/mutable.rst>`_. 1956 1957 1957 1958 1959 Access Blacklist 1960 ================ 1961 1962 Gateway nodes may find it necessary to prohibit access to certain files. The 1963 web-API has a facility to block access to filecaps by their storage index, 1964 returning a 403 "Forbidden" error instead of the original file. 1965 1966 This blacklist is recorded in $NODEDIR/access.blacklist, and contains one 1967 blocked file per line. Comment lines (starting with ``#``) are ignored. Each 1968 line consists of the storage-index (in the usual base32 format as displayed 1969 by the "More Info" page, or by the "tahoe debug dump-cap" command), followed 1970 by whitespace, followed by a reason string, which will be included in the 403 1971 error message. This could hold a URL to a page that explains why the file is 1972 blocked, for example. 
1973 1974 So for example, if you found a need to block access to a file with filecap 1975 ``URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861``, 1976 you could do the following:: 1977 1978 tahoe debug dump-cap URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861 1979 -> storage index: whpepioyrnff7orecjolvbudeu 1980 echo "whpepioyrnff7orecjolvbudeu my puppy told me to" >>$NODEDIR/access.blacklist 1981 tahoe restart $NODEDIR 1982 tahoe get URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861 1983 -> error, 403 Access Prohibited: my puppy told me to 1984 1985 The ``access.blacklist`` file will be checked each time a file or directory 1986 is accessed: the file's ``mtime`` is used to decide whether it needs to be 1987 reloaded. Therefore no node restart is necessary when creating the initial 1988 blacklist, nor when adding second, third, or additional entries to the list. 1989 When modifying the file, be careful to update it atomically, otherwise a 1990 request may arrive while the file is only halfway written, and the partial 1991 file may be incorrectly parsed. 1992 1993 The blacklist is applied to all access paths (including FTP, SFTP, and CLI 1994 operations), not just the web-API. The blacklist also applies to directories. 1995 If a directory is blacklisted, the gateway will refuse access to both that 1996 directory and any child files/directories underneath it, when accessed via 1997 "DIRCAP/SUBDIR/FILENAME" -style URLs. Users who go directly to the child 1998 file/dir will bypass the blacklist. 1999 2000 The node will log the SI of the file being blocked, and the reason code, into 2001 the ``logs/twistd.log`` file. 2002 2003 1958 2004 .. [1] URLs and HTTP and UTF-8, Oh My 1959 2005 1960 2006 HTTP does not provide a mechanism to specify the character set used to -
new file src/allmydata/blacklist.py
diff --git a/src/allmydata/blacklist.py b/src/allmydata/blacklist.py new file mode 100644 index 0000000..f0cf692
import os

from twisted.python import log as twisted_log

from allmydata.util import base32


class FileProhibited(Exception):
    """This client has been configured to prohibit access to this object."""
    def __init__(self, reason):
        Exception.__init__(self, reason)
        # Human-readable explanation, surfaced to the user in the 403 error.
        self.reason = reason


class Blacklist:
    """Tracks storage indexes this gateway refuses to serve.

    The blacklist lives in a text file: one entry per line, a base32
    storage-index followed by whitespace and a free-form reason string.
    Blank lines and lines starting with '#' are ignored. The file is
    re-read lazily whenever its mtime advances past the last load, so no
    node restart is needed after editing it.
    """

    def __init__(self, blacklist_filename):
        self.blacklist_filename = blacklist_filename
        self.last_mtime = None  # mtime at the last successful load, or None
        self.entries = {}       # maps binary storage-index -> reason string
        self.read_blacklist()   # sets .last_mtime and .entries

    def read_blacklist(self):
        """Reload the blacklist file if it has changed since the last load.

        A missing or unreadable file means "no blacklist". Raises (after
        logging) if the file exists but cannot be parsed.
        """
        try:
            current_mtime = os.stat(self.blacklist_filename).st_mtime
        except EnvironmentError:
            # unreadable blacklist file means no blacklist
            self.entries.clear()
            # Also forget the old mtime, so a later re-created file is
            # noticed even when its mtime is not newer than the deleted
            # file's was.
            self.last_mtime = None
            return
        if self.last_mtime is None or current_mtime > self.last_mtime:
            self.entries.clear()
            try:
                # 'with' closes the handle promptly instead of leaking it
                # until garbage collection.
                with open(self.blacklist_filename, "r") as f:
                    for line in f:
                        line = line.lstrip()
                        if not line or line.startswith("#"):
                            continue
                        si_s, reason = line.split(None, 1)
                        si = base32.a2b(si_s)  # must be valid base32
                        self.entries[si] = reason
                self.last_mtime = current_mtime
            except Exception as e:
                twisted_log.err(e, "unparseable blacklist file")
                raise

    def get_readblocker(self, si):
        """Return a callable raising FileProhibited if 'si' is blocked.

        Returns None when access to the given storage index is allowed.
        The blacklist file is (cheaply, via mtime) re-checked on every
        call, so edits take effect without a restart.
        """
        self.read_blacklist()
        reason = self.entries.get(si, None)
        if reason:
            def read_prohibited(*args, **kwargs):
                # log this to logs/twistd.log, since web logs go there too
                twisted_log.msg("blacklist prohibited access to SI %s: %s" %
                                (base32.b2a(si), reason))
                raise FileProhibited(reason)
            return read_prohibited
        return None
src/allmydata/client.py
diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 3cb4cd6..2bc0b1d 100644
a b from allmydata.history import History 25 25 from allmydata.interfaces import IStatsProducer, RIStubClient, \ 26 26 SDMF_VERSION, MDMF_VERSION 27 27 from allmydata.nodemaker import NodeMaker 28 from allmydata.blacklist import Blacklist 28 29 29 30 30 31 KiB=1024 … … class Client(node.Node, pollmixin.PollMixin): 279 280 self.terminator.setServiceParent(self) 280 281 self.add_service(Uploader(helper_furl, self.stats_provider)) 281 282 self.init_stub_client() 283 self.init_blacklist() 282 284 self.init_nodemaker() 283 285 284 286 def init_client_storage_broker(self): … … class Client(node.Node, pollmixin.PollMixin): 331 333 d.addErrback(log.err, facility="tahoe.init", 332 334 level=log.BAD, umid="OEHq3g") 333 335 336 def init_blacklist(self): 337 fn = os.path.join(self.basedir, "access.blacklist") 338 self.blacklist = Blacklist(fn) 339 334 340 def init_nodemaker(self): 335 341 self.nodemaker = NodeMaker(self.storage_broker, 336 342 self._secret_holder, … … class Client(node.Node, pollmixin.PollMixin): 338 344 self.getServiceNamed("uploader"), 339 345 self.terminator, 340 346 self.get_encoding_parameters(), 341 self._key_generator) 347 self._key_generator, 348 self.blacklist) 342 349 default = self.get_config("client", "mutable.format", default="sdmf") 343 350 if default == "mdmf": 344 351 self.mutable_file_default = MDMF_VERSION … … class Client(node.Node, pollmixin.PollMixin): 485 492 # dirnodes. The first takes a URI and produces a filenode or (new-style) 486 493 # dirnode. The other three create brand-new filenodes/dirnodes. 487 494 488 def create_node_from_uri(self, write_uri, read_uri=None, deep_immutable=False, name="<unknown name>"): 495 def create_node_from_uri(self, write_uri, read_uri=None, 496 deep_immutable=False, name="<unknown name>"): 489 497 # This returns synchronously. 490 # Note that it does *not* validate the write_uri and read_uri; instead we 491 # may get an opaque node if there were any problems. 
492 return self.nodemaker.create_from_cap(write_uri, read_uri, deep_immutable=deep_immutable, name=name) 498 # Note that it does *not* validate the write_uri and read_uri; 499 # instead we may get an opaque node if there were any problems. 500 n = self.nodemaker.create_from_cap(write_uri, read_uri, 501 deep_immutable=deep_immutable, 502 name=name) 503 return n 493 504 494 505 def create_dirnode(self, initial_children={}, version=SDMF_VERSION): 495 506 d = self.nodemaker.create_new_mutable_directory(initial_children, version=version) -
src/allmydata/nodemaker.py
diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py index fb69ea5..9d1da35 100644
a b class NodeMaker: 16 16 17 17 def __init__(self, storage_broker, secret_holder, history, 18 18 uploader, terminator, 19 default_encoding_parameters, key_generator): 19 default_encoding_parameters, key_generator, 20 blacklist=None): 20 21 self.storage_broker = storage_broker 21 22 self.secret_holder = secret_holder 22 23 self.history = history … … class NodeMaker: 24 25 self.terminator = terminator 25 26 self.default_encoding_parameters = default_encoding_parameters 26 27 self.key_generator = key_generator 28 self.blacklist = blacklist 27 29 28 30 self._node_cache = weakref.WeakValueDictionary() # uri -> node 29 31 … … class NodeMaker: 62 64 else: 63 65 memokey = "M" + bigcap 64 66 if memokey in self._node_cache: 65 return self._node_cache[memokey] 66 cap = uri.from_string(bigcap, deep_immutable=deep_immutable, name=name) 67 node = self._create_from_single_cap(cap) 68 if node: 69 self._node_cache[memokey] = node # note: WeakValueDictionary 67 node = self._node_cache[memokey] 70 68 else: 71 # don't cache UnknownNode 72 node = UnknownNode(writecap, readcap, deep_immutable=deep_immutable, name=name) 69 cap = uri.from_string(bigcap, deep_immutable=deep_immutable, 70 name=name) 71 node = self._create_from_single_cap(cap) 72 if node: 73 self._node_cache[memokey] = node # note: WeakValueDictionary 74 else: 75 # don't cache UnknownNode 76 node = UnknownNode(writecap, readcap, 77 deep_immutable=deep_immutable, name=name) 78 self._check_blacklist(node) 73 79 return node 74 80 75 81 def _create_from_single_cap(self, cap): … … class NodeMaker: 89 95 uri.MDMFDirectoryURI, 90 96 uri.ReadonlyMDMFDirectoryURI)): 91 97 filenode = self._create_from_single_cap(cap.get_filenode_cap()) 98 self._check_blacklist(filenode) 92 99 return self._create_dirnode(filenode) 93 100 return None 94 101 102 def _check_blacklist(self, node): 103 if self.blacklist: 104 si = node.get_storage_index() 105 readblocker = self.blacklist.get_readblocker(si) 106 if readblocker: 107 # this read() will raise a 
FileProhibited exception 108 if hasattr(node, "read"): 109 node.read = readblocker 110 if hasattr(node, "download_version"): 111 node.download_version = readblocker 112 if hasattr(node, "download_best_version"): 113 node.download_best_version = readblocker 114 95 115 def create_mutable_file(self, contents=None, keysize=None, 96 116 version=SDMF_VERSION): 97 117 n = MutableFileNode(self.storage_broker, self.secret_holder, -
src/allmydata/test/no_network.py
diff --git a/src/allmydata/test/no_network.py b/src/allmydata/test/no_network.py index f510544..9d11017 100644
a b class NoNetworkGrid(service.MultiService): 208 208 self.basedir = basedir 209 209 fileutil.make_dirs(basedir) 210 210 211 self._client_config_hooks = client_config_hooks 211 212 self.servers_by_number = {} # maps to StorageServer instance 212 213 self.wrappers_by_id = {} # maps to wrapped StorageServer instance 213 214 self.proxies_by_id = {} # maps to IServer on which .rref is a wrapped … … class NoNetworkGrid(service.MultiService): 231 232 f.write("[storage]\n") 232 233 f.write("enabled = false\n") 233 234 f.close() 234 c = None 235 if i in client_config_hooks: 236 # this hook can either modify tahoe.cfg, or return an 237 # entirely new Client instance 238 c = client_config_hooks[i](clientdir) 239 if not c: 240 c = NoNetworkClient(clientdir) 241 c.set_default_mutable_keysize(TEST_RSA_KEY_SIZE) 242 c.nodeid = clientid 243 c.short_nodeid = b32encode(clientid).lower()[:8] 244 c._servers = self.all_servers # can be updated later 245 c.setServiceParent(self) 235 c = self._create_client(i, clientdir, clientid) 246 236 self.clients.append(c) 247 237 238 def _create_client(self, i, clientdir, clientid): 239 c = None 240 if i in self._client_config_hooks: 241 # this hook can either modify tahoe.cfg, or return an 242 # entirely new Client instance 243 c = self._client_config_hooks[i](clientdir) 244 if not c: 245 c = NoNetworkClient(clientdir) 246 c.set_default_mutable_keysize(TEST_RSA_KEY_SIZE) 247 c.nodeid = clientid 248 c.short_nodeid = b32encode(clientid).lower()[:8] 249 c._servers = self.all_servers # can be updated later 250 c.setServiceParent(self) 251 return c 252 248 253 def make_server(self, i, readonly=False): 249 254 serverid = hashutil.tagged_hash("serverid", str(i))[:20] 250 255 serverdir = os.path.join(self.basedir, "servers", … … class NoNetworkGrid(service.MultiService): 276 281 for c in self.clients: 277 282 c._servers = self.all_servers 278 283 284 def restart_client(self, i): 285 # we must remove the client, then build a new one with the same id 286 
# and basedir 287 old_client = self.clients[i] 288 d = defer.maybeDeferred(old_client.disownServiceParent) 289 def _then(ign): 290 c = self._create_client(i, old_client.basedir, old_client.nodeid) 291 self.clients[i] = c 292 d.addCallback(_then) 293 return d 294 279 295 def remove_server(self, serverid): 280 296 # it's enough to remove the server from c._servers (we don't actually 281 297 # have to detach and stopService it) … … class GridTestMixin: 338 354 ss = self.g.servers_by_number[i] 339 355 yield (i, ss, ss.storedir) 340 356 357 def restart_client(self, i=0): 358 d = self.g.restart_client(i) 359 def _then(ign): 360 c = self.g.clients[i] 361 self.client_webports[i] = c.getServiceNamed("webish").getPortnum() 362 self.client_baseurls[i] = c.getServiceNamed("webish").getURL() 363 d.addCallback(_then) 364 return d 365 341 366 def find_uri_shares(self, uri): 342 367 si = tahoe_uri.from_string(uri).get_storage_index() 343 368 prefixdir = storage_index_to_dir(si) -
src/allmydata/test/test_web.py
diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 109ebdc..1120dad 100644
a b class FakeClient(Client): 170 170 self.history = FakeHistory() 171 171 self.uploader = FakeUploader() 172 172 self.uploader.setServiceParent(self) 173 self.blacklist = None 173 174 self.nodemaker = FakeNodeMaker(None, self._secret_holder, None, 174 175 self.uploader, None, 175 176 None, None) … … class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi 5262 5263 5263 5264 return d 5264 5265 5266 def test_blacklist(self): 5267 # download from a blacklisted URI, get an error 5268 self.basedir = "web/Grid/blacklist" 5269 self.set_up_grid() 5270 c0 = self.g.clients[0] 5271 c0_basedir = c0.basedir 5272 fn = os.path.join(c0_basedir, "access.blacklist") 5273 self.uris = {} 5274 DATA = "off-limits " * 50 5275 d = c0.upload(upload.Data(DATA, convergence="")) 5276 def _stash_uri(ur): 5277 self.uri = ur.uri 5278 self.url = "uri/"+self.uri 5279 u = uri.from_string_filenode(self.uri) 5280 self.si = u.get_storage_index() 5281 d.addCallback(_stash_uri) 5282 d.addCallback(lambda ign: self.GET(self.url)) 5283 def _blacklist(ign): 5284 f = open(fn, "w") 5285 f.write(" # this is a comment\n") 5286 f.write(" \n") 5287 f.write("\n") # also exercise blank lines 5288 f.write("%s %s\n" % (base32.b2a(self.si), "off-limits to you")) 5289 f.close() 5290 # clients should be checking the blacklist each time, so we don't 5291 # need to restart the client 5292 d.addCallback(_blacklist) 5293 d.addCallback(lambda ign: 5294 self.shouldHTTPError("_get_from_blacklisted_uri", 5295 403, "Forbidden", 5296 "Access Prohibited: off-limits", 5297 self.GET, "uri/" + self.uri)) 5298 def _unblacklist(ign): 5299 open(fn, "w").close() 5300 # the Blacklist object watches mtime to tell when the file has 5301 # changed, but on windows this test will run faster than the 5302 # filesystem's mtime resolution. So we edit Blacklist.last_mtime 5303 # to force a reload. 
5304 self.g.clients[0].blacklist.last_mtime -= 2.0 5305 d.addCallback(_unblacklist) 5306 # now a read should work 5307 d.addCallback(lambda ign: self.GET(self.url)) 5308 # read again to exercise the blacklist-is-unchanged logic 5309 d.addCallback(lambda ign: self.GET(self.url)) 5310 5311 # now add a blacklisted directory, and make sure files under it are 5312 # refused too 5313 def _add_dir(ign): 5314 childnode = c0.create_node_from_uri(self.uri, None) 5315 return c0.create_dirnode({u"child": (childnode,{}) }) 5316 d.addCallback(_add_dir) 5317 def _get_dircap(dn): 5318 self.dir_si_b32 = base32.b2a(dn.get_storage_index()) 5319 self.dir_url_rw = "uri/"+dn.get_write_uri()+"/?t=json" 5320 self.dir_url_ro = "uri/"+dn.get_readonly_uri()+"/?t=json" 5321 self.child_url = "uri/"+dn.get_readonly_uri()+"/child" 5322 d.addCallback(_get_dircap) 5323 d.addCallback(lambda ign: self.GET(self.dir_url_rw)) 5324 d.addCallback(lambda ign: self.GET(self.dir_url_ro)) 5325 d.addCallback(lambda ign: self.GET(self.child_url)) 5326 def _block_dir(ign): 5327 f = open(fn, "w") 5328 f.write("%s %s\n" % (self.dir_si_b32, "dir-off-limits to you")) 5329 f.close() 5330 self.g.clients[0].blacklist.last_mtime -= 2.0 5331 d.addCallback(_block_dir) 5332 d.addCallback(lambda ign: 5333 self.shouldHTTPError("_get_from_blacklisted_uri 2", 5334 403, "Forbidden", 5335 "Access Prohibited: dir-off-limits", 5336 self.GET, self.dir_url_rw)) 5337 d.addCallback(lambda ign: 5338 self.shouldHTTPError("_get_from_blacklisted_uri 3", 5339 403, "Forbidden", 5340 "Access Prohibited: dir-off-limits", 5341 self.GET, self.dir_url_ro)) 5342 d.addCallback(lambda ign: 5343 self.shouldHTTPError("_get_from_blacklisted_uri 4", 5344 403, "Forbidden", 5345 "Access Prohibited: dir-off-limits", 5346 self.GET, self.child_url)) 5347 5348 return d 5349 5265 5350 class CompletelyUnhandledError(Exception): 5266 5351 pass 5267 5352 class ErrorBoom(rend.Page): -
src/allmydata/web/common.py
diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index 6e90554..db33496 100644
a b from zope.interface import Interface 6 6 from nevow import loaders, appserver 7 7 from nevow.inevow import IRequest 8 8 from nevow.util import resource_filename 9 from allmydata import blacklist 9 10 from allmydata.interfaces import ExistingChildError, NoSuchChildError, \ 10 11 FileTooLargeError, NotEnoughSharesError, NoSharesError, \ 11 12 EmptyPathnameComponentError, MustBeDeepImmutableError, \ … … def humanize_failure(f): 257 258 "The cap is being passed in a read slot (ro_uri), or was retrieved " 258 259 "from a read slot as an unknown cap.") % quoted_name 259 260 return (t, http.BAD_REQUEST) 261 if f.check(blacklist.FileProhibited): 262 t = "Access Prohibited: %s" % f.value.reason 263 return (t, http.FORBIDDEN) 260 264 if f.check(WebError): 261 265 return (f.value.text, f.value.code) 262 266 if f.check(FileTooLargeError):