1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | # retrieve a latency statistic for a given operation and percentile from a |
---|
4 | # set of storage servers. |
---|
5 | |
---|
6 | # the OPERATION value should come from the following list: |
---|
7 | # allocate: allocate_buckets, first step to upload an immutable file |
---|
8 | # write: write data to an immutable share |
---|
9 | # close: finish writing to an immutable share |
---|
10 | # cancel: abandon a partial immutable share |
---|
11 | # get: get_buckets, first step to download an immutable file |
---|
12 | # read: read data from an immutable share |
---|
13 | # writev: slot_testv_and_readv_and_writev, modify/create a directory |
---|
14 | # readv: read a directory (or mutable file) |
---|
15 | |
---|
16 | # the PERCENTILE value should come from the following list: |
---|
17 | # 01_0: 1% |
---|
18 | # 10_0: 10% |
---|
19 | # 50_0: 50% (median) |
---|
20 | # 90_0: 90% |
---|
21 | # 99_0: 99% |
---|
22 | # 99_9: 99.9% |
---|
23 | # mean: mean (average) latency |
---|
24 | |
---|
25 | # To use this, create a symlink from |
---|
26 | # /etc/munin/plugins/tahoe_server_latency_OPERATION_PERCENTILE to this |
---|
27 | # script. For example: |
---|
28 | |
---|
29 | # ln -s /usr/share/munin/plugins/tahoe_server_latency_ \ |
---|
30 | # /etc/munin/plugins/tahoe_server_latency_allocate_99_9 |
---|
31 | |
---|
32 | # Also, you will need to put a list of node statistics URLs in the plugin's |
---|
33 | # environment, by adding a stanza like the following to a file in |
---|
34 | # /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_latencies: |
---|
35 | # |
---|
36 | # [tahoe_server_latency*] |
---|
37 | # env.url_storage1 http://localhost:9011/statistics?t=json |
---|
38 | # env.url_storage2 http://localhost:9012/statistics?t=json |
---|
39 | # env.url_storage3 http://localhost:9013/statistics?t=json |
---|
40 | # env.url_storage4 http://localhost:9014/statistics?t=json |
---|
41 | |
---|
42 | # of course, these URLs must match the webports you have configured into the |
---|
43 | # storage nodes. |
---|
44 | |
---|
45 | |
---|
46 | import os, sys |
---|
47 | import urllib |
---|
48 | import json |
---|
49 | |
---|
# Script body: work out which statistic this symlink asks for, then either
# describe the graph ("config" mode) or fetch and print one value per node.

from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

PREFIX = "tahoe_server_latency_"
URL_ENV_PREFIX = "url_"

# Kept as a single template so the emitted graph description stays
# byte-for-byte what munin has always been given by this plugin.
CONFIG_TEMPLATE = (
    "graph_title Tahoe Server '%(operation)s' Latency (%(what)s)\n"
    "graph_vlabel seconds\n"
    "graph_category tahoe\n"
    "graph_info This graph shows how long '%(operation)s' operations took"
    " on the storage server, the %(what)s delay between message receipt and"
    " response generation, calculated over the last thousand operations.\n"
)


def _collect_node_urls(environ):
    """Return sorted (nodename, url) pairs, one per 'url_NODENAME' variable."""
    return sorted(
        (name[len(URL_ENV_PREFIX):], url)
        for name, url in environ.items()
        if name.startswith(URL_ENV_PREFIX)
    )


def _parse_plugin_name(script_path):
    """Split the invoked script name into (operation, percentile).

    Only the basename of script_path is examined. Raises ValueError when it
    is not of the form tahoe_server_latency_OPERATION_PERCENTILE.
    """
    my_name = os.path.basename(script_path)
    if not my_name.startswith(PREFIX):
        raise ValueError(
            "plugin name %r does not start with %r" % (my_name, PREFIX))
    # Split on the first underscore only: the percentile itself contains
    # one (e.g. "99_9").
    operation, sep, percentile = my_name[len(PREFIX):].partition("_")
    if not (operation and sep and percentile):
        raise ValueError(
            "plugin name %r must look like %sOPERATION_PERCENTILE"
            % (my_name, PREFIX))
    return operation, percentile


def _build_config(operation, percentile, node_urls):
    """Return the munin 'config' text for this graph, without trailing newline."""
    if percentile == "mean":
        what = "mean"
    else:
        what = percentile.replace("_", ".") + "th percentile"
    configinfo = CONFIG_TEMPLATE % {"operation": operation, "what": what}
    for nodename, _url in node_urls:
        configinfo += "%s.label %s\n" % (nodename, nodename)
        configinfo += "%s.draw LINE2\n" % (nodename,)
    return configinfo.rstrip()


def _fetch_latency(url, operation, percentile):
    """Fetch the JSON statistics at url and return the requested latency."""
    if percentile == "mean":
        p_key = "mean"
    else:
        p_key = percentile + "_percentile"
    key = "storage_server.latencies.%s.%s" % (operation, p_key)
    # closing() releases the connection on both Python 2 and 3; decoding
    # explicitly keeps json.loads happy regardless of interpreter version.
    with closing(urlopen(url)) as response:
        data = json.loads(response.read().decode("utf-8"))
    return data["stats"][key]


def main(argv, environ, out):
    """Run the plugin and return the process exit status.

    argv    -- command line; argv[0] names the symlink, argv[1] may be "config"
    environ -- mapping holding the url_NODENAME settings
    out     -- writable text stream that receives the munin output

    Returns 0 on success, 1 when the plugin name cannot be parsed. Errors
    while fetching or decoding a node's statistics propagate to the caller.
    """
    try:
        operation, percentile = _parse_plugin_name(argv[0])
    except ValueError as e:
        sys.stderr.write("%s\n" % (e,))
        return 1

    node_urls = _collect_node_urls(environ)

    if len(argv) > 1 and argv[1] == "config":
        out.write(_build_config(operation, percentile, node_urls) + "\n")
        return 0

    for nodename, url in node_urls:
        value = _fetch_latency(url, operation, percentile)
        out.write("%s.value %s\n" % (nodename, value))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv, os.environ, sys.stdout))
94 | |
---|