[Git][NTPsec/ntpsec][master] 2 commits: Add percentiles() for speed
Gary E. Miller
gitlab at mg.gitlab.com
Fri Sep 2 02:56:37 UTC 2016
Gary E. Miller pushed to branch master at NTPsec / ntpsec
Commits:
d9c59b66 by Gary E. Miller at 2016-09-01T19:52:44-07:00
Add percentiles() for speed
Splitting a large array 5 times, the same way, is a waste. So invert the
algorithm.
- - - - -
eab10fd4 by Gary E. Miller at 2016-09-01T19:55:38-07:00
Add 1 sigma to histogram, streamline the algorithm
- - - - -
2 changed files:
- ntpstats/ntpviz
- pylib/statfiles.py
Changes:
=====================================
ntpstats/ntpviz
=====================================
--- a/ntpstats/ntpviz
+++ b/ntpstats/ntpviz
@@ -35,18 +35,22 @@ def mean(data):
raise ValueError('mean requires at least one data point')
return sum(data)/n # in Python 2 use sum(data)/float(n)
-def _ss(data):
+def _ss(data, mu=None):
"""Return sum of square deviations of sequence data."""
- c = mean(data)
+ if None == mu:
+ c = mean(data)
+ else:
+ c = mu
ss = sum((x-c)**2 for x in data)
return ss
-def pstdev(data):
+# fixme, need to handle mu=mean
+def pstdev(data, mu=None):
"""Calculates the population standard deviation."""
n = len(data)
if n < 2:
raise ValueError('variance requires at least two data points')
- ss = _ss(data)
+ ss = _ss(data, mu)
pvar = ss/n # the population variance
return pvar**0.5
@@ -354,41 +358,69 @@ plot \
sys.stderr.write("ntpviz: WARNING: no loopstats to graph\n")
return ''
sitename = self.sitename
- cnt = collections.Counter()
- for line in self.loopstats:
- # put into 100 nSec buckets
- cnt[ round( float(line.split()[1]), 7)] += 1
+ # TODO normalize to 0 to 100
+
+ # grab and sort the values, no need for the timestamp, etc.
values = [float(line.split()[1]) for line in self.loopstats]
values.sort()
- values_mean = mean( values ) * 1000000
- ninetynine = self.percentile(2, 99, self.loopstats) * 1000000
- seventyfive = self.percentile(2, 75, self.loopstats) * 1000000
- twentyfive = self.percentile(2, 25, self.loopstats) * 1000000
- one = self.percentile(2, 1, self.loopstats) * 1000000
+ mu = mean( values )
+ values_mean = mu * 1000000
+ values_mean_str = str( round( values_mean, 3 ) )
+
+ values_pstd = round( pstdev( values, mu=mu ) * 1000000, 3)
+
+ # plus/minus of one sigma range
+ m1sigma = values_mean - (values_pstd / 2)
+ p1sigma = values_mean + (values_pstd / 2)
+
+ ninetynine = round( self.percentiles(99, values) * 1000000, 2)
+ ninety = round( self.percentiles(90, values) * 1000000, 2)
+ five = round( self.percentiles( 5, values) * 1000000, 2)
+ one = round( self.percentiles( 1, values) * 1000000, 2)
+
+ cnt = collections.Counter()
+ for value in values:
+ # put into 100 nSec buckets
+ # for a +/- 50 microSec range that is 1,000 buckets to plot
+ cnt[ round( float(value), 7)] += 1
+
+# skip the mean
+#set label 3 "mean = %(values_mean_str)s μs" at graph 0.01,0.3 left front
+
plot_template = '''\
set terminal png size 900,600
set grid
set xtic rotate by -45 scale 0
set title "%(sitename)s: Local Clock Time Offset - Histogram"
set xtics format "@1.1f μs" nomirror
-set label 1 gprintf("99@@ = @1.2f μs",%(ninetynine)s) at %(ninetynine)s, graph 0.91 left front offset 1,-1
set style arrow 1 nohead
-set arrow from %(ninetynine)s,0 to %(ninetynine)s,graph 0.91 as 1
-set label 2 gprintf(" 1@@ = @1.2f μs",%(one)s) at %(one)s, graph 0.91 right front offset -1,-1
+set arrow from %(m1sigma)s,0 to %(m1sigma)s,graph 0.90 as 1
set style arrow 2 nohead
-set arrow from %(one)s,0 to %(one)s,graph 0.91 as 2
-set label 3 gprintf("25@@ = @1.2f μs",%(twentyfive)s) at %(twentyfive)s, graph 0.7 right front offset -1,-1
+set arrow from %(p1sigma)s,0 to %(p1sigma)s,graph 0.90 as 2
set style arrow 3 nohead
-set arrow from %(twentyfive)s,0 to %(twentyfive)s,graph 0.7 as 3
-set label 4 gprintf("75@@ = @1.2f μs",%(seventyfive)s) at %(seventyfive)s, graph 0.7 left front offset 1,-1
+set arrow from %(ninetynine)s,0 to %(ninetynine)s,graph 0.15 as 3
set style arrow 4 nohead
-set arrow from %(seventyfive)s,0 to %(seventyfive)s,graph 0.7 as 4
+set arrow from %(ninety)s,0 to %(ninety)s,graph 0.30 as 4
+set style arrow 5 nohead
+set arrow from %(five)s,0 to %(five)s,graph 0.30 as 5
+set style arrow 6 nohead
+set arrow from %(one)s,0 to %(one)s,graph 0.15 as 6
set key off
set lmargin 12
set rmargin 12
-set label 1 gprintf("mean = %(values_mean)s μs",50) at graph 0.01,0.3 left front
+set label 1 "1σ" at %(m1sigma)s, graph 0.96 left front offset -1,-1
+set label 2 "1σ" at %(p1sigma)s, graph 0.96 left front offset -1,-1
+set label 3 "99%%" at %(ninetynine)s, graph 0.20 left front offset -1,-1
+set label 4 "90%%" at %(ninety)s, graph 0.35 left front offset -1,-1
+set label 5 "1%%" at %(one)s, graph 0.20 left front offset -1,-1
+set label 6 "5%%" at %(five)s, graph 0.35 left front offset -1,-1
+set label 10 "1σ (68%%) = %(values_pstd)s μs" at graph 0.01,0.95 left front
+set label 11 "99%% = %(ninetynine)s μs" at graph 0.01,0.90 left front
+set label 12 "90%% = %(ninety)s μs" at graph 0.01,0.85 left front
+set label 13 "5%% = %(five)s μs" at graph 0.01,0.80 left front
+set label 14 "1%% = %(one)s μs" at graph 0.01,0.75 left front
plot \
"-" using ($1 * 1000000):2 title "histogram" with boxes
''' % locals()
@@ -629,7 +661,7 @@ heating).</p>
""",
"local-offset-histogram": """\
<p>This shows the clock offsets of the local clock as a histogram. It
-includes 1%, 25%, 75%, and 99% percentiles to show the performance of
+includes 1%, 5%, 95%, and 99% percentiles to show the performance of
the system.</p>
""",
"local-stability": """\
=====================================
pylib/statfiles.py
=====================================
--- a/pylib/statfiles.py
+++ b/pylib/statfiles.py
@@ -125,10 +125,16 @@ class NTPStats:
return m
def percentile(self, n, percentile, entries):
"Return given percentiles of a given row in a given set of entries."
+ "If you call this twice on the same data set you should use"
+ "percentiles() instead"
# Row is decremented so we match GNUPLOT's 1-origin indexing.
values = [float(line.split()[n-1]) for line in entries]
values.sort()
return values[int(len(values) * (percentile/100))]
+ def percentiles(self, percentile, values):
+ "Return given percentiles of a given row in a given set of entries."
+ "assuming values are already split and sorted"
+ return values[int(len(values) * (percentile/100))]
def peersplit(self):
"Return a dictionary mapping peerstats IPs to entry subsets."
peermap = {}
View it on GitLab: https://gitlab.com/NTPsec/ntpsec/compare/dba694c1810ee720263a85125060c36c399657fe...eab10fd4b1cf5953bd1be40dc0f6a9997bf49ea4
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ntpsec.org/pipermail/vc/attachments/20160902/970874b6/attachment.html>
More information about the vc
mailing list