[Git][NTPsec/ntpsec][master] ntpviz: add skewness and kurtosis stats.
Gary E. Miller
gitlab at mg.gitlab.com
Sat May 20 01:58:49 UTC 2017
Gary E. Miller pushed to branch master at NTPsec / ntpsec
Commits:
9b296f50 by Gary E. Miller at 2017-05-19T18:52:49-07:00
ntpviz: add skewness and kurtosis stats.
Skewness and Kurtosis show how far off a data stream is from a
normal distribution.
- - - - -
1 changed file:
- ntpclients/ntpviz
Changes:
=====================================
ntpclients/ntpviz
=====================================
--- a/ntpclients/ntpviz
+++ b/ntpclients/ntpviz
@@ -34,12 +34,13 @@ Python by ESR, concept and gnuplot code by Dan Drown.
# SPDX-License-Identifier: BSD-2-Clause
from __future__ import print_function, division
-import csv
-import datetime
-import re
import atexit
import binascii
import collections
+import csv
+import datetime
+import math
+import re
import os
import socket
import sys
@@ -123,39 +124,48 @@ def print_profile():
pr.print_stats('cumtime')
-# standard deviation functions
+# standard deviation class
# use this until we can guarantee Python 3.4 and the statistics module
# http://stackoverflow.com/questions/15389768/standard-deviation-of-a-list#21505523
-def mean(data):
- """Return the sample arithmetic mean of data."""
- n = len(data)
- if n < 1:
- raise ValueError('mean requires at least one data point')
- return sum(data)/n # in Python 2 use sum(data)/float(n)
-
-
-def _ss(data, mu=None):
- """Return sum of square deviations of sequence data."""
- if mu is None:
- c = mean(data)
- else:
- c = mu
- ss = sum((x-c)**2 for x in data)
- return ss
+# class to calc:
+# Mean, Variance, Standard Deviation, Skewness and Kurtosis
+
+class RunningStats:
+ "Calculate mean, variance, sigma, skewness and kurtosis"
+
+ def __init__(self, values):
+ self.num = len(values) # number of samples
+ self.mu = 0.0 # simple arithmetic mean
+ self.variance = 0.0 # variance
+ self.sigma = 0.0 # aka standard deviation
+ self.skewness = 0.0
+ self.kurtosis = 3.0
+
+ if 0 >= self.num:
+ # no data??
+ return
+
+ self.mu = sum(values) / self.num
+ self.variance = sum(pow((v-self.mu), 2) for v in values) / self.num
+ self.sigma = math.sqrt(self.variance)
+
+ if math.isnan(self.sigma) or 1e-12 >= abs(self.sigma):
+ # punt
+ self.skewness = float('nan')
+ self.kurtosis = float('nan')
+ return
-# fixme, need to handle mu=mean
-def pstdev(data, mu=None):
- """Calculates the population standard deviation."""
- n = len(data)
- if n < 2:
- # variance requires at least two data points
- return 0
- ss = _ss(data, mu)
- pvar = ss/n # the population variance
- return pvar**0.5
+ m3 = 0
+ m4 = 0
+ for val in values:
+ m3 += pow(val - self.sigma, 3)
+ m4 += pow(val - self.sigma, 4)
-# end standard deviation functions
+ self.skewness = m3 / (self.num * pow(self.sigma, 3))
+ self.kurtosis = m4 / (self.num * pow(self.sigma, 4))
+
+# end standard deviation class
# class for calced values
@@ -169,17 +179,20 @@ class VizStats(ntp.statfiles.NTPStats):
# observe RFC 4180, end lines with CRLF
csv_head = ["Name", "Min", "1%", "5%", "50%", "95%", "99%", "Max", "",
- "90% Range", "98% Range", "StdDev", "", "Mean", "Units"]
+ "90% Range", "98% Range", "StdDev", "", "Mean", "Units",
+ "Skewness", "Kurtosis"]
table_head = """\
<br>
-<table style="text-align:right;width:1024px;">
+<table style="text-align:right;width:1300px;">
<thead>
<tr style="font-weight:bold;text-align:left;">
<td style="width:300px;"></td>
<td colspan=8> Percentiles......</td>
<td colspan=3> Ranges......</td>
- <td colspan=2></td>
+ <td colspan=3></td>
+ <td style="text-align:right;">Skew-</td>
+ <td style="text-align:right;">Kurt-</td>
<td ></td>
</tr>
<tr style="font-weight:bold;text-align:right;">
@@ -188,6 +201,7 @@ class VizStats(ntp.statfiles.NTPStats):
<td>99%</td><td>Max</td> <td style="width:10px;"> </td>
<td>90%</td><td>95%</td><td>StdDev</td>
<td style="width:10px;"> </td><td>Mean</td><td>Units</td>
+ <td>ness</td><td>osis</td>
</tr>
</thead>
"""
@@ -250,8 +264,9 @@ class VizStats(ntp.statfiles.NTPStats):
# go to nanosec
self.unit = "ns"
- self.percs["mu"] = mean(values)
- self.percs["pstd"] = pstdev(values, mu=self.percs["mu"])
+ sts = RunningStats(values)
+ self.percs["mu"] = sts.mu
+ self.percs["pstd"] = sts.sigma
self.title = title
@@ -274,6 +289,17 @@ class VizStats(ntp.statfiles.NTPStats):
else:
self.percs_f[k] = format(v, ",.3f")
+ # dan't scale skewness and kurtosis
+ self.percs["skew"] = sts.skewness
+ self.percs["kurt"] = sts.kurtosis
+ if '°C' == units:
+ # skip for temperatures.
+ self.percs_f["skew"] = ''
+ self.percs_f["kurt"] = ''
+ else:
+ self.percs_f["skew"] = format(self.percs["skew"], "6.4g")
+ self.percs_f["kurt"] = format(self.percs["kurt"], "6.4g")
+
if args.clip:
self.percs["min_y"] = self.percs["p1"]
self.percs["max_y"] = self.percs["p99"]
@@ -293,7 +319,7 @@ class VizStats(ntp.statfiles.NTPStats):
s = ["%(title)s", "%(p0)s", "%(p1)s", "%(p5)s", "%(p50)s", "%(p95)s",
" %(p99)s", "%(p100)s", "", "%(r90)s", "%(r98)s", "%(pstd)s",
- "", "%(mu)s", "%(unit)s"]
+ "", "%(mu)s", "%(unit)s", "%(skew)s", "%(kurt)s", ]
# csv is raw, html table is autoranged
self.csv = [x % self.percs for x in s]
@@ -1682,6 +1708,13 @@ Refclock Jitter, and Peer Jitter in seconds. Local Frequency Jitter is
in ppm or ppb.
</dd>
+<dt>kurtosis, Kurt:</dt>
+<dd>The kurtosis of a random variable X is the fourth standardized
+moment and is a dimension-less ratio. ntpviz uses the Pearson's moment
+coefficient of kurtosis. A normal distribution has a kurtosis of three.
+NIST describes a kurtosis over three as "heavy tailed" and one under
+three as "light tailed".</dd>
+
<dt>ms, millisecond:</dt>
<dd>One thousandth of a second = 0.001 seconds, 1e-3 seconds</dd>
@@ -1723,9 +1756,6 @@ or server.</dd>
<dd>The difference between the ntpd calculated time and the local system
clock's time. Also called phase offset.</dd>
-<dt>upstream clock:</dt>
-<dd>Any remote clock or reference clock used as a source of time.</dd>
-
<dt>σ, sigma:</dt>
<dd>Sigma denotes the standard deviation (SD) and is centered on the
arithmetic mean of the data set. The SD is simply the square root of
@@ -1735,6 +1765,16 @@ The formula for sigma is: "σ = √[ ∑(x<sub>i</sub>-mu)^2 / N ]".
Where x<sub>i</sub> denotes the data points and N is the number of data
points.</dd>
+<dt>skewness, Skew:</dt>
+<dd>The skewness of a random variable X is the third standardized
+moment and is a dimension-less ratio. ntpviz uses the Pearson's moment
+coefficient of skewness. Wikipedia describes it best: "The qualitative
+interpretation of the skew is complicated and unintuitive."<br> A normal
+distribution has a skewness of zero. </dd>
+
+<dt>upstream clock:</dt>
+<dd>Any remote clock or reference clock used as a source of time.</dd>
+
<dt>µs, us, microsecond:</dt>
<dd>One millionth of a second, also one thousandth of a millisecond,
0.000,001 seconds, and 1e-6 seconds.</dd>
View it on GitLab: https://gitlab.com/NTPsec/ntpsec/commit/9b296f50d25945a30d3607f9a715d047715e2c19
---
View it on GitLab: https://gitlab.com/NTPsec/ntpsec/commit/9b296f50d25945a30d3607f9a715d047715e2c19
You're receiving this email because of your account on gitlab.com.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.ntpsec.org/pipermail/vc/attachments/20170520/4a13e234/attachment.html>
More information about the vc
mailing list