[Git][NTPsec/ntpsec][master] 2 commits: ntpviz: move plot row extraction into NTPViz.plot_slice()

Gary E. Miller gitlab at mg.gitlab.com
Tue Oct 25 03:49:38 UTC 2016


Gary E. Miller pushed to branch master at NTPsec / ntpsec


Commits:
a7913ef9 by Gary E. Miller at 2016-10-24T13:46:11-07:00
ntpviz: move plot row extraction into NTPViz.plot_slice()

Surprising to me, this alone yielded about a 2% speedup.  I expect
it is due to less work to dereference data in the loop.  Or smaller
string concatenations, or...

- - - - -
58ca1c44 by Gary E. Miller at 2016-10-24T20:45:50-07:00
ntpviz: claw back the speed lost in the missing data hack.

Sorts and calculations were done on the timestamp as a float.  Instead,
unixize() adds the timestamp as an integer number of milliseconds to
each row.  That buys back the 10% speed loss of the missing data hack.

- - - - -


2 changed files:

- ntpstats/ntpviz
- pylib/statfiles.py


Changes:

=====================================
ntpstats/ntpviz
=====================================
--- a/ntpstats/ntpviz
+++ b/ntpstats/ntpviz
@@ -287,7 +287,7 @@ def gnuplot(template, outfile=None):
     else:
         # remove tmp file
         os.remove(tmp_filename)
-        
+
     return rcode
 
 class NTPViz(NTPStats):
@@ -312,6 +312,35 @@ set rmargin 12
         NTPStats.__init__(self, statsdir=statsdir, sitename=sitename,
                           period=period, starttime=starttime, endtime=endtime)
 
+    def plot_slice( self, rows, item1, item2=None):
+        "slice 0,item1, maybe item2, from rows, ready for gnuplot"
+        # speed up by only sending gnuplot the data it will actually use
+        # WARNING: this is hot code, only modify if you profile
+        plot_data = ''
+        last_time = 0
+        item1 += 1
+        if item2:
+            item2 += 1
+            for row in rows:
+                if 1024000 < row[0] - last_time:
+                    # data loss, add a break in the plot line
+                    plot_data += '\n'
+                last_time = row[0]
+                # fields: time, fld, and rtt
+                plot_data += row[1] + ' ' + row[item1] + ' ' + row[item2] + '\n'
+        else:
+            for row in rows:
+                if 1024000 < row[0] - last_time:
+                    # data loss, add a break in the plot line
+                    plot_data += '\n'
+                last_time = row[0]
+                # fields: time, fld
+                plot_data += row[1] + ' ' + row[item1] + '\n'
+        # I know you want to replace the plot_data string concat with
+        # one or more join()s, do not do it, it is slower
+        plot_data += "e\n"
+        return plot_data
+
     def local_offset_gnuplot(self):
         "Generate GNUPLOT code graphing local clock loop statistics"
         if not len( self.loopstats):
@@ -319,25 +348,15 @@ set rmargin 12
             return ''
 
         # speed up by only sending gnuplot the data it will actually use
-        plot_data = ''
-        last_time = 0
-        for row in self.loopstats:
-            delay = float( row[0] ) - last_time
-            if 1024 < delay:
-                # data loss, add a break in the plot line
-                plot_data += '\n'
-            last_time = float(row[0] )
-            # Python slice is too dumb to do this the easy way
-            # fields: time, time offset, and freq offset
-            plot_data += row[0] + ' ' + row[1] + ' ' + row[2] + '\n'
-        plot_data += "e\n"
+        # fields: time, time offset, freq offset
+        plot_data = self.plot_slice( self.loopstats, 1, 2)
 
         # compute clock offset
-        values = [float(line[1]) for line in self.loopstats]
+        values = [float(line[2]) for line in self.loopstats]
         stats = VizStats( values, "Local Clock Time Offset")
 
         # compute frequency offset
-        values_f = [float(line[2]) for line in self.loopstats]
+        values_f = [float(line[3]) for line in self.loopstats]
         stats_f = VizStats(values_f, "Local Clock Frequency Offset", freq=1)
 
         out = stats.percs
@@ -396,17 +415,8 @@ file.</p>
         plot_data = ''
         for key in tempslist:
             # speed up by only sending gnuplot the data it will actually use
-            last_time = 0
-            for row in tempsmap[key]:
-                delay = float( row[0] ) - last_time
-                if 1024 < delay:
-                    # data loss, add a break in the plot line
-                    plot_data += '\n'
-                last_time = float(row[0] )
-                # Python slice is too dumb to do this the easy way
-                # fields: time, temp
-                plot_data += row[0] + ' ' + row[2] + '\n'
-            plot_data += "e\n"
+            # fields: time, temp
+            plot_data += self.plot_slice( tempsmap[key], 2)
 
         plot_template = NTPViz.Common + """\
 set title "%(sitename)s: Local Temparatures"
@@ -450,17 +460,8 @@ component of frequency drift.</p>
         gps_data = ()
         plot_data = ""
         for key in gpslist:
-            last_time = 0
-            for row in gpsmap[key]:
-                delay = float( row[0] ) - last_time
-                if 1024 < delay:
-                    # data loss, add a break in the plot line
-                    plot_data += '\n'
-                last_time = float(row[0] )
-                # Python slice is too dumb to do this the easy way
-                # fields: time, tdop, nSats
-                plot_data += row[0] + ' ' + row[2] + ' ' + row[3] + '\n'
-            plot_data += "e\n"
+            # fields: time, tdop, nSats
+            plot_data += self.plot_slice( gpsmap[key], 2, 3)
 
         plot_template = NTPViz.Common + """\
 set title "%(sitename)s: Local GPS
@@ -508,18 +509,8 @@ gpsd log file is created by the gps-log.py program.</p>
         out["sitename"] = self.sitename
 
         # speed up by only sending gnuplot the data it will actually use
-        plot_data = ''
-        last_time = 0
-        for row in self.loopstats:
-            delay = float( row[0] ) - last_time
-            if 1024 < delay:
-                # data loss, add a break in the plot line
-                plot_data += '\n'
-            last_time = float(row[0] )
-            # Python slice is too dumb to do this the easy way
-            # fields: time, freq error
-            plot_data += row[0] + ' ' + row[2] + '\n'
-        plot_data += "e\n"
+        # fields: time, freq error
+        plot_data = self.plot_slice( self.loopstats, 2)
 
         plot_template = NTPViz.Common + """\
 set title "%(sitename)s: Local Clock Frequency Offset%(clipped)s"
@@ -560,21 +551,11 @@ line at 0ppm.  Expected values of 99%-1% percentiles: 0.4ppm</p>
             return ''
 
         # speed up by only sending gnuplot the data it will actually use
-        plot_data = ''
-        last_time = 0
-        for row in self.loopstats:
-            delay = float( row[0] ) - last_time
-            if 1024 < delay:
-                # data loss, add a break in the plot line
-                plot_data += '\n'
-            last_time = float(row[0] )
-            # Python slice is too dumb to do this the easy way
-            # fields: time, and fld
-            plot_data += row[0] + ' ' + row[fld - 1] + '\n'
-        plot_data += "e\n"
+        # fields: time, fld
+        plot_data = self.plot_slice( self.loopstats, fld - 1)
 
         # grab and process the values
-        values = [float(line[fld - 1]) for line in self.loopstats]
+        values = [float(line[fld]) for line in self.loopstats]
         stats = VizStats( values, title, freq=freq )
 
         # build the output dictionary, because Python can not format
@@ -646,7 +627,6 @@ plot \
             sys.stderr.write("ntpviz: WARNING: no peer data to graph\n")
             return ''
         peerlist.sort() # For stability of output
-        plot_data = ""
         namelist = []   # peer names
 
         ip_todo = []
@@ -740,7 +720,7 @@ at 0s.</p>
 """
 
             # grab and sort the values, no need for the timestamp, etc.
-            values = [float(line[fld - 1]) for line in peerdict[ip]]
+            values = [float(line[fld]) for line in peerdict[ip]]
 
             stats = VizStats( values, title)
 
@@ -760,7 +740,7 @@ at 0s.</p>
             # many peers
             title += "s"
             # grab and sort the values, no need for the timestamp, etc.
-            values = [float(line[fld - 1]) for line in self.peerstats]
+            values = [float(line[fld]) for line in self.peerstats]
 
             stats = VizStats( values, title )
 
@@ -787,30 +767,16 @@ at 0s.</p>
 <p>RMS Jitter is field 8 in the peerstats log file.</p>
 """
 
+        plot_data = ""
         for ip in ip_todo:
             # 20% speed up by only sending gnuplot the data it will
             # actually use
-            # Python slice is too dumb to do this the easy way
-            last_time = 0
             if rtt:
-                for row in peerdict[ip]:
-                    delay = float( row[0] ) - last_time
-                    if 1024 < delay:
-                        # data loss, add a break in the plot line
-                        plot_data += '\n'
-                    last_time = float(row[0] )
-                    # fields: time, fld, and rtt
-                    plot_data += row[0] + ' ' + row[fld-1] + ' ' + row[4] + '\n'
+                # fields: time, fld, and rtt
+                plot_data += self.plot_slice( peerdict[ip], fld-1, 4)
             else:
-                for row in peerdict[ip]:
-                    delay = float( row[0] ) - last_time
-                    if 1024 < delay:
-                        # data loss, add a break in the plot line
-                        plot_data += '\n'
-                    last_time = float(row[0] )
-                    # fields: time, fld
-                    plot_data += row[0] + ' ' + row[fld - 1] + '\n'
-            plot_data += "e\n"
+                # fields: time, fld
+                plot_data += self.plot_slice( peerdict[ip], fld-1)
 
         out = stats.percs
         out['sitename'] = self.sitename
@@ -867,7 +833,7 @@ plot \
         # TODO normalize to 0 to 100?
 
         # grab and sort the values, no need for the timestamp, etc.
-        values = [float(line[1]) for line in self.loopstats]
+        values = [float(row[2]) for row in self.loopstats]
         stats = VizStats( values, 'Local Clock Offset' )
         out = stats.percs
         out['sitename'] = self.sitename
@@ -972,17 +938,15 @@ plot \\
         plot += '"-" using 1:($2*1000000) title "%s clock offset μs" with linespoints, \\\n' % (sitenames[i])
     plot = plot[:-4] + "\n"
 
+    plot_data = ''
     for stats in statlist:
         # speed up by only sending gnuplot the data it will actually use
-        for row in stats.loopstats:
-            # Python slice is too dumb to do this the easy way
-            # fields: time, temp
-            plot += row[0] + ' ' + row[1] + '\n'
-        plot += "\ne\n"
+        # fields: time, offset
+        plot_data += self.plot_slice( stats.loopstats, 1)
 
     ret = {'html' : '', 'stats' : [] }
     ret['title'] = "Multiplot"
-    ret['plot'] = plot
+    ret['plot'] = plot + plot_data
     return ret
 
 # here is how to create the base64 from an image file:


=====================================
pylib/statfiles.py
=====================================
--- a/pylib/statfiles.py
+++ b/pylib/statfiles.py
@@ -34,8 +34,8 @@ class NTPStats:
         # warning: 32 bit overflows
         time = NTPStats.SecondsInDay * mjd + second - 3506716800
         if starttime  <= time <= endtime:
-            del split[0]
-            split[0] = str(time)
+            split[0] = int(time * 1000)  # time as integer number milli seconds
+            split[1] = str(time)         # time as string
             return split
         # else
         return None
@@ -100,12 +100,14 @@ class NTPStats:
                             continue
                         split = line.split()
                         try:
-                            t = int(float(split[0]))
+                            t = float(split[0])
                         except:
                             # ignore comment lines, lines with no time
                             continue
 
                         if starttime <= t <= endtime:
+                            # prefix with int milli sec.
+                            split.insert(0, int(t * 1000))
                             lines1.append( split)
             else:
                 # Morph first field into Unix time with fractional seconds
@@ -137,31 +139,31 @@ class NTPStats:
         if len( self.peermap):
             return self.peermap
 
-        for line in self.peerstats:
-            ip = line[1]
+        for row in self.peerstats:
+            ip = row[2]     # peerstats field 2, refclock id
             if ip not in self.peermap:
                 self.peermap[ip] = []
-            self.peermap[ip].append(line)
+            self.peermap[ip].append(row)
         return self.peermap
 
     def gpssplit(self):
         "Return a dictionary mapping gps sources to entry subsets."
         gpsmap = {}
-        for line in self.gpsd:
-            source = line[1]
+        for row in self.gpsd:
+            source = row[2]
             if source not in gpsmap:
                 gpsmap[source] = []
-            gpsmap[source].append(line)
+            gpsmap[source].append(row)
         return gpsmap
 
     def tempssplit(self):
         "Return a dictionary mapping temperature sources to entry subsets."
         tempsmap = {}
-        for line in self.temps:
-            source = line[1]
+        for row in self.temps:
+            source = row[2]
             if source not in tempsmap:
                 tempsmap[source] = []
-            tempsmap[source].append(line)
+            tempsmap[source].append(row)
         return tempsmap
 
     def dump(self, row):



View it on GitLab: https://gitlab.com/NTPsec/ntpsec/compare/1e92ba012b393061bca8128c2166fe56c42d8052...58ca1c44c9e7d699aac874bd06fad69ff4241c7d
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ntpsec.org/pipermail/vc/attachments/20161025/b35b4c9f/attachment.html>


More information about the vc mailing list