[Git][NTPsec/ntpsec][master] 5 commits: ntpviz: stop calling unixize() in a hot loop.

Tue Oct 25 20:20:08 UTC 2016

Gary E. Miller pushed to branch master at NTPsec / ntpsec


Commits:
7ea6dc2d by Gary E. Miller at 2016-10-25T13:16:50-07:00
ntpviz: stop calling unixize() in a hot loop.

This saves over 1/2 million function calls when plotting 7 days, for
about a 5% speedup when doing 7 days.  Now it is easier to see that 1/2
the work of init is unixize(), and init is the most time-consuming thing
after waiting for gnuplot.

- - - - -
e6758ff5 by Gary E. Miller at 2016-10-25T13:16:50-07:00
ntpviz: stop incrementing and decrementing.

Mostly cosmetic.

- - - - -
cc3f10cb by Gary E. Miller at 2016-10-25T13:16:50-07:00
ntpviz: minor tweak, sorting a list sorts on the first item.

A tiny speedup.

- - - - -
90a01db4 by Gary E. Miller at 2016-10-25T13:16:50-07:00
ntpviz: just in case, explicit binary open()

- - - - -
8b936c0c by Gary E. Miller at 2016-10-25T13:16:50-07:00
ntpviz: comment polishing

Document why this is the fastest sort.

- - - - -


2 changed files:

- ntpstats/ntpviz
- pylib/statfiles.py


Changes:

=====================================
ntpstats/ntpviz
=====================================

--- a/ntpstats/ntpviz
+++ b/ntpstats/ntpviz
@@ -318,9 +318,7 @@ set rmargin 12
         # WARNING: this is hot code, only modify if you profile
         plot_data = ''
         last_time = 0
-        item1 += 1
         if item2:
-            item2 += 1
             for row in rows:
                 if 1024000 < row[0] - last_time:
                     # data loss, add a break in the plot line
@@ -349,7 +347,7 @@ set rmargin 12
 
         # speed up by only sending gnuplot the data it will actually use
         # fields: time, time offset, freq offset
-        plot_data = self.plot_slice( self.loopstats, 1, 2)
+        plot_data = self.plot_slice( self.loopstats, 2, 3)
 
         # compute clock offset
         values = [float(line[2]) for line in self.loopstats]
@@ -416,7 +414,7 @@ file.</p>
         for key in tempslist:
             # speed up by only sending gnuplot the data it will actually use
             # fields: time, temp
-            plot_data += self.plot_slice( tempsmap[key], 2)
+            plot_data += self.plot_slice( tempsmap[key], 3)
 
         plot_template = NTPViz.Common + """\
 set title "%(sitename)s: Local Temparatures"
@@ -461,7 +459,7 @@ component of frequency drift.</p>
         plot_data = ""
         for key in gpslist:
             # fields: time, tdop, nSats
-            plot_data += self.plot_slice( gpsmap[key], 2, 3)
+            plot_data += self.plot_slice( gpsmap[key], 3, 4)
 
         plot_template = NTPViz.Common + """\
 set title "%(sitename)s: Local GPS
@@ -510,7 +508,7 @@ gpsd log file is created by the gps-log.py program.</p>
 
         # speed up by only sending gnuplot the data it will actually use
         # fields: time, freq error
-        plot_data = self.plot_slice( self.loopstats, 2)
+        plot_data = self.plot_slice( self.loopstats, 3)
 
         plot_template = NTPViz.Common + """\
 set title "%(sitename)s: Local Clock Frequency Offset%(clipped)s"
@@ -552,7 +550,7 @@ line at 0ppm.  Expected values of 99%-1% percentiles: 0.4ppm</p>
 
         # speed up by only sending gnuplot the data it will actually use
         # fields: time, fld
-        plot_data = self.plot_slice( self.loopstats, fld - 1)
+        plot_data = self.plot_slice( self.loopstats, fld)
 
         # grab and process the values
         values = [float(line[fld]) for line in self.loopstats]
@@ -773,10 +771,10 @@ at 0s.</p>
             # actually use
             if rtt:
                 # fields: time, fld, and rtt
-                plot_data += self.plot_slice( peerdict[ip], fld-1, 4)
+                plot_data += self.plot_slice( peerdict[ip], fld, 5)
             else:
                 # fields: time, fld
-                plot_data += self.plot_slice( peerdict[ip], fld-1)
+                plot_data += self.plot_slice( peerdict[ip], fld)
 
         out = stats.percs
         out['sitename'] = self.sitename
@@ -942,7 +940,7 @@ plot \\
     for stats in statlist:
         # speed up by only sending gnuplot the data it will actually use
         # fields: time, offset
-        plot_data += self.plot_slice( stats.loopstats, 1)
+        plot_data += self.plot_slice( stats.loopstats, 2)
 
     ret = {'html' : '', 'stats' : [] }
     ret['title'] = "Multiplot"


=====================================
pylib/statfiles.py
=====================================
--- a/pylib/statfiles.py
+++ b/pylib/statfiles.py
@@ -20,25 +20,30 @@ class NTPStats:
     sitename = ''
 
     @staticmethod
-    def unixize(line, starttime, endtime):
+    def unixize(lines, starttime, endtime):
         "Extract first two fields, MJD and seconds past midnight."
         "convert timestamp (MJD & seconds past midnight) to Unix time"
         "Replace MJD+second with Unix time."
-        try:
-            split = line.split()
-            mjd = int(split[0])
-            second = float(split[1])
-        except:
-            # unparseable, skip this line
-            return None
-        # warning: 32 bit overflows
-        time = NTPStats.SecondsInDay * mjd + second - 3506716800
-        if starttime  <= time <= endtime:
-            split[0] = int(time * 1000)  # time as integer number milli seconds
-            split[1] = str(time)         # time as string
-            return split
-        # else
-        return None
+        # HOT LOOP!  Do not change w/o profiling before and after
+        lines1 = []
+        for line in lines:
+            try:
+                split = line.split()
+                mjd = int(split[0])
+                second = float(split[1])
+            except:
+                # unparseable, skip this line
+                continue
+
+            # warning: 32 bit overflows
+            time = NTPStats.SecondsInDay * mjd + second - 3506716800
+            if starttime  <= time <= endtime:
+                # time as integer number milli seconds
+                split[0] = int(time * 1000)
+                # time as string
+                split[1] = str(time)
+                lines1.append(split)
+        return lines1
 
     @staticmethod
     def timestamp(line):
@@ -82,9 +87,9 @@ class NTPStats:
                     if starttime > os.path.getmtime(logpart):
                         continue
                     if logpart.endswith("gz"):
-                        lines += gzip.open(logpart).readlines()
+                        lines += gzip.open(logpart, 'rb').readlines()
                     else:
-                        lines += open(logpart).readlines()
+                        lines += open(logpart, 'rb').readlines()
             except IOError:
                 sys.stderr.write("ntpviz: WARNING: could not read %s\n" \
                      % logpart)
@@ -94,12 +99,9 @@ class NTPStats:
             if stem == "temps" or stem == "gpsd":
                 # temps and gpsd are already in UNIX time
                 for line in lines:
-                    line = line.strip(' \0\r\n\t')
                     if line is not None:
-                        if 0 == len(line):
-                            continue
-                        split = line.split()
                         try:
+                            split = line.split()
                             t = float(split[0])
                         except:
                             # ignore comment lines, lines with no time
@@ -110,15 +112,15 @@ class NTPStats:
                             split.insert(0, int(t * 1000))
                             lines1.append( split)
             else:
-                # Morph first field into Unix time with fractional seconds
-                for line in lines:
-                    line = line.strip(' \0\r\n\t')
-                    line = NTPStats.unixize(line, starttime, endtime)
-                    if line is not None:
-                        lines1.append( line)
+                # Morph first fields into Unix time with fractional seconds
+                # ut into nice dictionary of dictionary rows
+                lines1 = NTPStats.unixize(lines, starttime, endtime)
 
             # Sort by datestamp
-            lines1.sort(key=lambda line: line[0])
+            # by default, a tuple sort()s on the 1st item, which is a nice
+            # integer of milli seconds.  This is faster than using
+            # cmp= or key=
+            lines1.sort()
             setattr(self, stem, lines1)
 
     def percentiles(self, percents, values):



View it on GitLab: https://gitlab.com/NTPsec/ntpsec/compare/9522b0dcd4a7c7b2aa43e0488422bd14ee7ca1d1...8b936c0c5e9b4dc91cb55c2b7e5b51cafae9f50e
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ntpsec.org/pipermail/vc/attachments/20161025/f9d5bf63/attachment.html>