'''
>>> fs = patcher('stanza.monitoring.summary', '/test'); open = fs.start()
... # ^ for doctest; ignore

A nearly pure-Python module for logging output to a file in TensorBoard's
events format. Supports scalars, RGB images and histograms.

>>> writer = SummaryWriter('/test/values.tfevents')
>>> writer.log_scalar(1, 'universe', 42)
>>> writer.flush()
>>> with open('/test/values.tfevents', 'r') as infile:
...     for event in read_events(infile):
...         print(event)  # doctest: +ELLIPSIS
wall_time: ...
step: 1
summary {
  value {
    tag: "universe"
    simple_value: 42.0
  }
}
<BLANKLINE>

This module requires a very small subset of TensorFlow to be available for
importing, consisting of the following compiled protobuf definitions:

    tensorflow/core/
        framework/
            attr_value_pb2.py
            function_pb2.py
            graph_pb2.py
            op_def_pb2.py
            summary_pb2.py
            tensor_pb2.py
            tensor_shape_pb2.py
            types_pb2.py
        util/
            event_pb2.py

It also requires a couple of other easy-to-install Python modules:

    pip install -U pypng 'Protobuf>=3.0.0b2'

After an event file is written (it should have 'tfevents' somewhere in its
name), the file can be read by TensorBoard by running

    tensorboard --logdir="`pwd`"

from the parent directory of the directory containing the events file.

>>> fs.stop()
... # ^ for doctest; ignore
'''

__author__ = 'wmonroe4'

import atexit
import numpy as np
import png
import struct
import sys
import time
from itertools import izip
from StringIO import StringIO

from tensorflow.core.util.event_pb2 import Event
from tensorflow.core.framework.summary_pb2 import Summary, HistogramProto

from .crc32c import crc as crc32
from ..research.mockfs import patcher  # NOQA: for doctest

open = open  # for doctest


class SummaryWriter(object):
    def __init__(self, filename, tick=5.0, max_queue_len=100):
        '''
        :param str filename: The path of the events file to be written.
            The file is truncated during construction of the `SummaryWriter`
            object.
        :param float tick: The number of seconds to elapse in between
            automatically writing queued events out to the file. A write can
            be forced manually with a call to `flush()`.
        :param max_queue_len: The maximum number of events to keep queued
            before the queue is flushed. If more than this number of events
            accumulate in the queue, they will be flushed even if `tick`
            seconds have not elapsed.

        Note that event writing is performed synchronously; unlike the
        TensorFlow SummaryWriter, this module is not run in a separate
        thread or process.
        '''
        self.filename = filename
        self.tick = tick
        self.max_queue_len = max_queue_len
        self.last_append = time.time()
        self.queue = []

        # Truncate the file to start
        with open(filename, 'wb'):
            pass
        atexit.register(SummaryWriter.flush, self)

    def log_image(self, step, tag, val):
        '''
        Write an image event.

        :param int step: Time step (x-axis in TensorBoard graphs)
        :param str tag: Label for this value
        :param numpy.ndarray val: Image in RGB format with values from
            0 to 255; a 3-D array with index order (row, column, channel).
            `val.shape[-1] == 3`
        '''
        # TODO: support floating-point tensors, 4-D tensors, grayscale
        if len(val.shape) != 3:
            raise ValueError('`log_image` value should be a 3-D tensor, instead got shape %s' %
                             (val.shape,))
        if val.shape[2] != 3:
            raise ValueError('Last dimension of `log_image` value should be 3 (RGB), '
                             'instead got shape %s' % (val.shape,))

        fakefile = StringIO()
        png.Writer(size=(val.shape[1], val.shape[0])).write(
            fakefile, val.reshape(val.shape[0], val.shape[1] * val.shape[2]))
        encoded = fakefile.getvalue()
        # https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/framework/summary.proto
        RGB = 3
        image = Summary.Image(height=val.shape[0], width=val.shape[1],
                              colorspace=RGB, encoded_image_string=encoded)
        summary = Summary(value=[Summary.Value(tag=tag, image=image)])
        self._add_event(step, summary)

    def log_scalar(self, step, tag, val):
        '''
        Write a scalar event.

        :param int step: Time step (x-axis in TensorBoard graphs)
        :param str tag: Label for this value
        :param float val: Scalar to graph at this time step (y-axis)
        '''
        summary = Summary(value=[Summary.Value(tag=tag,
                                               simple_value=float(np.float32(val)))])
        self._add_event(step, summary)

    def log_histogram(self, step, tag, val):
        '''
        Write a histogram event.

        :param int step: Time step (x-axis in TensorBoard graphs)
        :param str tag: Label for this value
        :param numpy.ndarray val: Arbitrary-dimensional array containing
            values to be aggregated in the resulting histogram.
        '''
        hist = Histogram()
        hist.add(val)
        summary = Summary(value=[Summary.Value(tag=tag, histo=hist.encode_to_proto())])
        self._add_event(step, summary)

    def _add_event(self, step, summary):
        t = time.time()
        e = Event(wall_time=t, step=step, summary=summary)
        self.queue.append(e)

        if t - self.last_append >= self.tick or len(self.queue) >= self.max_queue_len:
            self.flush()
            self.last_append = t

    def flush(self):
        '''
        Force all queued events to be written to the events file. The queue
        will automatically be flushed at regular time intervals, when it
        grows too large, and at program exit (with the usual caveats of
        `atexit`: this won't happen if the program is killed with a signal
        or `os._exit()`).
        '''
        if self.queue:
            with open(self.filename, 'ab') as outfile:
                write_events(outfile, self.queue)
            del self.queue[:]
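

# Illustrative usage sketch (not part of the original module): the three
# log_* methods above share the same (step, tag, value) calling convention.
# The file path, tag names, and data below are hypothetical.
#
#     writer = SummaryWriter('runs/values.tfevents', tick=5.0, max_queue_len=100)
#     writer.log_scalar(0, 'loss', 1.25)
#     writer.log_image(0, 'sample', np.zeros((32, 32, 3), dtype=np.uint8))  # (row, col, RGB), 0-255
#     writer.log_histogram(0, 'weights', np.random.randn(1000))
#     writer.flush()  # or rely on tick/max_queue_len/atexit-triggered flushing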


_default_buckets = None


def default_buckets():
    global _default_buckets
    if _default_buckets is None:
        positive_buckets = 1e-12 * 1.1 ** np.arange(0, 776.)
        positive_buckets[-1] = sys.float_info.max
        _default_buckets = np.array(list(reversed(-positive_buckets)) + [0.0] +
                                    list(positive_buckets))
    return _default_buckets


class Histogram(object):
    '''
    Stores statistics about the values of an array as counts of values
    falling into buckets on a logarithmic scale.

    Ported from the TensorFlow C++ class:
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/lib/histogram/histogram.cc

    >>> h = Histogram([-2.0, -1.0, 0.0, 1.0, 2.0])
    >>> h.add([-1.5, 0.5, 0.25])
    >>> print(str(h.encode_to_proto()))
    min: -1.5
    max: 0.5
    num: 3.0
    sum: -0.75
    sum_squares: 2.5625
    bucket_limit: -2.0
    bucket_limit: -1.0
    bucket_limit: 0.0
    bucket_limit: 1.0
    bucket_limit: 2.0
    bucket: 0.0
    bucket: 1.0
    bucket: 0.0
    bucket: 2.0
    bucket: 0.0
    <BLANKLINE>
    '''
    def __init__(self, bucket_limits=None):
        if bucket_limits is None:
            bucket_limits = default_buckets()
        self.bucket_limits = bucket_limits
        self.clear()

    def clear(self):
        self.min = self.bucket_limits[-1]
        self.max = self.bucket_limits[0]
        self.num = 0
        self.sum = 0.0
        self.sum_squares = 0.0
        self.buckets = np.zeros((len(self.bucket_limits),))

    def add(self, arr):
        if not isinstance(arr, np.ndarray):
            arr = np.array(arr)
        arr = arr.flatten()

        self.min = min(self.min, arr.min())
        self.max = max(self.max, arr.max())
        self.sum += arr.sum()
        self.num += len(arr)
        self.sum_squares += (arr ** 2).sum()

        indices = np.searchsorted(self.bucket_limits, arr, side='right')
        new_counts = np.bincount(indices, minlength=self.buckets.shape[0])
        if new_counts.shape[0] > self.buckets.shape[0]:
            # This should only happen with nans and extremely large values
            assert new_counts.shape[0] == self.buckets.shape[0] + 1, new_counts.shape
            new_counts = new_counts[:self.buckets.shape[0]]
        self.buckets += new_counts

    def encode_to_proto(self):
        p = HistogramProto()
        p.min = float(self.min)
        p.max = float(self.max)
        p.num = float(self.num)
        p.sum = float(self.sum)
        p.sum_squares = float(self.sum_squares)

        bucket_limits = []
        buckets = []
        for i, (end, count) in enumerate(izip(self.bucket_limits, self.buckets)):
            if (i == len(self.bucket_limits) - 1 or
                    count > 0.0 or self.buckets[i + 1] > 0.0):
                bucket_limits.append(float(end))
                buckets.append(float(count))

        p.bucket_limit.extend(bucket_limits)
        p.bucket.extend(buckets)
        return p


class SummaryReaderException(Exception):
    pass


def masked_crc(data):
    crc = crc32(data) & 0xffffffff
    return (((crc >> 15) | (crc << 17)) + 0xa282ead8) & 0xffffffff


def read_events(stream):
    '''
    Read and return as a generator a sequence of Event protos from
    file-like object `stream`.
    '''
    header_size = struct.calcsize('<QI')
    len_size = struct.calcsize('<Q')
    footer_size = struct.calcsize('<I')

    while True:
        header = stream.read(header_size)
        if len(header) == 0:
            break
        elif len(header) < header_size:
            raise SummaryReaderException('unexpected EOF (expected a %d-byte header, '
                                         'got %d bytes)' % (header_size, len(header)))
        data_len, len_crc = struct.unpack('<QI', header)
        len_crc_actual = masked_crc(header[:len_size])
        if len_crc_actual != len_crc:
            raise SummaryReaderException('incorrect length CRC (%d != %d)' %
                                         (len_crc_actual, len_crc))

        data = stream.read(data_len)
        if len(data) < data_len:
            raise SummaryReaderException('unexpected EOF (expected %d bytes, got %d)' %
                                         (data_len, len(data)))
        yield Event.FromString(data)

        footer = stream.read(footer_size)
        if len(footer) < footer_size:
            raise SummaryReaderException('unexpected EOF (expected a %d-byte footer, '
                                         'got %d bytes)' % (footer_size, len(footer)))
        data_crc, = struct.unpack('<I', footer)
        data_crc_actual = masked_crc(data)
        if data_crc_actual != data_crc:
            raise SummaryReaderException('incorrect data CRC (%d != %d)' %
                                         (data_crc_actual, data_crc))
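

# Record framing, as parsed by read_events above and produced by write_events
# below (the layout TensorFlow uses for events/TFRecord files): each Event is
# stored as
#
#     uint64, little-endian:  byte length of the serialized Event proto
#     uint32, little-endian:  masked_crc of the 8 length bytes
#     bytes:                  the serialized Event proto itself
#     uint32, little-endian:  masked_crc of the proto bytes
#
# where masked_crc is CRC-32C rotated right by 15 bits plus 0xa282ead8
# (see masked_crc above).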


def write_events(stream, events):
    '''
    Write a sequence of Event protos to file-like object `stream`.
    '''
    for event in events:
        data = event.SerializeToString()
        len_field = struct.pack('<Q', len(data))
        len_crc = struct.pack('<I', masked_crc(len_field))
        data_crc = struct.pack('<I', masked_crc(data))

        stream.write(len_field)
        stream.write(len_crc)
        stream.write(data)
        stream.write(data_crc)


__all__ = [
    'SummaryWriter',
    'read_events',
    'write_events',
]


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Usage: summary.py [summary_file.tfevents]')
        sys.exit(1)

    with open(sys.argv[1], 'rb') as infile:
        for event in read_events(infile):
            print(event)