#! /usr/bin/env python3
# -*- coding: utf-8 -*-
__author__ = 'Scott H. Hawley'
__copyright__ = 'Scott H. Hawley'

"""
Visualization of model activations (and weights)
 - in realtime, based on live microphone input
 - 'oscilloscope-like' display
 - TODO: from audio file(s)

Still under development. Currently it just reads from the (live) default microphone,
multiplies by one pre-fabricated array of weights, and shows the result.
Later I will try to load layers from a model, e.g. from checkpoint files.

Example usage: ./viz.py ../demo/modelcheckpoint_4c.tar

TODO/Limitations:
 - Assumes four knobs.
 - Allow user to change knobs. (Defaults to middle of knob range)

Tested only on Mac OS X High Sierra with Python 3.6/3.7 (Anaconda)
"""
import numpy as np
import os, sys
import torch
import torch.nn as nn
import argparse
import cv2              # pip install opencv-python
import soundcard as sc  # pip install git+https://github.com/bastibe/SoundCard
from scipy.ndimage.interpolation import shift   # used for oscilloscope trigger

# Code for the model
sys.path.append('../signaltrain')
import nn_proc
TheModelClass = nn_proc.st_model
from ptsd2full import load_model   # ptsd2full is in the utils directory with viz

# Where to do PyTorch calcs: CPU or GPU?
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    device = torch.device("cpu")
    torch.set_default_tensor_type('torch.FloatTensor')

# Global variables for graphics windows
imWidth = 1024    # image width for the 'oscilloscope' display
imHeight = 600    # image height for the oscilloscope display; can be anything
blue, green, cyan, yellow = (255,0,0), (0,255,0), (255,255,0), (0,255,255)   # OpenCV BGR colors
blackwhite, rainbow, heat = cv2.COLORMAP_BONE, cv2.COLORMAP_JET, cv2.COLORMAP_AUTUMN  # OpenCV colormaps
layer_act_dims = [1024]

# a few other globals that are kind of handy
knob_names, knob_ranges, knobs_nn = [], [], []
knob_controls_window = 'effect knob controls'
cv2.namedWindow(knob_controls_window)
logo = cv2.imread('stlogo.png', 0)   # if you don't give it a picture you get a black rectangle
logo = np.pad(logo, ((25,0), (150,150)), mode='edge')
cv2.imshow(knob_controls_window, logo)


def check_window_exists(title):
    try:
        window_handle = cv2.getWindowProperty(title, 0)
        #print(f"window {title}: handle is {window_handle}")
        return window_handle == 0.0
    except:
        print("****** error checking for window ", title)
        return False


"""
Routines that draw 2d functions as images
"""
# draw weights for one layer, using one window
def show_2d(array_2d, title="weights_layer", colormap=rainbow, flip=True):
    #print("weights_layer.shape = ", weights_layer.shape)
    if len(array_2d.shape) < 2:
        return
    img = np.clip(array_2d*255, -255, 255).astype(np.uint8)   # scale
    if flip:
        img = np.flip(np.transpose(img))
    img = np.repeat(img[:,:,np.newaxis], 3, axis=2)   # add color channels
    img = cv2.applyColorMap(img, colormap)            # rainbow: blue=low, red=high
    new_window = not check_window_exists(title)       # see if it exists already
    window = cv2.namedWindow(title, cv2.WINDOW_NORMAL)
    cv2.imshow(title, img)                            # show what we've got
    if new_window:
        cv2.resizeWindow(title, img.shape[1], img.shape[0])
    #aspect = img.shape[0] / img.shape[1]
    #if aspect > 3:    # zoom in/out (can use two-finger-scroll to zoom in)
    #    cv2.resizeWindow(title, 200, 1024)
    #    print(f"size for {title} = 1024, {int(1024/aspect*img.shape[0])}")
    #else:
    #    cv2.resizeWindow(title, int(imWidth/2), int(imWidth/2))
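# Example usage of show_2d (hypothetical values, for illustration only):
#   show_2d(np.random.randn(64, 32)*0.2, title="demo weights")
#   cv2.waitKey(0)   # give OpenCV a chance to actually render the window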
# draw weights for all layers using the model state dict
def draw_weights(model, colormap=rainbow):
    sd = model.state_dict()
    for key, value in sd.items():
        show_2d(value.data.squeeze().numpy(), title=key+" "+str(value.data.numpy().shape))


def random_color(seed=None):
    # seed is a cheap way of keeping the color from changing too frequently
    if seed is not None:
        np.random.seed(seed+1)   # +1 because seed=0 gives you green :-)
    return (128+np.random.randint(127), 128+np.random.randint(127), 128+np.random.randint(127))


"""
This actually draws the 'oscilloscope' display for the various 1d activations,
but also draws 2d activations.
NOTE: for now, we only show input and output.
"""
def draw_activations(screen, model, mono_audio, xs, trig_level=None,
                     title="activations (cyan=input, green=output)", gains=[1.0, 1.0]):
    # run the model
    x = torch.as_tensor(mono_audio).unsqueeze(0).double().requires_grad_(False)
    knobs = torch.as_tensor(knobs_nn).unsqueeze(0).double().requires_grad_(False)   # TODO: allow user to change knobs
    y_hat, mag, mag_hat, layer_acts = model.forward(x, knobs, return_acts=True)

    screen *= 0   # clear the screen

    # parameters for the one window that will show all the audio activations together:
    # count how many activations are 1d
    n_1d_acts = 0   # number of activations to show in the oscilloscope besides the input
    for l in range(len(layer_acts)):
        if len(layer_acts[l].data.squeeze().detach().numpy().shape) == 1:
            n_1d_acts += 1
    max_amp_pixels = imHeight/(n_1d_acts+1)/2   # maximum amplitude in pixels
    dy0 = 2*max_amp_pixels                      # spacing between zero lines

    # Draw the input audio first
    act = mono_audio
    y0 = max_amp_pixels   # zero line
    # minus sign after y0 in the following is because computer graphics are 'upside down'
    ys_in = (y0 - max_amp_pixels * np.clip(act[-len(xs):], -1, 1)).astype(np.int)
    pts_in = np.array(list(zip(xs, ys_in)))        # pair up xs & ys for the input
    cv2.polylines(screen, [pts_in], False, cyan)   # draw lines connecting the points

    if trig_level is not None:   # draw the trigger level
        trig_pos = int(y0 - trig_level*max_amp_pixels)
        pts_in = np.array(list(zip([0, 10], [trig_pos, trig_pos])))
        cv2.polylines(screen, [pts_in], False, yellow)

    # draw all other activations (besides the input)
    n_acts = len(layer_acts)
    count_1d = 0
    for l in range(len(layer_acts)):
        act = layer_acts[l].data.squeeze().detach().numpy()
        #print(f"layer {l} of {n_acts}: act.shape = {act.shape}, len = {len(act.shape)}")
        if len(act.shape) == 1:
            count_1d += 1
            y0 = max_amp_pixels + count_1d*dy0   # zero line
            act *= gains[1]                      # gains[1] gets applied to the activations directly
            ys_out = (y0 - max_amp_pixels * np.clip(act[-len(xs):], -1, 1)).astype(np.int)
            #ys_out = ys_out[0:layer_out_dim]    # don't show more than is supposed to be there
            pts_out = np.array(list(zip(xs, ys_out)))   # pair up xs & ys for the output
            if count_1d < n_1d_acts:
                color = random_color()
            else:
                color = green
            cv2.polylines(screen, [pts_out], False, color)
        if len(act.shape) == 2:
            show_2d(act, title=f"act_{l} {act.shape}")

    cv2.putText(screen, f"gains = {gains[0]:.1f}, {gains[1]:.1f} ", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), lineType=cv2.LINE_AA)

    # draw the one window showing all the activations together
    window = cv2.namedWindow(title, cv2.WINDOW_NORMAL)   # allow the window containing the image to be resized
    cv2.imshow(title, screen.astype(np.uint8))
    return


"""
This is a trigger function for the oscilloscope: it finds the first place where
the waveform crosses the threshold with the requested slope.
"""
def find_trigger(mono_audio, thresh=0.02, pos_slope=True):   # thresh = trigger level
    start_ind = None   # where in the buffer the trigger should go; None = no trigger found
    shift_forward = shift(mono_audio, 1, cval=0)
    if pos_slope:
        inds = np.where(np.logical_and(mono_audio >= thresh, shift_forward <= thresh))
    else:
        inds = np.where(np.logical_and(mono_audio <= thresh, shift_forward >= thresh))
    if len(inds[0]) > 0:
        start_ind = inds[0][0]
    return start_ind
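# Worked example of the trigger (hypothetical values, for illustration only):
#   find_trigger(np.array([0.0, 0.05, 0.1]), thresh=0.02) returns 1, because index 1
#   is the first sample at/above the threshold whose predecessor was at/below it.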
"""
Just prints the keys one can use.  Wish I could get arrow keys working.
"""
def instructions():
    print("Keys: ")
    print("  Q : quit ")
    print("  = : increase input gain")
    print("  - : decrease input gain")
    print("  ] : increase output gain")
    print("  [ : decrease output gain")
    print("  ' : increase trigger level")
    print("  ; : decrease trigger level")
    print("  Two-finger scroll on trackpad will zoom in on 2D images")
    print("")
    print("Note: windows start out reduced in display size; they can be resized at will.")
    #print("   (Don't believe the 'Zoom:%' display; it doesn't reflect proper array size)")


"""
'Oscilloscope' routine; takes the model, audio buffer size & sample rate.  Make the
audio buffer a little bigger than 'needed', to avoid showing zero-pads (which are
primarily there for 'safety').  Triggers on the input audio.
"""
def scope(model, buf_size=2000, fs=44100):
    default_mic = sc.default_microphone()
    print("oscilloscope: listening on ", default_mic)
    instructions()
    trig_level = 0.01   # trigger value for the input waveform
    gains = [1, 1]      # gains for input and output

    # allocate storage for the 'screen'
    screen = np.zeros((imHeight, imWidth, 3), dtype=np.uint8)   # 3 = color channels
    xs = np.arange(imWidth).astype(np.int)   # x values of pixels (time samples)

    while (1):   # keep looping until someone stops this
        try:     # sometimes the mic will give a RuntimeError while you're resizing windows
            with default_mic.recorder(samplerate=fs) as mic:
                audio_data = mic.record(numframes=buf_size)   # get some audio from the mic
            audio_data *= gains[0]   # apply input gain before the activations
            bgn = find_trigger(audio_data[:, 0], thresh=trig_level)   # try to trigger
            layer_in_dim = model.in_chunk_size   # length of the input layer
            if bgn is not None:   # we found a spot to trigger at
                end = min(bgn+layer_in_dim, buf_size)        # don't go off the end of the buffer
                pad_len = max(0, layer_in_dim - (end-bgn))   # might have to pad with zeros
                padded_data = np.pad(audio_data[bgn:end, 0], (0, pad_len), 'constant', constant_values=0)
                draw_activations(screen, model, padded_data, xs, trig_level=trig_level, gains=gains)   # draw left channel
            else:
                draw_activations(screen, model, audio_data[0:layer_in_dim, 0]*0, xs, trig_level=trig_level, gains=gains)   # just draw zero line

            key = cv2.waitKeyEx(1) & 0xFF   # keyboard input
            # Controls: (Couldn't get arrow keys to work.)
            if (key != -1) and (key != 255):
                #print('key = ', key)
                pass
            if ord('q') == key:      # quit key
                break
            elif ord('=') == key:
                gains[0] *= 1.1
            elif ord('-') == key:
                gains[0] *= 0.9
            elif ord(']') == key:    # right bracket
                gains[1] *= 1.1
            elif ord('[') == key:    # left bracket
                gains[1] *= 0.9
            elif ord("'") == key:    # apostrophe
                trig_level += 0.02
            elif ord(';') == key:    # semicolon
                trig_level -= 0.02
        except:
            pass
    return
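# The knob controls below are wired up via exec() strings.  A sketch of an
# alternative (my assumption, not tested against this codebase): the same
# trackbars could be built with closures; make_callback() is a hypothetical helper.
#
#   def make_callback(i):                   # i = index into knobs_nn
#       def on_change(val):
#           knobs_nn[i] = val/100.0 - 0.5   # map trackbar range 0..100 to -0.5..0.5
#       return on_change
#   for i, name in enumerate(knob_names):
#       cv2.createTrackbar(name, knob_controls_window, 50, 100, make_callback(i))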
'''
# Trying to make a 'knobs' GUI, but OpenCV is really primitive.
# Hence all the exec() statements in what follows...
'''
def set_knobs():
    global knob_names, knobs_nn
    setstr = 'global knobs_nn; knobs_nn = np.array(['
    num_knobs = len(knob_names)
    for i in range(len(knob_names)):
        knob_name = knob_names[i]
        setstr += f"{knob_name}"
        if i < num_knobs-1:
            setstr += ','
    setstr += '])'
    #print('setstr = ', setstr)
    exec(setstr)
    # map knob settings from 'neural network' coordinates [-0.5, 0.5] to 'world coordinates'
    knobs_wc = knob_ranges[:,0] + (knobs_nn+0.5)*(knob_ranges[:,1]-knob_ranges[:,0])
    text = "knobs_wc = "+np.array2string(knobs_wc, precision=3, separator=',', suppress_small=True)
    cv2.rectangle(logo, (0, 0), (500, 25), (255,255,255), -1)
    cv2.putText(logo, text, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0,0,0), lineType=cv2.LINE_AA)
    cv2.imshow(knob_controls_window, logo)


def make_controls(cp_items):
    global knob_ranges, knobs_nn, knob_names
    # cp_items are items in the checkpoint file
    for key, value in cp_items:
        if key == 'knob_names':
            knob_names = value
        elif key == 'knob_ranges':
            knob_ranges = np.array(value)
    print("knob_names = ", knob_names)
    print("knob_ranges = ", knob_ranges)
    knobs_nn = np.zeros(len(knob_names))
    num_knobs = len(knob_names)
    for i in range(num_knobs):
        knob_name = knob_names[i]
        exec(f"global {knob_name}; {knob_name} = 0.0")
        fn_name = f"on_{knob_name}_change"
        exec(f'def {fn_name}(val): global {knob_name}; {knob_name} = val/100.0-0.5; set_knobs()')
        exec(f"cv2.createTrackbar(knob_name, knob_controls_window, 50, 100, {fn_name})")
    set_knobs()
    return


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Visualizes audio activations and neural network weights",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('model', help='Name of model checkpoint .tar file', nargs='?',
                        default="../demo/modelcheckpoint.tar")   # nargs='?' so the default actually applies
    args = parser.parse_args()

    model, cp_items = load_model(args.model)
    model = model.double()

    make_controls(cp_items)

    draw_weights(model)   # show the model weights

    # Call the oscilloscope in order to visualize activations
    scope(model, buf_size=model.in_chunk_size)

# EOF