import numpy as np
import scipy as sp
import numpy.matlib as matlib
import scipy.interpolate as interp
import scipy.signal as signal
# For progress bar
import time
try:
    from tqdm import tqdm
except ImportError:
    # tqdm only draws a cosmetic progress bar; degrade gracefully when absent.
    def tqdm(iterable, **kwargs):
        return iterable


def alignTraces(data):
    '''
    Aligns the traces in the profile such that their maximum
    amplitudes align at the average two-way travel time of the
    maximum amplitudes.

    INPUT:
    data       data matrix whose columns contain the traces

    OUTPUT:
    newdata    data matrix with aligned traces
    '''
    maxlen = data.shape[0]
    newdata = np.asmatrix(np.zeros(data.shape))

    # Go through all traces to find maximum spike
    maxind = np.zeros(data.shape[1], dtype=int)
    for tr in range(0, data.shape[1]):
        maxind[tr] = int(np.argmax(np.abs(data[:, tr])))

    # Find the mean spike point
    meanind = int(np.round(np.mean(maxind)))

    # Shift all traces. If max index is smaller than
    # mean index, then prepend zeros, otherwise append
    for tr in range(0, data.shape[1]):
        if meanind > maxind[tr]:
            differ = int(meanind - maxind[tr])
            newdata[:, tr] = np.vstack([np.zeros((differ, 1)),
                                        data[0:(maxlen - differ), tr]])
        elif meanind < maxind[tr]:
            differ = maxind[tr] - meanind
            newdata[:, tr] = np.vstack([data[differ:maxlen, tr],
                                        np.zeros((differ, 1))])
        else:
            newdata[:, tr] = data[:, tr]
    return newdata


def dewow(data, window):
    '''
    Subtracts from each sample along each trace an
    along-time moving average.

    Can be used as a low-cut filter.

    INPUT:
    data       data matrix whose columns contain the traces
    window     length of moving average window
               [in "number of samples"]

    OUTPUT:
    newdata    data matrix after dewow
    '''
    totsamps = data.shape[0]
    # If the window is larger or equal to the number of samples,
    # then we can do a much faster dewow
    if (window >= totsamps):
        newdata = data - np.matrix.mean(data, 0)
    else:
        newdata = np.asmatrix(np.zeros(data.shape))
        halfwid = int(np.ceil(window / 2.0))

        # For the first few samples, it will always be the same
        avgsmp = np.matrix.mean(data[0:halfwid + 1, :], 0)
        newdata[0:halfwid + 1, :] = data[0:halfwid + 1, :] - avgsmp

        # for each sample in the middle
        for smp in tqdm(range(halfwid, totsamps - halfwid + 1)):
            winstart = int(smp - halfwid)
            winend = int(smp + halfwid)
            avgsmp = np.matrix.mean(data[winstart:winend + 1, :], 0)
            newdata[smp, :] = data[smp, :] - avgsmp

        # For the last few samples, it will always be the same
        avgsmp = np.matrix.mean(data[totsamps - halfwid:totsamps + 1, :], 0)
        newdata[totsamps - halfwid:totsamps + 1, :] = \
            data[totsamps - halfwid:totsamps + 1, :] - avgsmp

    print('done with dewow')
    return newdata


def smooth(data, window):
    '''
    Replaces each sample along each trace with an
    along-time moving average.

    Can be used as high-cut filter.

    INPUT:
    data       data matrix whose columns contain the traces
    window     length of moving average window
               [in "number of samples"]

    OUTPUT:
    newdata    data matrix after applying smoothing
    '''
    totsamps = data.shape[0]
    # If the window is larger or equal to the number of samples,
    # then we can do a much faster dewow
    if (window >= totsamps):
        newdata = np.matrix.mean(data, 0)
    elif window == 1:
        newdata = data
    elif window == 0:
        newdata = data
    else:
        newdata = np.asmatrix(np.zeros(data.shape))
        halfwid = int(np.ceil(window / 2.0))

        # For the first few samples, it will always be the same
        newdata[0:halfwid + 1, :] = np.matrix.mean(data[0:halfwid + 1, :], 0)

        # for each sample in the middle
        for smp in tqdm(range(halfwid, totsamps - halfwid + 1)):
            winstart = int(smp - halfwid)
            winend = int(smp + halfwid)
            newdata[smp, :] = np.matrix.mean(data[winstart:winend + 1, :], 0)

        # For the last few samples, it will always be the same
        newdata[totsamps - halfwid:totsamps + 1, :] = \
            np.matrix.mean(data[totsamps - halfwid:totsamps + 1, :], 0)

    print('done with smoothing')
    return newdata


def remMeanTrace(data, ntraces):
    '''
    Subtracts from each trace the average trace over
    a moving average window.

    Can be used to remove horizontal arrivals,
    such as the airwave.

    INPUT:
    data       data matrix whose columns contain the traces
    ntraces    window width; over how many traces
               to take the moving average.

    OUTPUT:
    newdata    data matrix after subtracting average traces
    '''
    data = np.asmatrix(data)
    tottraces = data.shape[1]
    # For ridiculous ntraces values, just remove the entire average
    if ntraces >= tottraces:
        newdata = data - np.matrix.mean(data, 1)
    else:
        newdata = np.asmatrix(np.zeros(data.shape))
        halfwid = int(np.ceil(ntraces / 2.0))

        # First few traces, that all have the same average
        avgtr = np.matrix.mean(data[:, 0:halfwid + 1], 1)
        newdata[:, 0:halfwid + 1] = data[:, 0:halfwid + 1] - avgtr

        # For each trace in the middle
        for tr in tqdm(range(halfwid, tottraces - halfwid + 1)):
            winstart = int(tr - halfwid)
            winend = int(tr + halfwid)
            avgtr = np.matrix.mean(data[:, winstart:winend + 1], 1)
            newdata[:, tr] = data[:, tr] - avgtr

        # Last few traces again have the same average
        avgtr = np.matrix.mean(data[:, tottraces - halfwid:tottraces + 1], 1)
        newdata[:, tottraces - halfwid:tottraces + 1] = \
            data[:, tottraces - halfwid:tottraces + 1] - avgtr

    print('done with removing mean trace')
    return newdata


def profileSmooth(data, profilePos, ntraces=1, noversample=1):
    '''
    First creates copies of each trace and appends the copies
    next to each trace, then replaces each trace with the
    average trace over a moving average window.

    Can be used to smooth-out noisy reflectors appearing
    in neighboring traces, or simply to increase the along-profile
    resolution by interpolating between the traces.

    INPUT:
    data            data matrix whose columns contain the traces
    profilePos      profile coordinates for the traces in data
    ntraces         window width [in "number of samples"];
                    over how many traces to take the moving average.
    noversample     how many copies of each trace

    OUTPUT:
    newdata         data matrix after along-profile smoothing
    newProfilePos   profile coordinates for output data matrix
    '''
    # New profile positions
    newProfilePos = np.linspace(profilePos[0],
                                profilePos[-1],
                                noversample * len(profilePos))
    # First oversample the data
    data = np.asmatrix(np.repeat(data, noversample, 1))
    tottraces = data.shape[1]
    if ntraces == 1:
        newdata = data
    elif ntraces == 0:
        newdata = data
    elif ntraces >= tottraces:
        newdata = np.matrix.mean(data, 1)
    else:
        newdata = np.asmatrix(np.zeros(data.shape))
        halfwid = int(np.ceil(ntraces / 2.0))

        # First few traces, that all have the same average
        newdata[:, 0:halfwid + 1] = np.matrix.mean(data[:, 0:halfwid + 1], 1)

        # For each trace in the middle
        for tr in tqdm(range(halfwid, tottraces - halfwid + 1)):
            winstart = int(tr - halfwid)
            winend = int(tr + halfwid)
            newdata[:, tr] = np.matrix.mean(data[:, winstart:winend + 1], 1)

        # Last few traces again have the same average
        newdata[:, tottraces - halfwid:tottraces + 1] = \
            np.matrix.mean(data[:, tottraces - halfwid:tottraces + 1], 1)

    print('done with profile smoothing')
    return newdata, newProfilePos


def tpowGain(data, twtt, power):
    '''
    Apply a t-power gain to each trace with the given exponent.

    INPUT:
    data       data matrix whose columns contain the traces
    twtt       two-way travel time values for the rows in data
    power      exponent

    OUTPUT:
    newdata    data matrix after t-power gain
    '''
    factor = np.reshape(twtt**(float(power)), (len(twtt), 1))
    factmat = matlib.repmat(factor, 1, data.shape[1])
    return np.multiply(data, factmat)


def agcGain(data, window):
    '''
    Apply automated gain control (AGC) by normalizing the energy
    of the signal over a given window width in each trace.

    INPUT:
    data       data matrix whose columns contain the traces
    window     window width [in "number of samples"]

    OUTPUT:
    newdata    data matrix after AGC gain
    '''
    # Small epsilon guards against division by zero for dead traces
    eps = 1e-8
    totsamps = data.shape[0]
    # If window is a ridiculous value
    if (window > totsamps):
        # np.maximum is exactly the right thing (not np.amax or np.max)
        energy = np.maximum(np.linalg.norm(data, axis=0), eps)
        # np.divide automatically divides each row of "data"
        # by the elements in "energy"
        newdata = np.divide(data, energy)
    else:
        # Need to go through the samples
        newdata = np.asmatrix(np.zeros(data.shape))
        halfwid = int(np.ceil(window / 2.0))
        # For the first few samples, it will always be the same
        energy = np.maximum(np.linalg.norm(data[0:halfwid + 1, :], axis=0), eps)
        newdata[0:halfwid + 1, :] = np.divide(data[0:halfwid + 1, :], energy)

        for smp in tqdm(range(halfwid, totsamps - halfwid + 1)):
            winstart = int(smp - halfwid)
            winend = int(smp + halfwid)
            energy = np.maximum(
                np.linalg.norm(data[winstart:winend + 1, :], axis=0), eps)
            newdata[smp, :] = np.divide(data[smp, :], energy)

        # For the last few samples, it will always be the same
        energy = np.maximum(
            np.linalg.norm(data[totsamps - halfwid:totsamps + 1, :], axis=0),
            eps)
        newdata[totsamps - halfwid:totsamps + 1, :] = \
            np.divide(data[totsamps - halfwid:totsamps + 1, :], energy)
    return newdata


def prepTopo(topofile, delimiter=',', xStart=0):
    '''
    Reads an ASCII text file containing either profile/topo coordinates
    (if given as two columns) or x,y,z or Easting,Northing,Elevation
    (if given as three columns)

    INPUT:
    topofile    file name for the ASCII text file
    delimiter   delimiter by which the entries are separated
                (e.g. ',' or tab '\t') [default: ',']
    xStart      if three-dimensional topo data is given:
                profile position of the first x,y,z entry
                [default: 0]

    OUTPUT:
    topoPos     the along-profile coordinates for the elevation points
    topoVal     the elevation values for the given profile coordinates
    threeD      n x 3 matrix containing the x, y, z values for the
                topography points
    '''
    # Read topofile, see if it is two columns or three columns.
    # Here I'm using numpy's loadtxt. There are more advanced readers around
    # but this one should do for this simple situation
    topotable = np.loadtxt(topofile, delimiter=delimiter)
    topomat = np.asmatrix(topotable)
    # Depending if the table has two or three columns,
    # need to treat it differently
    if topomat.shape[1] == 3:
        # Save the three columns
        threeD = topomat
        # Turn the three-dimensional positions into along-profile
        # distances
        topoVal = topomat[:, 2]
        npos = topomat.shape[0]
        steplen = np.sqrt(
            np.power(topomat[1:npos, 0] - topomat[0:npos - 1, 0], 2.0) +
            np.power(topomat[1:npos, 1] - topomat[0:npos - 1, 1], 2.0) +
            np.power(topomat[1:npos, 2] - topomat[0:npos - 1, 2], 2.0)
        )
        alongdist = np.cumsum(steplen)
        topoPos = np.append(xStart, alongdist + xStart)
    elif topomat.shape[1] == 2:
        threeD = None
        topoPos = topomat[:, 0]
        topoVal = topomat[:, 1]
        topoPos = np.squeeze(np.asarray(topoPos))
    else:
        print("Something is wrong with the topography file")
        topoPos = None
        topoVal = None
        threeD = None
    return topoPos, topoVal, threeD


def correctTopo(data, velocity, profilePos, topoPos, topoVal, twtt):
    '''
    Corrects for topography along the profile by shifting each
    Trace up or down depending on provided coordinates.

    INPUT:
    data          data matrix whose columns contain the traces
    velocity      subsurface RMS velocity in m/ns
    profilePos    along-profile coordinates of the traces
    topoPos       along-profile coordinates for provided elevation
                  in meters
    topoVal       elevation values for provided along-profile
                  coordinates, in meters
    twtt          two-way travel time values for the samples, in ns

    OUTPUT:
    newdata       data matrix with shifted traces, padded with NaN
    newtwtt       twtt for the shifted / padded data matrix
    maxElev       maximum elevation value
    minElev       minimum elevation value
    '''
    # We assume that the profilePos are the correct along-profile
    # points of the measurements (they can be correted with adj profile)
    # For some along-profile points, we have the elevation from prepTopo
    # So we can just interpolate
    if not ((all(np.diff(topoPos) > 0)) or (all(np.diff(topoPos) < 0))):
        raise ValueError('\x1b[1;31;47m' + 'The profile vs topo file does not have purely increasing or decreasing along-profile positions' + '\x1b[0m')
    else:
        elev = interp.pchip_interpolate(topoPos, topoVal, profilePos)
        elevdiff = elev - np.min(elev)
        # Turn each elevation point into a two way travel-time shift.
        # It's two-way travel time
        etime = 2 * elevdiff / velocity
        timeStep = twtt[3] - twtt[2]
        # Calculate the time shift for each trace
        tshift = (np.round(etime / timeStep)).astype(int)
        maxup = np.max(tshift)
        # We want the highest elevation to be zero time.
        # Need to shift by the greatest amount, where we are the lowest
        tshift = np.max(tshift) - tshift
        # Make new datamatrix
        newdata = np.empty((data.shape[0] + maxup, data.shape[1]))
        newdata[:] = np.nan
        # Set new twtt
        newtwtt = np.arange(0, twtt[-1] + maxup * timeStep, timeStep)
        nsamples = len(twtt)
        # Enter every trace at the right place into newdata
        for pos in range(0, len(profilePos)):
            newdata[tshift[pos][0]:tshift[pos][0] + nsamples, pos] = \
                np.squeeze(data[:, pos])
        return newdata, newtwtt, np.max(elev), np.min(elev)


def prepVTK(profilePos, gpsmat=None, smooth=True, win_length=51, porder=3):
    '''
    Calculates the three-dimensional coordinates for each trace
    by interpolating the given three dimensional points along the
    profile.

    NOTE: the "smooth" parameter shadows this module's smooth()
    function inside this function's scope (kept for backward
    compatibility with existing callers).

    INPUT:
    profilePos    the along-profile coordinates of the traces
    gpsmat        n x 3 matrix containing the x, y, z coordinates
                  of given three-dimensional points for the profile
    smooth        Want to smooth the profile's three-dimensional
                  alignment instead of piecewise linear?
                  [Default: True]
    win_length    If smoothing, the window length for
                  scipy.signal.savgol_filter [default: 51]
    porder        If smoothing, the polynomial order for
                  scipy.signal.savgol_filter [default: 3]

    OUTPUT:
    x, y, z       three-dimensional coordinates for the traces
    '''
    if gpsmat is None:
        x = profilePos
        y = np.zeros(x.size)
        z = np.zeros(x.size)
    else:
        # Turn the three-dimensional positions into along-profile
        # distances
        if gpsmat.shape[1] == 3:
            npos = gpsmat.shape[0]
            steplen = np.sqrt(
                np.power(gpsmat[1:npos, 0] - gpsmat[0:npos - 1, 0], 2.0) +
                np.power(gpsmat[1:npos, 1] - gpsmat[0:npos - 1, 1], 2.0) +
                np.power(gpsmat[1:npos, 2] - gpsmat[0:npos - 1, 2], 2.0)
            )
            alongdist = np.cumsum(steplen)
            gpsPos = np.append(0, alongdist) + np.min(profilePos)
            # We assume that the profilePos are the correct along-profile
            # points of the measurements (they can be correted with adj profile)
            # For some along-profile points, we have the elevation from prepTopo
            # So we can just interpolate
            xval = gpsmat[:, 0]
            yval = gpsmat[:, 1]
            zval = gpsmat[:, 2]
            x = interp.pchip_interpolate(gpsPos, xval, profilePos)
            y = interp.pchip_interpolate(gpsPos, yval, profilePos)
            z = interp.pchip_interpolate(gpsPos, zval, profilePos)
        else:
            npos = gpsmat.shape[0]
            steplen = np.sqrt(
                np.power(gpsmat[1:npos, 0] - gpsmat[0:npos - 1, 0], 2.0) +
                np.power(gpsmat[1:npos, 1] - gpsmat[0:npos - 1, 1], 2.0)
            )
            alongdist = np.cumsum(steplen)
            gpsPos = np.append(0, alongdist) + np.min(profilePos)
            xval = gpsmat[:, 0]
            zval = gpsmat[:, 1]
            x = interp.pchip_interpolate(gpsPos, xval, profilePos)
            z = interp.pchip_interpolate(gpsPos, zval, profilePos)
            y = np.zeros(len(x))

        # Do some smoothing
        if smooth:
            win_length = min(int(len(x) / 2), win_length)
            porder = min(int(np.sqrt(len(x))), porder)
            x = signal.savgol_filter(x.squeeze(),
                                     window_length=win_length,
                                     polyorder=porder)
            y = signal.savgol_filter(y.squeeze(),
                                     window_length=win_length,
                                     polyorder=porder)
            z = signal.savgol_filter(z.squeeze(),
                                     window_length=win_length,
                                     polyorder=porder)
    return x, y, z


def linStackedAmplitude(data, profilePos, twtt, vVals, tVals, typefact):
    '''
    Calculates the linear stacked amplitudes for each two-way
    travel time sample and the provided velocity range
    by summing the pixels of the data that follow a line given
    by the two-way travel time zero offset and the velocity.

    INPUT:
    data          data matrix whose columns contain the traces
    profilePos    along-profile coordinates of the traces
    twtt          two-way travel time values for the samples, in ns
    vVals         list of velocity values for which to calculate the
                  linear stacked amplitudes, in m/ns
    tVals         list of twtt zero-offsets for which to calculate
                  the linear stacked amplitudes, in ns
    typefact      factor for antenna separation depending if this is
                  for CMP (typefact=2) or WARR (typefact=1) data

    OUTPUT:
    linStAmp      matrix containing the linear stacked amplitudes
                  for the given data, tVals, and vVals
    '''
    linStAmp = np.zeros((len(tVals), len(vVals)))
    for vi in tqdm(range(0, len(vVals))):
        for ti in range(0, len(tVals)):
            t = tVals[ti] + typefact * profilePos / vVals[vi]
            tindices = (np.round((t - twtt[0]) / (twtt[3] - twtt[2]))).astype(int)
            # The tindices will be sorted, can use searchsorted because
            # the wave doesn't turn around
            maxi = np.searchsorted(tindices, len(twtt))
            pixels = data[(tindices[0:maxi], np.arange(0, maxi))]
            linStAmp[ti, vi] = np.abs(np.sum(pixels) / pixels.shape[1])
    return linStAmp


def hypStackedAmplitude(data, profilePos, twtt, vVals, tVals, typefact):
    '''
    Calculates the hyperbolic stacked amplitudes for each two-way
    travel time sample and the provided velocity range
    by summing the pixels of the data that follow a hyperbola given
    by the two-way travel time apex and the velocity.

    INPUT:
    data          data matrix whose columns contain the traces
    profilePos    along-profile coordinates of the traces
    twtt          two-way travel time values for the samples, in ns
    vVals         list of velocity values for which to calculate the
                  hyperbolic stacked amplitudes, in m/ns
    tVals         list of twtt zero-offsets for which to calculate
                  the hyperbolic stacked amplitudes, in ns
    typefact      factor for antenna separation depending if this is
                  for CMP (typefact=2) or WARR (typefact=1) data

    OUTPUT:
    hypStAmp      matrix containing the hyperbolic stacked amplitudes
                  for the given data, tVals, and vVals
    '''
    hypStAmp = np.zeros((len(tVals), len(vVals)))
    x2 = np.power(typefact * profilePos, 2.0)
    for vi in tqdm(range(0, len(vVals))):
        for ti in range(0, len(tVals)):
            t = np.sqrt(x2 + 4 * np.power(tVals[ti] / 2.0 * vVals[vi], 2.0)) / vVals[vi]
            tindices = (np.round((t - twtt[0]) / (twtt[3] - twtt[2]))).astype(int)
            # The tindices will be sorted, can use searchsorted because
            # the wave doesn't turn around
            maxi = np.searchsorted(tindices, len(twtt))
            pixels = data[(tindices[0:maxi], np.arange(0, maxi))]
            hypStAmp[ti, vi] = np.abs(np.sum(pixels) / pixels.shape[1])
    return hypStAmp


##### Testing / trying to improve performance:
def linStackedAmplitude_alt1(data, profilePos, twtt, vVals, tVals, typefact):
    '''
    Calculates the linear stacked amplitudes for each two-way
    travel time sample and the provided velocity range
    by summing the pixels of the data that follow a line given
    by the two-way travel time zero offset and the velocity.

    NOTE(review): scipy.interpolate.interp2d is deprecated and was
    removed in SciPy 1.14 — this alternative implementation will not
    run on recent SciPy; kept as-is since it is an experimental variant.

    INPUT:
    data          data matrix whose columns contain the traces
    profilePos    along-profile coordinates of the traces
    twtt          two-way travel time values for the samples, in ns
    vVals         list of velocity values for which to calculate the
                  linear stacked amplitudes, in m/ns
    tVals         list of twtt zero-offsets for which to calculate
                  the linear stacked amplitudes, in ns
    typefact      factor for antenna separation depending if this is
                  for CMP (typefact=2) or WARR (typefact=1) data

    OUTPUT:
    linStAmp      matrix containing the linear stacked amplitudes
                  for the given data, tVals, and vVals
    '''
    linStAmp = np.zeros((len(tVals), len(vVals)))
    f = interp.interp2d(profilePos, twtt, data)
    for vi in tqdm(range(0, len(vVals))):
        for ti in range(0, len(tVals)):
            t = tVals[ti] + typefact * profilePos / vVals[vi]
            vals = np.diagonal(np.asmatrix(f(profilePos, t)))
            linStAmp[ti, vi] = np.abs(sum(vals) / len(vals))
    return linStAmp


def linStackedAmplitude_alt2(data, profilePos, twtt, vVals, tVals, typefact):
    '''
    Calculates the linear stacked amplitudes for each two-way
    travel time sample and the provided velocity range
    by summing the pixels of the data that follow a line given
    by the two-way travel time zero offset and the velocity.

    INPUT:
    data          data matrix whose columns contain the traces
    profilePos    along-profile coordinates of the traces
    twtt          two-way travel time values for the samples, in ns
    vVals         list of velocity values for which to calculate the
                  linear stacked amplitudes, in m/ns
    tVals         list of twtt zero-offsets for which to calculate
                  the linear stacked amplitudes, in ns
    typefact      factor for antenna separation depending if this is
                  for CMP (typefact=2) or WARR (typefact=1) data

    OUTPUT:
    linStAmp      matrix containing the linear stacked amplitudes
                  for the given data, tVals, and vVals
    '''
    linStAmp = np.zeros((len(tVals), len(vVals)))
    tVals = np.asmatrix(tVals).transpose()
    for vi in tqdm(range(0, len(vVals))):
        t = tVals + typefact * profilePos / vVals[vi]
        tindices = (np.round((t - twtt[0]) / (twtt[3] - twtt[2]))).astype(int)
        for ti in range(0, len(tVals)):
            # The tindices will be sorted, can use searchsorted because
            # the wave doesn't turn around
            maxi = np.searchsorted(np.ravel(tindices[ti, :]), len(twtt))
            pixels = data[(tindices[ti, 0:maxi], np.arange(0, maxi))]
            linStAmp[ti, vi] = np.abs(np.sum(pixels) / pixels.shape[1])
    return linStAmp