# Python source code of dataconverter

import numpy as np
import csv
import pandas as pd
import SimpleITK as sitk
import os
import os.path
def load_itk_image(filename):
    """Load an ITK image and report whether its direction matrix is non-identity.

    The file at ``filename`` is first read as text to find the line that
    starts with ``TransformMatrix``; its nine values are rounded and compared
    against the 3x3 identity. The same file is then read through SimpleITK.

    Returns a 4-tuple ``(numpyImage, numpyOrigin, numpySpacing, isflip)``:
    the voxel array from ``sitk.GetArrayFromImage``, the image origin and
    spacing with their component order reversed (presumably x, y, z ->
    z, y, x to match the array axes -- confirm against SimpleITK docs), and
    a bool that is True when the rounded matrix differs from identity.

    Raises IndexError when the header has no ``TransformMatrix`` line.
    """
    identity = np.array([1, 0, 0, 0, 1, 0, 0, 0, 1])

    with open(filename) as header:
        matrix_lines = [ln for ln in header.readlines()
                        if ln.startswith('TransformMatrix')]
        # Header lines look like "TransformMatrix = a b c ..."; keep the values.
        matrix_text = matrix_lines[0].split(' = ')[1]
        direction = np.round(np.array(matrix_text.split(' ')).astype('float'))
        isflip = bool(np.any(direction != identity))

    image = sitk.ReadImage(filename)
    voxels = sitk.GetArrayFromImage(image)
    origin_reversed = np.array(tuple(image.GetOrigin())[::-1])
    spacing_reversed = np.array(tuple(image.GetSpacing())[::-1])
    return voxels, origin_reversed, spacing_reversed, isflip
def worldToVoxelCoord(worldCoord, origin, spacing):
    """Convert a world coordinate into an (unsigned) voxel offset.

    Computes ``|worldCoord - origin| / spacing`` element-wise. Because the
    absolute value is taken, the sign of the offset from the origin is lost.
    """
    return np.absolute(worldCoord - origin) / spacing
# Read the ground-truth annotations, given in the original (world) data space,
# and group them per series: dct maps seriesuid -> list of
# [coordX, coordY, coordZ, diameter_mm, malignant].
# Column names are supplied explicitly, so the first row of the file is read
# as data and dropped below with [1:] (presumably it is the CSV's own header
# line -- confirm against the input file).
pdframe = pd.read_csv('annotationdetclsgt.csv', names=['seriesuid', 'coordX', 'coordY', 'coordZ', 'diameter_mm', 'malignant'])
srslst = pdframe['seriesuid'].tolist()[1:]
crdxlst = pdframe['coordX'].tolist()[1:]
crdylst = pdframe['coordY'].tolist()[1:]
crdzlst = pdframe['coordZ'].tolist()[1:]
dimlst = pdframe['diameter_mm'].tolist()[1:]
mlglst = pdframe['malignant'].tolist()[1:]
dct = {}
# zip keeps the columns in lockstep and works on both Python 2 and 3
# (the previous xrange-based index loop was Python 2 only).
for srs, crdx, crdy, crdz, dim, mlg in zip(srslst, crdxlst, crdylst, crdzlst, dimlst, mlglst):
    # Rows with malignant == '0' are kept, not skipped.
    # Explicit raise instead of assert so the check survives `python -O`.
    if mlg not in ('1', '0'):
        raise ValueError('unexpected malignant label %r for series %s' % (mlg, srs))
    vlu = [float(crdx), float(crdy), float(crdz), float(dim), int(mlg)]
    dct.setdefault(srs, []).append(vlu)
# convert it to the preprocessed space
# Placeholder for the converted rows; rebound further down to the result of
# the parallel map over process().
newlst = []
# Directory of the raw scans; process() reads '<pid>.mhd' from here.
rawpath = '/media/data1/wentao/tianchi/luna16/lunaall/'
# Directory of the preprocessing outputs; process() reads '<pid>_spacing.npy',
# '<pid>_extendbox.npy', '<pid>_origin.npy' and '<pid>_mask.npy' from here.
preprocesspath = '/media/data1/wentao/tianchi/luna16/preprocess/lunaall/'
# Per-axis divisor applied in process() after multiplying by the stored
# spacing (presumably the target voxel size of the preprocessed volumes, in
# the same unit as the spacing -- TODO confirm).
resolution = np.array([1,1,1])
def process(pid):
    """Convert every annotation of series ``pid`` into preprocessed coordinates.

    Reads the flip flag from the raw '<pid>.mhd' scan and the spacing, extend
    box and origin saved by preprocessing. Each annotation in ``dct[pid]``
    (``[coordX, coordY, coordZ, diameter_mm, malignant]``) is then:

    1. reversed to (coordZ, coordY, coordX) order and turned into a voxel
       offset with ``worldToVoxelCoord``;
    2. mirrored on the last two axes when the scan is flagged as flipped;
    3. rescaled by ``spacing / resolution``;
    4. shifted by the first column of the extend box.

    Returns a list of ``[pid, c0, c1, c2, diameter, malignant]`` rows, where
    c0..c2 are in the reversed (coordZ, coordY, coordX) order.
    """
    # Only the flip flag of the raw scan is needed; spacing and origin come
    # from the preprocessing outputs instead.
    isflip = load_itk_image(os.path.join(rawpath, pid + '.mhd'))[3]

    def _load(suffix):
        # Load one preprocessing artefact of this series.
        return np.load(os.path.join(preprocesspath, pid + suffix))

    spacing = _load('_spacing.npy')
    extendbox = _load('_extendbox.npy')
    origin = _load('_origin.npy')
    # The mask is consulted only for its shape, and only for flipped scans.
    plane_shape = _load('_mask.npy').shape[1:3] if isflip else None

    rows = []
    for annotation in dct[pid]:
        voxel = worldToVoxelCoord(annotation[:3][::-1], origin=origin, spacing=spacing)
        if isflip:
            voxel[1:] = plane_shape - voxel[1:]
        # 4-vector: three voxel coordinates followed by the diameter in voxels.
        label = np.concatenate([voxel, [annotation[3] / spacing[1]]])
        # Rescale to `resolution`; the diameter uses the axis-1 spacing.
        label[:3] = label[:3] * spacing / resolution
        label[3] = label[3] * spacing[1] / resolution[1]
        # Shift into the cropped volume's frame.
        label[:3] = label[:3] - extendbox[:, 0]
        rows.append([pid, label[0], label[1], label[2], label[3], annotation[-1]])
    return rows
# Convert all series in parallel. newlst receives one entry per series, in
# the iteration order of dct; each entry is the list of rows returned by
# process() for that series.
# NOTE(review): this runs at import time without an
# `if __name__ == '__main__':` guard, which the multiprocessing docs require
# for the spawn start method -- confirm the script only runs where fork is used.
from multiprocessing import Pool
p = Pool(30)
try:
    newlst = p.map(process, list(dct))
finally:
    # Always release the workers, even when a conversion raises, and wait for
    # them to exit so no worker processes are left behind.
    p.close()
    p.join()
print(len(dct.keys()), len(newlst))
# for pid in dct.keys():
#     print pid
#     Mask,origin,spacing,isflip = load_itk_image(os.path.join(rawpath, pid+'.mhd'))
#     spacing = np.load(os.path.join(preprocesspath, pid+'_spacing.npy'))
#     extendbox = np.load(os.path.join(preprocesspath, pid+'_extendbox.npy'))
#     origin = np.load(os.path.join(preprocesspath, pid+'_origin.npy'))
#     if isflip:
#         Mask = np.load(os.path.join(preprocesspath, pid+'_mask.npy'))
#     for vlu in dct[pid]:
#         pos = worldToVoxelCoord(vlu[:3][::-1], origin=origin, spacing=spacing)
#         if isflip:
#             pos[1:] = Mask.shape[1:3] - pos[1:]
#         label = np.concatenate([pos, [vlu[3]/spacing[1]]])
#         label2 = np.expand_dims(np.copy(label), 1)
#         # print label2.shape
#         label2[:3] = label2[:3]*np.expand_dims(spacing,1)/np.expand_dims(resolution,1)
#         label2[3] = label2[3]*spacing[1]/resolution[1]
#         label2[:3] = label2[:3]-np.expand_dims(extendbox[:,0],1)
#         label2 = label2[:4].T
#         newlst.append([pid, label2[0,0], label2[0,1], label2[0,2], label2[0,3], vlu[-1]])
# save it to the csv
# newlst holds one list of rows per series; flatten it into a single CSV with
# a header row. The values under coordX/coordY/coordZ are written in the order
# process() returns them.
savecsv = 'annotationdetclsconv_v3.csv'
# `with` closes (and flushes) the file even if writing a row raises.
with open(savecsv, 'w') as fid:
    writer = csv.writer(fid)
    writer.writerow(['seriesuid', 'coordX', 'coordY', 'coordZ', 'diameter_mm', 'malignant'])
    for rows in newlst:
        writer.writerows(rows)