Code automatically generated with cleaning of non-relevant contents
Please cite: Xu et al. De novo visual proteomics of single cells through pattern mining

import os, json, copy
import pickle as pickle
import numpy as N
import numpy.fft as NF
import aitom.tomominer.io.file as IF
import aitom.tomominer.geometry.rotate as GR
import aitom.tomominer.pursuit.multi.pattern_generate.genetic_algorithm_ssnr_fsc as PMPG

def average(dj, mask_count_threshold):
    vol_sum = None
    mask_sum = None
    for d in dj:
        v = IF.read_mrc_vol(d['subtomogram'])
        if (not N.all(N.isfinite(v))):
            raise Exception('error loading', d['subtomogram'])
        vm = IF.read_mrc_vol(d['mask'])
        v_r = GR.rotate_pad_mean(v, angle=d['angle'], loc_r=d['loc'])
        assert N.all(N.isfinite(v_r))
        vm_r = GR.rotate_mask(vm, angle=d['angle'])
        assert N.all(N.isfinite(vm_r))
        if (vol_sum is None):
            vol_sum = N.zeros(v_r.shape, dtype=N.float64, order='F')
        vol_sum += v_r
        if (mask_sum is None):
            mask_sum = N.zeros(vm_r.shape, dtype=N.float64, order='F')
        mask_sum += vm_r
    ind = (mask_sum >= mask_count_threshold)
    vol_sum_fft = NF.fftshift(NF.fftn(vol_sum))
    avg = N.zeros(vol_sum_fft.shape, dtype=N.complex)
    avg[ind] = (vol_sum_fft[ind] / mask_sum[ind])
    avg = N.real(NF.ifftn(NF.ifftshift(avg)))
    return {'v': avg, 'm': (mask_sum / len(dj)), }

def main():
    with open('aligned_refine__op.json') as f:
        op = json.load(f)
    out_dir = os.path.abspath(op['out_dir'])
    if (not os.path.isdir(out_dir)):
    with open(op['data_file']) as f:
        dj = json.load(f)
    for d in dj:
        if (not os.path.isabs(d['subtomogram'])):
            d['subtomogram'] = os.path.abspath(os.path.join(os.path.dirname(op['data_file']), d['subtomogram']))
        if (not os.path.isabs(d['mask'])):
            d['mask'] = os.path.abspath(os.path.join(os.path.dirname(op['data_file']), d['mask']))
    pmpg_file = os.path.join(out_dir, 'pmpg.pickle')
    if os.path.isfile(pmpg_file):
        print('loading existing result file', pmpg_file)
        with open(pmpg_file, 'rb') as f:
            pmpg = pickle.load(f)
        pmpg_dp_op = {}
        pmpg_ga_op = copy.deepcopy(op['genetic_algorithm'])
        pmpg_ga_op['sum_min'] = op['min_sample_num']
        pmpg_ga_op['evaluate']['ssnr']['mask_sum_threshold'] = op['min_sample_num']
        pmpg = {}
        pmpg['dp'] = PMPG.data_prepare(dj=dj, op=pmpg_dp_op)
        pmpg['full_set'] = {}
        pmpg['full_set']['evaluate'] = [PMPG.ga_evaluate__single(l=N.ones(len(dj)), stat=pmpg['dp'], op=pmpg_ga_op['evaluate'])]
        PMPG.ga_evaluate__scoring(pmpg['full_set']['evaluate'], op=pmpg_ga_op['evaluate']['scoring'])
        pmpg['best'] = PMPG.ga(stat=pmpg['dp'], op=pmpg_ga_op)
        pmpg['dj'] = [dj[_] for _ in range(len(dj)) if (pmpg['best']['p'][(0, _)] == 1)]
        del pmpg['dp']
        with open(pmpg_file, 'wb') as f:
            pickle.dump(pmpg, f, protocol=(-1))
    print('score for the full set of', len(dj), 'subtomograms:', pmpg['full_set']['evaluate'][0]['score'])
    print('score for the', pmpg['best']['p'][0, :].sum(), 'selected subtomograms:', pmpg['best']['e'][0]['score'])
    avg_re = average(dj=pmpg['dj'], mask_count_threshold=op['min_sample_num'])
    avg_dir = os.path.join(op['out_dir'], 'avg')
    if (not os.path.isdir(avg_dir)):
    IF.put_mrc(avg_re['v'], os.path.join(avg_dir, 'vol_avg.mrc'))
    IF.put_mrc(avg_re['m'], os.path.join(avg_dir, 'mask_avg.mrc'))
    with open(os.path.join(out_dir, 'data_selected.json'), 'w') as f:
        json.dump(pmpg['dj'], f, indent=2)