Python pickle.dump() Examples

The following code examples show how to use pickle.dump(). They are extracted from open source Python projects.
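
For orientation before the project examples: pickle.dump(obj, file, protocol=None) writes obj into an already-open, binary-mode file object, and pickle.load() reads it back. A minimal round trip looks like this (data.pkl is just an illustrative filename):

import pickle

record = {"name": "example", "values": [1, 2, 3]}

# pickle streams are binary, so files must be opened in 'wb'/'rb' mode
with open("data.pkl", "wb") as f:
    pickle.dump(record, f, protocol=pickle.HIGHEST_PROTOCOL)

with open("data.pkl", "rb") as f:
    restored = pickle.load(f)

assert restored == record

Many of the examples below pass pickle.HIGHEST_PROTOCOL (or the equivalent protocol=-1) for the most compact encoding; omitting the argument falls back to pickle.DEFAULT_PROTOCOL, which may be older than the highest protocol for compatibility.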

Example 1
Project: Hyperopt-Keras-CNN-CIFAR-100   Author: guillaume-chevalier   File: optimize.py   (MIT License)
def run_a_trial():
    """Run one TPE meta optimisation step and save its results."""
    max_evals = nb_evals = 1

    print("Attempt to resume a past training if it exists:")

    try:
        # https://github.com/hyperopt/hyperopt/issues/267
        with open("results.pkl", "rb") as f:
            trials = pickle.load(f)
        print("Found saved Trials! Loading...")
        max_evals = len(trials.trials) + nb_evals
        print("Rerunning from {} trials to add another one.".format(
            len(trials.trials)))
    except Exception:
        # no saved trials yet (or the file is unreadable): start fresh
        trials = Trials()
        print("Starting from scratch: new trials.")

    best = fmin(
        build_and_optimize_cnn,
        space,
        algo=tpe.suggest,
        trials=trials,
        max_evals=max_evals
    )
    with open("results.pkl", "wb") as f:
        pickle.dump(trials, f)

    print("\nOPTIMIZATION STEP COMPLETE.\n")
    print("Best results yet (note that this is NOT calculated on the 'loss' "
          "metric even though the key is 'loss' - we instead take the negative "
          "of the best accuracy throughout learning as the metric to minimize):")
    print(best) 
Example 2
Project: bob   Author: BobBuildTool   File: state.py   (GNU General Public License v3.0)
def __save(self):
        if self.__asynchronous == 0:
            state = {
                "version" : _BobState.CUR_VERSION,
                "byNameDirs" : self.__byNameDirs,
                "results" : self.__results,
                "inputs" : self.__inputs,
                "jenkins" : self.__jenkins,
                "dirStates" : self.__dirStates,
                "buildState" : self.__buildState,
            }
            tmpFile = self.__path+".new"
            try:
                with open(tmpFile, "wb") as f:
                    pickle.dump(state, f)
                    f.flush()
                    os.fsync(f.fileno())
                os.replace(tmpFile, self.__path)
            except OSError as e:
                raise ParseError("Error saving workspace state: " + str(e))
            self.__dirty = False
        else:
            self.__dirty = True 
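
Note the save pattern above: the state is pickled to a temporary file, flushed and fsync'd, then swapped into place with os.replace(), which is atomic within a single filesystem - a concurrent reader sees either the old state file or the complete new one, never a half-written pickle.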
Example 3
Project: astrobase   Author: waqasbhatti   File: hplc.py   (MIT License)
def lcdict_to_pickle(lcdict, outfile=None):
    '''This just writes the lcdict to a pickle.

    If outfile is None, will try to get the name from lcdict['objectid'] and
    write to <objectid>-hplc.pkl. If that fails, will write to a file named
    hplc.pkl.

    '''

    if not outfile and lcdict['objectid']:
        outfile = '%s-hplc.pkl' % lcdict['objectid']
    elif not outfile and not lcdict['objectid']:
        outfile = 'hplc.pkl'

    with open(outfile,'wb') as outfd:
        pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

    if os.path.exists(outfile):
        LOGINFO('lcdict for object: %s -> %s OK' % (lcdict['objectid'],
                                                    outfile))
        return outfile
    else:
        LOGERROR('could not make a pickle for this lcdict!')
        return None 
Example 4
Project: DREAM   Author: LaceyChen17   File: data.py   (MIT License)
def get_item_history(self, prior_or_train, reconstruct = False, none_idx = 49689):
        filepath = self.cache_dir + './item_history_' + prior_or_train + '.pkl'
        if (not reconstruct) and os.path.exists(filepath):
            with open(filepath, 'rb') as f:
                item_history = pickle.load(f)
        else:
            up = self.get_users_orders(prior_or_train).sort_values(['user_id', 'order_number', 'product_id'], ascending = True)
            item_history = up.groupby(['user_id', 'order_number'])['product_id'].apply(list).reset_index()
            item_history.loc[item_history.order_number == 1, 'product_id'] = item_history.loc[item_history.order_number == 1, 'product_id'] + [none_idx]
            item_history = item_history.sort_values(['user_id', 'order_number'], ascending = True)
            # accumulate 
            item_history['product_id'] = item_history.groupby(['user_id'])['product_id'].transform(pd.Series.cumsum)
            # get unique item list
            item_history['product_id'] = item_history['product_id'].apply(set).apply(list)
            item_history = item_history.sort_values(['user_id', 'order_number'], ascending = True)
            # shift each group to make it history
            item_history['product_id'] = item_history.groupby(['user_id'])['product_id'].shift(1)
            for row in item_history.loc[item_history.product_id.isnull(), 'product_id'].index:
                item_history.at[row, 'product_id'] = [none_idx]
            item_history = item_history.sort_values(['user_id', 'order_number'], ascending = True).groupby(['user_id'])['product_id'].apply(list).reset_index()
            item_history.columns = ['user_id', 'history_items']

            with open(filepath, 'wb') as f:
                pickle.dump(item_history, f, pickle.HIGHEST_PROTOCOL)
        return item_history 
Example 5
Project: xr-telemetry-m2m-web   Author: cisco   File: render_connection.py   (Apache License 2.0)
def render_POST(self, request):
        if 'disconnect' in request.args:
            self._request_disconnection(request)
        else:
            # Save all the _inputdata, so it's the same next time
            conn_params = {x: request.args[x][0] for x in request.args.keys()}
            if request.args['secret_type'][0] == 'key':
                conn_params['secret_key'] = 'checked'
                conn_params['secret_password'] = ''
            else:
                conn_params['secret_key'] = ''
                conn_params['secret_password'] = 'checked'

            # Save both locally and across restarts
            request.sdata.conn_params = conn_params
            try:
                with open(DEFAULT_PATH, 'wb') as f:
                    pickle.dump(conn_params, f)
            except Exception as e:
                print('### failed to save defaults: ' + str(e))

            # Do the request
            self._request_connection(request) 
Example 6
Project: zipline-chinese   Author: zhanghan1990   File: generate_new_sample_saved_state.py   (Apache License 2.0)
def write_state_to_disk(cls, state, emission_rate=None):
    state_dir = cls.__module__ + '.' + cls.__name__

    full_dir = base_state_dir + '/' + state_dir

    if not os.path.exists(full_dir):
        os.makedirs(full_dir)

    if emission_rate is not None:
        name = 'State_Version_' + emission_rate + \
            str(state['obj_state'][VERSION_LABEL])
    else:
        name = 'State_Version_' + str(state['obj_state'][VERSION_LABEL])

    full_path = full_dir + '/' + name

    # pickle needs a binary-mode file; 'w' would fail under Python 3
    with open(full_path, 'wb') as f:
        pickle.dump(state, f)
Example 7
Project: HandDetection   Author: YunqiuXu   File: pascal_voc.py   (license)
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
      with open(cache_file, 'rb') as fid:
        try:
          roidb = pickle.load(fid)
        except:
          roidb = pickle.load(fid, encoding='bytes')
      print('{} gt roidb loaded from {}'.format(self.name, cache_file))
      return roidb

    gt_roidb = [self._load_pascal_annotation(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
      pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))

    return gt_roidb 
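
The encoding='bytes' fallback here is what lets Python 3 load a cache file pickled by Python 2: legacy 8-bit string instances are returned as bytes objects instead of failing to decode as text.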
Example 8
Project: didi_competition   Author: Heipiao   File: operate_hash.py   (MIT License)
def create_hash_district_map_dict():
    file = "cluster_map.csv"
    district_hash_map_path = os.path.join(DATA_DIR, CONCRETE_DIR, CLUSTER_MAP_SHEET_DIR, file)

    hash_data = pd.read_csv(district_hash_map_path)
    ## convert the dataframe into dict
    hash_map_rule = dict(zip(hash_data.district_hash, hash_data.district_map))
    
    # print(type(hash_map_rule))

    saved_file = "cluster_map.pickle"
    map_save_file = os.path.join(DATA_DIR, CONCRETE_DIR, CLUSTER_MAP_SHEET_DIR, saved_file)
    ## save into same dir as file
    with open(map_save_file, "wb") as f:
        pickle.dump(hash_map_rule, f)

    #print(hash_map_rule)

# map the district features in the input data_frame into value 
Example 9
Project: lung-cancer-detector   Author: YichenGong   File: luna.py   (MIT License)
def _pre_process_all(self):
		if self._pre_processed_exists():
			self._load_norm_parameters()
			print("Mean = ", self._mean, ", STD = ", self._std)
			return

		print("No pre-processed dataset found, pre-processing now...")
		if not(os.path.exists(self._target_directory)):
			os.makedirs(self._target_directory)

		size = len(self._all_series)
		for idx, patient in enumerate(self._all_series):
			print(patient[1], str(idx+1) + "/" + str(size))
			p.dump(self._pre_process(patient), open(os.path.join(self._target_directory, patient[1] + ".pick"), "wb"), protocol=2)

		print("Mean = ", self._mean, ", STD = ", self._std)
		p.dump((self._mean, self._std), open(os.path.join(self._target_directory, "norm_parameters.pick"), "wb"), protocol=2)

		print("Pre-processing Done!") 
Example 10
Project: Gank-Alfred-Workflow   Author: hujiaweibujidao   File: workflow.py   (MIT License)
def register(self, name, serializer):
        """Register ``serializer`` object under ``name``.

        Raises :class:`AttributeError` if ``serializer`` in invalid.

        .. note::

            ``name`` will be used as the file extension of the saved files.

        :param name: Name to register ``serializer`` under
        :type name: ``unicode`` or ``str``
        :param serializer: object with ``load()`` and ``dump()``
            methods

        """

        # Basic validation
        getattr(serializer, 'load')
        getattr(serializer, 'dump')

        self._serializers[name] = serializer 
Example 11
Project: Gank-Alfred-Workflow   Author: hujiaweibujidao   File: workflow.py   (MIT License)
def dump(cls, obj, file_obj):
        """Serialize object ``obj`` to open pickle file.

        .. versionadded:: 1.8

        :param obj: Python object to serialize
        :type obj: Python object
        :param file_obj: file handle
        :type file_obj: ``file`` object

        """

        return pickle.dump(obj, file_obj, protocol=-1)


# Set up default manager and register built-in serializers 
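
Passing protocol=-1, as above, is equivalent to passing pickle.HIGHEST_PROTOCOL: any negative protocol number selects the newest protocol the running interpreter supports.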
Example 12
Project: scientific-paper-summarisation   Author: EdCo95   File: useful_functions.py   (license)
def write_summary(location, summary_as_list, filename):
    """
    Writes a generated summary to the specified location, writing both a pickle file and a text file; the pickle file
    for easy program reading, and a text file for easy human and ROUGE reading.
    :param location: the location to write the summary
    :param summary_as_list: the summary to write, as a list of tuples with each tuple of the form
                            (sentence, sentence_index_into_paper)
    :param filename: the name of the file to write.
    """
    with open(location + "Pickles/" + filename + ".pkl", "wb") as f:
        pickle.dump(summary_as_list, f)

    raw_sentences = [x for x, _ in summary_as_list]

    # the sentences are text, so open in text mode ('wb' would require bytes)
    with open(location + "Text/" + filename + ".txt", "w") as f:
        for sentence in raw_sentences:
            f.write(sentence)
            f.write("\n")
Example 13
Project: Flask_Blog   Author: sugarguo   File: cache.py   (GNU General Public License v3.0)
def set(self, key, value, timeout=None):
        if timeout is None:
            timeout = int(time() + self.default_timeout)
        elif timeout != 0:
            timeout = int(time() + timeout)
        filename = self._get_filename(key)
        self._prune()
        try:
            fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
                                       dir=self._path)
            with os.fdopen(fd, 'wb') as f:
                pickle.dump(timeout, f, 1)
                pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
            rename(tmp, filename)
            os.chmod(filename, self._mode)
        except (IOError, OSError):
            return False
        else:
            return True 
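
This set() method writes two pickles back to back into the same file - the expiry timestamp first, then the cached value - relying on the fact that consecutive pickle.load() calls on one file object return consecutive objects from the stream; the corresponding read path loads them back in the same order.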
Example 14
Project: kaggle-review   Author: daxiongshu   File: coord.py   (license)
def build_hash_to_coord(paths):
    if os.path.exists("comps/mobike/sol_carl/data/h2c.p") and os.path.exists("comps/mobike/sol_carl/data/c2h.p"):
        return
    h2c,c2h = {},{}
    for path in paths:
        for c,row in enumerate(csv.DictReader(open(path))):
            for tag in ["geohashed_end_loc","geohashed_start_loc"]:
                if tag not in row:
                    continue
                h = row[tag]
                if h not in h2c:
                    coord = str_coord(decode(h))
                    h2c[h] = coord
                    #lat,lon = int(lat+0.5),int(lon+0.5)
                    if coord not in c2h:
                        c2h[coord] = set()
                    c2h[coord].add(h)
            if c>0 and c%100000 == 0:
                print(path,c)
    print(len(h2c),len(c2h))
    with open("comps/mobike/sol_carl/data/h2c.p", "wb") as f:
        pickle.dump(h2c, f)
    with open("comps/mobike/sol_carl/data/c2h.p", "wb") as f:
        pickle.dump(c2h, f)
Example 15
Project: kaggle-review   Author: daxiongshu   File: nlp_pd_db.py   (license)
def get_per_sample_tf(self, texts, field, silent=0):
        """
        Each sample is a document.
        Input:
            texts: ["train","text"]
        """
        if self.sample_tf is not None:
            return

        self.sample_tf = {}
        self.get_per_sample_words_count(texts, field, 1)

        for text in texts:
            name = "{}/{}_sample_tf_{}.p".format(self.flags.data_path,self.name,text)
            if os.path.exists(name):
                self.sample_tf[text] = pickle.load(open(name,'rb'))
            else:
                print("gen",name)
                tf_list = tf(self.sample_words_count[text],0)
                pickle.dump(tf_list,open(name,'wb'))
                self.sample_tf[text] = tf_list
            if silent==0:
                print("\n{} sample tf done".format(text)) 
Example 16
Project: kaggle-review   Author: daxiongshu   File: utils.py   (license)
def mean_target_rate(name,out,idcol,ycol):
    if os.path.exists(out):
        return pickle.load(open(out,'rb'))
    yc,cc = defaultdict(float),defaultdict(float)
    for c,row in enumerate(csv.DictReader(open(name))):
        y = float(row[ycol])
        for i in row:
            if i in [idcol,ycol]:
                continue
            v = "%s-%s"%(i,row[i])
            yc[v] += y
            cc[v] += 1.0

        if c>0 and c%100000 == 0:
            print("rows %d len_cc %d"%(c,len(cc)))
    for i in yc:
        yc[i] = yc[i]/cc[i]
    pickle.dump(yc,open(out,'wb'))
    return yc 
Example 17
Project: fluxpart   Author: usda-ars-ussl   File: tutor.py   (license)
def tutor_fpout():
    pklout = os.path.join(RESDIR, TUTORPKL)
    if os.path.exists(pklout):
        with open(pklout, 'rb') as f:
            fpout = pickle.load(f)
    else:
        print('re-creating fp results ... this could take a few minutes')
        zip_archive = os.path.join(DATADIR, ZIPFILE)
        with zipfile.ZipFile(zip_archive, 'r') as zfile:
            zfile.extractall(DATADIR)
        fpout = tutor_example()
        make_clean_dat()
        os.makedirs(RESDIR, exist_ok=True)
        with open(pklout, 'wb') as f:
            pickle.dump(fpout, f)
    return fpout 
Example 18
Project: code   Author: ActiveState   File: recipe-576642.py   (MIT License)
def sync(self):
        'Write dict to disk'
        if self.flag == 'r':
            return
        filename = self.filename
        tempname = filename + '.tmp'
        fileobj = open(tempname, 'wb' if self.format=='pickle' else 'w')
        try:
            self.dump(fileobj)
        except Exception:
            os.remove(tempname)
            raise
        finally:
            fileobj.close()
        shutil.move(tempname, self.filename)    # atomic commit
        if self.mode is not None:
            os.chmod(self.filename, self.mode) 
Example 19
Project: tracer   Author: angr   File: cachemanager.py   (BSD 2-Clause "Simplified" License)
def _dump_cache_data(self, simstate, dump_fp=None):

        if self.tracer.predecessors[-1] is not None:
            state = self.tracer.predecessors[-1]
        else:
            state = None

        if dump_fp:
            proj = state.project
            state.project = None
            state.history.trim()
            try:
                pickle.dump((self.tracer.bb_cnt, self.tracer.cgc_flag_bytes, state, claripy.ast.base.var_counter), dump_fp, pickle.HIGHEST_PROTOCOL)
            except RuntimeError as e: # maximum recursion depth can be reached here
                l.error("unable to cache state, '%s' during pickling", e)
            finally:
                state.project = proj

        # unhook receive
        receive.cache_hook = None

        # add preconstraints to tracer
        self.tracer._preconstrain_state(simstate) 
Example 20
Project: mbin   Author: fanglab   File: controls.py   (license)
def build_control_IPD_dict( self, motifs, bi_motifs ):
		"""

		"""
		control_ipds_fn   = glob.glob( "control_ipds.tmp" )
		control_ipds_N_fn = glob.glob( "control_ipdsN.tmp")
		control_kmers_fn  = glob.glob( "control_ipdskmers.tmp")

		if (len(control_ipds_fn)>1 or len(control_ipds_N_fn)>1 or len(control_kmers_fn)>1):
			raise Exception("*** Double check the control files. There should not be multiples for a file type.")

		control_means,not_found = self.chunk_control_matrices(control_ipds_fn[0], control_ipds_N_fn[0], control_kmers_fn[0])

		if not_found > 0:
			logging.info("")
			logging.warning("WARNING: could not find sufficient instances (>=%s) for %s motifs (out of %s total) in control data!" % (self.opts.min_motif_count, not_found, (len(motifs)+len(bi_motifs))))
			logging.warning("   * If this is alarming, try reducing --min_motif_count or increasing --N_reads, although you just might not have those motifs in your reference sequence.")
		
		logging.info("")
		logging.info("Writing control data to a pickled file: %s" % self.opts.control_pkl_name)
		pickle.dump( control_means, open( self.opts.control_pkl_name, "wb" ) )

		return control_means 
Example 21
Project: AutoSleepScorerDev   Author: skjerns   File: runscript.py   (GNU General Public License v3.0)
def feat_ann(c=0):
        batch_size =700
        feats_eeg = scipy.stats.zscore(tools.feat_eeg(data[:,:,0]))
        feats_emg = scipy.stats.zscore(tools.feat_emg(data[:,:,1]))

        feats_eog = scipy.stats.zscore(tools.feat_eog(data[:,:,2]))
        feats_all = np.hstack([feats_eeg, feats_emg, feats_eog])
        results = dict()
        r = cv(feats_eeg, target, groups, models.ann, name = 'eeg', stop_after=15,batch_size=batch_size, counter=c, plot=plot)
        results.update(r)
        r = cv(np.hstack([feats_eeg,feats_eog]), target, groups, models.ann, name = 'eeg+eog',batch_size=batch_size, stop_after=15, counter=c, plot=plot)  
        results.update(r)
        r = cv(np.hstack([feats_eeg,feats_emg]), target, groups, models.ann, name = 'eeg+emg',batch_size=batch_size, stop_after=15, counter=c, plot=plot) 
        results.update(r)
        r = cv(feats_all, target, groups, models.ann, name = 'all',batch_size=batch_size, stop_after=15, counter=c, plot=plot)
        results.update(r)
        with open('results_electrodes_feat.pkl', 'wb') as f:
            pickle.dump(results, f)
Example 22
Project: hSBM_Topicmodel   Author: martingerlach   File: hsbm_tm.py   (license)
def inference(self,g):
        """We load a graph-tool graph-object and fit an hsbm:
           - hierarchical
           - nonoverlapping
           - degree-corrected
           We get a state-object which is a 'NestedBlockState'.
           We save as 'state.pkl'
        """
        if self.args.state is None:
            state=gt.minimize_nested_blockmodel_dl(g,deg_corr=True,overlap=False)
            ## save state
            with open(os.path.join(self.out_path,'state.pkl'),'wb') as f:
                pickle.dump(state,f)
        ## if the state already exists, we just load
        else:
            with open(self.args.state,'rb') as f:
                state = pickle.load(f)
        return state 
Example 23
Project: TAC-GAN   Author: dashayushman   File: encode_text.py   (GNU General Public License v3.0)
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--caption_file', type=str, default='Data/text.txt',
					   help='caption file')
	parser.add_argument('--data_dir', type=str, default='Data',
					   help='Data Directory')
	
	args = parser.parse_args()

	model = skipthoughts.load_model()
	encoded_captions = {}
	file_path = os.path.join(args.caption_file)
	dump_path = os.path.join(args.data_dir, 'enc_text.pkl')
	with open(file_path) as f:
		str_captions = f.read()
		captions = str_captions.split('\n')
		print(captions)
		encoded_captions['features'] = skipthoughts.encode(model, captions)

	with open(dump_path, "wb") as f:
		pickle.dump(encoded_captions, f)
	print('Finished extracting Skip-Thought vectors of the given text '
	      'descriptions') 
Example 24
Project: SnapStitch   Author: avikj   File: clustering.py   (license)
def get_clusters_for_project(project_id, video_names):
  embs = []
  filenames = []
  for video_name in video_names:
    filename_to_embedding = pickle.load(open(os.path.join('temp', project_id, video_name, 'filename_to_emb.pkl'), 'rb')) # TODO: call get_inception_embeddings on frame dir, but for now just use the pickle
    for filename, embedding in filename_to_embedding.items():
      embs.append(embedding)
      filenames.append(filename)
  labels = cluster(embs, eps=12, min_pts=3)
  d = {}
  for video_name in video_names:
    d[video_name] = {}
  for i in range(len(filenames)):
    video_name = video_name_from_filename(filenames[i])
    d[video_name][filenames[i]] = labels[i]
  with open(os.path.join('temp', project_id, 'filename_to_clust.pkl'), 'wb') as pickle_file:
    pickle.dump(d, pickle_file)
  for video_name in d:
    for filename in d[video_name]:
      mkdir_p(os.path.join('temp', project_id, 'clusters', str(d[video_name][filename])))
      copy(filename, os.path.join('temp', project_id, 'clusters', str(d[video_name][filename]), os.path.basename(filename)))
  '''filenames = [filename[filename.rindex('/')+1:] for filename in filenames]
  embs = np.array(embs)
  candidates = [(11, 6)]
  candidates = [(eps, min_pts) for eps in range(7, 15) for min_pts in range(2, 10)]''' 
Example 25
Project: code-uai16   Author: thanhan   File: do_test.py   (MIT License)
def main_sim_multi(cor = 0.75, rs = 0.5):
    """
    multitask simulated data
    """
    dic1, rel1, turk1, dic2, rel2, turk2 = simulate_multitask(cor)
    lc1 = crowd_model.labels_collection(turk1, rel1)
    lc2 = crowd_model.labels_collection(turk2, rel2)

    import pickle
    for rs in [0.1, 0.2, 0.3, 0.4, 0.5]:
        res = main_multitask([lc1, lc2], [dic1, dic2], rs)
        # note: the filename only encodes cor, so each rs iteration overwrites the previous dump
        with open('simult_' + str(cor) + '.pkl', 'wb') as f:
            pickle.dump(res, f)

# multitask on simulated data
Example 26
Project: PersonalizedMultitaskLearning   Author: mitmedialab   File: make_datasets.py   (license)
def getWellbeingTaskListFromDataset(datafile, data_path=PATH_TO_DATASETS, subdivide_phys=True):
	df = pd.DataFrame.from_csv(data_path + datafile)
	wanted_labels = [x for x in df.columns.values if '_Label' in x and 'tomorrow_' in x and 'Evening' in x and 'Alertness' not in x and 'Energy' not in x]
	wanted_feats = [x for x in df.columns.values if x != 'user_id' and x != 'timestamp' and x!= 'dataset' and x!='Cluster' and '_Label' not in x]

	core_name = getDatasetCoreName(datafile)

	modality_dict = getModalityDict(wanted_feats, subdivide_phys=subdivide_phys)
	
	for dataset in ['Train','Val','Test']:
		task_dict_list = []
		for target_label in wanted_labels: 
			mini_df = helper.normalizeAndFillDataDf(df, wanted_feats, [target_label], suppress_output=True)
			# reindex returns a new frame, so assign the shuffled result back
			mini_df = mini_df.reindex(np.random.permutation(mini_df.index))
				
			X,y = helper.getTensorFlowMatrixData(mini_df, wanted_feats, [target_label], dataset=dataset, single_output=True)
			task_dict = dict()
			task_dict['X'] = X
			task_dict['Y'] = y
			task_dict['Name'] = target_label
			task_dict['ModalityDict'] = modality_dict
			task_dict_list.append(task_dict)
		pickle.dump(task_dict_list, open(data_path + "datasetTaskList-" + core_name + "_" + dataset + ".p","wb")) 
Example 27
Project: swjtu-pyscraper   Author: Desgard   File: cache.py   (MIT License)
def set(self, key, value, timeout=None):
        if timeout is None:
            timeout = int(time() + self.default_timeout)
        elif timeout != 0:
            timeout = int(time() + timeout)
        filename = self._get_filename(key)
        self._prune()
        try:
            fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
                                       dir=self._path)
            with os.fdopen(fd, 'wb') as f:
                pickle.dump(timeout, f, 1)
                pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
            rename(tmp, filename)
            os.chmod(filename, self._mode)
        except (IOError, OSError):
            return False
        else:
            return True 
Example 28
Project: ScraXBRL   Author: tooksoi   File: logs.py   (MIT License)
def add_scrape_data(symbol, scrape_data, complete):
	"""Add data regarding scrape to scrape log."""
	
	if complete:
		complete_key = 'complete'
	else:
		complete_key = 'incomplete'
	data_log = pickle.load(open(settings.SCRAPE_LOG_FILE_PATH, "rb"))
	try:
		data_log[symbol]  # probe: raises KeyError if this symbol is new
		data_log[symbol][complete_key] = scrape_data
	except KeyError:
		data_log[symbol] = {}
		data_log[symbol]['complete'] = None
		data_log[symbol]['incomplete'] = None
		data_log[symbol][complete_key] = scrape_data
	pickle.dump(data_log, open(settings.SCRAPE_LOG_FILE_PATH, "wb")) 
Example 29
Project: ScraXBRL   Author: tooksoi   File: logs.py   (MIT License)
def add_extract_data(symbol, extract_data, complete):
	"""Add data regarding scrape or extract to master log."""
	
	if complete:
		complete_key = 'complete'
	else:
		complete_key = 'incomplete'
	data_log = pickle.load(open(settings.EXTRACT_LOG_FILE_PATH, "rb"))
	try:
		data_log[symbol]  # probe: raises KeyError if this symbol is new
		data_log[symbol][complete_key].append(extract_data)
	except KeyError:
		data_log[symbol] = {}
		data_log[symbol]['complete'] = []
		data_log[symbol]['incomplete'] = []
		data_log[symbol][complete_key].append(extract_data)
	pickle.dump(data_log, open(settings.EXTRACT_LOG_FILE_PATH, "wb")) 
Example 30
Project: deeppavlov   Author: deepmipt   File: model.py   (license)
def save(self, fname=None):
        """Save the parameters of the agent to a file."""
        fname = self.opt.get('model_file', None) if fname is None else fname

        if fname:
            if self.model_type == 'nn':
                print("[ saving model: " + fname + " ]")
                self.model.save_weights(fname + '.h5')
                self.embedding_dict.save_items(fname)

            if self.model_type == 'ngrams':
                print("[ saving model: " + fname + " ]")
                with open(fname + '_cls.pkl', 'wb') as model_file:
                    pickle.dump(self.model, model_file)

            with open(fname + '_opt.json', 'w') as opt_file:
                json.dump(self.opt, opt_file) 
Example 31
Project: flora   Author: Lamden   File: flora.py   (GNU General Public License v3.0)
def register(name):
	# hit api to see if name is already registered
	if check_name(name)['status'] == 'error':
		print('{} already registered.'.format(name))
	else:
		# generate new keypair
		(pub, priv) = rsa.newkeys(512)

		if os.path.exists(KEY_LOCATION) == False:
			os.mkdir(KEY_LOCATION)

		# save to disk
		with open('{}/.key'.format(KEY_LOCATION), 'wb') as f:
			pickle.dump((pub, priv), f, pickle.HIGHEST_PROTOCOL)

		r = requests.post('{}/names'.format(API_LOCATION), data = {'name' : name, 'n' : pub.n, 'e' : pub.e})
		if r.json()['status'] == 'success':
			print('Successfully registered new name: {}'.format(name))
		else:
			print('Error registering name: {}'.format(name)) 
Example 32
Project: flora   Author: Lamden   File: flora.py   (GNU General Public License v3.0)
def generate(location):
	# cli wizard for creating a new contract from a template
	if directory_has_smart_contract(location):
		example_payload = json.load(open(glob.glob(os.path.join(location, '*.json'))[0]))
		print(example_payload)
		for k, v in example_payload.items():
			value = input(k + ':')
			if value != '':
				example_payload[k] = value
		print(example_payload)

		code_path = glob.glob(os.path.join(location, '*.tsol'))
		tsol.compile(open(code_path[0]), example_payload)
		print('Code compiles with new payload.')
		selection = ''
		while True:
			selection = input('(G)enerate Solidity contract or (E)xport implementation:')
			if selection.lower() == 'g':
				output_name = input('Name your contract file without an extension:')
				code = tsol.generate_code(open(code_path[0]).read(), example_payload)
				open(os.path.join(location, '{}.sol'.format(output_name)), 'w').write(code)
				break

			if selection.lower() == 'e':
				output_name = input('Name your implementation file without an extension:')
				json.dump(example_payload, open(os.path.join(location, '{}.json'.format(output_name)), 'w'))
				break
	else:
		print('Provided directory does not contain a *.tsol and *.json or does not compile.') 
Example 33
Project: alfred-mpd   Author: deanishe   File: workflow.py   (license)
def register(self, name, serializer):
        """Register ``serializer`` object under ``name``.

        Raises :class:`AttributeError` if ``serializer`` in invalid.

        .. note::

            ``name`` will be used as the file extension of the saved files.

        :param name: Name to register ``serializer`` under
        :type name: ``unicode`` or ``str``
        :param serializer: object with ``load()`` and ``dump()``
            methods

        """
        # Basic validation
        getattr(serializer, 'load')
        getattr(serializer, 'dump')

        self._serializers[name] = serializer 
Example 34
Project: alfred-mpd   Author: deanishe   File: workflow.py   (license)
def dump(cls, obj, file_obj):
        """Serialize object ``obj`` to open JSON file.

        .. versionadded:: 1.8

        :param obj: Python object to serialize
        :type obj: JSON-serializable data structure
        :param file_obj: file handle
        :type file_obj: ``file`` object

        """
        return json.dump(obj, file_obj, indent=2, encoding='utf-8') 
Example 35
Project: alfred-mpd   Author: deanishe   File: workflow.py   (license)
def dump(cls, obj, file_obj):
        """Serialize object ``obj`` to open pickle file.

        .. versionadded:: 1.8

        :param obj: Python object to serialize
        :type obj: Python object
        :param file_obj: file handle
        :type file_obj: ``file`` object

        """
        return cPickle.dump(obj, file_obj, protocol=-1) 
Example 36
Project: alfred-mpd   Author: deanishe   File: workflow.py   (license)
def dump(cls, obj, file_obj):
        """Serialize object ``obj`` to open pickle file.

        .. versionadded:: 1.8

        :param obj: Python object to serialize
        :type obj: Python object
        :param file_obj: file handle
        :type file_obj: ``file`` object

        """
        return pickle.dump(obj, file_obj, protocol=-1)


# Set up default manager and register built-in serializers 
Example 37
Project: alfred-mpd   Author: deanishe   File: workflow.py   (license)
def cache_data(self, name, data):
        """Save ``data`` to cache under ``name``.

        If ``data`` is ``None``, the corresponding cache file will be
        deleted.

        :param name: name of datastore
        :param data: data to store. This may be any object supported by
                the cache serializer

        """
        serializer = manager.serializer(self.cache_serializer)

        cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))

        if data is None:
            if os.path.exists(cache_path):
                os.unlink(cache_path)
                self.logger.debug('Deleted cache file : %s', cache_path)
            return

        with atomic_writer(cache_path, 'wb') as file_obj:
            serializer.dump(data, file_obj)

        self.logger.debug('Cached data saved at : %s', cache_path) 
Example 38
Project: subtitle-synchronization   Author: AlbertoSabater   File: audio_converter.py   (GNU Lesser General Public License v3.0)
def generateDatasets(train_files, cut_data, len_mfcc, step_mfcc, hop_len, freq):
    
    X, Y = [], []
    
    for tf in train_files:

        train_data, labels = generateSingleDataset(tf, cut_data, len_mfcc, step_mfcc, hop_len, freq)
                
        X.append(train_data)
        Y.append(labels)
        
    X = np.concatenate(X)
    Y = np.concatenate(Y)
        
    if cut_data:
        filename = STORE_DIR + 'dataset_CUT_' + str(freq) + '_' + str(hop_len) + '_' + str(len_mfcc) + '_' + str(step_mfcc) + '_' + str(X.shape[0]) + '_' + str(X.shape[1]) + '_' + str(X.shape[2]) + '.pickle'
    else:
        filename = STORE_DIR + 'dataset_' + str(freq) + '_' + str(hop_len) + '_' + str(len_mfcc) + '_' + str(step_mfcc) + '_' + str(X.shape[0]) + '_' + str(X.shape[1]) + '_' + str(X.shape[2]) + '.pickle'
    print(filename)
    # pickle needs a binary-mode file ('wb'); plain 'w' breaks under Python 3
    with open(filename, 'wb') as f:
        pickle.dump([X, Y], f)
        
    return X, Y


# Generate a dataset from all available files 
Example 39
Project: subtitle-synchronization   Author: AlbertoSabater   File: train_nets.py   (GNU Lesser General Public License v3.0)
def sotreResults(results, v):
# %%
    import pickle
    
    with open('test_results_'+v+'.pickle', 'wb') as f:
        pickle.dump(results, f)


# %% 

# Plot stored training statistics. Look for the best model 
Example 40
Project: DeepAnomaly   Author: adiyoss   File: utils.py   (MIT License)
def store_prediction_and_ground_truth(model):
    input_size = 1
    maxlen = 140
    batch_size = 32

    db = read_data('../data/ECG5000_TEST_PHASE_1_CONTINUOUS_SIGNAL_1.pkl')
    X = create_sequences(db[:-140], win_size=maxlen, step=maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], input_size))
    Y = create_sequences(db[140:], win_size=maxlen, step=maxlen).flatten()

    prediction = model.predict(X, batch_size, verbose=1)
    prediction = prediction.flatten()
    with open('../data/ECG5000_TRAIN_PHASE_2_CONTINUOUS_SIGNAL_1.pkl', 'wb') as f:
        pickle.dump(np.stack((Y, prediction)), f) 
Example 41
Project: DeepAnomaly   Author: adiyoss   File: utils.py   (MIT License)
def prepare_data():
    test_data = read_data("../data/ECG5000_TEST_CONTINUOUS_SIGNAL_1.pkl")
    test_data_half_len = int(len(test_data) / 2)

    with open("../data/ECG5000_TEST_PHASE_1_CONTINUOUS_SIGNAL_1.pkl", "wb") as f:
        pickle.dump(test_data[:test_data_half_len], f)

    with open("../data/ECG5000_TEST_PHASE_2_CONTINUOUS_SIGNAL_1.pkl", "wb") as f:
        pickle.dump(test_data[test_data_half_len:], f) 
Example 42
Project: robot-arena   Author: kenganong   File: roborally.py   (Apache License 2.0)
def log_results(state):
  if config.print_results:
    print('Final Results!')
    for brain in sorted(state.brains, key = lambda x: x.placement):
      print('{}. {}  with {} flags (scored: {})  surviving {} iterations ({} robots left)'.format(brain.placement,
            brain.name, brain.max_flag, brain.total_flags, brain.iterations_survived, brain.robots_alive))
  if config.save_replay:
    filename = 'roborally/replays/{}.pickle'.format(replay['name'])
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'wb') as replay_file:
      pickle.dump(replay, replay_file) 
Example 43
Project: onto-lstm   Author: pdasigi   File: preposition_model.py   (Apache License 2.0)
def save_model(self, epoch):
        '''
        Saves the current model using the epoch id to identify the file.
        '''
        self.model.save("%s_%d.model" % (self.model_name_prefix, epoch))
        pickle.dump(self.data_processor, open("%s.dataproc" % self.model_name_prefix, "wb")) 
Example 44
Project: onto-lstm   Author: pdasigi   File: model_pp_relation.py   (Apache License 2.0)
def save_model(self, epoch):
        pickle.dump(self.label_map, open("%s.label_map" % self.model_name_prefix, "wb"))
        super(PPRelationModel, self).save_model(epoch) 
Example 45
Project: onto-lstm   Author: pdasigi   File: model_entailment.py   (Apache License 2.0)
def save_model(self, epoch):
        '''
        Saves the current model using the epoch id to identify the file.
        '''
        self.model.save("%s_%d.model" % (self.model_name_prefix, epoch))
        pickle.dump(self.data_processor, open("%s.dataproc" % self.model_name_prefix, "wb"))
        pickle.dump(self.label_map, open("%s.labelmap" % self.model_name_prefix, "wb")) 
Example 46
Project: DREAM   Author: LaceyChen17   File: data.py   (MIT License)
def get_users_orders(self, prior_or_train):
        '''
            get users' prior detailed orders
        '''
        if os.path.exists(self.cache_dir + 'users_orders.pkl'):
            with open(self.cache_dir + 'users_orders.pkl', 'rb') as f:
                users_orders = pickle.load(f)
        else:
            orders = self.get_orders()
            order_products_prior = self.get_orders_items(prior_or_train)
            users_orders = pd.merge(order_products_prior, orders[['user_id', 'order_id', 'order_number', 'days_up_to_last']], 
                        on = ['order_id'], how = 'left')
            with open(self.cache_dir + 'users_orders.pkl', 'wb') as f:
                pickle.dump(users_orders, f, pickle.HIGHEST_PROTOCOL)
        return users_orders 
Example 47
Project: DREAM   Author: LaceyChen17   File: data.py   (MIT License)
def get_users_products(self, prior_or_train):
        '''
            get users' all purchased products
        '''
        if os.path.exists(self.cache_dir + 'users_products.pkl'):
            with open(self.cache_dir + 'users_products.pkl', 'rb') as f:
                users_products = pickle.load(f)
        else:
            users_products = self.get_users_orders(prior_or_train)[['user_id', 'product_id']].drop_duplicates()
            users_products['product_id'] = users_products.product_id.astype(int)
            users_products['user_id'] = users_products.user_id.astype(int)
            users_products = users_products.groupby(['user_id'])['product_id'].apply(list).reset_index()
            with open(self.cache_dir + 'users_products.pkl', 'wb') as f:
                pickle.dump(users_products, f, pickle.HIGHEST_PROTOCOL)
        return users_products 
Example 48
Project: DREAM   Author: LaceyChen17   File: data.py   (MIT License)
def get_baskets(self, prior_or_train, reconstruct = False, reordered = False, none_idx = 49689):
        '''
            get users' baskets
        '''
        if reordered:
            filepath = self.cache_dir + './reorder_basket_' + prior_or_train + '.pkl'
        else:
            filepath = self.cache_dir + './basket_' + prior_or_train + '.pkl'
       
        if (not reconstruct) and os.path.exists(filepath):
            with open(filepath, 'rb') as f:
                up_basket = pickle.load(f)
        else:          
            up = self.get_users_orders(prior_or_train).sort_values(['user_id', 'order_number', 'product_id'], ascending = True)
            uid_oid = up[['user_id', 'order_number']].drop_duplicates()
            up = up[up.reordered == 1][['user_id', 'order_number', 'product_id']] if reordered else up[['user_id', 'order_number', 'product_id']]
            up_basket = up.groupby(['user_id', 'order_number'])['product_id'].apply(list).reset_index()
            up_basket = pd.merge(uid_oid, up_basket, on = ['user_id', 'order_number'], how = 'left')
            for row in up_basket.loc[up_basket.product_id.isnull(), 'product_id'].index:
                up_basket.at[row, 'product_id'] = [none_idx]
            up_basket = up_basket.sort_values(['user_id', 'order_number'], ascending = True).groupby(['user_id'])['product_id'].apply(list).reset_index()
            up_basket.columns = ['user_id', 'reorder_basket'] if reordered else ['user_id', 'basket']
            #pdb.set_trace()
            with open(filepath, 'wb') as f:
                pickle.dump(up_basket, f, pickle.HIGHEST_PROTOCOL)
        return up_basket 
Example 49
Project: SuperPACs   Author: SpencerNorris   File: parse_indepexpends.py   (Apache License 2.0)
def donations(filename='donationdata.pickle'):

    try:
        with open(filename, 'rb') as handle:
            donations = pickle.load(handle)
        print("donation data pickled already. Grabbing data from donationdata.pickle")
        return donations
    except (FileNotFoundError, EOFError):
        print("donation data not pickled, grabbing directly from FEC and ProPublica APIs")
        donations = donations_helper()

        with open(filename, 'wb') as handle:
            pickle.dump(donations, handle, protocol=pickle.HIGHEST_PROTOCOL)

        return donations