Python cPickle.load() Examples

The following are code examples showing how to use cPickle.load(). They are drawn from open source Python projects.
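
Before the project excerpts, here is a minimal, self-contained sketch of the basic pattern: open the pickle file in binary mode and pass the file object to cPickle.load(). The file name data.pkl is only a placeholder for this sketch, and the import falls back to pickle on Python 3, where the cPickle module was merged into pickle.

try:
    import cPickle as pickle  # Python 2: C-accelerated pickle
except ImportError:
    import pickle             # Python 3: cPickle was merged into pickle

# Round-trip an object through a pickle file.
# Note: only unpickle data you trust; load() can run arbitrary code.
data = {'vocab': ['a', 'b', 'c'], 'size': 3}

with open('data.pkl', 'wb') as f:
    pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)

with open('data.pkl', 'rb') as f:
    restored = pickle.load(f)  # returns the reconstructed object

assert restored == data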

Example 1
Project: multi-embedding-cws   Author: wangjksjtu   File: fc_lstm3_crf_seg_nowubi.py    MIT License 6 votes
def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we can use again a convenient built-in function to import a graph_def into the
    # current default Graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def,
            input_map=None,
            return_elements=None,
            name="prefix",
            op_dict=None,
            producer_op_list=None
        )

    return graph

# make the raw data acceptable for the model 
Example 2
Project: multi-embedding-cws   Author: wangjksjtu   File: fc_lstm3_crf_seg_nopy.py    MIT License 6 votes
def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we can use again a convenient built-in function to import a graph_def into the
    # current default Graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def,
            input_map=None,
            return_elements=None,
            name="prefix",
            op_dict=None,
            producer_op_list=None
        )

    return graph

# make the raw data acceptable for the model 
Example 3
Project: multi-embedding-cws   Author: wangjksjtu   File: share_lstm_crf_seg_nowubi.py    MIT License 6 votes
def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we can use again a convenient built-in function to import a graph_def into the
    # current default Graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def,
            input_map=None,
            return_elements=None,
            name="prefix",
            op_dict=None,
            producer_op_list=None
        )

    return graph

# make the raw data acceptable for the model 
Example 4
Project: multi-embedding-cws   Author: wangjksjtu   File: crf_seg.py    MIT License 6 votes
def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we can use again a convenient built-in function to import a graph_def into the
    # current default Graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def,
            input_map=None,
            return_elements=None,
            name="prefix",
            op_dict=None,
            producer_op_list=None
        )

    return graph

# make the raw data acceptable for the model 
Example 5
Project: multi-embedding-cws   Author: wangjksjtu   File: share_lstm_crf_seg.py    MIT License 6 votes
def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we can use again a convenient built-in function to import a graph_def into the
    # current default Graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def,
            input_map=None,
            return_elements=None,
            name="prefix",
            op_dict=None,
            producer_op_list=None
        )

    return graph

# make the raw data acceptable for the model 
Example 6
Project: multi-embedding-cws   Author: wangjksjtu   File: baseline_crf_seg.py    MIT License 6 votes
def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we can use again a convenient built-in function to import a graph_def into the
    # current default Graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def,
            input_map=None,
            return_elements=None,
            name="prefix",
            op_dict=None,
            producer_op_list=None
        )

    return graph

# make the raw data acceptable for the model 
Example 7
Project: cat-bbs   Author: aleju   File: train.py    MIT License 6 votes
def _augment_images_worker(self, augseq, queue_source, queue_result):
        """Worker function that endlessly queries the source queue (input
        batches), augments batches in it and sends the result to the output
        queue."""
        while True:
            # wait for a new batch in the source queue and load it
            batch_str = queue_source.get()
            batch = pickle.loads(batch_str)

            # augment the batch
            if batch.images is not None and batch.keypoints is not None:
                augseq_det = augseq.to_deterministic()
                batch.images_aug = augseq_det.augment_images(batch.images)
                batch.keypoints_aug = augseq_det.augment_keypoints(batch.keypoints)
            elif batch.images is not None:
                batch.images_aug = augseq.augment_images(batch.images)
            elif batch.keypoints is not None:
                batch.keypoints_aug = augseq.augment_keypoints(batch.keypoints)

            # send augmented batch to output queue
            queue_result.put(pickle.dumps(batch, protocol=-1)) 
Example 8
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License 6 votes
def register(self, name, serializer):
        """Register ``serializer`` object under ``name``.

        Raises :class:`AttributeError` if ``serializer`` is invalid.

        .. note::

            ``name`` will be used as the file extension of the saved files.

        :param name: Name to register ``serializer`` under
        :type name: ``unicode`` or ``str``
        :param serializer: object with ``load()`` and ``dump()``
            methods

        """
        # Basic validation
        getattr(serializer, 'load')
        getattr(serializer, 'dump')

        self._serializers[name] = serializer 
Example 9
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License 6 votes
def register(self, name, serializer):
        """Register ``serializer`` object under ``name``.

        Raises :class:`AttributeError` if ``serializer`` is invalid.

        .. note::

            ``name`` will be used as the file extension of the saved files.

        :param name: Name to register ``serializer`` under
        :type name: ``unicode`` or ``str``
        :param serializer: object with ``load()`` and ``dump()``
            methods

        """
        # Basic validation
        getattr(serializer, 'load')
        getattr(serializer, 'dump')

        self._serializers[name] = serializer 
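
The docstrings in Examples 8 and 9 describe the contract: any object exposing load() and dump() methods can be registered as a serializer, and the registered name is used as the file extension. Below is a minimal sketch of an object satisfying that contract; the manager object and the commented register() call are hypothetical stand-ins for the registry shown above.

try:
    import cPickle as pickle
except ImportError:
    import pickle

class CPickleSerializer(object):
    """Serializer exposing the load()/dump() methods described above."""

    @classmethod
    def load(cls, file_obj):
        # Read one pickled object from an open binary file handle.
        return pickle.load(file_obj)

    @classmethod
    def dump(cls, obj, file_obj):
        # Write obj to an open binary file handle with the latest protocol.
        return pickle.dump(obj, file_obj, protocol=-1)

# Hypothetical registration per the docstring above:
# manager.register('cpickle', CPickleSerializer)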
Example 10
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: train_val.py    MIT License 6 votes
def from_snapshot(self, sfile, nfile):
    print('Restoring model snapshots from {:s}'.format(sfile))
    self.net.load_state_dict(torch.load(str(sfile)))
    print('Restored.')
    # Needs to restore the other hyper-parameters/states for training, (TODO xinlei) I have
    # tried my best to find the random states so that it can be recovered exactly
    # However the Tensorflow state is currently not available
    with open(nfile, 'rb') as fid:
      st0 = pickle.load(fid)
      cur = pickle.load(fid)
      perm = pickle.load(fid)
      cur_val = pickle.load(fid)
      perm_val = pickle.load(fid)
      last_snapshot_iter = pickle.load(fid)

      np.random.set_state(st0)
      self.data_layer._cur = cur
      self.data_layer._perm = perm
      self.data_layer_val._cur = cur_val
      self.data_layer_val._perm = perm_val

    return last_snapshot_iter 
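
Example 10 depends on the fact that successive pickle.load() calls on the same open file return objects in the order they were dumped. A short sketch of that pattern follows; the file name and the state values are placeholders, not taken from the project above.

try:
    import cPickle as pickle
except ImportError:
    import pickle

# Dump several objects into one file, then read them back in the same order.
with open('snapshot_state.pkl', 'wb') as fid:
    pickle.dump('rng-state', fid, pickle.HIGHEST_PROTOCOL)
    pickle.dump(0, fid, pickle.HIGHEST_PROTOCOL)            # cursor
    pickle.dump([2, 0, 1], fid, pickle.HIGHEST_PROTOCOL)    # permutation
    pickle.dump(10000, fid, pickle.HIGHEST_PROTOCOL)        # last snapshot iteration

with open('snapshot_state.pkl', 'rb') as fid:
    rng_state = pickle.load(fid)
    cur = pickle.load(fid)
    perm = pickle.load(fid)
    last_snapshot_iter = pickle.load(fid)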
Example 11
Project: wikilinks   Author: trovdimi   File: pickle_data.py    MIT License 6 votes
def pickle_page_rank_data():
    network = load_graph("output/wikipedianetwork.xml.gz")
    print "after load"
    values_page_rank = list()
    with open(os.path.join(os.path.dirname(__file__), "/home/ddimitrov/tmp/wikipedia_network.csv")) as f:
        print "page"
        next(f)
        for line in f:
            line = line.strip().split('\t')

            target_vertex = network.vertex(line[1])

            values_page_rank.append(network.vertex_properties["page_rank"][target_vertex])

    print "network"

    pickle.dump(values_page_rank, open("/ssd/ddimitrov/pickle/values_page_rank", "wb"), protocol=pickle.HIGHEST_PROTOCOL)
    print "done" 
Example 12
Project: wikilinks   Author: trovdimi   File: pickle_data.py    MIT License 6 votes
def pickle_transitions_matrix_data():
    transitions = pickle.load( open( "/ssd/ddimitrov/pickle/transitions", "rb" ) )
    vocab = pickle.load( open( "/ssd/ddimitrov/pickle/vocab", "rb" ) )

    i_indices = array.array(str("l"))
    j_indices = array.array(str("l"))
    values = array.array(str("d"))

    for s, targets in transitions.iteritems():
        for t, v in targets.iteritems():
            i_indices.append(vocab[s])
            j_indices.append(vocab[t])
            values.append(v)

    i_indices = np.frombuffer(i_indices, dtype=np.int_)
    j_indices = np.frombuffer(j_indices, dtype=np.int_)
    values = np.frombuffer(values, dtype=np.float64)
    transition_matrix=[i_indices,j_indices,values]
    pickle.dump(transition_matrix, open("/ssd/ddimitrov/pickle/transition_matrix", "wb"), protocol=pickle.HIGHEST_PROTOCOL)
    print "transition_matrix" 
Example 13
Project: MODS_ConvNet   Author: santiagolopezg   File: test_lillabcrossval_network.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_224_224_{0}_test_3.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    validation_data = data[0]
    training_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 14
Project: MODS_ConvNet   Author: santiagolopezg   File: test_network.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_all_data_bw_224_224_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 15
Project: MODS_ConvNet   Author: santiagolopezg   File: train_network.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_all_data_bw_224_224_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 16
Project: MODS_ConvNet   Author: santiagolopezg   File: test_labcrossval_network.py    MIT License 6 votes
def get_data(n_dataset, name):    
    f = file('MODS_224_224_{0}_{1}.pkl'.format(n_dataset, name),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 17
Project: MODS_ConvNet   Author: santiagolopezg   File: test_network.py    MIT License 6 votes
def get_data(n_dataset, name):    
    f = file('MODS_224_224_{0}_{1}.pkl'.format(n_dataset, name),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 18
Project: MODS_ConvNet   Author: santiagolopezg   File: convnet_keras_1.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('debug_MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return t_data, t_label, v_data, v_label 
Example 19
Project: MODS_ConvNet   Author: santiagolopezg   File: cifar10_v6.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return (t_data, t_label), (v_data, v_label) 
Example 20
Project: MODS_ConvNet   Author: santiagolopezg   File: cifar10_v3.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return (t_data, t_label), (v_data, v_label) 
Example 21
Project: MODS_ConvNet   Author: santiagolopezg   File: cifar10_v5.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return (t_data, t_label), (v_data, v_label) 
Example 22
Project: MODS_ConvNet   Author: santiagolopezg   File: convnet_keras_2.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return t_data, t_label, v_data, v_label 
Example 23
Project: MODS_ConvNet   Author: santiagolopezg   File: convnet_keras_3.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return t_data, t_label, v_data, v_label 
Example 24
Project: MODS_ConvNet   Author: santiagolopezg   File: cifar10_v1.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return (t_data, t_label), (v_data, v_label) 
Example 25
Project: MODS_ConvNet   Author: santiagolopezg   File: cifar10_v2.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return (t_data, t_label), (v_data, v_label) 
Example 26
Project: MODS_ConvNet   Author: santiagolopezg   File: educate_the_hipster.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_dataset_cv_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    v_data = validation_data[0]
    v_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    v_data = np.array(v_data)
    v_label = np.array(v_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 256, 192)
    v_data = v_data.reshape(v_data.shape[0], 1, 256, 192)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    v_data = v_data.astype('float32')
    
    return (t_data, t_label), (v_data, v_label) 
Example 27
Project: MODS_ConvNet   Author: santiagolopezg   File: train_lillabcrossval_network.py    MIT License 6 votes
def get_data(n_dataset, name):    
    f = file('MODS_224_224_{0}_{1}.pkl'.format(n_dataset, name),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 28
Project: MODS_ConvNet   Author: santiagolopezg   File: test_lilfoo.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_all_data_bw_224_224_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 29
Project: MODS_ConvNet   Author: santiagolopezg   File: train_network.py    MIT License 6 votes
def get_data(n_dataset):    
    f = file('MODS_all_data_bw_224_224_{0}.pkl'.format(n_dataset),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 30
Project: MODS_ConvNet   Author: santiagolopezg   File: train_labcrossval_network.py    MIT License 6 votes
def get_data(n_dataset, name):    
    f = file('MODS_224_224_{0}_{1}.pkl'.format(n_dataset, name),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    #less precision means less memory needed: 64 -> 32 (half the memory used)
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 31
Project: MODS_ConvNet   Author: santiagolopezg   File: test_labcrossval_network.py    MIT License 6 votes
def get_data(n_dataset, name):    
    f = file('MODS_224_224_{0}_{1}.pkl'.format(n_dataset, name),'rb')
    data = cPickle.load(f)
    f.close()
    training_data = data[0]
    validation_data = data[1]
    t_data = training_data[0]
    t_label = training_data[1]
    test_data = validation_data[0]
    test_label = validation_data[1]
    
    t_data = np.array(t_data)
    t_label = np.array(t_label)
    test_data = np.array(test_data)
    test_label = np.array(test_label)
    t_data = t_data.reshape(t_data.shape[0], 1, 224, 224)
    test_data = test_data.reshape(test_data.shape[0], 1, 224, 224)
    
    t_data = t_data.astype('float32')
    test_data = test_data.astype('float32')
    
    return (t_data, t_label), (test_data, test_label) 
Example 32
Project: multi-embedding-cws   Author: wangjksjtu   File: fc_lstm3_crf_seg_nowubi.py    MIT License 5 votes
def main(model_path, test_data, char_path, pinyin_path, result_path, MAX_LEN, batch_size):
    # load model
    graph = load_graph(model_path)

    # load vocabulary
    try:
        dip_char = open(char_path, 'r')
        vob_char_dict = cpk.load(dip_char)
        dip_char.close()
        dip_pinyin = open(pinyin_path, 'r')
        vob_pinyin_dict = cpk.load(dip_pinyin)
        dip_pinyin.close()
    except Exception as e:
        raise e

    # get test data
    tX_char, tX_pinyin = TransRawData(test_data, vob_char_dict, vob_pinyin_dict, MAX_LEN)

    # get predicted sequence
    sequences = seg_sequence(graph, tX_char, tX_pinyin, batch_size)

    rinp = open(test_data, 'r')
    with open(result_path, 'w') as opt:
        for ind, line in enumerate(rinp):
            ustr = line.strip().decode("utf-8")
            seq = sequences[ind]
            newline = u""
            for word, label in zip(ustr, seq):
                if label == 0 or label == 1:
                    newline += u' ' + word
                else:
                    newline += word

            newline = newline.strip().encode("utf-8")
            opt.write(newline + '\n')

    rinp.close() 
Example 33
Project: multi-embedding-cws   Author: wangjksjtu   File: fc_lstm3_crf_seg_nopy.py    MIT License 5 votes
def main(model_path, test_data, char_path, pinyin_path, wubi_path, result_path, MAX_LEN, batch_size):
    # load model
    graph = load_graph(model_path)

    # load vocabulary
    try:
        dip_char = open(char_path, 'r')
        vob_char_dict = cpk.load(dip_char)
        dip_char.close()
        dip_pinyin = open(pinyin_path, 'r')
        vob_pinyin_dict = cpk.load(dip_pinyin)
        dip_pinyin.close()
        dip_wubi = open(wubi_path, 'r')
        vob_wubi_dict = cpk.load(dip_wubi)
        dip_wubi.close()
    except Exception as e:
        raise e

    # get test data
    tX_char, tX_pinyin, tX_wubi = TransRawData(test_data, vob_char_dict, vob_pinyin_dict, vob_wubi_dict, MAX_LEN)

    # get predicted sequence
    sequences = seg_sequence(graph, tX_char, tX_pinyin, tX_wubi, batch_size)

    rinp = open(test_data, 'r')
    with open(result_path, 'w') as opt:
        for ind, line in enumerate(rinp):
            ustr = line.strip().decode("utf-8")
            seq = sequences[ind]
            newline = u""
            for word, label in zip(ustr, seq):
                if label == 0 or label == 1:
                    newline += u' ' + word
                else:
                    newline += word

            newline = newline.strip().encode("utf-8")
            opt.write(newline + '\n')

    rinp.close() 
Example 34
Project: multi-embedding-cws   Author: wangjksjtu   File: crf_seg.py    MIT License 5 votes
def main(model_path, test_data, char_path, pinyin_path, wubi_path, result_path, MAX_LEN, batch_size):
    # load model
    graph = load_graph(model_path)

    # load vocabulary
    try:
        dip_char = open(char_path, 'r')
        vob_char_dict = cpk.load(dip_char)
        dip_char.close()
        dip_pinyin = open(pinyin_path, 'r')
        vob_pinyin_dict = cpk.load(dip_pinyin)
        dip_pinyin.close()
        dip_wubi = open(wubi_path, 'r')
        vob_wubi_dict = cpk.load(dip_wubi)
        dip_wubi.close()
    except Exception as e:
        raise e

    # get test data
    tX_char, tX_pinyin, tX_wubi = TransRawData(test_data, vob_char_dict, vob_pinyin_dict, vob_wubi_dict, MAX_LEN)

    # get predicted sequence
    sequences = seg_sequence(graph, tX_char, tX_pinyin, tX_wubi, batch_size)

    rinp = open(test_data, 'r')
    with open(result_path, 'w') as opt:
        for ind, line in enumerate(rinp):
            ustr = line.strip().decode("utf-8")
            seq = sequences[ind]
            newline = u""
            for word, label in zip(ustr, seq):
                if label == 0 or label == 1:
                    newline += u' ' + word
                else:
                    newline += word

            newline = newline.strip().encode("utf-8")
            opt.write(newline + '\n')

    rinp.close() 
Example 35
Project: multi-embedding-cws   Author: wangjksjtu   File: share_lstm_crf_seg.py    MIT License 5 votes
def main(model_path, test_data, char_path, pinyin_path, wubi_path, result_path, MAX_LEN, batch_size):
    # load model
    graph = load_graph(model_path)

    # load vocabulary
    try:
        dip_char = open(char_path, 'r')
        vob_char_dict = cpk.load(dip_char)
        dip_char.close()
        dip_pinyin = open(pinyin_path, 'r')
        vob_pinyin_dict = cpk.load(dip_pinyin)
        dip_pinyin.close()
        dip_wubi = open(wubi_path, 'r')
        vob_wubi_dict = cpk.load(dip_wubi)
        dip_wubi.close()
    except Exception as e:
        raise e

    # get test data
    tX_char, tX_pinyin, tX_wubi = TransRawData(test_data, vob_char_dict, vob_pinyin_dict, vob_wubi_dict, MAX_LEN)

    # get predicted sequence
    sequences = seg_sequence(graph, tX_char, tX_pinyin, tX_wubi, batch_size)

    rinp = open(test_data, 'r')
    with open(result_path, 'w') as opt:
        for ind, line in enumerate(rinp):
            ustr = line.strip().decode("utf-8")
            seq = sequences[ind]
            newline = u""
            for word, label in zip(ustr, seq):
                if label == 0 or label == 1:
                    newline += u' ' + word
                else:
                    newline += word

            newline = newline.strip().encode("utf-8")
            opt.write(newline + '\n')

    rinp.close() 
Example 36
Project: cat-bbs   Author: aleju   File: plotting.py    MIT License 5 votes
def load_from_filepath(fp):
        #return json.loads(open(, "r").read())
        with open(fp, "r") as f:
            history = pickle.load(f)
        return history 
Example 37
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License 5 votes
def load(cls, file_obj):
        """Load serialized object from open JSON file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from JSON file
        :rtype: object

        """
        return json.load(file_obj) 
Example 38
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License 5 votes
def load(cls, file_obj):
        """Load serialized object from open pickle file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from pickle file
        :rtype: object

        """
        return cPickle.load(file_obj) 
Example 39
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License 5 votes
def load(cls, file_obj):
        """Load serialized object from open pickle file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from pickle file
        :rtype: object

        """
        return pickle.load(file_obj) 
Example 40
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License 5 votes
def _load(self):
        """Load cached settings from JSON file `self._filepath`."""
        data = {}
        with LockFile(self._filepath, 0.5):
            with open(self._filepath, 'rb') as fp:
                data.update(json.load(fp))

        self._original = deepcopy(data)

        self._nosave = True
        self.update(data)
        self._nosave = False 
Example 41
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License 5 votes
def cached_data(self, name, data_func=None, max_age=60):
        """Return cached data if younger than ``max_age`` seconds.

        Retrieve data from cache or re-generate and re-cache data if
        stale/non-existent. If ``max_age`` is 0, return cached data no
        matter how old.

        :param name: name of datastore
        :param data_func: function to (re-)generate data.
        :type data_func: ``callable``
        :param max_age: maximum age of cached data in seconds
        :type max_age: ``int``
        :returns: cached data, return value of ``data_func`` or ``None``
            if ``data_func`` is not set

        """
        serializer = manager.serializer(self.cache_serializer)

        cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))
        age = self.cached_data_age(name)

        if (age < max_age or max_age == 0) and os.path.exists(cache_path):

            with open(cache_path, 'rb') as file_obj:
                self.logger.debug('loading cached data: %s', cache_path)
                return serializer.load(file_obj)

        if not data_func:
            return None

        data = data_func()
        self.cache_data(name, data)

        return data 
Example 42
Project: pyblish-win   Author: pyblish   File: test_xpickle.py    GNU Lesser General Public License v3.0 5 votes
def worker_main(in_stream, out_stream):
    message = cPickle.load(in_stream)
    protocol, obj = message
    cPickle.dump(obj, out_stream, protocol) 
Example 43
Project: Att-ChemdNER   Author: lingluodlut   File: model.py    Apache License 2.0 5 votes
def reload_mappings(self):
#{{{
        """
        Load mappings from disk.
        """
        with open(self.mappings_path, 'rb') as f:
            mappings = cPickle.load(f)
        self.id_to_word = mappings['id_to_word']
        self.id_to_char = mappings['id_to_char']
        self.id_to_tag = mappings['id_to_tag']
#}}} 
Example 44
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License 5 votes
def load(cls, file_obj):
        """Load serialized object from open JSON file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from JSON file
        :rtype: object

        """
        return json.load(file_obj) 
Example 45
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License 5 votes
def load(cls, file_obj):
        """Load serialized object from open pickle file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from pickle file
        :rtype: object

        """
        return cPickle.load(file_obj) 
Example 46
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License 5 votes
def load(cls, file_obj):
        """Load serialized object from open pickle file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from pickle file
        :rtype: object

        """
        return pickle.load(file_obj) 
Example 47
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License 5 votes
def _load(self):
        """Load cached settings from JSON file `self._filepath`."""
        data = {}
        with LockFile(self._filepath, 0.5):
            with open(self._filepath, 'rb') as fp:
                data.update(json.load(fp))

        self._original = deepcopy(data)

        self._nosave = True
        self.update(data)
        self._nosave = False 
Example 48
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License 5 votes
def cached_data(self, name, data_func=None, max_age=60):
        """Return cached data if younger than ``max_age`` seconds.

        Retrieve data from cache or re-generate and re-cache data if
        stale/non-existent. If ``max_age`` is 0, return cached data no
        matter how old.

        :param name: name of datastore
        :param data_func: function to (re-)generate data.
        :type data_func: ``callable``
        :param max_age: maximum age of cached data in seconds
        :type max_age: ``int``
        :returns: cached data, return value of ``data_func`` or ``None``
            if ``data_func`` is not set

        """
        serializer = manager.serializer(self.cache_serializer)

        cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))
        age = self.cached_data_age(name)

        if (age < max_age or max_age == 0) and os.path.exists(cache_path):

            with open(cache_path, 'rb') as file_obj:
                self.logger.debug('loading cached data: %s', cache_path)
                return serializer.load(file_obj)

        if not data_func:
            return None

        data = data_func()
        self.cache_data(name, data)

        return data 
Example 49
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: train_val.py    MIT License 5 votes
def initialize(self):
    # Initial file lists are empty
    np_paths = []
    ss_paths = []
    # Fresh train directly from ImageNet weights
    print('Loading initial model weights from {:s}'.format(self.pretrained_model))
    self.net.load_pretrained_cnn(torch.load(self.pretrained_model))
    print('Loaded.')
    
#    pretrained_model = torch.load('/DATA3_DB7/data/jjwang/workspace/two_stage/output/vgg16/voc_2007_trainval/default/vgg16_faster_rcnn_iter_50001.pth')    
    if self.wsddn_premodel is not None: # Load the pretrained WSDDN model
      wsddn_pre = torch.load(self.wsddn_premodel)
      model_dict = self.net.state_dict()
      model_dict.update(wsddn_pre)
      self.net.load_state_dict(model_dict)
      print('Loading pretrained WSDDN model weights from {:s}'.format(self.wsddn_premodel))
      print('Loaded.')
    
    
    # Need to fix the variables before loading, so that the RGB weights are changed to BGR
    # For VGG16 it also changes the convolutional weights fc6 and fc7 to
    # fully connected weights
    last_snapshot_iter = 0
    lr = cfg.TRAIN.LEARNING_RATE
    stepsizes = list(cfg.TRAIN.STEPSIZE)

    return lr, last_snapshot_iter, stepsizes, np_paths, ss_paths 
Example 50
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: cache.py    Apache License 2.0 5 votes
def _prune(self):
        entries = self._list_dir()
        if len(entries) > self._threshold:
            now = time()
            try:
                for idx, fname in enumerate(entries):
                    remove = False
                    with open(fname, 'rb') as f:
                        expires = pickle.load(f)
                    remove = expires <= now or idx % 3 == 0

                    if remove:
                        os.remove(fname)
            except (IOError, OSError):
                pass 
Example 51
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: cache.py    Apache License 2.0 5 votes
def get(self, key):
        filename = self._get_filename(key)
        try:
            with open(filename, 'rb') as f:
                if pickle.load(f) >= time():
                    return pickle.load(f)
                else:
                    os.remove(filename)
                    return None
        except (IOError, OSError, pickle.PickleError):
            return None 
Example 52
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: cache.py    Apache License 2.0 5 votes
def _prune(self):
        entries = self._list_dir()
        if len(entries) > self._threshold:
            now = time()
            try:
                for idx, fname in enumerate(entries):
                    remove = False
                    with open(fname, 'rb') as f:
                        expires = pickle.load(f)
                    remove = expires <= now or idx % 3 == 0

                    if remove:
                        os.remove(fname)
            except (IOError, OSError):
                pass 
Example 53
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: cache.py    Apache License 2.0 5 votes
def get(self, key):
        filename = self._get_filename(key)
        try:
            with open(filename, 'rb') as f:
                if pickle.load(f) >= time():
                    return pickle.load(f)
                else:
                    os.remove(filename)
                    return None
        except (IOError, OSError, pickle.PickleError):
            return None 
Example 54
Project: convseg   Author: chqiwang   File: tagger.py    MIT License 5 votes
def load_model(self, model_dir):
        mappings_path = os.path.join(model_dir, 'mappings.pkl')
        parameters_path = os.path.join(model_dir, 'parameters.pkl')
        item2id, id2item, tag2id, id2tag, word2id, id2word = \
            pickle.load(open(mappings_path, 'r'))
        parameters = pickle.load(open(parameters_path))

        self.item2id = item2id
        self.id2item = id2item
        self.tag2id = tag2id
        self.id2tag = id2tag
        self.word2id = word2id
        self.id2word = id2word
        self.parameters = parameters

        print(parameters)
        print('Building input graph...', end='')
        self.build_graph()
        print('Finished.')
        print('Initializing variables...', end='')
        init_op = tf.initialize_all_variables()
        self.sess.run(init_op)
        print('Finished.')
        print('Reloading parameters...', end='')
        saver = tf.train.Saver(tf.global_variables())
        checkpoint = tf.train.latest_checkpoint(model_dir)
        saver.restore(self.sess, checkpoint)
        print('Finished.') 
Example 55
Project: wikilinks   Author: trovdimi   File: pickle_data.py    MIT License 5 votes
def pickle_topic_sim():
    i = 0
    print "loading voc_zip_links"
    voc_zip_links = pickle.load( open( "/ssd/ddimitrov/pickle/voc_zip_links", "rb" ) )
    print "loaded voc_zip_links"
    uniqeu_nonzero_map = pickle.load( open( "/ssd/ddimitrov/pickle/uniqeu_nonzero_map", "rb" ) )
    vocab = pickle.load( open( "/ssd/ddimitrov/pickle/vocab", "rb" ) )
    print "loaded vocab"

    rel_feature_map = {}
    print 'topicsim'
    with open(os.path.join(os.path.dirname('__file__'), "/home/ddimitrov/tmp/"+"topic_sim"+".tsv")) as f:
        next(f)
        for line in f:
            #i += 1
            #if i % 1000000 == 0:
            #    print rel_feature, i
            line = line.strip().split('\t')
            rel_feature_map[(line[0],line[1])]=float(line[2])

    print 'topicsim'

    values_rel_faeture = list()
    i_indices = list()
    j_indices = list()
    i = 0
    for link in voc_zip_links:
        i += 1
        if i % 1000000 == 0:
            print 'topicsim', i
        if link in rel_feature_map:
            i_indices.append(uniqeu_nonzero_map[vocab[link[0]]])
            j_indices.append(vocab[link[1]])
            print rel_feature_map[link]
            values_rel_faeture.append(rel_feature_map[link])
    rel_feature_hyp_data = [i_indices, j_indices, values_rel_faeture]
    pickle.dump(rel_feature_hyp_data, open("/ssd/ddimitrov/pickle/topic_sim_hyp", "wb"), protocol=pickle.HIGHEST_PROTOCOL) 
Example 56
Project: wikilinks   Author: trovdimi   File: pickle_data.py    MIT License 5 votes
def pickle_viz(rel_feature):
    i = 0

    voc_zip_links = pickle.load( open( "/ssd/ddimitrov/pickle/voc_zip_links", "rb" ) )
    print "loaded voc_zip_links"
    uniqeu_nonzero_map = pickle.load( open( "/ssd/ddimitrov/pickle/uniqeu_nonzero_map", "rb" ) )
    vocab = pickle.load( open( "/ssd/ddimitrov/pickle/vocab", "rb" ) )
    print "loaded vocab"

    rel_feature_set = set()
    print rel_feature
    with open(os.path.join(os.path.dirname('__file__'), "/home/ddimitrov/tmp/"+rel_feature+".tsv")) as f:
        next(f)
        for line in f:
            #i += 1
            #if i % 10000 == 0:
            #    print rel_feature, i
            line = line.strip().split('\t')
            rel_feature_set.add((line[0],line[1]))

    print rel_feature

    values_rel_faeture = list()
    i_indices = list()
    j_indices = list()
    i = 0
    for link in voc_zip_links:
        i += 1
        if i % 1000000 == 0:
            print rel_feature, i
        if link in rel_feature_set:
            i_indices.append(uniqeu_nonzero_map[vocab[link[0]]])
            j_indices.append(vocab[link[1]])
            values_rel_faeture.append(1)
    rel_feature_hyp_data = [i_indices, j_indices, values_rel_faeture]
    pickle.dump(rel_feature_hyp_data, open("/ssd/ddimitrov/pickle/"+rel_feature+"_hyp", "wb"), protocol=pickle.HIGHEST_PROTOCOL) 
Example 57
Project: wikilinks   Author: trovdimi   File: pickle_data.py    MIT License 5 votes
def pickle_viz_positions(rel_feature):
    i = 0

    voc_zip_links = pickle.load( open( "/ssd/ddimitrov/pickle/voc_zip_links", "rb" ) )
    print "loaded voc_zip_links"
    uniqeu_nonzero_map = pickle.load( open( "/ssd/ddimitrov/pickle/uniqeu_nonzero_map", "rb" ) )
    vocab = pickle.load( open( "/ssd/ddimitrov/pickle/vocab", "rb" ) )
    print "loaded vocab"

    rel_feature_map = {}
    print rel_feature
    with open(os.path.join(os.path.dirname('__file__'), "/home/ddimitrov/tmp/"+rel_feature+".tsv")) as f:
        next(f)
        for line in f:
            #i += 1
            #if i % 10000 == 0:
            #    print rel_feature, i
            line = line.strip().split('\t')
            rel_feature_map[(line[0],line[1])]=float(line[2])

    print rel_feature

    values_rel_faeture = list()
    i_indices = list()
    j_indices = list()
    i = 0
    for link in voc_zip_links:
        i += 1
        if i % 1000000 == 0:
            print rel_feature, i
        if link in rel_feature_map:
            #print rel_feature_map[link]
            i_indices.append(uniqeu_nonzero_map[vocab[link[0]]])
            j_indices.append(vocab[link[1]])
            values_rel_faeture.append(rel_feature_map[link])
    rel_feature_hyp_data = [i_indices, j_indices, values_rel_faeture]
    pickle.dump(rel_feature_hyp_data, open("/ssd/ddimitrov/pickle/"+rel_feature+"_hyp", "wb"), protocol=pickle.HIGHEST_PROTOCOL) 
Example 58
Project: wikilinks   Author: trovdimi   File: pickle_data.py    MIT License 5 votes
def pickle_sim():
    # setup logging
    LOGGING_FORMAT = '%(levelname)s:\t%(asctime)-15s %(message)s'
    LOGGING_PATH = 'tmp/semsim-pickle.log'
    logging.basicConfig(filename=LOGGING_PATH, level=logging.DEBUG, format=LOGGING_FORMAT, filemode='w')
    i = 0
    voc_zip_links = pickle.load( open( "/ssd/ddimitrov/pickle/voc_zip_links", "rb" ) )
    print "loaded voc_zip_links"
    uniqeu_nonzero_map = pickle.load( open( "/ssd/ddimitrov/pickle/uniqeu_nonzero_map", "rb" ) )
    vocab = pickle.load( open( "/ssd/ddimitrov/pickle/vocab", "rb" ) )
    print "loaded vocab"
    sem_sim = pickle.load( open( "/ssd/ddimitrov/pickle/sem_sim", "rb" ) )


    values_rel_faeture = list()
    i_indices = list()
    j_indices = list()
    i = 0
    for link in voc_zip_links:
        i += 1
        if i % 1000000 == 0:
            print  i
        i_indices.append(uniqeu_nonzero_map[vocab[link[0]]])
        j_indices.append(vocab[link[1]])
        from_id = int(link[0])
        to_id = int(link[1])
        if from_id<=to_id:
            try:
                values_rel_faeture.append(sem_sim[(from_id,to_id)])
            except KeyError as e:
                logging.error(e)
        else:
            try:
                values_rel_faeture.append(sem_sim[(to_id,from_id)])
            except KeyError as e:
                logging.error(e)
    rel_feature_hyp_data = [i_indices, j_indices, values_rel_faeture]
    pickle.dump(rel_feature_hyp_data, open("/ssd/ddimitrov/pickle/sem_sim_hyp", "wb"), protocol=pickle.HIGHEST_PROTOCOL) 
Example 59
Project: wikilinks   Author: trovdimi   File: pickle_data.py    MIT License 5 votes
def merge_semsim():
    merge = {}
    for dirname, dirnames, filenames in os.walk("/home/psinger/WikiLinks/data/sem_sim"):
        for file_name in filenames:
            if file_name.endswith(".p"):
                sem_sim = pickle.load( open( "/home/psinger/WikiLinks/data/sem_sim/"+file_name, "rb" ) )
                merge = merge_two_dicts(merge, sem_sim)
                print len(merge)
    pickle.dump(merge, open("/ssd/ddimitrov/pickle/sem_sim", "wb"), protocol=pickle.HIGHEST_PROTOCOL)
    print "semsim" 
Example 60
Project: wikilinks   Author: trovdimi   File: weighted_pagerank.py    MIT License 5 votes
def read_pickle(fpath):
    with open(fpath, 'rb') as infile:
        obj = pickle.load(infile)
    return obj 
Example 61
Project: wikilinks   Author: trovdimi   File: weighted_pagerank.py    MIT License 5 votes
def correlations_ground_truth():
    print 'ground truth'
    #load network
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering.xml.gz")
    #read counts with zeros
    article_counts  =  pd.read_csv(TMP+'article_counts.tsv', sep='\t')
    cor = {}
    for damping in [0.8,0.9]:
        page_rank = pagerank(wikipedia, damping=damping)
        wikipedia.vertex_properties['page_rank_'+str(damping)] = page_rank
        page_rank_values = list()
        counts = list()
        correlations_values = {}
        for index, row in article_counts.iterrows():
            counts.append(float(row['counts']))
            page_rank_values.append(page_rank[wikipedia.vertex(int(row['target_article_id']))])
        print 'pearson'
        p = pearsonr(page_rank_values, counts)
        print p
        correlations_values['pearson']=p
        print 'spearmanr'
        s = spearmanr(page_rank_values, counts)
        print s
        correlations_values['spearmanr']=s
        print 'kendalltau'
        k = kendalltau(page_rank_values, counts)
        print k
        correlations_values['kendalltau']=k
        cor['page_rank_'+str(damping)]=correlations_values
    write_pickle(HOME+'output/correlations/correlations_pagerank.obj', cor) 
Example 62
Project: wikilinks   Author: trovdimi   File: weighted_pagerank.py    MIT License 5 votes
def correlations_weighted_unweighted(labels):
    #load network
    print 'weighted vs unweighted'
    name = '_'.join(labels)
    wikipedia = load_graph("output/weightedpagerank/wikipedianetwork_hyp_engineering_"+name+".xml.gz")
    #read counts with zeros

    wikipedia_u = load_graph("output/weightedpagerank/wikipedianetwork_sem_sim_distinct_links.xml.gz")
    correlations_weighted_pagerank = {}
    for label in labels:
        for damping in [0.8,0.85,0.9]:
            correlations_values={}
            key_weighted = label+"_page_rank_weighted_"+str(damping)
            pagerank_weighted = wikipedia.vertex_properties[key_weighted]
            key_unweighted = "page_rank"+str(damping)
            pagerank_unweighted = wikipedia_u.vertex_properties[key_unweighted]
            print 'pearson'
            p = pearsonr(pagerank_weighted.a, pagerank_unweighted.a)
            print p
            correlations_values['pearson']=p
            print 'spearmanr'
            s = spearmanr(pagerank_weighted.a, pagerank_unweighted.a)
            print s
            correlations_values['spearmanr']=s
            print 'kendalltau'
            k = kendalltau(pagerank_weighted.a, pagerank_unweighted.a)
            print k
            correlations_values['kendalltau']=k
            correlations_weighted_pagerank[label+str(damping)]=correlations_values

    write_pickle(HOME+'output/correlations/correlations_pagerank_weightedvsunweighted'+name+'.obj', correlations_weighted_pagerank) 
Example 63
Project: wikilinks   Author: trovdimi   File: check.py    MIT License 5 votes
def read_pickle(self, fpath):
        with open(fpath, 'rb') as infile:
            obj = pickle.load(infile)
        return obj 
Example 64
Project: wikilinks   Author: trovdimi   File: click_distributions.py    MIT License 5 votes
def read_pickle(fpath):
    with open(fpath, 'rb') as infile:
        obj = pickle.load(infile)
    return obj 
Example 65
Project: wikilinks   Author: trovdimi   File: normalized_entropy.py    MIT License 5 votes
def read_pickle(fpath):
    with open(fpath, 'rb') as infile:
        obj = pickle.load(infile)
    return obj 
Example 66
Project: curriculum-dropout   Author: pmorerio   File: load.py    GNU General Public License v3.0 5 votes
def unpickle(file):
    import cPickle
    fo = open(file, 'rb')
    dict = cPickle.load(fo)
    fo.close()
    return dict 
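The unpickle helper above opens and closes the file handle by hand; a minimal sketch of an equivalent loader that uses a context manager, with a fallback to the standard pickle module for Python 3 environments (an assumption, since the original targets Python 2 and cPickle only), is:

try:
    import cPickle as pickle_module      # Python 2
except ImportError:
    import pickle as pickle_module       # Python 3: cPickle was folded into pickle

def unpickle(file_path):
    # load and return the pickled dictionary; the file handle is closed automatically
    with open(file_path, 'rb') as fo:
        return pickle_module.load(fo)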
Example 67
Project: plugin.video.lynda   Author: davejm   File: util.py    GNU General Public License v2.0 5 votes vote down vote up
def load_data(addon, filename):
    profile_path = get_profile(addon)
    load_path = os.path.join(profile_path, filename)
    print(profile_path)
    if not os.path.isfile(load_path):
        print('%s does not exist' % load_path)
        return None
    try:
        data = pickle.load(open(load_path))
        return data
    except:
        return None 
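load_data above opens the pickle in text mode and silences every exception with a bare except; a hedged variant that reads in binary mode and only catches the errors a failed load can raise is sketched below (get_profile is the project helper used in the original; the narrower error handling is an editorial assumption, not the project's code):

import os
import pickle

def load_data(addon, filename):
    profile_path = get_profile(addon)            # project helper, as in the original
    load_path = os.path.join(profile_path, filename)
    if not os.path.isfile(load_path):
        print('%s does not exist' % load_path)
        return None
    try:
        # binary mode is required for pickles written with protocol >= 1
        with open(load_path, 'rb') as f:
            return pickle.load(f)
    except (IOError, EOFError, pickle.UnpicklingError):
        return None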
Example 68
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_MODS.py    MIT License 5 votes vote down vote up
def Dset(self, ndataset=7, name='MODS_data.pkl'):
         '''
         Function to build datasets. ndataset: number of datasets wanted;
         name: pkl file where the data from DSetGlobal is stored. The code makes sure
         that each dataset has the same ratio of positive to negative images.
         This is done by setting a tolerance lmda. If you set a really low lmda, you
         might have to stop the program and rerun it a few times.
         Returns a pkl with a segmented dataset. seg_data is a list of n lists, where n
         is the number of datasets desired. Each of these n lists consists of 2 lists:
         the data and its corresponding labels.
         '''
         f = file(name, 'rb')
         datapapa = cPickle.load(f)
         f.close()    
         w = datapapa[0]
         x = datapapa[1]
         y = range(len(x))
         seg_data = []
         counter = 0
         size = int(len(y)/float(ndataset))
         while counter < ndataset:
             z = random.sample(y, size)
             lmda = 0.0005
             ratio = float(sum([x[i] for i in z]))/(len([x[i] for i in z if x[i]==0]))
             print(ratio)
             dif = math.fabs(ratio-1) #ratio of positive to negatives
             if dif < lmda:
                 print('BINGO!', counter, dif)
                 y = [i for i in y if i not in z]
                 current_label = [x[i] for i in z]
                 current_data = [w[i] for i in z]
                 seg_data.append([current_data, current_label])
                 counter+=1
             else:
                 #print('Does not have an acceptable ratio', ratio, dif)
                 #fun+= 1
                 pass 
         f = file('seg_MODS_data.pkl', 'wb')
         cPickle.dump(seg_data, f, protocol=cPickle.HIGHEST_PROTOCOL)
         f.close() 
Example 69
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_MODS.py    MIT License 5 votes vote down vote up
def Djoin(self, name='seg_MODS_data.pkl'):
         '''
         Takes as input the segmented data from the Dset function and splits it into
         training and testing. Each list (dataset) in the segmented data is taken,
         once, as the testing set; the remaining data is shuffled and put into the
         training set. Therefore, each dataset gets a different testing set of images
         and a different, reshuffled set of training images.
         Returns n datasets (the same number as in Dset). Each dataset is made of two
         lists: training and testing, which in turn hold two lists each: data and labels.
         '''
         f = file(name, 'rb')
         datamama = cPickle.load(f)
         f.close()
         for i in xrange(len(datamama)):
             data_join = []
             data_label_join = []
             validation = datamama[i]
             data_temp = datamama[:i] + datamama[i+1:]
             for j in data_temp:
                 data_join+=j[0]
                 data_label_join+=j[1]
             
             ##Shuffle data
             combined = zip(data_join, data_label_join)
             random.shuffle(combined)
             data_join[:], data_label_join[:] = zip(*combined)                 
            
             training = [data_join,data_label_join]
             dataset_new = [training,validation]
             f = file('cut_MODS_all_data_bw_224_224_{0}.pkl'.format(i),'wb')
             cPickle.dump(dataset_new, f, protocol=cPickle.HIGHEST_PROTOCOL)
             f.close() 
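Dset and Djoin above are methods of a dataset-builder class that is not included in this listing; a purely hypothetical driver (the class name DatasetBuilder is an assumption, the file names come from the defaults in the code) would chain them like this:

# hypothetical usage; DatasetBuilder stands in for the class these methods belong to
builder = DatasetBuilder()
builder.Dset(ndataset=7, name='MODS_data.pkl')   # writes seg_MODS_data.pkl
builder.Djoin(name='seg_MODS_data.pkl')          # writes cut_MODS_all_data_bw_224_224_<i>.pkl per fold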
Example 70
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_labs_MODS.py    MIT License 5 votes vote down vote up
def Djoin(self, v, name='seg_MODS_data.pkl'):
         '''
         Takes as input the segmented data from the Dset function and splits it into
         training and testing. Each list (dataset) in the segmented data is taken,
         once, as the testing set; the remaining data is shuffled and put into the
         training set. Therefore, each dataset gets a different testing set of images
         and a different, reshuffled set of training images.
         Returns n datasets (the same number as in Dset). Each dataset is made of two
         lists: training and testing, which in turn hold two lists each: data and labels.
         '''
         f = file(name, 'rb')
         datamama = cPickle.load(f)
         f.close()
         for i in xrange(len(datamama)):
             data_join = []
             data_label_join = []
             #if 'test' in v:
             validation = datamama[i]
             data_temp = datamama[:i] + datamama[i+1:]
             #else:
                 #validation = []
                 #data_temp = datamama[:]
             for j in data_temp:
                 data_join+=j[0]
                 data_label_join+=j[1]
             
             ##Shuffle data
             combined = zip(data_join, data_label_join)
             random.shuffle(combined)
             data_join[:], data_label_join[:] = zip(*combined)                 
            
             training = [data_join,data_label_join]
             dataset_new = [training,validation]
             f = file('MODS_224_224_{0}_{1}.pkl'.format(i, v),'wb')
             print len(validation), v
             cPickle.dump(dataset_new, f, protocol=cPickle.HIGHEST_PROTOCOL)
             f.close() 
Example 73
Project: PIC   Author: ameroyer   File: nn.py    MIT License 5 votes vote down vote up
def pickle_load(file, **kwargs):
        # load and return the unpickled object from an already-open file
        return cPickle.load(file)
Example 74
Project: PIC   Author: ameroyer   File: main.py    MIT License 5 votes vote down vote up
def pickle_load(file, **kwargs):
        return cPickle.load(file) 
Example 75
Project: neural-fingerprinting   Author: StephanZheng   File: utils_cifar.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def unpickle(file):
    fo = open(file, 'rb')
    dict = pkl.load(fo)
    fo.close()
    return dict 
Example 76
Project: projection-methods   Author: akshayka   File: plot_residuals.py    GNU General Public License v3.0 4 votes vote down vote up
def main():
    parser = argparse.ArgumentParser()
    # --- input/output --- #
    parser.add_argument(
        'data', metavar='D',
        help=('glob matching pickled results to plot; results should be '
        'generated by experiment.py'))
    parser.add_argument(
        '-o', '--output', type=str, default=None,
        help=('output filename of plot (w/o extension); if None, plot is '
        'shown but not saved.'))
    # --- plot settings --- #
    parser.add_argument(
        '-t', '--title', type=str, default='Residuals for feasibility problem',
        help='plot title')
    args = vars(parser.parse_args())

    if args['output'] is not None:
        output_path = PosixPath(args['output'] + '.png')
        if output_path.is_file():
            raise ValueError('Output file %s already exists!' % str(output_path))

    data_paths = [PosixPath(f) for f in glob(args['data'])]
    data = []
    for p in data_paths:
        if not p.is_file():
            raise ValueError('File %s does not exist.' % str(p))
        with p.open('rb') as f:
            data.append(cPickle.load(f))

    plt.figure() 
    max_its = 0
    for d in data:
        res = d['res']
        res = [sum(r) for r in res]
        if 0 in res:
            res = [r + 1e-20 for r in res]
        it = range(len(res))
        if len(res) > max_its:
            max_its = len(res)
        plt.plot(it, res, label=d['name'])
    plt.semilogy()
    step = int(max_its / 10)
    plt.xticks(range(0, max_its+1, step))
    plt.title(args['title']) 
    plt.ylabel('residual')
    plt.xlabel('iterations')
    plt.legend()

    if args['output'] is not None:
        plt.savefig(str(output_path))
    else:
        datacursor(formatter='{label}'.format)
        plt.show() 
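Since main() builds its interface with argparse (a positional glob of pickled results plus the -o/--output and -t/--title options), a typical invocation of the script would look roughly like the line below, assuming the usual __main__ guard; the result glob and output name are illustrative only.

python plot_residuals.py 'results/*.pkl' -o residuals -t 'Residuals for feasibility problem'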
Example 77
Project: Att-ChemdNER   Author: lingluodlut   File: model.py    Apache License 2.0 4 votes vote down vote up
def __init__(self, parameters=None, models_path=None, 
                 model_path=None,Training=False):
#{{{
        """
        Initialize the model. We either provide the parameters and a path where
        we store the models, or the location of a trained model.
        """
        if Training: 
#{{{
            assert parameters and models_path 
            # Create a name based on the parameters
            self.parameters = parameters
            self.name = get_name(parameters)
            # Model location 
            if model_path is None:
                model_path = os.path.join(models_path, self.name)
            self.model_path = model_path
            self.parameters_path = os.path.join(model_path, 'parameters.pkl')
            self.mappings_path = os.path.join(model_path, 'mappings.pkl')
            # Create directory for the model if it does not exist
            if not os.path.exists(self.model_path):
                os.makedirs(self.model_path)
            # Save the parameters to disk
            with open(self.parameters_path, 'wb') as f:
                cPickle.dump(parameters, f) 
#}}}
        else: 
#{{{
            # Model location
            self.model_path = model_path
            self.parameters_path = os.path.join(model_path, 'parameters.pkl')
            self.mappings_path = os.path.join(model_path, 'mappings.pkl')
            # Create directory for the model if it does not exist
            if not os.path.exists(self.model_path):
                os.makedirs(self.model_path)
            # Save the parameters to disk
            with open(self.parameters_path, 'rb') as f:
                self.parameters=cPickle.load(f);
            self.reload_mappings();
        self.components = {}
#}}}
#}}} 
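The constructor above either creates a model directory and dumps parameters.pkl (Training=True) or reloads parameters.pkl and the mappings from an existing directory (Training=False). A minimal sketch of both calls, assuming the method belongs to a class named Model and that params is a dictionary of hyperparameters (both assumptions), looks like:

# training mode: a directory is created under models_path and parameters.pkl is dumped there
model = Model(parameters=params, models_path='models/', Training=True)

# inference mode: parameters.pkl and mappings.pkl are loaded back from a saved model directory
model = Model(model_path='models/my_trained_model', Training=False)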
Example 78
Project: wikilinks   Author: trovdimi   File: insertarticlefeatures.py    MIT License 4 votes vote down vote up
def update_link_features_sem_similarity():

    connection = db._create_connection()
    cursor = connection.cursor()

    # setup logging
    LOGGING_FORMAT = '%(levelname)s:\t%(asctime)-15s %(message)s'
    LOGGING_PATH = 'tmp/link_features_semsim-dbinsert.log'
    logging.basicConfig(filename=LOGGING_PATH, level=logging.DEBUG, format=LOGGING_FORMAT, filemode='w')
    for dirname, dirnames, filenames in os.walk("/home/psinger/WikiLinks/data/sem_sim"):
        for file_name in filenames:
            if file_name.endswith(".p"):
                print file_name
                sem_sim = cPickle.load( open( "/home/psinger/WikiLinks/data/sem_sim/"+file_name, "rb" ) )
                for link, sim in sem_sim.iteritems():
                    try:
                        link_features = {}
                        link_features['source_article_id'] = link[0]
                        link_features['target_article_id'] = link[1]
                        link_features['sim'] = sim

                        sql  = "UPDATE link_features " \
                               "SET  sem_similarity=%(sim)s " \
                               "WHERE source_article_id = %(source_article_id)s AND target_article_id = %(target_article_id)s;"

                        cursor.execute(sql, link_features)

                    except MySQLdb.Error as e:
                        logging.error(e)
                    connection.commit()
                    try:
                        link_features = {}
                        link_features['source_article_id'] = link[1]
                        link_features['target_article_id'] = link[0]
                        link_features['sim'] = sim

                        sql  = "UPDATE link_features " \
                               "SET  sem_similarity=%(sim)s " \
                               "WHERE source_article_id = %(source_article_id)s AND target_article_id = %(target_article_id)s;"

                        cursor.execute(sql, link_features)

                    except MySQLdb.Error as e:
                        logging.error(e)
                    connection.commit()
                connection.close() 
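The loop above implies that every *.p file under data/sem_sim unpickles into a dictionary keyed by (source_article_id, target_article_id) tuples, with a similarity score as the value. A minimal sketch of producing such a file with cPickle, using illustrative ids, scores and file name, would be:

import cPickle

sem_sim = {
    (1000, 2000): 0.83,   # (source_article_id, target_article_id): semantic similarity
    (1000, 3000): 0.41,
}
with open('/home/psinger/WikiLinks/data/sem_sim/sem_sim_sample.p', 'wb') as outfile:
    cPickle.dump(sem_sim, outfile, protocol=cPickle.HIGHEST_PROTOCOL)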
Example 79
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_labs_MODS.py    MIT License 4 votes vote down vote up
def Dset(self, v, ndataset=5, name='MODS_data.pkl'):
         '''
         Function to build datasets. ndataset: number of datasets wanted;
         name: pkl file where the data from DSetGlobal is stored. The code makes sure
         that each dataset has the same ratio of positive to negative images.
         This is done by setting a tolerance lmda. If you set a really low lmda, you
         might have to stop the program and rerun it a few times.
         Returns a pkl with a segmented dataset. seg_data is a list of n lists, where n
         is the number of datasets desired. Each of these n lists consists of 2 lists:
         the data and its corresponding labels.
         '''
         ratios = {'train_1': 0.62234,
                   'train_2': 0.8499,
                   'train_3': 0.53817,
                   'test_1': 0.8881987,
                   'test_2': 0.51543,
                   'test_3': 0.84473
                   }
         f = file(name, 'rb')
         datapapa = cPickle.load(f)
         f.close()    
         w = datapapa[0]
         x = datapapa[1]
         y = range(len(x))
         seg_data = []
         counter = 0
         size = int(len(y)/float(ndataset))
         print size, 'gamboozle'
         while counter < ndataset:
             z = random.sample(y, size)
             lmda = 0.02
             ratio = float(sum([x[i] for i in z]))/(len([x[i] for i in z if x[i]==0]))
             print(ratio), v
             #exit()
             dif = math.fabs(ratio-ratios[v]) #ratio of positive to negatives
             if dif < lmda:
                 print('BINGO!', counter, dif)
                 y = [i for i in y if i not in z]
                 current_label = [x[i] for i in z]
                 current_data = [w[i] for i in z]
                 seg_data.append([current_data, current_label])
                 counter+=1
             else:
                 #print('Does not have an acceptable ratio', ratio, dif)
                 #fun+= 1
                 pass 
         f = file('seg_MODS_data_{0}.pkl'.format(v), 'wb')
         cPickle.dump(seg_data, f, protocol=cPickle.HIGHEST_PROTOCOL)
         f.close() 