Python sklearn.preprocessing.normalize() Examples

The following are 30 code examples showing how to use sklearn.preprocessing.normalize(), extracted from open source projects. Each entry notes the project, author, source file, and license it comes from.

You may also want to check out the other functions and classes available in the sklearn.preprocessing module.
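Before the project examples, here is a minimal sketch of the function itself, on made-up values: normalize() rescales each sample independently of the others (row-wise by default) so that it has unit norm, and unlike most preprocessing utilities it is stateless, so there is nothing to fit.

import numpy as np
from sklearn.preprocessing import normalize

X = np.array([[3.0, 4.0],
              [1.0, 1.0]])

# Default: scale each row (sample) to unit L2 norm; no fitting required.
X_l2 = normalize(X)                   # same as normalize(X, norm='l2', axis=1)
print(X_l2)                           # [[0.6 0.8] [0.7071 0.7071]]
print(np.linalg.norm(X_l2, axis=1))   # [1. 1.]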

Example 1
Project: scanorama   Author: brianhie   File: process.py    License: MIT License
def load_names(data_names, norm=True, log1p=False, verbose=True):
    # Load datasets.
    datasets = []
    genes_list = []
    n_cells = 0
    for name in data_names:
        X_i, genes_i = load_data(name)
        if norm:
            X_i = normalize(X_i, axis=1)
        if log1p:
            X_i = np.log1p(X_i)
        X_i = csr_matrix(X_i)
            
        datasets.append(X_i)
        genes_list.append(genes_i)
        n_cells += X_i.shape[0]
        if verbose:
            print('Loaded {} with {} genes and {} cells'.
                  format(name, X_i.shape[1], X_i.shape[0]))
    if verbose:
        print('Found {} cells among all datasets'
              .format(n_cells))

    return datasets, genes_list, n_cells 
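A note on the call above: normalize() accepts scipy sparse matrices and returns a sparse result, so the csr_matrix() conversion that follows is cheap when the loaded data is already sparse. A small sketch with toy counts (not scanorama data):

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize

counts = csr_matrix(np.array([[2.0, 0.0, 2.0],
                              [0.0, 5.0, 0.0]]))
row_normed = normalize(counts, axis=1)   # sparse in, sparse (CSR) out
print(row_normed.toarray())              # each row now has unit L2 norm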
Example 2
Project: Python-ELM   Author: masaponto   File: ml_elm.py    License: MIT License
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import train_test_split

    db_name = 'diabetes'
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)

    tmp = data_set.target
    tmpL = [ 1 if i == "tested_positive" else -1 for i in tmp]
    data_set.target = tmpL

    X_train, X_test, y_train, y_test = train_test_split(
        data_set.data, data_set.target, test_size=0.4)

    mlelm = MLELM(hidden_units=(10, 30, 200)).fit(X_train, y_train)
    elm = ELM(200).fit(X_train, y_train)

    print("MLELM Accuracy %0.3f " % mlelm.score(X_test, y_test))
    print("ELM Accuracy %0.3f " % elm.score(X_test, y_test)) 
Example 3
Project: OpenNE   Author: thunlp   File: grarep.py    License: MIT License
def train(self):
        self.adj = self.getAdjMat()
        self.node_size = self.adj.shape[0]
        self.Ak = np.matrix(np.identity(self.node_size))
        self.RepMat = np.zeros((self.node_size, int(self.dim*self.Kstep)))
        for i in range(self.Kstep):
            print('Kstep =', i)
            self.Ak = np.dot(self.Ak, self.adj)
            probTranMat = self.GetProbTranMat(self.Ak)
            Rk = self.GetRepUseSVD(probTranMat, 0.5)
            Rk = normalize(Rk, axis=1, norm='l2')
            self.RepMat[:, self.dim*i:self.dim*(i+1)] = Rk[:, :]
        # get embeddings
        self.vectors = {}
        look_back = self.g.look_back_list
        for i, embedding in enumerate(self.RepMat):
            self.vectors[look_back[i]] = embedding 
Example 4
Project: nodevectors   Author: VHRanger   File: prone.py    License: MIT License
def pre_factorization(G, n_components, exponent):
        """
        Network Embedding as Sparse Matrix Factorization
        """
        C1 = preprocessing.normalize(G, "l1")
        # Prepare negative samples
        neg = np.array(C1.sum(axis=0))[0] ** exponent
        neg = neg / neg.sum()
        neg = sparse.diags(neg, format="csr")
        neg = G.dot(neg)
        # Set negative elements to 1 -> 0 when log
        C1.data[C1.data <= 0] = 1
        neg.data[neg.data <= 0] = 1
        C1.data = np.log(C1.data)
        neg.data = np.log(neg.data)
        C1 -= neg
        features_matrix = ProNE.tsvd_rand(C1, n_components=n_components)
        return features_matrix 
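The first line above passes the norm as the second positional argument: preprocessing.normalize(G, "l1") divides each row of the adjacency matrix by its absolute sum, turning it into a row-stochastic transition matrix. A toy illustration, separate from the ProNE code:

import numpy as np
from sklearn.preprocessing import normalize

A = np.array([[0.0, 2.0, 2.0],
              [1.0, 0.0, 3.0]])
P = normalize(A, norm='l1')   # axis=1 by default: rows divided by their L1 norm
print(P)                      # [[0.   0.5  0.5 ] [0.25 0.   0.75]]
print(P.sum(axis=1))          # [1. 1.]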
Example 5
Project: geosketch   Author: brianhie   File: process.py    License: MIT License
def load_names(data_names, norm=True, log1p=False, verbose=True):
    # Load datasets.
    datasets = []
    genes_list = []
    n_cells = 0
    for name in data_names:
        X_i, genes_i = load_data(name)
        if norm:
            X_i = normalize(X_i, axis=1)
        if log1p:
            X_i = np.log1p(X_i)
        X_i = csr_matrix(X_i)
            
        datasets.append(X_i)
        genes_list.append(genes_i)
        n_cells += X_i.shape[0]
        if verbose:
            print('Loaded {} with {} genes and {} cells'.
                  format(name, X_i.shape[1], X_i.shape[0]))
    if verbose:
        print('Found {} cells among all datasets'
              .format(n_cells))

    return datasets, genes_list, n_cells 
Example 6
Project: SARC   Author: NLPrinceton   File: eval.py    License: MIT License
def parse():
  parser = argparse.ArgumentParser()
  parser.add_argument('dataset', help='pol or main', type=str)
  parser.add_argument('-n', '--n', default=1, help='Number of grams', type=int)
  parser.add_argument('--min_count', default=1, help='Min count', type=int)
  parser.add_argument('--embedding', default=CCGLOVE,
                      help='embedding file', type=str)
  parser.add_argument('--weights', default=None,
                      help='weights to use for ngrams (e.g. sif, None)', type=str)
  parser.add_argument('-norm', '--normalize', action='store_true',
                      help='Normalize vectors')
  parser.add_argument('-l', '--lower', action='store_true',
                      help='Whether or not to lowercase text')
  parser.add_argument('-e', '--embed', action='store_true',
                      help='Use embeddings instead of bong')
  return parser.parse_args() 
Example 7
Project: 2016CCF-sougou   Author: prozhuchen   File: STFIWF.py    License: Apache License 2.0
def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart

    Warning: the python-level loop and join operations make this
    implementation 20 times slower than the strip_accents_ascii basic
    normalization.

    See also
    --------
    strip_accents_ascii
        Remove accentuated char for any unicode symbol that has a direct
        ASCII equivalent.
    """
    normalized = unicodedata.normalize('NFKD', s)
    if normalized == s:
        return s
    else:
        return ''.join([c for c in normalized if not unicodedata.combining(c)]) 
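Note that this example (and the next) matches on the word 'normalize' but calls unicodedata.normalize from the Python standard library, not sklearn.preprocessing.normalize. The NFKD form decomposes accented characters into base characters plus combining marks, which the comprehension then drops; for instance:

import unicodedata

s = 'àéî naïve'
stripped = ''.join(c for c in unicodedata.normalize('NFKD', s)
                   if not unicodedata.combining(c))
print(stripped)   # 'aei naive'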
Example 8
Project: 2016CCF-sougou   Author: prozhuchen   File: STFIWF.py    License: Apache License 2.0
def _char_wb_ngrams(self, text_document):
        """Whitespace sensitive char-n-gram tokenization.

        Tokenize text_document into a sequence of character n-grams
        excluding any whitespace (operating only inside word boundaries)"""
        # normalize white spaces
        text_document = self._white_spaces.sub(" ", text_document)

        min_n, max_n = self.ngram_range
        ngrams = []
        for w in text_document.split():
            w = ' ' + w + ' '
            w_len = len(w)
                for n in range(min_n, max_n + 1):
                offset = 0
                ngrams.append(w[offset:offset + n])
                while offset + n < w_len:
                    offset += 1
                    ngrams.append(w[offset:offset + n])
                if offset == 0:  # count a short word (w_len < n) only once
                    break
        return ngrams 
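To see what the method produces, here is a simplified self-contained version of the same logic (a hypothetical helper, with the whitespace regex inlined) and its output for a two-word input with ngram_range=(2, 2):

import re

def char_wb_ngrams(text, min_n, max_n):
    # Simplified stand-alone version of the method above.
    text = re.sub(r"\s\s+", " ", text)
    ngrams = []
    for w in text.split():
        w = ' ' + w + ' '
        for n in range(min_n, max_n + 1):
            offset = 0
            ngrams.append(w[offset:offset + n])
            while offset + n < len(w):
                offset += 1
                ngrams.append(w[offset:offset + n])
            if offset == 0:  # word shorter than n: count it only once
                break
    return ngrams

print(char_wb_ngrams('hi there', 2, 2))
# [' h', 'hi', 'i ', ' t', 'th', 'he', 'er', 're', 'e ']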
Example 9
Project: MultiKE   Author: nju-websoft   File: literal_encoder.py    License: MIT License
def __init__(self, word_vec_list, args, input_dimension=1500, hidden_dimensions=None):
        self.session = load_session()
        self.args = args
        self.weights, self.biases = {}, {}
        self.input_dimension = input_dimension
        if hidden_dimensions is None:
            hidden_dimensions = [1024, 512, self.args.dim]
        self.hidden_dimensions = hidden_dimensions
        self.layer_num = len(self.hidden_dimensions)
        self.encoder_output = None
        self.decoder_output = None
        self.decoder_op = None

        self.word_vec_list = np.reshape(word_vec_list, [len(word_vec_list), input_dimension])
        if self.args.encoder_normalize:
            self.word_vec_list = preprocessing.normalize(self.word_vec_list)

        self._init_graph()
        self._loss_optimizer()
        tf.global_variables_initializer().run(session=self.session) 
Example 10
Project: MultiKE   Author: nju-websoft   File: data_model.py    License: MIT License
def _generate_name_vectors_mat(self):
        name_ordered_list = list()
        num = len(self.entities)
        print("total entities:", num)
        entity_id_uris_dic = dict(zip(self.kgs.kg1.entities_id_dict.values(), self.kgs.kg1.entities_id_dict.keys()))
        entity_id_uris_dic2 = dict(zip(self.kgs.kg2.entities_id_dict.values(), self.kgs.kg2.entities_id_dict.keys()))
        entity_id_uris_dic.update(entity_id_uris_dic2)
        print('total entities ids:', len(entity_id_uris_dic))
        assert len(entity_id_uris_dic) == num
        for i in range(num):
            assert i in entity_id_uris_dic
            entity_uri = entity_id_uris_dic.get(i)
            assert entity_uri in self.entity_local_name_dict
            entity_name = self.entity_local_name_dict.get(entity_uri)
            entity_name_index = self.literal_id_dic.get(entity_name)
            name_ordered_list.append(entity_name_index)
        print('name_ordered_list', len(name_ordered_list))
        name_mat = self.literal_vectors_mat[name_ordered_list, ]
        print("entity name embeddings mat:", type(name_mat), name_mat.shape)
        if self.args.literal_normalize:
            name_mat = preprocessing.normalize(name_mat)
        self.local_name_vectors = name_mat 
Example 11
Project: MultiKE   Author: nju-websoft   File: MultiKE_Late.py    License: MIT License
def valid(model, embed_choice='avg', w=(1, 1, 1)):
    if embed_choice == 'nv':
        ent_embeds = model.name_embeds.eval(session=model.session)
    elif embed_choice == 'rv':
        ent_embeds = model.rv_ent_embeds.eval(session=model.session)
    elif embed_choice == 'av':
        ent_embeds = model.av_ent_embeds.eval(session=model.session)
    elif embed_choice == 'final':
        ent_embeds = model.ent_embeds.eval(session=model.session)
    elif embed_choice == 'avg':
        ent_embeds = w[0] * model.name_embeds.eval(session=model.session) + \
                     w[1] * model.rv_ent_embeds.eval(session=model.session) + \
                     w[2] * model.av_ent_embeds.eval(session=model.session)
    else:  # wavg
        ent_embeds = model.ent_embeds
    print(embed_choice, 'valid results:')
    embeds1 = ent_embeds[model.kgs.valid_entities1,]
    embeds2 = ent_embeds[model.kgs.valid_entities2 + model.kgs.test_entities2,]
    hits1_12, mrr_12 = eva.valid(embeds1, embeds2, None, model.args.top_k, model.args.test_threads_num,
                                 normalize=True)
    del embeds1, embeds2
    gc.collect()
    return mrr_12 
Example 12
Project: MultiKE   Author: nju-websoft   File: MultiKE_Late.py    License: MIT License
def test(model, embed_choice='avg', w=(1, 1, 1)):
    if embed_choice == 'nv':
        ent_embeds = model.name_embeds.eval(session=model.session)
    elif embed_choice == 'rv':
        ent_embeds = model.rv_ent_embeds.eval(session=model.session)
    elif embed_choice == 'av':
        ent_embeds = model.av_ent_embeds.eval(session=model.session)
    elif embed_choice == 'final':
        ent_embeds = model.ent_embeds.eval(session=model.session)
    elif embed_choice == 'avg':
        ent_embeds = w[0] * model.name_embeds.eval(session=model.session) + \
                     w[1] * model.rv_ent_embeds.eval(session=model.session) + \
                     w[2] * model.av_ent_embeds.eval(session=model.session)
    else:  # wavg
        ent_embeds = model.ent_embeds
    print(embed_choice, 'test results:')
    embeds1 = ent_embeds[model.kgs.test_entities1,]
    embeds2 = ent_embeds[model.kgs.test_entities2,]
    hits1_12, mrr_12 = eva.valid(embeds1, embeds2, None, model.args.top_k, model.args.test_threads_num,
                                 normalize=True)
    del embeds1, embeds2
    gc.collect()
    return mrr_12 
Example 13
Project: MultiKE   Author: nju-websoft   File: MultiKE_Late.py    License: MIT License
def _compute_weight(embeds1, embeds2, embeds3):
    def min_max_normalization(mat):
        min_ = np.min(mat)
        max_ = np.max(mat)
        return (mat - min_) / (max_ - min_)

    other_embeds = (embeds1 + embeds2 + embeds3) / 3
    # other_embeds = (embeds2 + embeds3) / 2
    other_embeds = preprocessing.normalize(other_embeds)
    embeds1 = preprocessing.normalize(embeds1)
    # sim_mat = sim(embeds1, other_embeds, metric='cosine')
    sim_mat = np.matmul(embeds1, other_embeds.T)
    # sim_mat = 1 - euclidean_distances(embeds1, other_embeds)
    weights = np.diag(sim_mat)
    # print(weights.shape, np.mean(weights))
    # weights = min_max_normalization(weights)
    print(weights.shape, np.mean(weights))
    return np.mean(weights) 
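The normalize-then-matmul pattern above is an explicit cosine similarity: after L2 normalization every row has unit length, so row dot products are cosines, and np.diag() extracts the cosine between each row of embeds1 and the corresponding row of other_embeds. A sketch of the identity on toy arrays:

import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity

rng = np.random.RandomState(0)
A, B = rng.rand(4, 8), rng.rand(4, 8)

manual = np.matmul(normalize(A), normalize(B).T)
assert np.allclose(manual, cosine_similarity(A, B))
row_wise = np.diag(manual)   # cosine of A[i] vs B[i], as used for the weights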
Example 14
Project: scikit-multiflow   Author: scikit-multiflow   File: streaming_random_patches.py    License: BSD 3-Clause "New" or "Revised" License
def _predict_proba(self, X):
        y_proba = np.asarray([0.])

        for i in range(len(self.ensemble)):
            y_proba_temp = self.ensemble[i].predict_proba(X)
            if np.sum(y_proba_temp) > 0.0:
                y_proba_temp = normalize(y_proba_temp, norm='l1')[0].copy()
                acc = self.ensemble[i].performance_evaluator.accuracy_score()
                if not self.disable_weighted_vote and acc > 0.0:
                    y_proba_temp *= acc
                # Check array length consistency
                if len(y_proba_temp) != len(y_proba):
                    if len(y_proba_temp) > len(y_proba):
                        y_proba.resize((len(y_proba_temp), ), refcheck=False)
                    else:
                        y_proba_temp.resize((len(y_proba), ), refcheck=False)
                # Add values
                y_proba += y_proba_temp
        return y_proba 
Example 15
Project: karateclub   Author: benedekrozemberczki   File: neu.py    License: GNU General Public License v3.0
def _update_embedding(self, graph, original_embedding):
        r"""Performs the Network Embedding Update on the original embedding.
        Args:
            original_embedding (Numpy array): An array containing an embedding.
            graph (NetworkX graph): The embedded graph.

        Return types:
            embedding (Numpy array): An array containing the updated embedding.
        """
        embedding = self._normalize_embedding(original_embedding)
        adjacency = nx.adjacency_matrix(graph, nodelist=range(graph.number_of_nodes()))
        normalized_adjacency = normalize(adjacency, norm='l1', axis=1)
        for _ in range(self.iterations):
            embedding = (embedding + 
                         self.L1*(normalized_adjacency @ embedding) + 
                         self.L2*(normalized_adjacency @ (normalized_adjacency @ embedding)))
        return embedding 
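Since normalize(..., norm='l1', axis=1) makes the adjacency row-stochastic, each iteration mixes a node's embedding with its one-hop and two-hop neighborhood averages, damped by self.L1 and self.L2. A toy run of one iteration on a 3-node path graph (hypothetical damping values, adjacency written out by hand):

import numpy as np
from sklearn.preprocessing import normalize

# Adjacency of the path graph 0 - 1 - 2.
A = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])
P = normalize(A, norm='l1', axis=1)   # row-stochastic random-walk matrix

embedding = np.eye(3)                 # stand-in for a real embedding
L1, L2 = 0.5, 0.25                    # hypothetical damping weights
updated = embedding + L1 * (P @ embedding) + L2 * (P @ (P @ embedding))
print(updated)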
Example 16
Project: robotreviewer   Author: ijmarshall   File: vectorizer.py    License: GNU General Public License v3.0
def transform(self, X_si, high=None, low=None, limit=None):
        """
        Same as HashingVectorizer.transform, except it allows for an
        interaction list, which is an iterable the same length as X
        filled with True/False. This method adds an empty row for
        docs labelled as False.
        """
        analyzer = self.build_analyzer()

        X = self._get_hasher().transform(
            analyzer(self._deal_with_input(doc)) for doc in X_si)
        
        X.data.fill(1)

        if self.norm is not None:
            X = normalize(X, norm=self.norm, copy=False)

        if low:
            X = self._limit_features(X, low=low)
        return X 
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License
def test_cosine_similarity():
    # Test the cosine_similarity.

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    for X_, Y_ in ((X, None), (X, Y),
                   (Xcsr, None), (Xcsr, Ycsr)):
        # Test that the cosine kernel is equal to a linear kernel when data
        # has been previously normalized by L2-norm.
        K1 = pairwise_kernels(X_, Y=Y_, metric="cosine")
        X_ = normalize(X_)
        if Y_ is not None:
            Y_ = normalize(Y_)
        K2 = pairwise_kernels(X_, Y=Y_, metric="linear")
        assert_array_almost_equal(K1, K2) 
Example 18
Project: StageDP   Author: yizhongw   File: other.py    License: MIT License
def vectorize(features, vocab):
    """ Transform a features list into a numeric vector
        with a given vocab

    :type dpvocab: dict
    :param dpvocab: vocab for distributional representation

    :type projmat: scipy.lil_matrix
    :param projmat: projection matrix for disrep
    """
    vec = lil_matrix((1, len(vocab)))

    for feat in features:
        try:
            fidx = vocab[feat]
            vec[0, fidx] += 1.0
        except KeyError:
            pass
    # Normalization
    vec = normalize(vec)
    return vec 
Example 19
Project: scanorama   Author: brianhie   File: scanorama.py    License: MIT License
def process_data(datasets, genes, hvg=HVG, dimred=DIMRED, verbose=False):
    # Only keep highly variable genes
    if hvg is not None and 0 < hvg < len(genes):
        if verbose:
            print('Highly variable filter...')
        X = vstack(datasets)
        disp = dispersion(X)
        highest_disp_idx = np.argsort(disp[0])[::-1]
        top_genes = set(genes[highest_disp_idx[range(hvg)]])
        for i in range(len(datasets)):
            gene_idx = [ idx for idx, g_i in enumerate(genes)
                         if g_i in top_genes ]
            datasets[i] = datasets[i][:, gene_idx]
        genes = np.array(sorted(top_genes))

    # Normalize.
    if verbose:
        print('Normalizing...')
    for i, ds in enumerate(datasets):
        datasets[i] = normalize(ds, axis=1)

    # Compute compressed embedding.
    if dimred > 0:
        if verbose:
            print('Reducing dimension...')
        datasets_dimred = dimensionality_reduce(datasets, dimred=dimred)
        if verbose:
            print('Done processing.')
        return datasets_dimred, genes

    if verbose:
        print('Done processing.')

    return datasets, genes

# Plot t-SNE visualization. 
Example 20
Project: scanorama   Author: brianhie   File: scanorama.py    License: MIT License
def batch_bias(curr_ds, match_ds, bias, batch_size=None, sigma=SIGMA):
    if batch_size is None:
        weights = rbf_kernel(curr_ds, match_ds, gamma=0.5*sigma)
        weights = normalize(weights, axis=1, norm='l1')
        avg_bias = np.dot(weights, bias)
        return avg_bias

    base = 0
    avg_bias = np.zeros(curr_ds.shape)
    denom = np.zeros(curr_ds.shape[0])
    while base < match_ds.shape[0]:
        batch_idx = range(
            base, min(base + batch_size, match_ds.shape[0])
        )
        weights = rbf_kernel(curr_ds, match_ds[batch_idx, :],
                             gamma=0.5*sigma)
        avg_bias += np.dot(weights, bias[batch_idx, :])
        denom += np.sum(weights, axis=1)
        base += batch_size

    denom = handle_zeros_in_scale(denom, copy=False)
    avg_bias /= denom[:, np.newaxis]

    return avg_bias

# Compute nonlinear translation vectors between dataset
# and a reference. 
Example 21
Project: TransferRL   Author: yaserkl   File: replay_buffer.py    License: MIT License
def __init__(self, hps, example_list, dqn_batch_size, use_state_prime = False, max_art_oovs = 0):
    """
      Args:
       hps: seq2seq model parameters
       example_list: list of experiences
       dqn_batch_size: DDQN batch size
       use_state_prime: whether to use the next decoder state to make the batch or the current one
       max_art_oovs: number of OOV tokens in current batch

      Properties:
        _x: The input to DDQN model for training, this is basically the decoder output (dqn_batch_size, dqn_input_feature_len)
        _y: The Q-estimation (dqn_batch_size, vocab_size)
        _y_extended: The Q-estimation (dqn_batch_size, vocab_size + max_art_oovs)
    """
    self._x = np.zeros((dqn_batch_size, hps.dqn_input_feature_len))
    self._y = np.zeros((dqn_batch_size, hps.vocab_size))
    self._y_extended = np.zeros((dqn_batch_size, hps.vocab_size + max_art_oovs))
    for i,e in enumerate(example_list):
      if use_state_prime:
        self._x[i,:]=e.state_prime
      else:
        self._x[i,:]=e.state
        self._y[i,:]=normalize(e.q_value[0:hps.vocab_size], axis=1, norm='l1')
      if max_art_oovs == 0:
        self._y_extended[i,:] = normalize(e.q_value[0:hps.vocab_size], axis=1, norm='l1')
      else:
        self._y_extended[i,:] = e.q_value 
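One caveat when adapting this snippet: normalize() validates its input as a 2-D samples-by-features array and raises a ValueError on a plain 1-D vector, so e.q_value[0:hps.vocab_size] must already be 2-D here. For a single vector, reshape first; a minimal sketch:

import numpy as np
from sklearn.preprocessing import normalize

q = np.array([1.0, 3.0, 6.0])                        # a single 1-D vector
q_normed = normalize(q.reshape(1, -1), norm='l1')[0]
print(q_normed)                                      # [0.1 0.3 0.6]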
Example 22
Project: TransferRL   Author: yaserkl   File: beam_search.py    License: MIT License
def avg_log_prob(self):
    # normalize log probability by number of tokens (otherwise longer sequences always have lower probability)
    return self.log_prob / len(self.tokens) 
Example 23
Project: Python-ELM   Author: masaponto   File: elm.py    License: MIT License
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import ShuffleSplit, KFold, cross_val_score

    db_name = 'australian'
    hid_nums = [100, 200, 300]

    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)
    data_set.target = [1 if i == 1 else -1
                       for i in data_set.target.astype(int)]

    for hid_num in hid_nums:
        print(hid_num, end=' ')
        e = ELM(hid_num)

        ave = 0
        for i in range(10):
            cv = KFold(n_splits=5, shuffle=True)
            scores = cross_val_score(
                e, data_set.data, data_set.target,
                cv=cv, scoring='accuracy', n_jobs=-1)
            ave += scores.mean()

        ave /= 10

        print("Accuracy: %0.3f " % (ave)) 
Example 24
Project: tartarus   Author: sergiooramas   File: train.py    License: MIT License
def __init__(self, params, normalize=False, whiten=True):
        self.model_id = common.get_next_model_id()
        self.norm = normalize
        self.whiten = whiten
        self.x_path = '%s_%sx%s' % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
        self.y_path = '%s_%s_%s' % (params['dataset']['fact'],params['dataset']['dim'],params['dataset']['dataset'])
        self.dataset_settings = params['dataset']
        self.training_params = params['training']
        self.model_arch = params['cnn']
        self.predicting_params = params['predicting'] 
Example 25
Project: tartarus   Author: sergiooramas   File: train.py    License: MIT License
def batch_block_generator(params, y_path, N_train, id2gt, X_meta=None,
                          val_from_file=False):
    hdf5_file = common.PATCHES_DIR+"/patches_train_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
    f = h5py.File(hdf5_file,"r")
    block_step = 50000
    batch_size = params['training']['n_minibatch']
    randomize = True
    with_meta = False
    if X_meta is not None:
        with_meta = True
    while 1:
        for i in range(0, N_train, block_step):
            x_block = f['features'][i:min(N_train, i+block_step)]
            index_block = f['index'][i:min(N_train, i+block_step)]
            #y_block = f['targets'][i:min(N_train,i+block_step)]
            x_block = np.delete(x_block, np.where(index_block == ""), axis=0)
            index_block = np.delete(index_block, np.where(index_block == ""))
            y_block = np.asarray([id2gt[id] for id in index_block])
            if params['training']['normalize_y']:
                normalize(y_block, copy=False)
            items_list = list(range(x_block.shape[0]))
            if randomize:
                random.shuffle(items_list)
            for j in range(0, len(items_list), batch_size):
                if j+batch_size <= x_block.shape[0]:
                    items_in_batch = items_list[j:j+batch_size]
                    x_batch = x_block[items_in_batch]
                    y_batch = y_block[items_in_batch]
                    if with_meta:
                        x_batch = [x_batch, X_meta[items_in_batch]]
                    yield (x_batch, y_batch) 
Example 26
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: line_combine.py    License: MIT License
def combine_vectors(order1_input_file, order2_input_file, output_file):
    
    o1_in_file = open(order1_input_file, 'r')
    o2_in_file = open(order2_input_file, 'r')
    o1_line = o1_in_file.readline()
    o2_line = o2_in_file.readline()
    
    vectors = []
    keys = []
    
    while o1_line and o2_line:
        o1_line = o1_line.split()
        o2_line = o2_line.split()
        assert(o1_line[0] == o2_line[0]), "%s and %s are not the same." % (o1_line[0], o2_line[0])
        if len(o1_line) == len(o2_line) and len(o1_line) == 2:
            print("WARNING: Skipping a line because it appears to be header line.")
            o1_line = o1_in_file.readline()
            o2_line = o2_in_file.readline()
            continue
        vector = [val for val in o1_line[1:]] + [val for val in o2_line[1:]]
        vectors.append(vector)
        keys.append(o1_line[0])
        o1_line = o1_in_file.readline()
        o2_line = o2_in_file.readline()
        
    vector_length = len(vectors[0])
    vector_cnt = len(vectors)
    vectors = preprocessing.normalize(vectors)
    output = ""
    for key, vector in zip(keys, vectors):
        output += "%s %s\n" % (key, ' '.join([str(num) for num in vector]))
    out_file = open(output_file, 'w')
    output = "%s %s\n%s" % (vector_cnt, vector_length, output)
    out_file.write(output) 
Example 27
Project: blackbox-attacks   Author: sunblaze-ucb   File: cifar10_query_based.py    License: MIT License
def pca_components(X, dim):
    X = X.reshape((len(X), dim))
    pca = PCA(n_components=dim)
    pca.fit(X)

    U = (pca.components_).T
    U_norm = normalize(U, axis=0)

    return U_norm[:,:args.num_comp] 
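Unlike most examples on this page, this one normalizes with axis=0, so each column of U (each principal direction) is scaled to unit length rather than each row. A toy comparison:

import numpy as np
from sklearn.preprocessing import normalize

M = np.array([[3.0, 0.0],
              [4.0, 2.0]])
by_column = normalize(M, axis=0)           # each column gets unit L2 norm
print(np.linalg.norm(by_column, axis=0))   # [1. 1.]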
Example 28
Project: blackbox-attacks   Author: sunblaze-ucb   File: query_based_attack.py    License: MIT License
def pca_components(X, dim):
    X = X.reshape((len(X), dim))
    pca = PCA(n_components=dim)
    pca.fit(X)

    U = (pca.components_).T
    U_norm = normalize(U, axis=0)

    return U_norm[:,:args.num_comp] 
Example 29
Project: deep-smoke-machine   Author: CMU-CREATE-Lab   File: utils.py    License: BSD 3-Clause "New" or "Revised" License
def normalize_l2(x):
    return preprocessing.normalize(x) 
Example 30
Project: deep-smoke-machine   Author: CMU-CREATE-Lab   File: utils.py    License: BSD 3-Clause "New" or "Revised" License
def normalize_l1(x):
    return preprocessing.normalize(x, norm='l1')
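These two helpers cover the default 'l2' norm and the 'l1' norm; 'max' is the third option normalize() supports, dividing each row by its maximum absolute value. A closing comparison on one illustrative input:

import numpy as np
from sklearn.preprocessing import normalize

x = np.array([[1.0, -2.0, 4.0]])
print(normalize(x, norm='l2'))    # divide by sqrt(1 + 4 + 16), about 4.583
print(normalize(x, norm='l1'))    # divide by 1 + 2 + 4 = 7
print(normalize(x, norm='max'))   # divide by max |x| = 4 -> [[ 0.25 -0.5  1. ]]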