Python keras.utils.to_categorical() Examples

The following are 30 code examples of keras.utils.to_categorical(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.utils , or try the search function

Example #1

Source File: get_data.py From Audio-Vision with MIT License

6 votes

def get_train_data(input_img_h5, input_ques_h5, data_limit=215359):
    img_data = h5py.File(input_img_h5)
    ques_data = h5py.File(input_ques_h5)
  
    img_data = np.array(img_data['images_train'])
    img_pos_train = ques_data['img_pos_train'][:data_limit]
    train_img_data = np.array([img_data[_-1,:] for _ in img_pos_train])
    # Normalizing images
    tem = np.sqrt(np.sum(np.multiply(train_img_data, train_img_data), axis=1))
    train_img_data = np.divide(train_img_data, np.transpose(np.tile(tem,(4096,1))))

    #shifting padding to left side
    ques_train = np.array(ques_data['ques_train'])[:data_limit, :]
    ques_length_train = np.array(ques_data['ques_length_train'])[:data_limit]
    ques_train = right_align(ques_train, ques_length_train)

    train_X = [train_img_data, ques_train]
    # NOTE should've consturcted one-hots using exhausitve list of answers, cause some answers may not be in dataset
    # To temporarily rectify this, all those answer indices is set to 1 in validation set
    train_y = to_categorical(ques_data['answers'])[:data_limit, :]

    return train_X, train_y

Example #2

Source File: np_utils_test.py From DeepLearning_Wavelet-LSTM with MIT License

6 votes

def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels,
                                              one_hots,
                                              expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)

Example #3

Source File: keras_bert_ner_bi_lstm.py From nlp_xiaojiang with MIT License

6 votes

def label_tagging(data_x_s, tag_label2index, len_max=32):
    """
        根据类别字典dict、语料y和最大文本长度l，padding和to_categorical
    :param data_x_s: list
    :param tag_label2index:dict 
    :param len_max: int
    :return: list
    """
    tag_labels = []
    for data_x in data_x_s:
        if len(data_x) <= len_max-2:
            tag_labels.append([tag_label2index['O']] + [tag_label2index[i] for i in data_x] + [tag_label2index['O'] for i in range(len_max - len(data_x) - 1)])
        else:
            tag_labels.append([tag_label2index['O']] + [tag_label2index[i] for i in data_x[:len_max-1]] + [tag_label2index['O']])

    tag_labels_pad = pad_sequences(sequences=tag_labels, maxlen=len_max, dtype='int32',
                                padding='post', truncating='post', value=tag_label2index['O'])
    one_hot_y = to_categorical(tag_labels_pad, num_classes=len(tag_label2index))

    label_num = len(set(["".join(str(i)) for i in tag_labels]))
    # tag_labels_pad_to = to_categorical(y=tag_labels_pad.tolist(), num_classes=label_num)
    return one_hot_y, label_num

Example #4

Source File: data_generator.py From imageatm with Apache License 2.0

6 votes

def _data_generator(self, batch_samples: List[dict]) -> Tuple[np.array, np.array]:
        """Generates data from samples in specified batch."""
        #  initialize images and labels tensors for faster processing
        dims = self.img_crop_dims if self.train == True else self.img_load_dims
        X = np.empty((len(batch_samples), *dims, 3))
        y = np.empty((len(batch_samples), self.n_classes))

        for i, sample in enumerate(batch_samples):
            # load and randomly augment image
            img_file = self.image_dir / sample['image_id']
            img = np.asarray(load_image(img_file, self.img_load_dims))
            if self.train == True:
                img = random_crop(img, self.img_crop_dims)
            X[i,] = img

            # TODO: more efficient by preprocessing
            y[i,] = to_categorical([sample['label']], num_classes=self.n_classes)

        # apply basenet specific preprocessing
        # input is 4D numpy array of RGB values within [0, 255]
        X = self.basenet_preprocess(X)

        return X, y

Example #5

Source File: load_data.py From Image-Caption-Generator with MIT License

6 votes

def create_sequences(tokenizer, max_length, captions_list, image):
	# X1 : input for image features
	# X2 : input for text features
	# y  : output word
	X1, X2, y = list(), list(), list()
	vocab_size = len(tokenizer.word_index) + 1
	# Walk through each caption for the image
	for caption in captions_list:
		# Encode the sequence
		seq = tokenizer.texts_to_sequences([caption])[0]
		# Split one sequence into multiple X,y pairs
		for i in range(1, len(seq)):
			# Split into input and output pair
			in_seq, out_seq = seq[:i], seq[i]
			# Pad input sequence
			in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
			# Encode output sequence
			out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
			# Store
			X1.append(image)
			X2.append(in_seq)
			y.append(out_seq)
	return X1, X2, y

# Data generator, intended to be used in a call to model.fit_generator()

Example #6

Source File: data_loader.py From bi-lstm-crf with Apache License 2.0

6 votes

def generator_from_data(self, X, Y):
        steps = 0
        total_size = X.shape[0]
        while True:
            if steps >= self.shuffle_batch:
                indicates = list(range(total_size))
                np.random.shuffle(indicates)
                X = X[indicates]
                Y = Y[indicates]
                steps = 0
            sample_index = np.random.randint(0, total_size - self.batch_size)
            ret_x = X[sample_index:sample_index + self.batch_size]
            ret_y = Y[sample_index:sample_index + self.batch_size]

            if not self.sparse_target:
                ret_y = to_categorical(ret_y, num_classes=self.tgt_vocab_size + 1)
            else:
                ret_y = np.expand_dims(ret_y, 2)
            yield ret_x, ret_y
            steps += 1

Example #7

Source File: np_utils_test.py From DeepLearning_Wavelet-LSTM with MIT License

6 votes

def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels,
                                              one_hots,
                                              expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)

Example #8

Source File: np_utils_test.py From DeepLearning_Wavelet-LSTM with MIT License

6 votes

def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels,
                                              one_hots,
                                              expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)

Example #9

Source File: np_utils_test.py From DeepLearning_Wavelet-LSTM with MIT License

6 votes

def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels,
                                              one_hots,
                                              expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)

Example #10

Source File: np_utils_test.py From DeepLearning_Wavelet-LSTM with MIT License

6 votes

def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels,
                                              one_hots,
                                              expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)

Example #11

Source File: np_utils_test.py From DeepLearning_Wavelet-LSTM with MIT License

6 votes

def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels,
                                              one_hots,
                                              expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)

Example #12

Source File: 3leveldcnet.py From Multi-level-DCNet with GNU General Public License v3.0

6 votes

def load_dataset():
    # Load the dataset from Keras
    from keras.datasets import cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    # Preprocessing the dataset
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train= preprocess_input(x_train)
    x_test= preprocess_input(x_test)
    x_train = x_train.reshape(-1, 32, 32, 3).astype('float32') 
    x_test = x_test.reshape(-1, 32, 32, 3).astype('float32')
    y_train = to_categorical(y_train.astype('float32'))
    y_test = to_categorical(y_test.astype('float32'))

    return (x_train, y_train), (x_test, y_test)

Example #13

Source File: np_utils_test.py From DeepLearning_Wavelet-LSTM with MIT License

6 votes

def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels,
                                              one_hots,
                                              expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)

Example #14

Source File: data_loader.py From bi-lstm-crf with Apache License 2.0

6 votes

def load_sents_from_file(self, file_path, encoding):
        with open(file_path, encoding=encoding) as f:
            sent, chunk = [], []
            for line in f:
                line = line[:-1]
                chars, tags = line.split(self.sent_delimiter)
                sent.append(chars.split(self.word_delimiter))
                chunk.append(tags.split(self.word_delimiter))
                if len(sent) >= self.batch_size:
                    sent = self.src_tokenizer.texts_to_sequences(sent)
                    chunk = self.tgt_tokenizer.texts_to_sequences(chunk)
                    sent, chunk = self._pad_seq(sent, chunk)
                    if not self.sparse_target:
                        chunk = to_categorical(chunk, num_classes=self.tgt_vocab_size + 1)
                    yield sent, chunk
                    sent, chunk = [], []

Example #15

Source File: data.py From icassp19 with MIT License

6 votes

def __getitem__(self, index):
        """
        takes an index (batch number) and returns one batch of self.batch_size
        :param index:
        :return:
        """
        # index is taken care of by the Sequencer inherited
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # fetch labels for the batch
        y_int = np.empty((self.batch_size, 1), dtype='int')
        for tt in np.arange(self.batch_size):
            y_int[tt] = int(self.labels[indexes[tt]])
        y_cat = to_categorical(y_int, num_classes=self.n_classes)

        # fetch features for the batch and adjust format to input CNN
        # (batch_size, 1, time, freq) for channels_first
        features = self.features[indexes, np.newaxis]
        return features, y_cat

Example #16

Source File: task_sentiment_virtual_adversarial_training.py From bert4keras with Apache License 2.0

6 votes

def __iter__(self, random=False):
        batch_token_ids, batch_segment_ids, batch_labels = [], [], []
        for is_end, (text, label) in self.sample(random):
            token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append(label)
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = to_categorical(batch_labels, num_classes)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


# 转换数据集

Example #17

Source File: get_data.py From Audio-Vision with MIT License

6 votes

def get_train_data(input_img_h5, input_ques_h5, data_limit=215359):
    img_data = h5py.File(input_img_h5)
    ques_data = h5py.File(input_ques_h5)
  
    img_data = np.array(img_data['images_train'])
    img_pos_train = ques_data['img_pos_train'][:data_limit]
    train_img_data = np.array([img_data[_-1,:] for _ in img_pos_train])
    # Normalizing images
    tem = np.sqrt(np.sum(np.multiply(train_img_data, train_img_data), axis=1))
    train_img_data = np.divide(train_img_data, np.transpose(np.tile(tem,(4096,1))))

    #shifting padding to left side
    ques_train = np.array(ques_data['ques_train'])[:data_limit, :]
    ques_length_train = np.array(ques_data['ques_length_train'])[:data_limit]
    ques_train = right_align(ques_train, ques_length_train)

    train_X = [train_img_data, ques_train]
    # NOTE should've consturcted one-hots using exhausitve list of answers, cause some answers may not be in dataset
    # To temporarily rectify this, all those answer indices is set to 1 in validation set
    train_y = to_categorical(ques_data['answers'])[:data_limit, :]

    return train_X, train_y

Example #18

Source File: data.py From five-video-classification-methods with MIT License

5 votes

def get_class_one_hot(self, class_str):
        """Given a class as a string, return its number in the classes
        list. This lets us encode and one-hot it for training."""
        # Encode it first.
        label_encoded = self.classes.index(class_str)

        # Now one-hot it.
        label_hot = to_categorical(label_encoded, len(self.classes))

        assert len(label_hot) == len(self.classes)

        return label_hot

Example #19

Source File: mlearn.py From easy12306 with Artistic License 2.0

5 votes

def load_data(fn='texts.npz', to=False):
    data = np.load(fn)
    texts, labels = data['texts'], data['labels']
    texts = texts / 255.0
    _, h, w = texts.shape
    texts.shape = (-1, h, w, 1)
    if to:
        labels = to_categorical(labels)
    n = int(texts.shape[0] * 0.9)   # 90%用于训练，10%用于测试
    return (texts[:n], labels[:n]), (texts[n:], labels[n:])

Example #20

Source File: pseudo_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def test_generator(self):
        while True:
            indices = np.arange(self.y_test.shape[0])
            np.random.shuffle(indices)
            for i in range(len(indices)//self.batch_size):
                current_indices = indices[i*self.batch_size:(i+1)*self.batch_size]
                X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
                y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
                y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])] # flagは0とする
                yield X_batch, y_batch

Example #21

Source File: pseudo_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def train_generator(self):
        while True:
            X, y, flag = self.train_mixture()
            n_batch = X.shape[0] // self.batch_size
            for i in range(n_batch):
                X_batch = (X[i*self.batch_size:(i+1)*self.batch_size]/255.0).astype(np.float32)
                y_batch = to_categorical(y[i*self.batch_size:(i+1)*self.batch_size], self.n_classes)
                y_batch = np.c_[y_batch, flag[i*self.batch_size:(i+1)*self.batch_size]]
                yield X_batch, y_batch

Example #22

Source File: mobilenet_transfer_pseudo_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def test_generator(self):
        while True:
            indices = np.arange(self.y_test.shape[0])
            np.random.shuffle(indices)
            for i in range(len(indices)//self.batch_size):
                current_indices = indices[i*self.batch_size:(i+1)*self.batch_size]
                X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
                y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
                y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])] # flagは0とする
                yield X_batch, y_batch

Example #23

Source File: pseudo_pretrain_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def train(n_labeled_data):
    model = create_cnn()
    
    pseudo = PseudoCallback(model, n_labeled_data, min(512, n_labeled_data))

    # pretrain
    model.compile("adam", loss="categorical_crossentropy", metrics=["acc"])
    model.fit(pseudo.X_train_labeled/255.0, to_categorical(pseudo.y_train_labeled),
              batch_size=pseudo.batch_size, epochs=30,
              validation_data=(pseudo.X_test/255.0, to_categorical(pseudo.y_test)))
    pseudo.y_train_unlabeled_prediction = np.argmax(
            model.predict(pseudo.X_train_unlabeled), axis=-1,).reshape(-1, 1)

    #main-train
    model.compile("adam", loss=pseudo.loss_function, metrics=[pseudo.accuracy])

    if not os.path.exists("result_pseudo"):
        os.mkdir("result_pseudo")

    hist = model.fit_generator(pseudo.train_generator(), steps_per_epoch=pseudo.train_steps_per_epoch,
                               validation_data=pseudo.test_generator(), callbacks=[pseudo],
                               validation_steps=pseudo.test_stepes_per_epoch, epochs=100).history
    hist["labeled_accuracy"] = pseudo.labeled_accuracy
    hist["unlabeled_accuracy"] = pseudo.unlabeled_accuracy

    with open(f"result_pseudo/history_{n_labeled_data:05}.dat", "wb") as fp:
        pickle.dump(hist, fp)

Example #24

Source File: pseudo_pretrain_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def test_generator(self):
        while True:
            indices = np.arange(self.y_test.shape[0])
            np.random.shuffle(indices)
            for i in range(len(indices)//self.batch_size):
                current_indices = indices[i*self.batch_size:(i+1)*self.batch_size]
                X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
                y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
                y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])] # flagは0とする
                yield X_batch, y_batch

Example #25

Source File: pseudo_pretrain_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def train_generator(self):
        while True:
            X, y, flag = self.train_mixture()
            n_batch = X.shape[0] // self.batch_size
            for i in range(n_batch):
                X_batch = (X[i*self.batch_size:(i+1)*self.batch_size]/255.0).astype(np.float32)
                y_batch = to_categorical(y[i*self.batch_size:(i+1)*self.batch_size], self.n_classes)
                y_batch = np.c_[y_batch, flag[i*self.batch_size:(i+1)*self.batch_size]]
                yield X_batch, y_batch

Example #26

Source File: mobilenet_pseudo_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def test_generator(self):
        while True:
            indices = np.arange(self.y_test.shape[0])
            np.random.shuffle(indices)
            for i in range(len(indices)//self.batch_size):
                current_indices = indices[i*self.batch_size:(i+1)*self.batch_size]
                X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
                y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
                y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])] # flagは0とする
                yield X_batch, y_batch

Example #27

Source File: mobilenet_pseudo_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def train_generator(self):
        while True:
            X, y, flag = self.train_mixture()
            n_batch = X.shape[0] // self.batch_size
            for i in range(n_batch):
                X_batch = (X[i*self.batch_size:(i+1)*self.batch_size]/255.0).astype(np.float32)
                y_batch = to_categorical(y[i*self.batch_size:(i+1)*self.batch_size], self.n_classes)
                y_batch = np.c_[y_batch, flag[i*self.batch_size:(i+1)*self.batch_size]]
                yield X_batch, y_batch

Example #28

Source File: get_data.py From Audio-Vision with MIT License

5 votes

def get_test_data(input_img_h5, input_ques_h5,metadata,val_annotations_path):
    img_data = h5py.File(input_img_h5)
    ques_data = h5py.File(input_ques_h5)
    with open(val_annotations_path, 'r') as an_file:
        annotations = json.loads(an_file.read())

    img_data = np.array(img_data['images_test'])
    img_pos_train = ques_data['img_pos_test']
    train_img_data = np.array([img_data[_-1,:] for _ in img_pos_train])
    tem = np.sqrt(np.sum(np.multiply(train_img_data, train_img_data), axis=1))
    train_img_data = np.divide(train_img_data, np.transpose(np.tile(tem,(4096,1))))

    ques_train = np.array(ques_data['ques_test'])
    ques_length_train = np.array(ques_data['ques_length_test'])
    ques_train = right_align(ques_train, ques_length_train)

    # Convert all last index to 0, coz embeddings were made that way :/
    for _ in ques_train:
        if 12602 in _:
            _[_==12602] = 0

    val_X = [train_img_data, ques_train]

    ans_to_ix = {str(ans):int(i) for i,ans in metadata['ix_to_ans'].items()}
    ques_annotations = {}
    for _ in annotations['annotations']:
        idx = ans_to_ix.get(_['multiple_choice_answer'].lower())
        _['multiple_choice_answer_idx'] = 1 if idx in [None, 1000] else idx
        ques_annotations[_['question_id']] = _

    abs_val_y = [ques_annotations[ques_id]['multiple_choice_answer_idx'] for ques_id in ques_data['question_id_test']]
    abs_val_y = to_categorical(np.array(abs_val_y))

    multi_val_y = [list(set([ans_to_ix.get(_['answer'].lower()) for _ in ques_annotations[ques_id]['answers']])) for ques_id in ques_data['question_id_test']]
    for i,_ in enumerate(multi_val_y):
        multi_val_y[i] = [1 if ans in [None, 1000] else ans for ans in _]

    return val_X, abs_val_y, multi_val_y

Example #29

Source File: data_processor.py From tying-wv-and-wc with MIT License

5 votes

def format(self, word_seq, vocab_size, sequence_size):
        words = []
        nexts = []
        sequence_count = (len(word_seq) - 1) // sequence_size
        for i in range(sequence_count):
            start = i * sequence_size
            words.append(word_seq[start:start + sequence_size])
            next_seq = word_seq[(start + 1):(start + 1 + sequence_size)]
            next_seq_as_one_hot = to_categorical(next_seq, vocab_size)  # to one hot vector
            nexts.append(next_seq_as_one_hot)
        
        words = np.array(words)
        nexts = np.array(nexts)

        return words, nexts

Example #30

Source File: mobilenet_supervised_cifar.py From Pseudo-Label-Keras with MIT License

5 votes

def train(n_labeled_data):
    model = create_cnn()
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)

    y_test_true = np.ravel(y_test)
    X_train = X_train[indices[:n_labeled_data]] / 255.0
    X_test = X_test / 255.0
    y_train = to_categorical(y_train[indices[:n_labeled_data]], 10)
    y_test = to_categorical(y_test, 10)
    
    model.compile("adam", loss="categorical_crossentropy", metrics=["acc"])

    if not os.path.exists("result_mobilenet"):
        os.mkdir("result_mobilenet")

    hist = model.fit(X_train, y_train, batch_size=min(n_labeled_data, 256), 
                     validation_data=(X_test, y_test), epochs=100).history

    with open(f"result_mobilenet/history_{n_labeled_data:05}.dat", "wb") as fp:
        pickle.dump(hist, fp)

    # tsne-plot
    emb_model = Model(model.input, model.layers[-2].output)
    embedding = emb_model.predict(X_test)
    proj = TSNE(n_components=2).fit_transform(embedding)
    cmp = plt.get_cmap("tab10")
    plt.figure()
    for i in range(10):
        select_flag = y_test_true == i
        plt_latent = proj[select_flag, :]
        plt.scatter(plt_latent[:,0], plt_latent[:,1], color=cmp(i), marker=".")
    plt.savefig(f"result_mobilenet/embedding_{n_labeled_data:05}.png")