Python keras.datasets.imdb.load_data() Examples

The following are code examples of keras.datasets.imdb.load_data(), drawn from open-source projects. Several excerpts come from dataset test files and exercise the other keras.datasets loaders (cifar10, cifar100, mnist, reuters, boston_housing) alongside imdb. Each example notes its source project, author, file, and license.
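Before the project excerpts, a minimal self-contained sketch of the API itself may help. load_data() returns train/test pairs of integer word-index sequences with binary sentiment labels; get_word_index() returns the word-to-index dict, and by default the indices in the data are offset by 3 (index_from=3) to reserve slots for padding, start, and out-of-vocabulary markers:

from keras.datasets import imdb

# Keep only the 10,000 most frequent words; rarer words map to the OOV index.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)
print(len(x_train), len(x_test))   # 25000 25000
print(y_train[:5])                 # binary labels, e.g. [1 0 0 1 0]

# Decode a review back to text. load_data() offsets word indices by 3
# (index_from=3 is the default), so shift each index to match.
word_index = imdb.get_word_index()
reverse_index = {i + 3: w for w, i in word_index.items()}
print(' '.join(reverse_index.get(i, '?') for i in x_train[0][:20]))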
Example #1
Source Project: CAPTCHA-breaking   Author: lllcho   File: test_datasets.py    License: MIT License
def test_cifar(self):
        print('cifar10')
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        print(X_train.shape)
        print(X_test.shape)
        print(y_train.shape)
        print(y_test.shape)

        print('cifar100 fine')
        (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
        print(X_train.shape)
        print(X_test.shape)
        print(y_train.shape)
        print(y_test.shape)

        print('cifar100 coarse')
        (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
        print(X_train.shape)
        print(X_test.shape)
        print(y_train.shape)
        print(y_test.shape) 
Example #2
Source Project: CAPTCHA-breaking   Author: lllcho   File: test_datasets.py    License: MIT License
def test_imdb(self):
        print('imdb')
        (X_train, y_train), (X_test, y_test) = imdb.load_data() 
Example #3
Source Project: plaidbench   Author: plaidml   File: frontend_keras.py    License: Apache License 2.0
def setup_cifar(train, epoch_size):
    # Setup
    if train:
        # Training setup
        from keras.datasets import cifar10
        from keras.utils.np_utils import to_categorical
        click.echo('Loading CIFAR data')
        (x_train, y_train_cats), (_, _) = cifar10.load_data()
        x_train = x_train[:epoch_size]
        y_train_cats = y_train_cats[:epoch_size]
        y_train = to_categorical(y_train_cats, num_classes=1000)
    else:
        # Inference setup
        this_dir = os.path.dirname(os.path.abspath(__file__))
        cifar_path = os.path.join(this_dir, 'cifar16.npy')
        x_train = np.load(cifar_path).repeat(1 + epoch_size // 16, axis=0)[:epoch_size]
        y_train = None
    return x_train, y_train 
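Note the num_classes=1000 in to_categorical even though CIFAR-10 has only ten classes: plaidbench runs CIFAR images through ImageNet-style models, so the labels are presumably one-hot encoded to match those models' 1000-way output layers.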
Example #4
Source Project: plaidbench   Author: plaidml   File: frontend_keras.py    License: Apache License 2.0
def setup_imdb(train, epoch_size):
    # Setup
    if train:
        # Training setup
        from keras.datasets import imdb
        from keras.preprocessing import sequence
        click.echo('Loading IMDB data')
        (x_train, y_train), (_, _) = imdb.load_data(num_words=imdb_max_features)
        x_train = sequence.pad_sequences(x_train, maxlen=imdb_max_length)
        x_train = x_train[:epoch_size]
        y_train = y_train[:epoch_size]
    else:
        # Inference setup
        this_dir = os.path.dirname(os.path.abspath(__file__))
        imdb_path = os.path.join(this_dir, 'imdb16.npy')
        x_train = np.load(imdb_path).repeat(1 + epoch_size // 16, axis=0)[:epoch_size]
        y_train = None
    return x_train, y_train 
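Here imdb_max_features and imdb_max_length are module-level constants defined elsewhere in frontend_keras.py. The pad_sequences call is what turns the ragged lists of word indices into a rectangular array: short reviews are zero-padded and long ones truncated, both from the left by default. A tiny standalone illustration (the toy sequences are made up):

from keras.preprocessing import sequence

# Two made-up index sequences of different lengths.
seqs = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
print(sequence.pad_sequences(seqs, maxlen=5))
# [[0 0 1 2 3]    <- zero-padded on the left (padding='pre' is the default)
#  [5 6 7 8 9]]   <- truncated from the left (truncating='pre' is the default)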
Example #5
Source Project: hyperas   Author: maxpumperla   File: lstm.py    License: MIT License
def data():
    maxlen = 100
    max_features = 20000

    print('Loading data...')
    (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)  # nb_words is the Keras 1 argument name; Keras 2 renamed it num_words
    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    print("Pad sequences (samples x time)")
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    return X_train, X_test, y_train, y_test, max_features, maxlen 
Example #6
Source Project: kopt   Author: Avsecz   File: data.py    License: MIT License
def data(max_features=5000, maxlen=400):
    print('Loading data...')
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

    # subset the data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    x_test = x_test[:100]
    y_test = y_test[:100]

    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')

    print('Pad sequences (samples x time)')
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)
    return (x_train, y_train, [1, 2, 3, "dummy_data"]), (x_test, y_test) 
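The odd trailing [1, 2, 3, "dummy_data"] list appears to be deliberate: kopt's data functions can pass extra objects along with the arrays, and this example presumably includes the placeholder list to exercise that path.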
Example #7
Source Project: DEC-keras   Author: XifengGuo   File: datasets.py    License: MIT License
def load_retures_keras():  # sic: 'retures' is a typo for 'reuters' in the original source
    from keras.preprocessing.text import Tokenizer
    from keras.datasets import reuters
    max_words = 1000

    print('Loading data...')
    (x, y), (_, _) = reuters.load_data(num_words=max_words, test_split=0.)
    print(len(x), 'train sequences')

    num_classes = np.max(y) + 1
    print(num_classes, 'classes')

    print('Vectorizing sequence data...')
    tokenizer = Tokenizer(num_words=max_words)
    x = tokenizer.sequences_to_matrix(x, mode='binary')
    print('x_train shape:', x.shape)

    return x.astype(float), y 
Example #8
Source Project: DEC-keras   Author: XifengGuo   File: datasets.py    License: MIT License
def load_imdb():
    from keras.preprocessing.text import Tokenizer
    from keras.datasets import imdb
    max_words = 1000

    print('Loading data...')
    (x1, y1), (x2, y2) = imdb.load_data(num_words=max_words)
    x = np.concatenate((x1, x2))
    y = np.concatenate((y1, y2))
    print(len(x), 'train sequences')

    num_classes = np.max(y) + 1
    print(num_classes, 'classes')

    print('Vectorizing sequence data...')
    tokenizer = Tokenizer(num_words=max_words)
    x = tokenizer.sequences_to_matrix(x, mode='binary')
    print('x_train shape:', x.shape)

    return x.astype(float), y 
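The Tokenizer step is what turns each review from a variable-length list of word indices into a fixed-width multi-hot vector, so downstream code can treat the corpus as a dense (samples, max_words) matrix. A small illustration with made-up sequences:

from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(num_words=6)
m = tokenizer.sequences_to_matrix([[1, 3, 3], [2, 4]], mode='binary')
print(m.shape)  # (2, 6): one row per sequence, one column per word index
print(m)
# [[0. 1. 0. 1. 0. 0.]   <- indices 1 and 3 are set; the repeated 3 still yields 1
#  [0. 0. 1. 0. 1. 0.]]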
Example #9
Source Project: Deep-Learning-Quick-Reference   Author: PacktPublishing   File: imdb_sentiment.py    License: MIT License
def main():

    data = load_data(20000)
    data = pad_sequences(data)
    model = build_network(vocab_size=data["vocab_size"],
                          embedding_dim=100,
                          sequence_length=data["sequence_length"])

    callbacks = create_callbacks("sentiment")

    model.fit(x=data["X_train"], y=data["y_train"],
              batch_size=32,
              epochs=10,
              validation_data=(data["X_test"], data["y_test"]),
              callbacks=callbacks)

    model.save("sentiment.h5")

    score, acc = model.evaluate(data["X_test"], data["y_test"],
                                batch_size=32)
    print('Test loss:', score)
    print('Test accuracy:', acc) 
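Here load_data, pad_sequences, build_network, and create_callbacks are helper functions defined elsewhere in the book's repository, not Keras built-ins, and data is a dict carrying the arrays plus vocab_size and sequence_length. As a rough sketch of what build_network might look like for binary sentiment classification (an assumed reconstruction, not the book's actual code), the classic IMDB architecture is an embedding followed by an LSTM and a sigmoid output:

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

def build_network(vocab_size, embedding_dim, sequence_length):
    # Hypothetical reconstruction: embedding -> LSTM -> binary sigmoid head.
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, input_length=sequence_length))
    model.add(LSTM(128))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model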
Example #10
Source Project: CAPTCHA-breaking   Author: lllcho   File: test_datasets.py    License: MIT License
def test_reuters(self):
        print('reuters')
        (X_train, y_train), (X_test, y_test) = reuters.load_data() 
Example #11
Source Project: CAPTCHA-breaking   Author: lllcho   File: test_datasets.py    License: MIT License
def test_mnist(self):
        print('mnist')
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        print(X_train.shape)
        print(X_test.shape)
        print(y_train.shape)
        print(y_test.shape) 
Example #12
Source Project: DeepLearning_Wavelet-LSTM   Author: hello-sea   File: test_datasets.py    License: MIT License
def test_cifar():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000
        (x_train, y_train), (x_test, y_test) = cifar100.load_data('fine')
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000
        (x_train, y_train), (x_test, y_test) = cifar100.load_data('coarse')
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000 
Example #13
Source Project: DeepLearning_Wavelet-LSTM   Author: hello-sea   File: test_datasets.py    License: MIT License
def test_reuters():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = reuters.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        assert len(x_train) + len(x_test) == 11228
        (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = reuters.get_word_index()
        assert isinstance(word_index, dict) 
Example #14
Source Project: DeepLearning_Wavelet-LSTM   Author: hello-sea   File: test_datasets.py    License: MIT License
def test_mnist():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        assert len(x_train) == len(y_train) == 60000
        assert len(x_test) == len(y_test) == 10000 
Example #15
Source Project: DeepLearning_Wavelet-LSTM   Author: hello-sea   File: test_datasets.py    License: MIT License
def test_imdb():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = imdb.load_data()
        (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = imdb.get_word_index()
        assert isinstance(word_index, dict) 
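Note that maxlen here filters rather than truncates: imdb.load_data(maxlen=40) drops every review longer than 40 tokens, so the second call returns far fewer samples than the first. A quick way to see the effect:

from keras.datasets import imdb

(x_all, _), _ = imdb.load_data()
(x_short, _), _ = imdb.load_data(maxlen=40)
print(len(x_all), len(x_short))        # the filtered training set is much smaller
print(max(len(s) for s in x_short))    # every remaining review fits within maxlen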
Example #16
Source Project: DeepLearning_Wavelet-LSTM   Author: hello-sea   File: test_datasets.py    License: MIT License
def test_boston_housing():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = boston_housing.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test) 