Python sklearn.preprocessing.MinMaxScaler() Examples

The following are 30 code examples of sklearn.preprocessing.MinMaxScaler(), drawn from open-source projects; the source project and file are noted above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
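Before the project examples, here is a minimal self-contained sketch of the MinMaxScaler API itself; the data values are illustrative only. fit learns each column's minimum and maximum, transform maps the columns into feature_range (default (0, 1)), and inverse_transform maps scaled values back:

from sklearn.preprocessing import MinMaxScaler
import numpy as np

# illustrative toy data: two columns on very different scales
X = np.array([[1.0, 200.0],
              [2.0, 400.0],
              [3.0, 600.0]])

scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X)   # each column now spans [0, 1]

print(scaler.data_min_)              # per-column minima learned by fit: [  1. 200.]
print(scaler.data_max_)              # per-column maxima learned by fit: [  3. 600.]

X_restored = scaler.inverse_transform(X_scaled)  # recovers the original values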
Example #1
Source File: make_data.py    From DCC with MIT License
def make_mnist_data(path, isconv=False):
    X, Y = load_mnist(path, True)
    X = X.astype(np.float64)
    X2, Y2 = load_mnist(path, False)
    X2 = X2.astype(np.float64)
    X3 = np.concatenate((X, X2), axis=0)

    minmaxscale = MinMaxScaler().fit(X3)

    X = minmaxscale.transform(X)
    if isconv:
        X = X.reshape((-1, 1, 28, 28))

    sio.savemat(osp.join(path, 'traindata.mat'), {'X': X, 'Y': Y})

    X2 = minmaxscale.transform(X2)
    if isconv:
        X2 = X2.reshape((-1, 1, 28, 28))

    sio.savemat(osp.join(path, 'testdata.mat'), {'X': X2, 'Y': Y2}) 
Example #2
Source File: train.py    From skorch with BSD 3-Clause "New" or "Revised" License
def get_model(with_pipeline=False):
    """Get a multi-layer perceptron model.

    Optionally, put it in a pipeline that scales the data.

    """
    model = NeuralNetClassifier(MLPClassifier)
    if with_pipeline:
        model = Pipeline([
            ('scale', FeatureUnion([
                ('minmax', MinMaxScaler()),
                ('normalize', Normalizer()),
            ])),
            ('select', SelectKBest(k=N_FEATURES)),  # keep input size constant
            ('net', model),
        ])
    return model 
Example #3
Source File: friedman_scores.py    From mlens with MIT License
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}

    est = {'Standard Scaling':
               [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling':
               [SVR()],
           'No Preprocessing':
               [RandomForestRegressor(random_state=SEED),
                GradientBoostingRegressor()]}

    ens.add(est, prep)

    ens.add(GradientBoostingRegressor(), meta=True)

    return ens 
Example #4
Source File: test_nfpc.py    From fylearn with MIT License
def test_build_meowa_factory():

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = nfpc.FuzzyPatternClassifier(membership_factory=t_factory,
                                    aggregation_factory=nfpc.MEOWAFactory())

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)

    assert 0.80 < mean 
Example #5
Source File: test_fpcga.py    From fylearn with MIT License
def test_classifier_iris():

    iris = load_iris()

    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = fpcga.FuzzyPatternClassifierGA(iterations=100, random_state=1)

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)

    assert len(scores) == 10
    assert np.mean(scores) > 0.6
    mean = np.mean(scores)

    print("mean", mean)

    assert 0.92 == pytest.approx(mean, 0.01) 
Example #6
Source File: compare.py    From StarTrader with MIT License
def scale_data(self):
        """
        Scale the X and Y data with a min-max scaler.
        The scaling is done separately for the train and test sets to avoid look-ahead bias.
        """
        self.XY = pd.concat([self.X, self.Y], axis=1).dropna()
        train_set = self.XY.loc[START_TRAIN:END_TRAIN]
        test_set = self.XY.loc[START_TEST:END_TEST]
        # MinMax scaling
        minmaxed_scaler = MinMaxScaler(feature_range=(0, 1))
        self.minmaxed = minmaxed_scaler.fit(train_set)
        train_set_matrix = minmaxed_scaler.transform(train_set)
        test_set_matrix = minmaxed_scaler.transform(test_set)
        self.train_set_matrix_df = pd.DataFrame(train_set_matrix, index=train_set.index, columns=train_set.columns)
        self.test_set_matrix_df = pd.DataFrame(test_set_matrix, index=test_set.index, columns=test_set.columns)
        self.XY = pd.concat([self.train_set_matrix_df, self.test_set_matrix_df], axis=0)

        # print ("Train set shape: ", train_set_matrix.shape)
        # print ("Test set shape: ", test_set_matrix.shape) 
Example #7
Source File: cnn_lstm_autoencoder.py    From keras-anomaly-detection with MIT License
def main():
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    ecg_np_data = ecg_data.values  # .as_matrix() was removed in pandas 1.0
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = CnnLstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path, estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path and detect anomalies
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold) 
Example #8
Source File: test_utils.py    From gordo with GNU Affero General Public License v3.0
def test_metrics_wrapper():
    # make the features in y be in different scales
    y = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) * [1, 100]

    # With no scaler provided it is relevant which of the two series gets an 80% error
    metric_func_noscaler = model_utils.metric_wrapper(mean_squared_error)

    mse_feature_one_wrong = metric_func_noscaler(y, y * [0.8, 1])
    mse_feature_two_wrong = metric_func_noscaler(y, y * [1, 0.8])

    assert not np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)

    # With a scaler provided it is not relevant which of the two series gets an 80%
    # error
    scaler = MinMaxScaler().fit(y)
    metric_func_scaler = model_utils.metric_wrapper(mean_squared_error, scaler=scaler)

    mse_feature_one_wrong = metric_func_scaler(y, y * [0.8, 1])
    mse_feature_two_wrong = metric_func_scaler(y, y * [1, 0.8])

    assert np.isclose(mse_feature_one_wrong, mse_feature_two_wrong) 
Example #9
Source File: STFIWF.py    From 2016CCF-sougou with Apache License 2.0
def get_term_topic(self, X):
        n_features = X.shape[1]
        id2word = self.vocabulary_
        word2topic = {}

    with open('word_topic.txt', 'r', encoding='utf-8') as f:  # open as text; no manual decode needed in Python 3
        for line in f:
            strs = line.strip('\n').split('\t')
            word2topic[strs[0]] = strs[2]

        topic = np.zeros((len(id2word),))

        for i, key in enumerate(id2word):
            if key in word2topic:
                topic[id2word[key]] = word2topic[key]
            else:
            print(key)

    topic = preprocessing.MinMaxScaler().fit_transform(topic.reshape(-1, 1)).ravel()  # scikit-learn scalers expect 2-D input
        # topic = sp.spdiags(topic, diags=0, m=n_features,
        #                    n=n_features, format='csr')
        return topic 
Example #10
Source File: util.py    From stock-price-prediction with MIT License
def applyFeatures(dataset, delta):
    """
    applies rolling mean and delayed returns to each dataframe in the list
    """
    columns = dataset.columns
    close = columns[-3]
    returns = columns[-1]
    for n in delta:
        addFeatures(dataset, close, returns, n)

    dataset = dataset.drop(dataset.index[0:max(delta)])  # drop NaN rows introduced by the largest delta

    # normalize columns
    scaler = preprocessing.MinMaxScaler()
    return pd.DataFrame(scaler.fit_transform(dataset),
                        columns=dataset.columns, index=dataset.index)
Example #11
Source File: lstm_autoencoder.py    From keras-anomaly-detection with MIT License
def main():
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    ecg_np_data = ecg_data.values  # .as_matrix() was removed in pandas 1.0
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = LstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path, estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path and detect anomalies
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold) 
Example #12
Source File: make_data.py    From DCC with MIT License
def make_easy_visual_data(path, N=600):
    """Make 3 clusters of 2D data where the cluster centers lie along a line.
    The latent variable would be just their x or y value since that uniquely defines their projection onto the line.
    """

    line = (1.5, 1)
    centers = [(m, m * line[0] + line[1]) for m in (-4, 0, 6)]
    cluster_std = [1, 1, 1.5]
    X, labels = make_blobs(n_samples=N, cluster_std=cluster_std, centers=centers, n_features=len(centers[0]))

    # scale data
    minmaxscale = MinMaxScaler().fit(X)
    X = minmaxscale.transform(X)

    save_misc_data(path, X, labels, N)
    return X, labels 
Example #13
Source File: make_data.py    From DCC with MIT License
def make_misc_data(path, filename, dim, isconv=False):
    import pickle  # cPickle is Python 2 only
    with open(osp.join(path, filename), 'rb') as fo:  # pickle files must be opened in binary mode
        data = pickle.load(fo)
    X = data['data'].astype(np.float64)
    Y = data['labels']

    minmaxscale = MinMaxScaler().fit(X)
    X = minmaxscale.transform(X)

    p = np.random.permutation(X.shape[0])
    X = X[p]
    Y = Y[p]

    N = X.shape[0]

    if isconv:
        X = X.reshape((-1, dim[2], dim[0], dim[1]))
    save_misc_data(path, X, Y, N) 
Example #14
Source File: test_nfpc.py    From fylearn with MIT License
def test_build_ps_owa_factory():

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    from sklearn.preprocessing import MinMaxScaler
    X = MinMaxScaler().fit_transform(X)

    l = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=nfpc.GAOWAFactory(optimizer=nfpc.ps_owa_optimizer())
    )

    from sklearn.model_selection import cross_val_score

    scores = cross_val_score(l, X, y, cv=10)
    mean = np.mean(scores)

    print("mean", mean)

    assert 0.92 < mean 
Example #15
Source File: parallel_prophet_forecast_using_individual_groups.py    From driverlessai-recipes with Apache License 2.0
def scale_target_for_each_time_group(self, X, tgc_wo_time):
        # Go through the groups and min-max scale each one
        if len(tgc_wo_time) > 0:
            X_groups = X.groupby(tgc_wo_time)
        else:
            X_groups = [([None], X)]

        self.scalers = {}
        scaled_ys = []
        for key, X_grp in X_groups:
            # Create dict key to store the min max scaler
            grp_hash = self.get_hash(key)
            # Scale target for current group
            self.scalers[grp_hash] = MinMaxScaler()
            y_skl = self.scalers[grp_hash].fit_transform(X_grp[['y']].values)
            # Put back in a DataFrame to keep track of original index
            y_skl_df = pd.DataFrame(y_skl, columns=['y'])
            y_skl_df.index = X_grp.index
            scaled_ys.append(y_skl_df)
        # Set target back in original frame but keep original
        X['y_orig'] = X['y']
        X['y'] = pd.concat(tuple(scaled_ys), axis=0)
        return X 
Example #16
Source File: feature_engineering.py    From copper_price_forecast with GNU General Public License v3.0
def _pp_min_max_scale(df):
    """
    Min-max scale the feature values.
    """
    print("  start minmax scaling...")
    # drop the id and price_date columns
    # df = df.drop(['id', 'price_date'], axis=1)
    # save the index and column information
    index = df.index
    columns = df.columns
    # min-max scale the features
    feature_scaled = preprocessing.MinMaxScaler().fit_transform(df.iloc[:, :-1])

    target = np.array(df.iloc[:, -1])
    target.shape = (len(target), 1)

    # merge the scaled X with the unscaled y (scaling turns the pandas DataFrame into a numpy ndarray)
    df_scaled = pd.DataFrame(np.hstack((feature_scaled, target)))
    # restore the index and column information
    df_scaled.index = index
    df_scaled.columns = columns

    print("  minmax scaling finished.")
    return df_scaled 
Example #17
Source File: testScoreWithAdapaXgboost.py    From nyoka with Apache License 2.0
def test_03_xgb_classifier(self):
        print("\ntest 03 (xgb classifier with preprocessing) [binary-class]\n")
        model = XGBClassifier()
        pipeline_obj = Pipeline([
            ('scaler',MinMaxScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y_bin)
        file_name = "test03xgboost.pmml"
        xgboost_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        model_prob = pipeline_obj.predict_proba(self.X)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #18
Source File: testScoreWithAdapaLgbm.py    From nyoka with Apache License 2.0
def test_01_lgbm_classifier(self):
        print("\ntest 01 (lgbm classifier with preprocessing) [binary-class]\n")
        model = LGBMClassifier()
        pipeline_obj = Pipeline([
            ('scaler',MinMaxScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y_bin)
        file_name = "test01lgbm.pmml"
        lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        model_prob = pipeline_obj.predict_proba(self.X)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #19
Source File: test_autoencoder.py    From muffnn with BSD 3-Clause "New" or "Revised" License
def test_persistence():
    """Make sure we can pickle it."""
    X = iris.data  # Use the iris features.
    X = MinMaxScaler().fit_transform(X)

    ae = Autoencoder(hidden_units=(1,),
                     n_epochs=1000,
                     random_state=4556,
                     learning_rate=1e-2,
                     keep_prob=1.0)
    Xenc = ae.fit_transform(X)

    b = BytesIO()
    pickle.dump(ae, b)
    ae_pickled = pickle.loads(b.getvalue())
    Xenc_pickled = ae_pickled.transform(X)
    assert_array_almost_equal(Xenc, Xenc_pickled) 
Example #20
Source File: test_autoencoder.py    From muffnn with BSD 3-Clause "New" or "Revised" License
def test_replicability():
    """Make sure it can be seeded properly."""
    X = iris.data  # Use the iris features.
    X = MinMaxScaler().fit_transform(X)

    ae1 = Autoencoder(hidden_units=(1,),
                      n_epochs=1000,
                      random_state=4556,
                      learning_rate=1e-2,
                      keep_prob=1.0)
    Xenc1 = ae1.fit_transform(X)

    ae2 = Autoencoder(hidden_units=(1,),
                      n_epochs=1000,
                      random_state=4556,
                      learning_rate=1e-2,
                      keep_prob=1.0)
    Xenc2 = ae2.fit_transform(X)

    assert_array_almost_equal(Xenc1, Xenc2) 
Example #21
Source File: test_autoencoder.py    From muffnn with BSD 3-Clause "New" or "Revised" License
def test_monitor_ae():
    """Test the monitor keyword."""
    # Use the iris features.
    X = iris.data
    X = MinMaxScaler().fit_transform(X)

    ae = Autoencoder(hidden_units=(3, 2,),
                     n_epochs=7500,
                     random_state=4556,
                     learning_rate=DEFAULT_LEARNING_RATE,
                     keep_prob=1.0,
                     hidden_activation=tf.nn.sigmoid,
                     encoding_activation=tf.nn.sigmoid,
                     output_activation=tf.nn.sigmoid)

    def _monitor(epoch, est, stats):
        assert epoch <= 1000, "The autoencoder has been running too long!"
        if stats['loss'] < 0.2:
            assert epoch > 10, "The autoencoder returned too soon!"
            return True
        else:
            return False
    ae.fit(X, monitor=_monitor) 
Example #22
Source File: domainAdaptation.py    From dzetsaka with GNU General Public License v3.0
def __init__(self, transportAlgorithm="MappingTransport",
                 scaler=False, params=None, feedback=True):
        try:
            from sklearn.metrics import mean_squared_error
            from itertools import product
            from sklearn.metrics import (
                f1_score, cohen_kappa_score, accuracy_score)
        except BaseException:
            raise ImportError('Please install scikit-learn')  # itertools is part of the standard library

        self.transportAlgorithm = transportAlgorithm
        self.feedback = feedback

        self.params_ = params

        if scaler:
            from sklearn.preprocessing import MinMaxScaler
            self.scaler = MinMaxScaler(feature_range=(-1, 1))
            self.scalerTarget = MinMaxScaler(feature_range=(-1, 1))
        else:
            self.scaler = scaler 
Example #23
Source File: min_max_scale_test.py    From FATE with Apache License 2.0
def test_fit_instance_default(self):
        scale_param = self.get_scale_param()
        scale_param.scale_col_indexes = -1
        scale_obj = MinMaxScale(scale_param)
        fit_instance = scale_obj.fit(self.table_instance)
        column_min_value = scale_obj.column_min_value
        column_max_value = scale_obj.column_max_value

        scaler = MMS()
        scaler.fit(self.test_data)
        self.assertListEqual(np.round(self.get_table_instance_feature(fit_instance),6).tolist(),
                             np.around(scaler.transform(self.test_data), 6).tolist())
        data_min = list(scaler.data_min_)
        data_max = list(scaler.data_max_)
        self.assertListEqual(column_min_value, data_min)
        self.assertListEqual(column_max_value, data_max)

        transform_data = scale_obj.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             self.get_table_instance_feature(transform_data))

Example #24
Source File: gefcom2014.py    From dts with MIT License
def transform(X, scaler=None, scaler_type=None):
    """
    Apply min-max or standard scaling to the input variables
    :param X: the data
    :param scaler: the fitted scaler to reuse, or None to fit a new one
    :param scaler_type: 'minmax' to fit a MinMaxScaler, anything else for a StandardScaler
    :return:
        scaler used
        X transformed using scaler
    """
    if scaler is None:
        if scaler_type == 'minmax':
            scaler = MinMaxScaler()
        else:
            scaler = StandardScaler()
        scaler.fit(X)
    return scaler, scaler.transform(X) 
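A hypothetical usage sketch of this helper (the X_train/X_test names are illustrative, not from the source): fit a scaler on the training split, then pass it back in so the test split is transformed with the same learned parameters:

scaler, X_train_scaled = transform(X_train, scaler_type='minmax')  # fits a new MinMaxScaler
_, X_test_scaled = transform(X_test, scaler=scaler)                # reuses the already-fitted scaler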
Example #25
Source File: test_cli.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_print_help_pipeline(self, print_help, pipe, capsys):
        print_help(pipe)
        out = capsys.readouterr()[0]

        expected_snippets = [
            '-- --help',
            '<MinMaxScaler> options',
            '--features__scale__feature_range',
            '<NeuralNetClassifier> options',
            '--net__module : torch module (class or instance)',
            '--net__batch_size : int (default=128)',
            '<MLPModule> options',
            '--net__module__hidden_units : int (default=10)'
        ]
        for snippet in expected_snippets:
            assert snippet in out 
Example #26
Source File: test_cli.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_print_help_pipeline_custom_defaults(
            self, print_help, pipe, capsys):
        defaults = {'net__batch_size': 256, 'net__module__hidden_units': 55}
        print_help(pipe, defaults=defaults)
        out = capsys.readouterr()[0]

        expected_snippets = [
            '-- --help',
            '<MinMaxScaler> options',
            '--features__scale__feature_range',
            '<NeuralNetClassifier> options',
            '--net__module : torch module (class or instance)',
            '--net__batch_size : int (default=256)',
            '<MLPModule> options',
            '--net__module__hidden_units : int (default=55)'
        ]
        for snippet in expected_snippets:
            assert snippet in out 
Example #27
Source File: bidirectional_lstm_autoencoder.py    From keras-anomaly-detection with MIT License
def main():
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    ecg_np_data = ecg_data.values  # .as_matrix() was removed in pandas 1.0
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = BidirectionalLstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path, estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path and detect anomalies
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold) 
Example #28
Source File: site_stability.py    From CatLearn with GNU General Public License v3.0
def normalized_site_features(self):
        """Computes normalized system- and site-specific features for all sites.
        Only numeric features are selected."""
        if self._normalized_site_features is None:
            x = self.site_features.select_dtypes(['number'])
            colnames = list(x.columns)
            x = x.values
            min_max_scaler = preprocessing.MinMaxScaler()
            x_scaled = min_max_scaler.fit_transform(x)
            df = pd.DataFrame(x_scaled)
            df.columns = colnames
            self._normalized_site_features = df
        return self._normalized_site_features 
Example #29
Source File: VariationalAutoencoderRunner.py    From DOTA_models with Apache License 2.0
def min_max_scale(X_train, X_test):
    preprocessor = prep.MinMaxScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test 
Example #30
Source File: variational_autoencoder.py    From CalibrationNN with GNU General Public License v3.0
def sample_from_generator(history, nb_samples, latent_dim=12, 
                          valid_split=0.3, random_split=True,
                          hidden_dims=None, **kwargs):
    scaler = MinMaxScaler()
    scaler.fit(history)
    scaled = scaler.transform(history)
    
    nb_train = history.shape[0]    
    if not valid_split:
        nb_valid = 0
    elif isinstance(valid_split, float):
        nb_valid = nb_train - int(np.floor(nb_train*valid_split))
    else:
        nb_valid = valid_split
        
    if nb_valid > 0:
        if random_split:
            ind = np.arange(nb_train)
            np.random.shuffle(ind)
            x_valid = scaled[ind[-nb_valid:], :]
            x_train = scaled[ind[:-nb_valid], :]
        else:
            x_valid = scaled[-nb_valid:, :]
            x_train = scaled[:-nb_valid, :]
    else:
        x_valid = None
        x_train = scaled
    
    _, generator = build_model(latent_dim, x_train, x_valid=x_valid, 
                               hidden_dims=hidden_dims, **kwargs)
    
    normal_sample = np.random.standard_normal((nb_samples, latent_dim))
    draws = generator.predict(normal_sample)
    return scaler.inverse_transform(draws)