Python config.DATA_DIR Examples

The following are 30 code examples of config.DATA_DIR. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module config, or try the search function.
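Most of the examples below treat config as a small project-level settings module in which DATA_DIR is a plain module constant: either a string path that gets joined with os.path.join, or (in the PyTorch examples further down) a pathlib.Path. As a point of reference, here is a minimal sketch of such a module; the constant names MNIST_BINARIZED, MNIST_FLOAT and OMNIGLOT, the file names, and the environment-variable override are assumptions inferred from how the examples use them, not the actual config of any linked project.

# config.py -- hypothetical sketch of the settings module these examples import.
import os

# Root directory holding all datasets; assumed to be overridable via an env var.
DATA_DIR = os.environ.get("DATA_DIR", os.path.expanduser("~/data"))

# File names that the MNIST/Omniglot readers below join onto DATA_DIR
# (placeholder names, not taken from the original repositories).
MNIST_BINARIZED = "binarized_mnist.pkl"
MNIST_FLOAT = "mnist_float.npy"
OMNIGLOT = "omniglot.mat"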
Example #1
Source File: datasets.py    From yolo_v2 with Apache License 2.0
def read_MNIST(binarize=False):
  """Reads in MNIST images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: 50k training images
    x_valid: 10k validation images
    x_test: 10k test images

  """
  with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'r') as f:
    (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f)

  if not binarize:
    with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_FLOAT), 'r') as f:
      x_train = np.load(f).reshape(-1, 784)

  return x_train, x_valid, x_test 
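Note that this reader (and the identical copies from the other TensorFlow model repositories below) was written for Python 2, where text-mode 'r' works for pickle.load and np.load. On Python 3 those calls need a binary-mode file. A hedged Python 3 variant of the binarized read, keeping the same config constants:

# Python 3 sketch: open in binary mode and pin the pickle encoding,
# since the file was presumably pickled under Python 2.
with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'rb') as f:
  (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f, encoding='latin1')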
Example #2
Source File: datasets.py    From object_detection_kitti with Apache License 2.0
def read_MNIST(binarize=False):
  """Reads in MNIST images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: 50k training images
    x_valid: 10k validation images
    x_test: 10k test images

  """
  with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'r') as f:
    (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f)

  if not binarize:
    with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_FLOAT), 'r') as f:
      x_train = np.load(f).reshape(-1, 784)

  return x_train, x_valid, x_test 
Example #3
Source File: datasets.py    From hands-detection with MIT License
def read_MNIST(binarize=False):
  """Reads in MNIST images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: 50k training images
    x_valid: 10k validation images
    x_test: 10k test images

  """
  with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'r') as f:
    (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f)

  if not binarize:
    with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_FLOAT), 'r') as f:
      x_train = np.load(f).reshape(-1, 784)

  return x_train, x_valid, x_test 
Example #4
Source File: create_manifest.py    From python-sgx with GNU General Public License v3.0
def main():
    args = parse_args()
    path = os.path.abspath(args.PATH)

    if path.endswith(".manifest.template"):
        if not os.path.isfile(path):
            sys.exit("Cannot find file %r" % path)
        manifest_templates = [path]
    else:
        manifest_templates = get_manifest_templates(path)

    for manifest_template in manifest_templates:
        manifest = manifest_template[:-9]
        with open(manifest_template) as f_template:
            with open(manifest, "w+") as f_manifest:
                for line in f_template:
                    line = line.replace("$(DATA_DIR)", DATA_DIR)
                    line = line.replace("$(CONFIG_DIR)", CONFIG_DIR)
                    line = line.replace("$(RUNTIME)", RUNTIME)
                    line = line.replace("$(PYTHON_VERSION)", PYTHON_VERSION)
                    line = line.replace("$(LIBPROTOBUF_VERSION)", LIBPROTOBUF_VERSION)
                    line = line.replace("$(TESTS_DIR)", TESTS_DIR)
                    f_manifest.write(line) 
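The six chained replace calls above can also be driven by a single substitution table, which makes it easier to add placeholders later. A sketch of that refactor, assuming the same placeholder names and module-level values as in the function above:

# Sketch: same substitutions as above, expressed as one dict plus a loop.
SUBSTITUTIONS = {
    "$(DATA_DIR)": DATA_DIR,
    "$(CONFIG_DIR)": CONFIG_DIR,
    "$(RUNTIME)": RUNTIME,
    "$(PYTHON_VERSION)": PYTHON_VERSION,
    "$(LIBPROTOBUF_VERSION)": LIBPROTOBUF_VERSION,
    "$(TESTS_DIR)": TESTS_DIR,
}

for line in f_template:
    for placeholder, value in SUBSTITUTIONS.items():
        line = line.replace(placeholder, value)
    f_manifest.write(line)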
Example #5
Source File: benchmark_empirical_kde.py    From Conditional_Density_Estimation with MIT License
def experiment():
    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX, color='green')

    # 1) EUROSTOXX
    dataset = datasets.EuroStoxx50()

    result_df = run_benchmark_train_test_fit_cv_ml(dataset, model_dict, n_train_valid_splits=3, shuffle_splits=False, seed=22)

    # 2)
    for n_samples in [10000]:
        dataset = datasets.NCYTaxiDropoffPredict(n_samples=n_samples)

    df = run_benchmark_train_test_fit_cv_ml(dataset, model_dict, n_train_valid_splits=3, shuffle_splits=True, seed=22)

    result_df = pd.concat([result_df, df], ignore_index=True)

    # 3) UCI & NYC Taxi
    for dataset_class in [datasets.BostonHousing, datasets.Conrete, datasets.Energy]:
        dataset = dataset_class()
        df = run_benchmark_train_test_fit_cv_ml(dataset, model_dict, n_train_valid_splits=3, shuffle_splits=True, seed=22)
        result_df = pd.concat([result_df, df], ignore_index=True)

    logger.log('\n', str(result_df)) 
Example #6
Source File: datasets.py    From object_detection_with_tensorflow with MIT License
def read_MNIST(binarize=False):
  """Reads in MNIST images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: 50k training images
    x_valid: 10k validation images
    x_test: 10k test images

  """
  with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'r') as f:
    (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f)

  if not binarize:
    with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_FLOAT), 'r') as f:
      x_train = np.load(f).reshape(-1, 784)

  return x_train, x_valid, x_test 
Example #7
Source File: datasets.py    From g-tensorflow-models with Apache License 2.0
def read_MNIST(binarize=False):
  """Reads in MNIST images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: 50k training images
    x_valid: 10k validation images
    x_test: 10k test images

  """
  with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'r') as f:
    (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f)

  if not binarize:
    with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_FLOAT), 'r') as f:
      x_train = np.load(f).reshape(-1, 784)

  return x_train, x_valid, x_test 
Example #8
Source File: datasets.py    From models with Apache License 2.0
def read_MNIST(binarize=False):
  """Reads in MNIST images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: 50k training images
    x_valid: 10k validation images
    x_test: 10k test images

  """
  with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'r') as f:
    (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f)

  if not binarize:
    with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_FLOAT), 'r') as f:
      x_train = np.load(f).reshape(-1, 784)

  return x_train, x_valid, x_test 
Example #9
Source File: datasets.py    From Gun-Detector with Apache License 2.0
def read_MNIST(binarize=False):
  """Reads in MNIST images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: 50k training images
    x_valid: 10k validation images
    x_test: 10k test images

  """
  with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'r') as f:
    (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f)

  if not binarize:
    with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_FLOAT), 'r') as f:
      x_train = np.load(f).reshape(-1, 784)

  return x_train, x_valid, x_test 
Example #10
Source File: datasets.py    From multilabel-image-classification-tensorflow with MIT License
def read_MNIST(binarize=False):
  """Reads in MNIST images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: 50k training images
    x_valid: 10k validation images
    x_test: 10k test images

  """
  with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_BINARIZED), 'r') as f:
    (x_train, _), (x_valid, _), (x_test, _) = pickle.load(f)

  if not binarize:
    with gfile.FastGFile(os.path.join(config.DATA_DIR, config.MNIST_FLOAT), 'r') as f:
      x_train = np.load(f).reshape(-1, 784)

  return x_train, x_valid, x_test 
Example #11
Source File: datasets.py    From g-tensorflow-models with Apache License 2.0
def read_omniglot(binarize=False):
  """Reads in Omniglot images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: training images
    x_valid: validation images
    x_test: test images

  """
  n_validation=1345

  def reshape_data(data):
    return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='fortran')

  omni_raw = scipy.io.loadmat(os.path.join(config.DATA_DIR, config.OMNIGLOT))

  train_data = reshape_data(omni_raw['data'].T.astype('float32'))
  test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

  # Binarize the data with a fixed seed
  if binarize:
    np.random.seed(5)
    train_data = (np.random.rand(*train_data.shape) < train_data).astype(float)
    test_data = (np.random.rand(*test_data.shape) < test_data).astype(float)

  shuffle_seed = 123
  permutation = np.random.RandomState(seed=shuffle_seed).permutation(train_data.shape[0])
  train_data = train_data[permutation]

  x_train = train_data[:-n_validation]
  x_valid = train_data[-n_validation:]
  x_test = test_data

  return x_train, x_valid, x_test 
Example #12
Source File: datasets.py    From object_detection_with_tensorflow with MIT License
def read_omniglot(binarize=False):
  """Reads in Omniglot images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: training images
    x_valid: validation images
    x_test: test images

  """
  n_validation=1345

  def reshape_data(data):
    return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='fortran')

  omni_raw = scipy.io.loadmat(os.path.join(config.DATA_DIR, config.OMNIGLOT))

  train_data = reshape_data(omni_raw['data'].T.astype('float32'))
  test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

  # Binarize the data with a fixed seed
  if binarize:
    np.random.seed(5)
    train_data = (np.random.rand(*train_data.shape) < train_data).astype(float)
    test_data = (np.random.rand(*test_data.shape) < test_data).astype(float)

  shuffle_seed = 123
  permutation = np.random.RandomState(seed=shuffle_seed).permutation(train_data.shape[0])
  train_data = train_data[permutation]

  x_train = train_data[:-n_validation]
  x_valid = train_data[-n_validation:]
  x_test = test_data

  return x_train, x_valid, x_test 
Example #13
Source File: datasets.py    From models with Apache License 2.0
def read_omniglot(binarize=False):
  """Reads in Omniglot images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: training images
    x_valid: validation images
    x_test: test images

  """
  n_validation=1345

  def reshape_data(data):
    return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='fortran')

  omni_raw = scipy.io.loadmat(os.path.join(config.DATA_DIR, config.OMNIGLOT))

  train_data = reshape_data(omni_raw['data'].T.astype('float32'))
  test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

  # Binarize the data with a fixed seed
  if binarize:
    np.random.seed(5)
    train_data = (np.random.rand(*train_data.shape) < train_data).astype(float)
    test_data = (np.random.rand(*test_data.shape) < test_data).astype(float)

  shuffle_seed = 123
  permutation = np.random.RandomState(seed=shuffle_seed).permutation(train_data.shape[0])
  train_data = train_data[permutation]

  x_train = train_data[:-n_validation]
  x_valid = train_data[-n_validation:]
  x_test = test_data

  return x_train, x_valid, x_test 
Example #14
Source File: main.py    From grocery with Apache License 2.0
def load_data(DATA_NAME):
    
    print('loading', DATA_NAME, 'data ...')
    myTrans = pd.read_csv(DATA_DIR + DATA_NAME + ".data.csv", encoding = 'latin1')
    myTrans['PID'] = myTrans['PID'].apply(lambda x : list(set(eval(x))))
    myItem = pd.read_csv(DATA_DIR + DATA_NAME + ".meta.csv", encoding = 'latin1')
    n_item = len(myItem)
    n_user = myTrans['UID'].max() + 1
    print('done!')
    print('interactions about', n_item, 'products and', n_user, 'users are loaded')
    return myTrans, myItem, n_item, n_user 
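Here DATA_DIR is concatenated directly with the file name, so it only works if DATA_DIR ends with a path separator. A sketch of the same reads using os.path.join, which is indifferent to whether DATA_DIR has a trailing slash:

# Equivalent reads via os.path.join (assumes: import os).
myTrans = pd.read_csv(os.path.join(DATA_DIR, DATA_NAME + ".data.csv"), encoding='latin1')
myItem = pd.read_csv(os.path.join(DATA_DIR, DATA_NAME + ".meta.csv"), encoding='latin1')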
Example #15
Source File: task2.py    From ntua-slp-semeval2018 with MIT License
def load_task2(dataset):
    data_file = os.path.join(DATA_DIR, "task2/us_{}.text".format(dataset))
    label_file = os.path.join(DATA_DIR, "task2/us_{}.labels".format(dataset))

    X = []
    y = []
    with open(data_file, 'r', encoding="utf-8") as dfile, \
            open(label_file, 'r', encoding="utf-8") as lfile:
        for tweet, label in zip(dfile, lfile):
            X.append(tweet.rstrip())
            y.append(int(label.rstrip()))

    return X, y 
Example #16
Source File: main.py    From tensorflow-DSMM with MIT License
def get_train_valid_test_data(augmentation=False):
    # load data
    Q = load_question(params)
    dfTrain = load_train()
    dfTest = load_test()
    # train_features = load_feat("train")
    # test_features = load_feat("test")
    # params["num_features"] = train_features.shape[1]

    # load split
    with open(config.SPLIT_FILE, "rb") as f:
        train_idx, valid_idx = pkl.load(f)

    # validation
    if augmentation:
        dfDev = pd.read_csv(config.DATA_DIR + "/" + "dev_aug.csv")
        dfDev = downsample(dfDev)
        params["use_features"] = False
        params["augmentation_decay_steps"] = 50000
        params["decay_steps"] = 50000
        X_dev = get_model_data(dfDev, None, params)
    else:
        X_dev = get_model_data(dfTrain.loc[train_idx], None, params)
    X_valid = get_model_data(dfTrain.loc[valid_idx], None, params)

    # submit
    if augmentation:
        dfTrain = pd.read_csv(config.DATA_DIR + "/" + "train_aug.csv")
        dfTrain = downsample(dfTrain)
        params["use_features"] = False
        params["augmentation_decay_steps"] = 50000
        params["decay_steps"] = 50000
        X_train = get_model_data(dfTrain, None, params)
    else:
        X_train = get_model_data(dfTrain, None, params)
    X_test = get_model_data(dfTest, None, params)

    return X_dev, X_valid, X_train, X_test, Q 
Example #17
Source File: datasets.py    From object_detection_kitti with Apache License 2.0
def read_omniglot(binarize=False):
  """Reads in Omniglot images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: training images
    x_valid: validation images
    x_test: test images

  """
  n_validation=1345

  def reshape_data(data):
    return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='fortran')

  omni_raw = scipy.io.loadmat(os.path.join(config.DATA_DIR, config.OMNIGLOT))

  train_data = reshape_data(omni_raw['data'].T.astype('float32'))
  test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

  # Binarize the data with a fixed seed
  if binarize:
    np.random.seed(5)
    train_data = (np.random.rand(*train_data.shape) < train_data).astype(float)
    test_data = (np.random.rand(*test_data.shape) < test_data).astype(float)

  shuffle_seed = 123
  permutation = np.random.RandomState(seed=shuffle_seed).permutation(train_data.shape[0])
  train_data = train_data[permutation]

  x_train = train_data[:-n_validation]
  x_valid = train_data[-n_validation:]
  x_test = test_data

  return x_train, x_valid, x_test 
Example #18
Source File: datasets.py    From multilabel-image-classification-tensorflow with MIT License
def read_omniglot(binarize=False):
  """Reads in Omniglot images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: training images
    x_valid: validation images
    x_test: test images

  """
  n_validation=1345

  def reshape_data(data):
    return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='fortran')

  omni_raw = scipy.io.loadmat(os.path.join(config.DATA_DIR, config.OMNIGLOT))

  train_data = reshape_data(omni_raw['data'].T.astype('float32'))
  test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

  # Binarize the data with a fixed seed
  if binarize:
    np.random.seed(5)
    train_data = (np.random.rand(*train_data.shape) < train_data).astype(float)
    test_data = (np.random.rand(*test_data.shape) < test_data).astype(float)

  shuffle_seed = 123
  permutation = np.random.RandomState(seed=shuffle_seed).permutation(train_data.shape[0])
  train_data = train_data[permutation]

  x_train = train_data[:-n_validation]
  x_valid = train_data[-n_validation:]
  x_test = test_data

  return x_train, x_valid, x_test 
Example #19
Source File: datasets.py    From hands-detection with MIT License
def read_omniglot(binarize=False):
  """Reads in Omniglot images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: training images
    x_valid: validation images
    x_test: test images

  """
  n_validation=1345

  def reshape_data(data):
    return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='fortran')

  omni_raw = scipy.io.loadmat(os.path.join(config.DATA_DIR, config.OMNIGLOT))

  train_data = reshape_data(omni_raw['data'].T.astype('float32'))
  test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

  # Binarize the data with a fixed seed
  if binarize:
    np.random.seed(5)
    train_data = (np.random.rand(*train_data.shape) < train_data).astype(float)
    test_data = (np.random.rand(*test_data.shape) < test_data).astype(float)

  shuffle_seed = 123
  permutation = np.random.RandomState(seed=shuffle_seed).permutation(train_data.shape[0])
  train_data = train_data[permutation]

  x_train = train_data[:-n_validation]
  x_valid = train_data[-n_validation:]
  x_test = test_data

  return x_train, x_valid, x_test 
Example #20
Source File: regularization_empirical.py    From Conditional_Density_Estimation with MIT License
def experiment():
    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX, color='green')

    # 1) EUROSTOXX
    dataset = datasets.EuroStoxx50()

    result_df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3, n_eval_seeds=5, shuffle_splits=False,
                                    n_folds=5, seed=22, n_jobs_inner=-1, n_jobc_outer=3)

    # 2) NYC Taxi
    for n_samples in [10000]:
        dataset = datasets.NCYTaxiDropoffPredict(n_samples=n_samples)

    df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3, n_eval_seeds=5, shuffle_splits=True,
                                    n_folds=5, seed=22, n_jobs_inner=-1, n_jobc_outer=3)
    result_df = pd.concat([result_df, df], ignore_index=True)

    # 3) UCI
    for dataset_class in [datasets.BostonHousing, datasets.Conrete, datasets.Energy]:
        dataset = dataset_class()
        df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3, n_eval_seeds=5,
                                        shuffle_splits=True, n_folds=5, seed=22, n_jobs_inner=-1, n_jobc_outer=3)
        result_df = pd.concat([result_df, df], ignore_index=True)

    logger.log('\n', str(result_df))
    logger.log('\n', result_df.to_latex()) 
Example #21
Source File: benchmark_empirical.py    From Conditional_Density_Estimation with MIT License
def experiment():
    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX, color='green')

    # 1) EUROSTOXX
    dataset = datasets.EuroStoxx50()

    result_df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3, n_eval_seeds=5, shuffle_splits=False,
                                    n_folds=5, seed=22)

    # 2) NYC Taxi
    for n_samples in [10000]:
        dataset = datasets.NCYTaxiDropoffPredict(n_samples=n_samples)

    df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3, n_eval_seeds=5, shuffle_splits=True,
                                    n_folds=5, seed=22,  n_jobs_inner=-1, n_jobc_outer=2)
    result_df = pd.concat([result_df, df], ignore_index=True)


    # 3) UCI
    result_df = None
    for dataset_class in [datasets.BostonHousing, datasets.Conrete, datasets.Energy]:
        dataset = dataset_class()
        df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=1, n_eval_seeds=5,
                                             shuffle_splits=True, n_folds=5, seed=22, n_jobs_inner=-1,
                                             n_jobc_outer=2)
        result_df = pd.concat([result_df, df], ignore_index=True)

    logger.log('\n', str(result_df)) 
Example #22
Source File: unittests_configrunner.py    From Conditional_Density_Estimation with MIT License
def test_store_load_configrunner_pipeline(self):

    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX)
    test_dir = os.path.join(logger.log_directory, logger.prefix)
    if os.path.exists(test_dir):
      shutil.rmtree(test_dir)


    keys_of_interest = ['task_name', 'estimator', 'simulator', 'n_observations', 'center_sampling_method', 'x_noise_std', 'y_noise_std',
                        'ndim_x', 'ndim_y', 'n_centers', "n_mc_samples", "n_x_cond", 'mean_est', 'cov_est', 'mean_sim', 'cov_sim',
                        'kl_divergence', 'hellinger_distance', 'js_divergence', 'x_cond', 'random_seed', "mean_sim", "cov_sim",
                        "mean_abs_diff", "cov_abs_diff", "VaR_sim", "VaR_est", "VaR_abs_diff", "CVaR_sim", "CVaR_est", "CVaR_abs_diff",
                        "time_to_fit"]


    conf_est, conf_sim, observations = question1()
    conf_runner = ConfigRunner(EXP_PREFIX, conf_est, conf_sim, observations=observations, keys_of_interest=keys_of_interest,
                               n_mc_samples=1 * 10 ** 2, n_x_cond=5, n_seeds=5)

    conf_runner.configs = random.sample(conf_runner.configs, NUM_CONFIGS_TO_TEST)

    conf_runner.run_configurations(dump_models=True, multiprocessing=False)
    results_from_pkl_file = dict({logger.load_pkl(RESULTS_FILE)})

    """ check if model dumps have all been created """
    dump_dir = os.path.join(logger.log_directory, logger.prefix, 'model_dumps')
    model_dumps_list = os.listdir(dump_dir) # get list of all model files
    model_dumps_list_no_suffix = [os.path.splitext(entry)[0] for entry in model_dumps_list] # remove suffix

    for conf in conf_runner.configs:
      self.assertTrue(conf['task_name'] in model_dumps_list_no_suffix)


    """ check if model dumps can be used successfully"""
    for model_dump_i in model_dumps_list:
      #tf.reset_default_graph()
      with tf.Session(graph=tf.Graph()):
        model = logger.load_pkl("model_dumps/"+model_dump_i)
        self.assertTrue(model)
        if model.ndim_x == 1 and model.ndim_y == 1:
          self.assertTrue(model.plot3d(show=False)) 
Example #23
Source File: train_source.py    From pytorch-domain-adaptation with MIT License
def create_dataloaders(batch_size):
    dataset = MNIST(config.DATA_DIR/'mnist', train=True, download=True,
                    transform=Compose([GrayscaleToRgb(), ToTensor()]))
    shuffled_indices = np.random.permutation(len(dataset))
    train_idx = shuffled_indices[:int(0.8*len(dataset))]
    val_idx = shuffled_indices[int(0.8*len(dataset)):]

    train_loader = DataLoader(dataset, batch_size=batch_size, drop_last=True,
                              sampler=SubsetRandomSampler(train_idx),
                              num_workers=1, pin_memory=True)
    val_loader = DataLoader(dataset, batch_size=batch_size, drop_last=False,
                            sampler=SubsetRandomSampler(val_idx),
                            num_workers=1, pin_memory=True)
    return train_loader, val_loader 
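Unlike the os.path.join examples above, this project composes config.DATA_DIR with the / operator, which requires DATA_DIR to be a pathlib.Path rather than a plain string. A sketch of a config definition compatible with that style (the directory layout is an assumption, not the project's actual config):

# config.py sketch for the pathlib style used in this and the following examples.
from pathlib import Path

DATA_DIR = Path(__file__).resolve().parent / 'data'
# config.DATA_DIR / 'mnist' then resolves to <project>/data/mnist,
# and torchvision's MNIST(..., download=True) downloads into that directory.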
Example #24
Source File: data.py    From pytorch-domain-adaptation with MIT License
def __init__(self, train=True):
        super(MNISTM, self).__init__()
        self.mnist = datasets.MNIST(config.DATA_DIR / 'mnist', train=train,
                                    download=True)
        self.bsds = BSDS500()
        # Fix RNG so the same images are used for blending
        self.rng = np.random.RandomState(42) 
Example #25
Source File: data.py    From pytorch-domain-adaptation with MIT License
def __init__(self):
        image_folder = config.DATA_DIR / 'BSR/BSDS500/data/images'
        self.image_files = list(map(str, image_folder.glob('*/*.jpg'))) 
Example #26
Source File: datasets.py    From Gun-Detector with Apache License 2.0
def read_omniglot(binarize=False):
  """Reads in Omniglot images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: training images
    x_valid: validation images
    x_test: test images

  """
  n_validation=1345

  def reshape_data(data):
    return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='fortran')

  omni_raw = scipy.io.loadmat(os.path.join(config.DATA_DIR, config.OMNIGLOT))

  train_data = reshape_data(omni_raw['data'].T.astype('float32'))
  test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

  # Binarize the data with a fixed seed
  if binarize:
    np.random.seed(5)
    train_data = (np.random.rand(*train_data.shape) < train_data).astype(float)
    test_data = (np.random.rand(*test_data.shape) < test_data).astype(float)

  shuffle_seed = 123
  permutation = np.random.RandomState(seed=shuffle_seed).permutation(train_data.shape[0])
  train_data = train_data[permutation]

  x_train = train_data[:-n_validation]
  x_valid = train_data[-n_validation:]
  x_test = test_data

  return x_train, x_valid, x_test 
Example #27
Source File: get_dataset_mean_std.py    From tf.fashionAI with Apache License 2.0
def get_dataset_mean_std():
    all_sub_dirs = []
    for split in config.SPLITS:
        if 'test' not in split:
            for cat in config.CATEGORIES:
                all_sub_dirs.append(os.path.join(config.DATA_DIR, split, 'Images', cat))
    all_image_nums = 0
    #print(all_sub_dirs)
    means = [0., 0., 0.]
    stds = [0., 0., 0.]
    for dirs in all_sub_dirs:
        all_images = tf.gfile.Glob(os.path.join(dirs, '*.jpg'))
        for image in all_images:
            np_image = imread(image, mode='RGB')
            if len(np_image.shape) < 3 or np_image.shape[-1] != 3:
                continue
            all_image_nums += 1

            means[0] += np.mean(np_image[:, :, 0]) / 10000.
            means[1] += np.mean(np_image[:, :, 1]) / 10000.
            means[2] += np.mean(np_image[:, :, 2]) / 10000.

            stds[0] += np.std(np_image[:, :, 0]) / 10000.
            stds[1] += np.std(np_image[:, :, 1]) / 10000.
            stds[2] += np.std(np_image[:, :, 2]) / 10000.

        print([_*10000./all_image_nums for _ in means])
        print([_*10000./all_image_nums for _ in stds])
    print([_*10000./all_image_nums for _ in means])
    print([_*10000./all_image_nums for _ in stds])
    print(all_image_nums) 
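The per-image channel means and standard deviations above are accumulated after dividing by 10000 and rescaled by 10000/all_image_nums when printed, which keeps the running sums small; the net result is simply the average over images. A sketch of the same computation without the scaling (images here stands for any iterable of H x W x 3 arrays, and note that, as in the code above, this averages per-image standard deviations rather than computing the std over all pixels):

# Sketch: per-channel averages of per-image means and stds, accumulated directly.
channel_mean_sum = np.zeros(3)
channel_std_sum = np.zeros(3)
n_images = 0

for np_image in images:
    channel_mean_sum += np_image.mean(axis=(0, 1))
    channel_std_sum += np_image.std(axis=(0, 1))
    n_images += 1

print(channel_mean_sum / n_images)   # dataset mean per RGB channel
print(channel_std_sum / n_images)    # average per-image std per RGB channel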
Example #28
Source File: datasets.py    From yolo_v2 with Apache License 2.0
def read_omniglot(binarize=False):
  """Reads in Omniglot images.

  Args:
    binarize: whether to use the fixed binarization

  Returns:
    x_train: training images
    x_valid: validation images
    x_test: test images

  """
  n_validation=1345

  def reshape_data(data):
    return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='fortran')

  omni_raw = scipy.io.loadmat(os.path.join(config.DATA_DIR, config.OMNIGLOT))

  train_data = reshape_data(omni_raw['data'].T.astype('float32'))
  test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

  # Binarize the data with a fixed seed
  if binarize:
    np.random.seed(5)
    train_data = (np.random.rand(*train_data.shape) < train_data).astype(float)
    test_data = (np.random.rand(*test_data.shape) < test_data).astype(float)

  shuffle_seed = 123
  permutation = np.random.RandomState(seed=shuffle_seed).permutation(train_data.shape[0])
  train_data = train_data[permutation]

  x_train = train_data[:-n_validation]
  x_valid = train_data[-n_validation:]
  x_test = test_data

  return x_train, x_valid, x_test 
Example #29
Source File: ConfigRunnerLogProb.py    From Conditional_Density_Estimation with MIT License
def __init__(self, exp_prefix, est_params, sim_params, observations, keys_of_interest, n_test_samples=10 ** 5,
               n_seeds=5, use_gpu=True):

    assert est_params and exp_prefix and sim_params and keys_of_interest
    assert observations.all()

    # convert to dicts to list of tuples
    if isinstance(est_params, dict):
      est_params = list(est_params.items())

    if isinstance(sim_params, dict):
      sim_params = list(sim_params.items())

    # every simulator configuration will be run multiple times with different randomness seeds
    sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

    self.observations = observations
    self.n_test_samples = n_test_samples
    self.keys_of_interest = keys_of_interest
    self.exp_prefix = exp_prefix
    self.use_gpu = use_gpu

    logger.configure(log_directory=config.DATA_DIR, prefix=exp_prefix, color='green')

    ''' ---------- Either load or generate the configs ----------'''
    config_pkl_path = os.path.join(logger.log_directory, logger.prefix, EXP_CONFIG_FILE)

    if os.path.isfile(config_pkl_path):
      logger.log("{:<70s} {:<30s}".format("Loading experiment previous configs from file: ", config_pkl_path))
      self.configs = logger.load_pkl(EXP_CONFIG_FILE)
    else:
      logger.log("{:<70s} {:<30s}".format("Generating and storing experiment configs under: ", config_pkl_path))
      self.configs = self._generate_configuration_variants(est_params, sim_params)
      logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)

    ''' ---------- Either load already existing results or start a new result collection ---------- '''
    results_pkl_path = os.path.join(logger.log_directory, logger.prefix, RESULTS_FILE)
    if os.path.isfile(results_pkl_path):
      logger.log_line("{:<70s} {:<30s}".format("Continue with: ", results_pkl_path))
      self.gof_single_res_collection = dict(logger.load_pkl_log(RESULTS_FILE))

    else: # start from scratch
      self.gof_single_res_collection = {}

    self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection) 
Example #30
Source File: revgrad.py    From pytorch-domain-adaptation with MIT License
def main(args):
    model = Net().to(device)
    model.load_state_dict(torch.load(args.MODEL_FILE))
    feature_extractor = model.feature_extractor
    clf = model.classifier

    discriminator = nn.Sequential(
        GradientReversal(),
        nn.Linear(320, 50),
        nn.ReLU(),
        nn.Linear(50, 20),
        nn.ReLU(),
        nn.Linear(20, 1)
    ).to(device)

    half_batch = args.batch_size // 2
    source_dataset = MNIST(config.DATA_DIR/'mnist', train=True, download=True,
                          transform=Compose([GrayscaleToRgb(), ToTensor()]))
    source_loader = DataLoader(source_dataset, batch_size=half_batch,
                               shuffle=True, num_workers=1, pin_memory=True)
    
    target_dataset = MNISTM(train=False)
    target_loader = DataLoader(target_dataset, batch_size=half_batch,
                               shuffle=True, num_workers=1, pin_memory=True)

    optim = torch.optim.Adam(list(discriminator.parameters()) + list(model.parameters()))

    for epoch in range(1, args.epochs+1):
        batches = zip(source_loader, target_loader)
        n_batches = min(len(source_loader), len(target_loader))

        total_domain_loss = total_label_accuracy = 0
        for (source_x, source_labels), (target_x, _) in tqdm(batches, leave=False, total=n_batches):
                x = torch.cat([source_x, target_x])
                x = x.to(device)
                domain_y = torch.cat([torch.ones(source_x.shape[0]),
                                      torch.zeros(target_x.shape[0])])
                domain_y = domain_y.to(device)
                label_y = source_labels.to(device)

                features = feature_extractor(x).view(x.shape[0], -1)
                domain_preds = discriminator(features).squeeze()
                label_preds = clf(features[:source_x.shape[0]])
                
                domain_loss = F.binary_cross_entropy_with_logits(domain_preds, domain_y)
                label_loss = F.cross_entropy(label_preds, label_y)
                loss = domain_loss + label_loss

                optim.zero_grad()
                loss.backward()
                optim.step()

                total_domain_loss += domain_loss.item()
                total_label_accuracy += (label_preds.max(1)[1] == label_y).float().mean().item()

        mean_loss = total_domain_loss / n_batches
        mean_accuracy = total_label_accuracy / n_batches
        tqdm.write(f'EPOCH {epoch:03d}: domain_loss={mean_loss:.4f}, '
                   f'source_accuracy={mean_accuracy:.4f}')

        torch.save(model.state_dict(), 'trained_models/revgrad.pt')