Python tensorflow.learn() Examples

The following are 21 code examples of tensorflow.learn(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow , or try the search function

Example #1

Source File: io_test.py From deep_image_model with Apache License 2.0

6 votes

def test_dask_iris_classification(self):
    if HAS_DASK and HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      import dask.dataframe as dd  # pylint: disable=g-import-not-at-top
      random.seed(42)
      iris = datasets.load_iris()
      data = pd.DataFrame(iris.data)
      data = dd.from_pandas(data, npartitions=2)
      labels = pd.DataFrame(iris.target)
      labels = dd.from_pandas(labels, npartitions=2)
      classifier = learn.LinearClassifier(
          feature_columns=learn.infer_real_valued_columns_from_input(data),
          n_classes=3)
      classifier.fit(data, labels, steps=100)
      predictions = data.map_partitions(classifier.predict).compute()
      score = accuracy_score(labels.compute(), predictions)
      self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))

Example #2

Source File: input_pipeline.py From seq2seq with Apache License 2.0

5 votes

def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if not "class" in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")

  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))

  pipeline_class = getattr(sys.modules[__name__], class_)

  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)

  return pipeline_class(params=params, mode=mode)

Example #3

Source File: io_test.py From deep_image_model with Apache License 2.0

5 votes

def test_pandas_dataframe(self):
    if HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      random.seed(42)
      iris = datasets.load_iris()
      data = pd.DataFrame(iris.data)
      labels = pd.DataFrame(iris.target)
      classifier = learn.LinearClassifier(
          feature_columns=learn.infer_real_valued_columns_from_input(data),
          n_classes=3)
      classifier.fit(data, labels, steps=100)
      score = accuracy_score(labels[0], list(classifier.predict(data)))
      self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
    else:
      print("No pandas installed. pandas-related tests are skipped.")

Example #4

Source File: io_test.py From deep_image_model with Apache License 2.0

5 votes

def test_pandas_series(self):
    if HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      random.seed(42)
      iris = datasets.load_iris()
      data = pd.DataFrame(iris.data)
      labels = pd.Series(iris.target)
      classifier = learn.LinearClassifier(
          feature_columns=learn.infer_real_valued_columns_from_input(data),
          n_classes=3)
      classifier.fit(data, labels, steps=100)
      score = accuracy_score(labels, list(classifier.predict(data)))
      self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))

Example #5

Source File: io_test.py From deep_image_model with Apache License 2.0

5 votes

def test_string_data_formats(self):
    if HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      with self.assertRaises(ValueError):
        learn.io.extract_pandas_data(pd.DataFrame({"Test": ["A", "B"]}))
      with self.assertRaises(ValueError):
        learn.io.extract_pandas_labels(pd.DataFrame({"Test": ["A", "B"]}))

Example #6

Source File: datasets.py From conv-ensemble-str with Apache License 2.0

5 votes

def create_dataset(def_dict, mode, use_beam_search):
  """Creates an Dataset object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "dataset_name", "split_name" and "dataset_dir" that
      correspond to the class       name and constructor parameters of
      an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys
    use_beam_search: Whether to use beam search

  Returns:
    A Dataset object.
  """
  if not "dataset_name" in def_dict:
    raise ValueError("Dataset definition must have a dataset_name property.")

  class_ = def_dict["dataset_name"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Dataset class: {}".format(class_))

  # TODO(Shancheng): to support batch_size > 1,
  # remove use_beam_search argument
  if mode != tf.contrib.learn.ModeKeys.TRAIN and use_beam_search:
    def_dict['batch_size'] = 1

  dataset_class = getattr(sys.modules[__name__], class_)
  return dataset_class(params=def_dict, mode=mode)

Example #7

Source File: datasets.py From conv-ensemble-str with Apache License 2.0

5 votes

def create_input_fn(self):
    """Creates an input function that can be used with tf.learn estimators.
      Note that you must pass "factory funcitons" for both the data provider and
      featurizer to ensure that everything will be created in  the same graph.
    """
    with tf.variable_scope("input_fn"):
      batch_size = self.params['batch_size']
      data_provider = self._make_data_provider()
      features_and_labels = self._read_from_data_provider(data_provider)

      tf.logging.info("Start batch queue.")
      batch = tf.train.batch(
          tensors=features_and_labels,
          enqueue_many=False,
          batch_size=batch_size,
          dynamic_pad=True,
          capacity=3000 + 16 * batch_size,
          allow_smaller_final_batch=self.params['smaller_final_batch'],
          name="batch_queue",
          num_threads=int((batch_size+1)/2)
      )

      # Separate features and labels
      features_batch = {k: batch[k] for k in self.feature_keys}
      if set(batch.keys()).intersection(self.label_keys):
        labels_batch = {k: batch[k] for k in self.label_keys}
      else:
        labels_batch = None

      return features_batch, labels_batch

Example #8

Source File: input_pipeline.py From natural-language-summary-generation-from-structured-data with MIT License

5 votes

def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if not "class" in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")

  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))

  pipeline_class = getattr(sys.modules[__name__], class_)

  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)

  return pipeline_class(params=params, mode=mode)

Example #9

Source File: input_pipeline.py From reaction_prediction_seq2seq with Apache License 2.0

5 votes

def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if not "class" in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")

  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))

  pipeline_class = getattr(sys.modules[__name__], class_)

  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)

  return pipeline_class(params=params, mode=mode)

Example #10

Source File: input_pipeline.py From conv_seq2seq with Apache License 2.0

5 votes

def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if not "class" in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")

  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))

  pipeline_class = getattr(sys.modules[__name__], class_)

  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)

  return pipeline_class(params=params, mode=mode)

Example #11

Source File: inputs.py From dstc-noesis with MIT License

5 votes

def create_input_fn(mode, input_files, batch_size, num_epochs):
    def input_fn():
        features = tf.contrib.layers.create_feature_spec_for_parsing(
            get_feature_columns(mode))

        feature_map = tf.contrib.learn.io.read_batch_features(
            file_pattern=input_files,
            batch_size=batch_size,
            features=features,
            reader=tf.TFRecordReader,
            randomize_input=True,
            num_epochs=num_epochs,
            queue_capacity=200000 + batch_size * 10,
            name="read_batch_features_{}".format(mode))

        # This is an ugly hack because of a current bug in tf.learn
        # During evaluation TF tries to restore the epoch variable which isn't defined during training
        # So we define the variable manually here
        if mode == tf.contrib.learn.ModeKeys.TRAIN:
            tf.get_variable(
                "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
                initializer=tf.constant(0, dtype=tf.int64))

        target = feature_map.pop("target")

        return feature_map, target

    return input_fn

Example #12

Source File: utils.py From natural-language-summary-generation-from-structured-data with MIT License

4 votes

def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
  """Creates an input function that can be used with tf.learn estimators.
    Note that you must pass "factory funcitons" for both the data provider and
    featurizer to ensure that everything will be created in  the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucket is performed.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """

  def input_fn():
    """Creates features and labels.
    """

    with tf.variable_scope(scope or "input_fn"):
      data_provider = pipeline.make_data_provider()
      features_and_labels = pipeline.read_from_data_provider(data_provider)

      if bucket_boundaries:
        _, batch = tf.contrib.training.bucket_by_sequence_length(
            input_length=features_and_labels["source_len"],
            bucket_boundaries=bucket_boundaries,
            tensors=features_and_labels,
            batch_size=batch_size,
            keep_input=features_and_labels["source_len"] >= 1,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="bucket_queue")
      else:
        batch = tf.train.batch(
            tensors=features_and_labels,
            enqueue_many=False,
            batch_size=batch_size,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="batch_queue")

      # Separate features and labels
      features_batch = {k: batch[k] for k in pipeline.feature_keys}
      if set(batch.keys()).intersection(pipeline.label_keys):
        labels_batch = {k: batch[k] for k in pipeline.label_keys}
      else:
        labels_batch = None

      return features_batch, labels_batch

  return input_fn

Example #13

Source File: utils.py From seq2seq with Apache License 2.0

4 votes

def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
  """Creates an input function that can be used with tf.learn estimators.
    Note that you must pass "factory funcitons" for both the data provider and
    featurizer to ensure that everything will be created in  the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucket is performed.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """

  def input_fn():
    """Creates features and labels.
    """

    with tf.variable_scope(scope or "input_fn"):
      data_provider = pipeline.make_data_provider()
      features_and_labels = pipeline.read_from_data_provider(data_provider)

      if bucket_boundaries:
        _, batch = tf.contrib.training.bucket_by_sequence_length(
            input_length=features_and_labels["source_len"],
            bucket_boundaries=bucket_boundaries,
            tensors=features_and_labels,
            batch_size=batch_size,
            keep_input=features_and_labels["source_len"] >= 1,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="bucket_queue")
      else:
        batch = tf.train.batch(
            tensors=features_and_labels,
            enqueue_many=False,
            batch_size=batch_size,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="batch_queue")

      # Separate features and labels
      features_batch = {k: batch[k] for k in pipeline.feature_keys}
      if set(batch.keys()).intersection(pipeline.label_keys):
        labels_batch = {k: batch[k] for k in pipeline.label_keys}
      else:
        labels_batch = None

      return features_batch, labels_batch

  return input_fn

Example #14

Source File: utils.py From conv_seq2seq with Apache License 2.0

4 votes

def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
  """Creates an input function that can be used with tf.learn estimators.
    Note that you must pass "factory funcitons" for both the data provider and
    featurizer to ensure that everything will be created in  the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucket is performed.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """

  def input_fn():
    """Creates features and labels.
    """

    with tf.variable_scope(scope or "input_fn"):
      data_provider = pipeline.make_data_provider()
      features_and_labels = pipeline.read_from_data_provider(data_provider)

      if bucket_boundaries:
        _, batch = tf.contrib.training.bucket_by_sequence_length(
            input_length=features_and_labels["source_len"],
            bucket_boundaries=bucket_boundaries,
            tensors=features_and_labels,
            batch_size=batch_size,
            keep_input=features_and_labels["source_len"] >= 1,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="bucket_queue")
      else:
        batch = tf.train.batch(
            tensors=features_and_labels,
            enqueue_many=False,
            batch_size=batch_size,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="batch_queue")

      # Separate features and labels
      features_batch = {k: batch[k] for k in pipeline.feature_keys}
      if set(batch.keys()).intersection(pipeline.label_keys):
        labels_batch = {k: batch[k] for k in pipeline.label_keys}
      else:
        labels_batch = None

      return features_batch, labels_batch

  return input_fn

Example #15

Source File: variants_inference.py From cloudml-examples with Apache License 2.0

4 votes

def _def_experiment(
    train_file_pattern, eval_file_pattern, batch_size):
  """Creates the function used to configure the experiment runner.

  This function creates a function that is used by the learn_runner
  module to create an Experiment.

  Args:
    train_file_pattern: The directory the train data can be found in.
    eval_file_pattern: The directory the test data can be found in.
    batch_size: Batch size

  Returns:
    A function that creates an Experiment object for the runner.
  """

  def _experiment_fn(output_dir):
    """Experiment function used by learn_runner to run training/eval/etc.

    Args:
      output_dir: String path of directory to use for outputs.

    Returns:
      tf.learn `Experiment`.
    """
    estimator = tf.contrib.learn.Estimator(
        model_fn=_build_model_fn(),
        model_dir=output_dir)
    train_input_fn = _build_input_fn(
        input_file_pattern=train_file_pattern,
        batch_size=batch_size,
        mode=tf.contrib.learn.ModeKeys.TRAIN)
    eval_input_fn = _build_input_fn(
        input_file_pattern=eval_file_pattern,
        batch_size=batch_size,
        mode=tf.contrib.learn.ModeKeys.EVAL)

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        train_steps=FLAGS.num_train_steps,
        eval_input_fn=eval_input_fn,
        eval_steps=FLAGS.num_eval_steps,
        eval_metrics=_create_evaluation_metrics(),
        min_eval_frequency=100,
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                _predict_input_fn,
                exports_to_keep=5,
                default_output_alternative_key=DEFAULT_OUTPUT_ALTERNATIVE)
        ])

  return _experiment_fn

Example #16

Source File: variants_inference.py From cloudml-examples with Apache License 2.0

4 votes

def _build_model_fn():
  """Build model function.

  Returns:
    A model function that can be passed to `Estimator` constructor.
  """
  def _model_fn(features, labels, mode):
    """Creates the prediction and its loss.

    Args:
      features: A dictionary of tensors keyed by the feature name.
      labels: A tensor representing the labels.
      mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

    Returns:
      A tuple consisting of the prediction, loss, and train_op.
    """
    # Generate one embedding per sparse feature column and concatenate them.
    concat_embeddings = tf.contrib.layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=_get_feature_columns(include_target_column=False))

    # Add one hidden layer.
    hidden_layer_0 = tf.contrib.layers.relu(
        concat_embeddings, FLAGS.hidden_units)

    # Output and logistic loss.
    logits = tf.contrib.layers.linear(hidden_layer_0, FLAGS.num_classes)

    predictions = tf.contrib.layers.softmax(logits)
    if mode == tf.contrib.learn.ModeKeys.INFER:
      predictions = {
          tf.contrib.learn.PredictionKey.PROBABILITIES: predictions,
          PREDICTION_KEY: features[PREDICTION_KEY]
      }
      output_alternatives = {
          DEFAULT_OUTPUT_ALTERNATIVE: (tf.contrib.learn.ProblemType.UNSPECIFIED,
                                       predictions)
      }
      return model_fn.ModelFnOps(
          mode=mode,
          predictions=predictions,
          output_alternatives=output_alternatives)

    target_one_hot = tf.one_hot(labels, FLAGS.num_classes)
    target_one_hot = tf.reduce_sum(
        input_tensor=target_one_hot, reduction_indices=[1])
    loss = tf.losses.softmax_cross_entropy(target_one_hot, logits)
    if mode == tf.contrib.learn.ModeKeys.EVAL:
      return predictions, loss, None

    opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=FLAGS.learning_rate,
        optimizer=opt)
    return model_fn.ModelFnOps(
        mode=mode, predictions=predictions, loss=loss, train_op=train_op)

  return _model_fn

Example #17

Source File: variants_inference.py From cloudml-examples with Apache License 2.0

4 votes

def _build_input_fn(input_file_pattern, batch_size, mode):
  """Build input function.

  Args:
    input_file_pattern: The file patter for examples
    batch_size: Batch size
    mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

  Returns:
    Tuple, dictionary of feature column name to tensor and labels.
  """
  def _input_fn():
    """Supplies the input to the model.

    Returns:
      A tuple consisting of 1) a dictionary of tensors whose keys are
      the feature names, and 2) a tensor of target labels if the mode
      is not INFER (and None, otherwise).
    """
    logging.info("Reading files from %s", input_file_pattern)
    input_files = sorted(list(tf.gfile.Glob(input_file_pattern)))
    logging.info("Reading files from %s", input_files)
    include_target_column = (mode != tf.contrib.learn.ModeKeys.INFER)
    features_spec = tf.contrib.layers.create_feature_spec_for_parsing(
        feature_columns=_get_feature_columns(include_target_column))

    if FLAGS.use_gzip:
      def gzip_reader():
        return tf.TFRecordReader(
            options=tf.python_io.TFRecordOptions(
                compression_type=TFRecordCompressionType.GZIP))
      reader_fn = gzip_reader
    else:
      reader_fn = tf.TFRecordReader

    features = tf.contrib.learn.io.read_batch_features(
        file_pattern=input_files,
        batch_size=batch_size,
        queue_capacity=3*batch_size,
        randomize_input=mode == tf.contrib.learn.ModeKeys.TRAIN,
        feature_queue_capacity=FLAGS.feature_queue_capacity,
        reader=reader_fn,
        features=features_spec)
    target = None
    if include_target_column:
      target = features.pop(FLAGS.target_field)
    return features, target

  return _input_fn

Example #18

Source File: utils.py From reaction_prediction_seq2seq with Apache License 2.0

4 votes

def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False):
  """Creates an input function that can be used with tf.learn estimators.
    Note that you must pass "factory funcitons" for both the data provider and
    featurizer to ensure that everything will be created in  the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucket is performed.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """

  def input_fn():
    """Creates features and labels.
    """

    data_provider = pipeline.make_data_provider()
    features_and_labels = pipeline.read_from_data_provider(data_provider)

    if bucket_boundaries:
      _, batch = tf.contrib.training.bucket_by_sequence_length(
          input_length=features_and_labels["source_len"],
          bucket_boundaries=bucket_boundaries,
          tensors=features_and_labels,
          batch_size=batch_size,
          keep_input=features_and_labels["source_len"] >= 1,
          dynamic_pad=True,
          capacity=5000 + 16 * batch_size,
          allow_smaller_final_batch=allow_smaller_final_batch,
          name="bucket_queue")
    else:
      batch = tf.train.batch(
          tensors=features_and_labels,
          enqueue_many=False,
          batch_size=batch_size,
          dynamic_pad=True,
          capacity=5000 + 16 * batch_size,
          allow_smaller_final_batch=allow_smaller_final_batch,
          name="batch_queue")

    # Separate features and labels
    features_batch = {k: batch[k] for k in pipeline.feature_keys}
    if set(batch.keys()).intersection(pipeline.label_keys):
      labels_batch = {k: batch[k] for k in pipeline.label_keys}
    else:
      labels_batch = None

    return features_batch, labels_batch

  return input_fn

Example #19

Source File: util.py From pydatalab with Apache License 2.0

4 votes

def read_examples(input_files, batch_size, shuffle, num_epochs=None):
  """Creates readers and queues for reading example protos."""
  files = []
  for e in input_files:
    for path in e.split(','):
      files.extend(file_io.get_matching_files(path))
  thread_count = multiprocessing.cpu_count()

  # The minimum number of instances in a queue from which examples are drawn
  # randomly. The larger this number, the more randomness at the expense of
  # higher memory requirements.
  min_after_dequeue = 1000

  # When batching data, the queue's capacity will be larger than the batch_size
  # by some factor. The recommended formula is (num_threads + a small safety
  # margin). For now, we use a single thread for reading, so this can be small.
  queue_size_multiplier = thread_count + 3

  # Convert num_epochs == 0 -> num_epochs is None, if necessary
  num_epochs = num_epochs or None

  # Build a queue of the filenames to be read.
  filename_queue = tf.train.string_input_producer(files, num_epochs, shuffle)

  example_id, encoded_example = tf.TextLineReader().read_up_to(
      filename_queue, batch_size)

  if shuffle:
    capacity = min_after_dequeue + queue_size_multiplier * batch_size
    return tf.train.shuffle_batch(
        [example_id, encoded_example],
        batch_size,
        capacity,
        min_after_dequeue,
        enqueue_many=True,
        num_threads=thread_count)

  else:
    capacity = queue_size_multiplier * batch_size
    return tf.train.batch(
        [example_id, encoded_example],
        batch_size,
        capacity=capacity,
        enqueue_many=True,
        num_threads=thread_count)


# ==============================================================================
# Building the TF learn estimators
# ==============================================================================

Example #20

Source File: task.py From pydatalab with Apache License 2.0

4 votes

def get_estimator(args, output_dir, features, stats, target_vocab_size):
  # Check layers used for dnn models.
  if is_dnn_model(args.model) and not args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* must be used with DNN models')
  if is_linear_model(args.model) and args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* cannot be used with linear models')

  # Build tf.learn features
  feature_columns = build_feature_columns(features, stats, args.model)

  # Set how often to run checkpointing in terms of steps.
  config = tf.contrib.learn.RunConfig(
      save_checkpoints_steps=args.min_eval_frequency)

  train_dir = os.path.join(output_dir, 'train')
  if args.model == 'dnn_regression':
    estimator = tf.contrib.learn.DNNRegressor(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_regression':
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=feature_columns,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  elif args.model == 'dnn_classification':
    estimator = tf.contrib.learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_classification':
    estimator = tf.contrib.learn.LinearClassifier(
        feature_columns=feature_columns,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  else:
    raise ValueError('bad --model-type value')

  return estimator

Example #21

Source File: task.py From pydatalab with Apache License 2.0

4 votes

def get_estimator(args, output_dir, features, stats, target_vocab_size):
  # Check layers used for dnn models.
  if is_dnn_model(args.model) and not args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* must be used with DNN models')
  if is_linear_model(args.model) and args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* cannot be used with linear models')

  # Build tf.learn features
  feature_columns = build_feature_columns(features, stats, args.model)

  # Set how often to run checkpointing in terms of steps.
  config = tf.contrib.learn.RunConfig(
      save_checkpoints_steps=args.min_eval_frequency)

  train_dir = os.path.join(output_dir, 'train')
  if args.model == 'dnn_regression':
    estimator = tf.contrib.learn.DNNRegressor(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_regression':
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=feature_columns,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  elif args.model == 'dnn_classification':
    estimator = tf.contrib.learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_classification':
    estimator = tf.contrib.learn.LinearClassifier(
        feature_columns=feature_columns,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  else:
    raise ValueError('bad --model-type value')

  return estimator