Python tensorflow.compat.v1.int64() Examples
The following are 30 code examples of tensorflow.compat.v1.int64().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
Example #1
Source File: neural_assistant.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_last_embedding(input_embeddings, input_lengths, hparams):
    """Averages the embeddings of the last `hparams.last_k` positions.

    Args:
      input_embeddings: <tf.float32>[bs, max_seq_len, emb_dim]
      input_lengths: <tf.int64>[bs, 1]
      hparams: model hparams; `hparams.last_k` is the window size.

    Returns:
      last_k_embedding: <tf.float32>[bs, emb_dim]
    """
    seq_len = tf.shape(input_embeddings)[1]

    # <tf.float32>[bs, 1, max_seq_len]: ones for every position < length.
    keep_mask = tf.sequence_mask(input_lengths, seq_len, dtype=tf.float32)
    # Ones for every position < length - last_k, i.e. what lies before the
    # window.
    drop_mask = tf.sequence_mask(
        input_lengths - hparams.last_k, seq_len, dtype=tf.float32)
    # Only the last `last_k` valid positions remain set.
    window_mask = keep_mask - drop_mask

    # <tf.float32>[bs, 1, emb_dim]
    window_sum = tf.matmul(window_mask, input_embeddings)

    # <tf.float32>[bs, 1, 1] filled with last_k. The divisor is always last_k,
    # independent of the actual sequence length.
    batch = tf.shape(input_embeddings)[0]
    divisor = tf.to_float(
        tf.expand_dims(tf.ones([batch, 1]) * hparams.last_k, 2))

    # <tf.float32>[bs, 1, emb_dim] -> <tf.float32>[bs, emb_dim]
    return tf.squeeze(window_sum / divisor, 1)
Example #2
Source File: tfexample_decoder_test.py From tf-slim with Apache License 2.0 | 6 votes |
def testDecodeExampleWithVarLenTensorToDense(self):
    """Decodes a VarLenFeature through a Tensor handler with a fixed shape."""
    expected = np.array([[1, 2, 3], [4, 5, 6]])

    features = tf.train.Features(
        feature={'labels': self._EncodedInt64Feature(expected)})
    serialized_example = tf.train.Example(
        features=features).SerializeToString()

    with self.cached_session():
        # Reshape the serialized proto into a scalar tensor.
        serialized_example = array_ops.reshape(serialized_example, shape=[])

        keys_to_features = {
            'labels': parsing_ops.VarLenFeature(dtype=tf.int64),
        }
        items_to_handlers = {
            'labels': tfexample_decoder.Tensor(
                'labels', shape=expected.shape),
        }
        decoder = tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # Unpacking also checks that exactly one tensor comes back.
        (tf_labels,) = decoder.decode(serialized_example, ['labels'])
        self.assertAllEqual(tf_labels.eval(), expected)
Example #3
Source File: run_classifier.py From albert with Apache License 2.0 | 6 votes |
def serving_input_receiver_fn():
    """Creates an input function for serving."""
    seq_len = FLAGS.max_seq_length

    # Batch of serialized tf.Example protos supplied by the serving client.
    serialized_example = tf.placeholder(
        dtype=tf.string, shape=[None], name="serialized_example")

    features = {
        key: tf.FixedLenFeature([seq_len], dtype=tf.int64)
        for key in ("input_ids", "input_mask", "segment_ids")
    }
    feature_map = tf.parse_example(serialized_example, features=features)

    # Constant int32 placeholders for features that are not part of the
    # serving request.
    feature_map["is_real_example"] = tf.constant(1, dtype=tf.int32)
    feature_map["label_ids"] = tf.constant(0, dtype=tf.int32)

    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32. Only existing keys are reassigned, so the
    # dict size does not change while it is being iterated.
    for name in feature_map:
        tensor = feature_map[name]
        if tensor.dtype == tf.int64:
            feature_map[name] = tf.to_int32(tensor)

    return tf.estimator.export.ServingInputReceiver(
        features=feature_map, receiver_tensors=serialized_example)
Example #4
Source File: generator_utils_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testDatasetPacking(self):
    """Compares `pack_dataset` output with `reference_packing`, batch by batch."""
    packed_keys = ("inputs", "targets")

    dataset = tf.data.Dataset.from_generator(
        example_generator,
        output_types={key: tf.int64 for key in packed_keys},
        output_shapes={key: tf.TensorShape((None,)) for key in packed_keys})
    dataset = generator_utils.pack_dataset(
        dataset, length=5, keys=packed_keys, use_custom_ops=False)

    with tf.Session().as_default() as sess:
        next_batch = dataset.make_one_shot_iterator().get_next()
        for expected in reference_packing():
            actual = sess.run(next_batch)
            # Same set of feature names first, then the same contents.
            self.assertAllEqual(set(actual.keys()), set(expected.keys()))
            for key in expected:
                self.assertAllEqual(actual[key], expected[key])
Example #5
Source File: tfexample_decoder_test.py From tf-slim with Apache License 2.0 | 6 votes |
def testDecodeExampleWithFixLenTensorWithShape(self):
    """Decodes a FixedLenFeature through a Tensor handler with a fixed shape."""
    expected = np.array([[1, 2, 3], [4, 5, 6]])

    features = tf.train.Features(
        feature={'labels': self._EncodedInt64Feature(expected)})
    serialized_example = tf.train.Example(
        features=features).SerializeToString()

    with self.cached_session():
        # Reshape the serialized proto into a scalar tensor.
        serialized_example = array_ops.reshape(serialized_example, shape=[])

        keys_to_features = {
            'labels': parsing_ops.FixedLenFeature(
                expected.shape, dtype=tf.int64),
        }
        items_to_handlers = {
            'labels': tfexample_decoder.Tensor(
                'labels', shape=expected.shape),
        }
        decoder = tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # Unpacking also checks that exactly one tensor comes back.
        (tf_labels,) = decoder.decode(serialized_example, ['labels'])
        self.assertAllEqual(tf_labels.eval(), expected)
Example #6
Source File: tfexample_decoder_test.py From tf-slim with Apache License 2.0 | 6 votes |
def testDecodeExampleWithVarLenTensor(self):
    """Decodes a VarLenFeature with no shape given; expects the flat values."""
    source = np.array([[[1], [2], [3]], [[4], [5], [6]]])

    features = tf.train.Features(
        feature={'labels': self._EncodedInt64Feature(source)})
    serialized_example = tf.train.Example(
        features=features).SerializeToString()

    with self.cached_session():
        # Reshape the serialized proto into a scalar tensor.
        serialized_example = array_ops.reshape(serialized_example, shape=[])

        keys_to_features = {
            'labels': parsing_ops.VarLenFeature(dtype=tf.int64),
        }
        items_to_handlers = {
            'labels': tfexample_decoder.Tensor('labels'),
        }
        decoder = tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # Unpacking also checks that exactly one tensor comes back.
        (tf_labels,) = decoder.decode(serialized_example, ['labels'])
        # No shape was passed to the handler, so compare against the
        # flattened source array.
        self.assertAllEqual(tf_labels.eval(), source.flatten())
Example #7
Source File: vqa.py From tensor2tensor with Apache License 2.0 | 6 votes |
def example_reading_spec(self):
    """Extends the parent reading spec with question/answer token fields.

    Returns:
      A `(data_fields, data_items_to_decoders)` pair: the parent's spec plus
      the image/question ids, the question and answer int64 sequences, and
      the "question" and "targets" decoders.
    """
    parent_spec = super(
        ImageVqav2Tokens10kLabels3k, self).example_reading_spec()
    data_fields, data_items_to_decoders = parent_spec

    # Scalar int64 identifiers.
    for key in ("image/image_id", "image/question_id"):
        data_fields[key] = tf.FixedLenFeature((), tf.int64)

    # Variable-length int64 sequences.
    for key in ("image/question", "image/answer"):
        data_fields[key] = tf.FixedLenSequenceFeature(
            (), tf.int64, allow_missing=True)

    slim = contrib.slim()
    tensor_decoder = slim.tfexample_decoder.Tensor
    data_items_to_decoders["question"] = tensor_decoder("image/question")
    data_items_to_decoders["targets"] = tensor_decoder("image/answer")

    return data_fields, data_items_to_decoders
Example #8
Source File: tfexample_decoder_test.py From tf-slim with Apache License 2.0 | 6 votes |
def testDecodeExampleWithInt64Tensor(self):
    """Round-trips a random int64 array through a FixedLenFeature."""
    expected = np.random.randint(1, 10, size=(2, 3, 1))

    features = tf.train.Features(
        feature={'array': self._EncodedInt64Feature(expected)})
    serialized_example = tf.train.Example(
        features=features).SerializeToString()

    with self.cached_session():
        # Reshape the serialized proto into a scalar tensor.
        serialized_example = array_ops.reshape(serialized_example, shape=[])

        keys_to_features = {
            'array': parsing_ops.FixedLenFeature(expected.shape, tf.int64)
        }
        items_to_handlers = {
            'array': tfexample_decoder.Tensor('array'),
        }
        decoder = tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # Unpacking also checks that exactly one tensor comes back.
        (tf_array,) = decoder.decode(serialized_example, ['array'])
        self.assertAllEqual(tf_array.eval(), expected)
Example #9
Source File: neural_assistant.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_average_embedding(input_embeddings, input_lengths):
    """Computes the bag-of-words (length-normalized sum) embedding.

    Args:
      input_embeddings: <tf.float32>[bs, max_seq_len, emb_dim]
      input_lengths: <tf.int64>[bs, 1]

    Returns:
      bow_embedding: <tf.float32>[bs, emb_dim]
    """
    seq_len = tf.shape(input_embeddings)[1]

    # <tf.float32>[bs, 1, max_seq_len]: ones for every position < length.
    valid_mask = tf.sequence_mask(input_lengths, seq_len, dtype=tf.float32)

    # <tf.float32>[bs, 1, emb_dim]: sum over the valid positions only.
    masked_sum = tf.matmul(valid_mask, input_embeddings)

    # <tf.float32>[bs, 1, 1]: per-example length used as the divisor.
    divisor = tf.to_float(tf.expand_dims(input_lengths, 2))

    # <tf.float32>[bs, 1, emb_dim] -> <tf.float32>[bs, emb_dim]
    return tf.squeeze(masked_sum / divisor, 1)
Example #10
Source File: neural_assistant.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_max_pool_embedding(input_embeddings, input_lengths):
    """Computes the max-pool embedding over the valid (non-padded) positions.

    Args:
      input_embeddings: <tf.float32>[bs, max_seq_len, emb_dim]
      input_lengths: <tf.int64>[bs, 1]

    Returns:
      max_pool_embedding: <tf.float32>[bs, emb_dim]
    """
    max_seq_len = tf.shape(input_embeddings)[1]

    # <tf.float32>[bs, 1, max_seq_len]: 1.0 at padded positions, 0.0 at
    # valid ones.
    padding = 1.0 - tf.sequence_mask(
        input_lengths, max_seq_len, dtype=tf.float32)

    # Additive bias that pushes padded positions far below any real
    # activation so they cannot win the max. The earlier multiplier of -1e-6
    # shifted padded values by a negligible amount and masked nothing.
    # NOTE(review): this changes the outputs for any sequence whose padded
    # positions previously held the maximum.
    # <tf.float32>[bs, max_seq_len]
    bias = tf.squeeze(padding * (-1e6), 1)
    # <tf.float32>[bs, max_seq_len, 1]: broadcasts over emb_dim.
    bias = tf.expand_dims(bias, 2)

    # <tf.float32>[bs, emb_dim]
    max_pool_embedding = tf.reduce_max(input_embeddings + bias, 1)
    return max_pool_embedding
Example #11
Source File: neural_assistant.py From tensor2tensor with Apache License 2.0 | 6 votes |
def encode_knowledge_bottom(self, features):
    """Reshapes triple embeddings and lengths to one row per triple.

    Args:
      features: dict providing "encoded_triples" and "triple_lens".

    Returns:
      A pair `(re_fact_embedding, re_fact_lengths)`:
        re_fact_embedding:
          <tf.float32>[batch_size*triple_num, max_triple_length, emb_dim]
        re_fact_lengths: [batch_size*triple_num, 1]
    """
    tf.logging.info("Encoding knowledge " + str(self.triple_num))

    # Make sure this is embeddings for triples.
    # <tf.float32>[batch_size, triple_num*max_triple_length, 1, emb_dim]
    # squeezed to [batch_size, triple_num*max_triple_length, emb_dim]
    triples = tf.squeeze(features["encoded_triples"], 2)

    dims = common_layers.shape_list(triples)
    batch_size, embed_dim = dims[0], dims[2]
    num_rows = batch_size * self.triple_num

    # <tf.float32>[batch_size*triple_num, max_triple_length, emb_dim]
    re_fact_embedding = tf.reshape(
        triples, [num_rows, -1, embed_dim], name="reshape_fact_embedding")

    # <tf.int64>[batch_size, triple_num] stacked into
    # [batch_size*triple_num, 1].
    re_fact_lengths = tf.reshape(
        features["triple_lens"], [num_rows, 1], name="reshape_fact_lengths")

    return re_fact_embedding, re_fact_lengths
Example #12
Source File: seq2seq.py From magenta with Apache License 2.0 | 6 votes |
def categorical_sample(logits, dtype=tf.int32, sample_shape=(), seed=None):
    """Samples from a categorical distribution defined by `logits`.

    Args:
      logits: unnormalized log-probabilities; the last axis indexes classes.
      dtype: dtype the drawn class indices are cast to.
      sample_shape: passed to `_call_sampler` together with the sampler.
      seed: seed forwarded to `tf.multinomial`.

    Returns:
      Whatever `_call_sampler` returns for the internal sampler.
    """
    logits = tf.convert_to_tensor(logits, name="logits")
    num_classes = tf.shape(logits)[-1]
    batch_dims = tf.shape(logits)[:-1]

    def _sample_n(n):
        """Sample vector of categoricals."""
        # tf.multinomial takes a 2-D [batch, classes] input.
        flat_logits = (
            logits if logits.shape.ndims == 2
            else tf.reshape(logits, [-1, num_classes]))
        # Draw in int64 only when the logits are wider than 4 bytes.
        draw_dtype = tf.int64 if logits.dtype.size > 4 else tf.int32
        samples = tf.multinomial(
            flat_logits, n, seed=seed, output_dtype=draw_dtype)
        # Transpose to samples-major, then restore the batch dimensions.
        out_shape = tf.concat([[n], batch_dims], 0)
        samples = tf.reshape(tf.transpose(samples), out_shape)
        return tf.cast(samples, dtype)

    return _call_sampler(_sample_n, sample_shape)
Example #13
Source File: tf_example_decoder.py From Object_Detection_Tracking with Apache License 2.0 | 6 votes |
def __init__(self, include_mask=False, regenerate_source_id=False):
    """Builds the feature spec used for parsing.

    Args:
      include_mask: when true, 'image/object/mask' is added to the spec.
      regenerate_source_id: stored on the instance; not used here.
    """
    self._include_mask = include_mask
    self._regenerate_source_id = regenerate_source_id

    scalar = tf.FixedLenFeature
    var_len = tf.VarLenFeature
    spec = {
        'image/encoded': scalar((), tf.string),
        'image/source_id': scalar((), tf.string, ''),
        'image/height': scalar((), tf.int64, -1),
        'image/width': scalar((), tf.int64, -1),
        'image/object/bbox/xmin': var_len(tf.float32),
        'image/object/bbox/xmax': var_len(tf.float32),
        'image/object/bbox/ymin': var_len(tf.float32),
        'image/object/bbox/ymax': var_len(tf.float32),
        'image/object/class/label': var_len(tf.int64),
        'image/object/area': var_len(tf.float32),
        'image/object/is_crowd': var_len(tf.int64),
    }
    if include_mask:
        spec['image/object/mask'] = var_len(tf.string)

    self._keys_to_features = spec
Example #14
Source File: evaluator.py From graphics with Apache License 2.0 | 6 votes |
def _init_graph(self):
    """Initialize computation graph for tensorflow.

    Creates the grid encoder, the input placeholders and the latent output
    inside `self.graph`, then opens a session and restores `self.ckpt`.
    """
    with self.graph.as_default():
        self.encoder = g2v.GridEncoder(
            in_grid_res=self.in_grid_res,
            num_filters=self.num_filters,
            codelen=self.codelen,
            name='g2v')
        self.global_step = tf.get_variable(
            'global_step', shape=[], dtype=tf.int64)

        grid_shape = [self.gres, self.gres, self.gres]
        self.grid_ph = tf.placeholder(tf.float32, shape=grid_shape)
        self.start_ph = tf.placeholder(
            tf.int32, shape=[self.grid_batch, 3])

        # Slice the grid at the given starts, then add a trailing axis.
        self.ingrid = self._batch_slice(
            self.grid_ph, self.start_ph, self.in_grid_res, self.grid_batch)
        self.ingrid = self.ingrid[..., tf.newaxis]

        # [gb, codelen]
        self.lats = self.encoder(self.ingrid, training=False)

        # The saver is built after every variable exists so that restore
        # covers all of them.
        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        self.saver.restore(self.sess, self.ckpt)
Example #15
Source File: evaluator.py From graphics with Apache License 2.0 | 6 votes |
def _init_graph(self):
    """Initialize computation graph for tensorflow.

    Creates the ImNet refiner, the point and latent placeholders and the
    per-point output inside `self.graph`, then opens a session and restores
    `self.ckpt`.
    """
    with self.graph.as_default():
        self.refiner = im.ImNet(
            dim=self.dim,
            in_features=self.codelen,
            out_features=self.out_features,
            num_filters=self.num_filters)
        self.global_step = tf.get_variable(
            'global_step', shape=[], dtype=tf.int64)

        self.pts_ph = tf.placeholder(
            tf.float32, shape=[self.point_batch, 3])
        self.lat_ph = tf.placeholder(tf.float32, shape=[self.codelen])

        # Repeat the single latent code for every point in the batch.
        tiled_latent = tf.broadcast_to(
            self.lat_ph[tf.newaxis], [self.point_batch, self.codelen])
        # [pb, 3+c]
        refiner_input = tf.concat((self.pts_ph, tiled_latent), axis=-1)
        # [pb, 1]
        raw_vals = self.refiner(refiner_input, training=False)
        # [pb]
        self.vals = tf.squeeze(raw_vals, axis=1)

        # The saver is built after every variable exists so that restore
        # covers all of them.
        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        self.saver.restore(self.sess, self.ckpt)
Example #16
Source File: rendered_env_problem.py From tensor2tensor with Apache License 2.0 | 6 votes |
def example_reading_spec(self):
    """Return a mix of env and video data fields and decoders.

    Returns:
      A pair `(env_fields, env_decoders)` holding the env spec without the
      raw observation field, plus a frame-number entry and all video
      entries.
    """
    slim = contrib.slim()

    video_fields, video_decoders = (
        video_utils.VideoProblem.example_reading_spec(self))
    env_fields, env_decoders = (
        gym_env_problem.GymEnvProblem.example_reading_spec(self))

    # Remove raw observations field since we want to capture them as videos.
    for spec in (env_fields, env_decoders):
        spec.pop(env_problem.OBSERVATION_FIELD)

    # Add frame number spec and decoder.
    env_fields[_FRAME_NUMBER_FIELD] = tf.FixedLenFeature((1,), tf.int64)
    env_decoders[_FRAME_NUMBER_FIELD] = slim.tfexample_decoder.Tensor(
        _FRAME_NUMBER_FIELD)

    # Add video fields and decoders.
    env_fields.update(video_fields)
    env_decoders.update(video_decoders)

    return env_fields, env_decoders
Example #17
Source File: env_problem.py From tensor2tensor with Apache License 2.0 | 6 votes |
def process_rewards(self, rewards):
    """Clips the rewards, optionally rounds them and casts to integer.

    Args:
      rewards: numpy array of raw (float) rewards.

    Returns:
      processed_rewards: the rewards clipped to `self.reward_range`. When
        `self._discrete_rewards` is set they are also rounded to the nearest
        integer and returned as np.int64; otherwise the clipped array is
        returned unchanged.
    """
    lowest, highest = self.reward_range

    # Clips at min and max reward.
    clipped = np.clip(rewards, lowest, highest)
    if not self._discrete_rewards:
        return clipped

    # Round to (nearest) int and convert to integral type.
    return np.around(clipped, decimals=0).astype(np.int64)
Example #18
Source File: utils.py From interval-bound-propagation with Apache License 2.0 | 5 votes |
def build_dataset(raw_data, batch_size=50, sequential=True):
    """Builds a dataset from raw NumPy tensors.

    Args:
      raw_data: pair `(images, labels)` of NumPy arrays.
      batch_size: number of samples per batch.
      sequential: when false, the data is shuffled with a buffer of 1000.

    Returns:
      The `get_next()` op of a one-shot iterator over the repeated, batched
      dataset.
    """
    images, labels = raw_data

    # We need width, height and channel.
    if len(images.shape) == 3:
        images = np.expand_dims(images, -1)

    # Scale pixel values by 1/255 and store the labels as int64.
    scaled_images = images.astype(np.float32) / 255.
    int_labels = labels.astype(np.int64)
    dataset = tf.data.Dataset.from_tensor_slices(
        Sample(scaled_images, int_labels))

    if not sequential:
        dataset = dataset.shuffle(1000)

    batched = dataset.repeat().batch(batch_size)
    return batched.make_one_shot_iterator().get_next()
Example #19
Source File: utils.py From interval-bound-propagation with Apache License 2.0 | 5 votes |
def _get_least_likely_class(label, num_classes, logits):
    """Returns the index of the lowest logit, avoiding the true label.

    Args:
      label: true class labels; cast to int64 for the comparison.
      num_classes: number of classes, used as the modulus.
      logits: logits; the argmin is taken along axis 1.

    Returns:
      int64 tensor of target labels.
    """
    least_likely = tf.argmin(logits, axis=1, output_type=tf.int64)

    # In the off-chance that the least likely class is the true class, the
    # target class is changed to be the next index (wrapping around).
    true_label = tf.cast(label, tf.int64)
    collides = tf.cast(tf.equal(least_likely, true_label), tf.int64)
    return tf.mod(least_likely + collides, num_classes)
Example #20
Source File: utils.py From interval-bound-propagation with Apache License 2.0 | 5 votes |
def _get_random_class(label, num_classes, seed=None):
    """Returns a random class that differs from the true label.

    Args:
      label: true class labels; the first dimension is the batch size.
      num_classes: number of classes.
      seed: seed forwarded to `tf.random.uniform`.

    Returns:
      int64 tensor of target labels.
    """
    num_examples = tf.shape(label)[0]

    # An offset in [1, num_classes) added modulo num_classes can never map a
    # label back onto itself.
    offset = tf.random.uniform(
        shape=(num_examples,),
        minval=1,
        maxval=num_classes,
        dtype=tf.int64,
        seed=seed)
    shifted = tf.cast(label, tf.int64) + offset
    return tf.mod(shifted, num_classes)
Example #21
Source File: t2r_models.py From tensor2robot with Apache License 2.0 | 5 votes |
def get_global_step(self):
    """Returns the global step broadcast to `(self._export_batch_size,)`."""
    # tf.train.get_global_step() does not work well under model_fn for TPU.
    with tf.variable_scope('', reuse=tf.AUTO_REUSE):
        step_var = tf.get_variable('global_step', shape=[], dtype=tf.int64)
        return tf.broadcast_to(step_var, shape=(self._export_batch_size,))
Example #22
Source File: reader.py From magenta with Apache License 2.0 | 5 votes |
def get_example(self, batch_size):
    """Get a single example from the tfrecord file.

    Args:
      batch_size: Int, minibatch size. Used as the capacity of the filename
        queue.

    Returns:
      tf.Example protobuf parsed from tfrecord.
    """
    reader = tf.TFRecordReader()

    # Training reads (and shuffles) indefinitely; otherwise one epoch.
    path_queue = tf.train.input_producer(
        [self.record_path],
        num_epochs=None if self.is_training else 1,
        shuffle=self.is_training,
        capacity=batch_size)
    _, serialized_example = reader.read(path_queue)

    def int64_feature(length):
        return tf.FixedLenFeature([length], dtype=tf.int64)

    features = {
        "note_str": tf.FixedLenFeature([], dtype=tf.string),
        "pitch": int64_feature(1),
        "velocity": int64_feature(1),
        "audio": tf.FixedLenFeature([64000], dtype=tf.float32),
        "qualities": int64_feature(10),
        "instrument_source": int64_feature(1),
        "instrument_family": int64_feature(1),
    }
    return tf.parse_single_example(serialized_example, features)
Example #23
Source File: attacks_test.py From interval-bound-propagation with Apache License 2.0 | 5 votes |
def testEndToEnd(self, predictor_cls, attack_cls, optimizer_cls, epsilon,
                 restarted=False):
    """Runs a maximizing and a minimizing attack on a one-unit linear net."""
    # l-\infty norm of perturbation ball.
    if isinstance(epsilon, list):
        # We test the ability to have different epsilons across dimensions.
        epsilon = tf.constant([epsilon], dtype=tf.float32)
    bounds = (-.5, 2.5)

    # Create a simple network.
    linear = snt.Linear(1, initializers={
        'w': tf.constant_initializer(1.),
        'b': tf.constant_initializer(1.),
    })
    z = tf.constant([[1, 2]], dtype=tf.float32)
    predictor = predictor_cls(linear, self)
    # Not important for the test but needed.
    labels = tf.constant([1], dtype=tf.int64)

    def run_attack(coefficient):
        """Builds an attack on `coefficient * output` and applies it to z."""
        spec = ibp.LinearSpecification(tf.constant([[[coefficient]]]))
        attack = attack_cls(predictor, spec, epsilon, input_bounds=bounds,
                            optimizer_builder=optimizer_cls)
        if restarted:
            attack = ibp.RestartedAttack(attack, num_restarts=10)
        return attack(z, labels)

    # We create two attacks to maximize and then minimize the output.
    z_max = run_attack(1.)
    z_min = run_attack(-1.)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        max_batch, min_batch = sess.run([z_max, z_min])
        best, worst = max_batch[0], min_batch[0]
        self.assertAlmostEqual(2., best[0])
        self.assertAlmostEqual(2.5, best[1])
        self.assertAlmostEqual(0., worst[0])
        self.assertAlmostEqual(1., worst[1])
Example #24
Source File: gym_spaces_utils_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def test_box_space_encode(self):
    """Encoding a value from an int64 Box space yields a plain list."""
    space = Box(low=0, high=10, shape=[2], dtype=np.int64)
    sample = np.array([2, 3])

    encoded = gym_spaces_utils.gym_space_encode(space, sample)

    self.assertListEqual([2, 3], encoded)
Example #25
Source File: attacks.py From interval-bound-propagation with Apache License 2.0 | 5 votes |
def init_state(self, unused_x):
    """Returns the initial state: a `_State` wrapping an int64 zero."""
    zero = tf.constant(0, dtype=tf.int64)
    return self._State(zero)
Example #26
Source File: tfexample_decoder_test.py From tf-slim with Apache License 2.0 | 5 votes |
def testDecodeExampleWithSparseTensor(self):
    """Decodes 'indices' and 'values' features into one SparseTensor."""
    expected_indices = np.array([[1], [2], [5]])
    expected_values = np.array([0.1, 0.2, 0.6]).astype('f')

    features = tf.train.Features(feature={
        'indices': self._EncodedInt64Feature(expected_indices),
        'values': self._EncodedFloatFeature(expected_values),
    })
    serialized_example = tf.train.Example(
        features=features).SerializeToString()

    with self.cached_session():
        # Reshape the serialized proto into a scalar tensor.
        serialized_example = array_ops.reshape(serialized_example, shape=[])

        keys_to_features = {
            'indices': parsing_ops.VarLenFeature(dtype=tf.int64),
            'values': parsing_ops.VarLenFeature(dtype=tf.float32),
        }
        items_to_handlers = {
            'labels': tfexample_decoder.SparseTensor(),
        }
        decoder = tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # Unpacking also checks that exactly one tensor comes back.
        (tf_labels,) = decoder.decode(serialized_example, ['labels'])
        sparse = tf_labels.eval()

        self.assertAllEqual(sparse.indices, expected_indices)
        self.assertAllEqual(sparse.values, expected_values)
        self.assertAllEqual(sparse.dense_shape, expected_values.shape)
Example #27
Source File: tfexample_decoder_test.py From tf-slim with Apache License 2.0 | 5 votes |
def testDecodeExampleShapeKeyTensor(self):
    """Decodes tensors whose shapes are stored under separate shape keys."""
    expected_image = np.random.rand(2, 3, 1).astype('f')
    expected_labels = np.array([[[1], [2], [3]], [[4], [5], [6]]])

    def shape_feature(array):
        # The array's shape, encoded as an int64 feature.
        return self._EncodedInt64Feature(np.array(array.shape))

    features = tf.train.Features(feature={
        'image': self._EncodedFloatFeature(expected_image),
        'image/shape': shape_feature(expected_image),
        'labels': self._EncodedInt64Feature(expected_labels),
        'labels/shape': shape_feature(expected_labels),
    })
    serialized_example = tf.train.Example(
        features=features).SerializeToString()

    with self.cached_session():
        # Reshape the serialized proto into a scalar tensor.
        serialized_example = array_ops.reshape(serialized_example, shape=[])

        keys_to_features = {
            'image': parsing_ops.VarLenFeature(dtype=tf.float32),
            'image/shape': parsing_ops.VarLenFeature(dtype=tf.int64),
            'labels': parsing_ops.VarLenFeature(dtype=tf.int64),
            'labels/shape': parsing_ops.VarLenFeature(dtype=tf.int64),
        }
        items_to_handlers = {
            'image': tfexample_decoder.Tensor(
                'image', shape_keys='image/shape'),
            'labels': tfexample_decoder.Tensor(
                'labels', shape_keys='labels/shape'),
        }
        decoder = tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        tf_image, tf_labels = decoder.decode(
            serialized_example, ['image', 'labels'])
        self.assertAllEqual(tf_image.eval(), expected_image)
        self.assertAllEqual(tf_labels.eval(), expected_labels)
Example #28
Source File: dataloader.py From gpt2-ml with Apache License 2.0 | 5 votes |
def classification_input_fn_builder(input_file, seq_length, is_training,
                                    drop_remainder, buffer_size=100):
    """Creates an `input_fn` closure to be passed to TPUEstimator.

    Args:
      input_file: passed to `tf.data.TFRecordDataset`.
      seq_length: length of the "input_ids" feature.
      is_training: when true, the dataset is repeated and shuffled.
      drop_remainder: forwarded to `map_and_batch`.
      buffer_size: shuffle buffer size used for training.

    Returns:
      A function taking `params` (which provides "batch_size") and returning
      the batched dataset.
    """
    name_to_features = {
        "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
        "label_ids": tf.FixedLenFeature([], tf.int64),
        "is_real_example": tf.FixedLenFeature([], tf.int64),
    }

    def input_fn(params):
        """The actual input function."""

        def decode(record):
            return _decode_record(record, name_to_features)

        dataset = tf.data.TFRecordDataset(input_file)
        # Training repeats and shuffles; eval reads once, in order.
        if is_training:
            dataset = dataset.repeat()
            dataset = dataset.shuffle(buffer_size=buffer_size)

        return dataset.apply(
            tf.data.experimental.map_and_batch(
                decode,
                batch_size=params["batch_size"],
                drop_remainder=drop_remainder))

    return input_fn
Example #29
Source File: dataloader.py From gpt2-ml with Apache License 2.0 | 5 votes |
def _decode_record(record, name_to_features):
    """Decodes a record to a TensorFlow example.

    Args:
      record: serialized example passed to `tf.parse_single_example`.
      name_to_features: feature spec for parsing.

    Returns:
      Dict of parsed tensors with every int64 tensor cast to int32.
    """
    example = tf.parse_single_example(record, name_to_features)

    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32. The keys are snapshotted first because the
    # dict is written to inside the loop.
    for name in list(example.keys()):
        tensor = example[name]
        if tensor.dtype == tf.int64:
            example[name] = tf.cast(tensor, tf.int32)

    return example
Example #30
Source File: utils.py From lamb with Apache License 2.0 | 5 votes |
def compute_lengths(symbols_list, eos_symbol, name=None, dtype=tf.int64):
    """Computes sequence lengths given end-of-sequence symbol.

    Args:
      symbols_list: list of [batch_size] tensors of symbols (e.g. integers).
      eos_symbol: end of sequence symbol (e.g. integer).
      name: name for the name scope of this op.
      dtype: type of symbols, default: tf.int64.

    Returns:
      Tensor [batch_size] of lengths of sequences. The first EOS is counted
      in the length; sequences without EOS get `len(symbols_list)`.
    """
    with tf.name_scope(name, 'compute_lengths'):
        max_len = len(symbols_list)
        eos_symbol_ = tf.constant(eos_symbol, dtype=dtype)

        # Step i scores max_len - i where the symbol is EOS and 0 elsewhere,
        # so the maximum over steps identifies the first EOS of each example.
        ends = []
        for step, symbols in enumerate(symbols_list):
            score = tf.constant(max_len - step, dtype=tf.int64)
            is_eos = tf.to_int64(tf.equal(symbols, eos_symbol_))
            ends.append(score * is_eos)

        # Examples without EOS have a maximum of 0 and therefore end up with
        # max_len + 1 here.
        no_eos_value = max_len + 1
        raw_lens = no_eos_value - tf.reduce_max(tf.stack(ends, 1), axis=1)

        # For examples that didn't have EOS decrease max_len + 1 to max_len.
        overshoot = tf.to_int64(tf.equal(raw_lens, no_eos_value))
        lens = tf.subtract(raw_lens, overshoot)

        return tf.stop_gradient(tf.reshape(lens, [-1]))