python source code of caption

Action_Recognition_Zoo-master
- Images_for_readme
  - README.md
- process_dataset.py
- LICENSE
- average_scores.py
- models.py
- test_models.py
- main.py
- README.md
- dataset.py
- model_zoo
  - inceptionv4
    - pytorch_load.pyc
    - tensorflow_dump.py
    - __init__.pyc
    - torch_load.lua
    - __init__.py
    - pytorch_load.py
  - LICENSE
  - models
    - .github
      - ISSUE_TEMPLATE.md
    - textsum
      - BUILD
      - data.py
      - seq2seq_attention_decode.py
      - data_convert_example.py
      - data
        data
        vocab
      - batch_reader.py
      - README.md
      - seq2seq_attention.py
      - seq2seq_attention_model.py
      - beam_search.py
      - seq2seq_lib.py
    - street
      - g3doc
        vgslspecs.md
      - testdata
        arial-32-tiny
        numbers-16-tiny
        charset_size_10.txt
        numbers.charset_size=12.txt
        mnist-tiny
        charset_size=134.txt
        arial.charset_size=105.txt
      - python
        vgsl_train.py
        decoder.py
        vgsl_model.py
        decoder_test.py
        vgslspecs_test.py
        errorcounter_test.py
        shapes_test.py
        vgsl_eval.py
        errorcounter.py
        vgsl_input.py
        nn_ops.py
        vgsl_model_test.py
        shapes.py
        vgslspecs.py
      - README.md
      - cc
        rnn_ops.cc
    - AUTHORS
    - syntaxnet
      - syntaxnet
        char_properties.cc
        populate_test_inputs.h
        BUILD
        lexicon_builder.cc
        document_format.cc
        shared_store.h
        sentence_batch.cc
        arc_standard_transitions.cc
        parser_eval.py
        base.h
        workspace.cc
        registry.cc
        embedding_feature_extractor.h
        structured_graph_builder.py
        binary_segment_state_test.cc
        registry.h
        arc_standard_transitions_test.cc
        feature_extractor.h
        parser_transitions.h
        embedding_feature_extractor.cc
        char_properties_test.cc
        parser_trainer.py
        context.pbtxt
        sentence_features.h
        sentence.proto
        shared_store_test.cc
        fml_parser.cc
        dictionary.proto
        parser_features.h
        tagger_transitions.cc
        term_frequency_map.cc
        char_properties.h
        morpher_transitions.cc
        shared_store.cc
        testdata
        context.pbtxt
        document
        mini-training-set
        graph_builder.py
        graph_builder_test.py
        binary_segment_transitions_test.cc
        binary_segment_state.h
        binary_segment_state.cc
        reader_ops_test.py
        parser_features.cc
        models
        parsey_mcparseface
        context.pbtxt
        prefix-table
        fine-to-universal.map
        suffix-table
        label-map
        tag-map
        parsey_universal
        context.pbtxt
        tokenize_zh.sh
        context-tokenize-zh.pbtxt
        tokenize.sh
        parse.sh
        binary_segment_transitions.cc
        text_formats_test.py
        affix.h
        task_context.cc
        reader_ops.cc
        text_formats.cc
        beam_reader_ops.cc
        conll2tree.py
        populate_test_inputs.cc
        utils.h
        utils.cc
        syntaxnet.bzl
        proto_io.h
        feature_extractor.cc
        segmenter_utils.cc
        term_frequency_map.h
        sentence_batch.h
        task_spec.proto
        sparse.proto
        affix.cc
        parser_transitions.cc
        ops
        parser_ops.cc
        parser_trainer_test.sh
        morphology_label_set_test.cc
        fml_parser.h
        morphology_label_set.cc
        segmenter_utils_test.cc
        parser_state.cc
        feature_extractor.proto
        task_context.h
        feature_types.h
        morphology_label_set.h
        sentence_features_test.cc
        lexicon_builder_test.py
        workspace.h
        load_parser_ops.py
        tagger_transitions_test.cc
        test_main.cc
        document_format.h
        demo.sh
        document_filters.cc
        parser_state.h
        parser_features_test.cc
        unpack_sparse_features.cc
        segmenter_utils.h
        sentence_features.cc
        beam_reader_ops_test.py
        kbest_syntax.proto
      - universal.md
      - third_party
        utf
        runestrlen.c
        BUILD
        utfutf.c
        runetypebody.c
        runestrcmp.c
        runestrncmp.c
        runestrrchr.c
        runestrcpy.c
        utfnlen.c
        runestrncat.c
        utflen.c
        README
        utf.h
        rune.c
        utfrrune.c
        runestrdup.c
        utfrune.c
        runestrchr.c
        utfecpy.c
        runestrcat.c
        runestrncpy.c
        runestrecpy.c
        runetype.c
        utfdef.h
        runestrstr.c
      - util
        utf8
        BUILD
        unilib.cc
        unicodetext_unittest.cc
        unicodetext_main.cc
        unilib.h
        unicodetext.cc
        unicodetext.h
        unilib_utf8_utils.h
        gtest_main.cc
      - README.md
      - tools
        bazel.rc
      - Dockerfile
      - WORKSPACE
      - .gitignore
    - autoencoder
      - autoencoder_models
        DenoisingAutoencoder.py
        VariationalAutoencoder.py
        Autoencoder.py
        __init__.py
      - Utils.py
      - VariationalAutoencoderRunner.py
      - MaskingNoiseAutoencoderRunner.py
      - AdditiveGaussianNoiseAutoencoderRunner.py
      - AutoencoderRunner.py
      - __init__.py
    - swivel
      - analogy.cc
      - nearest.py
      - swivel.py
      - eval.mk
      - wordsim.py
      - text2bin.py
      - prep.py
      - fastprep.cc
      - fastprep.mk
      - README.md
      - .gitignore
      - vecs.py
      - glove_to_shards.py
    - slim
      - BUILD
      - datasets
        dataset_utils.py
        cifar10.py
        mnist.py
        download_and_convert_cifar10.py
        flowers.py
        __init__.py
        download_and_convert_mnist.py
        download_and_convert_flowers.py
        imagenet.py
        dataset_factory.py
      - download_and_convert_data.py
      - train_image_classifier.py
      - .DS_Store
      - deployment
        __init__.py
        model_deploy_test.py
        model_deploy.py
      - slim_walkthough.ipynb
      - nets
        vgg.py
        inception_utils.py
        inception_v3.py
        alexnet.py
        inception_v3_test.py
        cifarnet.py
        lenet.py
        inception_resnet_v2_test.py
        resnet_v1.py
        alexnet_test.py
        nets_factory.py
        resnet_v1_test.py
        nets_factory_test.py
        resnet_utils.py
        inception_v1.py
        inception_resnet_v2.py
        inception_v2.py
        vgg_test.py
        overfeat.py
        __init__.py
        inception_v4.py
        inception_v4_test.py
        overfeat_test.py
        resnet_v2_test.py
        inception.py
        resnet_v2.py
        inception_v1_test.py
        inception_v2_test.py
      - README.md
      - eval_image_classifier.py
      - scripts
        finetune_inception_v3_on_flowers.sh
        train_cifarnet_on_cifar10.sh
        finetune_inception_v1_on_flowers.sh
        train_lenet_on_mnist.sh
      - preprocessing
        vgg_preprocessing.py
        cifarnet_preprocessing.py
        inception_preprocessing.py
        lenet_preprocessing.py
        __init__.py
        preprocessing_factory.py
      - ._.DS_Store
    - video_prediction
      - push_datafiles.txt
      - download_data.sh
      - prediction_train.py
      - prediction_input.py
      - lstm_ops.py
      - README.md
      - prediction_model.py
    - .gitmodules
    - LICENSE
    - neural_gpu
      - neural_gpu.py
      - neural_gpu_trainer.py
      - README.md
      - data_utils.py
    - transformer
      - cluttered_mnist.py
      - tf_utils.py
      - example.py
      - data
        README.md
      - README.md
      - spatial_transformer.py
    - lm_1b
      - BUILD
      - lm_1b_eval.py
      - README.md
      - data_utils.py
    - namignizer
      - model.py
      - README.md
      - names.py
      - data_utils.py
      - .gitignore
    - resnet
      - BUILD
      - g3doc
        cifar_resnet.gif
        cifar_resnet_legends.gif
      - resnet_model.py
      - README.md
      - cifar_input.py
      - resnet_main.py
    - CONTRIBUTING.md
    - .DS_Store
    - differential_privacy
      - privacy_accountant
        tf
        BUILD
        accountant.py
        python
        BUILD
        gaussian_moments.py
      - multiple_teachers
        BUILD
        analysis.py
        metrics.py
        train_student.py
        train_student_mnist_250_lap_20_count_50_epochs_600.sh
        aggregation.py
        deep_cnn.py
        input.py
        README.md
        utils.py
        train_teachers.py
      - __init__.py
      - README.md
      - dp_sgd
        dp_mnist
        BUILD
        dp_mnist.py
        per_example_gradients
        BUILD
        per_example_gradients.py
        dp_optimizer
        BUILD
        sanitizer.py
        utils.py
        dp_pca.py
        dp_optimizer.py
        README.md
    - compression
      - decoder.py
      - encoder.py
      - README.md
      - msssim.py
    - inception
      - g3doc
      - inception
        imagenet_train.py
        inception_eval.py
        BUILD
        imagenet_eval.py
        slim
        BUILD
        variables_test.py
        scopes_test.py
        ops.py
        variables.py
        inception_model.py
        losses.py
        inception_test.py
        collections_test.py
        slim.py
        README.md
        ops_test.py
        losses_test.py
        scopes.py
        imagenet_distributed_train.py
        inception_model.py
        flowers_data.py
        image_processing.py
        inception_train.py
        imagenet_data.py
        data
        download_and_preprocess_imagenet.sh
        preprocess_imagenet_validation_data.py
        download_and_preprocess_flowers.sh
        download_and_preprocess_flowers_mac.sh
        download_imagenet.sh
        imagenet_lsvrc_2015_synsets.txt
        build_imagenet_data.py
        process_bounding_boxes.py
        build_image_data.py
        flowers_eval.py
        dataset.py
        flowers_train.py
        inception_distributed_train.py
      - README.md
      - WORKSPACE
      - .gitignore
    - README.md
    - im2txt
      - g3doc
      - README.md
      - im2txt
        BUILD
        evaluate.py
        show_and_tell_model.py
        data
        download_and_preprocess_mscoco.sh
        build_mscoco_data.py
        show_and_tell_model_test.py
        train.py
        configuration.py
        ops
        BUILD
        inputs.py
        image_processing.py
        image_embedding_test.py
        image_embedding.py
        run_inference.py
        inference_wrapper.py
        inference_utils
        BUILD
        caption_generator_test.py
        caption_generator.py
        vocabulary.py
        inference_wrapper_base.py
      - WORKSPACE
      - .gitignore
    - WORKSPACE
    - .gitignore
    - neural_programmer
      - wiki_data.py
      - neural_programmer.py
      - parameters.py
      - nn_utils.py
      - model.py
      - README.md
      - data_utils.py
    - ._.DS_Store
  - inceptionresnetv2
    - pytorch_load.pyc
    - tensorflow_dump.py
    - __init__.pyc
    - torch_load.lua
    - __init__.py
    - pytorch_load.py
  - __init__.pyc
  - __init__.py
  - README.md
  - bninception
    - pytorch_load.pyc
    - layer_factory.py
    - bn_inception.yaml
    - __init__.pyc
    - parse_caffe.py
    - layer_factory.pyc
    - __init__.py
    - inceptionv3.yaml
    - pytorch_load.py
- optical_flow
  - main.cpp
  - gpu_makefile
  - makefile
  - gpu_main.cpp
- transforms.py
- opts.py

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class for generating captions from an image-to-text model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import heapq
import math


import numpy as np


class Caption(object):
  """Represents a complete or partial caption.

  Captions are ordered by their `score` attribute, which lets them be stored
  in heaps and sorted directly.
  """

  def __init__(self, sentence, state, logprob, score, metadata=None):
    """Initializes the Caption.

    Args:
      sentence: List of word ids in the caption.
      state: Model state after generating the previous word.
      logprob: Log-probability of the caption.
      score: Score of the caption.
      metadata: Optional metadata associated with the partial sentence. If not
        None, a list of strings with the same length as 'sentence'.
    """
    self.sentence = sentence
    self.state = state
    self.logprob = logprob
    self.score = score
    self.metadata = metadata

  def __cmp__(self, other):
    """Compares Captions by score (Python 2 comparison protocol).

    Returns:
      0 if the scores are equal, -1 if this caption's score is lower than
      other's, and 1 otherwise.
    """
    assert isinstance(other, Caption)
    if self.score == other.score:
      return 0
    elif self.score < other.score:
      return -1
    else:
      return 1

  # Python 3 ignores __cmp__, so the ordering operators are spelled out
  # explicitly; heapq and list.sort() rely on __lt__. __eq__ is deliberately
  # not overridden so that equality and hashing keep their existing behavior.
  def __lt__(self, other):
    """Returns True if this caption's score is lower than other's."""
    if not isinstance(other, Caption):
      return NotImplemented
    return self.score < other.score

  def __le__(self, other):
    """Returns True if this caption's score is at most other's."""
    if not isinstance(other, Caption):
      return NotImplemented
    return self.score <= other.score

  def __gt__(self, other):
    """Returns True if this caption's score is higher than other's."""
    if not isinstance(other, Caption):
      return NotImplemented
    return self.score > other.score

  def __ge__(self, other):
    """Returns True if this caption's score is at least other's."""
    if not isinstance(other, Caption):
      return NotImplemented
    return self.score >= other.score


class TopN(object):
  """Keeps the n largest elements pushed so far, backed by a heapq heap."""

  def __init__(self, n):
    """Creates an empty container that retains at most n elements."""
    self._data = []
    self._n = n

  def size(self):
    """Returns the number of elements currently held."""
    assert self._data is not None
    held = self._data
    return len(held)

  def push(self, x):
    """Adds x, evicting the smallest element once n elements are held."""
    assert self._data is not None
    heap = self._data
    if len(heap) >= self._n:
      # Already full: push x and drop whichever element is now the smallest.
      heapq.heappushpop(heap, x)
      return
    heapq.heappush(heap, x)

  def extract(self, sort=False):
    """Hands over all held elements and invalidates the container.

    After this call the TopN must be reset() before any other method is used.

    Args:
      sort: If True, the returned list is sorted in descending order.

    Returns:
      The list of retained elements (at most n of them).
    """
    assert self._data is not None
    extracted, self._data = self._data, None
    if not sort:
      return extracted
    extracted.sort(reverse=True)
    return extracted

  def reset(self):
    """Makes the TopN empty and usable again."""
    self._data = []


class CaptionGenerator(object):
  """Class to generate captions from an image-to-text model."""

  def __init__(self,
               model,
               vocab,
               beam_size=3,
               max_caption_length=20,
               length_normalization_factor=0.0):
    """Initializes the generator.

    Args:
      model: Object encapsulating a trained image-to-text model. Must have
        methods feed_image() and inference_step(). For example, an instance of
        InferenceWrapperBase.
      vocab: A Vocabulary object.
      beam_size: Beam size to use when generating captions.
      max_caption_length: The maximum caption length before stopping the search.
      length_normalization_factor: If > 0, a number x such that complete
        captions are scored by logprob/length^x, rather than logprob. This
        changes the relative scores of captions depending on their lengths:
        longer captions will be favored. Values <= 0 disable normalization.
    """
    self.vocab = vocab
    self.model = model

    self.beam_size = beam_size
    self.max_caption_length = max_caption_length
    self.length_normalization_factor = length_normalization_factor

  def beam_search(self, sess, encoded_image):
    """Runs beam search caption generation on a single image.

    Args:
      sess: TensorFlow Session object.
      encoded_image: An encoded image string.

    Returns:
      A list of Caption sorted by descending score. The list holds complete
      captions (ending in vocab.end_id) if any were found, otherwise the
      remaining partial captions.
    """
    # Feed in the image to get the initial state.
    initial_state = self.model.feed_image(sess, encoded_image)

    initial_beam = Caption(
        sentence=[self.vocab.start_id],
        state=initial_state[0],
        logprob=0.0,
        score=0.0,
        metadata=[""])
    partial_captions = TopN(self.beam_size)
    partial_captions.push(initial_beam)
    complete_captions = TopN(self.beam_size)

    # Run beam search.
    for _ in range(self.max_caption_length - 1):
      partial_captions_list = partial_captions.extract()
      partial_captions.reset()
      input_feed = np.array([c.sentence[-1] for c in partial_captions_list])
      state_feed = np.array([c.state for c in partial_captions_list])

      softmax, new_states, metadata = self.model.inference_step(sess,
                                                                input_feed,
                                                                state_feed)
      # Explicit None/length check rather than truthiness: bool() of a
      # multi-element numpy array raises ValueError, and a one-element array
      # would be judged by its content instead of its presence.
      has_metadata = metadata is not None and len(metadata) > 0

      for i, partial_caption in enumerate(partial_captions_list):
        word_probabilities = softmax[i]
        state = new_states[i]
        # For this partial caption, get the beam_size most probable next words.
        # nlargest avoids sorting the whole vocabulary for every beam and
        # yields the same (word, prob) pairs in descending-probability order.
        words_and_probs = heapq.nlargest(
            self.beam_size, enumerate(word_probabilities),
            key=lambda x: x[1])
        if has_metadata:
          metadata_list = partial_caption.metadata + [metadata[i]]
        else:
          metadata_list = None
        # Each next word gives a new partial caption.
        for w, p in words_and_probs:
          if p < 1e-12:
            continue  # Avoid log(0).
          sentence = partial_caption.sentence + [w]
          logprob = partial_caption.logprob + math.log(p)
          score = logprob
          if w == self.vocab.end_id:
            # Only finished captions are length-normalized.
            if self.length_normalization_factor > 0:
              score /= len(sentence)**self.length_normalization_factor
            beam = Caption(sentence, state, logprob, score, metadata_list)
            complete_captions.push(beam)
          else:
            beam = Caption(sentence, state, logprob, score, metadata_list)
            partial_captions.push(beam)
      if partial_captions.size() == 0:
        # We have run out of partial candidates; happens when beam_size = 1.
        break

    # If we have no complete captions then fall back to the partial captions.
    # But never output a mixture of complete and partial captions because a
    # partial caption could have a higher score than all the complete captions.
    if not complete_captions.size():
      complete_captions = partial_captions

    return complete_captions.extract(sort=True)