# python source code of predict

#
# Copyright (c) 2018-2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import falcon
from grpc import StatusCode
import numpy as np
from tensorflow.core.framework import tensor_pb2
from tensorflow.python.framework import tensor_shape
from tensorflow_serving.apis import predict_pb2
from tensorflow.python.framework import dtypes as dtypes
from tensorflow.python.framework import tensor_util as tensor_util
from ie_serving.config import GLOBAL_CONFIG
from ie_serving.models.shape_management.utils import BatchingMode, ShapeMode
from ie_serving.server.constants import \
    INVALID_INPUT_KEY, INVALID_SHAPE, INVALID_BATCHSIZE, GRPC, REST
from ie_serving.logger import get_logger
from tensorflow import __version__ as tf_version
if tf_version.split(".")[0] == "2":
    from tensorflow import make_ndarray, make_tensor_proto
else:  # TF version 1.x
    from tensorflow.contrib.util import make_ndarray, make_tensor_proto

# Module-level logger, named after this module.
logger = get_logger(__name__)

# Status codes reported for an invalid-argument error, keyed by service
# type: the gRPC status code for GRPC and the falcon HTTP status for REST.
statusCodes = {
    'invalid_arg': {GRPC: StatusCode.INVALID_ARGUMENT,
                    REST: falcon.HTTP_BAD_REQUEST},
}


def _validate_input_shape(target_engine, tensor_name, tensor_input):
    """Check a single input tensor against the shape the model expects.

    Args:
        target_engine: engine object exposing ``net.inputs``,
            ``shape_info.mode`` and ``batching_info``.
        tensor_name: name of the model input the tensor is bound to.
        tensor_input: numpy array holding the request data.

    Returns:
        An error message string when the tensor is rejected, otherwise None.
    """
    shape_required_in_model = target_engine.net.inputs[tensor_name].shape
    input_shape = list(tensor_input.shape)

    # For reshapable models check all dimensions,
    # for non-reshapable, check all starting from the second (omit
    # batch size)
    if target_engine.shape_info.mode == ShapeMode.DISABLED:
        starting_dim = 1
    else:
        starting_dim = 0

    # A different number of dimensions is always a shape error. Without this
    # explicit check a 0-d input compared against a 1-d model input would
    # pass the sliced comparison (both slices are empty) and then fail with
    # IndexError on shape[0] in the batch size check below.
    rank_mismatch = len(shape_required_in_model) != len(input_shape)

    # check requested shape and model shape
    if rank_mismatch or shape_required_in_model[starting_dim:] != \
            input_shape[starting_dim:]:
        message = INVALID_SHAPE.format(input_shape, shape_required_in_model)
        logger.debug("PREDICT error: {}".format(message))
        return message

    # check if input batch size match the model only if not auto mode;
    # a 0-d tensor has no batch dimension to compare
    if target_engine.batching_info.mode != BatchingMode.AUTO and \
            input_shape and shape_required_in_model[0] != input_shape[0]:
        message = INVALID_BATCHSIZE.format(
            input_shape[0],
            target_engine.batching_info.batch_size)
        logger.debug("PREDICT error,Invalid batchsize:{}".format(
            message))
        return message

    return None


def prepare_input_data(target_engine, data, service_type):
    """Convert request inputs to numpy arrays and validate them.

    Args:
        target_engine: engine object exposing ``input_key_names``,
            ``model_keys``, ``net``, ``shape_info`` and ``batching_info``.
        data: mapping of requested input names to their payloads
            (converted with ``make_ndarray`` for GRPC, ``np.asarray``
            otherwise).
        service_type: GRPC or REST; selects how payloads are converted.

    Returns:
        ``(inference_input, None)`` on success, where ``inference_input``
        maps model tensor names to numpy arrays, or ``(None, error_message)``
        when an input key, shape or batch size is invalid.
    """
    model_inputs_in_input_request = list(dict(data))
    input_keys = target_engine.input_key_names
    inference_input = {}

    for requested_input_blob in model_inputs_in_input_request:
        if requested_input_blob not in input_keys:
            message = INVALID_INPUT_KEY % (model_inputs_in_input_request,
                                           input_keys)
            logger.debug("PREDICT error: {}".format(message))
            return None, message

        tensor_name = target_engine.model_keys['inputs'][requested_input_blob]
        if service_type == GRPC:
            # Any deserialization failure of the client-supplied proto is
            # reported back to the caller as an error message.
            try:
                tensor_input = make_ndarray(data[requested_input_blob])
            except Exception as e:
                message = str(e)
                logger.debug("PREDICT prepare_input_data make_ndarray error: "
                             "{}".format(message))
                return None, message
        else:
            tensor_input = np.asarray(data[requested_input_blob])

        # Validate shape if shape not in auto mode
        if target_engine.shape_info.mode != ShapeMode.AUTO:
            message = _validate_input_shape(target_engine, tensor_name,
                                            tensor_input)
            if message is not None:
                return None, message

        inference_input[tensor_name] = tensor_input
    return inference_input, None


'''
The function _prepare_output_as_AppendArrayToTensorProto returns inference
results in the form of a flattened list of array elements. It is serialized
using the functions from the tensor_util._NP_TO_APPEND_FN mapping, which
employ the module fast_tensor_util and its Append<dtype>ArrayToTensorProto
functions. Despite the module name, it is slower than make_tensor_proto.
'''


def _prepare_output_as_AppendArrayToTensorProto(
        inference_output,
        model_available_outputs):
    """Build a PredictResponse by appending flattened output arrays.

    ``model_available_outputs`` maps response output names to model output
    names. Model outputs that are absent from ``inference_output`` are
    skipped. Each present output is written to a TensorProto carrying its
    dtype and shape, filled through the append function registered for its
    numpy dtype in ``tensor_util._NP_TO_APPEND_FN``.
    """
    response = predict_pb2.PredictResponse()
    for response_key in model_available_outputs:
        blob_name = model_available_outputs[response_key]
        if blob_name not in inference_output:
            continue

        blob = inference_output[blob_name]
        tf_dtype = dtypes.as_dtype(blob.dtype)
        shape_proto = tensor_shape.as_shape(blob.shape).as_proto()
        proto = tensor_pb2.TensorProto(dtype=tf_dtype.as_datatype_enum,
                                       tensor_shape=shape_proto)
        flat_values = blob.flatten()
        append_fn = tensor_util._NP_TO_APPEND_FN[tf_dtype.as_numpy_dtype]
        append_fn(proto, flat_values)
        response.outputs[response_key].CopyFrom(proto)
    return response


'''
The function _prepare_output_with_make_tensor_proto implements a faster
serialization mechanism. For most models it returns the
data in string format, converted via numpy tostring calls.
On the client side the inference response can be deserialized using
the TensorFlow make_ndarray function.
'''


def _prepare_output_with_make_tensor_proto(
        inference_output,
        model_available_outputs):
    """Build a PredictResponse using make_tensor_proto for every output.

    ``model_available_outputs`` maps response output names to model output
    names; each mapped model output is looked up in ``inference_output``
    and copied into the response under its response name.
    """
    response = predict_pb2.PredictResponse()
    for response_key, blob_name in model_available_outputs.items():
        tensor_proto = make_tensor_proto(inference_output[blob_name])
        response.outputs[response_key].CopyFrom(tensor_proto)
    return response


# Serialization method selection: the AppendArrayToTensorProto variant is
# used only when it is named explicitly in GLOBAL_CONFIG; any other value
# selects the make_tensor_proto variant.
prepare_output = (
    _prepare_output_as_AppendArrayToTensorProto
    if GLOBAL_CONFIG['serialization_function'] ==
    '_prepare_output_as_AppendArrayToTensorProto'
    else _prepare_output_with_make_tensor_proto
)