# coding=utf-8
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=g-short-docstring-punctuation
"""Metrics that use histograms."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# pylint: disable=g-direct-tensorflow-import
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import confusion_matrix as cm
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import histogram_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope


def auc_using_histogram(boolean_labels,
                        scores,
                        score_range,
                        nbins=100,
                        collections=None,
                        check_shape=True,
                        name=None):
  """AUC computed by maintaining histograms.

  Rather than computing AUC directly, this Op maintains Variables containing
  histograms of the scores associated with `True` and `False` labels.  By
  comparing these the AUC is generated, with some discretization error.
  See: "Efficient AUC Learning Curve Calculation" by Bouckaert.

  This AUC Op updates in `O(batch_size + nbins)` time and works well even with
  large class imbalance.  The accuracy is limited by discretization error due
  to finite number of bins.  If scores are concentrated in fewer bins,
  accuracy is lower.  If this is a concern, we recommend trying different
  numbers of bins and comparing results.

  Args:
    boolean_labels:  1-D boolean `Tensor`.  Entry is `True` if the
      corresponding record is in class.
    scores:  1-D numeric `Tensor`, same shape as boolean_labels.
    score_range:  `Tensor` of shape `[2]`, same dtype as `scores`.  The min/max
      values of score that we expect.  Scores outside range will be clipped.
    nbins:  Integer number of bins to use.  Accuracy generally increases as
      the number of bins increases.
    collections: List of graph collections keys. Internal histogram Variables
      are added to these collections. Defaults to
      `[GraphKeys.LOCAL_VARIABLES]`.
    check_shape:  Boolean.  If `True`, do a runtime shape check on the scores
      and labels.
    name:  A name for this Op.  Defaults to "auc_using_histogram".

  Returns:
    auc:  `float32` scalar `Tensor`.  Fetching this converts internal
      histograms to auc value.
    update_op:  `Op`, when run, updates internal histograms.

  Raises:
    ValueError:  If `boolean_labels` does not have dtype `bool`.
  """
  if collections is None:
    collections = [ops.GraphKeys.LOCAL_VARIABLES]
  with variable_scope.variable_scope(
      name, 'auc_using_histogram', [boolean_labels, scores, score_range]):
    # Drop a squeezable trailing dimension before the rank-1 check below.
    scores, boolean_labels = cm.remove_squeezable_dimensions(
        scores, boolean_labels)
    score_range = ops.convert_to_tensor(score_range, name='score_range')
    boolean_labels, scores = _check_labels_and_scores(
        boolean_labels, scores, check_shape)
    hist_true, hist_false = _make_auc_histograms(boolean_labels, scores,
                                                 score_range, nbins)
    hist_true_acc, hist_false_acc, update_op = _auc_hist_accumulate(
        hist_true, hist_false, nbins, collections)
    auc = _auc_convert_hist_to_auc(hist_true_acc, hist_false_acc, nbins)
    return auc, update_op


def _check_labels_and_scores(boolean_labels, scores, check_shape):
  """Check the dtype/rank of labels and scores, return tensor versions.

  Args:
    boolean_labels:  Value convertible to a boolean `Tensor`.
    scores:  Value convertible to a `Tensor`.
    check_shape:  Boolean.  If `True`, the returned tensors depend on runtime
      assertions that both inputs have rank 1.

  Returns:
    Tuple `(boolean_labels, scores)` of `Tensor`s.

  Raises:
    ValueError:  If `boolean_labels` does not have dtype `bool`.
  """
  with ops.name_scope('_check_labels_and_scores',
                      values=[boolean_labels, scores]):
    boolean_labels = ops.convert_to_tensor(
        boolean_labels, name='boolean_labels')
    scores = ops.convert_to_tensor(scores, name='scores')

    if boolean_labels.dtype != dtypes.bool:
      raise ValueError(
          'Argument boolean_labels should have dtype bool. Found: %s' %
          boolean_labels.dtype)

    if not check_shape:
      return boolean_labels, scores

    labels_rank_1 = control_flow_ops.Assert(
        math_ops.equal(1, array_ops.rank(boolean_labels)),
        ['Argument boolean_labels should have rank 1. Found: ',
         boolean_labels.name, array_ops.shape(boolean_labels)])

    scores_rank_1 = control_flow_ops.Assert(
        math_ops.equal(1, array_ops.rank(scores)),
        ['Argument scores should have rank 1. Found: ', scores.name,
         array_ops.shape(scores)])

    # control_dependencies only attaches to ops *created* inside the context.
    # Returning the pre-existing tensors would leave the assertions
    # unreachable, so route both values through identity ops created here.
    with ops.control_dependencies([labels_rank_1, scores_rank_1]):
      checked_labels = array_ops.identity(boolean_labels)
      checked_scores = array_ops.identity(scores)
    return checked_labels, checked_scores


def _make_auc_histograms(boolean_labels, scores, score_range, nbins):
  """Create histogram tensors from one batch of labels/scores.

  Args:
    boolean_labels:  Boolean `Tensor` used as a mask over `scores`.
    scores:  Numeric `Tensor` of scores.
    score_range:  `Tensor` passed as the value range of the histograms.
    nbins:  Integer number of histogram bins.

  Returns:
    Tuple `(hist_true, hist_false)` of `int64` histogram `Tensor`s holding the
    per-bin counts of scores labeled `True` and `False` respectively.
  """
  with variable_scope.variable_scope(
      None, 'make_auc_histograms', [boolean_labels, scores, nbins]):
    # Histogram of scores for records in this batch with True label.
    hist_true = histogram_ops.histogram_fixed_width(
        array_ops.boolean_mask(scores, boolean_labels),
        score_range,
        nbins=nbins,
        dtype=dtypes.int64,
        name='hist_true')
    # Histogram of scores for records in this batch with False label.
    hist_false = histogram_ops.histogram_fixed_width(
        array_ops.boolean_mask(scores, math_ops.logical_not(boolean_labels)),
        score_range,
        nbins=nbins,
        dtype=dtypes.int64,
        name='hist_false')
    return hist_true, hist_false


def _auc_hist_accumulate(hist_true, hist_false, nbins, collections):
  """Accumulate histograms in new variables.

  Args:
    hist_true:  Histogram `Tensor` for the current batch's `True` records.
    hist_false:  Histogram `Tensor` for the current batch's `False` records.
    nbins:  Integer number of bins; the shape of each accumulator is `[nbins]`.
    collections:  List of graph collection keys the accumulators are added to.

  Returns:
    Tuple `(hist_true_acc, hist_false_acc, update_op)`: the two non-trainable
    accumulator variables and an `Op` that adds the batch histograms to them.
  """
  with variable_scope.variable_scope(
      None, 'hist_accumulate', [hist_true, hist_false]):
    # Holds running total histogram of scores for records labeled True.
    hist_true_acc = variable_scope.get_variable(
        'hist_true_acc',
        shape=[nbins],
        dtype=hist_true.dtype,
        initializer=init_ops.zeros_initializer(),
        collections=collections,
        trainable=False)
    # Holds running total histogram of scores for records labeled False.
    # Its dtype follows hist_false, the tensor that is accumulated into it.
    hist_false_acc = variable_scope.get_variable(
        'hist_false_acc',
        shape=[nbins],
        dtype=hist_false.dtype,
        initializer=init_ops.zeros_initializer(),
        collections=collections,
        trainable=False)

    update_op = control_flow_ops.group(
        hist_true_acc.assign_add(hist_true),
        hist_false_acc.assign_add(hist_false),
        name='update_op')

    return hist_true_acc, hist_false_acc, update_op


def _auc_convert_hist_to_auc(hist_true_acc, hist_false_acc, nbins):
  """Convert histograms to auc.

  Args:
    hist_true_acc:  `Tensor` holding accumulated histogram of scores for
      records that were `True`.
    hist_false_acc:  `Tensor` holding accumulated histogram of scores for
      records that were `False`.
    nbins:  Integer number of bins in the histograms.

  Returns:
    Scalar `Tensor` estimating AUC.
  """
  # Note that this follows the "Approximating AUC" section in:
  # Efficient AUC learning curve calculation, R. R. Bouckaert,
  # AI'06 Proceedings of the 19th Australian joint conference on Artificial
  # Intelligence: advances in Artificial Intelligence
  # Pages 181-191.
  # Note that the above paper has an error, and we need to re-order our bins to
  # go from high to low score.

  # Normalize histogram so we get fraction in each bin.
  # NOTE(review): if either accumulator is all zeros (no records of that class
  # seen yet) this divides by zero; presumably the result is NaN -- confirm.
  normed_hist_true = math_ops.truediv(hist_true_acc,
                                      math_ops.reduce_sum(hist_true_acc))
  normed_hist_false = math_ops.truediv(hist_false_acc,
                                       math_ops.reduce_sum(hist_false_acc))

  # These become delta x, delta y from the paper.
  delta_y_t = array_ops.reverse_v2(normed_hist_true, [0], name='delta_y_t')
  delta_x_t = array_ops.reverse_v2(normed_hist_false, [0], name='delta_x_t')

  # Cast to float32 so that the returned AUC is float32, as documented by
  # auc_using_histogram.
  delta_y_t = math_ops.cast(delta_y_t, dtypes.float32)
  delta_x_t = math_ops.cast(delta_x_t, dtypes.float32)

  # Trapezoidal integration, \int_0^1 0.5 * (y_t + y_{t-1}) dx_t
  y_t = _strict_1d_cumsum(delta_y_t, nbins)
  # The first trapezoid has y_{t-1} = 0, so it is handled separately.
  first_trap = delta_x_t[0] * y_t[0] / 2.0
  other_traps = delta_x_t[1:] * (y_t[1:] + y_t[:nbins - 1]) / 2.0
  return math_ops.add(first_trap, math_ops.reduce_sum(other_traps), name='auc')


def _strict_1d_cumsum(tensor, len_tensor):
  """Cumulative sum of a 1-D tensor, truncated to `len_tensor` entries.

  Args:
    tensor:  1-D `Tensor` to accumulate.
    len_tensor:  Integer number of leading cumsum entries to return.

  Returns:
    `Tensor` holding the first `len_tensor` entries of the cumulative sum.
  """
  # Assumes tensor shape is fully defined.
  return math_ops.cumsum(tensor)[:len_tensor]