# Python source code of image-search-engine

"""
  @author Victor I. Afolabi
  A.I. Engineer & Software developer
  javafolabi@gmail.com
  
  Created on 25 December, 2017 @ 7:18 PM.
  
  Copyright © 2017. Victor. All rights reserved.
"""
import argparse
import glob
import os
import pickle

import cv2
from scipy.spatial.distance import euclidean


################################################################################################
# +———————————————————————————————————————————————————————————————————————————————————————————+
# | Step 1: Image Descriptor
# +———————————————————————————————————————————————————————————————————————————————————————————+
################################################################################################
class RGBHistogram:
    """
    Image descriptor using a 3-D color histogram.

    :param bins: list
        Histogram size. 1-D list with one bin count per color channel;
        ideal values are between 8 and 128, but anything up to 256 works.

    Example:
        >>> histogram = RGBHistogram(bins=[32, 32, 32])
        >>> image = cv2.imread('folder/image.jpg')
        >>> feature_vector = histogram.describe(image=image)
        >>> print(feature_vector.shape)
    """

    def __init__(self, bins):
        self.bins = bins

    def describe(self, image):
        """
        Color description of a given image.

        Compute a 3-D histogram in the RGB color space, then normalize
        the histogram so that images with the same content, but either
        scaled larger or smaller, will have (roughly) the same histogram.

        :param image:
            Image to be described, as a BGR pixel array (e.g. the value
            returned by ``cv2.imread``), not a file path.
        :return: flattened 3-D histogram
            Flattened descriptor [feature vector].
        """
        # OpenCV loads images as BGR; convert so channel 0/1/2 are R/G/B.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        hist = cv2.calcHist(images=[image], channels=[0, 1, 2], mask=None,
                            histSize=self.bins, ranges=[0, 256] * 3)
        # Normalize into the histogram itself: `dst` must be an array
        # (or None), not a shape tuple.
        hist = cv2.normalize(hist, hist)
        return hist.flatten()


################################################################################################
# +———————————————————————————————————————————————————————————————————————————————————————————+
# | Step 2: Indexing
# +———————————————————————————————————————————————————————————————————————————————————————————+
################################################################################################
# key - image file name, value - computed feature vector/descriptor
def feature_extraction(dataset):
    """
    Build the feature index for every image in a dataset folder.

    Each ``.jpg`` / ``.png`` file directly inside `dataset` is loaded and
    described with an 8x8x8 ``RGBHistogram``.

    :param dataset: str
        Path to the folder containing the images.
    :return: dict
        key - image file name without its extension,
        value - computed feature vector/descriptor.
        If two files share a name and differ only in extension,
        the one processed last wins.
    """
    features = {}
    descriptor = RGBHistogram(bins=[8, 8, 8])

    # glob patterns are shell-style, not regular expressions: there is no
    # `|` alternation or `$` anchor, so each extension is matched on its own.
    for pattern in ('*.jpg', '*.png'):
        for filename in sorted(glob.glob(os.path.join(dataset, pattern))):
            # e.g. places/eiffel_tower.jpg => eiffel_tower
            img_name = os.path.splitext(os.path.basename(filename))[0]

            image = cv2.imread(filename)
            if image is None:
                # cv2.imread returns None for unreadable/corrupt files;
                # skip them instead of crashing the whole indexing run.
                continue

            # key - image name, value - feature vector
            features[img_name] = descriptor.describe(image)
    return features


# Writing the index to disk
def save(obj, path):
    """
    Pickle `obj` to `path`, creating the parent folder if necessary.

    :param obj:
        Any picklable object (here: the feature index).
    :param path: str
        Destination file. An existing file is overwritten.
    """
    directory = os.path.dirname(path)
    # A bare file name has no directory part; os.makedirs('') would raise.
    if directory:
        # exist_ok: the folder may already exist even when the file does not.
        os.makedirs(directory, exist_ok=True)
    # pickle writes bytes, so the file must be opened in binary mode.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


################################################################################################
# +———————————————————————————————————————————————————————————————————————————————————————————+
# | Step 3: Searching
# +———————————————————————————————————————————————————————————————————————————————————————————+
################################################################################################
class Searcher:
    """
    Rank indexed images by their distance to a query descriptor.

    :param features: dict
        key - image name, value - feature vector of that image.
    """

    def __init__(self, features):
        self.features = features

    def search(self, query):
        """
        Compare `query` against every indexed feature vector.

        :param query:
            Feature vector of the query image; must have the same
            length as the indexed vectors.
        :return: list
            ``(distance, name)`` tuples sorted by ascending Euclidean
            distance, i.e. most similar image first.
        """
        # dict.items() (not .item()) yields the (name, feature) pairs.
        return sorted((euclidean(query, feature), name)
                      for name, feature in self.features.items())

    # @staticmethod
    # def chi_squared(a, b, eps=1e-10):
    #     # compute the chi-squared distance
    #     dist = 0.5 * np.sum([pow(a - b, 2) / (a + b + eps)
    #                          for (a, b) in zip(a, b)])
    #     # return the chi-squared distance
    #     return dist


if __name__ == '__main__':
    ################################################################################################
    # +———————————————————————————————————————————————————————————————————————————————————————————+
    # | Command line interface: index a dataset folder and write the index to disk
    # +———————————————————————————————————————————————————————————————————————————————————————————+
    ################################################################################################
    cli = argparse.ArgumentParser()
    cli.add_argument('-d', '--dataset',
                     help='Path to dataset.',
                     default='../images/lord-of-the-rings/')
    cli.add_argument('-i', '--features',
                     help='Path to the features file.',
                     default='../saved/features.pkl')
    options = cli.parse_args()

    # Describe every image in the dataset, then persist the resulting index.
    save(feature_extraction(options.dataset), options.features)