#!/usr/bin/python # # Copyright 2015 The Cluster-Insight Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """A cache of recent values of objects. SimpleCache stores a dictionary from object labels to the tuple (update_time, create_time, value) The lookup() method returns the value associated with the given label and its creation time if the value is defined and it was most recently updated less than _max_data_age_seconds ago. If no such value exists or it is too old, lookup() will fail. Note that lookup() success depends on the last update time and not on the data's creation time. The update() method stores a value associated with the given label in the cache. The value's most recent update time is passed as a parameter. If the given label existed before and the associated value without its 'timestamp' attribute did not change, then the data stored in the cache will not change. Only the most recent update time will be changed in the cache. Otherwise the value, the most recent update time and the creation time will be changed. In other words, the creation time is updated whenever the value is also changed in the cache. Calling update() always changes the update time, but it may not change the value or the creation time. Old data is removed from the cache as a side effect of calling the update() operation. Old data is removed when it was created more than DATA_CLEANUP_AGE_SECONDS seconds ago. There is no cleanup as the result of the lookup() to avoid slowing down cache hits. In this way ephemeral data does not stay in the cache indefinitely as long as new data is inserted into the cache. This class is thread-safe. Usage: cache = SimpleCache(MAX_DATA_AGE_SECONDS, DATA_CLEANUP_AGE_SECONDS) def get_value(label): value, timestamp_seconds = cache.lookup(label) if timestamp_seconds is not None: # handle cache hit else: # handle cache miss; usually fetch data from source value = fetch_data() timestamp_now = time.time() cache.update(label, value, timestamp_now) return value """ import collections import copy import threading import time import types # local import import utilities class SimpleCache(object): """A cache of named objects with specified freshness and cleanup times. Attributes: _lock: a lock protecting access to the data. _max_data_age_seconds: data older than this many seconds will not be returned. _data_cleanup_age_seconds: data older than this many seconds will be cleaned from the cache. _label_to_tuple: a lookup table from label to a named tuple (update_timestamp, value), where 'update_timestamp' is the time the data was last updated. 'value' is a deep copy of the data. _namedtuple: a named tuple containing a 'update_timestamp' and 'value' fields. """ def __init__(self, max_data_age_seconds, data_cleanup_age_seconds): assert (isinstance(max_data_age_seconds, int) or isinstance(max_data_age_seconds, long) or isinstance(max_data_age_seconds, float)) assert (isinstance(data_cleanup_age_seconds, int) or isinstance(data_cleanup_age_seconds, long) or isinstance(data_cleanup_age_seconds, float)) assert max_data_age_seconds >= 0 assert data_cleanup_age_seconds >= 0 assert data_cleanup_age_seconds >= max_data_age_seconds self._lock = threading.Lock() self._max_data_age_seconds = max_data_age_seconds self._data_cleanup_age_seconds = data_cleanup_age_seconds self._label_to_tuple = {} self._namedtuple = collections.namedtuple( 'Tuple', ['create_timestamp', 'update_timestamp', 'value']) def _cleanup(self, now): """Removes all data older than _data_cleanup_age_seconds from the cache. This routine prevents the accumulation of stale ephemeral data. Such data usually has a unique label. This method must be called when '_lock' is held. Args: now: current time in seconds since the Epoch. """ assert isinstance(now, float) threshold = now - self._data_cleanup_age_seconds # Scan the cache using a list of keys instead of iterating on the cache # directly because we are deleting elements from the cache while iterating. for key in self._label_to_tuple.keys(): if self._label_to_tuple[key].create_timestamp <= threshold: # delete current entry from the cache del self._label_to_tuple[key] def lookup(self, label, now=None): """Lookup the data with the given label in the cache. Args: label: the label of the data. must be a string. may be empty. now: current time in seconds. If 'now' is None, the cached entry is compared with the current wallclock time. Otherwise the cached entry is compared with the value of 'now'. Returns: When the given label has recent data in the cache ('update_timestamp' less than self._max_data_age_seconds seconds old), returns a tuple (deep copy of cached value, create_timestamp_of_cached_data). When the given label was not found in the cache or its data is too old, returns the tuple (None, None). """ assert isinstance(label, types.StringTypes) assert (now is None) or isinstance(now, float) self._lock.acquire() ts_seconds = time.time() if now is None else now if ((label in self._label_to_tuple) and (ts_seconds < (self._label_to_tuple[label].update_timestamp + self._max_data_age_seconds))): # a cache hit assert self._label_to_tuple[label].value is not None value, timestamp = (copy.deepcopy(self._label_to_tuple[label].value), self._label_to_tuple[label].create_timestamp) else: value, timestamp = (None, None) self._lock.release() return (value, timestamp) def update(self, label, value, update_timestamp=None): """Stores the given value and timestamp for the given label. Args: label: the value's label. It must be a string. It can be empty. value: the value stored in the cache. Must not be None. update_timestamp: the timestamp in seconds of the value. If 'update_timestamp' is None, then the update timestamp associated with 'value' is the current wallclock time. If 'update_timestamp' is not None, then this timestamp is stored with 'value'. If 'value' is the same as the current value associated with the label after removal of 'timestamp' attributes, then the cached value is not changed. The cache keeps a deep copy of 'value', so the caller may change 'value' afterwards. Returns: The values that was stored in the cache. If the value stored in the cache was not changed, then the returned value is the deep copy of the old cached value. Otherwise the returned value is 'value'. In any case, the caller may modify 'value' or the returned value after this method returns. """ assert isinstance(label, types.StringTypes) assert value is not None assert ((update_timestamp is None) or isinstance(update_timestamp, float)) self._lock.acquire() # Cleanup only when inserting new values into the cache in order to # avoid penalizing the cache hit operation. ts = time.time() if update_timestamp is None else update_timestamp self._cleanup(ts) if ((label in self._label_to_tuple) and (utilities.timeless_json_hash(value) == utilities.timeless_json_hash(self._label_to_tuple[label].value))): # cannot update just one field in a named tuple. create_ts = self._label_to_tuple[label].create_timestamp update_value = self._label_to_tuple[label].value ret_value = copy.deepcopy(update_value) else: create_ts = ts update_value = copy.deepcopy(value) ret_value = value # cannot update just one field in a named tuple. self._label_to_tuple[label] = self._namedtuple( update_timestamp=ts, create_timestamp=create_ts, value=update_value) self._lock.release() return ret_value def size(self): """Returns the number of entries in the cache. Note that you may lookup only recent entries in the cache (see the explanation of the lookup() function), even when the cache contains additional older entries. Returns: Number of entries in the cache. """ self._lock.acquire() n = len(self._label_to_tuple) self._lock.release() return n