Python sklearn.externals.joblib.Memory() Examples
The following are 22 code examples of sklearn.externals.joblib.Memory().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.externals.joblib, or try the search function.
Example #1
Source File: asirra.py From sklearn-theano with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fetch_asirra(image_count=1000):
    """Load the Asirra images and labels through a ``Memory`` cache.

    The path returned by ``check_fetch_asirra()`` is used both as the cache
    location and as the first argument of the cached ``_fetch_asirra`` call.

    Parameters
    ----------
    image_count : positive integer
        Forwarded to ``_fetch_asirra`` as ``image_count``.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'images', the sample images, 'data', the flattened images,
        'target', the label for the image (0 for cat, 1 for dog),
        and 'DESCR' the full description of the dataset.
    """
    dataset_path = check_fetch_asirra()
    cache = Memory(cachedir=dataset_path, compress=6, verbose=0)
    cached_loader = cache.cache(_fetch_asirra)
    images, target = cached_loader(dataset_path, image_count=image_count)
    # One row per image: every trailing axis is collapsed into a single one.
    flattened = images.reshape(len(images), -1)
    return Bunch(data=flattened,
                 images=images,
                 target=target,
                 DESCR="Asirra cats and dogs dataset")
Example #2
Source File: feature_extraction.py From mne-features with BSD 3-Clause "New" or "Revised" License | 6 votes |
def transform(self, X):
    """Extract features from the array X.

    ``extract_features`` is wrapped with a ``joblib.Memory`` created from
    ``self.memory`` before being called.

    Parameters
    ----------
    X : ndarray, shape (n_epochs, n_channels, n_times)

    Returns
    -------
    Xnew : ndarray, shape (n_epochs, n_features)
        Extracted features.
    """
    cache = joblib.Memory(location=self.memory)
    cached_extract = cache.cache(extract_features)
    Xnew = cached_extract(X, self.sfreq, self.selected_funcs,
                          funcs_params=self.params,
                          n_jobs=self.n_jobs)
    return Xnew
Example #3
Source File: base.py From modl with BSD 2-Clause "Simplified" License | 6 votes |
def __init__(self, mask=None, smoothing_fwhm=None,
             standardize=True, detrend=True,
             low_pass=None, high_pass=None, t_r=None,
             target_affine=None, target_shape=None,
             mask_strategy='epi', mask_args=None,
             memory=Memory(cachedir=None),
             memory_level=2,
             n_jobs=1, verbose=0, ):
    """Store every constructor argument on the instance, unmodified.

    No validation or conversion is performed here: each keyword ends up in
    an attribute of the same name.
    """
    # NOTE(review): the default ``memory`` object is created once, when this
    # ``def`` is evaluated, and is therefore shared by all instances that
    # rely on the default.

    # Masking options.
    self.mask = mask
    self.mask_strategy = mask_strategy
    self.mask_args = mask_args
    self.target_affine = target_affine
    self.target_shape = target_shape

    # Signal pre-processing options.
    self.smoothing_fwhm = smoothing_fwhm
    self.standardize = standardize
    self.detrend = detrend
    self.low_pass = low_pass
    self.high_pass = high_pass
    self.t_r = t_r

    # Caching and execution options.
    self.memory = memory
    self.memory_level = memory_level
    self.n_jobs = n_jobs
    self.verbose = verbose
Example #4
Source File: unmask.py From modl with BSD 2-Clause "Simplified" License | 6 votes |
def __init__(self, mask_img=None, smoothing_fwhm=None,
             standardize=False, detrend=False,
             low_pass=None, high_pass=None, t_r=None,
             target_affine=None, target_shape=None,
             mask_strategy='background', mask_args=None,
             memory=Memory(cachedir=None), memory_level=0,
             n_jobs=1, verbose=0):
    """Initialise by delegating every argument to ``MultiNiftiMasker``.

    This constructor only changes defaults relative to whatever the parent
    declares; all values are passed on by keyword.
    """
    # Mask is provided or computed
    masker_options = dict(
        mask_img=mask_img,
        n_jobs=n_jobs,
        smoothing_fwhm=smoothing_fwhm,
        standardize=standardize,
        detrend=detrend,
        low_pass=low_pass,
        high_pass=high_pass,
        t_r=t_r,
        target_affine=target_affine,
        target_shape=target_shape,
        mask_strategy=mask_strategy,
        mask_args=mask_args,
        memory=memory,
        memory_level=memory_level,
        verbose=verbose,
    )
    MultiNiftiMasker.__init__(self, **masker_options)
Example #5
Source File: ClusteringForwarder_old.py From PReMVOS with MIT License | 6 votes |
def cluster_all_options(self, original_track_ys):
    """Run clustering over a grid of PCA sizes and HDBSCAN parameters.

    For every ``n_components`` in a fixed list the input is reduced with
    PCA, then ``cluster_and_classify`` is called for each
    ``(min_samples, min_cluster_size)`` pair in ``range(6, 51, 2)`` and the
    result is passed to ``write_summary``. Elapsed times are printed to
    ``log.v5`` after each ``min_samples`` run, after each dimensionality,
    and once at the end.

    Side effects: creates a ``Memory`` under ``<output_folder>tree_cache/``
    and sets ``self.test_output_file`` under ``<output_folder>tests/``; both
    are numbered by the count of entries already present in those folders.

    Parameters
    ----------
    original_track_ys
        Data handed to ``PCA.fit_transform``.
    """
    sweep_start = time.time()

    # Next free slot in the tree cache: zero-padded count of existing entries.
    cache_slot = len(os.listdir(self.output_folder + "tree_cache/"))
    savedMemory = Memory(self.output_folder + "tree_cache/"
                         + str(cache_slot).zfill(4) + "/")

    # Same numbering scheme for the summary output location.
    test_slot = len(os.listdir(self.output_folder + "tests/"))
    self.test_output_file = (self.output_folder + "tests/"
                             + str(test_slot).zfill(3) + "/")

    for n_components in [8, 12, 16, 24, 32, 48, 64, 96, 128]:
        dimension_start = time.time()
        reducer = PCA(n_components=n_components)
        track_ys = reducer.fit_transform(original_track_ys)

        for min_samples in range(6, 51, 2):
            run_start = time.time()

            for min_cluster_size in range(6, 51, 2):
                params = (n_components, min_cluster_size, min_samples)
                cluster_ids, _class_ids, _clusterer = \
                    self.cluster_and_classify(track_ys, None, params,
                                              savedMemory)
                # NOTE(review): ``track_classes`` is not bound anywhere in
                # this method; unless it is a module-level name this line
                # raises NameError -- confirm against the full file.
                cluster_class_list, cluster_class_counts = \
                    self.create_cluster_class_lists(cluster_ids,
                                                    track_classes)
                self.write_summary(cluster_ids, cluster_class_list,
                                   cluster_class_counts, params)

            print("this run elapsed =", time.time() - run_start,
                  file=log.v5)

        print("this dimensionality elapsed =",
              time.time() - dimension_start, file=log.v5)

    print("total clustering elapsed =", time.time() - sweep_start,
          file=log.v5)
    return
Example #6
Source File: fmri.py From modl with BSD 2-Clause "Simplified" License | 5 votes |
def __init__(self, dictionary, alpha=0.1,
             transform_batch_size=None,
             mask=None, smoothing_fwhm=None,
             standardize=False, detrend=False,
             low_pass=None, high_pass=None, t_r=None,
             target_affine=None, target_shape=None,
             mask_strategy='background', mask_args=None,
             memory=Memory(cachedir=None), memory_level=2,
             n_jobs=1, verbose=0, ):
    """Keep ``dictionary`` and delegate the rest to ``fMRICoderMixin``.

    The dictionary is stored on ``self.dictionary`` and also handed to the
    mixin as ``dict_init``; ``n_components`` is always passed as ``None``.
    """
    self.dictionary = dictionary
    mixin_options = dict(
        n_components=None,
        alpha=alpha,
        dict_init=self.dictionary,
        mask=mask,
        smoothing_fwhm=smoothing_fwhm,
        standardize=standardize,
        detrend=detrend,
        low_pass=low_pass,
        high_pass=high_pass,
        transform_batch_size=transform_batch_size,
        t_r=t_r,
        target_affine=target_affine,
        target_shape=target_shape,
        mask_strategy=mask_strategy,
        mask_args=mask_args,
        memory=memory,
        memory_level=memory_level,
        n_jobs=n_jobs,
        verbose=verbose,
    )
    fMRICoderMixin.__init__(self, **mixin_options)
Example #7
Source File: test_pipeline.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pipeline_wrong_memory():
    """Fitting a Pipeline whose ``memory`` is an integer raises ValueError.

    The error message is matched against the text naming the accepted
    kinds of ``memory`` and echoing the offending value.
    """
    iris = load_iris()
    X, y = iris.data, iris.target

    # An integer is neither a string nor a Memory-like object.
    memory = 1
    steps = [('transf', DummyTransf()), ('svc', SVC())]
    cached_pipe = Pipeline(steps, memory=memory)

    expected_message = ("'memory' should be None, a string or"
                        " have the same interface as "
                        "sklearn.externals.joblib.Memory."
                        " Got memory='1' instead.")
    assert_raises_regex(ValueError, expected_message,
                        cached_pipe.fit, X, y)
Example #8
Source File: test_pipeline.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pipeline_with_cache_attribute():
    """Check Pipeline's handling of two custom ``memory`` objects.

    Fitting with a ``DummyMemory`` instance is expected to succeed, while
    fitting with a ``WrongDummyMemory`` instance is expected to raise
    ValueError with a message that embeds the object's string form.
    """
    X = np.array([[1, 2]])

    # First case: fit must simply run.
    accepted = Pipeline([('transf', Transf()), ('clf', Mult())],
                        memory=DummyMemory())
    accepted.fit(X, y=None)

    # Second case: fit must be rejected.
    dummy = WrongDummyMemory()
    rejected = Pipeline([('transf', Transf()), ('clf', Mult())],
                        memory=dummy)
    message_template = ("'memory' should be None, a string or"
                        " have the same interface as "
                        "sklearn.externals.joblib.Memory."
                        " Got memory='{}' instead.")
    assert_raises_regex(ValueError, message_template.format(dummy),
                        rejected.fit, X)
Example #9
Source File: transform.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, min_eig=0, copy=True,
             memory=Memory(cachedir=None, verbose=0)):
    """Store ``min_eig``, ``copy`` and ``memory`` as same-named attributes.

    Nothing is validated or converted at construction time.
    """
    # NOTE(review): the default ``memory`` is a single object created when
    # the ``def`` is evaluated and shared by instances using the default.
    self.memory = memory
    self.copy = copy
    self.min_eig = min_eig
Example #10
Source File: transform.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, min_eig=0, copy=True, negatives_likely=True,
             memory=Memory(cachedir=None, verbose=0)):
    """Store the four constructor arguments as same-named attributes.

    Nothing is validated or converted at construction time.
    """
    # NOTE(review): the default ``memory`` is a single object created when
    # the ``def`` is evaluated and shared by instances using the default.
    self.memory = memory
    self.negatives_likely = negatives_likely
    self.copy = copy
    self.min_eig = min_eig
Example #11
Source File: transform.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_memory(memory):
    """Turn a string into a quiet ``Memory``; return anything else as-is.

    Parameters
    ----------
    memory : string or object
        If it is an instance of ``string_types`` it is passed as the first
        positional argument to ``Memory`` with ``verbose=0``.

    Returns
    -------
    The new ``Memory`` for string input, otherwise ``memory`` itself.
    """
    if not isinstance(memory, string_types):
        return memory
    return Memory(memory, verbose=0)
Example #12
Source File: second_level_model.py From nistats with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, mask_img=None, smoothing_fwhm=None,
             memory=Memory(None), memory_level=1, verbose=0,
             n_jobs=1, minimize_memory=True):
    """Store the model options and reset the fitted-state attributes.

    A ``memory`` given as a string (``_basestring``) is wrapped in
    ``Memory``; any other value is stored unchanged.
    ``second_level_input_`` and ``confounds_`` start out as ``None``.
    """
    self.mask_img = mask_img
    self.smoothing_fwhm = smoothing_fwhm

    wants_wrapping = isinstance(memory, _basestring)
    self.memory = Memory(memory) if wants_wrapping else memory

    self.memory_level = memory_level
    self.verbose = verbose
    self.n_jobs = n_jobs
    self.minimize_memory = minimize_memory

    # Placeholders for state that is not known at construction time.
    self.second_level_input_ = None
    self.confounds_ = None
Example #13
Source File: knn.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, div_funcs=('kl',), Ks=(3,), do_sym=False, n_jobs=1,
             clamp=True, min_dist=1e-3,
             flann_algorithm='auto', flann_args=None, version='best',
             memory=Memory(cachedir=None, verbose=0)):
    """Store every constructor argument as an attribute of the same name.

    Nothing is validated or converted at construction time.
    """
    # NOTE(review): the default ``memory`` is a single object created when
    # the ``def`` is evaluated and shared by instances using the default.

    # What to estimate.
    self.div_funcs = div_funcs
    self.Ks = Ks
    self.do_sym = do_sym
    self.version = version

    # Numerical options.
    self.clamp = clamp
    self.min_dist = min_dist

    # FLANN options.
    self.flann_algorithm = flann_algorithm
    self.flann_args = flann_args

    # Execution and caching.
    self.n_jobs = n_jobs
    self.memory = memory
Example #14
Source File: fmri.py From modl with BSD 2-Clause "Simplified" License | 5 votes |
def __init__(self, n_components=20, alpha=0.1, dict_init=None,
             transform_batch_size=None,
             mask=None, smoothing_fwhm=None,
             standardize=True, detrend=True,
             low_pass=None, high_pass=None, t_r=None,
             target_affine=None, target_shape=None,
             mask_strategy='background', mask_args=None,
             memory=Memory(cachedir=None), memory_level=2,
             n_jobs=1, verbose=0, ):
    """Initialise the base estimator, then store the coder-specific options.

    Masking, filtering, caching and execution arguments are forwarded to
    ``BaseNilearnEstimator.__init__``; ``n_components``,
    ``transform_batch_size``, ``dict_init`` and ``alpha`` are stored here.
    """
    base_options = dict(
        mask=mask,
        smoothing_fwhm=smoothing_fwhm,
        standardize=standardize,
        detrend=detrend,
        low_pass=low_pass,
        high_pass=high_pass,
        t_r=t_r,
        target_affine=target_affine,
        target_shape=target_shape,
        mask_strategy=mask_strategy,
        mask_args=mask_args,
        memory=memory,
        memory_level=memory_level,
        n_jobs=n_jobs,
        verbose=verbose,
    )
    BaseNilearnEstimator.__init__(self, **base_options)

    self.n_components = n_components
    self.transform_batch_size = transform_batch_size
    self.dict_init = dict_init
    self.alpha = alpha
Example #15
Source File: test_fmri.py From modl with BSD 2-Clause "Simplified" License | 5 votes |
def test_dict_fact(method, memory):
    """Fit ``fMRIDictFact`` on generated data and count recovered maps.

    With a truthy ``memory`` a cache in the first cache directory is used at
    ``memory_level`` 2. Otherwise the test is skipped unless ``method`` is
    ``'masked'`` and a ``Memory(cachedir=None)`` at level 0 is used. The
    test passes when at least four entries of the absolute
    component-by-map product of row-normalised arrays exceed 0.95.
    """
    if memory:
        memory = Memory(cachedir=get_cache_dirs()[0])
        memory_level = 2
    else:
        if method != 'masked':
            pytest.skip()
        memory = Memory(cachedir=None)
        memory_level = 0

    data, mask_img, components, init = _make_test_data(n_subjects=10)
    dict_fact = fMRIDictFact(n_components=4, random_state=0,
                             memory=memory, memory_level=memory_level,
                             mask=mask_img, dict_init=init,
                             method=method, reduction=2,
                             smoothing_fwhm=None, n_epochs=2, alpha=1)
    dict_fact.fit(data)

    def _scale_rows_in_place(rows):
        # Divide each row by its Euclidean norm; all-zero rows are divided
        # by 1 so they stay zero instead of producing NaN.
        norms = np.sqrt(np.sum(rows ** 2, axis=1))
        norms[norms == 0] = 1
        rows /= norms[:, np.newaxis]
        return rows

    # Bring axis 3 to the front, then flatten everything after it.
    maps = np.rollaxis(dict_fact.components_img_.get_data(), 3, 0)
    components = np.rollaxis(components.get_data(), 3, 0)
    maps = maps.reshape((maps.shape[0], -1))
    components = components.reshape((components.shape[0], -1))

    components = _scale_rows_in_place(components)
    maps = _scale_rows_in_place(maps)

    similarity = np.abs(components.dot(maps.T))
    recovered_maps = np.sum(similarity > 0.95)
    assert (recovered_maps >= 4)
Example #16
Source File: image.py From modl with BSD 2-Clause "Simplified" License | 5 votes |
def load_image(source, scale=1, gray=False, memory=Memory(cachedir=None)):
    """Load one of three named images as a float32 array.

    Parameters
    ----------
    source : {'face', 'lisboa', 'aviris'}
        Which image to load.
    scale : number, default 1
        For 'face' and 'lisboa', when different from 1 the image is passed
        through a cached ``rescale(image, scale=scale)``. Not used for
        'aviris'.
    gray : bool, default False
        Forwarded to ``face(gray=...)`` / ``imread(as_grey=...)``. Not used
        for 'aviris'.
    memory : Memory
        Cache used to wrap ``rescale``.

    Returns
    -------
    image : ndarray of float32

    Raises
    ------
    ValueError
        If ``source`` is none of the three known names.
    """
    data_dir = get_data_dirs()[0]

    def _as_scaled_float(raw):
        # Shared post-processing for the two photographic sources: divide
        # by 255, guarantee a trailing channel axis, optionally rescale.
        pixels = raw.astype(np.float32) / 255
        if pixels.ndim == 2:
            pixels = pixels[..., np.newaxis]
        if scale != 1:
            pixels = memory.cache(rescale)(pixels, scale=scale)
        return pixels

    if source == 'face':
        return _as_scaled_float(face(gray=gray))

    if source == 'lisboa':
        photo_path = join(data_dir, 'images', 'lisboa.jpg')
        return _as_scaled_float(imread(photo_path, as_grey=gray))

    if source == 'aviris':
        from spectral import open_image
        header_path = join(data_dir, 'aviris',
                           'f100826t01p00r05rdn_b/'
                           'f100826t01p00r05rdn_b_sc01_ort_img.hdr')
        image = open_image(header_path)
        image = np.array(image.open_memmap(), dtype=np.float32)

        # Drop band index 110 along the last axis.
        good_bands = list(range(image.shape[2]))
        good_bands.remove(110)
        image = image[:, :, good_bands]

        # Entries equal to -50 are set to -1; the remaining entries are
        # shifted to start at 0 and then divided by their maximum.
        indices = image == -50
        image[indices] = -1
        image[~indices] -= np.min(image[~indices])
        image[~indices] /= np.max(image[~indices])
        return image

    raise ValueError('Data source is not known')
Example #17
Source File: test_pipeline.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_make_pipeline_memory():
    """``make_pipeline`` keeps the given ``memory`` and defaults to ``None``.

    The temporary cache directory is removed in a ``finally`` clause so it
    does not leak when one of the assertions fails.
    """
    cachedir = mkdtemp()
    try:
        memory = Memory(cachedir=cachedir)

        # An explicit memory object must be stored as-is (same identity).
        pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
        assert_true(pipeline.memory is memory)

        # Without the keyword the pipeline has no memory at all.
        pipeline = make_pipeline(DummyTransf(), SVC())
        assert_true(pipeline.memory is None)
    finally:
        shutil.rmtree(cachedir)
Example #18
Source File: test_pipeline.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_pipeline_memory():
    """Compare a cached Pipeline against an uncached one across refits.

    Three things are checked, each time by comparing predictions, scores
    and the transformer's ``means_`` with the uncached pipeline:

    1. after the first fit of the cached pipeline (the transformer object
       passed in must not have gained ``means_``);
    2. after a second fit of the same cached pipeline (the transformer's
       ``timestamp_`` must be unchanged);
    3. after fitting a new cached pipeline built from fresh estimators and
       a different step name (``timestamp_`` must again be unchanged).

    The temporary cache directory is always removed.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    cachedir = mkdtemp()

    def _assert_same_as_reference(reference, candidate, candidate_step):
        # Check that the candidate and the reference yield identical results
        assert_array_equal(reference.predict(X), candidate.predict(X))
        assert_array_equal(reference.predict_proba(X),
                           candidate.predict_proba(X))
        assert_array_equal(reference.predict_log_proba(X),
                           candidate.predict_log_proba(X))
        assert_array_equal(reference.score(X, y), candidate.score(X, y))
        assert_array_equal(reference.named_steps['transf'].means_,
                           candidate.named_steps[candidate_step].means_)

    try:
        memory = Memory(cachedir=cachedir, verbose=10)

        # Test with Transformer + SVC
        clf = SVC(probability=True, random_state=0)
        transf = DummyTransf()
        pipe = Pipeline([('transf', clone(transf)), ('svc', clf)])
        cached_pipe = Pipeline([('transf', transf), ('svc', clf)],
                               memory=memory)

        # Memoize the transformer at the first fit
        cached_pipe.fit(X, y)
        pipe.fit(X, y)
        # Get the time stamp of the transformer in the cached pipeline
        ts = cached_pipe.named_steps['transf'].timestamp_
        _assert_same_as_reference(pipe, cached_pipe, 'transf')
        assert_false(hasattr(transf, 'means_'))

        # Check that we are reading the cache while fitting
        # a second time
        cached_pipe.fit(X, y)
        _assert_same_as_reference(pipe, cached_pipe, 'transf')
        assert_equal(ts, cached_pipe.named_steps['transf'].timestamp_)

        # Create a new pipeline with cloned estimators
        # Check that even changing the name step does not affect the cache hit
        clf_2 = SVC(probability=True, random_state=0)
        transf_2 = DummyTransf()
        cached_pipe_2 = Pipeline([('transf_2', transf_2), ('svc', clf_2)],
                                 memory=memory)
        cached_pipe_2.fit(X, y)
        _assert_same_as_reference(pipe, cached_pipe_2, 'transf_2')
        assert_equal(ts, cached_pipe_2.named_steps['transf_2'].timestamp_)
    finally:
        shutil.rmtree(cachedir)
Example #19
Source File: knn.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 4 votes |
def fit(self, X, y=None, get_rhos=False):
    '''
    Sets up for divergence estimation "from" new data "to" X.
    Builds FLANN indices for each bag, and maybe gets within-bag distances.

    Parameters
    ----------
    X : list of arrays or :class:`skl_groups.features.Features`
        The bags to search "to".

    y : ignored
        Not used by this method.

    get_rhos : boolean, optional, default False
        Compute within-bag distances :attr:`rhos_`. These are only needed
        for some divergence functions or if do_sym is passed, and they'll
        be computed (and saved) during :meth:`transform` if they're not
        computed here.

        If you're using Jensen-Shannon divergence, a higher max_K may
        be needed once it sees the number of points in the transformed bags,
        so the computation here might be wasted.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If the largest K required is not smaller than the size of the
        smallest bag.
    '''
    X = as_features(X, stack=True, bare=True)
    self.features_ = X

    # if we're using a function that needs to pick its K vals itself,
    # then we need to set max_K here. when we transform(), might have to
    # re-do this :|
    Ks = self._get_Ks()
    chosen = _choose_funcs(self.div_funcs, Ks, X.dim, X.n_pts, None,
                           self.version)
    _, _, _, max_K, save_all_Ks, _ = chosen

    if max_K >= X.n_pts.min():
        msg = "asked for K = {}, but there's a bag with only {} points"
        raise ValueError(msg.format(max_K, X.n_pts.min()))

    # A string ``memory`` is treated as a cache directory.
    cache = self.memory
    if isinstance(cache, string_types):
        cache = Memory(cachedir=cache, verbose=0)

    cached_build = cache.cache(_build_indices)
    bag_indices = cached_build(X, self._flann_args())
    self.indices_ = bag_indices

    if get_rhos:
        self.rhos_ = _get_rhos(X, bag_indices, Ks, max_K, save_all_Ks,
                               self.min_dist)
    elif hasattr(self, 'rhos_'):
        # Drop distances left over from an earlier fit.
        del self.rhos_
    return self
Example #20
Source File: ClusteringForwarder_old.py From PReMVOS with MIT License | 4 votes |
def cluster_and_classify(self, track_ys, track_classes, xxx_todo_changeme,
                         savedMemory=Memory(cachedir=None, verbose=0),
                         mode=0):
    """Cluster ``track_ys`` with HDBSCAN and encode the class labels.

    Parameters
    ----------
    track_ys
        Data passed to HDBSCAN's ``fit`` / ``fit_predict``.
    track_classes
        Optional labels. When not ``None`` they are converted to integer
        ids with ``numpy.unique(..., return_inverse=True)``.
    xxx_todo_changeme : tuple
        ``(n_components, min_cluster_size, min_samples)``. ``n_components``
        is only used in the log line.
    savedMemory : Memory
        Passed to HDBSCAN as ``memory``.
    mode : int
        ``1``: fit, then assign each point to the arg-max of its soft
        membership vector. ``2``: no clustering, the class ids are returned
        as the cluster ids. Anything else: ``fit_predict``.

    Returns
    -------
    cluster_ids, class_ids, clusterer
        ``class_ids`` is ``None`` when ``track_classes`` is ``None``;
        ``clusterer`` is ``None`` in mode 2.
    """
    n_components, min_cluster_size, min_samples = xxx_todo_changeme
    from hdbscan import HDBSCAN, all_points_membership_vectors, approximate_predict

    started_at = time.time()
    clusterer = None

    if track_classes is None:
        class_ids = None
    else:
        _, class_ids = numpy.unique(track_classes, return_inverse=True)
    # NOTE: the original carried commented-out code here (and after the
    # clustering step below) to renumber ids by descending group size.

    # Both clustering modes configure HDBSCAN identically.
    hdbscan_options = dict(min_cluster_size=min_cluster_size,
                           min_samples=min_samples,
                           core_dist_n_jobs=-2,
                           algorithm='boruvka_kdtree',
                           cluster_selection_method='eom',
                           prediction_data=True,
                           memory=savedMemory)

    if mode == 1:
        clusterer = HDBSCAN(**hdbscan_options).fit(track_ys)
        soft_clusters = all_points_membership_vectors(clusterer)
        cluster_ids = numpy.array([numpy.argmax(membership)
                                   for membership in soft_clusters])
    elif mode == 2:
        cluster_ids = class_ids
    else:
        clusterer = HDBSCAN(**hdbscan_options)
        cluster_ids = clusterer.fit_predict(track_ys)

    duration = time.time() - started_at
    print(n_components, min_samples, min_cluster_size, duration,
          file=log.v5)
    return cluster_ids, class_ids, clusterer
Example #21
Source File: knn.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 4 votes |
def transform(self, X):
    r'''
    Computes the divergences from X to :attr:`features_`.

    Parameters
    ----------
    X : list of bag feature arrays or :class:`skl_groups.features.Features`
        The bags to search "from".

    Returns
    -------
    divs : array of shape ``[len(div_funcs), len(Ks), len(X), len(features_)] + ([2] if do_sym else [])``
        The divergences from X to :attr:`features_`.
        ``divs[d, k, i, j]`` is the ``div_funcs[d]`` divergence
        from ``X[i]`` to ``features_[j]`` using a K of ``Ks[k]``.
        If ``do_sym``, ``divs[d, k, i, j, 0]`` is
        :math:`D_{d,k}( X_i \| \texttt{features_}_j)` and
        ``divs[d, k, i, j, 1]`` is :math:`D_{d,k}(\texttt{features_}_j \| X_i)`.

    Raises
    ------
    ValueError
        If the dimension of X differs from that of :attr:`features_`.

    Notes
    -----
    :attr:`rhos_` is overwritten with the second value returned by the
    estimation call.
    '''
    X = as_features(X, stack=True, bare=True)
    Y = self.features_
    Ks = np.asarray(self.Ks)

    if X.dim != Y.dim:
        msg = "incompatible dimensions: fit with {}, transform with {}"
        raise ValueError(msg.format(Y.dim, X.dim))

    # A string ``memory`` is treated as a cache directory.
    cache = self.memory
    if isinstance(cache, string_types):
        cache = Memory(cachedir=cache, verbose=0)

    # ignore Y_indices to avoid slow pickling of them
    # NOTE: if the indices are approximate, then might not get the same
    #       results!
    ignored_args = ['n_jobs', 'Y_indices', 'Y_rhos']
    est = cache.cache(_est_divs, ignore=ignored_args)

    known_rhos = getattr(self, 'rhos_', None)
    output, self.rhos_ = est(
        X, Y, self.indices_, known_rhos,
        self.div_funcs, Ks,
        self.do_sym, self.clamp, self.version, self.min_dist,
        self._flann_args(), self._n_jobs)
    return output
Example #22
Source File: first_level_model.py From nistats with BSD 3-Clause "New" or "Revised" License | 4 votes |
def __init__(self, t_r=None, slice_time_ref=0., hrf_model='glover',
             drift_model='cosine', high_pass=.01, drift_order=1,
             fir_delays=[0], min_onset=-24, mask_img=None,
             target_affine=None, target_shape=None, smoothing_fwhm=None,
             memory=Memory(None), memory_level=1, standardize=False,
             signal_scaling=0, noise_model='ar1', verbose=0, n_jobs=1,
             minimize_memory=True, subject_label=None):
    """Store the model options, normalising ``memory`` and ``signal_scaling``.

    * A ``memory`` given as a string (``_basestring``) is wrapped in
      ``Memory``; anything else is stored unchanged.
    * ``signal_scaling`` may be ``False`` (stored as-is) or one of ``0``,
      ``1``, ``(0, 1)``. In the latter case the value is stored in
      ``scaling_axis``, ``signal_scaling`` becomes ``True`` and
      ``standardize`` is forced to ``False``.

    Raises
    ------
    ValueError
        If ``signal_scaling`` is none of the accepted values.
    """
    # NOTE(review): ``fir_delays=[0]`` is a mutable default stored directly
    # on the instance, so instances relying on the default share one list.

    # design matrix parameters
    self.t_r = t_r
    self.slice_time_ref = slice_time_ref
    self.hrf_model = hrf_model
    self.drift_model = drift_model
    self.high_pass = high_pass
    self.drift_order = drift_order
    self.fir_delays = fir_delays
    self.min_onset = min_onset

    # glm parameters
    self.mask_img = mask_img
    self.target_affine = target_affine
    self.target_shape = target_shape
    self.smoothing_fwhm = smoothing_fwhm
    self.memory = Memory(memory) if isinstance(memory, _basestring) else memory
    self.memory_level = memory_level
    self.standardize = standardize

    if signal_scaling is not False:
        if signal_scaling not in [0, 1, (0, 1)]:
            raise ValueError('signal_scaling must be "False", "0", "1"'
                             ' or "(0, 1)"')
        # Scaling requested: remember the axis and switch standardize off.
        self.scaling_axis = signal_scaling
        self.signal_scaling = True
        self.standardize = False
    else:
        self.signal_scaling = signal_scaling

    self.noise_model = noise_model
    self.verbose = verbose
    self.n_jobs = n_jobs
    self.minimize_memory = minimize_memory

    # attributes
    self.labels_ = None
    self.results_ = None
    self.subject_label = subject_label