Python sklearn.externals.six.string_types() Examples
The following are 22 code examples of sklearn.externals.six.string_types().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.externals.six, or try the search function.
Example #1
Source File: util.py From skutil with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _val_cols(cols): # if it's None, return immediately if cols is None: return cols # try to make cols a list if not is_iterable(cols): if isinstance(cols, six.string_types): return [cols] else: raise ValueError('cols must be an iterable sequence') # if it is an index or a np.ndarray, it will have a built-in # (potentially more efficient tolist() method) if hasattr(cols, 'tolist') and hasattr(cols.tolist, '__call__'): return cols.tolist() # make it a list implicitly, make no guarantees about elements return [c for c in cols]
Example #2
Source File: fixes.py From skutil with BSD 3-Clause "New" or "Revised" License | 6 votes |
def is_iterable(x):
    """Tell whether ``x`` is a non-string iterable.

    Python 3.x strings expose ``__iter__``, so a plain ``hasattr`` test
    would report them as iterables; strings are therefore excluded
    explicitly.

    Parameters
    ----------
    x : object
        The object or primitive to test.

    Returns
    -------
    bool
        True if ``x`` is not a string and has an ``__iter__`` attribute.
    """
    return (not isinstance(x, six.string_types)) and hasattr(x, '__iter__')
Example #3
Source File: base.py From skutil with BSD 3-Clause "New" or "Revised" License | 6 votes |
def validate_x(x):
    """Check that ``x`` is either None or an iterable made only of strings.

    Parameters
    ----------
    x : None or iterable, shape=(n_features,)
        The feature names.

    Returns
    -------
    x : None or iterable, shape=(n_features,)
        The same object that was passed in.

    Raises
    ------
    TypeError
        If ``x`` is not None and is not an iterable of strings.
    """
    if x is None:
        return x

    only_strings = is_iterable(x) and all(
        isinstance(name, six.string_types) for name in x)
    if not only_strings:
        raise TypeError('x must be an iterable of strings. '
                        'Got %s' % str(x))
    return x
Example #4
Source File: impute.py From skutil with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _val_values(vals): """Validate that all values in the iterable are either numeric, or in ('mode', 'median', 'mean'). If not, will raise a TypeError Raises ------ ``TypeError`` if not all values are numeric or in valid values. """ if not all([ (is_numeric(i) or (isinstance(i, six.string_types)) and i in ('mode', 'mean', 'median')) for i in vals ]): raise TypeError('All values in self.fill must be numeric or in ("mode", "mean", "median"). ' 'Got: %s' % ', '.join(vals))
Example #5
Source File: validation.py From Hands-on-Supervised-Machine-Learning-with-Python with MIT License | 6 votes |
def is_iterable(x):
    """Determine whether an item is a non-string iterable.

    Python 3 strings carry an ``__iter__`` method, which makes them look
    like iterables. An object is reported as iterable here only when it
    has ``__iter__`` *and* is not a string.

    Parameters
    ----------
    x : int, object, str, iterable, None
        The object in question. Could feasibly be any type.

    Returns
    -------
    bool
        False for strings; otherwise whether ``x`` has ``__iter__``.
    """
    is_string = isinstance(x, six.string_types)
    return False if is_string else hasattr(x, "__iter__")
Example #6
Source File: decision_forest.py From daal4py with Apache License 2.0 | 6 votes |
def _to_absolute_max_features(max_features, n_features, is_classification=False): if max_features is None: return n_features elif isinstance(max_features, string_types): if max_features == "auto": return max(1, int(np.sqrt(n_features))) if is_classification else n_features elif max_features == 'sqrt': return max(1, int(np.sqrt(n_features))) elif max_features == "log2": return max(1, int(np.log2(n_features))) else: raise ValueError( 'Invalid value for max_features. Allowed string ' 'values are "auto", "sqrt" or "log2".') elif isinstance(max_features, (numbers.Integral, np.integer)): return max_features else: # float if max_features > 0.0: return max(1, int(max_features * n_features)) else: return 0
Example #7
Source File: STFIWF.py From 2016CCF_BDCI_Sougou with MIT License | 5 votes |
def _check_stop_list(stop):
    """Resolve a stop-word specification.

    Parameters
    ----------
    stop : str, None or collection
        ``"english"`` selects the built-in list, ``None`` disables stop
        words, anything else is assumed to be a collection of words.

    Returns
    -------
    None or frozenset-like
        ``ENGLISH_STOP_WORDS`` for ``"english"``, ``None`` for ``None``,
        otherwise ``frozenset(stop)``.

    Raises
    ------
    ValueError
        If ``stop`` is a string other than ``"english"``.
    """
    if stop == "english":
        return ENGLISH_STOP_WORDS
    if isinstance(stop, six.string_types):
        raise ValueError("not a built-in stop list: %s" % stop)
    # Anything that is not None is assumed to be a collection.
    return None if stop is None else frozenset(stop)
Example #8
Source File: rdd.py From sparkit-learn with Apache License 2.0 | 5 votes |
def get(self, key):
    """Select one or several columns of this DictRDD.

    Parameters
    ----------
    key : slice, iterable of column names, or a single column name
        A full slice (``slice(None)``) or an iterable equal to all the
        columns returns ``self``. Other slices and iterables produce a
        new ``DictRDD`` restricted to the selected columns. A single
        column name produces an ``ArrayRDD``, ``SparseRDD`` or
        ``BlockRDD`` depending on that column's dtype.

    Returns
    -------
    DictRDD, ArrayRDD, SparseRDD or BlockRDD

    Raises
    ------
    IndexError
        If ``key`` is a tuple.
    ValueError
        Propagated from ``self.columns.index`` when a name is unknown.
    """
    if isinstance(key, tuple):
        raise IndexError("Too many indices for DictRDD")

    if isinstance(key, slice):
        if key == slice(None, None, None):
            return self
        rdd = self._rdd.map(lambda x: x[key])
        # The new column names come from ``self.columns`` (not
        # ``self.dtype``), mirroring the iterable branch below, which
        # passes names as ``columns`` and types as ``dtype``.
        return DictRDD(rdd, bsize=self.bsize, columns=self.columns[key],
                       dtype=self.dtype[key], noblock=True)

    if hasattr(key, "__iter__") and not isinstance(key, six.string_types):
        if tuple(key) == self.columns:
            return self
        indices = [self.columns.index(k) for k in key]
        dtype = [self.dtype[i] for i in indices]
        rdd = self._rdd.map(lambda x: tuple(x[i] for i in indices))
        return DictRDD(rdd, bsize=self.bsize, columns=key, dtype=dtype,
                       noblock=True)

    # Single column: pick the RDD wrapper that matches the column dtype.
    index = self.columns.index(key)
    dtype = self.dtype[index]
    bsize = self.bsize
    rdd = self._rdd.map(lambda x: x[index])
    if dtype is np.ndarray:
        return ArrayRDD(rdd, bsize=bsize, noblock=True)
    if dtype is sp.spmatrix:
        return SparseRDD(rdd, bsize=bsize, noblock=True)
    return BlockRDD(rdd, bsize=bsize, dtype=dtype, noblock=True)
Example #9
Source File: spectral_embedding_.py From intro_ds with Apache License 2.0 | 5 votes |
def fit(self, X, y=None):
    """Fit the model from data in X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.

        If affinity is "precomputed"
        X : array-like, shape (n_samples, n_samples),
        Interpret X as precomputed adjacency graph computed from
        samples.

    y : object, optional
        Not used by this method.

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    ValueError
        If ``self.affinity`` is a string outside the supported names, or
        is neither a string nor a callable.
    """
    X = check_array(X, ensure_min_samples=2, estimator=self)
    random_state = check_random_state(self.random_state)

    affinity = self.affinity
    if isinstance(affinity, six.string_types):
        supported = {"nearest_neighbors", "rbf", "precomputed"}
        if affinity not in supported:
            raise ValueError(("%s is not a valid affinity. Expected "
                              "'precomputed', 'rbf', 'nearest_neighbors' "
                              "or a callable.") % affinity)
    elif not callable(affinity):
        raise ValueError(("'affinity' is expected to be an affinity "
                          "name or a callable. Got: %s") % affinity)

    affinity_matrix = self._get_affinity_matrix(X)
    self.embedding_ = spectral_embedding(
        affinity_matrix,
        n_components=self.n_components,
        eigen_solver=self.eigen_solver,
        random_state=random_state,
    )
    return self
Example #10
Source File: autoencoder.py From smrt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _validate_activation_optimization(activation_function, learning_function):
    """Resolve the activation and learning function keys to callables.

    The keys are looked up in ``PERMITTED_ACTIVATIONS`` and
    ``PERMITTED_OPTIMIZERS``. Strings are stored and passed around
    (rather than the callables themselves) so the models can be pickled
    more easily.

    Parameters
    ----------
    activation_function : str
        The key for the activation function.

    learning_function : str
        The key for the learning function.

    Returns
    -------
    activation : callable
        The activation function.

    learning : callable
        The learning function.

    Raises
    ------
    TypeError
        If either key is not a string.
    ValueError
        If either key is not among the permitted ones.
    """
    # Activation function first, so its errors take precedence.
    if not isinstance(activation_function, six.string_types):
        raise TypeError('Activation function must be a string')
    activation = PERMITTED_ACTIVATIONS.get(activation_function, None)
    if activation is None:
        raise ValueError('Permitted activation functions: %r'
                         % list(PERMITTED_ACTIVATIONS.keys()))

    # Then the optimization (learning) function.
    if not isinstance(learning_function, six.string_types):
        raise TypeError('Learning function must be a string')
    learning = PERMITTED_OPTIMIZERS.get(learning_function, None)
    if learning is None:
        raise ValueError('Permitted learning functions: %r'
                         % list(PERMITTED_OPTIMIZERS.keys()))

    return activation, learning
Example #11
Source File: STFIWF.py From 2016_CCFsougou2 with MIT License | 5 votes |
def _check_stop_list(stop):
    """Turn the ``stop`` argument into a usable stop-word collection.

    Returns ``ENGLISH_STOP_WORDS`` when ``stop`` equals ``"english"``,
    ``None`` when ``stop`` is ``None``, and ``frozenset(stop)`` for any
    other non-string value.

    Raises
    ------
    ValueError
        If ``stop`` is a string that is not ``"english"``.
    """
    if stop == "english":
        return ENGLISH_STOP_WORDS

    if isinstance(stop, six.string_types):
        raise ValueError("not a built-in stop list: %s" % stop)

    if stop is not None:
        # Not a string and not None: assume it is a collection of words.
        return frozenset(stop)
    return None
Example #12
Source File: _k_means_0_21.py From daal4py with Apache License 2.0 | 5 votes |
def _daal4py_compute_starting_centroids(X, X_fptype, nClusters, cluster_centers_0, random_state):
    """Compute the initial centroids for k-means.

    Parameters
    ----------
    X : array-like
        The data; passed to ``daal4py`` and to the user callable, and
        its ``dtype`` is used to convert user-supplied centers.
    X_fptype : object
        Forwarded as ``fptype`` to the ``daal4py`` calls.
    nClusters : int
        Number of centroids to produce.
    cluster_centers_0 : str, array-like or callable
        ``'k-means++'``, ``'random'`` or ``'deterministic'`` select a
        ``daal4py.kmeans_init`` method; an object with ``__array__`` is
        used directly as the centers; a callable is invoked as
        ``cluster_centers_0(X, nClusters, random_state)``.
    random_state : object
        Must provide ``randint``; used to draw the engine seed for the
        two randomized methods and handed to the user callable.

    Returns
    -------
    deterministic : bool
        True for explicit array centers and the ``'deterministic'``
        method, False otherwise.
    centroids_ : array
        The starting centroids.

    Raises
    ------
    ValueError
        If ``cluster_centers_0`` matches none of the supported forms.
    """
    def _names(target_str):
        # True only for a string equal to ``target_str``.
        return isinstance(cluster_centers_0, string_types) and cluster_centers_0 == target_str

    def _seeded_engine():
        seed = random_state.randint(np.iinfo('i').max)
        return daal4py.engines_mt19937(fptype=X_fptype, method='defaultDense', seed=seed)

    def _init_centroids(**init_kwargs):
        init_algo = daal4py.kmeans_init(nClusters, fptype=X_fptype, **init_kwargs)
        return init_algo.compute(X).centroids

    def _as_checked_centers(raw_centers):
        centers = np.ascontiguousarray(raw_centers, dtype=X.dtype)
        _validate_center_shape(X, nClusters, centers)
        return centers

    deterministic = False
    if _names('k-means++'):
        engine = _seeded_engine()
        n_local_trials = 2 + int(np.log(nClusters))
        centroids_ = _init_centroids(nTrials=n_local_trials,
                                     method='plusPlusDense', engine=engine)
    elif _names('random'):
        engine = _seeded_engine()
        centroids_ = _init_centroids(method='randomDense', engine=engine)
    elif hasattr(cluster_centers_0, '__array__'):
        deterministic = True
        centroids_ = _as_checked_centers(cluster_centers_0)
    elif callable(cluster_centers_0):
        centroids_ = _as_checked_centers(
            cluster_centers_0(X, nClusters, random_state))
    elif _names('deterministic'):
        deterministic = True
        centroids_ = _init_centroids(method='defaultDense')
    else:
        raise ValueError("Cluster centers should either be 'k-means++', 'random', 'deterministic' or an array")
    return deterministic, centroids_
Example #13
Source File: transform.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_memory(memory):
    """Return a ``Memory`` object for ``memory``.

    A string is wrapped as ``Memory(memory, verbose=0)``; any other
    value is returned unchanged.
    """
    if not isinstance(memory, string_types):
        return memory
    return Memory(memory, verbose=0)
Example #14
Source File: features.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __getitem__(self, key):
    """Index the features by row.

    Parameters
    ----------
    key : scalar or row selector
        A scalar returns ``self.features[key]`` directly. Anything else
        builds a new object of the same type from ``self.features[key]``
        with every entry of ``self.meta`` indexed by the same ``key``.

    Raises
    ------
    TypeError
        If ``key`` is a string, or a tuple/list containing a string.
    """
    is_string_key = isinstance(key, string_types)
    if not is_string_key and isinstance(key, (tuple, list)):
        is_string_key = any(isinstance(x, string_types) for x in key)
    if is_string_key:
        msg = "Features indexing only subsets rows, but got {!r}"
        raise TypeError(msg.format(key))

    if np.isscalar(key):
        return self.features[key]

    # Index the features before the metadata, so a bad key fails on the
    # features first.
    selected = self.features[key]
    selected_meta = {k: v[key] for k, v in iteritems(self.meta)}
    return type(self)(selected, copy=False, stack=False, **selected_meta)
Example #15
Source File: STFIWF.py From 2016CCF-sougou with Apache License 2.0 | 5 votes |
def _check_stop_list(stop):
    """Validate and resolve the stop-word argument.

    Parameters
    ----------
    stop : str, None or collection
        The stop-word specification.

    Returns
    -------
    ``ENGLISH_STOP_WORDS`` if ``stop == "english"``; ``None`` if ``stop``
    is ``None``; otherwise ``frozenset(stop)``.

    Raises
    ------
    ValueError
        For any string other than ``"english"``.
    """
    if stop == "english":
        return ENGLISH_STOP_WORDS

    if isinstance(stop, six.string_types):
        raise ValueError("not a built-in stop list: %s" % stop)

    if stop is None:
        return None

    # Whatever remains is assumed to be a collection of words.
    return frozenset(stop)
Example #16
Source File: balance.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _validate_target(y): if (not y) or (not isinstance(y, six.string_types)): raise ValueError('y must be a column name') return str(y) # force string
Example #17
Source File: split.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _val_y(y):
    """Validate the target name.

    Parameters
    ----------
    y : str or None
        The target column name.

    Returns
    -------
    str or None
        ``str(y)`` for a string, ``None`` for ``None``.

    Raises
    ------
    TypeError
        If ``y`` is neither a string nor ``None``.
    """
    if isinstance(y, six.string_types):
        return str(y)
    elif y is None:
        return y
    # Wrap in a 1-tuple: a bare tuple ``y`` would otherwise be unpacked
    # as the format arguments and produce a misleading message.
    raise TypeError('y must be a string. Got %s' % (y,))
Example #18
Source File: grid_search.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _val_exp_loss_prem(x, y, z):
    """Validate three feature names and coerce them to ``str``.

    The docstring of the source project describes these as names used
    for indexing an H2OFrame.

    Parameters
    ----------
    x : str
        exp name

    y : str
        loss name

    z : str or None
        premium name

    Returns
    -------
    out : tuple
        ``(exp, loss, prem)``: ``str(x)``, ``str(y)``, and ``str(z)`` or
        ``None`` when ``z`` is ``None``.

    Raises
    ------
    TypeError
        If ``x`` or ``y`` is not a string, or ``z`` is neither ``None``
        nor a string.
    """
    for required in (x, y):
        if not isinstance(required, six.string_types):
            raise TypeError('exposure and loss must be strings or unicode')

    if z is not None and not isinstance(z, six.string_types):
        raise TypeError('premium must be None or string or unicode')

    prem = None if z is None else str(z)
    return (str(x), str(y), prem)
Example #19
Source File: grid_search.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _kv_str(k, v):
    """Return ``(str(k), v)``, also converting ``v`` to ``str`` if it is a string type.

    Non-string values are passed through unchanged.
    """
    key = str(k)  # h2o likes unicode...
    # Likewise, a unicode value is turned into a plain string.
    if isinstance(v, six.string_types):
        v = str(v)
    return key, v
Example #20
Source File: knn.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 4 votes |
def transform(self, X):
    r'''
    Computes the divergences from X to :attr:`features_`.

    Parameters
    ----------
    X : list of bag feature arrays or :class:`skl_groups.features.Features`
        The bags to search "from".

    Returns
    -------
    divs : array of shape ``[len(div_funcs), len(Ks), len(X), len(features_)] + ([2] if do_sym else [])``
        The divergences from X to :attr:`features_`.
        ``divs[d, k, i, j]`` is the ``div_funcs[d]`` divergence
        from ``X[i]`` to ``features_[j]`` using a K of ``Ks[k]``.
        If ``do_sym``, ``divs[d, k, i, j, 0]`` is
        :math:`D_{d,k}( X_i \| \texttt{features_}_j)` and
        ``divs[d, k, i, j, 1]`` is :math:`D_{d,k}(\texttt{features_}_j \| X_i)`.

    Raises
    ------
    ValueError
        If the dimension of ``X`` differs from that of :attr:`features_`.
    '''
    X = as_features(X, stack=True, bare=True)
    Y = self.features_

    Ks = np.asarray(self.Ks)

    if X.dim != Y.dim:
        msg = "incompatible dimensions: fit with {}, transform with {}"
        raise ValueError(msg.format(Y.dim, X.dim))

    memory = self.memory
    if isinstance(memory, string_types):
        # Pass the cache directory positionally: the keyword was renamed
        # from ``cachedir`` to ``location`` in newer joblib releases, and
        # the first positional slot works with both.
        memory = Memory(memory, verbose=0)

    # ignore Y_indices to avoid slow pickling of them
    # NOTE: if the indices are approximate, then might not get the same
    #       results!
    est = memory.cache(_est_divs, ignore=['n_jobs', 'Y_indices', 'Y_rhos'])
    output, self.rhos_ = est(
        X, Y, self.indices_, getattr(self, 'rhos_', None),
        self.div_funcs, Ks,
        self.do_sym, self.clamp, self.version, self.min_dist,
        self._flann_args(), self._n_jobs)
    return output
Example #21
Source File: knn.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 4 votes |
def fit(self, X, y=None, get_rhos=False):
    '''
    Sets up for divergence estimation "from" new data "to" X.
    Builds FLANN indices for each bag, and maybe gets within-bag distances.

    Parameters
    ----------
    X : list of arrays or :class:`skl_groups.features.Features`
        The bags to search "to".

    y : object, optional
        Not used by this method.

    get_rhos : boolean, optional, default False
        Compute within-bag distances :attr:`rhos_`. These are only needed
        for some divergence functions or if do_sym is passed, and they'll
        be computed (and saved) during :meth:`transform` if they're not
        computed here.

        If you're using Jensen-Shannon divergence, a higher max_K may
        be needed once it sees the number of points in the transformed bags,
        so the computation here might be wasted.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If the required K is not smaller than the size of the smallest bag.
    '''
    self.features_ = X = as_features(X, stack=True, bare=True)

    # if we're using a function that needs to pick its K vals itself,
    # then we need to set max_K here. when we transform(), might have to
    # re-do this :|
    Ks = self._get_Ks()
    _, _, _, max_K, save_all_Ks, _ = _choose_funcs(
        self.div_funcs, Ks, X.dim, X.n_pts, None, self.version)

    if max_K >= X.n_pts.min():
        msg = "asked for K = {}, but there's a bag with only {} points"
        raise ValueError(msg.format(max_K, X.n_pts.min()))

    memory = self.memory
    if isinstance(memory, string_types):
        # Pass the cache directory positionally: the keyword was renamed
        # from ``cachedir`` to ``location`` in newer joblib releases, and
        # the first positional slot works with both.
        memory = Memory(memory, verbose=0)

    # Local is named ``indices`` rather than ``id`` to avoid shadowing
    # the builtin.
    self.indices_ = indices = memory.cache(_build_indices)(X, self._flann_args())
    if get_rhos:
        self.rhos_ = _get_rhos(X, indices, Ks, max_K, save_all_Ks, self.min_dist)
    elif hasattr(self, 'rhos_'):
        # Drop distances left over from an earlier fit.
        del self.rhos_
    return self
Example #22
Source File: dict_vectorizer.py From sparkit-learn with Apache License 2.0 | 4 votes |
def fit(self, Z):
    """Learn a list of feature name -> indices mappings.

    Sets ``self.feature_names_`` (the list of feature names, sorted when
    ``self.sort`` is true) and ``self.vocabulary_`` (a dict mapping each
    name to its position in that list).

    Parameters
    ----------
    Z : DictRDD with column 'X'
        Dict(s) or Mapping(s) from feature names (arbitrary Python
        objects) to feature values (strings or convertible to dtype).
        Anything that is not a ``DictRDD`` is used as-is.

    Returns
    -------
    self
    """
    X = Z[:, 'X'] if isinstance(Z, DictRDD) else Z

    # Create the vocabulary: every block contributes its set of feature
    # names to a shared set-valued accumulator.
    class SetAccum(AccumulatorParam):

        def zero(self, initialValue):
            return set(initialValue)

        def addInPlace(self, v1, v2):
            v1 |= v2
            return v1

    accum = X.context.accumulator(set(), SetAccum())

    # ``separator`` is bound as a default argument and used in the body,
    # so the closure reads the local value instead of reaching back into
    # ``self`` (presumably so that ``self`` need not be captured and
    # shipped with the function - confirm against Spark serialization).
    def mapper(X, separator=self.separator):
        feature_names = set()
        for x in X:
            for f, v in six.iteritems(x):
                if isinstance(v, six.string_types):
                    # String values become one feature per (name, value).
                    f = "%s%s%s" % (f, separator, v)
                feature_names.add(f)
        accum.add(feature_names)

    X.foreach(mapper)  # init vocabulary
    feature_names = list(accum.value)

    if self.sort:
        feature_names.sort()

    vocab = {f: i for i, f in enumerate(feature_names)}

    self.feature_names_ = feature_names
    self.vocabulary_ = vocab

    return self