Python sklearn.externals.six.string_types() Examples

The following are 20 code examples of sklearn.externals.six.string_types, collected from open-source projects. The source file, project, and license of each example are noted above it. You may also want to check out the other functions and classes available in the sklearn.externals.six module.
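Despite the parentheses in the title, six.string_types is not a function but a tuple of string classes: (str,) on Python 3 and (basestring,) on Python 2, which is why every example below passes it to isinstance() rather than calling it. A minimal sketch of the shared pattern follows; note that the vendored sklearn.externals.six was deprecated in scikit-learn 0.21 and removed in 0.23, so on modern versions you should import the standalone six package instead.

# A minimal sketch of the pattern shared by all of the examples below.
# sklearn.externals.six was deprecated in scikit-learn 0.21 and removed
# in 0.23; with a modern scikit-learn, use the standalone six package.
from sklearn.externals import six


def is_string(obj):
    # six.string_types is (str,) on Python 3 and (basestring,) on Python 2
    return isinstance(obj, six.string_types)


print(is_string('a'))      # True
print(is_string(u'\xe9'))  # True on both major Python versions
print(is_string(3))        # False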
Example #1
Source File: util.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _val_cols(cols):
    # if it's None, return immediately
    if cols is None:
        return cols

    # try to make cols a list
    if not is_iterable(cols):
        if isinstance(cols, six.string_types):
            return [cols]
        else:
            raise ValueError('cols must be an iterable sequence')

    # if it is an index or a np.ndarray, it will have a built-in
    # (potentially more efficient) tolist() method
    if hasattr(cols, 'tolist') and hasattr(cols.tolist, '__call__'):
        return cols.tolist()

    # make it a list implicitly, make no guarantees about elements
    return [c for c in cols] 
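For reference, a hypothetical sketch of how each branch above behaves (not from the original source file):

# Hypothetical behavior sketch for _val_cols:
#   _val_cols(None)                  -> None (passes through untouched)
#   _val_cols('a')                   -> ['a'] (a lone column name is wrapped)
#   _val_cols(np.array(['a', 'b']))  -> ['a', 'b'] (via the tolist() fast path)
#   _val_cols(('a', 'b'))            -> ['a', 'b'] (generic iterable fallback)
#   _val_cols(3)                     -> ValueError (neither iterable nor string)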
Example #2
Source File: fixes.py    From skutil with BSD 3-Clause "New" or "Revised" License
def is_iterable(x):
    """Python 3.x adds the ``__iter__`` attribute
    to strings. Thus, our previous tests for iterable
    will fail when using ``hasattr``.

    Parameters
    ----------

    x : object
        The object or primitive to test whether
        or not is an iterable.


    Returns
    -------

    bool
        True if ``x`` is an iterable
    """
    if isinstance(x, six.string_types):
        return False
    return hasattr(x, '__iter__') 
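A short sketch of the behavior the docstring describes (assumed usage, not part of the original file):

# Assumed behavior sketch: strings are reported as non-iterable even
# though Python 3 strings define __iter__.
assert is_iterable([1, 2, 3])        # lists are iterable
assert not is_iterable('abc')        # strings are deliberately excluded
assert not is_iterable(42)           # ints define no __iter__
assert is_iterable(c for c in 'ab')  # generators are iterable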
Example #3
Source File: base.py    From skutil with BSD 3-Clause "New" or "Revised" License
def validate_x(x):
    """Given an iterable or None, ``x``, validate that if
    it is an iterable, it only contains string types.

    Parameters
    ----------

    x : None or iterable, shape=(n_features,)
        The feature names


    Returns
    -------

    x : None or iterable, shape=(n_features,)
        The feature names
    """
    if x is not None:
        # validate feature_names
        if not (is_iterable(x) and all([isinstance(i, six.string_types) for i in x])):
            raise TypeError('x must be an iterable of strings. '
                            'Got %s' % str(x))

    return x 
Example #4
Source File: impute.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _val_values(vals):
    """Validate that all values in the iterable
    are either numeric, or in ('mode', 'median', 'mean').
    If not, will raise a TypeError

    Raises
    ------

    ``TypeError`` if not all values are numeric or
    in valid values.
    """
    if not all([
                   # numeric values pass, as do the strings 'mode', 'mean', 'median'
                   (is_numeric(i) or (isinstance(i, six.string_types) and i in ('mode', 'mean', 'median')))
                   for i in vals
               ]):
        raise TypeError('All values in self.fill must be numeric or in ("mode", "mean", "median"). '
                        'Got: %s' % ', '.join(str(v) for v in vals))
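A hypothetical sketch of what passes and what raises, assuming is_numeric accepts ints and floats:

# Hypothetical sketch, assuming is_numeric() accepts ints and floats:
_val_values([1.5, 0, 'mean'])    # passes silently: numerics and a keyword
_val_values(['mode', 'median'])  # passes: both are permitted keywords
# _val_values(['average'])       # would raise TypeError: not a valid keyword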
Example #5
Source File: validation.py    From Hands-on-Supervised-Machine-Learning-with-Python with MIT License
def is_iterable(x):
    """Determine whether an item is iterable.

    Python 3 introduced the ``__iter__`` functionality to
    strings, making them falsely behave like iterables. This
    function determines whether an object is an iterable given
    the presence of the ``__iter__`` method and that the object
    is *not* a string.

    Parameters
    ----------
    x : int, object, str, iterable, None
        The object in question. Could feasibly be any type.
    """
    if isinstance(x, six.string_types):
        return False
    return hasattr(x, "__iter__") 
Example #6
Source File: decision_forest.py    From daal4py with Apache License 2.0
def _to_absolute_max_features(max_features, n_features, is_classification=False):
    if max_features is None:
        return n_features
    elif isinstance(max_features, string_types):
        if max_features == "auto":
            return max(1, int(np.sqrt(n_features))) if is_classification else n_features
        elif max_features == 'sqrt':
            return max(1, int(np.sqrt(n_features)))
        elif max_features == "log2":
            return max(1, int(np.log2(n_features)))
        else:
            raise ValueError(
                'Invalid value for max_features. Allowed string '
                'values are "auto", "sqrt" or "log2".')
    elif isinstance(max_features, (numbers.Integral, np.integer)):
        return max_features
    else: # float
        if max_features > 0.0:
            return max(1, int(max_features * n_features))
        else:
            return 0 
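A few hypothetical sanity checks of the conversion, with n_features=100:

# Hypothetical sanity checks, n_features=100:
assert _to_absolute_max_features(None, 100) == 100   # None -> all features
assert _to_absolute_max_features('sqrt', 100) == 10  # max(1, int(sqrt(100)))
assert _to_absolute_max_features('log2', 100) == 6   # max(1, int(log2(100)))
assert _to_absolute_max_features(25, 100) == 25      # ints pass through
assert _to_absolute_max_features(0.5, 100) == 50     # floats are fractions
assert _to_absolute_max_features('auto', 100, is_classification=True) == 10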
Example #7
Source File: STFIWF.py    From 2016CCF_BDCI_Sougou with MIT License
def _check_stop_list(stop):
    if stop == "english":
        return ENGLISH_STOP_WORDS
    elif isinstance(stop, six.string_types):
        raise ValueError("not a built-in stop list: %s" % stop)
    elif stop is None:
        return None
    else:  # assume it's a collection
        return frozenset(stop) 
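A hypothetical sketch of the four branches:

# Hypothetical sketch of the four branches:
#   'english'         -> the built-in ENGLISH_STOP_WORDS frozenset
#   any other string  -> ValueError (ad-hoc string stop lists are rejected)
#   None              -> None (no stop-word filtering)
#   any collection    -> frozenset of its elements
assert _check_stop_list(None) is None
assert _check_stop_list(['the', 'a']) == frozenset(['the', 'a'])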
Example #8
Source File: rdd.py    From sparkit-learn with Apache License 2.0
def get(self, key):
        if isinstance(key, tuple):
            raise IndexError("Too many indices for DictRDD")
        elif isinstance(key, slice):
            if key == slice(None, None, None):
                return self
            rdd = self._rdd.map(lambda x: x[key])
            return DictRDD(rdd, bsize=self.bsize, columns=self.dtype[key],
                           dtype=self.dtype[key], noblock=True)
        elif hasattr(key, "__iter__") and not isinstance(key, six.string_types):
            if tuple(key) == self.columns:
                return self
            indices = [self.columns.index(k) for k in key]
            dtype = [self.dtype[i] for i in indices]
            rdd = self._rdd.map(lambda x: tuple(x[i] for i in indices))
            return DictRDD(rdd, bsize=self.bsize, columns=key, dtype=dtype,
                           noblock=True)
        else:
            index = self.columns.index(key)
            dtype = self.dtype[index]
            bsize = self.bsize
            rdd = self._rdd.map(lambda x: x[index])
            if dtype is np.ndarray:
                return ArrayRDD(rdd, bsize=bsize, noblock=True)
            elif dtype is sp.spmatrix:
                return SparseRDD(rdd, bsize=bsize, noblock=True)
            else:
                return BlockRDD(rdd, bsize=bsize, dtype=dtype, noblock=True) 
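For orientation, a hypothetical summary of the four access modes this method implements, assuming a DictRDD z with columns ('X', 'y'):

# Hypothetical access patterns, assuming a DictRDD z with columns ('X', 'y'):
#   z.get(('X', 'y'))   -> IndexError: tuples are rejected as too many indices
#   z.get(slice(None))  -> z itself (the no-op slice short-circuits)
#   z.get(['X', 'y'])   -> a DictRDD restricted to the named columns
#   z.get('X')          -> a single column, wrapped as ArrayRDD, SparseRDD or
#                          BlockRDD depending on the column's dtype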
Example #9
Source File: spectral_embedding_.py    From intro_ds with Apache License 2.0
def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

            If affinity is "precomputed"
            X : array-like, shape (n_samples, n_samples),
            Interpret X as precomputed adjacency graph computed from
            samples.

        Returns
        -------
        self : object
            Returns the instance itself.
        """

        X = check_array(X, ensure_min_samples=2, estimator=self)

        random_state = check_random_state(self.random_state)
        if isinstance(self.affinity, six.string_types):
            if self.affinity not in set(("nearest_neighbors", "rbf",
                                         "precomputed")):
                raise ValueError(("%s is not a valid affinity. Expected "
                                  "'precomputed', 'rbf', 'nearest_neighbors' "
                                  "or a callable.") % self.affinity)
        elif not callable(self.affinity):
            raise ValueError(("'affinity' is expected to be an affinity "
                              "name or a callable. Got: %s") % self.affinity)

        affinity_matrix = self._get_affinity_matrix(X)
        self.embedding_ = spectral_embedding(affinity_matrix,
                                             n_components=self.n_components,
                                             eigen_solver=self.eigen_solver,
                                             random_state=random_state)
        return self 
Example #10
Source File: autoencoder.py    From smrt with BSD 3-Clause "New" or "Revised" License
def _validate_activation_optimization(activation_function, learning_function):
    """Given the keys for the activation function and the learning function
    get the appropriate TF callable. The reason we store and pass around strings
    is so the models can be more easily pickled (and don't attempt to pickle a
    non-instance method)

    Parameters
    ----------
    activation_function : str
        The key for the activation function

    learning_function : str
        The key for the learning function.

    Returns
    -------
    activation : callable
        The activation function

    learning : callable
        The learning function.
    """
    if isinstance(activation_function, six.string_types):
        activation = PERMITTED_ACTIVATIONS.get(activation_function, None)
        if activation is None:
            raise ValueError('Permitted activation functions: %r' % list(PERMITTED_ACTIVATIONS.keys()))
    else:
        raise TypeError('Activation function must be a string')

    # validation optimization function:
    if isinstance(learning_function, six.string_types):
        learning = PERMITTED_OPTIMIZERS.get(learning_function, None)
        if learning is None:
            raise ValueError('Permitted learning functions: %r' % list(PERMITTED_OPTIMIZERS.keys()))
    else:
        raise TypeError('Learning function must be a string')

    return activation, learning 
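A hypothetical call; 'relu' and 'rmse' are assumed dictionary keys used purely for illustration, not verified against the smrt source:

# Hypothetical call -- 'relu' and 'rmse' are assumed keys of the module's
# PERMITTED_ACTIVATIONS / PERMITTED_OPTIMIZERS dicts:
activation, learning = _validate_activation_optimization('relu', 'rmse')
# Passing a callable directly (e.g. tf.nn.relu) raises TypeError: functions
# must be referenced by key so the fitted model remains picklable.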
Example #11
Source File: _k_means_0_21.py    From daal4py with Apache License 2.0
def _daal4py_compute_starting_centroids(X, X_fptype, nClusters, cluster_centers_0, random_state):

    def is_string(s, target_str):
        return isinstance(s, string_types) and s == target_str

    deterministic = False
    if is_string(cluster_centers_0, 'k-means++'):
        _seed = random_state.randint(np.iinfo('i').max)
        daal_engine = daal4py.engines_mt19937(fptype=X_fptype, method='defaultDense', seed=_seed)
        _n_local_trials = 2 + int(np.log(nClusters))
        kmeans_init = daal4py.kmeans_init(nClusters, fptype=X_fptype,
                                          nTrials=_n_local_trials, method='plusPlusDense', engine=daal_engine)
        kmeans_init_res = kmeans_init.compute(X)
        centroids_ = kmeans_init_res.centroids
    elif is_string(cluster_centers_0, 'random'):
        _seed = random_state.randint(np.iinfo('i').max)
        daal_engine = daal4py.engines_mt19937(seed=_seed, fptype=X_fptype, method='defaultDense')
        kmeans_init = daal4py.kmeans_init(nClusters, fptype=X_fptype, method='randomDense', engine=daal_engine)
        kmeans_init_res = kmeans_init.compute(X)
        centroids_ = kmeans_init_res.centroids
    elif hasattr(cluster_centers_0, '__array__'):
        deterministic = True
        cc_arr = np.ascontiguousarray(cluster_centers_0, dtype=X.dtype)
        _validate_center_shape(X, nClusters, cc_arr)
        centroids_ = cc_arr
    elif callable(cluster_centers_0):
        cc_arr = cluster_centers_0(X, nClusters, random_state)
        cc_arr = np.ascontiguousarray(cc_arr, dtype=X.dtype)
        _validate_center_shape(X, nClusters, cc_arr)
        centroids_ = cc_arr
    elif is_string(cluster_centers_0, 'deterministic'):
        deterministic = True
        kmeans_init = daal4py.kmeans_init(nClusters, fptype=X_fptype, method='defaultDense')
        kmeans_init_res = kmeans_init.compute(X)
        centroids_ = kmeans_init_res.centroids
    else:
        raise ValueError("Cluster centers should either be 'k-means++', 'random', 'deterministic' or an array")
    return deterministic, centroids_ 
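In summary, the dispatch implemented above (read off the branches, not from additional daal4py documentation):

# Dispatch summary, read off the branches above:
#   'k-means++'     -> daal4py plusPlusDense init, seeded, 2 + log(k) trials
#   'random'        -> daal4py randomDense init, seeded
#   'deterministic' -> daal4py defaultDense init, no RNG (deterministic=True)
#   array-like      -> shape-validated and used verbatim (deterministic=True)
#   callable        -> called as f(X, nClusters, random_state), then validated
#   anything else   -> ValueError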
Example #12
Source File: transform.py    From skl-groups with BSD 3-Clause "New" or "Revised" License
def get_memory(memory):
    if isinstance(memory, string_types):
        return Memory(memory, verbose=0)
    return memory 
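A hypothetical usage sketch (joblib's Memory caches function results under the given directory):

# Hypothetical usage sketch:
mem = get_memory('/tmp/skl_groups_cache')          # str -> Memory over that dir
mem = get_memory(Memory('/tmp/cache', verbose=0))  # Memory passes through as-is
mem = get_memory(None)                             # None passes through too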
Example #13
Source File: features.py    From skl-groups with BSD 3-Clause "New" or "Revised" License
def __getitem__(self, key):
        if (isinstance(key, string_types) or
                (isinstance(key, (tuple, list)) and
                 any(isinstance(x, string_types) for x in key))):
            msg = "Features indexing only subsets rows, but got {!r}"
            raise TypeError(msg.format(key))

        if np.isscalar(key):
            return self.features[key]
        else:
            return type(self)(self.features[key], copy=False, stack=False,
                              **{k: v[key] for k, v in iteritems(self.meta)}) 
Example #14
Source File: balance.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _validate_target(y):
    if (not y) or (not isinstance(y, six.string_types)):
        raise ValueError('y must be a column name')
    return str(y)  # force string 
Example #15
Source File: split.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _val_y(y):
    if isinstance(y, six.string_types):
        return str(y)
    elif y is None:
        return y
    raise TypeError('y must be a string. Got %s' % y) 
Example #16
Source File: grid_search.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _val_exp_loss_prem(x, y, z):
    """Takes three strings (or unicode) and cleans them
    for indexing an H2OFrame.

    Parameters
    ----------

    x : str
        exp name

    y : str
        loss name

    z : str
        premium name

    Returns
    -------

    out : tuple
        exp : str
            The name of the exp feature (``x``) 

        loss : str
            The name of the loss feature (``y``)

        prem : str or None
            The name of the prem feature (``z``)
    """
    if not all([isinstance(i, six.string_types) for i in (x, y)]):
        raise TypeError('exposure and loss must be strings or unicode')

    if z is not None:
        if not isinstance(z, six.string_types):
            raise TypeError('premium must be None or string or unicode')

    out = (str(x), str(y), str(z) if z is not None else z)
    return out 
Example #17
Source File: grid_search.py    From skutil with BSD 3-Clause "New" or "Revised" License
def _kv_str(k, v):
    k = str(k)  # h2o likes unicode...
    # likewise, if the v is unicode, let's make it a string.
    v = v if not isinstance(v, six.string_types) else str(v)
    return k, v 
Example #18
Source File: knn.py    From skl-groups with BSD 3-Clause "New" or "Revised" License
def transform(self, X):
        r'''
        Computes the divergences from X to :attr:`features_`.

        Parameters
        ----------
        X : list of bag feature arrays or :class:`skl_groups.features.Features`
            The bags to search "from".

        Returns
        -------
        divs : array of shape ``[len(div_funcs), len(Ks), len(X), len(features_)] + ([2] if do_sym else [])``
            The divergences from X to :attr:`features_`.
            ``divs[d, k, i, j]`` is the ``div_funcs[d]`` divergence
            from ``X[i]`` to ``features_[j]`` using a K of ``Ks[k]``.
            If ``do_sym``, ``divs[d, k, i, j, 0]`` is
            :math:`D_{d,k}( X_i \| \texttt{features_}_j)` and
            ``divs[d, k, i, j, 1]`` is :math:`D_{d,k}(\texttt{features_}_j \| X_i)`.
        '''
        X = as_features(X, stack=True, bare=True)
        Y = self.features_

        Ks = np.asarray(self.Ks)

        if X.dim != Y.dim:
            msg = "incompatible dimensions: fit with {}, transform with {}"
            raise ValueError(msg.format(Y.dim, X.dim))

        memory = self.memory
        if isinstance(memory, string_types):
            memory = Memory(cachedir=memory, verbose=0)

        # ignore Y_indices to avoid slow pickling of them
        # NOTE: if the indices are approximate, then might not get the same
        #       results!
        est = memory.cache(_est_divs, ignore=['n_jobs', 'Y_indices', 'Y_rhos'])
        output, self.rhos_ = est(
            X, Y, self.indices_, getattr(self, 'rhos_', None),
            self.div_funcs, Ks,
            self.do_sym, self.clamp, self.version, self.min_dist,
            self._flann_args(), self._n_jobs)
        return output 
Example #19
Source File: knn.py    From skl-groups with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None, get_rhos=False):
        '''
        Sets up for divergence estimation "from" new data "to" X.
        Builds FLANN indices for each bag, and maybe gets within-bag distances.

        Parameters
        ----------
        X : list of arrays or :class:`skl_groups.features.Features`
            The bags to search "to".

        get_rhos : boolean, optional, default False
            Compute within-bag distances :attr:`rhos_`. These are only needed
            for some divergence functions or if do_sym is passed, and they'll
            be computed (and saved) during :meth:`transform` if they're not
            computed here.

            If you're using Jensen-Shannon divergence, a higher max_K may
            be needed once it sees the number of points in the transformed bags,
            so the computation here might be wasted.
        '''
        self.features_ = X = as_features(X, stack=True, bare=True)

        # if we're using a function that needs to pick its K vals itself,
        # then we need to set max_K here. when we transform(), might have to
        # re-do this :|
        Ks = self._get_Ks()
        _, _, _, max_K, save_all_Ks, _ = _choose_funcs(
            self.div_funcs, Ks, X.dim, X.n_pts, None, self.version)

        if max_K >= X.n_pts.min():
            msg = "asked for K = {}, but there's a bag with only {} points"
            raise ValueError(msg.format(max_K, X.n_pts.min()))

        memory = self.memory
        if isinstance(memory, string_types):
            memory = Memory(cachedir=memory, verbose=0)

        self.indices_ = id = memory.cache(_build_indices)(X, self._flann_args())
        if get_rhos:
            self.rhos_ = _get_rhos(X, id, Ks, max_K, save_all_Ks, self.min_dist)
        elif hasattr(self, 'rhos_'):
            del self.rhos_
        return self 
Example #20
Source File: dict_vectorizer.py    From sparkit-learn with Apache License 2.0
def fit(self, Z):
        """Learn a list of feature name -> indices mappings.

        Parameters
        ----------
        Z : DictRDD with column 'X'
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

        Returns
        -------
        self
        """
        X = Z[:, 'X'] if isinstance(Z, DictRDD) else Z

        """Create vocabulary
        """
        class SetAccum(AccumulatorParam):

            def zero(self, initialValue):
                return set(initialValue)

            def addInPlace(self, v1, v2):
                v1 |= v2
                return v1

        accum = X.context.accumulator(set(), SetAccum())

        def mapper(X, separator=self.separator):
            # the separator is bound as a default argument so the closure
            # shipped to the workers does not capture (and pickle) self
            feature_names = []
            for x in X:
                for f, v in six.iteritems(x):
                    if isinstance(v, six.string_types):
                        f = "%s%s%s" % (f, separator, v)
                    feature_names.append(f)
            accum.add(set(feature_names))

        X.foreach(mapper)  # init vocabulary
        feature_names = list(accum.value)

        if self.sort:
            feature_names.sort()

        vocab = dict((f, i) for i, f in enumerate(feature_names))

        self.feature_names_ = feature_names
        self.vocabulary_ = vocab

        return self
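The noteworthy trick is the set-valued Spark accumulator: each partition contributes the feature names it encounters, so the vocabulary is assembled without collecting the whole dataset to the driver. A hypothetical usage sketch follows; the class name and import path are assumed from sparkit-learn's layout, not verified:

# Hypothetical usage sketch -- class and import path assumed, not verified:
# from splearn.feature_extraction import SparkDictVectorizer
# dv = SparkDictVectorizer().fit(Z)   # Z: DictRDD with an 'X' column
# dv.feature_names_                   # e.g. ['city=London', 'temperature']
# String values are keyed as '<name><separator><value>' (one-hot style);
# numeric values keep the bare feature name.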