Python sklearn.covariance.MinCovDet() Examples

The following are 16 code examples of sklearn.covariance.MinCovDet(), drawn from open-source projects; the originating project and source file are noted above each example. You may also want to check out the other available functions and classes of the module sklearn.covariance.
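Before turning to the project examples, here is a minimal, self-contained sketch of the estimator itself. The data and parameter values are illustrative only and are not taken from any of the projects below.

import numpy as np
from sklearn.covariance import MinCovDet

# Gaussian data with a few gross outliers (illustrative values).
rng = np.random.RandomState(42)
X = rng.randn(200, 3)
X[:10] += 8.0  # shift the first ten rows far away from the bulk of the data

mcd = MinCovDet(random_state=rng).fit(X)
print(mcd.location_)       # robust estimate of the mean
print(mcd.covariance_)     # robust covariance matrix
print(mcd.mahalanobis(X))  # squared Mahalanobis distances to the robust center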
Example #1
Source File: getFilteredSkels.py    From tierpsy-tracker with MIT License
def _h_getMahalanobisRobust(dat, critical_alpha=0.01, good_rows=np.zeros(0)):
    '''Calculate the Mahalanobis distance from the sample vector.'''
    if good_rows.size == 0:
        good_rows = np.any(~np.isnan(dat), axis=1)

    try:
        dat2fit = dat[good_rows]
        assert not np.any(np.isnan(dat2fit))

        robust_cov = MinCovDet().fit(dat2fit)
        mahalanobis_dist = np.sqrt(robust_cov.mahalanobis(dat))
    except ValueError:
        # This step will fail if the covariance matrix is singular. This happens
        # if the data does not follow a unimodal symmetric distribution, for
        # example when there are too many small noisy particles. In that case,
        # take the safe option and return zeros for the Mahalanobis distances.
        mahalanobis_dist = np.zeros(dat.shape[0])

    # critical value of the Mahalanobis distance using the chi-square distribution
    # https://en.wikiversity.org/wiki/Mahalanobis%27_distance
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html
    maha_lim = chi2.ppf(1 - critical_alpha, dat.shape[1])
    outliers = mahalanobis_dist > maha_lim

    return mahalanobis_dist, outliers, maha_lim 
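A possible way to drive the helper above, assuming the function and the imports it relies on (numpy, scipy.stats.chi2, sklearn.covariance.MinCovDet) are available in the same module; the data below is illustrative:

import numpy as np
from scipy.stats import chi2
from sklearn.covariance import MinCovDet

# Illustrative feature matrix: 100 samples, 4 features, with a few gross outliers.
rng = np.random.RandomState(0)
dat = rng.randn(100, 4)
dat[:5] += 10.0  # rows that should be flagged as outliers

dist, outliers, maha_lim = _h_getMahalanobisRobust(dat, critical_alpha=0.01)
print(int(outliers.sum()), "rows exceed the chi-square cutoff at", maha_lim)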
Example #2
Source File: mcd.py    From pyod with BSD 2-Clause "Simplified" License
def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # Validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.detector_ = MinCovDet(store_precision=self.store_precision,
                                   assume_centered=self.assume_centered,
                                   support_fraction=self.support_fraction,
                                   random_state=self.random_state)
        self.detector_.fit(X=X, y=y)

        # Use the Mahalanobis distance as the outlier score
        self.decision_scores_ = self.detector_.dist_
        self._process_decision_scores()
        return self 
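A hedged end-to-end sketch of how this detector is normally driven through pyod's public API (pyod.models.mcd.MCD, decision_scores_ and labels_ are documented pyod names; the data and contamination value are illustrative):

import numpy as np
from pyod.models.mcd import MCD

rng = np.random.RandomState(42)
# 200 inliers plus 10 shifted points that should score as outliers.
X_train = np.r_[rng.randn(200, 2), rng.randn(10, 2) + 6.0]

clf = MCD(contamination=0.05)
clf.fit(X_train)

print(clf.decision_scores_[:5])  # raw Mahalanobis-based outlier scores
print(clf.labels_[:5])           # 0 = inlier, 1 = outlier after thresholding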
Example #3
Source File: mcd.py    From pyod with BSD 2-Clause "Simplified" License
def raw_location_(self):
        """The raw robust estimated location before correction and
        re-weighting.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.raw_location_ 
Example #4
Source File: mcd.py    From pyod with BSD 2-Clause "Simplified" License
def raw_covariance_(self):
        """The raw robust estimated location before correction and
        re-weighting.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.raw_covariance_ 
Example #5
Source File: mcd.py    From pyod with BSD 2-Clause "Simplified" License
def raw_support_(self):
        """A mask of the observations that have been used to compute
        the raw robust estimates of location and shape, before correction
        and re-weighting.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.raw_support_ 
Example #6
Source File: mcd.py    From pyod with BSD 2-Clause "Simplified" License
def location_(self):
        """Estimated robust location.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.location_ 
Example #7
Source File: mcd.py    From pyod with BSD 2-Clause "Simplified" License
def precision_(self):
        """ Estimated pseudo inverse matrix.
        (stored only if store_precision is True)

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.precision_ 
Example #8
Source File: mcd.py    From pyod with BSD 2-Clause "Simplified" License
def support_(self):
        """A mask of the observations that have been used to compute
        the robust estimates of location and shape.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.support_ 
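Examples #3 through #8 all repeat the same pattern: a read-only property on the pyod wrapper that simply forwards to the fitted scikit-learn estimator stored in detector_. A minimal sketch of that delegation, with class and attribute names simplified for illustration:

from sklearn.covariance import MinCovDet

class MCDWrapper:
    """Thin wrapper exposing fitted MinCovDet attributes as properties."""

    def fit(self, X):
        self.detector_ = MinCovDet().fit(X)
        return self

    @property
    def location_(self):
        # Forward to the underlying scikit-learn estimator, as in Example #6.
        return self.detector_.location_

    @property
    def covariance_(self):
        return self.detector_.covariance_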
Example #9
Source File: test_robust_covariance.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_mcd_class_on_invalid_input():
    X = np.arange(100)
    mcd = MinCovDet()
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         mcd.fit, X) 
Example #10
Source File: test_robust_covariance.py    From Mastering-Elasticsearch-7.0 with MIT License
def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc, tol_cov,
                          tol_support):

    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)
    # add some outliers
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
        (rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    pure_data = data[inliers_mask]
    # compute MCD by fitting an object
    mcd_fit = MinCovDet(random_state=rand_gen).fit(data)
    T = mcd_fit.location_
    S = mcd_fit.covariance_
    H = mcd_fit.support_
    # compare with the estimates learnt from the inliers
    error_location = np.mean((pure_data.mean(0) - T) ** 2)
    assert(error_location < tol_loc)
    error_cov = np.mean((empirical_covariance(pure_data) - S) ** 2)
    assert(error_cov < tol_cov)
    assert(np.sum(H) >= tol_support)
    assert_array_almost_equal(mcd_fit.mahalanobis(data), mcd_fit.dist_) 
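The helper takes the dataset shape and the acceptance tolerances from the caller; the values below are illustrative only:

# Small, well-separated case: 100 samples, 5 features, 10 injected outliers.
launch_mcd_on_dataset(n_samples=100, n_features=5, n_outliers=10,
                      tol_loc=0.01, tol_cov=0.01, tol_support=70)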
Example #11
Source File: test_robust_covariance.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_mcd_issue1127():
    # Check that the code does not break with X.shape = (3, 1)
    # (i.e. n_support = n_samples)
    rnd = np.random.RandomState(0)
    X = rnd.normal(size=(3, 1))
    mcd = MinCovDet()
    mcd.fit(X) 
Example #12
Source File: test_robust_covariance.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_mcd_issue3367():
    # Check that MCD completes when the covariance matrix is singular
    # i.e. when one row and its corresponding column are all zeros
    rand_gen = np.random.RandomState(0)

    # Think of these as the values for X and Y -> 10 values between -5 and 5
    data_values = np.linspace(-5, 5, 10).tolist()
    # Get the cartesian product of all possible coordinate pairs from above set
    data = np.array(list(itertools.product(data_values, data_values)))

    # Add a third column that's all zeros to make our data a set of points
    # within a plane, which means that the covariance matrix will be singular
    data = np.hstack((data, np.zeros((data.shape[0], 1))))

    # The line below would raise an exception if MCD could not handle a singular
    # covariance matrix. As a further test, since we have points in XYZ, the
    # principal components (Eigenvectors) of these directly relate to the
    # geometry of the points. Since it's a plane, we should be able to test
    # that the Eigenvector that corresponds to the smallest Eigenvalue is the
    # plane normal, specifically [0, 0, 1], since everything is in the XY plane
    # (as I've set it up above). To do this one would start by:
    #
    #     evals, evecs = np.linalg.eigh(mcd_fit.covariance_)
    #     normal = evecs[:, np.argmin(evals)]
    #
    # After which we need to assert that our `normal` is equal to [0, 0, 1].
    # Do note that there is floating point error associated with this, so it's
    # best to subtract the two and then compare some small tolerance (e.g.
    # 1e-12).
    MinCovDet(random_state=rand_gen).fit(data) 
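The long comment above outlines a follow-up geometric check; a hedged completion of it is sketched below (the tolerance is the one suggested in the comment, everything else mirrors the test):

import itertools

import numpy as np
from sklearn.covariance import MinCovDet

# Rebuild the planar dataset used in the test above.
data_values = np.linspace(-5, 5, 10).tolist()
data = np.array(list(itertools.product(data_values, data_values)))
data = np.hstack((data, np.zeros((data.shape[0], 1))))

mcd_fit = MinCovDet(random_state=np.random.RandomState(0)).fit(data)

# The eigenvector of the smallest eigenvalue should be the plane normal [0, 0, 1].
evals, evecs = np.linalg.eigh(mcd_fit.covariance_)
normal = evecs[:, np.argmin(evals)]
assert np.allclose(np.abs(normal), [0.0, 0.0, 1.0], atol=1e-12)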
Example #13
Source File: test_robust_covariance.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_mcd_support_covariance_is_zero():
    # Check that MCD returns a ValueError with informative message when the
    # covariance of the support data is equal to 0.
    X_1 = np.array([0.5, 0.1, 0.1, 0.1, 0.957, 0.1, 0.1, 0.1, 0.4285, 0.1])
    X_1 = X_1.reshape(-1, 1)
    X_2 = np.array([0.5, 0.3, 0.3, 0.3, 0.957, 0.3, 0.3, 0.3, 0.4285, 0.3])
    X_2 = X_2.reshape(-1, 1)
    msg = ('The covariance matrix of the support data is equal to 0, try to '
           'increase support_fraction')
    for X in [X_1, X_2]:
        assert_raise_message(ValueError, msg, MinCovDet().fit, X) 
Example #14
Source File: stats_of_cloth.py    From TextileDefectDetection with GNU Affero General Public License v3.0
def compute_MCD_weft(weftsPickled, target_path):

    weft_points_list = floatPointList()
    for pickled_path in weftsPickled:
        weft_points_list.extend(pickle.load(open(pickled_path, "rb" )))

    x_vals = [fp.x for fp in weft_points_list]
    y_vals = [fp.y for fp in weft_points_list]

    mean_hor_dist = weft_points_list.getMedianWeftDist()

    min_x = min(x_vals) + 1.5 * mean_hor_dist
    max_x = max(x_vals) - 1.5 * mean_hor_dist
    min_y = min(y_vals) + 1.5 * mean_hor_dist
    max_y = max(y_vals) - 1.5 * mean_hor_dist

    inner_points = floatPointList()
    for pt in weft_points_list:
        if min_x < pt.x < max_x and min_y < pt.y < max_y:
            inner_points.append(pt)

    X = np.zeros([len(inner_points), 3])

    for idx, pt in enumerate(inner_points):
        X[idx,0] = pt.area
        X[idx,1] = pt.right_dist
        X[idx,2] = pt.left_dist

    Y = X[~(X<=0).any(axis=1)]

    robust_cov = MinCovDet(support_fraction=0.8).fit(Y)
    pickle.dump(robust_cov, open(target_path, "wb")) 
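A hedged sketch of how the pickled estimator might later be used to score new weft measurements; the file name, feature values, and chi-square cutoff are illustrative, with the three columns mirroring area, right_dist, and left_dist as built above:

import pickle

import numpy as np
from scipy.stats import chi2

# Hypothetical path: compute_MCD_weft writes to whatever target_path was given.
with open("weft_mcd.pkl", "rb") as f:
    robust_cov = pickle.load(f)

# New observations with the same three features: area, right_dist, left_dist.
new_points = np.array([[12.0, 5.1, 4.9],
                       [40.0, 9.0, 1.0]])  # illustrative values

# Squared Mahalanobis distances to the robust center; flag points beyond the
# chi-square cutoff for 3 degrees of freedom.
dist_sq = robust_cov.mahalanobis(new_points)
print(dist_sq > chi2.ppf(0.99, df=3))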
Example #15
Source File: stats_of_cloth.py    From TextileDefectDetection with GNU Affero General Public License v3.0
def compute_MCD_warp(warpsPickled, target_path):

    warp_points_list = floatPointList()
    for pickled_path in warpsPickled:
        warp_points_list.extend(pickle.load(open(pickled_path, "rb" )))

    x_vals = [fp.x for fp in warp_points_list]
    y_vals = [fp.y for fp in warp_points_list]

    mean_ver_dist = warp_points_list.getMedianWarpDist()

    min_x = min(x_vals) + 1.5 * mean_ver_dist
    max_x = max(x_vals) - 1.5 * mean_ver_dist
    min_y = min(y_vals) + 1.5 * mean_ver_dist
    max_y = max(y_vals) - 1.5 * mean_ver_dist

    inner_points = floatPointList()
    for pt in warp_points_list:
        if min_x < pt.x < max_x and min_y < pt.y < max_y:
            inner_points.append(pt)


    #####CHANGED
    #print("attention, only 2D!!!!!")
    X = np.zeros([len(inner_points), 3])

    for idx, pt in enumerate(inner_points):
        X[idx,0] = pt.area
        X[idx,1] = pt.lower_dist
        X[idx,2] = pt.upper_dist


    Y = X[~(X<=0).any(axis=1)]

    robust_cov = MinCovDet(support_fraction=0.8).fit(Y)
    pickle.dump(robust_cov, open(target_path, "wb")) 
Example #16
Source File: test_covariance.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.covariance.EmpiricalCovariance, covariance.EmpiricalCovariance)
        self.assertIs(df.covariance.EllipticEnvelope, covariance.EllipticEnvelope)
        self.assertIs(df.covariance.GraphLasso, covariance.GraphLasso)
        self.assertIs(df.covariance.GraphLassoCV, covariance.GraphLassoCV)
        self.assertIs(df.covariance.LedoitWolf, covariance.LedoitWolf)
        self.assertIs(df.covariance.MinCovDet, covariance.MinCovDet)
        self.assertIs(df.covariance.OAS, covariance.OAS)
        self.assertIs(df.covariance.ShrunkCovariance, covariance.ShrunkCovariance)

        self.assertIs(df.covariance.shrunk_covariance, covariance.shrunk_covariance)
        self.assertIs(df.covariance.graph_lasso, covariance.graph_lasso)