Python scipy.stats.mode() Examples

The following are 30 code examples of scipy.stats.mode(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
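As quick orientation before the examples: scipy.stats.mode() returns the most common value along an axis together with its count. A minimal sketch of the call (note that in the older SciPy releases these examples target, the result fields are length-1 arrays, which is why the recurring [0][0] indexing appears below; recent SciPy versions return scalars by default):

import numpy as np
from scipy import stats

a = np.array([3, 1, 3, 2, 3, 2])
result = stats.mode(a)
print(result.mode, result.count)  # most frequent value (3) and its count (3)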
Example #1
Source File: missing_values_cleaner.py    From scikit-multiflow with BSD 3-Clause "New" or "Revised" License 6 votes
def transform(self, X):
        """ transform
        
        Does the transformation process in the samples in X.
        
        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.
        
        """
        r, c = get_dimensions(X)
        for i in range(r):
            if self.strategy in ['mean', 'median', 'mode']:
                self.window.add_element([X[i][:]])
            for j in range(c):
                if X[i][j] in self.missing_value or np.isnan(X[i][j]):
                    X[i][j] = self._get_substitute(j)

        return X 
Example #2
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes
def test_random_weights():
    # set this up so that each row should have a weighted mode of 6,
    # with a score that is easily reproduced
    mode_result = 6

    rng = np.random.RandomState(0)
    x = rng.randint(mode_result, size=(100, 10))
    w = rng.random_sample(x.shape)

    x[:, :5] = mode_result
    w[:, :5] += 1

    mode, score = weighted_mode(x, w, axis=1)

    assert_array_equal(mode, mode_result)
    assert_array_almost_equal(score.ravel(), w[:, :5].sum(1)) 
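weighted_mode here is sklearn.utils.extmath.weighted_mode, a weighted analogue of scipy.stats.mode in which each occurrence contributes its weight instead of a count of one. A minimal sketch of the behavior the test above exercises:

from sklearn.utils.extmath import weighted_mode

x = [4, 1, 4, 2, 4, 2]
weighted_mode(x, [1, 1, 1, 1, 1, 1])      # (array([4.]), array([3.]))
weighted_mode(x, [1, 3, 0.5, 1.5, 1, 2])  # (array([2.]), array([3.5]))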
Example #3
Source File: data_utils.py    From RGAN with MIT License 6 votes
def resample_eICU_patient(pid, resample_factor_in_min, variables, upto_in_minutes):
    """
    Resample a *single* patient.
    """
    pat_df = pd.read_hdf(paths.eICU_hdf_dir + '/vitalPeriodic.h5',
                         where='patientunitstayid = ' + str(pid),
                         columns=['observationoffset', 'patientunitstayid'] + variables,
                         mode='r')
    # sometimes it's empty
    if pat_df.empty:
        return None
    if upto_in_minutes is not None:
        pat_df = pat_df.loc[0:upto_in_minutes*60]
    # convert the offset to a TimedeltaIndex (necessary for resampling)
    pat_df.observationoffset = pd.TimedeltaIndex(pat_df.observationoffset, unit='m')
    pat_df.set_index('observationoffset', inplace=True)
    pat_df.sort_index(inplace=True)
    # resample by time
    pat_df_resampled = pat_df.resample(str(resample_factor_in_min) + 'T').median()  # pandas ignores NA in median by default
    # rename pid, cast to int
    pat_df_resampled.rename(columns={'patientunitstayid': 'pid'}, inplace=True)
    pat_df_resampled['pid'] = np.int32(pat_df_resampled['pid'])
    # get offsets in minutes from index
    pat_df_resampled['offset'] = np.int32(pat_df_resampled.index.total_seconds()/60)
    return pat_df_resampled 
Example #4
Source File: main_test.py    From PointNetGPD with MIT License 6 votes
def main():
    repeat = 10
    num_point = 500
    model.eval()
    torch.set_grad_enabled(False)

    # load pc(should be in local gripper coordinate)
    # local_pc: (N, 3)
    # local_pc = np.load("test.npy")
    local_pc = np.random.random([500, 3])  # test only
    predict = []
    for _ in range(repeat):
        if len(local_pc) >= num_point:
            local_pc = local_pc[np.random.choice(len(local_pc), num_point, replace=False)]
        else:
            local_pc = local_pc[np.random.choice(len(local_pc), num_point, replace=True)]

        # run model
        predict.append(test_network(model, local_pc)[0])
    print("voting: ", predict)
    predict = mode(predict).mode[0]

    # output
    print("Test result:", predict) 
Example #5
Source File: random_forests.py    From bdol-ml with GNU Lesser General Public License v3.0 6 votes
def testForest(roots, X, Y):
  errs = 0.0

  for i in range(0, X.shape[0]):
    votes = []
    for r in roots:
      yhat = np.argmax(dt_value(r, X[i, :]))
      votes.append(yhat)
    yhatEnsemble = stats.mode(votes)
    if Y[i, int(yhatEnsemble[0])] != 1:
      errs += 1.0

  return errs/X.shape[0]

# Turn off runtime warnings for invalid or divide errors
# These arise when calculating the entropy. We set any invalid entropy
# calculations (e.g. log(0)) to 0. 
Example #6
Source File: random_forest.py    From bdol-ml with GNU Lesser General Public License v3.0 6 votes
def test(self, test_data, test_target):
        t = 0
        # TODO: refactor the RF test function to depend not on an external
        # root but on itself
        dt = FastDecisionTree(1, 1)
        yhat_forest = np.zeros((test_data.shape[0], self.n_trees))
        for i in range(len(self.roots)):
            r = self.roots[i]
            prog_bar(t, self.n_trees)
            t += 1

            yhat_forest[:, i:] = dt.test_preds(r, test_data)

        prog_bar(self.n_trees, self.n_trees)

        yhat = stats.mode(yhat_forest, axis=1)[0]
        return yhat 
Example #7
Source File: parcellation.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License 6 votes
def _get_redop(red_op, weights=None, axis=None):
    if red_op in ['mean', 'average']:
        if weights is None:
            def fred(x, w): return np.mean(x, axis=axis)
        else:
            def fred(x, w): return np.average(x, weights=w, axis=axis)
    elif red_op == 'median':
        def fred(x, w): return np.median(x, axis=axis)
    elif red_op == 'mode':
        if weights is None:
            def fred(x, w): return mode(x, axis=axis)[0].ravel()
        else:
            def fred(x, w): return weighted_mode(x, w, axis=axis)
    elif red_op == 'sum':
        def fred(x, w): return np.sum(x if w is None else w * x, axis=axis)
    elif red_op == 'max':
        def fred(x, w): return np.max(x, axis=axis)
    elif red_op == 'min':
        def fred(x, w): return np.min(x, axis=axis)
    else:
        raise ValueError("Unknown reduction operation '{0}'".format(red_op))
    return fred 
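A minimal usage sketch for the 'mode' branch above, with a made-up input array: the returned reducer collapses the rows of x to the column-wise most frequent value.

import numpy as np

fred = _get_redop('mode', axis=0)
x = np.array([[1, 2],
              [1, 3],
              [2, 3]])
fred(x, None)  # array([1, 3]): the column-wise modes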
Example #8
Source File: manytimepad.py    From simple-cryptography with MIT License 6 votes
def find_key(streamciphers, iterations=50):

    ciphers = deepcopy(streamciphers)
    n = len(ciphers)
    # key size of longest cipher
    ksize = len(max(ciphers, key=len))
    possiblekeys = []

    for _ in range(iterations):

        shuffle(ciphers)
        k = bytearray(ksize)

        for a in range(n - 2):
            for b in range(a + 1, n - 1):
                for c in range(b + 1, n):
                    x, y, z = truncate3(ciphers[a], ciphers[b], ciphers[c])
                    build_key(k, x, y, z)

        possiblekeys.append(k)

    # finalize key using frequency analysis
    key_array = stats.mode(numpy.array(possiblekeys))[0][0]
    return bytes(list(key_array)) 
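The final step above takes a column-wise mode across all candidate keys, so each key byte is decided by majority vote. The same step in isolation, with hypothetical candidates:

import numpy
from scipy import stats

possiblekeys = [bytearray([10, 20]), bytearray([10, 30]), bytearray([11, 30])]
stats.mode(numpy.array(possiblekeys))[0][0]  # array([10, 30]): per-position majority byte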
Example #9
Source File: missing_values_cleaner.py    From scikit-multiflow with BSD 3-Clause "New" or "Revised" License 6 votes
def partial_fit(self, X, y=None):
        """ partial_fit
        
        Partially fits the model.
        
        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.
            
        y: Array-like
            The true labels.
        
        Returns
        -------
        MissingValuesCleaner
            self
        
        """
        X = np.asarray(X)
        if self.strategy in ['mean', 'median', 'mode']:
            self.window.add_element(X)
        return self 
Example #10
Source File: evaler.py    From ecg with GNU General Public License v3.0 6 votes
def predict(record):
    ecg = load.load_ecg(record +".mat")
    preproc = util.load(".")
    x = preproc.process_x([ecg])

    params = json.load(open("config.json"))
    params.update({
        "compile" : False,
        "input_shape": [None, 1],
        "num_categories": len(preproc.classes)
    })

    model = network.build_network(**params)
    model.load_weights('model.hdf5')

    probs = model.predict(x)
    prediction = sst.mode(np.argmax(probs, axis=2).squeeze())[0][0]
    return preproc.int_to_class[prediction] 
Example #11
Source File: weather.py    From Supply-demand-forecasting with MIT License 6 votes
def get_weather_dict(self,data_dir):
        t0 = time()
        filename = '../data_raw/' + data_dir.split('/')[-2] + '_weather.csv.dict.pickle'
        dumpload = DumpLoad( filename)
        if dumpload.isExisiting():
            return dumpload.load()
        
        resDict = {}
        df = self.load_weatherdf(data_dir)
        for index, row in df.iterrows():
            resDict[row['time_slotid']] = (index, row['weather'], row['temparature'], row['pm25'])
        for name, group in df.groupby('time_date'):
            resDict[name] = (-1, mode(group['weather'])[0][0], mode(group['temparature'])[0][0], mode(group['pm25'])[0][0])
            
       
        dumpload.dump(resDict)
        print "dump weather dict:", round(time()-t0, 3), "s"
        return resDict 
Example #12
Source File: historicaldata.py    From Supply-demand-forecasting with MIT License 6 votes
def find_history_data(self, row, history_dict=None,):
        start_district_id = row.iloc[0]
        time_id = row.iloc[1]
        index = ['history_mean','history_median','history_mode','history_plus_mean','history_plus_median', 'history_plus_mode']

        min_list = self.__get_historylist_from_dict(history_dict, start_district_id, time_id)
        plus_list1 = self.__get_historylist_from_dict(history_dict, start_district_id, time_id-1)
        plus_list2 = self.__get_historylist_from_dict(history_dict, start_district_id, time_id-2)
        plus_list = np.array((plus_list1 + plus_list2 + min_list))
        min_list = np.array(min_list)
        
        res = pd.Series([min_list.mean(), np.median(min_list), mode(min_list)[0][0],
                         plus_list.mean(), np.median(plus_list), mode(plus_list)[0][0]],
                        index=index)

        return res 
Example #13
Source File: pc_util.py    From H3DNet with MIT License 6 votes
def point_add_sem_label(pt, sem, k=10):
    sem_pt = sem[:, 0:3]
    sem_label = sem[:,3]
    pt_label = np.zeros(pt.shape[0])
    if pt.shape[0]==0:
        return pt_label
    else:
        nbrs = NearestNeighbors(n_neighbors=k,algorithm='ball_tree').fit(sem_pt)
        distances, indices = nbrs.kneighbors(pt)
        for i in range(pt.shape[0]):
            labels = sem_label[indices[i]]
            l, count = stats.mode(labels, axis=None)
            pt_label[i] = l
        return pt_label


    
# ----------------------------------------
# Testing
# ---------------------------------------- 
Example #14
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 6 votes
def test_axes(self):
        data1 = [10, 10, 30, 40]
        data2 = [10, 10, 10, 10]
        data3 = [20, 10, 20, 20]
        data4 = [30, 30, 30, 30]
        data5 = [40, 30, 30, 30]
        arr = np.array([data1, data2, data3, data4, data5])

        vals = stats.mode(arr, axis=None)
        assert_equal(vals[0], np.array([30]))
        assert_equal(vals[1], np.array([8]))

        vals = stats.mode(arr, axis=0)
        assert_equal(vals[0], np.array([[10, 10, 30, 30]]))
        assert_equal(vals[1], np.array([[2, 3, 3, 2]]))

        vals = stats.mode(arr, axis=1)
        assert_equal(vals[0], np.array([[10], [10], [20], [30], [30]]))
        assert_equal(vals[1], np.array([[2], [4], [3], [4], [3]])) 
Example #15
Source File: random_forest.py    From ycimpute with Apache License 2.0 6 votes
def predict(self, X):
        """
        Predicts the output (y) of a given matrix X

        Parameters
        ----------
        X : numerical or ordinal matrix of values corresponding to some output

        Returns
        -------
        The predict values corresponding to the inputs
        """

        votes = np.zeros(shape=(self.num_trees, X.shape[0]))
        for i, tree in enumerate(self.forest):
            votes[i] = tree.predict(X)

        predictions = np.zeros(shape=X.shape[0])
        if isinstance(self, RegressionForest):
            predictions = votes.mean(axis=0)
        else:
            # print(votes)
            predictions = np.squeeze(mode(votes, axis=0)[0])

        return predictions 
Example #16
Source File: reference.py    From arcasHLA with GNU General Public License v3.0 5 votes
def get_mode(lengths):
    return stats.mode(lengths)[0][0] 
Example #17
Source File: snr.py    From SAMRI with GNU General Public License v3.0 5 votes
def iter_base_metrics(file_template, substitutions,
	save_as='',
	):
	"""
	Create a `pandas.DataFrame` (optionally savable as `.csv`), containing base metrics (mean, median, mode, standard deviation) at each 4th dimension point of a 4D NIfTI file.
	This function is an iteration wrapper of `samri.report.snr.base_metrics()` using the SAMRI file_template/substitution model.

	Parameters
	----------

	file_template : str
		A formattable string containing as format fields keys present in the dictionaries passed to the `substitutions` variable.
	substitutions : list of dicts
		A list of dictionaries containing formatting strings as keys and strings as values.
	save_as : str, optional
		Path to which to save the Pandas DataFrame.

	Returns
	-------

	pandas.DataFrame
		Pandas DataFrame object containing a row for each analyzed file and columns named 'Mean', 'Median', 'Mode', and 'Standard Deviation', and (provided the respective key is present in the `substitutions` variable) 'subject', 'session', 'task', and 'acquisition'.
	"""

	n_jobs = mp.cpu_count()-2
	base_metrics_data = Parallel(n_jobs=n_jobs, verbose=0, backend="threading")(map(delayed(base_metrics),
		[file_template]*len(substitutions),
		substitutions,
		))

	df = pd.concat(base_metrics_data)

	if save_as:
		save_as = path.abspath(path.expanduser(save_as))
		if save_as.lower().endswith('.csv'):
			df.to_csv(save_as)
		else:
			raise ValueError("Please specify an output path ending in any one of "+",".join((".csv",))+".")
	return df 
Example #18
Source File: sparsedetect.py    From suite2p with GNU General Public License v3.0 5 votes
def neuropil_subtraction(mov,lx):
    """ subtract low-pass filtered version of binned movie

    low-pass filtered version ~ neuropil
    subtract to help ignore neuropil
    
    Parameters
    ----------------

    mov : 3D array
        binned movie, size [nbins x Ly x Lx]

    lx : int
        size of filter

    Returns
    ----------------

    mov : 3D array
        binned movie with "neuropil" subtracted, size [nbins x Ly x Lx]

    """
    if len(mov.shape)<3:
        mov = mov[np.newaxis, :, :]
    nbinned, Ly, Lx = mov.shape
    c1 = uniform_filter(np.ones((Ly,Lx)), size=[lx, lx], mode = 'constant')
    for j in range(nbinned):
        mov[j] -= uniform_filter(mov[j], size=[lx, lx], mode = 'constant') / c1
    return mov 
Example #19
Source File: plot_utils.py    From arviz with Apache License 2.0 5 votes
def calculate_point_estimate(point_estimate, values, bw=4.5):
    """Validate and calculate the point estimate.

    Parameters
    ----------
    point_estimate : Optional[str]
        Plot point estimate per variable. Values should be 'mean', 'median', 'mode' or None.
        Defaults to 'auto' i.e. it falls back to default set in rcParams.
    values : 1-d array
    bw : float
        Bandwidth scaling factor. Should be larger than 0. The higher this number the smoother the
        KDE will be. Defaults to 4.5, which is essentially the same as Scott's rule of thumb
        (the default rule used by SciPy).

    Returns
    -------
    point_value : float
        best estimate of data distribution
    """
    point_value = None
    if point_estimate == "auto":
        point_estimate = rcParams["plot.point_estimate"]
    elif point_estimate not in ("mean", "median", "mode", None):
        raise ValueError(
            "Point estimate should be 'mean', 'median', 'mode' or None, not {}".format(
                point_estimate
            )
        )
    if point_estimate == "mean":
        point_value = values.mean()
    elif point_estimate == "mode":
        if isinstance(values[0], float):
            density, lower, upper = _fast_kde(values, bw=bw)
            x = np.linspace(lower, upper, len(density))
            point_value = x[np.argmax(density)]
        else:
            point_value = mode(values)[0][0]
    elif point_estimate == "median":
        point_value = np.median(values)

    return point_value 
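For continuous samples, scipy.stats.mode is rarely meaningful (every float tends to be unique), which is why the example above switches to a KDE argmax for floats. A sketch of the same idea using scipy.stats.gaussian_kde in place of arviz's internal _fast_kde (an assumed equivalent, not arviz's API):

import numpy as np
from scipy.stats import gaussian_kde

values = np.random.normal(loc=2.0, scale=1.0, size=1000)
grid = np.linspace(values.min(), values.max(), 512)
density = gaussian_kde(values)(grid)
mode_estimate = grid[np.argmax(density)]  # close to 2.0, the true density peak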
Example #20
Source File: analyse_results_paper_v2.py    From YAFS with MIT License 5 votes
def getRbyApp(df,dtmp):
    dr = pd.DataFrame(
        columns=['app', 'user', 'avg', 'std', 'm', 'r', 'invalid', 'over','totalmsg'])  # m - number of messages sent
    times = []
    ixloc = 0
    for g in dtmp.keys():
        ids = dtmp[g]
        responses = []
        messages = []
        over = 0
        # Firstly, compute the mode over all the (app, user) transmissions
        for i in ids:
            messages.append(df[df.id == i].shape[0])  # number of messages sent by the user

        # Requests with a lower number of messages are filtered out
        msg = np.array(messages)
        # mode = stats.mode(msg).mode[0]
        mode = stats.mode(msg)[0][0]

        # Secondly, if each transmission has the same mode then the time is stored
        invalid = 0
        for i in ids:
            dm = df[df.id == i]
            if mode == dm.shape[0]:
                r = dm['time_out'].max() - dm['time_emit'].min()
                responses.append(r)
                times.append(dm['time_emit'].min())
            else:
                invalid += 1

        resp = np.array(responses)

        avg = resp.mean()
        dsv = resp.std()
        totalmsg = len(resp)
        dr.loc[ixloc] = [g[0], g[1], avg, dsv, mode, resp, invalid, over,totalmsg]
        ixloc += 1
        print(g, "\t", len(dtmp[g]), "\t", invalid, "\t", over)

    return dr, times 
Example #21
Source File: naive_multiband.py    From gatspy with BSD 2-Clause "Simplified" License 5 votes
def mode_in_range(a, axis=0, tol=1E-3):
    """Find the mode of values to within a certain range"""
    a_trunc = a // tol
    vals, counts = mode(a_trunc, axis)
    mask = (a_trunc == vals)
    # mean of each row
    return np.sum(a * mask, axis) / np.sum(mask, axis) 
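To illustrate: mode_in_range bins the values at the given tolerance, finds the modal bin, and returns the mean of the values that fall in it. With hypothetical data:

import numpy as np

a = np.array([0.5001, 0.5002, 0.9000, 0.5003])
mode_in_range(a, tol=1e-3)  # ~0.5002: the mean of the three values sharing the modal bin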
Example #22
Source File: HydroSEDPluginUtils.py    From WMF with GNU General Public License v3.0 5 votes
def BasinConvert2HillsOrChannels(self, Var, Metodo, Agregado):
        '''Aggregates a variable over channels or hillslopes according to a given method.'''
        # Aggregation methods
        def __fx__(x, metodo = 'media'):
            if metodo == 'media':
                return np.nanmean(x)
            elif metodo[0] == 'P':
                percentil = int(metodo[1:])
                return np.nanpercentile(x, percentil)
            elif metodo == 'min':
                return np.nanmin(x)
            elif metodo == 'max':
                return np.nanmax(x)
            elif metodo == 'moda':
                res = stat.mode(x)
                return res.mode[0]
        # Empty variable initialized to the no-data value
        VarAgregada = np.ones(self.cuenca.ncells)*wmf.cu.nodata
        # Iterate over the hillslopes of the basin element
        for i in range(1,self.cuenca.nhills+1):
            # Define positions according to the aggregation target
            hacer = True
            pos = np.where(self.cuenca.hills_own == i)[0]
            if Agregado == 'Canales':
                pos2 = np.where((self.cuenca.hills_own == i) & (self.cuenca.CellCauce == 1))[0]
                if len(pos2)==0:
                    hacer = False
                else:
                    pos = pos2
            # Aggregate the variable
            if hacer:
                VarTemporal = __fx__(Var[pos], Metodo)
                VarAgregada[pos] = VarTemporal
        return VarAgregada 
Example #23
Source File: handdetector.py    From deep-prior with GNU General Public License v3.0 5 votes
def getCrop(self, dpt, xstart, xend, ystart, yend, zstart, zend, thresh_z=True):
        """
        Crop patch from image
        :param dpt: depth image to crop from
        :param xstart: start x
        :param xend: end x
        :param ystart: start y
        :param yend: end y
        :param zstart: start z
        :param zend: end z
        :param thresh_z: threshold z values
        :return: cropped image
        """
        if len(dpt.shape) == 2:
            cropped = dpt[max(ystart, 0):min(yend, dpt.shape[0]), max(xstart, 0):min(xend, dpt.shape[1])].copy()
            # add pixels that are out of the image in order to keep aspect ratio
            cropped = numpy.pad(cropped, ((abs(ystart)-max(ystart, 0),
                                           abs(yend)-min(yend, dpt.shape[0])),
                                          (abs(xstart)-max(xstart, 0),
                                           abs(xend)-min(xend, dpt.shape[1]))), mode='constant', constant_values=0)
        elif len(dpt.shape) == 3:
            cropped = dpt[max(ystart, 0):min(yend, dpt.shape[0]), max(xstart, 0):min(xend, dpt.shape[1]), :].copy()
            # add pixels that are out of the image in order to keep aspect ratio
            cropped = numpy.pad(cropped, ((abs(ystart)-max(ystart, 0),
                                           abs(yend)-min(yend, dpt.shape[0])),
                                          (abs(xstart)-max(xstart, 0),
                                           abs(xend)-min(xend, dpt.shape[1])),
                                          (0, 0)), mode='constant', constant_values=0)
        else:
            raise NotImplementedError()

        if thresh_z is True:
            msk1 = numpy.bitwise_and(cropped < zstart, cropped != 0)
            msk2 = numpy.bitwise_and(cropped > zend, cropped != 0)
            cropped[msk1] = zstart
            cropped[msk2] = 0.  # backface is at 0, it is set later
        return cropped 
Example #24
Source File: handdetector.py    From deep-prior with GNU General Public License v3.0 5 votes
def getNDValue(self):
        """
        Get value of not defined depth value distances
        :return:value of not defined depth value
        """
        if self.dpt[self.dpt < self.minDepth].shape[0] > self.dpt[self.dpt > self.maxDepth].shape[0]:
            return stats.mode(self.dpt[self.dpt < self.minDepth])[0][0]
        else:
            return stats.mode(self.dpt[self.dpt > self.maxDepth])[0][0] 
Example #25
Source File: one_against_one.py    From Support-Vector-Machines-Succinctly with MIT License 5 votes
def predict_class(X, classifiers, class_pairs):
    predictions = np.zeros((X.shape[0], len(classifiers)))
    for idx, clf in enumerate(classifiers):
        class_pair = class_pairs[idx]
        prediction = clf.predict(X)
        predictions[:, idx] = np.where(prediction == 1, class_pair[0], class_pair[1])
    return mode(predictions, axis=1)[0].ravel().astype(int) 
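The mode call is what turns the pairwise decisions into a final label: each row of predictions holds one vote per binary classifier, and the row-wise mode is the majority class. The voting step in isolation, with made-up votes:

import numpy as np
from scipy.stats import mode

predictions = np.array([[1., 2., 1.],
                        [3., 3., 2.]])
mode(predictions, axis=1)[0].ravel().astype(int)  # array([1, 3])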
Example #26
Source File: estimators.py    From Pyspatialml with GNU General Public License v3.0 5 votes
def _uniform_weighting(self, neighbor_vals):
        X = mode(neighbor_vals, axis=1)
        return X 
Example #27
Source File: score_specs.py    From DIVE-backend with GNU General Public License v3.0 5 votes
def _mode(v):
    m = mode(v)
    return [ m[0][0], m[1][0] ] 
Example #28
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes
def test_kstest():
    # from numpy.testing import assert_almost_equal

    # comparing with values from R
    x = np.linspace(-1,1,9)
    D,p = stats.kstest(x,'norm')
    assert_almost_equal(D, 0.15865525393145705, 12)
    assert_almost_equal(p, 0.95164069201518386, 1)

    x = np.linspace(-15,15,9)
    D,p = stats.kstest(x,'norm')
    assert_almost_equal(D, 0.44435602715924361, 15)
    assert_almost_equal(p, 0.038850140086788665, 8)

    # test for namedtuple attribute results
    attributes = ('statistic', 'pvalue')
    res = stats.kstest(x, 'norm')
    check_named_results(res, attributes)

    # the following tests rely on deterministically replicated rvs
    np.random.seed(987654321)
    x = stats.norm.rvs(loc=0.2, size=100)
    D,p = stats.kstest(x, 'norm', mode='asymp')
    assert_almost_equal(D, 0.12464329735846891, 15)
    assert_almost_equal(p, 0.089444888711820769, 15)
    assert_almost_equal(np.array(stats.kstest(x, 'norm', mode='asymp')),
                np.array((0.12464329735846891, 0.089444888711820769)), 15)
    assert_almost_equal(np.array(stats.kstest(x,'norm', alternative='less')),
                np.array((0.12464329735846891, 0.040989164077641749)), 15)
    # this 'greater' test fails with precision of decimal=14
    assert_almost_equal(np.array(stats.kstest(x,'norm', alternative='greater')),
                np.array((0.0072115233216310994, 0.98531158590396228)), 12)

    # missing: no test that uses *args 
Example #29
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes
def test_mode_nan(self):
        data1 = [3, np.nan, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6]
        actual = stats.mode(data1)
        assert_equal(actual, (6, 3))

        actual = stats.mode(data1, nan_policy='omit')
        assert_equal(actual, (6, 3))
        assert_raises(ValueError, stats.mode, data1, nan_policy='raise')
        assert_raises(ValueError, stats.mode, data1, nan_policy='foobar') 
Example #30
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes
def test_objects(self):
        # Python objects must be sortable (le + eq) and have ne defined
        # for np.unique to work. hash is for set.
        class Point(object):
            def __init__(self, x):
                self.x = x

            def __eq__(self, other):
                return self.x == other.x

            def __ne__(self, other):
                return self.x != other.x

            def __lt__(self, other):
                return self.x < other.x

            def __hash__(self):
                return hash(self.x)

        points = [Point(x) for x in [1, 2, 3, 4, 3, 2, 2, 2]]
        arr = np.empty((8,), dtype=object)
        arr[:] = points
        assert_(len(set(points)) == 4)
        assert_equal(np.unique(arr).shape, (4,))
        with suppress_warnings() as sup:
            r = sup.record(RuntimeWarning, ".*checked for nan values")
            vals = stats.mode(arr)
            assert_equal(len(r), 1)

        assert_equal(vals[0][0], Point(2))
        assert_equal(vals[1][0], 4)