Python numpy.isnan() Examples

The following code examples show how to use numpy.isnan(). They are extracted from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like. You can also save this page to your account.

Example 1
Project: pylspm   Author: lseman   File: pylspm.py    (MIT License) View Source Project 8 votes vote down vote up
def rhoA(self):
        """Compute one rhoA value per latent variable.

        For every name in ``self.latent`` the non-zero outer weights ``w``
        and the matching measurement columns of ``self.data_`` are combined
        as ``(w'w)**2 * (w'(S - diag(S))w) / (w'(ww' - diag(ww'))w)``.
        A NaN result is replaced by 1.

        Returns:
            The transpose of a one-row DataFrame whose columns are the
            latent variable names, i.e. one row per latent variable.
        """
        # One-row accumulator, one column per latent variable, initialised to 0.
        rhoA = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            weights = pd.DataFrame(self.outer_weights[self.latent[i]])
            # Keep only the rows (indicators) that have a non-zero weight.
            weights = weights[(weights.T != 0).any()]
            # w'w (1x1) and ww' (indicators x indicators).
            result = pd.DataFrame.dot(weights.T, weights)
            result_ = pd.DataFrame.dot(weights, weights.T)

            # Data columns whose 'latent' entry in self.Variables is this
            # latent variable.
            S = self.data_[self.Variables['measurement'][
                self.Variables['latent'] == self.latent[i]]]
            # Cross-product matrix divided by the number of rows
            # (presumably a correlation matrix if data_ is standardised --
            # TODO confirm).
            S = pd.DataFrame.dot(S.T, S) / S.shape[0]
            # Both quadratic forms use only the off-diagonal entries.
            numerador = (
                np.dot(np.dot(weights.T, (S - np.diag(np.diag(S)))), weights))
            denominador = (
                (np.dot(np.dot(weights.T, (result_ - np.diag(np.diag(result_)))), weights)))
            rhoA_ = ((result)**2) * (numerador / denominador)
            # NaN arises e.g. from 0/0 when only one indicator is left
            # (both off-diagonal matrices are then zero).
            if(np.isnan(rhoA_.values)):
                rhoA[self.latent[i]] = 1
            else:
                rhoA[self.latent[i]] = rhoA_.values

        return rhoA.T
Example 2
Project: QUANTAXIS   Author: yutiansut   File: QAIndicator_Series.py    (license) View Source Project 7 votes vote down vote up
def SMA(Series, N, M=1):
    """Smoothed moving average, ``Y = (M * X + (N - M) * Y') / N``.

    Scanning starts at position 1 (position 0 is never read).  NaN values
    found from there on are skipped, and the first non-NaN value seeds the
    recursion as the initial ``Y'``.

    Args:
        Series: indexable sequence of numbers; ``Series[i]`` with an
            integer ``i`` must return the i-th value.
        N: smoothing period (divisor of the recursion).
        M: weight given to the newest value (default 1).

    Returns:
        pandas.Series containing the seed value followed by one smoothed
        value for every position from the seed onwards.  The result is
        empty when there is nothing to seed from (fewer than two items, or
        only NaN after position 0); the previous code indexed past the end
        of the input and raised in that case.
    """
    ret = []
    i = 1
    length = len(Series)
    # Skip the leading NaN values of the input.
    while i < length and np.isnan(Series[i]):
        i += 1
    if i >= length:
        # No valid value left to start the recursion with.
        return pd.Series(ret, dtype=float)

    preY = Series[i]  # Y'
    ret.append(preY)
    while i < length:
        Y = (M * Series[i] + (N - M) * preY) / float(N)
        ret.append(Y)
        preY = Y
        i += 1
    return pd.Series(ret)
Example 3
Project: pyrsss   Author: butala   File: nan.py    (MIT License) View Source Project 7 votes vote down vote up
def nan_helper(y):
    """
    Locate the NaNs of an array and supply a mask-to-index converter.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, boolean mask that is True where y is NaN
        - index, a function, with signature indices= index(logical_indices),
          returning the positions at which a boolean mask is True
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= NP.interp(x(nans), x(~nans), y[~nans])
    """
    # Source: http://stackoverflow.com/questions/6518811/interpolate-nan-values-in-a-numpy-array
    def to_indices(mask):
        return mask.nonzero()[0]

    nan_mask = NP.isnan(y)
    return nan_mask, to_indices
Example 4
Project: AutoML5   Author: djajetic   File: data_converter.py    (MIT License) View Source Project 6 votes vote down vote up
def replace_missing(X):
    """Replace NaN entries of a dense array with -999.0, in place.

    Args:
        X: either a dense numpy array, or an object exposing
           ``getformat()`` (a scipy sparse matrix).

    Returns:
        ``X`` itself.  A CSR matrix is returned untouched; a dense array
        has every NaN overwritten with -999.0.

    Raises:
        ValueError: for a sparse matrix that is not in CSR format.  The
            previous code failed on that input with a NameError on an
            undefined variable.
    """
    get_format = getattr(X, 'getformat', None)
    if get_format is not None:
        sparse_format = get_format()
        if sparse_format == 'csr':
            return X
        raise ValueError(
            "unsupported sparse format: {!r}".format(sparse_format))

    # Dense input: flag missing values with a sentinel. (djajetic 05.09.2015)
    X[np.isnan(X)] = -999.0
    return X
Example 5
Project: pylspm   Author: lseman   File: imputation.py    (MIT License) View Source Project 6 votes vote down vote up
def get(self, X):
        """Fill the NaN cells of ``X`` with per-column estimator predictions.

        ``X`` is converted to an array and a mean-imputed copy is obtained
        from ``self.meanImput``.  When more than one estimator is available,
        estimator ``i`` predicts every missing cell of column ``i`` from the
        mean-imputed row with column ``i`` removed.  With a single (or no)
        estimator the array is returned with its NaNs unchanged.

        Returns:
            The array built from ``X`` with predicted values written into
            the cells that were NaN.
        """
        X = np.array(X)
        missing = np.isnan(X)
        filled = self.meanImput(X.copy())

        if len(self.estimators_) <= 1:
            return X

        for col, model in enumerate(self.estimators_):
            rows = missing[:, col]
            # Predictors: all other columns, restricted to the rows where
            # this column is missing.
            unknown = np.delete(filled, col, 1)[rows]
            if len(unknown) == 0:
                continue
            X[rows, col] = [model.predict(sample) for sample in unknown]

        return X
Example 6
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0) View Source Project 6 votes vote down vote up
def treegauss_remove_row(
        data_row,
        tree_grid,
        latent_row,
        vert_ss,
        edge_ss,
        feat_ss, ):
    """Subtract one row's contribution from the sufficient statistics.

    All three statistics arrays are updated in place; nothing is returned.

    Args:
        data_row: observed values, one per vertex; NaN entries are skipped.
        tree_grid: integer array whose rows 1 and 2 hold the two vertex
            ids of every edge (one column per edge).
        latent_row: latent vectors, one row per vertex.
        vert_ss: per-vertex statistics, reduced by outer(z, z).
        edge_ss: per-edge statistics, reduced by outer(z1, z2).
        feat_ss: per-vertex feature statistics.
    """
    # Vertex statistics.
    for v, z in enumerate(latent_row):
        vert_ss[v, :, :] -= np.outer(z, z)

    # Edge statistics.
    for e in range(tree_grid.shape[1]):
        v1, v2 = tree_grid[1, e], tree_grid[2, e]
        edge_ss[e, :, :] -= np.outer(latent_row[v1, :], latent_row[v2, :])

    # Feature statistics, observed cells only.
    for v, x in enumerate(data_row):
        if not np.isnan(x):
            feat_ss[v] -= 1
            feat_ss[v, 1] -= x
            feat_ss[v, 2:] -= x * latent_row[v, :]  # TODO Use central covariance.
Example 7
Project: seq2seq   Author: google   File: models_test.py    (license) View Source Project 6 votes vote down vote up
def test_train(self):
    """Run the pipeline in TRAIN mode; check output shapes and a finite loss."""
    model, fetches_ = self._test_pipeline(tf.contrib.learn.ModeKeys.TRAIN)
    predictions_, loss_, _ = fetches_

    # Expected number of decoding steps: sequence length + 10 + 2, capped
    # at the model's maximum target length, minus one.
    target_len = self.sequence_length + 10 + 2
    decode_cap = model.params["target.max_seq_len"]
    steps = np.minimum(target_len, decode_cap) - 1

    np.testing.assert_array_equal(
        predictions_["logits"].shape,
        [self.batch_size, steps, model.target_vocab_info.total_size])
    for name in ("losses", "predicted_ids"):
        np.testing.assert_array_equal(
            predictions_[name].shape, [self.batch_size, steps])
    self.assertFalse(np.isnan(loss_))
Example 8
Project: zipline-chinese   Author: zhanghan1990   File: risk.py    (Apache License 2.0) View Source Project 6 votes vote down vote up
def information_ratio(algorithm_returns, benchmark_returns):
    """
    Mean of the returns relative to the benchmark divided by their sample
    standard deviation (ddof=1).

    http://en.wikipedia.org/wiki/Information_ratio

    Args:
        algorithm_returns (np.array-like):
            All returns during algorithm lifetime.
        benchmark_returns (np.array-like):
            All benchmark returns during algo lifetime.

    Returns:
        float. Information ratio; 0.0 when the deviation of the relative
        returns is (tolerantly) zero or NaN.
    """
    active_returns = algorithm_returns - benchmark_returns
    tracking_error = active_returns.std(ddof=1)

    degenerate = (zp_math.tolerant_equals(tracking_error, 0)
                  or np.isnan(tracking_error))
    if degenerate:
        return 0.0

    return np.mean(active_returns) / tracking_error
Example 9
Project: zipline-chinese   Author: zhanghan1990   File: data_frame_source.py    (Apache License 2.0) View Source Project 6 votes vote down vote up
def raw_data_gen(self):
        """Yield one price event per (timestamp, sid) cell of ``self.data``.

        Rows of ``self.data`` are visited in order.  A NaN price is skipped
        as long as its sid has not produced an event yet; once a sid is in
        ``self.started_sids`` every later cell, NaN or not, is emitted.

        Yields:
            dict with keys ``dt``, ``sid``, ``price`` and a constant
            ``volume`` of 1e9.
        """
        for dt, series in self.data.iterrows():
            # Series.items() replaces iteritems(), which pandas 2.0 removed.
            for sid, price in series.items():
                # Skip SIDs that can not be forward filled
                if np.isnan(price) and \
                   sid not in self.started_sids:
                    continue
                self.started_sids.add(sid)

                event = {
                    'dt': dt,
                    'sid': sid,
                    'price': price,
                    # Just chose something large
                    # if no volume available.
                    'volume': 1e9,
                }
                yield event
Example 10
Project: zipline-chinese   Author: zhanghan1990   File: test_sources.py    (Apache License 2.0) View Source Project 6 votes vote down vote up
def test_nan_filter_dataframe(self):
        """Check which NaN prices a DataFrameSource drops and which it keeps."""
        dates = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
        frame = pd.DataFrame(np.random.randn(2, 2),
                             index=dates,
                             columns=[4, 5])
        # NaN on the first date: should be filtered
        frame.loc[dates[0], 4] = np.nan
        # NaN after a real value: should not be filtered, should have
        # been ffilled
        frame.loc[dates[1], 5] = np.nan
        source = DataFrameSource(frame)
        for expected_sid in (5, 4, 5):
            event = next(source)
            self.assertEqual(expected_sid, event.sid)
        self.assertFalse(np.isnan(event.price))
Example 11
Project: table-compositor   Author: InvestmentSystems   File: util.py    (MIT License) View Source Project 6 votes vote down vote up
def df_type_to_str(i):
    '''
    Convert into simple datatypes from pandas/numpy types

    Returns a plain ``bool``/``int``/``float`` for ``np.bool_``,
    ``np.int_``/``np.uint`` and float values, the string ``'NaN'`` for NaN
    and ``pd.NaT``, a UTF-8 decoded string for ``bytes`` and ``str(i)``
    for everything else (including infinite floats, tuples and lists).
    '''
    if isinstance(i, np.bool_):
        return bool(i)
    if isinstance(i, np.int_):
        return int(i)
    # ``np.float`` was only an alias of the builtin ``float`` and was
    # removed in NumPy 1.24; the builtin keeps the exact same check
    # (np.float64 is a subclass of float).
    if isinstance(i, float):
        if np.isnan(i):
            return 'NaN'
        elif np.isinf(i):
            return str(i)
        return float(i)
    if isinstance(i, np.uint):
        return int(i)
    if isinstance(i, bytes):
        return i.decode('UTF-8')
    if isinstance(i, (tuple, list)):
        return str(i)
    if i is pd.NaT:  # not identified as a float null
        return 'NaN'
    return str(i)
Example 12
Project: hip-mdp-public   Author: dtak   File: hiv.py    (MIT License) View Source Project 6 votes vote down vote up
def calc_reward(self, action=0, state=None, **kw ):
        """Return the reward of taking ``action`` in ``state``.

        When ``state`` is None it is fetched with ``self.observe()``; when
        ``self.logspace`` is set the state is first raised as a power of
        ten.  The result is limited to +/- ``self.reward_bound`` and a NaN
        reward becomes ``-self.reward_bound``.
        """
        eps1, eps2 = self.eps_values_for_actions[action]
        if state is None:
            state = self.observe()
        values = 10**state if self.logspace else state
        T1, T2, T1s, T2s, V, E = values

        # Treatment is penalised because of its side-effects.
        reward = -0.1*V - 2e4*eps1**2 - 2e3*eps2**2 + 1e3*E

        # Keep the reward inside the configured range.
        if np.isnan(reward):
            return -self.reward_bound
        if reward > self.reward_bound:
            return self.reward_bound
        if reward < -self.reward_bound:
            return -self.reward_bound
        return reward
Example 13
Project: lung-cancer-detector   Author: YichenGong   File: util.py    (MIT License) View Source Project 6 votes vote down vote up
def to_rgb(img):
    """
    Converts the given array into a RGB image. If the number of channels is
    less than 3 the array is tiled three times along the channel axis.
    NaNs are set to 0 and the values are rescaled to [0, 255].

    A constant image (maximum of 0 after shifting) is returned as all
    zeros instead of being divided by zero, and non-float input is
    converted to float64 so the in-place scaling works.  A float array
    that already has 3 or more channels may be modified in place.

    NOTE(review): a 2-channel input is tiled to 6 channels, not 3 --
    confirm whether 2-channel images are ever passed.

    :param img: the array to convert [nx, ny, channels]

    :returns img: the rgb image [nx, ny, 3]
    """
    img = np.atleast_3d(img)
    if not np.issubdtype(img.dtype, np.floating):
        # In-place true division is not defined for integer/bool arrays.
        img = img.astype(np.float64)
    channels = img.shape[2]
    if channels < 3:
        img = np.tile(img, 3)

    img[np.isnan(img)] = 0
    img -= np.amin(img)
    peak = np.amax(img)
    if peak != 0:
        # Guard: a constant image would otherwise become 0/0 = NaN.
        img /= peak
    img *= 255
    return img
Example 14
Project: NeoAnalysis   Author: neoanalysis   File: ColorMapWidget.py    (license) View Source Project 6 votes vote down vote up
def map(self, data):
        """Return one RGBA row (floats in 0-1) per entry of the mapped field.

        Every row starts as the 'Default' colour; rows whose value equals
        the ``maskValue`` of an entry of the 'Values' parameter get that
        entry's colour.
        """
        values = data[self.fieldName]
        rgba = np.empty((len(values), 4))
        rgba[:] = np.array(fn.colorTuple(self['Default'])) / 255.

        for entry in self.param('Values'):
            selected = values == entry.maskValue
            colour = np.array(fn.colorTuple(entry.value())) / 255.
            rgba[selected] = colour

        # Disabled alternatives kept for reference:
        #scaled = np.clip((data-self['Min']) / (self['Max']-self['Min']), 0, 1)
        #cmap = self.value()
        #colors = cmap.map(scaled, mode='float')

        #mask = np.isnan(data) | np.isinf(data)
        #nanColor = self['NaN']
        #nanColor = (nanColor.red()/255., nanColor.green()/255., nanColor.blue()/255., nanColor.alpha()/255.)
        #colors[mask] = nanColor

        return rgba
Example 15
Project: NeoAnalysis   Author: neoanalysis   File: ColorMapWidget.py    (license) View Source Project 6 votes vote down vote up
def map(self, data):
        """Return one RGBA row (floats in 0-1) per entry of the mapped field.

        Every row starts as the 'Default' colour; rows whose value equals
        the ``maskValue`` of an entry of the 'Values' parameter get that
        entry's colour.
        """
        values = data[self.fieldName]
        rgba = np.empty((len(values), 4))
        rgba[:] = np.array(fn.colorTuple(self['Default'])) / 255.

        for entry in self.param('Values'):
            selected = values == entry.maskValue
            colour = np.array(fn.colorTuple(entry.value())) / 255.
            rgba[selected] = colour

        # Disabled alternatives kept for reference:
        #scaled = np.clip((data-self['Min']) / (self['Max']-self['Min']), 0, 1)
        #cmap = self.value()
        #colors = cmap.map(scaled, mode='float')

        #mask = np.isnan(data) | np.isinf(data)
        #nanColor = self['NaN']
        #nanColor = (nanColor.red()/255., nanColor.green()/255., nanColor.blue()/255., nanColor.alpha()/255.)
        #colors[mask] = nanColor

        return rgba
Example 16
Project: risk-slim   Author: ustunb   File: lattice_cpa.py    (BSD 3-Clause "New" or "Revised" License) View Source Project 6 votes vote down vote up
def round_solution_pool(pool, constraints):
    """Build a pool holding the rounded version of every solution in ``pool``.

    ``pool`` is de-duplicated and sorted first.  Coefficients are visited
    from largest to smallest magnitude.  A coefficient whose ``C_0j``
    entry in ``constraints['coef_set']`` is not NaN is always rounded to
    the nearest integer; one whose entry is NaN is rounded only while
    fewer than ``constraints['L0_max']`` such coefficients have been kept,
    and is left at zero afterwards.

    Returns:
        A new, de-duplicated and sorted SolutionPool whose objective
        values are NaN.
    """
    pool.distinct().sort()
    P = pool.P
    L0_reg_ind = np.isnan(constraints['coef_set'].C_0j)
    L0_max = constraints['L0_max']
    rounded_pool = SolutionPool(P)

    for solution in pool.solutions:
        # indices ordered from largest to smallest absolute coefficient
        by_magnitude = np.argsort([-abs(x) for x in solution])
        rounded = np.zeros(shape=(1, P))
        kept = 0
        for rank in range(0, P):
            j = by_magnitude[rank]
            penalized = L0_reg_ind[j]
            if penalized and not kept < L0_max:
                continue
            rounded[0, j] = np.round(solution[j], 0)
            if penalized:
                kept += penalized

        rounded_pool.add(objvals=np.nan, solutions=rounded)

    rounded_pool.distinct().sort()
    return rounded_pool
Example 17
Project: TADPOLE   Author: noxtoby   File: TADPOLE_D1_D2.py    (license) View Source Project 6 votes vote down vote up
def checkFSXvalsAgainstADNIMERGE(tadpoleDF, mriADNI1FileFSX, otherSSvisCodeStr, ssNameTag,
                                 ignoreMissingCols = False):
  """Print the rows where 'Hippocampus' differs from ST29SV + ST88SV.

  The three columns are converted to numeric in place (unparseable entries
  become NaN) and a 'HIPPOSUM' column holding the sum of the two ST columns
  is added to ``tadpoleDF``.  Rows are skipped when 'Hippocampus' is NaN or
  when both ST columns are NaN.  Nothing is returned.

  Rows are addressed by position throughout, matching the ``.iloc`` used
  when printing.  The unused locals of the earlier version were removed;
  one of them read the global ``ssDF`` and raised NameError when it was
  not defined.

  Args:
    tadpoleDF: DataFrame with 'RID', 'VISCODE', 'Hippocampus' and the two
      'ST29SV<tag>' / 'ST88SV<tag>' columns.
    mriADNI1FileFSX: unused.
    otherSSvisCodeStr: unused.
    ssNameTag: suffix appended to 'ST29SV' and 'ST88SV' to get column names.
    ignoreMissingCols: unused.
  """
  st29Col = 'ST29SV%s' % ssNameTag
  st88Col = 'ST88SV%s' % ssNameTag
  volumeCols = ['Hippocampus', st29Col, st88Col]

  tadpoleDF[volumeCols] = tadpoleDF[volumeCols].apply(pd.to_numeric, errors='coerce')
  tadpoleDF['HIPPOSUM'] = tadpoleDF[st29Col] + tadpoleDF[st88Col]

  hippo = tadpoleDF['Hippocampus'].values
  st29 = tadpoleDF[st29Col].values
  st88 = tadpoleDF[st88Col].values
  for r in range(tadpoleDF.shape[0]):
    valsNan = np.isnan(hippo[r]) or (np.isnan(st29[r]) and np.isnan(st88[r]))
    if valsNan:
      continue

    if hippo[r] != (st29[r] + st88[r]):
      print('entries dont match\n ', tadpoleDF[['RID','VISCODE', 'Hippocampus', st29Col,
        st88Col, 'HIPPOSUM']].iloc[r])

  # Conclusion: the reason why entries above don't match is because UCSFFSX has duplicate entries for the same subject and viscode.
Example 18
Project: SWEETer-Cat   Author: DanielAndreasen   File: test_utils.py    (MIT License) View Source Project 6 votes vote down vote up
def test_hz():
    """Test the hz function."""
    df, _ = readSC()
    # Every catalogue star must give a float for models 2 and 4.
    for teff, logg, mass in df.loc[:, ['teff', 'logg', 'mass']].values:
        lum = (teff / 5777)**4 * (mass / ((10**logg) / (10**4.44)))**2
        for model in (2, 4):
            assert isinstance(hz(teff, lum, model=model), float)

    teff, lum = 5777, 1
    invalids = [{teff: lum}, [teff, lum], (teff, lum), "..."]
    for model in range(1, 6):
        assert isinstance(hz(teff, lum, model), float)

    # Reference values per model for teff=5777, lum=1.
    expected = {1: 0.75, 2: 0.98, 3: 0.99, 4: 1.71, 5: 1.77}
    for model, result in expected.items():
        assert round(hz(teff, lum, model), 2) == result
        # Invalid argument types must yield NaN.
        for invalid in invalids:
            assert np.isnan(hz(invalid, lum, model))
            assert np.isnan(hz(teff, invalid, model))
    assert hz(teff, lum, 2) < hz(teff, lum, 4)  # hz1 < hz2
Example 19
Project: PersonalizedMultitaskLearning   Author: mitmedialab   File: helperFuncs.py    (license) View Source Project 6 votes vote down vote up
def generateWekaFile(X,Y,features,path,name):
    """Write ``X`` and ``Y`` to ``path + name + '.arff'`` in Weka ARFF format.

    Every feature is declared as a numeric attribute and the label as a
    nominal ``cluster {True,False}`` attribute.  NaN cells are written as
    ``?``.  A row is labelled ``True`` when its ``Y`` entry equals 1
    (which also covers boolean True), ``False`` otherwise.

    Args:
        X: 2-D array indexable as ``X[i, j]`` with a ``shape`` attribute.
        Y: sequence of labels, one per row of ``X``.
        features: attribute names, one per column of ``X``.
        path: output prefix; concatenated with ``name`` as is, so it must
            already end with a path separator.
        name: relation name and file stem.
    """
    # The context manager closes the file even if a write fails.
    with open(path + name + '.arff', 'w') as f:
        f.write("@relation '" + name + "'\n\n")

        for feat in features:
            f.write("@attribute " + feat + " numeric\n")
        f.write("@attribute cluster {True,False}\n\n")

        f.write("@data\n\n")
        for i in range(X.shape[0]):
            cells = ["?" if np.isnan(X[i, j]) else str(X[i, j])
                     for j in range(X.shape[1])]
            cells.append("True" if Y[i] == 1 else "False")
            f.write(",".join(cells) + "\n")
Example 20
Project: attend_infer_repeat   Author: akosiorek   File: prior_test.py    (GNU General Public License v3.0) View Source Project 6 votes vote down vote up
def test_posterior_zeros(self):
        """KL term and its gradient must stay finite for zero probabilities.

        Evaluates the posterior, its gradient, the KL term and the KL
        gradient for ``p = [[.5, 0., 0.]]``; the KL sum must be positive
        and neither the KL nor its gradient may contain NaN or inf.
        """
        p = np.asarray([.5, 0., 0.]).reshape((1, 3))

        # print() with a single pre-formatted argument behaves the same on
        # Python 2 and 3; the former print statements were Python 2 only.
        posterior = self.eval(self.posterior, p)
        print('posterior {}'.format(posterior))
        posterior_grad = self.eval(self.posterior_grad, p)
        print('posterior grad {}'.format(posterior_grad))

        kl = self.eval(self.posterior_kl, p)
        print(kl)
        self.assertGreater(kl.sum(), 0)
        self.assertFalse(np.isnan(kl).any())
        self.assertTrue(np.isfinite(kl).all())

        grad = self.eval(self.posterior_kl_grad, p)
        print(grad)
        self.assertFalse(np.isnan(grad).any())
        self.assertTrue(np.isfinite(grad).all())
Example 21
Project: seqhawkes   Author: mlukasik   File: util.py    (GNU Lesser General Public License v3.0) View Source Project 6 votes vote down vote up
def update_summary(
    var_up,
    var,
    start,
    end,
    ):
    """Summarise how much an update changed a variable.

    Args:
        var_up: updated value(s).
        var: previous value(s), same shape as ``var_up``.
        start: datetime taken before the update.
        end: datetime taken after the update.

    Returns:
        Tuple ``(mean |diff|, std |diff|, mean rel. diff, std rel. diff,
        microseconds)`` where the relative difference is ``|diff| / var``
        with NaN entries (e.g. 0/0) left out.

    NOTE(review): ``timedelta.microseconds`` is only the sub-second
    component of ``end - start``, not the total duration -- confirm this
    is what callers expect.
    """
    diff = np.abs(var_up - var)
    reldiff = diff / var

    # filter out nan's

    try:
        reldiff = reldiff[~np.isnan(reldiff)]
    except (IndexError, TypeError):
        # Scalar input cannot be indexed with a boolean; keep the value
        # as it is.  Other errors are no longer silently swallowed.
        pass
    return (np.mean(diff), np.std(diff), np.mean(reldiff),
            np.std(reldiff), (end - start).microseconds)
Example 22
Project: dc_stat_think   Author: justinbois   File: test_dc_stat_think.py    (MIT License) View Source Project 6 votes vote down vote up
def test_bootstrap_replicate_1d(data, seed):
    """Compare bootstrap replicates against the reference implementation.

    For mean, median and standard deviation the replicate computed from
    ``data`` must equal (within ``atol``) the reference replicate computed
    from ``data`` with its NaNs removed, using the same random seed; two
    NaN results also count as equal.
    """
    clean = data[~np.isnan(data)]
    for func in (np.mean, np.median, np.std):
        np.random.seed(seed)
        x = dcst.bootstrap_replicate_1d(data, func)
        np.random.seed(seed)
        x_correct = original.bootstrap_replicate_1d(clean, func)
        # np.isnan accepts no atol/equal_nan keywords; passing them raised
        # TypeError whenever x was NaN.
        assert (np.isnan(x) and np.isnan(x_correct)) \
                    or np.isclose(x, x_correct, atol=atol, equal_nan=True)
Example 23
Project: DomainDependencyMemeJsai2017   Author: GINK03   File: deal.py    (license) View Source Project 6 votes vote down vote up
def step4():
  """Fit a 128-cluster KMeans on the vectors stored in key_vec.pkl.

  Vectors containing NaN are dropped.  The fitted model is pickled to
  kmeans.model and the cluster id of every kept vector is printed.
  """
  # NOTE: unpickling executes code stored in the file; only use files
  # produced by this pipeline.
  with open("key_vec.pkl", "rb") as fh:
    key_vec = pickle.load(fh)
  vecs = []
  for vec in key_vec.values():
    x = np.array(vec)
    if np.isnan(x).any():
      # print(vec)
      continue
    vecs.append(x)
  vecs   = np.array(vecs)
  # precompute_distances='auto' and n_jobs=1 were the library defaults and
  # were removed in scikit-learn 1.0, so they are no longer passed.
  kmeans = KMeans(n_clusters=128, init='k-means++', n_init=10, max_iter=300,
                       tol=0.0001, verbose=0,
                       random_state=None, copy_x=True)
  print("now fitting...")
  kmeans.fit(vecs)

  with open("kmeans.model", "wb") as fh:
    pickle.dump(kmeans, fh)
  for p in kmeans.predict(vecs):
    print(p)
Example 24
Project: DomainDependencyMemeJsai2017   Author: GINK03   File: deal.py    (license) View Source Project 6 votes vote down vote up
def _step5(arr):
  """Vectorise one group of lines with fastText and assign KMeans clusters.

  Args:
    arr: tuple ``(key, lines, tipe)``; ``lines`` are written to
      ``./tmp/tmp.<tipe>.<key>.txt`` and fed to
      ``./fasttext print-sentence-vectors``.

  For every output line whose last 100 fields parse as floats without NaN,
  one JSON object ``{"txt": [...], "cluster": [...]}`` is appended to
  ``tmp/tmp.<tipe>.<key>.json``.  Blank output lines are skipped.
  Nothing is returned.
  """
  # NOTE: unpickling executes code stored in the file; only use a model
  # file produced by this pipeline.
  with open("kmeans.model", "rb") as fh:
    kmeans = pickle.load(fh)
  key, lines, tipe = arr
  print(key)
  with open("./tmp/tmp.{tipe}.{key}.txt".format(tipe=tipe,key=key), "w") as fh:
    fh.write("\n".join(lines))
  # NOTE(review): key and tipe are interpolated into a shell command line;
  # they must not come from untrusted input.
  with os.popen("./fasttext print-sentence-vectors ./models/model.bin < tmp/tmp.{tipe}.{key}.txt".format(tipe=tipe, key=key)) as pipe:
    res = pipe.read()
  # The context manager makes sure the JSON lines are flushed and the file
  # is closed; it used to be left open.
  with open("tmp/tmp.{tipe}.{key}.json".format(tipe=tipe,key=key), "w") as w:
    for line in res.split("\n"):
      fields = line.split()
      if not fields:
        # e.g. the empty string after the final newline; it would
        # otherwise reach predict() as an empty vector.
        continue
      try:
        vec = list(map(float, fields[-100:]))
      except ValueError:
        print(line)
        print(res)
        continue
      x = np.array(vec)
      if np.isnan(x).any():
        continue
      cluster = kmeans.predict([vec])
      txt = fields[:-100]
      obj = {"txt": txt, "cluster": cluster.tolist()}
      data = json.dumps(obj, ensure_ascii=False)
      w.write( data + "\n" )
Example 25
Project: lm   Author: rafaljozefowicz   File: language_model_test.py    (license) View Source Project 6 votes vote down vote up
def test_lm(self):
        """Train the LM for up to 50 steps on generated data; loss must drop below 1."""
        hps = get_test_hparams()

        with tf.variable_scope("model"):
            model = LM(hps)

        with self.test_session() as sess:
            tf.initialize_all_variables().run()
            tf.initialize_local_variables().run()

            loss = 1e5
            fetches = [model.loss, model.train_op]
            for step in range(50):
                x, y, w = simple_data_generator(hps.batch_size, hps.num_steps)
                feed = {model.x: x, model.y: y, model.w: w}
                loss, _ = sess.run(fetches, feed)
                print("%d: %.3f %.3f" % (step, loss, np.exp(loss)))
                # Stop early once training has diverged.
                if np.isnan(loss):
                    print("NaN detected")
                    break

            self.assertLess(loss, 1.0)
Example 26
Project: histwords   Author: williamleif   File: seriesanalysis.py    (license) View Source Project 6 votes vote down vote up
def get_series_median_peryear(word_time_series, i_year_words, one_minus=False, start_year=1900, end_year=2000, year_inc=10, exclude_partial_missing=False):
    """
    Return the array of per-year medians of the values of the words
    specified per year in i_year_words.

    Words missing from word_time_series and values that are NaN or exactly
    0 are ignored; a year with no remaining value contributes no entry, so
    the result can be shorter than the number of years visited.

    word_time_series -- dict word -> {year: value}
    i_year_words -- dict year -> iterable of words to use for that year
    one_minus -- use 1 - value instead of value
    start_year, end_year, year_inc -- years visited (end_year inclusive)
    exclude_partial_missing -- drop words having a NaN anywhere in their series
    """
    if exclude_partial_missing:
        # list(...) is needed on Python 3, where dict.values() is a view
        # that numpy cannot sum.
        r_word_time_series = {
            word: time_series
            for word, time_series in word_time_series.items()
            if not np.isnan(np.sum(list(time_series.values())))
        }
    else:
        r_word_time_series = word_time_series

    medians = []
    for year in range(start_year, end_year + 1, year_inc):
        word_array = np.array([r_word_time_series[word][year] for word in i_year_words[year]
            if word in r_word_time_series and not np.isnan(r_word_time_series[word][year]) and not r_word_time_series[word][year] == 0])
        if len(word_array) == 0:
            continue
        if one_minus:
            word_array = 1 - word_array
        medians.append(np.median(word_array))
    return np.array(medians)
Example 27
Project: histwords   Author: williamleif   File: seriesanalysis.py    (license) View Source Project 6 votes vote down vote up
def get_series_mean_std_peryear(word_time_series, i_year_words, one_minus=False, start_year=1900, end_year=2000, year_inc=1, exclude_partial_missing=False):
    """
    Return the per-year mean and standard deviation arrays for the values
    of the words specified per year in i_year_words.

    Words missing from word_time_series and values that are NaN or
    infinite are ignored; a year with no remaining value contributes no
    entry to either array.

    word_time_series -- dict word -> {year: value}
    i_year_words -- dict year -> iterable of words to use for that year
    one_minus -- use 1 - value instead of value
    start_year, end_year, year_inc -- years visited (end_year inclusive)
    exclude_partial_missing -- drop words having a NaN anywhere in their series
    """
    if exclude_partial_missing:
        # list(...) is needed on Python 3, where dict.values() is a view
        # that numpy cannot sum.
        r_word_time_series = {
            word: time_series
            for word, time_series in word_time_series.items()
            if not np.isnan(np.sum(list(time_series.values())))
        }
    else:
        r_word_time_series = word_time_series

    means = []
    stderrs = []
    for year in range(start_year, end_year + 1, year_inc):
        word_array = np.array([r_word_time_series[word][year] for word in i_year_words[year]
            if word in r_word_time_series and not np.isnan(r_word_time_series[word][year]) and not np.isinf(r_word_time_series[word][year])])
        if len(word_array) == 0:
            continue
        if one_minus:
            word_array = 1 - word_array
        means.append(word_array.mean())
        stderrs.append(word_array.std())
    return np.array(means), np.array(stderrs)
Example 28
Project: histwords   Author: williamleif   File: seriesanalysis.py    (license) View Source Project 6 votes vote down vote up
def get_series_mean_stderr_peryear(word_time_series, i_year_words, one_minus=False, start_year=1900, end_year=2000, year_inc=1,  exclude_partial_missing=False):
    """
    Return the per-year mean and error arrays for the values of the words
    specified per year in i_year_words.

    Words missing from word_time_series and NaN values are ignored.  Every
    visited year produces an entry; a year without any value yields NaN.

    NOTE(review): the error is computed as std / n, whereas the standard
    error of the mean is std / sqrt(n) -- confirm which one is intended.

    word_time_series -- dict word -> {year: value}
    i_year_words -- dict year -> iterable of words to use for that year
    one_minus -- use 1 - value instead of value
    start_year, end_year, year_inc -- years visited (end_year inclusive)
    exclude_partial_missing -- drop words having a NaN inside
        [start_year, end_year]; the kept series are cut to that range
    """
    means = []
    stderrs = []
    r_word_time_series = {}
    if exclude_partial_missing:
        for word, time_series in word_time_series.items():
            time_series = {year: val for year, val in time_series.items() if start_year <= year <= end_year}
            # list(...) is needed on Python 3, where dict.values() is a
            # view that numpy cannot sum.
            if not np.isnan(np.sum(list(time_series.values()))):
                r_word_time_series[word] = time_series
    else:
        r_word_time_series = word_time_series
    for year in range(start_year, end_year + 1, year_inc):
        word_array = np.array([r_word_time_series[word][year] for word in i_year_words[year]
            if word in r_word_time_series and not np.isnan(r_word_time_series[word][year])])
        if one_minus:
            word_array = 1 - word_array
        means.append(word_array.mean())
        stderrs.append(word_array.std() / len(word_array))
    return np.array(means), np.array(stderrs)
Example 29
Project: histwords   Author: williamleif   File: seriesanalysis.py    (license) View Source Project 6 votes vote down vote up
def get_yearly_set_dev(series, i_year_words, one_minus=False, start_year=1900, end_year=2000, method='diff'):
    """
    Gets the mean relative deviation of the words in words vs. the full series.

    For every year the non-NaN values of the words listed in
    i_year_words[year] are compared with the column mean of the matrix
    built from the full series, either as a difference (method='diff') or
    as a ratio (method='ratio').

    Returns (means, errors), one entry per year, where the error is
    std / n of the deviations.  Raises RuntimeError for any other method.
    """
    base_mat = _make_series_mat(series, series.keys(), one_minus=one_minus, start_year=start_year, end_year=end_year)
    # Column means do not change between years; compute them once.
    base_means = base_mat.mean(0)
    means = []
    stderrs = []
    for year in range(start_year, end_year + 1):
        word_array = np.array([series[word][year] for word in i_year_words[year]
            if word in series and not np.isnan(series[word][year])])
        if one_minus:
            word_array = 1 - word_array
        if method == 'diff':
            word_array = word_array - base_means[year-start_year]
        elif method == 'ratio':
            word_array = word_array / base_means[year-start_year]
        else:
            raise RuntimeError("Unknown deviation method. Use diff or ratio.")
        means.append(word_array.mean())
        stderrs.append(word_array.std() / len(word_array))
    return np.array(means), np.array(stderrs)
Example 30
Project: kmeans-service   Author: MAYHEM-Lab   File: sf_kmeans.py    (license) View Source Project 6 votes vote down vote up
def log_likelihood(self, data):
        """Return the log-likelihood of ``data`` under the fitted clusters.

        Each cluster ``k`` with ``nk`` points contributes
        ``nk * (log(nk / n) - d/2 * log(2*pi) - 1/2 * log|det(cov_k)|)``
        minus half the sum of the Mahalanobis distances of its points to
        the cluster centre (NaN distances are ignored).  The inverse
        covariance matrices are stored in ``self._inv_covar_matrices``.

        NOTE(review): the distances returned by cdist are not squared
        before being summed, unlike the exponent of a Gaussian density --
        confirm this is intended.

        Args:
            data: array with one row per point and one column per feature.

        Raises:
            Exception: when the result is NaN or infinite.  Negative
                infinity is now rejected too; only NaN and +inf were
                caught before.
        """
        nks = np.bincount(self.labels_, minlength=self.n_clusters)  # number of points in each cluster
        n, d = data.shape
        log_likelihood = 0
        covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)
        covar_matrix_det_v = np.linalg.det(covar_matrices)
        self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
        for k, nk in enumerate(nks):
            if self.verbose == 1:
                print('log_likelihood: covar_matrix_det = {}'.format(covar_matrix_det_v[k]))
            term_1 = nk * (np.log(float(nk)/n) - 0.5 * d * np.log(2*np.pi) - 0.5 * np.log(abs(covar_matrix_det_v[k])))
            cdist_result = cdist(data[self.labels_ == k], np.array([self.cluster_centers_[k]]), metric='mahalanobis', VI=self._inv_covar_matrices[k])
            cdist_no_nan = cdist_result[~np.isnan(cdist_result)]  #  to deal with nans returned by cdist
            term_2 = -0.5 * (np.sum(cdist_no_nan))
            log_likelihood += term_1 + term_2
        if not np.isfinite(log_likelihood):
            raise Exception('ll is nan or inf')
        return log_likelihood
Example 31
Project: empyrical   Author: quantopian   File: test_stats.py    (license) View Source Project 6 votes vote down vote up
def test_alpha(self, returns, benchmark, expected):
        """Check alpha against the expected value and a scipy regression."""
        observed = self.empyrical.alpha(returns, benchmark)
        assert_almost_equal(observed, expected, DECIMAL_PLACES)

        if len(returns) != len(benchmark):
            return

        # Cross-check with scipy linregress on the observations that are
        # present in both series; the intercept is scaled by 252.
        returns_arr = returns.values
        benchmark_arr = benchmark.values
        valid = ~(np.isnan(returns_arr) | np.isnan(benchmark_arr))
        regression = stats.linregress(benchmark_arr[valid], returns_arr[valid])
        intercept = regression[1]

        assert_almost_equal(observed, intercept * 252, DECIMAL_PLACES)

    # Alpha/beta translation tests. 
Example 32
Project: empyrical   Author: quantopian   File: test_stats.py    (license) View Source Project 6 votes vote down vote up
def test_beta(self, returns, benchmark, expected):
        """Check beta against the expected value and a scipy regression."""
        observed = self.empyrical.beta(returns, benchmark)
        assert_almost_equal(observed, expected, DECIMAL_PLACES)

        if len(returns) != len(benchmark):
            return

        # Cross-check with the slope of scipy linregress on the
        # observations that are present in both series.
        returns_arr = returns.values
        benchmark_arr = benchmark.values
        valid = ~(np.isnan(returns_arr) | np.isnan(benchmark_arr))
        regression = stats.linregress(benchmark_arr[valid], returns_arr[valid])
        slope = regression[0]

        assert_almost_equal(observed, slope)
Example 33
Project: plotnine   Author: has2k1   File: position_stack.py    (license) View Source Project 6 votes vote down vote up
def strategy(data, params):
        """
        Stack overlapping intervals.

        Assumes that each set has the same horizontal position.  NaN
        heights count as 0.  With ``params['fill']`` the stack is scaled
        by the absolute value of its total height.  ``ymin``, ``ymax``
        and ``y`` are written into ``data``, which is also returned.
        """
        vjust = params['vjust']

        y = data['y'].copy()
        y[np.isnan(y)] = 0
        # Running total with a leading 0: consecutive entries are the two
        # edges of each stacked interval.
        edges = np.append(0, y.cumsum())

        if params['fill']:
            edges = edges / np.abs(edges[-1])

        lower, upper = edges[:-1], edges[1:]
        data['ymin'] = np.minimum(lower, upper)
        data['ymax'] = np.maximum(lower, upper)
        # less intuitive than (ymin + vjust(ymax-ymin)), but
        # this way avoids subtracting numbers of potentially
        # similar precision
        data['y'] = ((1-vjust)*data['ymin'] + vjust*data['ymax'])
        return data
Example 34
Project: OpenAPS   Author: medicinexlab   File: bgdata.py    (license) View Source Project 6 votes vote down vote up
def _find_index(bg_df, start_date, end_date, make_col_bool):
    if (make_col_bool): bg_df['date'] = bg_df['created_at'].apply(lambda x: x.date()) #create column with just the date if make_col_bool is True

    #Find the first date with the start date (first entry) and the last date with the end date (last entry)
    #Since the older dates have higher indices, we use max() for start and min() for the end dates
    start_index = bg_df[bg_df['date'] == start_date.date()].index.max()
    end_index = bg_df[bg_df['date'] == end_date.date()].index.min()

    #Raises exception if invalid dates (which are labeled as NaN)
    if np.isnan(start_index): raise Exception("Invalid start date: " + str(start_date.date()))
    if np.isnan(end_index): raise Exception("Invalid end date: " + str(end_date.date()))

    return bg_df, start_index, end_index


#Function to get the bg data 
Example 35
Project: LinearCorex   Author: gregversteeg   File: vis_corex.py    (license) View Source Project 6 votes vote down vote up
def plot_heatmaps(data, mis, column_label, cont, topk=30, prefix=''):
    """
    Save one heatmap image per row of ``mis``.

    For row ``j`` the ``topk`` columns with the largest ``mis[j]`` values are
    selected, the samples are ordered by ``cont[:, j]``, and every selected
    variable is centred and scaled with its NaN-aware mean and standard
    deviation. NaN cells are masked in the plot. Rows with fewer than two
    selected columns are skipped. Images are written to
    ``<prefix>/heatmaps/group_num=<j>.png``; the directory is created when
    it does not exist yet.
    """
    cmap = sns.cubehelix_palette(as_cmap=True, light=.9)
    n_factors, _ = mis.shape
    for j in range(n_factors):
        top = np.argsort(- mis[j, :])[:topk]
        if len(top) < 2:
            continue

        plt.clf()
        sample_order = np.argsort(cont[:, j])
        # Variables become rows, samples (sorted by the factor) become columns.
        block = data[:, top][sample_order].T
        block -= np.nanmean(block, axis=1, keepdims=True)
        block /= np.nanstd(block, axis=1, keepdims=True)
        row_labels = [column_label[i] for i in top]
        sns.heatmap(block, vmin=-3, vmax=3, cmap=cmap,
                    yticklabels=row_labels, xticklabels=False,
                    mask=np.isnan(block))

        filename = '{}/heatmaps/group_num={}.png'.format(prefix, j)
        target_dir = os.path.dirname(filename)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        plt.title("Latent factor {}".format(j))
        plt.yticks(rotation=0)
        plt.savefig(filename, bbox_inches='tight')
        plt.close('all')
        #plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=cont[:, j],
        #          outfile=prefix + '/relationships/group_num=' + str(j), latent=labels[:, j], alpha=0.1)
Example 36
Project: feagen   Author: ianlini   File: data_handlers.py    (license) View Source Project 6 votes vote down vote up
def write_data(self, result_dict):
        """
        Store every entry of ``result_dict`` as a new dataset in ``self.h5f``.

        Each value is first checked for NaN (for sparse matrices only the
        ``.data`` attribute is inspected). CSC/CSR matrices are written
        through ``h5sparse``; everything else goes through
        ``self.h5f.create_dataset``. The file is flushed once at the end.

        Raises
        ------
        ValueError
            If a value contains NaN.
        NotImplementedError
            If ``key`` already exists in the file.
        """
        for key, result in six.iteritems(result_dict):
            values = result.data if ss.isspmatrix(result) else result
            if np.isnan(values).any():
                raise ValueError("data {} have nan".format(key))

            timer_label = "Writing generated data {} to hdf5 file".format(key)
            with SimpleTimer(timer_label, end_in_new_line=False):
                if key in self.h5f:
                    raise NotImplementedError("Overwriting not supported.")
                if isinstance(result, (ss.csc_matrix, ss.csr_matrix)):
                    # sparse matrix
                    h5sparse.Group(self.h5f).create_dataset(key, data=result)
                else:
                    self.h5f.create_dataset(key, data=result)
        self.h5f.flush()
Example 37
Project: MachineLearningPracticePrograms   Author: Subarno   File: kmeans.py    (license) View Source Project 6 votes vote down vote up
def repeat_until_convergence(labelled_data, labelled_clusters, unlabelled_centroids):
    """
    Alternate centroid updates and cluster assignment until they stop moving.

    Each pass recomputes the centroids from the current clusters, re-forms
    the clusters, and measures the largest distance any centroid moved. The
    loop ends when the relative change of that distance between two passes
    is NaN.

    Returns the final ``(labelled_clusters, unlabelled_centroids)``.
    """
    last_shift = 0
    while True:
        old_centroids = unlabelled_centroids
        unlabelled_centroids = move_centroids(labelled_clusters)
        labelled_clusters = form_clusters(labelled_data, unlabelled_centroids)

        shifts = [np.linalg.norm(old - new)
                  for old, new in zip(old_centroids, unlabelled_centroids)]
        largest_shift = max(shifts)

        delta = largest_shift - last_shift
        if np.isnan(delta):
            relative_change = np.nan
        else:
            relative_change = abs(delta / np.mean([last_shift, largest_shift])) * 100
        last_shift = largest_shift

        # The relative change is NaN (0 / 0) once the centroids did not move
        # in two consecutive passes.
        if np.isnan(relative_change):
            break
    return labelled_clusters, unlabelled_centroids
Example 38
Project: AutoML5   Author: djajetic   File: data_manager.py    (MIT License) View Source Project 5 votes vote down vote up
def loadData (self, filename, verbose=True, replace_missing=True):
		''' Get the data from a text file in one of 3 formats: dense, sparse, sparse_binary.

		filename        -- path of the text file to read
		verbose         -- print progress messages when true
		replace_missing -- for the 'dense' format, run data_converter.replace_missing
		                   when the loaded data contains NaN

		When self.use_pickle is set, a cached copy named "<basename>.pickle" in
		self.tmp_dir is returned if it exists; otherwise the freshly parsed data
		is written there. 'format' and 'feat_num' are looked up in self.info and
		self.getFormatData / self.getNbrFeatures are called first when they are
		absent. Returns the loaded data.
		'''
		if verbose:  print("========= Reading " + filename)
		start = time.time()
		pickle_name = os.path.basename(filename) + ".pickle"
		if self.use_pickle:
			pickle_path = os.path.join (self.tmp_dir, pickle_name)
			if os.path.exists (pickle_path):
				# Pickle streams are binary: text mode ("r") fails on Python 3.
				# NOTE(review): pickle.load can execute arbitrary code, so
				# tmp_dir must only ever contain files written by this program.
				with open (pickle_path, "rb") as pickle_file:
					vprint (verbose, "Loading pickle file : " + pickle_path)
					return pickle.load(pickle_file)
		if 'format' not in self.info:
			self.getFormatData(filename)
		if 'feat_num' not in self.info:
			self.getNbrFeatures(filename)

		data_func = {'dense':data_io.data, 'sparse':data_io.data_sparse, 'sparse_binary':data_io.data_binary_sparse}

		data = data_func[self.info['format']](filename, self.info['feat_num'])

		# IMPORTANT: when we replace missing values we double the number of variables

		# np.isnan(data).any() instead of np.any(map(np.isnan, data)): on
		# Python 3 map() is a lazy iterator that np.any always treats as true.
		if self.info['format']=='dense' and replace_missing and np.isnan(data).any():
			vprint (verbose, "Replace missing values by 0 (slow, sorry)")
			data = data_converter.replace_missing(data)
		if self.use_pickle:
			pickle_path = os.path.join (self.tmp_dir, pickle_name)
			with open (pickle_path, "wb") as pickle_file:
				vprint (verbose, "Saving pickle file : " + pickle_path)
				p = pickle.Pickler(pickle_file)
				p.fast = True
				p.dump(data)
		end = time.time()
		if verbose:  print( "[+] Success in %5.2f sec" % (end - start))
		return data
Example 39
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License) View Source Project 5 votes vote down vote up
def sanitize_array(array):
    ''' Replace NaN and Inf (there should not be any!)

    +Inf is replaced by the largest value that is neither NaN nor +Inf,
    -Inf by the smallest value that is neither NaN nor -Inf, and NaN by the
    midpoint of those two. ``array`` must be a numpy array; it is modified
    in place and returned.
    '''
    flat = np.ravel(array)
    # Boolean masks instead of filter(): on Python 3 filter() is a lazy
    # iterator, which np.nanmax / np.nanmin cannot reduce.
    maxi = np.nanmax(flat[flat != np.inf])   # Max except NaN and Inf
    mini = np.nanmin(flat[flat != -np.inf])  # Min except NaN and -Inf
    array[array == np.inf] = maxi
    array[array == -np.inf] = mini
    mid = (maxi + mini) / 2
    array[np.isnan(array)] = mid
    return array
Example 40
Project: pylspm   Author: lseman   File: pylspm.py    (MIT License) View Source Project 5 votes vote down vote up
def htmt(self):
        """
        Compute a heterotrait-monotrait style ratio table for the latent variables.

        Uses the correlation matrix of ``self.data_`` re-indexed by
        ``self.manifests``. For every latent variable the mean of the
        non-zero off-diagonal correlations among its own manifest variables
        is taken; for every pair of latent variables the mean correlation
        between their two blocks of manifest variables is divided by the
        square root of the product of the two within-block means. NaN ratios
        are replaced by 0.

        Returns a DataFrame indexed by ``self.latent`` on both axes that is
        filled only strictly below the diagonal; all other cells are 0.
        """
        corr = pd.DataFrame(self.data_.corr(),
                            index=self.manifests, columns=self.manifests)

        n_latent = self.lenlatent
        blocks = []
        within_means = []
        for i in range(n_latent):
            members = self.Variables['measurement'][
                self.Variables['latent'] == self.latent[i]]
            labels = list(members.values)
            blocks.append(labels)
            # .loc replaces the removed .ix indexer; the lookup is by label.
            # np.array() copies, so the correlation table is never mutated.
            within = np.array(corr.loc[labels, labels], dtype=float)
            within = within - np.diag(np.diag(within))
            within[within == 0] = np.nan
            within_means.append(np.nanmean(within))

        between_means = np.empty((n_latent, n_latent))
        scale = np.empty((n_latent, n_latent))
        for k in range(n_latent):
            for j in range(n_latent):
                between = corr.loc[blocks[j], blocks[k]].values
                between_means[k, j] = np.nanmean(between)
                scale[k, j] = np.sqrt(within_means[j] * within_means[k])

        ratios = np.divide(between_means, scale)
        ratios[np.isnan(ratios)] = 0

        return pd.DataFrame(np.tril(ratios, k=-1),
                            index=self.latent, columns=self.latent)
Example 41
Project: YellowFin_Pytorch   Author: JianGoForIt   File: yellowfin.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def get_cubic_root(self):
    """
    Return the real root x of the cubic described in the comments below.

    Raises ``Exception`` when any input (``_dist_to_opt``, ``_h_min``,
    ``_grad_var``) or the resulting root is NaN or infinite.
    """
    # The underlying expression is x^2 D^2 + (1-x)^4 * C / h_min^2 with
    # x = sqrt(mu). Substituting x = y + 1 gives y^3 + py = q, where
    # p = (D^2 h_min^2)/(2*C) and q = -p.
    # The root is computed with Vieta's substitution; there is only one
    # real solution y (which is in [0, 1]).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    # eps in the numerator prevents momentum = 1 in case of zero gradient.
    solver_inputs = (self._dist_to_opt, self._h_min, self._grad_var)
    for value in solver_inputs:
      if np.isnan(value) or np.isinf(value):
        logging.warning("Input to cubic solver has invalid nan/inf value!")
        raise Exception("Input to cubic solver has invalid nan/inf value!")

    grad_var_eps = self._grad_var + eps
    p = (self._dist_to_opt + eps)**2 * (self._h_min + eps)**2 / 2 / grad_var_eps
    w3 = (-math.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
    # Signed real cube root of w3.
    w = math.copysign(1.0, w3) * math.pow(math.fabs(w3), 1.0/3.0)
    w_eps = w + eps
    y = w - p / 3.0 / w_eps
    x = y + 1

    if self._verbose:
      logging.debug("p %f, denominator %f", p, grad_var_eps)
      logging.debug("w3 %f ", w3)
      logging.debug("y %f, denominator %f", y, w_eps)

    if np.isnan(x) or np.isinf(x):
      logging.warning("Output from cubic is invalid nan/inf value!")
      raise Exception("Output from cubic is invalid nan/inf value!")

    return x
Example 42
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def treegauss_add_row(
        data_row,
        tree_grid,
        program,
        latent_row,
        vert_ss,
        edge_ss,
        feat_ss, ):
    """
    Add one data row to the sufficient statistics, in place.

    ``vert_ss``, ``edge_ss`` and ``feat_ss`` are updated from ``latent_row``
    and the non-NaN entries of ``data_row``. Sampling of the latent state is
    not implemented yet (see the linked issue).
    """
    # Sample latent state using dynamic programming.
    TODO('https://github.com/posterior/treecat/issues/26')

    # Update sufficient statistics.
    num_vertices = latent_row.shape[0]
    for vertex in range(num_vertices):
        state = latent_row[vertex, :]
        vert_ss[vertex, :, :] += np.outer(state, state)

    num_edges = tree_grid.shape[1]
    for edge in range(num_edges):
        # Rows 1 and 2 of tree_grid hold the two endpoints of each edge.
        head = latent_row[tree_grid[1, edge], :]
        tail = latent_row[tree_grid[2, edge], :]
        edge_ss[edge, :, :] += np.outer(head, tail)

    for vertex, value in enumerate(data_row):
        if not np.isnan(value):
            state = latent_row[vertex, :]
            feat_ss[vertex] += 1
            feat_ss[vertex, 1] += value
            feat_ss[vertex, 2:] += value * state  # TODO Use central covariance.
Example 43
Project: MKLMM   Author: omerwe   File: gpUtils.py    (BSD 2-Clause "Simplified" License) View Source Project 5 votes vote down vote up
def imputeSNPs(X):
	"""Replace every NaN in the 2-D array X by its column's NaN-aware mean.

	X is modified in place and returned.
	"""
	column_means = np.nanmean(X, axis=0)
	# Coordinates of all missing cells; each one takes the mean of its column.
	rows, cols = np.where(np.isnan(X))
	X[rows, cols] = column_means[cols]

	return X
Example 44
Project: distributional_perspective_on_RL   Author: Kiwoo   File: tf_util.py    (license) View Source Project 5 votes vote down vote up
def __call__(self, *args, **kwargs):
        """
        Evaluate ``self.outputs_update`` in the current session.

        Positional arguments are bound to ``self.inputs`` in order. The
        remaining inputs are looked up in ``kwargs`` by the last path
        component of their name (without the ``:N`` suffix) and otherwise
        must have an entry in ``self.givens``. Returns the session results
        without the last element.

        Raises ``RuntimeError`` when ``self.check_nan`` is set and any result
        contains NaN; argument errors are reported through assertions.
        """
        assert len(args) <= len(self.inputs), "Too many arguments provided"
        feed_dict = {}

        # Positional values first.
        for placeholder, value in zip(self.inputs, args):
            self._feed_input(feed_dict, placeholder, value)

        # Then the inputs that were not covered positionally.
        seen_names = set()
        for placeholder in self.inputs[len(args):]:
            short_name = placeholder.name.split(':')[0].split('/')[-1]
            assert short_name not in seen_names, \
                "this function has two arguments with the same name \"{}\", so kwargs cannot be used.".format(short_name)
            if short_name not in kwargs:
                assert placeholder in self.givens, "Missing argument " + short_name
                continue
            seen_names.add(short_name)
            self._feed_input(feed_dict, placeholder, kwargs.pop(short_name))
        assert len(kwargs) == 0, "Function got extra arguments " + str(list(kwargs.keys()))

        # Givens only fill the slots that were not fed explicitly.
        for placeholder in self.givens:
            feed_dict.setdefault(placeholder, self.givens[placeholder])

        results = get_session().run(self.outputs_update, feed_dict=feed_dict)[:-1]
        if self.check_nan and any(np.isnan(r).any() for r in results):
            raise RuntimeError("Nan detected")
        return results
Example 45
Project: npstreams   Author: LaurentRDC   File: array_utils.py    (license) View Source Project 5 votes vote down vote up
def nan_to_num(array, fill_value = 0.0, copy = True):
    """
    Replace NaNs with another fill value. 

    Parameters
    ----------
    array : array_like
        Input data.
    fill_value : float, optional
        NaNs will be replaced by ``fill_value``. Default is 0.0, in keeping
        with ``numpy.nan_to_num``.
    copy : bool, optional
        Whether to create a copy of `array` (True) or to replace values
        in-place (False). The in-place operation only occurs if
        casting to an array does not require a copy.
    
    Returns
    -------
    out : ndarray
        Array without NaNs. If ``array`` was not of floating or complex type,
        ``array`` is returned unchanged.
    
    Notes
    -----
    Contrary to ``numpy.nan_to_num``, this function does not handle
    infinite values. For complex input, the real and imaginary parts are
    filled independently.

    See Also
    --------
    numpy.nan_to_num : replace NaNs and Infs with zeroes.
    """
    if copy:
        array = np.array(array, subok = True, copy = True)
    else:
        # ``np.array(..., copy=False)`` raises on NumPy >= 2 whenever a copy
        # cannot be avoided; ``asanyarray`` copies only if needed and keeps
        # ndarray subclasses, which is the documented behaviour.
        array = np.asanyarray(array)
    dtype = array.dtype.type

    # Non-inexact types do not have NaNs
    if not np.issubdtype(dtype, np.inexact):
        return array
    
    iscomplex = np.issubdtype(dtype, np.complexfloating)
    # ``.real`` / ``.imag`` are views, so writing to them updates ``array``.
    dest = (array.real, array.imag) if iscomplex else (array,)
    for d in dest:
        np.copyto(d, fill_value, where = np.isnan(d))
    return array
Example 46
Project: seq2seq   Author: google   File: decoder_test.py    (license) View Source Project 5 votes vote down vote up
def test_gradients(self):
    """Check that no gradient of the mean decoder loss contains NaN.

    Builds a training-mode decoder on random inputs and random integer
    labels, computes Adam gradients of the mean sparse softmax
    cross-entropy loss, and returns the evaluated (gradient, variable)
    pairs.
    """
    input_shape = [self.batch_size, self.sequence_length, self.input_depth]
    inputs = tf.random_normal(input_shape)
    seq_length = tf.ones(self.batch_size, dtype=tf.int32) * self.sequence_length
    label_shape = [self.batch_size, self.sequence_length]
    labels = np.random.randint(0, self.vocab_size, label_shape)

    helper = decode_helper.TrainingHelper(
        inputs=inputs, sequence_length=seq_length)
    decoder_fn = self.create_decoder(
        helper=helper, mode=tf.contrib.learn.ModeKeys.TRAIN)
    initial_state = decoder_fn.cell.zero_state(
        self.batch_size, dtype=tf.float32)
    decoder_output, _ = decoder_fn(initial_state, helper)

    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=decoder_output.logits, labels=labels)
    mean_loss = tf.reduce_mean(losses)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    grads_and_vars = optimizer.compute_gradients(mean_loss)

    #pylint: disable=E1101
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      grads_and_vars_ = sess.run(grads_and_vars)

    for gradient, _ in grads_and_vars_:
      has_nan = np.isnan(gradient).any()
      self.assertFalse(has_nan)

    return grads_and_vars_
Example 47
Project: zipline-chinese   Author: zhanghan1990   File: history_container.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def frame_to_series(self, field, frame, columns=None):
        """
        Collapse a frame (rows = time, columns = sids) into one value per sid.

        The reduction depends on ``field``: ``'price'``/``'close'`` take the
        last row (forward-filled when it contains NaN), ``'open'`` takes the
        first row after back-filling, ``'volume'`` sums, ``'high'`` takes the
        maximum and ``'low'`` the minimum, the latter three ignoring NaN.
        An empty frame yields 0 for ``'volume'`` and NaN otherwise, one entry
        per element of ``columns``.

        A DataFrame input is reduced to its values; its columns then replace
        ``columns``. Raises ``ValueError`` for an unknown ``field``.
        """
        if isinstance(frame, pd.DataFrame):
            columns, frame = frame.columns, frame.values

        if not len(frame):
            empty_value = 0 if field == 'volume' else np.nan
            return pd.Series(data=empty_value, index=columns).values

        if field in ['price', 'close']:
            last_row = frame[-1]
            # Short-circuit: a complete last row needs no forward fill.
            if not np.isnan(last_row).any():
                return last_row
            return ffill(frame)[-1]
        if field == 'open':
            return bfill(frame)[0]
        if field == 'volume':
            return np.nansum(frame, axis=0)
        if field == 'high':
            return np.nanmax(frame, axis=0)
        if field == 'low':
            return np.nanmin(frame, axis=0)
        raise ValueError("Unknown field {}".format(field))
Example 48
Project: zipline-chinese   Author: zhanghan1990   File: history_container.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def update_last_known_values(self):
        """
        Store the non-NaN values from our oldest frame in each frequency.

        For every frequency the oldest raw frame is taken from its digest
        panel when one exists (and is truthy), otherwise from the buffer
        panel. For every forward-fillable field the non-null entries of that
        frame overwrite the matching ``(freq_str, field)`` row of
        ``self.last_known_prior_values``. Does nothing when there are no
        forward-fillable fields.
        """
        fill_fields = self.ffillable_fields
        if not len(fill_fields):
            return

        for frequency in self.unique_frequencies:
            digest_panel = self.digest_panels.get(frequency, None)
            source_panel = digest_panel if digest_panel else self.buffer_panel
            oldest_frame = source_panel.oldest_frame(raw=True)

            field_index = self.fields
            for field in fill_fields:
                row = oldest_frame[field_index.get_loc(field)]
                # isnan would be fast, possible to use?
                known = np.where(pd.notnull(row))
                target_row = self.last_known_prior_values.index.get_loc(
                    (frequency.freq_str, field))
                # Written through ``.values`` so the stored table is updated
                # in place.
                self.last_known_prior_values.values[target_row, known] = \
                    row[known]
Example 49
Project: zipline-chinese   Author: zhanghan1990   File: risk.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def check_entry(key, value):
    """Return a truthy result when ``value`` is NaN or infinite.

    The ``'period_label'`` entry is never checked and always gives False.
    """
    if key == 'period_label':
        return False
    return np.isnan(value) or np.isinf(value)


############################
# Risk Metric Calculations #
############################ 
Example 50
Project: zipline-chinese   Author: zhanghan1990   File: assets.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def _compute_asset_lifetimes(self):
        """
        Compute a recarray of asset lifetimes.

        Selects ``sid``, ``start_date`` and ``end_date`` for every row of
        ``self.equities`` and returns them as a record array with the int64
        fields ``sid``, ``start`` and ``end``. Missing start dates become 0
        and missing end dates become the largest platform integer.

        NOTE(review): any caching of the result is not visible in this
        method; presumably it is done by a decorator or the caller.
        """
        equities_cols = self.equities.c
        # Materialize the query result as a 2-D float array, one row per asset.
        buf = np.array(
            tuple(
                sa.select((
                    equities_cols.sid,
                    equities_cols.start_date,
                    equities_cols.end_date,
                )).execute(),
            ), dtype='<f8',  # use doubles so we get NaNs
        )
        # Reinterpret the same buffer as one three-field record per row, so
        # writes through ``start``/``end`` below land in ``lifetimes``.
        lifetimes = np.recarray(
            buf=buf,
            shape=(len(buf),),
            dtype=[
                ('sid', '<f8'),
                ('start', '<f8'),
                ('end', '<f8')
            ],
        )
        start = lifetimes.start
        end = lifetimes.end
        start[np.isnan(start)] = 0  # convert missing starts to 0
        # NOTE(review): a float64 cannot hold a 64-bit INTMAX exactly, so the
        # stored value is rounded before the int cast below; confirm the cast
        # yields the intended sentinel on the target platform.
        end[np.isnan(end)] = np.iinfo(int).max  # convert missing end to INTMAX
        # Cast the results back down to int.
        return lifetimes.astype([
            ('sid', '<i8'),
            ('start', '<i8'),
            ('end', '<i8'),
        ])