Python sklearn.datasets.fetch_california_housing() Examples

The following are 3 code examples of sklearn.datasets.fetch_california_housing(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.

Example #1

Source File: test_california_housing.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def fetch(*args, **kwargs):
    """Call ``fetch_california_housing`` with ``download_if_missing=False``.

    All positional and keyword arguments are forwarded unchanged; the
    ``download_if_missing`` keyword is always supplied by this wrapper, so
    passing it explicitly results in a duplicate-keyword ``TypeError``.
    """
    forced = {"download_if_missing": False}
    return fetch_california_housing(*args, **forced, **kwargs)

Example #2

Source File: bench_ml.py From scikit-optimize with BSD 3-Clause "New" or "Revised" License

5 votes

def load_data_target(name):
    """
    Loads data and target given the name of the dataset.

    Parameters
    ----------
    name : str
        Dataset identifier. Supported values are ``"Boston"``,
        ``"Housing"``, ``"digits"`` and ``"Climate Model Crashes"``.

    Returns
    -------
    tuple
        ``(data, target)`` taken from the ``"data"`` and ``"target"``
        entries of the loaded dataset. For ``"Housing"`` both are
        truncated to the first 1000 rows.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported dataset names.
    """
    if name == "Boston":
        data = load_boston()
    elif name == "Housing":
        data = fetch_california_housing()
        dataset_size = 1000  # this is necessary so that SVR does not slow down too much
        data["data"] = data["data"][:dataset_size]
        data["target"] = data["target"][:dataset_size]
    elif name == "digits":
        data = load_digits()
    elif name == "Climate Model Crashes":
        try:
            data = fetch_mldata("climate-model-simulation-crashes")
        except HTTPError:
            # Fall back to the raw UCI file when the mldata fetch fails.
            from contextlib import closing

            url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00252/pop_failures.dat"
            # Close the HTTP response deterministically instead of leaking it.
            with closing(urlopen(url)) as response:
                # read() yields bytes on Python 3; decode before text parsing.
                text = response.read().decode("utf-8")
            # Skip the header line, and ignore blank lines (e.g. the one
            # produced by a trailing newline) so every row has equal length.
            rows = [
                [float(v) for v in line.split()]
                for line in text.splitlines()[1:]
                if line.strip()
            ]
            samples = np.array(rows)
            data = {
                "data": samples[:, :-1],
                # np.int was only an alias of the builtin int and has been
                # removed from NumPy; the builtin gives the same dtype.
                "target": np.array(samples[:, -1], dtype=int),
            }
    else:
        raise ValueError("dataset not supported.")
    return data["data"], data["target"]

Example #3

Source File: loaddata.py From nonlinearIB with MIT License

5 votes

def load_housing():
    """Build a shuffled 80/20 train/test split of the California housing data.

    The feature columns are centred and scaled by their per-column mean and
    standard deviation, rows whose target is not below 5 are dropped, and the
    remaining targets are log-transformed. Rows are shuffled after seeding
    NumPy's global random state with 12345.

    Returns a dict with keys ``'err'`` (always ``'mse'``), ``'trn_X'``,
    ``'trn_Y'``, ``'tst_X'`` and ``'tst_Y'``; the ``*_Y`` entries are column
    vectors.
    """
    from sklearn.datasets import fetch_california_housing

    bunch = fetch_california_housing()
    features = bunch['data']
    prices = bunch['target']

    # Standardize every feature column in place.
    features -= features.mean(axis=0)
    features /= features.std(axis=0)

    # Housing prices above 5 are all collapsed to 5, which distorts the
    # target distribution, so those rows are dropped.
    keep = prices < 5
    features = features[keep]
    prices = np.log(prices[keep])

    # Seed the global generator so the shuffle is reproducible.
    np.random.seed(12345)
    order = np.random.permutation(len(features))
    features = features[order]
    prices = prices[order]

    n_train = int(len(features) * 0.8)

    return {
        'err': 'mse',
        'trn_X': features[:n_train],
        'trn_Y': np.atleast_2d(prices[:n_train]).T,
        'tst_X': features[n_train:],
        'tst_Y': np.atleast_2d(prices[n_train:]).T,
    }