Python source code of plot_kneighbors_regularization

Project: scipy_2015_sklearn_tutorial (GitHub Link)

scipy_2015_sklearn_tutorial-master
- abstract.rst
- LICENSE
- fetch_data.py
- notebooks
  - 01.3 Data Representation for Machine Learning.ipynb
  - 04.2 Model Complexity and GridSearchCV.ipynb
  - 07.1 Case Study - Large Scale Text Classification.ipynb
  - 04.4 Model Evaluation and Scoring Metrics.ipynb
  - 03.2 Methods - Unsupervised Preprocessing.ipynb
  - datasets
    - titanic
      - titanic3.csv
    - smsspam
      - readme
  - 02.3 Unsupervised Learning - Transformations and Dimensionality Reduction.ipynb
  - 04.3 Analyzing Model Capacity.ipynb
  - figures
    - ML_flow_chart.py
    - train_validation_test2.svg
    - plot_digits_datasets.py
    - plot_interactive_forest.py
    - supervised_workflow.svg
    - hashing_vectorizer.svg
    - cross_validation.svg
    - plot_interactive_tree.py
    - bag_of_words.svg
    - plot_kneighbors_regularization.py
    - train_test_split.svg
    - feature_union.svg
    - overfitting_underfitting_cartoon.svg
    - unsupervised_workflow.svg
    - grid_search_cross_validation.svg
    - __init__.py
    - plot_2d_separator.py
    - data_representation.svg
    - pipeline_cross_validation.svg
    - plot_linear_svc_regularization.py
    - plot_rbf_svm_parameters.py
  - images
  - 04.1 Cross Validation.ipynb
  - 03.4 Methods - Text Feature Extraction.ipynb
  - 03.3 Case Study - Face Recognition with Eigenfaces.ipynb
  - 02.4 Unsupervised Learning - Clustering.ipynb
  - 01.2 IPython Numpy and Matplotlib Refresher.ipynb
  - 01.4 Training and Testing Data.ipynb
  - solutions
    - 04C_validation_exercise.py
    - 08B_digits_clustering.py
    - 02A_faces_plot.py
    - 06B_basic_grid_search.py
    - 08A_digits_projection.py
    - 07B_grid_search.py
    - 05B_strip_headers.py
    - 04B_houses_regression.py
    - 06B_learning_curves.py
  - 06.1 Pipelining Estimators.ipynb
  - 05.1 In Depth - Linear Models.ipynb
  - 03.5 Case Study - SMS Spam Detection.ipynb
  - 05.2 In Depth - Support Vector Machines.ipynb
  - 05.3 In Depth - Trees and Forests.ipynb
  - 03.1 Case Study - Supervised Classification of Handwritten Digits.ipynb
  - 03.6 Case Study - Titanic Survival.ipynb
  - 02.1 Supervised Learning - Classification.ipynb
  - 01.1 Introduction to Machine Learning.ipynb
  - helpers.py
  - 02.5 Review of Scikit-learn API.ipynb
  - 02.2 Supervised Learning - Regression.ipynb
- check_env.py
- README.md
- requirements.txt
- .gitignore

import numpy as np
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsRegressor


def make_dataset(n_samples=100):
    """Generate a reproducible, noisy 1-D regression toy dataset.

    Parameters
    ----------
    n_samples : int, default 100
        Number of evenly spaced input points to place on [-3, 3].

    Returns
    -------
    x : ndarray of shape (n_samples,)
        The evenly spaced inputs.
    y : ndarray of shape (n_samples,)
        ``sin(4 * x) + x`` plus standard-normal noise.
    """
    inputs = np.linspace(-3, 3, n_samples)
    # A fresh generator with a fixed seed is created on every call, so two
    # calls with the same n_samples return exactly the same noise.
    noise = np.random.RandomState(42).normal(size=len(inputs))
    targets = np.sin(4 * inputs) + inputs + noise
    return inputs, targets


def plot_regression_datasets():
    """Scatter the toy regression data at three sample sizes, side by side.

    Creates a 1x3 figure and, in successive panels, plots the output of
    ``make_dataset`` for 10, 100 and 1000 samples as semi-transparent dots.
    Nothing is returned and ``plt.show()`` is not called here.
    """
    sample_sizes = [10, 100, 1000]
    _, panels = plt.subplots(1, 3, figsize=(15, 5))
    for panel, size in zip(panels, sample_sizes):
        inputs, targets = make_dataset(size)
        panel.plot(inputs, targets, 'o', alpha=.6)


def plot_kneighbors_regularization():
    """Plot k-nearest-neighbors regression fits for several ``n_neighbors``.

    Builds 100 noisy samples of ``sin(4 * x) + x`` on [-3, 3] (noise seeded
    with 42), then fits a ``KNeighborsRegressor`` with 2, 5 and 20 neighbors.
    Each of the three panels shows the noise-free function, the noisy
    training points and the model's prediction on a 1000-point grid.
    Nothing is returned and ``plt.show()`` is not called here.
    """
    # Training data: evenly spaced inputs and targets with fixed-seed noise.
    train_x = np.linspace(-3, 3, 100)
    clean_y = np.sin(4 * train_x) + train_x
    noisy_y = clean_y + np.random.RandomState(42).normal(size=len(train_x))
    # Add a trailing axis so the 1-D inputs become a (100, 1) feature matrix.
    train_features = train_x[:, np.newaxis]

    # Dense grid on the same interval for drawing the prediction curve; the
    # reshape does not depend on n_neighbors, so it is done once up front.
    grid = np.linspace(-3, 3, 1000)
    grid_features = grid[:, np.newaxis]

    _, axes = plt.subplots(1, 3, figsize=(15, 5))
    for panel, k in zip(axes.ravel(), [2, 5, 20]):
        model = KNeighborsRegressor(n_neighbors=k)
        model.fit(train_features, noisy_y)
        predicted = model.predict(grid_features)

        # Keep this draw order: it determines line colors and legend order.
        panel.plot(train_x, clean_y, label="true function")
        panel.plot(train_x, noisy_y, "o", label="data")
        panel.plot(grid, predicted, label="prediction")
        panel.legend()
        panel.set_title("n_neighbors = %d" % k)

# Script entry point: when run directly (not imported), draw the
# k-neighbors regularization figure and display it.
if __name__ == "__main__":
    plot_kneighbors_regularization()
    plt.show()