python source code of randomized_search

Project: healthcareai-py (GitHub Link)

healthcareai-py-master
- example_classification_1.py
- how_to_release_a_version.md
- conda-build-pipeline.sh
- environment.yml
- conda-recipe
  - meta.yaml
  - build.sh
  - bld.bat
- AUTHORS
- example_regression_2.py
- example_advanced.py
- healthcareai
  - datasets
    - data
      - diagnostic_breast_cancer.csv
      - fertility.csv
      - thoracic_surgery.csv
      - heart_disease.csv
      - prognostic_breast_cancer.csv
      - pima_indians_diabetes.csv
      - acute_inflammations.csv
      - cervical_cancer.csv
      - mammographic_masses.csv
      - diabetes.csv
    - __init__.py
    - base.py
  - trained_models
    - trained_supervised_model.py
    - __init__.py
  - supervised_model_trainer.py
  - common
    - database_validators.py
    - csv_loader.py
    - database_library_validators.py
    - transformers.py
    - top_factors.py
    - impact_coding.py
    - model_eval.py
    - table_archiver.py
    - randomized_search.py
    - cardinality_checks.py
    - healthcareai_error.py
    - azure_blob_storage_helper.py
    - predict.py
    - trainer_output.py
    - get_categorical_levels.py
    - filters.py
    - file_io_utilities.py
    - __init__.py
    - feature_availability_profiler.py
    - database_writers.py
    - catalyst_sqlite_db_fixtures.py
    - helpers.py
    - database_connections.py
  - advanced_supvervised_model_trainer.py
  - pipelines
    - data_preparation.py
    - __init__.py
  - __init__.py
  - tests
    - test_top_factors.py
    - test_csv_loader.py
    - test_file_utilities.py
    - test_trainer_decorator.py
    - fixtures
      - iris_classification.csv
      - .gitkeep
    - test_dataframe_transformers_Dataframe_Imputer.py
    - test_trainer.py
    - test_advanced_trainer.py
    - test_trained_supervised_model.py
    - test_database_validation.py
    - test_cardinality_checks.py
    - test_impact_coding.py
    - __init__.py
    - test_model_eval_helpers.py
    - test_dataframe_transformers.py
    - test_categorical_levels.py
    - test_datasets.py
    - test_feature_availability_profiler.py
    - test_dataframe_filters.py
    - test_model_eval.py
    - test_predict.py
    - helpers.py
- example_regression_1.py
- mkdocs.yml
- LICENSE
- example_classification_2.py
- dev-requirements.txt
- appveyor.yml
- CONTRIBUTING.md
- appveyor
  - run_with_env.cmd
  - install.ps1
- tasks.py
- CHANGELOG.md
- setup.py
- dox
  - Makefile
  - make.bat
  - healthcareai.trained_models.rst
  - healthcareai.rst
  - healthcareai.pipelines.rst
  - modules.rst
  - index.rst
  - conf.py
  - healthcareai.tests.rst
  - healthcareai.common.rst
- setup.cfg
- .travis.yml
- README.md
- .landscape.yml
- Dockerfile
- .gitignore
- docs
  - deploy.md
  - compile.md
  - img
  - catalyst_edw_instructions.md
  - profiling_your_data.md
  - training.md
  - hints.md
  - architecture_overview_for_developers.md
  - index.md
  - favicon.ico
  - prediction_types.md
  - FAQ.md
  - getting_started.md
  - conf.py
  - mermaid.js
  - databases.md
  - utilities.md
- .dockerignore
- MANIFEST.in

from sklearn.model_selection import RandomizedSearchCV


def get_algorithm(estimator,
                  scoring_metric,
                  hyperparameter_grid,
                  randomized_search,
                  number_iteration_samples=10,
                  **non_randomized_estimator_kwargs):
    """
    Build an unfitted algorithm, optionally wrapped in a randomized hyperparameter search.

    Args:
        estimator (callable): A scikit-learn estimator class (for example: KNeighborsClassifier). It is called here
            to create the instance, so pass the class itself rather than an already constructed object.
        scoring_metric (str): The scoring metric to optimize for. Only used when randomized_search is truthy. See
            http://scikit-learn.org/stable/modules/model_evaluation.html
        hyperparameter_grid (dict): Key value pairs describing the hyperparameter space to sample from. Only used
            when randomized_search is truthy.
        randomized_search (bool): If truthy, return a RandomizedSearchCV wrapping a default-constructed estimator;
            otherwise return the plain estimator.
        number_iteration_samples (int): Number of hyperparameter settings sampled by the randomized search (passed
            as `n_iter`). Only used when randomized_search is truthy.
        **non_randomized_estimator_kwargs: Keyword arguments forwarded to the estimator constructor. Only used when
            randomized_search is falsy; they are ignored in the randomized search case.

    Returns:
        The object produced by calling `estimator`, or a RandomizedSearchCV around it. Nothing is fitted here.

    """
    # Simple case first: no search requested, so just construct the estimator with the caller's keyword arguments.
    if not randomized_search:
        return estimator(**non_randomized_estimator_kwargs)

    # Search case: the wrapped estimator is built with its defaults; the search supplies the hyperparameters.
    return RandomizedSearchCV(
        estimator=estimator(),
        scoring=scoring_metric,
        param_distributions=hyperparameter_grid,
        n_iter=number_iteration_samples,
        verbose=0,
        n_jobs=1,
    )