Python source code of the model

deepchem-master
- devtools
  - conda-recipe
    - deepchem
      - run_test.py
      - conda_build_config.yaml
      - meta.yaml
      - build.sh
    - mdtraj
      - meta.yaml
  - jenkins
    - table_to_csv.py
    - molnet_update.sh
    - results.table
    - convert_to_rst.py
    - test_examples.sh
    - Readme.md
    - test_notebooks.sh
    - build_and_upload_docs.sh
    - conda_build.sh
    - generate_graph.py
    - push-docs-to-s3.py
    - compare_results.py
    - desired_results.csv
    - jenkins.sh
  - travis-ci
    - pre-commit
    - test_format_code.sh
  - README.md
- datasets
  - tox21.csv.gz
  - Positive Modulators Summary_ 918.TUC _ v1.xlsx
  - mini_muv.csv.gz
  - crystal_desc_canvas_aug30.csv
  - medium_muv.csv.gz
  - flash_points.csv.gz
  - gdb1k.sdf.csv
  - delaney-processed.csv
  - construct_pdbbind_df.py
  - membrane_permeability.sdf.csv
- .coveragerc
- examples
  - qm7
    - qm7_splits.csv
    - qm7_sklearn.py
    - get_qm7.sh
    - qm7b_tf_model.py
    - qm7b_DTNN.py
    - README.md
    - qm7_tf_model.py
    - qm7_tensorgraph_GraphConv.py
    - qm7_tensorgraph_DTNN.py
  - hyperparam_opt
    - grid_hyperparam_opt.py
    - gaussian_hyperparam_opt_with_logdir.py
    - gaussian_hyperparam_opt.py
    - README.md
  - transformers
    - README.md
  - datasets
    - scaffold_split_print.py
    - README.md
    - pretty_print.py
  - binding_pockets
    - binding_pocket_rf.py
    - binding_pocket_datasets.py
    - README.md
  - qm8
    - qm8_DTNN.py
    - qm8_tf_model.py
    - get_qm8.sh
    - qm8_MPNN.py
  - pdbbind
    - pdbbind_atomic_conv.py
    - get_featurized_pdbbind.sh
    - pdbbind_rf.py
    - get_pdbbind.sh
    - pdbbind_tf.py
    - .gitignore
  - encoders
    - encoder_example.py
    - __init__.py
  - uv
    - UV_tf_singletask.py
    - UV_datasets.py
    - UV_tf_model.py
    - uv_features.py
    - UV_rf_model.py
    - UV_correlations.py
    - README.md
    - UV_tf_progressive.py
    - UV_tf_robust.py
  - sweetlead
    - sweet.py
    - sweetlead_datasets.py
    - README.md
  - adme
    - run_benchmarks.py
    - __init__.py
    - README.md
  - nci
    - nci_rf.py
    - nci_datasets.py
  - benchmark_low_data.py
  - chembl
    - chembl_tf_models.py
    - chembl_graph_conv.py
    - year_sets
      - chembl_5thresh_ts_valid.csv.gz
    - chembl_tasks.py
    - __init__.py
  - muv
    - muv_tf.py
    - README.md
    - muv_sklearn.py
  - model_restore
    - chemception_restore.py
    - chemception_model.py
    - README.md
    - .gitignore
  - kaggle
    - KAGGLE_tf_robust.py
    - get_kaggle.sh
    - KAGGLE_tf_model.py
    - KAGGLE_tf_singletask.py
    - README.md
    - KAGGLE_tf_progressive.py
    - kaggle_features.py
    - KAGGLE_rf_model.py
  - pretraining
    - fcnet_pretraining.py
    - README.md
  - pcba
    - pcba_tf.py
    - pcba_sklearn.py
    - pcba_datasets.py
    - README.md
  - tox21
    - tox21_IRV.py
    - tox21_KernelSVM.py
    - tox21_tensorgraph_weave.py
    - tox21_tensorgraph_DAG.py
    - tox21_logreg.py
    - tox21_robustMT_models.py
    - tox21_tf_progressive.py
    - tox21_sklearn_models.py
    - tox21_graphcnn.py
    - tox21_tensorgraph_graphconv_sluice.py
    - README.md
    - tox21_fcnet.py
    - tox21_torch.py
    - tox21_tensorgraph_graph_conv.py
  - notebooks
    - TensorGraph_Mechanics.ipynb
    - deepchem_tensorflow_eager.ipynb
    - tests.py
    - README.md
    - .gitignore
    - assets
  - roitberg
    - app.py
    - pyanitools.py
    - roitberg.py
  - tutorials
    - 19_Large_Scale_Chemical_Screens.ipynb
    - 07_Uncertainty_In_Deep_Learning.ipynb
    - 17_Training_a_Generative_Adversarial_Network_on_MNIST.ipynb
    - 16_Conditional_Generative_Adversarial_Networks.ipynb
    - 02_Learning_MNIST_Digit_Classifiers.ipynb
    - 06_Going_Deeper_on_Molecular_Featurizations.ipynb
    - 05_Putting_Multitask_Learning_to_Work.ipynb
    - 21_Introduction_to_Bioinformatics.ipynb
    - 09_Creating_a_high_fidelity_model_from_experimental_data.ipynb
    - 10_Exploring_Quantum_Chemistry_with_GDB1k.ipynb
    - 15_Synthetic_Feasibility_Scoring.ipynb
    - 04_Introduction_to_Graph_Convolutions.ipynb
    - 14_Modeling_Protein_Ligand_Interactions_With_Atomic_Convolutions.ipynb
    - basic_graphs.gif
    - README.md
    - WIP_20_Converting_DeepChem_Models_to_TensorFlow_Estimators.ipynb
    - 11_Learning_Unsupervised_Embeddings_for_Molecules.ipynb
    - 18_Using_Reinforcement_Learning_to_Play_Pong.ipynb
    - 01_The_Basic_Tools_of_the_Deep_Life_Sciences.ipynb
  - splitters
    - random_split.py
    - README.md
    - scaffold_split.py
  - qm9
    - qm9_tf_model.py
    - get_qm9.sh
    - qm9_DTNN.py
  - stable_results.csv
  - kinase
    - KINASE_rf_model.py
    - kinase_features.py
    - KINASE_datasets.py
    - KINASE_tf_model.py
    - KINASE_tf_robust.py
    - README.md
    - KINASE_tf_singletask.py
    - KINASE_correlations.py
    - KINASE_tf_progressive.py
  - data_loading
    - membrane_permeability.sdf
    - pandas_csv.py
    - sdf_load.py
    - README.md
    - example.csv
  - hiv
    - hiv_irv.py
    - hiv_tf_models.py
    - README.md
  - factors
    - FACTORS_rf_model.py
    - FACTORS_correlations.py
    - FACTORS_tf_singletask.py
    - FACTORS_datasets.py
    - factors_features.py
    - FACTORS_tf_model.py
    - FACTORS_tf_robust.py
    - FACTORS_tf_progressive.py
    - README.md
  - hopv
    - hopv_tf_progressive.py
    - get_hopv.sh
    - hopv_sklearn_models.py
    - README.md
    - hopv_robustMT_models.py
    - hopv_tf_models.py
    - hopv_graph_conv.py
  - sider
    - sider.csv.gz
    - sider_rf.py
    - sider_datasets.py
  - sampl
    - sampl_tf_models.py
    - sampl_graph_conv.py
    - SAMPL.csv
  - membrane_permeability
    - membrane_permeability_datasets.py
    - membrane_permeability_graph_conv.py
    - __init__.py
  - benchmark_curve.py
  - benchmark.py
  - low_data
    - muv_graph_conv_one_fold.py
    - tox_graph_conv_one_fold.py
    - tox_rf_one_fold.py
    - tox_rf_K_fold.py
    - sider_graph_conv_one_fold.py
    - toxcast_maml.py
    - datasets.py
    - muv_rf_one_fold.py
    - sider_rf_one_fold.py
    - __init__.py
  - delaney
    - delaney_graph_conv.py
    - delaney_DAG.py
    - delaney_krr.py
    - delaney_torch.py
    - delaney_tf_models.py
    - delaney_chemception.py
    - delaney_graphconv_error_bars.py
    - delaney_MPNN.py
    - delaney_tf_progressive.py
    - README.md
    - delaney_weave.py
    - delaney_textcnn.py
  - bace
    - bace_datasets.py
    - bace_rf.py
  - toxcast
    - toxcast_rf.py
    - processing
      - tox.py
    - toxcast_datasets.py
    - README.md
  - clintox
    - clintox_tf_models.py
    - datasets
      - aacttox
        smiles_cache.csv.gz
        aacttox.py
        aacttox.csv.gz
        aacttox_phase_multiclass.csv.gz
      - aacttox_sweetfda_join.py
      - clintox.csv.gz
      - sweetfda
        sweetfda_approved_processed.csv.gz
    - __init__.py
    - clintox_graph_conv.py
- LICENSE
- .readthedocs.yml
- CONTRIBUTING.md
- deepchem
  - splits
    - splitters.py
    - test_specified_index_splitter.py
    - task_splitter.py
    - test_scaffold_splitter.py
    - __init__.py
    - tests
      - test_splitter.py
      - test_task_splitter.py
      - __init__.py
  - trans
    - transformers.py
    - __init__.py
    - tests
      - test_transformers.py
      - __init__.py
  - feat
    - mol_graphs.py
    - basic.py
    - fingerprints.py
    - graph_features.py
    - smiles_featurizers.py
    - raw_featurizer.py
    - materials_featurizers.py
    - one_hot.py
    - adjacency_fingerprints.py
    - __init__.py
    - rdkit_grid_featurizer.py
    - atomic_coordinates.py
    - tests
      - test_materials_featurizers.py
      - test_basic.py
      - test_smiles_featurizers.py
      - test_sdf_reader.py
      - test_coulomb_matrices.py
      - test_graph_features.py
      - 3ws9_ligand.sdf
      - test_mol_graphs.py
      - data
        3zp9_ligand_hyd.pdbqt
        3bwf_ligand_hyd.pdb
        chembl_25_small.csv
        3zp9_ligand_hyd.pdb
        water.sdf.csv
        3zso_ligand_hyd.pdb
        3zp9_protein_hyd.pdbqt
        3bwf_ligand_hyd.pdbqt
        water.sdf
      - test_features.py
      - test_fingerprints.py
      - __init__.py
      - test_binding_pocket_features.py
      - test_convmol.py
      - test_atomic_coordinates.py
      - test_one_hot.py
      - test_rdkit_grid_features.py
    - coulomb_matrices.py
    - base_classes.py
    - binding_pocket_features.py
  - utils
    - genomics.py
    - conformers.py
    - pdbqt_utils.py
    - fragment_util.py
    - voxel_utils.py
    - evaluate.py
    - rdkit_util.py
    - save.py
    - coordinate_box_utils.py
    - test
      - test_vina_utils.py
      - test_rdkit_util.py
      - test_seq.py
      - 1jld_ligand_docked.pdbqt
      - test_geometry_utils.py
      - test_hash_utils.py
      - data
        example.fasta
        example.fastq
      - test_fragment_util.py
      - __init__.py
      - test_voxel_utils.py
      - test_coordinate_box_utils.py
      - test_pdbqt_utils.py
      - test_generator_evaluator.py
    - hash_utils.py
    - geometry_utils.py
    - vina_utils.py
    - __init__.py
    - mol_xyz_util.py
  - metalearning
    - maml.py
    - __init__.py
    - tests
      - test_maml.py
  - models
    - fcnet.py
    - chemnet_layers.py
    - sklearn_models
      - __init__.py
    - IRV.py
    - keras_model.py
    - optimizers.py
    - robust_multitask.py
    - graph_models.py
    - xgboost_models
      - __init__.py
    - chemnet_models.py
    - text_cnn.py
    - gan.py
    - losses.py
    - multitask.py
    - models.py
    - cnn.py
    - atomic_conv.py
    - layers.py
    - __init__.py
    - seqtoseq.py
    - scscore.py
    - tests
      - test_multitask.py
      - multitask_example.csv
      - test_gan.py
      - test_callbacks.py
      - test_cnn.py
      - sparse_multitask_example.csv
      - user_specified_example.csv
      - test_optimizers.py
      - gaussian_cdf_example.csv
      - test_graph_models.py
      - test_atomic_conv.py
      - test_chemnet_models.py
      - test_generalize.py
      - butina_example.csv
      - test_overfit.py
      - example_DTNN.mat
      - test_kerasmodel.py
      - example_classification.csv
      - example_regression.csv
      - chembl_25_small.csv
      - feat_multitask_example.csv
      - test_layers.py
      - test_textcnnmodel.py
      - test_seqtoseq.py
      - test_reload.py
      - __init__.py
      - test_pretrained.py
      - test_layers_from_config.py
      - test_scscore.py
      - test_api.py
      - test_predict.py
      - test_singletask_to_multitask.py
      - example.csv
    - callbacks.py
    - progressive_multitask.py
  - molnet
    - load_function
      - bace_datasets.py
      - sampl_datasets.py
      - qm7_datasets.py
      - qm9_datasets.py
      - chembl25_datasets.py
      - cell_counting_datasets.py
      - bbbc_datasets.py
      - chembl_datasets.py
      - kinase_datasets.py
      - bbbp_datasets.py
      - toxcast_datasets.py
      - delaney_datasets.py
      - pcba_datasets.py
      - bace_features.py
      - hppb_datasets.py
      - clintox_datasets.py
      - tox21_datasets.py
      - uspto_datasets.py
      - sweetlead_datasets.py
      - hiv_datasets.py
      - lipo_datasets.py
      - muv_datasets.py
      - factors_datasets.py
      - chembl_tasks.py
      - __init__.py
      - nci_datasets.py
      - uv_datasets.py
      - kaggle_datasets.py
      - thermosol_datasets.py
      - clearance_datasets.py
      - pdbbind_datasets.py
      - kaggle_features.py
      - uv_tasks.py
      - qm8_datasets.py
      - sider_datasets.py
      - ppb_datasets.py
      - hopv_datasets.py
    - run_benchmark_low_data.py
    - preset_hyper_parameters.py
    - run_benchmark_models.py
    - dnasim.py
    - run_benchmark.py
    - __init__.py
    - check_availability.py
    - tests
      - test_molnet.py
      - test_dnasim.py
      - __init__.py
  - rl
    - ppo.py
    - __init__.py
    - tests
      - test_ppo.py
      - test_a2c.py
    - a2c.py
    - envs
      - tictactoe.py
      - test_tictactoe.py
      - __init__.py
  - data
    - supports.py
    - data_loader.py
    - datasets.py
    - __init__.py
    - tests
      - no_labels.csv
      - test_csv_loader.py
      - test_drop.py
      - a_image.tif
      - mini_emols.csv
      - images
      - test_fasta_loader.py
      - test_image_dataset.py
      - test_load.py
      - example.fasta
      - test_data_loader.py
      - test_support_generator.py
      - test_shuffle.py
      - test_merge.py
      - test_reload.py
      - __init__.py
      - test_image_loader.py
      - test_datasets.py
  - __init__.py
  - metrics
    - genomic_metrics.py
    - __init__.py
    - tests
      - metrics_test.py
      - test_genomics.py
      - __init__.py
  - dock
    - pose_generation.py
    - pose_scoring.py
    - binding_pocket.py
    - __init__.py
    - tests
      - 1jld_ligand.sdf
      - test_pose_scoring.py
      - test_binding_pocket.py
      - test_pose_generation.py
      - __init__.py
      - test_docking.py
    - docking.py
  - hyper
    - gaussian_process.py
    - grid_search.py
    - __init__.py
    - tests
      - test_hyperparam_opt.py
      - test_gaussian_hyperparam_opt.py
      - __init__.py
      - test_grid_hyperparam_opt.py
    - base_classes.py
- ISSUE_TEMPLATE.md
- setup.py
- docker
  - master
    - Dockerfile
  - conda-forge
    - Dockerfile
- .style.yapf
- .travis.yml
- README.md
- CODE_OF_CONDUCT.md
- pytest.ini
- scripts
  - install_deepchem_conda.sh
  - colab_install.py
  - install_deepchem_conda.ps1
- .gitignore
- docs
  - metalearning.rst
  - moleculenet.rst
  - datasets.rst
  - Makefile
  - source
    - conf.py
  - _config.yml
  - metrics.rst
  - layers.rst
  - sphinxext
    - notebook_sphinxext.py
  - dataloaders.rst
  - splitters.rst
  - transformers.rst
  - hyper.rst
  - utils.rst
  - docking.rst
  - rl.rst
  - README.md
  - requirements.txt
  - index.rst
  - conf.py
  - models.rst
  - featurizers.rst
  - _static
    - theme_overrides.css
  - installation.rst
- contrib
  - mol2vec
    - eval_mol2vec_results.py
    - train_mol2vec.sh
    - README.md
    - mol2vec.py
  - one_shot_models
    - multitask_classifier.py
    - examples
      - muv_siamese_one_fold.py
      - tox_attn_one_fold.py
      - sider_attn_one_fold.py
      - sider_from_tox21_attn_one_fold.py
      - sider_from_tox21_siamese_one_fold.py
      - sider_siamese_one_fold.py
      - sider_from_tox21_res_one_fold.py
      - sider_alternate_weave.py
      - tox_res_one_fold.py
      - muv_res_one_fold.py
      - muv_attn_one_fold.py
      - tox_siamese_one_fold.py
      - tox21_alternate_weave.py
      - sider_res_one_fold.py
    - graph_models.py
    - support_classifier.py
    - multitask_regressor.py
    - tests
      - 1jld_ligand.pdb
      - test_graph_models.py
      - 1jld_ligand.sdf
      - test_graph_topology.py
      - __init__.py
    - graph_topology.py
  - pubchem_dataset
    - create_smiles_mapping.py
    - download_pubchem_ftp.py
    - README.md
    - create_assay_overview.py
  - DeepMHC
    - deepmhc.py
    - bd13_datasets.py
    - run_deepmhc.py
  - autoencoder_models
    - model.py
    - test_tensorflowEncoders.py
    - __init__.py
    - autoencoder.py
    - .gitignore
  - dragonn
    - tutorial_images
    - GTC_workshop_tutorial.ipynb
    - tutorial_utils.py
    - models.py
    - simulations.py
    - utils.py
  - nn
    - constraints.py
    - weave_layers.py
    - copy.py
    - objectives.py
    - layers.py
    - __init__.py
    - tests
      - test_layers.py
  - hagcn
    - run_model.py
    - hagcn_model.py
    - hagcn_layers.py
  - rl
    - tictactoe.py
    - mcts.py
    - test_mcts.py
  - tensorflow_models
    - test_utils.py
    - robust_multitask.py
    - progressive_joint.py
    - __init__.py
    - utils.py
    - test_progressive.py
    - deepchem_multitask_classifer_distributed_training_example.py
    - progressive_multitask.py
  - torch
    - torch_multitask_regression.py
    - pytorch_graphconv.py
    - examples
      - tox21_pytorch_graphconv.ipynb
    - torch_model.py
    - torch_multitask_classification.py
  - mpnn
    - mpnn.py
    - README.md
    - donkey.py
  - DiabeticRetinopathy
    - data.py
    - run.py
    - model.py
  - vina_model
    - 1jld_ligand.pdb
    - test_vina_model.py
    - 1jld_ligand.sdf
    - vina_model.py
  - laplacian
    - petroskisuch.py
  - visualization
    - utils.py
  - atomicconv
    - splits
      - pdbbind_random_split.py
      - splitters.py
      - pdbbind_temporal_split.py
      - pdbbind_stratified_split.py
      - pdbbind_scaffold_split.py
      - .gitignore
    - feat
      - atomicnet_coordinates.py
      - featurize.py
      - convert_ligand_sdf_to_pdb.sh
      - atomicnet_pdbbind_datasets.py
    - acnn
      - refined
        opt_scaffold.py
        opt_temporal.py
        opt_random.py
        tensor_graph_hyper_param_eval.py
        get_acnn_refined.sh
        opt_stratified.py
        .gitignore
      - core
        get_acnn_core.sh
        opt_scaffold.py
        opt_temporal.py
        opt_random.py
        tensor_graph_hyper_param_search.py
        tensor_graph_hyper_param_eval.py
        opt_stratified.py
        opt_random_tensorgraph.py
    - models
      - atomicnet_ops.py
      - atomicnet.py
      - legacy.py
- MANIFEST.in

import warnings
from keras import backend as K
from keras import objectives
from keras.layers import Input, Lambda
from keras.layers.convolutional import Convolution1D
from keras.layers.core import Dense, Flatten, RepeatVector
from keras.layers.recurrent import GRU
from keras.layers.wrappers import TimeDistributed
from keras.models import Model


class MoleculeVAE():
  """Variational autoencoder assembled from Keras layers.

  ``create`` builds three Keras ``Model`` objects and stores them on the
  instance:

  * ``encoder``: maps an input of shape ``(max_length, charset_length)`` to a
    sampled latent vector of size ``latent_rep_size``.
  * ``decoder``: maps a latent vector to a ``max_length``-long sequence of
    softmax distributions over ``charset_length`` entries.
  * ``autoencoder``: an encoder stack followed by a decoder stack, compiled
    with the VAE loss returned by ``_buildEncoder``.

  Deprecated: instantiating the class emits a ``DeprecationWarning``.
  """

  # Populated by ``create``; ``None`` until then.
  autoencoder = None
  encoder = None
  decoder = None

  def __init__(self):
    # stacklevel=2 attributes the warning to the code that instantiates the
    # class rather than to this constructor.
    warnings.warn(
        "Deprecated. Will be removed in DeepChem 1.4.",
        DeprecationWarning,
        stacklevel=2)

  def create(self,
             charset_length,
             max_length=120,
             latent_rep_size=292,
             weights_file=None):
    """Build the encoder, decoder and autoencoder models.

    Parameters
    ----------
    charset_length: int
      Size of the last input axis and of the decoder's softmax output.
    max_length: int, optional (default 120)
      Size of the first (sequence) input axis.
    latent_rep_size: int, optional (default 292)
      Dimensionality of the latent vector.
    weights_file: str, optional
      If truthy, weights are loaded from this file into all three models
      before the autoencoder is compiled.
    """
    # Stand-alone encoder: input -> sampled latent vector. The loss closure
    # returned alongside it is not needed for this model.
    x = Input(shape=(max_length, charset_length))
    _, z = self._buildEncoder(x, latent_rep_size, max_length)
    self.encoder = Model(x, z)

    # Stand-alone decoder: latent vector -> per-position softmax.
    encoded_input = Input(shape=(latent_rep_size,))
    self.decoder = Model(encoded_input,
                         self._buildDecoder(encoded_input, latent_rep_size,
                                            max_length, charset_length))

    # End-to-end autoencoder. The builders are called again here, so this
    # model is made of fresh layer instances that merely reuse the layer
    # names of the stand-alone encoder/decoder above.
    x1 = Input(shape=(max_length, charset_length))
    vae_loss, z1 = self._buildEncoder(x1, latent_rep_size, max_length)
    self.autoencoder = Model(x1,
                             self._buildDecoder(z1, latent_rep_size, max_length,
                                                charset_length))

    if weights_file:
      self.autoencoder.load_weights(weights_file)
      # by_name=True matches weights to layers via the names assigned in the
      # builder methods.
      self.encoder.load_weights(weights_file, by_name=True)
      self.decoder.load_weights(weights_file, by_name=True)

    self.autoencoder.compile(
        optimizer='Adam', loss=vae_loss, metrics=['accuracy'])

  def _buildEncoder(self, x, latent_rep_size, max_length, epsilon_std=0.01):
    """Build the encoder stack on top of the input tensor ``x``.

    Parameters
    ----------
    x: tensor
      Keras input tensor the convolution stack is applied to.
    latent_rep_size: int
      Width of the ``z_mean`` / ``z_log_var`` layers and of the sample.
    max_length: int
      Factor applied to the cross-entropy term of the loss.
    epsilon_std: float, optional (default 0.01)
      Standard deviation of the noise drawn when sampling.

    Returns
    -------
    tuple
      ``(vae_loss, z)`` where ``vae_loss`` is a ``(y_true, y_pred)`` loss
      function closing over this encoder's ``z_mean`` / ``z_log_var``
      tensors, and ``z`` is the sampled latent tensor.
    """
    h = Convolution1D(9, 9, activation='relu', name='conv_1')(x)
    h = Convolution1D(9, 9, activation='relu', name='conv_2')(h)
    h = Convolution1D(10, 11, activation='relu', name='conv_3')(h)
    h = Flatten(name='flatten_1')(h)
    h = Dense(435, activation='relu', name='dense_1')(h)

    def sampling(args):
      # z = mean + exp(log_var / 2) * noise
      z_mean_, z_log_var_ = args
      batch_size = K.shape(z_mean_)[0]
      # NOTE(review): the `std` keyword matches the older Keras API this file
      # imports from; newer Keras releases appear to call it `stddev` -
      # confirm against the pinned Keras version before changing.
      epsilon = K.random_normal(
          shape=(batch_size, latent_rep_size), mean=0., std=epsilon_std)
      return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

    z_mean = Dense(latent_rep_size, name='z_mean', activation='linear')(h)
    z_log_var = Dense(latent_rep_size, name='z_log_var', activation='linear')(h)

    def vae_loss(x, x_decoded_mean):
      # Cross-entropy over the flattened tensors, scaled by max_length, plus
      # a KL term computed from z_mean / z_log_var.
      x = K.flatten(x)
      x_decoded_mean = K.flatten(x_decoded_mean)
      xent_loss = max_length * objectives.binary_crossentropy(x, x_decoded_mean)
      kl_loss = -0.5 * K.mean(
          1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
      return xent_loss + kl_loss

    return (vae_loss, Lambda(
        sampling, output_shape=(latent_rep_size,),
        name='lambda')([z_mean, z_log_var]))

  def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
    """Build the decoder stack on top of the latent tensor ``z``.

    The latent vector goes through a dense layer, is repeated ``max_length``
    times, and passes through three sequence-returning GRU layers. A
    time-distributed softmax ``Dense`` of width ``charset_length`` produces
    the returned output tensor.
    """
    h = Dense(latent_rep_size, name='latent_input', activation='relu')(z)
    h = RepeatVector(max_length, name='repeat_vector')(h)
    h = GRU(501, return_sequences=True, name='gru_1')(h)
    h = GRU(501, return_sequences=True, name='gru_2')(h)
    h = GRU(501, return_sequences=True, name='gru_3')(h)
    return TimeDistributed(
        Dense(charset_length, activation='softmax'), name='decoded_mean')(h)

  def save(self, filename):
    """Write the autoencoder's weights to ``filename``.

    ``create`` (or ``load``) must have been called first; before that
    ``autoencoder`` is still ``None``.
    """
    self.autoencoder.save_weights(filename)

  def load(self,
           charset_length,
           weights_file,
           latent_rep_size=292,
           max_length=120):
    """Rebuild the models and load weights from ``weights_file``.

    Parameters
    ----------
    charset_length: int
      Forwarded to ``create``.
    weights_file: str
      Path of the weights file, forwarded to ``create``.
    latent_rep_size: int, optional (default 292)
      Forwarded to ``create``.
    max_length: int, optional (default 120)
      Forwarded to ``create``. Must match the value used when the weights
      were saved; previously this could not be overridden here, so models
      built with a non-default ``max_length`` could not be reloaded.
    """
    self.create(
        charset_length,
        max_length=max_length,
        weights_file=weights_file,
        latent_rep_size=latent_rep_size)