"""Generate and save a model from the scaled training features on HDFS.

The input location is assembled from environment variables: the HDFS
namenode address comes from ``MORPHL_SERVER_IP_ADDRESS`` and the directory
name is derived from ``DAY_AS_STR`` and ``UNIQUE_HASH``.
"""
from os import getenv

from distributed import Client
import dask.dataframe as dd

from model_generator import ModelGenerator

DAY_AS_STR = getenv('DAY_AS_STR')
UNIQUE_HASH = getenv('UNIQUE_HASH')
# Read for parity with the rest of the configuration; not used in this script.
TRAINING_OR_PREDICTION = getenv('TRAINING_OR_PREDICTION')
MORPHL_SERVER_IP_ADDRESS = getenv('MORPHL_SERVER_IP_ADDRESS')

HDFS_PORT = 9000
HDFS_DIR_INPUT = f'hdfs://{MORPHL_SERVER_IP_ADDRESS}:{HDFS_PORT}/{DAY_AS_STR}_{UNIQUE_HASH}_ga_chp_scaled_features_training'


def main():
    """Read the training features from HDFS and build the model.

    The parquet dataset at ``HDFS_DIR_INPUT`` is loaded as a Dask dataframe,
    persisted through a ``distributed.Client`` and handed to
    ``ModelGenerator.generate_and_save_model``.

    Raises:
        RuntimeError: if an environment variable needed to build the input
            path is unset or empty.
    """
    # Fail early with a clear message: an unset variable would otherwise be
    # formatted into the path as the literal string "None".
    required = {
        'DAY_AS_STR': DAY_AS_STR,
        'UNIQUE_HASH': UNIQUE_HASH,
        'MORPHL_SERVER_IP_ADDRESS': MORPHL_SERVER_IP_ADDRESS,
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise RuntimeError(
            'Missing required environment variable(s): ' + ', '.join(missing)
        )

    # No scheduler address is passed, so distributed's defaults apply.
    # The context manager closes the client even if model generation fails.
    with Client() as client:
        dask_df = client.persist(dd.read_parquet(HDFS_DIR_INPUT))
        ModelGenerator(dask_df).generate_and_save_model()


if __name__ == '__main__':
    main()