Java Code Examples for org.apache.spark.sql.SparkSession#close()

The following examples show how to use org.apache.spark.sql.SparkSession#close() . The examples are taken from open source projects; the source file, project, and license are noted above each example.
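SparkSession#close() is a synonym for stop(): it shuts down the session's underlying SparkContext, which is why the examples below call it once all work is done. Because SparkSession implements java.io.Closeable, a session can also be closed automatically with try-with-resources. The minimal sketch below illustrates that pattern; the application name and local master are placeholder values and are not taken from any of the projects listed below.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SparkSessionCloseSketch {
    public static void main(String[] args) {
        // try-with-resources calls spark.close() (equivalent to spark.stop())
        // even if an exception is thrown inside the block
        try (SparkSession spark = SparkSession.builder()
                .master("local[*]")                 // placeholder: run locally with all cores
                .appName("SparkSessionCloseSketch") // placeholder application name
                .getOrCreate()) {
            Dataset<Row> df = spark.range(10).toDF("value");
            df.show();
        }
    }
}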
Example 1
Source File: MutationToStructureDemo.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]").appName(MutationToStructureDemo.class.getSimpleName())
            .getOrCreate();

    // find missense mutations that map to UniProt ID P15056 (BRAF)
    // that are annotated as pathogenic or likely pathogenic in ClinVar.
    List<String> uniprotIds = Arrays.asList("P15056"); // BRAF: P15056
    String query = "clinvar.rcv.clinical_significance:pathogenic OR clinvar.rcv.clinical_significance:likely pathogenic";
    Dataset<Row> df = MyVariantDataset.getVariations(uniprotIds, query).cache();
    System.out.println("BRAF missense mutations: " + df.count());
    df.show();
    
    // extract the list of variant Ids
    List<String> variantIds = df.select("variationId").as(Encoders.STRING()).collectAsList();
    
    // map to PDB structures
    Dataset<Row> ds = G2SDataset.getPositionDataset(variantIds);
    ds = ds.sort("structureId","chainId","pdbPosition");
    ds.show();

    spark.close(); 
}
 
Example 2
Source File: PdbLigandDemo.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]").appName(PdbLigandDemo.class.getSimpleName())
            .getOrCreate();

    // find non-polymeric chemical components that contain carbon
    // and have a formula weight > 150 Da
    String sqlQuery = "SELECT pdbid, id, formula, formula_weight, name from chem_comp "
            + " WHERE type = 'non-polymer' AND formula LIKE 'C%' AND formula_weight > 150";
    Dataset<Row> ds = PdbjMineDataset.getDataset(sqlQuery);

    System.out.println("First 10 results from query: " + sqlQuery);
    ds.show(10, false);

    System.out.println("Top 10 ligands in PDB:");
    ds.groupBy("id").count().sort(col("count").desc()).show(10);

    System.out.println("Formula weight (>150) statistics:");
    ds.describe("formula_weight").show();

    spark.close();
}
 
Example 3
Source File: PdbDrugBankMapping.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]").appName(PdbDrugBankMapping.class.getSimpleName())
            .getOrCreate();

    // download open DrugBank dataset
    Dataset<Row> drugBank = DrugBankDataset.getOpenDrugLinks();
    
    // find some tyrosine kinase inhibitors with generic name stem: "tinib"
    drugBank = drugBank.filter("Commonname LIKE '%tinib'");
    
    // get PDB ligand annotations
    Dataset<Row> ligands = CustomReportService.getDataset("ligandId","ligandMolecularWeight","ligandFormula","ligandSmiles","InChIKey");

    // join ligand dataset with DrugBank info by InChIKey
    ligands = ligands.join(drugBank, ligands.col("InChIKey").equalTo(drugBank.col("StandardInChIKey")));
   
    // show one example per drug molecule
    ligands = ligands.dropDuplicates("Commonname");
    ligands.select("structureChainId", "ligandId", "DrugBankID", "Commonname", "ligandMolecularWeight","ligandFormula", "InChIKey", "ligandSmiles")
    .sort("Commonname").show(50);

    spark.close(); 
}
 
Example 4
Source File: DrugBankDemo.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]").appName(DrugBankDemo.class.getSimpleName())
            .getOrCreate();

    // download open DrugBank dataset
    Dataset<Row> openDrugLinks = DrugBankDataset.getOpenDrugLinks();

    // find all drugs with an InChIKey
    openDrugLinks = openDrugLinks.filter("StandardInChIKey IS NOT NULL");

    // show some sample data
    openDrugLinks.select("DrugBankID", "Commonname", "CAS", "StandardInChIKey").show();

    // The DrugBank password protected datasets contain more information.
    // You need to create a DrugBank account and supply username/password
    // to access these datasets.

    // Download DrugBank dataset for approved drugs
    // String username = args[0];
    // String password = args[1];
    // Dataset<Row> drugLinks =
    // DrugBankDataset.getDrugLinks(DrugGroup.APPROVED, username, password);
    // drugLinks.show();

    spark.close(); 
}
 
Example 5
Source File: DatasetBalancerTest.java    From mmtf-spark with Apache License 2.0
@Test
public void test() {
	List<Row> rows = Arrays.asList(
			RowFactory.create("a", 1), RowFactory.create("a", 2), 
			RowFactory.create("b", 1), RowFactory.create("b", 2), RowFactory.create("b", 3), 
			RowFactory.create("c", 1), RowFactory.create("c", 2), RowFactory.create("c", 3), RowFactory.create("c", 4));

	SparkSession spark = SparkSession.builder().master("local[1]").getOrCreate();

	StructType schema = new StructType(
			new StructField[] { DataTypes.createStructField("key", DataTypes.StringType, false),
					DataTypes.createStructField("value", DataTypes.IntegerType, false) });

	Dataset<Row> data = spark.createDataFrame(rows, schema);

	long seed = 19;
	Dataset<Row> balancedData = DatasetBalancer.downsample(data, "key", seed);
	assertTrue(balancedData.count() > 0);
	
	spark.close();
}
 
Example 6
Source File: KafkaImportApplicationIntegrationTest.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaStreamingImportProcessLevel() throws Exception {
    //run main class
    String args[] = {"-kb", KAFKA_HOST + ":" + KAFKA_PORT, "-fd", IMPORT_TEST_OUTPUT_DIRECTORY_PROCESS, "-bm", "true", "-sr", "false", "-dl", "process", "-wd", "./src/test/resources/config/kafka_import_process/", "-sm", "overwrite"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();
    KafkaImportApplication.main(args);

    //start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    //generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read().load(IMPORT_TEST_OUTPUT_DIRECTORY_PROCESS);

    //check that dataset contains 43 lines
    assertEquals(43, importedDataset.count());

    //check hash of dataset
    String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
    assertEquals("15254E402E5D700FB125E2BD670FE716", hash);

    //close Spark session
    sparkSession.close();
}
 
Example 7
Source File: KafkaImportApplicationIntegrationTest.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaStreamingImportActivityLevel() throws Exception {
    //run main class
    String args[] = {"-kb", KAFKA_HOST + ":" + KAFKA_PORT, "-fd", IMPORT_TEST_OUTPUT_DIRECTORY_ACTIVITY, "-bm", "true", "-sr", "false", "-dl", "activity", "-wd", "./src/test/resources/config/kafka_import_activity/","-sm", "overwrite"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();
    KafkaImportApplication.main(args);

    //start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    //generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read().load(IMPORT_TEST_OUTPUT_DIRECTORY_ACTIVITY);

    //check that dataset contains 55 lines
    assertEquals(55, importedDataset.count());

    //check hash of dataset
    String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
    assertEquals("9CEE92C16D7803E0ECF57666FDAC60D7", hash);

    //close Spark session
    sparkSession.close();
}
 
Example 8
Source File: KafkaProcessingApplicationIntegrationTest.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaDataProcessingActivityLevel() throws Exception {
    //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

    //run main class
    String args[] = {"-fs", DATA_PROCESSING_TEST_INPUT_DIRECTORY_ACTIVITY, "-fd", DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_ACTIVITY, "-d", "|", "-sr", "false", "-dl", "activity", "-sm", "overwrite", "-of", "parquet", "-wd", "./src/test/resources/config/kafka_processing_activity/"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();

    // run main class
    KafkaProcessingApplication.main(args);

    //start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    //generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read()
            .option("inferSchema", "true")
            .load(DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_ACTIVITY + "/result/parquet");

    //check that dataset contains 12 lines
    assertEquals(12, importedDataset.count());

    //check that dataset contains 43 columns
    assertEquals(43, importedDataset.columns().length);

    //check hash of dataset
    String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
    System.out.println(hash);
    assertEquals("A8BBFC3B17C00C40C9883DA1F396D453", hash);

    //close Spark session
    sparkSession.close();
}
 
Example 9
Source File: SwissModelDemo.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]").appName(SwissModelDemo.class.getSimpleName())
            .getOrCreate();
   
    List<String> uniProtIds = Arrays.asList("P36575","P24539","O00244");
    Dataset<Row> ds = SwissModelDataset.getSwissModels(uniProtIds);
    ds.show();

    spark.close(); 
}
 
Example 10
Source File: PdbMetadataDemo.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]").appName(PdbMetadataDemo.class.getSimpleName())
            .getOrCreate();

    // query the following fields from the _citation category using PDBj's Mine2 web service:
    // journal_abbrev, pdbx_database_id_PubMed, year.
    // Note, mixed case column names must be quoted and escaped with \".
    String sqlQuery = "SELECT pdbid, journal_abbrev, \"pdbx_database_id_PubMed\", year from citation WHERE id = 'primary'";
    Dataset<Row> ds = PdbjMineDataset.getDataset(sqlQuery);

    System.out.println("First 10 results from query: " + sqlQuery);
    ds.show(10, false);

    // filter out unpublished entries (they contain the word "published" in various upper/lower case combinations)
    ds = ds.filter("UPPER(journal_abbrev) NOT LIKE '%PUBLISHED%'");

    // print the top 10 journals
    System.out.println("Top 10 journals that publish PDB structures:");
    ds.groupBy("journal_abbrev").count().sort(col("count").desc()).show(10, false);

    // filter out entries without a PubMed Id (is -1 if PubMed Id is not available)
    ds = ds.filter("pdbx_database_id_PubMed > 0");
    System.out.println("Entries with PubMed Ids: " + ds.count());

    // show growth of papers in PubMed
    System.out.println("PubMed Ids per year: ");
    ds.groupBy("year").count().sort(col("year").desc()).show(10, false);

    spark.close();
}
 
Example 11
Source File: DrugBankDatasetTest.java    From mmtf-spark with Apache License 2.0
@Test
public void test() throws IOException {
	SparkSession spark = SparkSession
			.builder()
			.master("local[*]")
			.appName(DrugBankDatasetTest.class.getSimpleName())
			.getOrCreate();
	
	Dataset<Row> ds = DrugBankDataset.getOpenDrugLinks();
	assertTrue(ds.count() > 10000);
	assertEquals("DrugBankID", ds.columns()[0]);
	
	spark.close();
}
 
Example 12
Source File: PortfolioCollector.java    From ExecDashboard with Apache License 2.0
/**
   * Main collection loop
   */
  @SuppressWarnings("PMD.NPathComplexity")
  public void collect() {
      HygieiaSparkConnection sparkConnection = new HygieiaSparkConnection(setting.getReadUri(), setting.getReadDatabase(),
              setting.getWriteUri(), setting.getWriteDatabase());
      SparkSession sparkSession = sparkConnection.getInstance();
      JavaSparkContext javaSparkContext = new JavaSparkContext(sparkSession.sparkContext());

      //Build portfolio structure: Portfolio -> Product (ASV) -> Environment -> Component (BAP)
      collectCMDB(sparkSession, javaSparkContext);
      List<Portfolio> portfolioList = createPortfolios();
      ArrayList<Lob> lobList = (ArrayList<Lob>) createLobs();

      if (CollectionUtils.isEmpty(portfolioList)) {
          LOGGER.info("##### Portfolio List is empty, cannot procedd further, returning ... #####");
          return;
      }

      if(setting.isScmCollectorFlag()) {
          LOGGER.info("##### Starting SCM Collector #####");
          scmCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed SCM Collector #####");
      }
      if(setting.isLibraryPolicyCollectorFlag()) {
          LOGGER.info("##### Starting Library Policy Collector #####");
          libraryPolicyCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed Library Policy Collector #####");
      }
      if(setting.isIncidentsCollectorFlag()){
          LOGGER.info("##### Starting Incident Collector #####");
          incidentCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed Incident Collector #####");
      }
      if(setting.isStaticCodeAnalysisCollectorFlag()){
          LOGGER.info("##### Starting Static Code Collector #####");
          staticCodeAnalysisCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed Static Code Analysis Collector #####");
      }
      if(setting.isUnitTestCoverageCollectorFlag()){
          LOGGER.info("##### Starting Unit Test Collector #####");
          unitTestCoverageCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed Unit Test Collector #####");
      }
      if(setting.isPipelineCollectorFlag()){
          LOGGER.info("##### Starting Pipeline Collector #####");
          pipelineCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed Pipeline Collector #####");
      }
      if(setting.isTraceabilityCollectorFlag()){
          LOGGER.info("##### Starting Traceability Collector #####");
          traceabilityCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed Traceability Collector #####");
      }
      if(setting.isSecurityCollectorFlag()) {
          LOGGER.info("##### Starting Security Collector #####");
          securityCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed Security Collector #####");
      }
      if(setting.isPerformanceCollectorFlag()) {
          LOGGER.info("##### Starting Performance Collector #####");
          performanceCollector.collect(sparkSession, javaSparkContext, portfolioList);
          LOGGER.info("##### Completed Performance Collector #####");
      }
      if(setting.isEngineeringMaturityFlag()) {
          LOGGER.info("##### Starting Engineering Maturity Collector #####");
          engineeringMaturityCollector.collect(sparkSession, javaSparkContext, lobList);
      }

      sparkSession.close();
      javaSparkContext.close();
  }
 
Example 13
Source File: KafkaProcessingApplicationIntegrationTest.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
    public void testKafkaDataProcessingProcessLevel() throws Exception {
        //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

        //run main class
        String args[] = {"-fs", DATA_PROCESSING_TEST_INPUT_DIRECTORY_PROCESS, "-fd", DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_PROCESS, "-d", "|", "-sr", "false", "-sm", "overwrite", "-of", "parquet", "-wd", "./src/test/resources/config/kafka_processing_process/"};
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local[*]");
        SparkSession.builder().config(sparkConf).getOrCreate();

        // run main class
        KafkaProcessingApplication.main(args);

        //start Spark session
        SparkSession sparkSession = SparkSession.builder()
                .master("local[*]")
                .appName("IntegrationTest")
                .getOrCreate();

        //generate Dataset and create hash to compare
        Dataset<Row> importedDataset = sparkSession.read()
                .option("inferSchema", "true")
                .load(DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_PROCESS + "/result/parquet");

        //check that dataset contains 4 lines
        assertEquals(4, importedDataset.count());

        //check that dataset contains 42 columns
        assertEquals(42, importedDataset.columns().length);

        //convert rows to string
        String[] resultLines = (String[]) importedDataset.map(row -> row.mkString(), Encoders.STRING()).collectAsList().toArray();
        for(String l : resultLines) {
            System.out.println(l);
        }

        //check if hashes of line values are correct
        //kept in for easier amendment after test case change
//        System.out.println(DigestUtils.md5Hex(resultLines[0]).toUpperCase());
//        System.out.println(DigestUtils.md5Hex(resultLines[1]).toUpperCase());
//        System.out.println(DigestUtils.md5Hex(resultLines[2]).toUpperCase());
//        System.out.println(DigestUtils.md5Hex(resultLines[3]).toUpperCase());

        assertEquals("9088849D6374163C3E9DACB3090D4E56", DigestUtils.md5Hex(resultLines[0]).toUpperCase());
        assertEquals("415A0A505F9A32002C1342171E7649F9", DigestUtils.md5Hex(resultLines[1]).toUpperCase());
        assertEquals("C83F9CC0618D7FA50D63753FBC429188", DigestUtils.md5Hex(resultLines[2]).toUpperCase());
        assertEquals("0559C383855FDE566069B483188E06C0", DigestUtils.md5Hex(resultLines[3]).toUpperCase());

        //close Spark session
        sparkSession.close();
    }
 
Example 14
Source File: AnalyzeWaterInteractions4.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {

		SparkSession spark = SparkSession
				.builder()
				.master("local[*]")
				.appName(AnalyzeWaterInteractions4.class.getSimpleName())
				.getOrCreate();

		String inputfile = args[0];
		Dataset<Row> data = spark.read().parquet(inputfile).cache();

		System.out.println("Interactions: " + data.count());

		data.describe("distance1").show();
		data.describe("distance2").show();
		data.describe("distance3").show();
		data.describe("distance4").show();

		// all interaction distance statistics
		Dataset<Row> distance = data.select(col("distance1").as("distance"))
				.union(data.select(col("distance2").as("distance")))
				.union(data.select(col("distance3").as("distance")))
				.union(data.select(col("distance4").as("distance")));
		
		distance.describe("distance").show();
		
		// water oxygen - oxygen distance statistics
		Dataset<Row> ooDistance = data.filter("element1 = 'O'").select(col("distance1").as("O-Odistance"))
				.union(data.filter("element2 = 'O'").select(col("distance2").as("O-Odistance")))
				.union(data.filter("element3 = 'O'").select(col("distance3").as("O-Odistance")))
				.union(data.filter("element4 = 'O'").select(col("distance4").as("O-Odistance")));
		
		ooDistance.describe("O-Odistance").show();
		
		// water oxygen - nitrogen distance statistics
		Dataset<Row> onDistance = data.filter("element1 = 'N'").select(col("distance1").as("O-Ndistance"))
				.union(data.filter("element2 = 'N'").select(col("distance2").as("O-Ndistance")))
				.union(data.filter("element3 = 'N'").select(col("distance3").as("O-Ndistance")))
				.union(data.filter("element4 = 'N'").select(col("distance4").as("O-Ndistance")));
		
		onDistance.describe("O-Ndistance").show();
		
		// orientational order statistics
		data.describe("q3").show();
		data.describe("q4").show();


		// angle statistics
		Dataset<Row> allAngles = data.select(col("angle1-2").as("angle"))
				.union(data.select(col("angle1-3").as("angle")))
				.union(data.select(col("angle1-4").as("angle")))
				.union(data.select(col("angle2-3").as("angle")))
				.union(data.select(col("angle2-4").as("angle")))
				.union(data.select(col("angle3-4").as("angle")));
		
		allAngles.describe("angle").show();
		
		// normalized B-factor statistics
		Dataset<Row> nbFactor = data.select(col("nbFactor0").as("nbFactor"))
				.union(data.select(col("nbFactor1").as("nbFactor")))
				.union(data.select(col("nbFactor2").as("nbFactor")))
				.union(data.select(col("nbFactor3").as("nbFactor")))
				.union(data.select(col("nbFactor4").as("nbFactor")));
		
		nbFactor.describe("nbFactor").show();

		spark.close();
	}
 
Example 15
Source File: SiftsDataDemo.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]").appName(SiftsDataDemo.class.getSimpleName())
            .getOrCreate();

    // get PDB entry to PubMed Id mappings
    String pubmedQuery = "SELECT * FROM sifts.pdb_pubmed LIMIT 10";
    Dataset<Row> pubmed = PdbjMineDataset.getDataset(pubmedQuery);
    System.out.println("First 10 results for query: " + pubmedQuery);
    pubmed.show(10);

    // get PDB chain to InterPro mappings
    String interproQuery = "SELECT * FROM sifts.pdb_chain_interpro LIMIT 10";
    Dataset<Row> interpro = PdbjMineDataset.getDataset(interproQuery);
    System.out.println("First 10 results for query: " + interproQuery);
    interpro.show();

    // get PDB chain to UniProt mappings
    String uniprotQuery = "SELECT * FROM sifts.pdb_chain_uniprot LIMIT 10";
    Dataset<Row> uniprot = PdbjMineDataset.getDataset(uniprotQuery);
    System.out.println("First 10 results for query: " + uniprotQuery);
    uniprot.show();

    // get PDB chain to taxonomy mappings
    String taxonomyQuery = "SELECT * FROM sifts.pdb_chain_taxonomy LIMIT 10";
    Dataset<Row> taxonomy = PdbjMineDataset.getDataset(taxonomyQuery);
    System.out.println("First 10 results for query: " + taxonomyQuery);
    taxonomy.show();

    // get PDB chain to PFAM mappings
    String pfamQuery = "SELECT * FROM sifts.pdb_chain_pfam LIMIT 10";
    Dataset<Row> pfam = PdbjMineDataset.getDataset(pfamQuery);
    System.out.println("First 10 results for query: " + pfamQuery);
    pfam.show();

    // get PDB chain to CATH mappings
    String cathQuery = "SELECT * FROM sifts.pdb_chain_cath_uniprot LIMIT 10";
    Dataset<Row> cath = PdbjMineDataset.getDataset(cathQuery);
    System.out.println("First 10 results for query: " + cathQuery);
    cath.show();

    // get PDB chain to SCOP mappings
    String scopQuery = "SELECT * FROM sifts.pdb_chain_scop_uniprot LIMIT 10";
    Dataset<Row> scop = PdbjMineDataset.getDataset(scopQuery);
    System.out.println("First 10 results for query: " + scopQuery);
    scop.show();

    // get PDB chain to Enzyme classification (EC) mappings
    String enzymeQuery = "SELECT * FROM sifts.pdb_chain_enzyme LIMIT 10";
    Dataset<Row> enzyme = PdbjMineDataset.getDataset(enzymeQuery);
    System.out.println("First 10 results for query: " + enzymeQuery);
    enzyme.show();

    // get PDB chain to Gene Ontology term mappings
    String goQuery = "SELECT * FROM sifts.pdb_chain_go LIMIT 10";
    Dataset<Row> go = PdbjMineDataset.getDataset(goQuery);
    System.out.println("First 10 results for query: " + goQuery);
    go.show(10);

    spark.close();
}
 
Example 16
Source File: SparkIngestDriver.java    From geowave with Apache License 2.0
public void close(SparkSession session) {
  if (session != null) {
    session.close();
    session = null;
  }
}