Java Code Examples for org.apache.spark.sql.hive.HiveContext

The following examples show how to use org.apache.spark.sql.hive.HiveContext. They are extracted from open source projects; the project, source file, and license are noted above each example.
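Before the project-specific examples, here is a minimal, self-contained sketch of the basic pattern they all share: create a HiveContext from a SparkContext, then run SQL through it. This uses the Spark 1.x API, where sql() returns a DataFrame, and the table name "src" is a placeholder.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

public class HiveContextQuickstart {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("HiveContextQuickstart");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // HiveContext extends SQLContext with HiveQL support and metastore access
        HiveContext sqlContext = new HiveContext(sc.sc());

        DataFrame result = sqlContext.sql("SELECT * FROM src LIMIT 10"); // "src" is a placeholder table
        result.show();

        sc.stop();
    }
}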
Example 1
Source Project: DDF   Source File: SparkDataSourceManager.java    License: Apache License 2.0
@Override
public DDF loadFromJDBC(JDBCDataSourceDescriptor dataSource) throws DDFException {
    SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
    HiveContext sqlContext = sparkDDFManager.getHiveContext();

    JDBCDataSourceCredentials cred = (JDBCDataSourceCredentials)dataSource.getDataSourceCredentials();
    String fullURL = dataSource.getDataSourceUri().getUri().toString();
    if (cred.getUsername() != null && !cred.getUsername().isEmpty()) {
        fullURL += String.format("?user=%s&password=%s", cred.getUsername(), cred.getPassword());
    }

    Map<String, String> options = new HashMap<String, String>();
    options.put("url", fullURL);
    options.put("dbtable", dataSource.getDbTable());
    DataFrame df = sqlContext.load("jdbc", options);

    DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
        null, SparkUtils.schemaFromDataFrame(df));
    // TODO?
    ddf.getRepresentationHandler().get(RDD.class, Row.class);
    ddf.getMetaDataHandler().setDataSourceDescriptor(dataSource);
    return ddf;
}
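The load("jdbc", options) call above is the older generic-source API. On Spark 1.4+ the same load can be written with the DataFrameReader; a minimal sketch, where the URL and table name are placeholders:

DataFrame df = sqlContext.read()
    .format("jdbc")
    .option("url", "jdbc:postgresql://host:5432/db?user=u&password=p")
    .option("dbtable", "my_table")
    .load();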
 
Example 2
Source Project: DDF   Source File: SparkDataSourceManager.java    License: Apache License 2.0
@Override
public DDF loadSpecialFormat(DataFormat format, URI fileURI, Boolean flatten) throws DDFException {
    SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
    HiveContext sqlContext = sparkDDFManager.getHiveContext();
    DataFrame jdf = null;
    switch (format) {
        case JSON:
            jdf = sqlContext.jsonFile(fileURI.toString());
            break;
        case PQT:
            jdf = sqlContext.parquetFile(fileURI.toString());
            break;
        default:
            throw new DDFException(String.format("Unsupported data format: %s", format.toString()));
    }

    DataFrame df = SparkUtils.getDataFrameWithValidColnames(jdf);
    DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
        null, SparkUtils.schemaFromDataFrame(df));

    if (Boolean.TRUE.equals(flatten)) { // null-safe: the boxed Boolean may be null
        return ddf.getFlattenedDDF();
    } else {
        return ddf;
    }
}
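On Spark 1.4+, jsonFile and parquetFile are deprecated in favor of the DataFrameReader. A sketch of the equivalent calls, with placeholder paths:

DataFrame json    = sqlContext.read().json("/path/to/data.json");
DataFrame parquet = sqlContext.read().parquet("/path/to/data.parquet");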
 
Example 3
Source Project: quetzal   Source File: TestWebServiceGet.java    License: Eclipse Public License 2.0
public static void main( String[] args )
{
    // Alternate masters kept from the original example:
    // SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("local[2]");
    // SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://Kavithas-MBP.home:7077");
    SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://kavithas-mbp.watson.ibm.com:7077");

    JavaSparkContext sc = new JavaSparkContext(conf);
    HiveContext sqlContext = new HiveContext(sc.sc());

    Dataset<Row> urls = sqlContext.read().json("/tmp/urls.json");
    urls.registerTempTable("urls");

    Dataset<Row> temp = sqlContext.sql("select * from urls");
    temp.show();

    // Register the web-service UDTF shipped in quetzal.jar, then call it over the urls table
    sqlContext.sql("add jar /tmp/quetzal.jar");
    sqlContext.sql("create temporary function webservice as 'com.ibm.research.rdf.store.utilities.WebServiceGetUDTF'");
    Dataset<Row> drugs = sqlContext.sql("select webservice(\"drug,id,action\", \"url\", \"\", \"GET\", \"xs=http://www.w3.org/2001/XMLSchema\", \"//row\",\"drug\",\"./drug\","
            + " \"<string>\", \"id\", \"./id\",\"<string>\", \"action\", \"./action\", \"<string>\", url) as (drug, drug_typ, id, id_typ, action, action_typ) from urls");
    drugs.show();
    System.out.println("Num rows:" + drugs.count());
}
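The example registers a Hive UDTF through SQL. For simple scalar functions, the context also supports programmatic registration; a minimal sketch, where the function name toUpper and the url column are illustrative, and the cast to UDF1 disambiguates the overloaded register method:

import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

sqlContext.udf().register("toUpper",
        (UDF1<String, String>) s -> s == null ? null : s.toUpperCase(),
        DataTypes.StringType);
Dataset<Row> upper = sqlContext.sql("select toUpper(url) from urls");
upper.show();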
 
Example 4
/**
 * Get an SQLContext.
 * In a local test environment, create a plain SQLContext;
 * in a production environment, create a HiveContext.
 * @param sc SparkContext
 * @return SQLContext
 */
private static SQLContext getSQLContext(SparkContext sc) {
	boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
	if(local) {
		return new SQLContext(sc);
	} else {
		return new HiveContext(sc);
	}
}
 
Example 5
Source Project: BigDataPlatform   Source File: SparkUtils.java    License: GNU General Public License v3.0
/**
 * Get an SQLContext.
 * If spark.local is set to true, create a plain SQLContext; otherwise, create a HiveContext.
 * @param sc the SparkContext
 * @return an SQLContext or HiveContext
 */
public static SQLContext getSQLContext(SparkContext sc) {
	boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
	if(local) {
		return new SQLContext(sc);
	} else {
		return new HiveContext(sc);
	}
}
 
Example 6
/**
 * Checks whether this is the production environment and returns the matching context:
 * a plain SQLContext locally, a HiveContext in production.
 * @param sc the SparkContext
 * @return an SQLContext or HiveContext
 */
public static SQLContext getSQLContext(SparkContext sc)
{
    boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
    if(local)
    {
        return new SQLContext(sc);
    }
    return new HiveContext(sc);
}
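Examples 4 through 6 are variants of the same helper. A usage sketch, assuming the static helper lives on the SparkUtils class from Example 5 and with a placeholder table name:

SQLContext sqlContext = SparkUtils.getSQLContext(sc);
// Locally this is a plain SQLContext; in production it is a HiveContext,
// so Hive tables and HiveQL become available transparently.
sqlContext.sql("SELECT COUNT(*) FROM some_table").show();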
 
Example 7
Source Project: DDF   Source File: SparkDDFManager.java    License: Apache License 2.0
private void initialize(SparkContext sparkContext, Map<String, String> params) throws DDFException {
  this.setSparkContext(sparkContext == null ? this.createSparkContext(params) : sparkContext);
  this.mHiveContext = new HiveContext(this.mSparkContext);
  String compression = System.getProperty("spark.sql.inMemoryColumnarStorage.compressed", "true");
  String batchSize = System.getProperty("spark.sql.inMemoryColumnarStorage.batchSize", "1000");
  mLog.info(">>>> spark.sql.inMemoryColumnarStorage.compressed= " + compression);
  mLog.info(">>>> spark.sql.inMemoryColumnarStorage.batchSize= " + batchSize);
  this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.compressed", compression);
  this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.batchSize", batchSize);

  // register SparkSQL UDFs
  this.registerUDFs();
  this.mDataSourceManager = new SparkDataSourceManager(this);
}
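Settings written with setConf can be read back the same way; a small sketch, assuming the mHiveContext and mLog fields from the example above:

// getConf returns the current value, or the supplied default if the key is unset
String compressed = this.mHiveContext.getConf(
        "spark.sql.inMemoryColumnarStorage.compressed", "true");
mLog.info("compressed columnar storage: " + compressed);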
 
Example 8
Source Project: DDF   Source File: SparkDDF.java    License: Apache License 2.0
public boolean isTable() {
  HiveContext hiveContext = ((SparkDDFManager) this.getManager()).getHiveContext();
  String[] tableNames = hiveContext.tableNames();
  boolean tableExists = false;
  for (String table : tableNames) {
    if (table.equals(this.getTableName())) {
      tableExists = true;
      break;
    }
  }
  return tableExists;
}
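The loop above amounts to a membership test; an equivalent one-liner over the same tableNames() array:

boolean tableExists = java.util.Arrays.asList(hiveContext.tableNames())
        .contains(this.getTableName());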
 
Example 9
Source Project: Explorer   Source File: ExplorerSQLContextTest.java    License: Apache License 2.0
@Test
public void whenHiveContextIsSelectedInConfiguration() {

   Properties properties = new Properties();
   properties.put(AttributteNames.CT_HIVE_CONTEXT, "YES");
   sqlContext.loadConfiguration(properties);
   assertThat("When HiveContext is selected, the connector should be an instance of HiveContext",
           sqlContext.getConnector(), instanceOf(HiveContext.class));
}
 
Example 10
Source Project: ViraPipe   Source File: SQLQueryBAM.java    License: MIT License
public static void main(String[] args) throws IOException {
  SparkConf conf = new SparkConf().setAppName("SQLQueryBAM");

  JavaSparkContext sc = new JavaSparkContext(conf);
  SQLContext sqlContext = new HiveContext(sc.sc());

  Options options = new Options();
  Option opOpt = new Option( "out", true, "HDFS path for output files. If not present, the output files are not moved to HDFS." );
  Option queryOpt = new Option( "query", true, "SQL query string." );
  Option baminOpt = new Option( "in", true, "" );

  options.addOption( opOpt );
  options.addOption( queryOpt );
  options.addOption( baminOpt );
  CommandLineParser parser = new BasicParser();
  CommandLine cmd = null;
  try {
    cmd = parser.parse( options, args );
  }
  catch( ParseException exp ) {
    System.err.println( "Parsing failed.  Reason: " + exp.getMessage() );
    System.exit(1); // cmd stays null after a parse failure, so bail out here
  }

  String bwaOutDir = cmd.hasOption("out") ? cmd.getOptionValue("out") : null;
  String query = cmd.hasOption("query") ? cmd.getOptionValue("query") : null;
  String bamin = cmd.hasOption("in") ? cmd.getOptionValue("in") : null;

  sc.hadoopConfiguration().setBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, true);

  //Read BAM/SAM from HDFS
  JavaPairRDD<LongWritable, SAMRecordWritable> bamPairRDD = sc.newAPIHadoopFile(bamin, AnySAMInputFormat.class, LongWritable.class, SAMRecordWritable.class, sc.hadoopConfiguration());
  //Map to SAMRecord RDD
  JavaRDD<SAMRecord> samRDD = bamPairRDD.map(v1 -> v1._2().get());
  JavaRDD<MyAlignment> rdd = samRDD.map(bam -> new MyAlignment(bam.getReadName(), bam.getStart(), bam.getReferenceName(), bam.getReadLength(), new String(bam.getReadBases(), StandardCharsets.UTF_8), bam.getCigarString(), bam.getReadUnmappedFlag(), bam.getDuplicateReadFlag()));

  Dataset<Row> samDF = sqlContext.createDataFrame(rdd, MyAlignment.class);
  samDF.registerTempTable(tablename); // tablename is a field defined elsewhere in the original class
  if(query!=null) {

    //Save as parquet file
    Dataset<Row> df2 = sqlContext.sql(query);
    df2.show(100,false);

    if(bwaOutDir!=null)
      df2.write().parquet(bwaOutDir);

  }else{
    if(bwaOutDir!=null)
      samDF.write().parquet(bwaOutDir);
  }

  sc.stop();

}
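One practical note: write().parquet() fails if the target directory already exists. A sketch using an explicit save mode for reruns:

import org.apache.spark.sql.SaveMode;

df2.write().mode(SaveMode.Overwrite).parquet(bwaOutDir);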
 
Example 11
Source Project: DDF   Source File: SparkDDFManager.java    License: Apache License 2.0
public HiveContext getHiveContext() {
  return mHiveContext;
}
 
Example 12
Source Project: DDF   Source File: SqlHandler.java    License: Apache License 2.0
private HiveContext getHiveContext() {
  return ((SparkDDFManager) this.getManager()).getHiveContext();
}
 
Example 13
Source Project: Explorer   Source File: SparkHiveContextBuilder.java    License: Apache License 2.0
/**
 * Builds a HiveContext.
 * @param sparkContext the SparkContext used to configure the HiveContext
 * @return a new HiveContext
 */
@Override
public HiveContext build(SparkContext sparkContext) {
    return new HiveContext(sparkContext);
}
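A usage sketch for this builder, assuming the class has a no-argument constructor, which is not shown in the excerpt:

HiveContext hiveContext = new SparkHiveContextBuilder().build(sparkContext);
hiveContext.sql("show tables").show();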