org.apache.spark.sql.hive.HiveContext Java Examples

The following examples show how to use org.apache.spark.sql.hive.HiveContext. Each example lists the source file it was taken from, the project it belongs to, and that project's license.
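As a point of reference, here is a minimal sketch of the classic Spark 1.x pattern the examples below build on. It assumes an existing JavaSparkContext named sc and an already-registered Hive table; the table name and query are illustrative only and are not taken from any of the projects listed here.

// Minimal sketch (Spark 1.x API, not from any project below):
// build a HiveContext from an existing JavaSparkContext and run a HiveQL query.
HiveContext hiveContext = new HiveContext(sc.sc());
DataFrame result = hiveContext.sql("SELECT key, value FROM src LIMIT 10");
result.show();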
Example #1
Source File: SparkDataSourceManager.java    From DDF with Apache License 2.0
@Override
public DDF loadFromJDBC(JDBCDataSourceDescriptor dataSource) throws DDFException {
    SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
    HiveContext sqlContext = sparkDDFManager.getHiveContext();

    JDBCDataSourceCredentials cred = (JDBCDataSourceCredentials)dataSource.getDataSourceCredentials();
    String fullURL = dataSource.getDataSourceUri().getUri().toString();
    if (cred.getUsername() != null &&  !cred.getUsername().equals("")) {
        fullURL += String.format("?user=%s&password=%s", cred.getUsername(), cred.getPassword());
    }

    Map<String, String> options = new HashMap<String, String>();
    options.put("url", fullURL);
    options.put("dbtable", dataSource.getDbTable());
    DataFrame df = sqlContext.load("jdbc", options);

    DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
        null, SparkUtils.schemaFromDataFrame(df));
    // TODO?
    ddf.getRepresentationHandler().get(RDD.class, Row.class);
    ddf.getMetaDataHandler().setDataSourceDescriptor(dataSource);
    return ddf;
}
 
Example #2
Source File: SparkDataSourceManager.java    From DDF with Apache License 2.0
@Override
public DDF loadSpecialFormat(DataFormat format, URI fileURI, Boolean flatten) throws DDFException {
    SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
    HiveContext sqlContext = sparkDDFManager.getHiveContext();
    DataFrame jdf = null;
    switch (format) {
        case JSON:
            jdf = sqlContext.jsonFile(fileURI.toString());
            break;
        case PQT:
            jdf = sqlContext.parquetFile(fileURI.toString());
            break;
        default:
            throw new DDFException(String.format("Unsupported data format: %s", format.toString()));
    }

    DataFrame df = SparkUtils.getDataFrameWithValidColnames(jdf);
    DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
        null, SparkUtils.schemaFromDataFrame(df));

    if (flatten) {
        return ddf.getFlattenedDDF();
    }
    return ddf;
}
 
Example #3
Source File: TestWebServiceGet.java    From quetzal with Eclipse Public License 2.0
public static void main(String[] args) {
//  SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("local[2]");
//  SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://Kavithas-MBP.home:7077");
    SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://kavithas-mbp.watson.ibm.com:7077");

    JavaSparkContext sc = new JavaSparkContext(conf);

    HiveContext sqlContext = new HiveContext(sc.sc());
    Dataset<Row> urls = sqlContext.read().json("/tmp/urls.json");

    urls.registerTempTable("urls");
    Dataset<Row> temp = sqlContext.sql("select * from urls");
    temp.show();

    sqlContext.sql("add jar /tmp/quetzal.jar");
    sqlContext.sql("create temporary function webservice as 'com.ibm.research.rdf.store.utilities.WebServiceGetUDTF'");
    Dataset<Row> drugs = sqlContext.sql("select webservice(\"drug,id,action\", \"url\", \"\", \"GET\", \"xs=http://www.w3.org/2001/XMLSchema\", \"//row\",\"drug\",\"./drug\","
            + " \"<string>\", \"id\", \"./id\",\"<string>\", \"action\", \"./action\", \"<string>\", url) as (drug, drug_typ, id, id_typ, action, action_typ) from urls");
    drugs.show();
    System.out.println("Num rows:" + drugs.count());
}
 
Example #4
Source File: UserVisitSessionAnalyzeSpark.java    From BigDataPlatform with GNU General Public License v3.0
/**
 * Get an SQLContext.
 * When running in a local test environment, create an SQLContext object;
 * when running in production, create a HiveContext object.
 * @param sc SparkContext
 * @return SQLContext
 */
private static SQLContext getSQLContext(SparkContext sc) {
	boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
	if(local) {
		return new SQLContext(sc);
	} else {
		return new HiveContext(sc);
	}
}
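For orientation, here is a hedged usage sketch (not part of the original project) of how a factory method like this is typically invoked; it assumes an existing JavaSparkContext named sc, and the table name is illustrative:

// Hypothetical call site: the caller works against SQLContext and does not care
// whether it received a plain SQLContext (local test) or a HiveContext (production).
SQLContext sqlContext = getSQLContext(sc.sc());
DataFrame actions = sqlContext.sql("SELECT * FROM user_visit_action");
actions.show();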
 
Example #5
Source File: SparkUtils.java    From BigDataPlatform with GNU General Public License v3.0
/**
 * Get an SQLContext.
 * If spark.local is set to true, create an SQLContext; otherwise, create a HiveContext.
 * @param sc SparkContext
 * @return SQLContext or HiveContext
 */
public static SQLContext getSQLContext(SparkContext sc) {
	boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
	if(local) {
		return new SQLContext(sc);
	} else {
		return new HiveContext(sc);
	}
}
 
Example #6
Source File: UserVisitAnalyze.java    From UserActionAnalyzePlatform with Apache License 2.0
/**
 * Checks whether we are running in the production environment and returns the matching context.
 * @param sc SparkContext
 * @return SQLContext (local) or HiveContext (production)
 */
public static SQLContext getSQLContext(SparkContext sc)
{
    boolean local= ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
    if(local)
    {
        return new SQLContext(sc);
    }
    return new HiveContext(sc);
}
 
Example #7
Source File: SparkDDFManager.java    From DDF with Apache License 2.0
private void initialize(SparkContext sparkContext, Map<String, String> params) throws DDFException {
  this.setSparkContext(sparkContext == null ? this.createSparkContext(params) : sparkContext);
  this.mHiveContext = new HiveContext(this.mSparkContext);
  String compression = System.getProperty("spark.sql.inMemoryColumnarStorage.compressed", "true");
  String batchSize = System.getProperty("spark.sql.inMemoryColumnarStorage.batchSize", "1000");
  mLog.info(">>>> spark.sql.inMemoryColumnarStorage.compressed= " + compression);
  mLog.info(">>>> spark.sql.inMemoryColumnarStorage.batchSize= " + batchSize);
  this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.compressed", compression);
  this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.batchSize", batchSize);

  // register SparkSQL UDFs
  this.registerUDFs();
  this.mDataSourceManager = new SparkDataSourceManager(this);
}
 
Example #8
Source File: SparkDDF.java    From DDF with Apache License 2.0
public boolean isTable() {
  HiveContext hiveContext = ((SparkDDFManager) this.getManager()).getHiveContext();
  for (String table : hiveContext.tableNames()) {
    if (table.equals(this.getTableName())) {
      return true;
    }
  }
  return false;
}
 
Example #9
Source File: ExplorerSQLContextTest.java    From Explorer with Apache License 2.0
@Test
public void whenHiveContextIsSelectedInConfiguration(){

   Properties properties = new Properties();
   properties.put(AttributteNames.CT_HIVE_CONTEXT,"YES");
   sqlContext.loadConfiguration(properties);
   assertThat("When exist HiveContext then create instanceof HiveContext", sqlContext.getConnector(), instanceOf(HiveContext.class));
}
 
Example #10
Source File: SQLQueryBAM.java    From ViraPipe with MIT License
public static void main(String[] args) throws IOException {
  SparkConf conf = new SparkConf().setAppName("SQLQueryBAM");

  JavaSparkContext sc = new JavaSparkContext(conf);
  SQLContext sqlContext = new HiveContext(sc.sc());

  Options options = new Options();
  Option opOpt = new Option( "out", true, "HDFS path for output files. If not present, the output files are not moved to HDFS." );
  Option queryOpt = new Option( "query", true, "SQL query string." );
  Option baminOpt = new Option( "in", true, "" );

  options.addOption( opOpt );
  options.addOption( queryOpt );
  options.addOption( baminOpt );
  CommandLineParser parser = new BasicParser();
  CommandLine cmd = null;
  try {
    cmd = parser.parse( options, args );

  }
  catch (ParseException exp) {
    System.err.println("Parsing failed. Reason: " + exp.getMessage());
    // Without exiting here, cmd would remain null and the option lookups below would throw a NullPointerException.
    System.exit(1);
  }

  String bwaOutDir = cmd.hasOption("out") ? cmd.getOptionValue("out") : null;
  String query = cmd.hasOption("query") ? cmd.getOptionValue("query") : null;
  String bamin = cmd.hasOption("in") ? cmd.getOptionValue("in") : null;

  sc.hadoopConfiguration().setBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, true);

  //Read BAM/SAM from HDFS
  JavaPairRDD<LongWritable, SAMRecordWritable> bamPairRDD = sc.newAPIHadoopFile(bamin, AnySAMInputFormat.class, LongWritable.class, SAMRecordWritable.class, sc.hadoopConfiguration());
  //Map to SAMRecord RDD
  JavaRDD<SAMRecord> samRDD = bamPairRDD.map(v1 -> v1._2().get());
  JavaRDD<MyAlignment> rdd = samRDD.map(bam -> new MyAlignment(bam.getReadName(), bam.getStart(), bam.getReferenceName(), bam.getReadLength(), new String(bam.getReadBases(), StandardCharsets.UTF_8), bam.getCigarString(), bam.getReadUnmappedFlag(), bam.getDuplicateReadFlag()));

  Dataset<Row> samDF = sqlContext.createDataFrame(rdd, MyAlignment.class);
  samDF.registerTempTable(tablename);
  if(query!=null) {

    //Save as parquet file
    Dataset<Row> df2 = sqlContext.sql(query);
    df2.show(100, false);

    if(bwaOutDir!=null)
      df2.write().parquet(bwaOutDir);

  }else{
    if(bwaOutDir!=null)
      samDF.write().parquet(bwaOutDir);
  }

  sc.stop();

}
 
Example #11
Source File: SparkDDFManager.java    From DDF with Apache License 2.0
public HiveContext getHiveContext() {
  return mHiveContext;
}
 
Example #12
Source File: SqlHandler.java    From DDF with Apache License 2.0
private HiveContext getHiveContext() {
  return ((SparkDDFManager) this.getManager()).getHiveContext();
}
 
Example #13
Source File: SparkHiveContextBuilder.java    From Explorer with Apache License 2.0
/**
 * Builds a HiveContext.
 * @param sparkContext the SparkContext used to configure the HiveContext
 * @return HiveContext
 */
@Override
public HiveContext build(SparkContext sparkContext) {
    return new HiveContext(sparkContext);
}
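A hedged usage sketch for the builder above (the no-argument constructor and the variable names are assumptions for illustration, not taken from the Explorer project):

// Hypothetical call site: obtain a HiveContext through the builder abstraction
// instead of constructing it directly.
SparkHiveContextBuilder builder = new SparkHiveContextBuilder();
HiveContext hiveContext = builder.build(sparkContext);
hiveContext.sql("SHOW TABLES").show();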