package com.huangyueran.spark.sql;

import com.huangyueran.spark.utils.SparkUtils;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.List;

/**
 * @category Reading from a Hive data source
 * @author huangyueran
 * @time 2019-7-24 13:58:59
 */
public class HiveDataSource {

    public static void main(String[] args) {
        /*
         * 0. Copy hive-site.xml from the Hive conf directory into spark/conf.
         * 1. Start MySQL (the Hive metastore database).
         * 2. Start HDFS.
         * 3. Start Hive: ./hive
         * 4. Initialize the HiveContext (the code below uses SparkSession instead,
         *    since HiveContext is deprecated).
         * 5. Package the jar and submit it:
         *
         * ./bin/spark-submit --master yarn --deploy-mode cluster --class com.huangyueran.spark.sql.HiveDataSource /root/spark_hive_datasource.jar
         * ./bin/spark-submit --master yarn --deploy-mode client --class com.huangyueran.spark.sql.HiveDataSource /root/spark_hive_datasource.jar
         */
        JavaSparkContext sc = SparkUtils.getRemoteSparkContext(HiveDataSource.class);

        // HiveContext took a SparkContext (not a JavaSparkContext) as its constructor
        // argument; passing a JavaSparkContext also worked because it simply called
        // sc.sc() internally. HiveContext is deprecated, and the official recommendation
        // is to use SparkSession instead.
        // HiveContext hiveContext = new HiveContext(sc.sc());
        SparkSession sparkSession = new SparkSession(sc.sc());

        Dataset<Row> databases = sparkSession.sql("show databases");
        databases.show();

        List<Row> list = databases.javaRDD().collect();
        System.out.println("=============================================================");
        for (Row r : list) {
            System.out.println(r);
        }
        System.out.println("=============================================================");

        sc.close();
    }
}
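
/*
 * A minimal sketch of the SparkSession.builder() style that Spark has recommended
 * since 2.0 for Hive access, shown here for comparison with the constructor-based
 * approach above. This class is illustrative and not part of the original example:
 * the app name is an assumption, and enableHiveSupport() requires hive-site.xml on
 * the classpath (step 0 above) plus a reachable Hive metastore.
 */
class HiveDataSourceBuilderSketch {

    public static void main(String[] args) {
        // enableHiveSupport() switches the catalog to the Hive metastore and
        // enables Hive SerDes, so spark.sql() can see Hive databases and tables.
        SparkSession spark = SparkSession.builder()
                .appName("HiveDataSourceBuilderSketch") // hypothetical app name
                .enableHiveSupport()
                .getOrCreate();

        // Mirrors the query in HiveDataSource above.
        Dataset<Row> databases = spark.sql("show databases");
        databases.show();

        spark.stop();
    }
}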