org.apache.hive.hcatalog.data.DefaultHCatRecord Java Examples

The following examples show how to use org.apache.hive.hcatalog.data.DefaultHCatRecord. Each example notes its source file and the project it was taken from.
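Before the project examples, here is a minimal, self-contained sketch of how a DefaultHCatRecord is typically constructed and populated. The class name, the two-column table (id INT, name STRING), and the literal values are hypothetical and serve only to illustrate the API; the examples below use the same calls with project-specific schemas.

import java.util.Arrays;

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class DefaultHCatRecordSketch {

    // Builds a record sized to the table schema and fills it both by position
    // and by column name. Assumes a hypothetical table (id INT, name STRING).
    public static HCatRecord buildRecord(HCatSchema schema) throws Exception {
        DefaultHCatRecord record = new DefaultHCatRecord(schema.size());
        record.set(0, 42);                     // set by position
        record.set("name", schema, "example"); // set by column name against the schema
        return record;
    }

    public static void main(String[] args) {
        // A record can also wrap an existing list of column values directly,
        // as several of the test examples below do.
        HCatRecord fromList = new DefaultHCatRecord(Arrays.asList((Object) 42, "example"));
        System.out.println(fromList.getAll());
    }
}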
Example #1
Source File: HdfsUtil.java    From ES-Fastloader with Apache License 2.0
public static Job getHdfsJob(Configuration conf, TaskConfig taskConfig, IndexInfo indexInfo) throws Exception {
    Job job = Job.getInstance(conf, MAIN_CLASS);
    job.setJobName("DidiFastIndex_" + taskConfig.getEsTemplate());
    job.setJarByClass(FastIndex.class);
    job.setMapperClass(FastIndexMapper.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    HCatInputFormat.setInput(job, taskConfig.getHiveDB(), taskConfig.getHiveTable(), taskConfig.getFilterStr());

    job.setReducerClass(FastIndexReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(indexInfo.getReducerNum());
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(taskConfig.getHdfsMROutputPath()));

    return job;
}
 
Example #2
Source File: FastIndexMapper.java    From ES-Fastloader with Apache License 2.0
@Override
protected void map(Object key, HCatRecord value, Context context) throws IOException, InterruptedException {
    DefaultHCatRecord hCatRecord = (DefaultHCatRecord) value;
    int shardNo;

    List<String> keyList = taskConfig.getKeyList();
    if (keyList == null || keyList.isEmpty()) {
        shardNo = (int) (Math.random() * templateConfig.getReducerNum());
    } else {
        String keyStr = getKeyValue(keyList, hCatRecord);
        shardNo = CommonUtils.getShardId(keyStr, templateConfig.getReducerNum());
    }

    // the number of shards equals the number of reducers
    context.write(new IntWritable(shardNo), hCatRecord);
}
 
Example #3
Source File: FastIndexMapper.java    From ES-Fastloader with Apache License 2.0
private String getKeyValue(List<String> keys, DefaultHCatRecord hCatRecord) throws HCatException {
    StringBuilder sb = new StringBuilder();
    for (String key : keys) {
        Object id = hCatRecord.get(key, this.schema);
        if (id == null || StringUtils.isBlank(id.toString())) {
            sb.append("");
        } else {
            sb.append(id.toString());
        }
        sb.append("_");
    }

    if (sb.length() > 1) {
        return sb.substring(0, sb.length() - 1);
    } else {
        return sb.toString();
    }
}
 
Example #4
Source File: HCatalogTestUtils.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
private List<HCatRecord> generateHCatRecords(int numRecords,
  HCatSchema hCatTblSchema, ColumnGenerator... extraCols) throws Exception {
  List<HCatRecord> records = new ArrayList<HCatRecord>();
  List<HCatFieldSchema> hCatTblCols = hCatTblSchema.getFields();
  int size = hCatTblCols.size();
  for (int i = 0; i < numRecords; ++i) {
    DefaultHCatRecord record = new DefaultHCatRecord(size);
    record.set(hCatTblCols.get(0).getName(), hCatTblSchema, i);
    record.set(hCatTblCols.get(1).getName(), hCatTblSchema, "textfield" + i);
    int idx = 0;
    for (int j = 0; j < extraCols.length; ++j) {
      if (extraCols[j].getKeyType() == KeyType.STATIC_KEY) {
        continue;
      }
      record.set(hCatTblCols.get(idx + 2).getName(), hCatTblSchema,
        extraCols[j].getHCatValue(i));
      ++idx;
    }

    records.add(record);
  }
  return records;
}
 
Example #5
Source File: TableDataInserterTest.java    From HiveRunner with Apache License 2.0
@Test
public void insertsRowsIntoExistingTable() {
  Multimap<Map<String, String>, HCatRecord> data = ImmutableMultimap
      .<Map<String, String>, HCatRecord>builder()
      .put(of("local_date", "2015-10-14"), new DefaultHCatRecord(asList((Object) "aa", "bb")))
      .put(of("local_date", "2015-10-14"), new DefaultHCatRecord(asList((Object) "aa2", "bb2")))
      .put(of("local_date", "2015-10-14"), new DefaultHCatRecord(asList((Object) "cc", "dd")))
      .put(of("local_date", "2015-10-15"), new DefaultHCatRecord(asList((Object) "ee", "ff")))
      .build();

  TableDataInserter inserter = new TableDataInserter(TEST_DB, TEST_TABLE, hiveShell.getHiveConf());
  inserter.insert(data);

  List<String> result = hiveShell.executeQuery("select * from testdb.test_table");
  Collections.sort(result);

  assertEquals(4, result.size());
  assertEquals("aa", result.get(0).split("\t")[0]);
  assertEquals("bb", result.get(0).split("\t")[1]);
  assertEquals("2015-10-14", result.get(0).split("\t")[2]);

  assertEquals("aa2", result.get(1).split("\t")[0]);
  assertEquals("bb2", result.get(1).split("\t")[1]);
  assertEquals("2015-10-14", result.get(1).split("\t")[2]);

  assertEquals("cc", result.get(2).split("\t")[0]);
  assertEquals("dd", result.get(2).split("\t")[1]);
  assertEquals("2015-10-14", result.get(2).split("\t")[2]);

  assertEquals("ee", result.get(3).split("\t")[0]);
  assertEquals("ff", result.get(3).split("\t")[1]);
  assertEquals("2015-10-15", result.get(3).split("\t")[2]);
}
 
Example #6
Source File: HCatInputFormatBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
	super();
	this.configuration = config;
	HadoopUtils.mergeHadoopConf(this.configuration);

	this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
	this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

	// configure output schema of HCatFormat
	configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
	// set type information
	this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
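The Javadoc above notes that the return type can be switched from HCatRecord to Flink-native tuples via asFlinkTuples(). Below is a rough usage sketch from a Flink batch job; the database and table names and the projected columns ("id", "name") are hypothetical, and getFields() for column projection is assumed from the same HCatInputFormatBase class, so treat this as an illustration rather than the project's own usage.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hcatalog.java.HCatInputFormat;
import org.apache.hadoop.conf.Configuration;

public class HCatInputFormatSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // By default the format emits HCatRecord; projecting fields and calling
        // asFlinkTuples() switches the result type to Flink tuples instead.
        HCatInputFormat<Tuple2<Integer, String>> format =
                new HCatInputFormat<>("mydb", "mytable", new Configuration());
        format.getFields("id", "name").asFlinkTuples();

        DataSet<Tuple2<Integer, String>> rows = env.createInput(format);
        rows.first(10).print();
    }
}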
 
Example #7
Source File: TableDataBuilder.java    From HiveRunner with Apache License 2.0
TableDataBuilder copyRow() {
  checkState(row != null, "No previous row to copy.");
  HCatRecord copy = new DefaultHCatRecord(new ArrayList<>(row.getAll()));
  flushRow();
  row = copy;
  return this;
}
 
Example #8
Source File: HCatInputFormatBase.java    From flink with Apache License 2.0
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
	super();
	this.configuration = config;
	HadoopUtils.mergeHadoopConf(this.configuration);

	this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
	this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

	// configure output schema of HCatFormat
	configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
	// set type information
	this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
 
Example #9
Source File: FactDistinctColumnsMapperTest.java    From kylin with Apache License 2.0
@Test
public void testMapper() throws IOException {
    Configuration configuration = mapDriver.getConfiguration();
    configuration.set(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "100");
    configuration.set(BatchConstants.CFG_CUBE_NAME, "test_kylin_cube_with_slr_1_new_segment");
    configuration.set(BatchConstants.CFG_CUBE_SEGMENT_ID, "198va32a-a33e-4b69-83dd-0bb8b1f8c53b");
    HCatRecord value1 = new DefaultHCatRecord(11);
    value1.set(0, "2012-08-16");
    value1.set(1, "48027");
    value1.set(2, "0");
    value1.set(3, "Home & Garden");
    value1.set(4, "Cheese & Crackers");
    value1.set(5, "Cheese & Crackers");
    value1.set(6, "48027");
    value1.set(7, "16");
    value1.set(8, "10000010");
    value1.set(9, "204.28");
    value1.set(10, "5");
    mapDriver.addInput(new LongWritable(0), value1);

    List<Pair<SelfDefineSortableKey, Text>> result = mapDriver.run();
    int colsNeedDictSize = cubeDesc.getAllColumnsNeedDictionaryBuilt().size();
    int cuboidsCnt = cubeDesc.getAllCuboids().size();

    assertEquals(
            colsNeedDictSize + (cubeDesc.getRowkey().getRowKeyColumns().length - colsNeedDictSize) * 2 + cuboidsCnt,
            result.size());
}
 
Example #10
Source File: HCatInputFormatBase.java    From flink with Apache License 2.0
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
	super();
	this.configuration = config;
	HadoopUtils.mergeHadoopConf(this.configuration);

	this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
	this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

	// configure output schema of HCatFormat
	configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
	// set type information
	this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
 
Example #11
Source File: FastIndexReducer.java    From ES-Fastloader with Apache License 2.0
@Override
protected void reduce(IntWritable key, Iterable<DefaultHCatRecord> values, Context context) throws IOException, InterruptedException {
    this.reduceId = key.get();

    LogUtils.info("reduce start, es reduceNo is:" + reduceId);
    Iterator<DefaultHCatRecord> records = values.iterator();


    while (records.hasNext()) {
        DefaultHCatRecord record = records.next();
        if (record != null) {
            JSONObject jsonObject = transformer.tranform(record.getAll());

            String primeKey;
            List<String> keyList = taskConfig.getKeyList();
            if (keyList == null || keyList.isEmpty()) {
                primeKey = UUID.randomUUID().toString();
            } else {
                primeKey = getKeyValue(keyList, jsonObject);
            }

            esWriter.bulk(primeKey, jsonObject);
        }
    }

    esWriter.finish();
    context.write(NullWritable.get(), NullWritable.get());
    log.info("reduce finish!");
}
 
Example #12
Source File: JsonSerdeUtilsTest.java    From incubator-hivemall with Apache License 2.0
@Test
public void testRW() throws Exception {
    List<Object> rlist = new ArrayList<Object>(13);
    {
        rlist.add(new Byte("123"));
        rlist.add(new Short("456"));
        rlist.add(new Integer(789));
        rlist.add(new Long(1000L));
        rlist.add(new Double(5.3D));
        rlist.add(new Float(2.39F));
        rlist.add(new String("hcat\nand\nhadoop"));
        rlist.add(null);

        List<Object> innerStruct = new ArrayList<Object>(2);
        innerStruct.add(new String("abc"));
        innerStruct.add(new String("def"));
        rlist.add(innerStruct);

        List<Integer> innerList = new ArrayList<Integer>();
        innerList.add(314);
        innerList.add(007);
        rlist.add(innerList);

        Map<Short, String> map = new HashMap<Short, String>(3);
        map.put(new Short("2"), "hcat is cool");
        map.put(new Short("3"), "is it?");
        map.put(new Short("4"), "or is it not?");
        rlist.add(map);

        rlist.add(new Boolean(true));

        List<Object> c1 = new ArrayList<Object>();
        List<Object> c1_1 = new ArrayList<Object>();
        c1_1.add(new Integer(12));
        List<Object> i2 = new ArrayList<Object>();
        List<Integer> ii1 = new ArrayList<Integer>();
        ii1.add(new Integer(13));
        ii1.add(new Integer(14));
        i2.add(ii1);
        Map<String, List<?>> ii2 = new HashMap<String, List<?>>();
        List<Integer> iii1 = new ArrayList<Integer>();
        iii1.add(new Integer(15));
        ii2.put("phew", iii1);
        i2.add(ii2);
        c1_1.add(i2);
        c1.add(c1_1);
        rlist.add(c1);
        rlist.add(HiveDecimal.create(new BigDecimal("123.45")));//prec 5, scale 2
        rlist.add(new HiveChar("hive\nchar", 10));
        rlist.add(new HiveVarchar("hive\nvarchar", 20));
        rlist.add(Date.valueOf("2014-01-07"));
        rlist.add(new Timestamp(System.currentTimeMillis()));
        rlist.add("hive\nbinary".getBytes("UTF-8"));
    }

    DefaultHCatRecord r = new DefaultHCatRecord(rlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized =
            JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(rlist, deserialized);
}
 
Example #13
Source File: JsonSerdeUtilsTest.java    From incubator-hivemall with Apache License 2.0
@Test
public void testRWNull() throws Exception {
    List<Object> nlist = new ArrayList<Object>(13);
    {
        nlist.add(null); // tinyint
        nlist.add(null); // smallint
        nlist.add(null); // int
        nlist.add(null); // bigint
        nlist.add(null); // double
        nlist.add(null); // float
        nlist.add(null); // string
        nlist.add(null); // string
        nlist.add(null); // struct
        nlist.add(null); // array
        nlist.add(null); // map
        nlist.add(null); // bool
        nlist.add(null); // complex
        nlist.add(null); //decimal(5,2)
        nlist.add(null); //char(10)
        nlist.add(null); //varchar(20)
        nlist.add(null); //date
        nlist.add(null); //timestamp
        nlist.add(null); //binary
    }

    DefaultHCatRecord r = new DefaultHCatRecord(nlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized =
            JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(nlist, deserialized);
}
 
Example #14
Source File: HCatalogIOTestUtils.java    From beam with Apache License 2.0
/** Returns a DefaultHCatRecord instance for the passed value. */
private static DefaultHCatRecord toHCatRecord(int value) {
  return new DefaultHCatRecord(Arrays.asList("record " + value, value));
}
 
Example #15
Source File: HCatalogIO.java    From beam with Apache License 2.0
@Override
@SuppressWarnings({"unchecked", "rawtypes"})
public Coder<HCatRecord> getOutputCoder() {
  return (Coder) WritableCoder.of(DefaultHCatRecord.class);
}
 
Example #16
Source File: HCatalogIOTest.java    From beam with Apache License 2.0
/** Perform end-to-end test of Write-then-Read operation. */
@Test
@NeedsEmptyTestTablesForUnboundedReads
public void testWriteThenUnboundedReadSuccess() throws Exception {

  defaultPipeline
      .apply(Create.of(buildHCatRecords(TEST_RECORDS_COUNT)))
      .apply(
          HCatalogIO.write()
              .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
              .withDatabase(TEST_DATABASE)
              .withTable(TEST_TABLE)
              .withPartition(getPartitions())
              .withBatchSize(512L));
  defaultPipeline.run();
  final ImmutableList<String> partitions = ImmutableList.of("load_date", "product_type");
  final PCollection<HCatRecord> data =
      readAfterWritePipeline
          .apply(
              "ReadData",
              HCatalogIO.read()
                  .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
                  .withDatabase(TEST_DATABASE)
                  .withPartitionCols(partitions)
                  .withTable(TEST_TABLE)
                  .withPollingInterval(Duration.millis(15000))
                  .withTerminationCondition(Watch.Growth.afterTotalOf(Duration.millis(60000))))
          .setCoder((Coder) WritableCoder.of(DefaultHCatRecord.class));

  final PCollection<String> output =
      data.apply(
          ParDo.of(
              new DoFn<HCatRecord, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  c.output(c.element().get(0).toString());
                }
              }));

  PAssert.that(output).containsInAnyOrder(getExpectedRecords(TEST_RECORDS_COUNT));
  readAfterWritePipeline.run();
}
 
Example #17
Source File: HCatalogTestUtils.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public List<HCatRecord> loadHCatTable(String dbName,
  String tableName, Map<String, String> partKeyMap,
  HCatSchema tblSchema, List<HCatRecord> records)
  throws Exception {

  Job job = new Job(conf, "HCat load job");

  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatWriterMapper.class);


  // Just write 10 lines to the file to drive the mapper
  Path path = new Path(fs.getWorkingDirectory(),
    "mapreduce/HCatTableIndexInput");

  job.getConfiguration()
    .setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
  int writeCount = records.size();
  recsToLoad.clear();
  recsToLoad.addAll(records);
  createInputFile(path, writeCount);
  // input/output settings
  HCatWriterMapper.setWrittenRecordCount(0);

  FileInputFormat.setInputPaths(job, path);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(HCatOutputFormat.class);
  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName,
    partKeyMap);

  HCatOutputFormat.setOutput(job, outputJobInfo);
  HCatOutputFormat.setSchema(job, tblSchema);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);

  job.setNumReduceTasks(0);
  SqoopHCatUtilities.addJars(job, new SqoopOptions());
  boolean success = job.waitForCompletion(true);

  if (!success) {
    throw new IOException("Loading HCatalog table with test records failed");
  }
  utils.invokeOutputCommitterForLocalMode(job);
  LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");
  return recsToLoad;
}
 
Example #18
Source File: SqoopHCatImportHelper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public HCatRecord convertToHCatRecord(SqoopRecord sqr) throws IOException,
  InterruptedException {
  try {
    // Loading of LOBs was delayed until we have a Context.
    sqr.loadLargeObjects(lobLoader);
  } catch (SQLException sqlE) {
    throw new IOException(sqlE);
  }
  if (colCount == -1) {
    colCount = sqr.getFieldMap().size();
  }

  Map<String, Object> fieldMap = sqr.getFieldMap();
  HCatRecord result = new DefaultHCatRecord(fieldCount);

  for (Map.Entry<String, Object> entry : fieldMap.entrySet()) {
    String key = entry.getKey();
    Object val = entry.getValue();
    String hfn = key.toLowerCase();
    boolean skip = false;
    if (staticPartitionKeys != null && staticPartitionKeys.length > 0) {
      for (int i = 0; i < staticPartitionKeys.length; ++i) {
        if (staticPartitionKeys[i].equals(hfn)) {
          skip = true;
          break;
        }
      }
    }
    if (skip) {
      continue;
    }
    HCatFieldSchema hfs = null;
    try {
      hfs = hCatFullTableSchema.get(hfn);
    } catch (Exception e) {
      throw new IOException("Unable to lookup " + hfn + " in the hcat schema");
    }
    if (debugHCatImportMapper) {
      LOG.debug("SqoopRecordVal: field = " + key + " Val " + val
        + " of type " + (val == null ? null : val.getClass().getName())
        + ", hcattype " + hfs.getTypeString());
    }
    Object hCatVal = toHCat(val, hfs);

    result.set(hfn, hCatFullTableSchema, hCatVal);
  }

  return result;
}
 
Example #19
Source File: TableDataBuilder.java    From HiveRunner with Apache License 2.0
TableDataBuilder newRow() {
  flushRow();
  row = new DefaultHCatRecord(schema.size());
  return this;
}