org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, the HiveOutputFormat Hive uses for plain text tables; it writes only the value of each key/value pair and discards the key. Each example is taken from an open-source project, named above its code.
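Before the project examples, here is a minimal, self-contained sketch of the core call they all build on: asking HiveIgnoreKeyTextOutputFormat for a FileSinkOperator.RecordWriter and writing one line of text. The output path, the empty table properties, and the disabled compression are illustrative assumptions, not taken from any project below.

import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class TextWriterSketch {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    Path target = new Path("/tmp/example.txt");     // hypothetical output location
    Properties tableProperties = new Properties();  // empty for this sketch

    HiveIgnoreKeyTextOutputFormat<Text, Text> format = new HiveIgnoreKeyTextOutputFormat<>();
    RecordWriter writer = format.getHiveRecordWriter(
        conf, target, Text.class, false /* isCompressed */, tableProperties, Reporter.NULL);

    // The "ignore key" part: only values are written, one row per line.
    writer.write(new Text("a\u0001b\u0001c"));      // fields joined by Hive's default ^A delimiter
    writer.close(false /* abort */);
  }
}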
Example #1
Source File: HiveWriteUtils.java    From presto with Apache License 2.0
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
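Note the dispatch strategy here: Parquet, plain text (HiveIgnoreKeyTextOutputFormat), SequenceFile, and Avro each get a dedicated Presto writer, and any other output format name falls back to reflective instantiation, which requires the named class to have a no-arg constructor and implement HiveOutputFormat.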
 
Example #2
Source File: HCatalogUtil.java    From incubator-tajo with Apache License 2.0
public static String getStoreType(String fileFormat) {
  Preconditions.checkNotNull(fileFormat);

  String[] fileFormatArray = fileFormat.split("\\.");
  if (fileFormatArray.length < 1) {
    throw new CatalogException("Invalid Hive file output format: " + fileFormat);
  }

  // Compare only the simple class name, i.e. the part after the last '.'.
  String outputFormatClass = fileFormatArray[fileFormatArray.length - 1];
  if (outputFormatClass.equals(HiveIgnoreKeyTextOutputFormat.class.getSimpleName())) {
    return CatalogProtos.StoreType.CSV.name();
  } else if (outputFormatClass.equals(RCFileOutputFormat.class.getSimpleName())) {
    return CatalogProtos.StoreType.RCFILE.name();
  } else {
    throw new CatalogException("Unsupported file output format: " + fileFormat);
  }
}
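A brief usage sketch (my calls, not from the Tajo source): only the substring after the last '.' is compared, so the argument just needs to name the class.

String csv = HCatalogUtil.getStoreType(
    "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");  // "CSV"
String rcfile = HCatalogUtil.getStoreType(
    "org.apache.hadoop.hive.ql.io.RCFileOutputFormat");             // "RCFILE"
// Any other output format name throws CatalogException.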
 
Example #3
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableUsingRegex() throws Exception {
  TableMeta meta = new TableMeta(BuiltinStorages.REGEX, new KeyValueSet());
  meta.putProperty(StorageConstants.TEXT_REGEX, "([^ ]*)");
  meta.putProperty(StorageConstants.TEXT_REGEX_OUTPUT_FORMAT_STRING, "%1$s");

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("c_custkey", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, CUSTOMER)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, CUSTOMER));

  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, CUSTOMER);
  assertEquals(TextInputFormat.class.getName(), hiveTable.getSd().getInputFormat());
  assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());
  assertEquals(RegexSerDe.class.getName(), hiveTable.getSerializationLib());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  store.dropTable(DB_NAME, CUSTOMER);
}
 
Example #4
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableUsingTextFile() throws Exception {
  TableMeta meta = new TableMeta(BuiltinStorages.TEXT, new KeyValueSet());

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("c_custkey", TajoDataTypes.Type.INT4)
      .add("c_name", TajoDataTypes.Type.TEXT)
      .add("c_address", TajoDataTypes.Type.TEXT)
      .add("c_nationkey", TajoDataTypes.Type.INT4)
      .add("c_phone", TajoDataTypes.Type.TEXT)
      .add("c_acctbal", TajoDataTypes.Type.FLOAT8)
      .add("c_mktsegment", TajoDataTypes.Type.TEXT)
      .add("c_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, CUSTOMER)).toUri());
  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, CUSTOMER));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.TEXTFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, CUSTOMER);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  // IgnoreKeyTextOutputFormat is deprecated, so the stored output format is HiveIgnoreKeyTextOutputFormat.
  assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  assertEquals(StringEscapeUtils.escapeJava(StorageConstants.DEFAULT_FIELD_DELIMITER),
      table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER));

  Map<String, String> expected = getProperties(DB_NAME, CUSTOMER);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
      .put("key1", "value1")
      .put("key2", "value2")
      .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, CUSTOMER, toSet);
  Map<String, String> actual = getProperties(DB_NAME, CUSTOMER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, CUSTOMER, toUnset);
  actual = getProperties(DB_NAME, CUSTOMER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, CUSTOMER);
}
 
Example #5
Source File: TestHiveCatalogStore.java    From tajo with Apache License 2.0
@Test
public void testTableWithNullValue() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(StorageConstants.TEXT_DELIMITER, StringEscapeUtils.escapeJava("\u0002"));
  options.set(StorageConstants.TEXT_NULL, StringEscapeUtils.escapeJava("\u0003"));
  TableMeta meta = new TableMeta(BuiltinStorages.TEXT, options);

  org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
      .add("s_suppkey", TajoDataTypes.Type.INT4)
      .add("s_name", TajoDataTypes.Type.TEXT)
      .add("s_address", TajoDataTypes.Type.TEXT)
      .add("s_nationkey", TajoDataTypes.Type.INT4)
      .add("s_phone", TajoDataTypes.Type.TEXT)
      .add("s_acctbal", TajoDataTypes.Type.FLOAT8)
      .add("s_comment", TajoDataTypes.Type.TEXT)
      .build();

  TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, SUPPLIER), schema, meta,
      new Path(warehousePath, new Path(DB_NAME, SUPPLIER)).toUri());

  store.createTable(table.getProto());
  assertTrue(store.existTable(DB_NAME, SUPPLIER));

  StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.TEXTFILE);
  org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, SUPPLIER);
  assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
  // IgnoreKeyTextOutputFormat is deprecated, so the stored output format is HiveIgnoreKeyTextOutputFormat.
  assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

  TableDesc table1 = new TableDesc(store.getTable(DB_NAME, SUPPLIER));
  assertEquals(table.getName(), table1.getName());
  assertEquals(table.getUri(), table1.getUri());
  assertEquals(table.getSchema().size(), table1.getSchema().size());
  for (int i = 0; i < table.getSchema().size(); i++) {
    assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
  }

  assertEquals(table.getMeta().getProperty(StorageConstants.TEXT_DELIMITER),
      table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER));

  assertEquals(table.getMeta().getProperty(StorageConstants.TEXT_NULL),
      table1.getMeta().getProperty(StorageConstants.TEXT_NULL));

  assertEquals(table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER),
      StringEscapeUtils.escapeJava("\u0002"));

  assertEquals(table1.getMeta().getProperty(StorageConstants.TEXT_NULL),
      StringEscapeUtils.escapeJava("\u0003"));

  Map<String, String> expected = getProperties(DB_NAME, SUPPLIER);
  Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
          .put("key1", "value1")
          .put("key2", "value2")
          .build();
  expected.putAll(toSet);

  setProperty(DB_NAME, SUPPLIER, toSet);
  Map<String, String> actual = getProperties(DB_NAME, SUPPLIER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertEquals(actual.get("key2"), expected.get("key2"));

  Set<String> toUnset = Sets.newHashSet("key2", "key3");
  for (String key : toUnset) {
    expected.remove(key);
  }
  unSetProperty(DB_NAME, SUPPLIER, toUnset);
  actual = getProperties(DB_NAME, SUPPLIER);
  assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
  assertEquals(actual.get("key1"), expected.get("key1"));
  assertNull(actual.get("key2"));

  store.dropTable(DB_NAME, SUPPLIER);
}
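A side note on the escapeJava calls above: the table property holds the printable six-character escape sequence, not the raw control character. A small round-trip sketch, assuming Apache Commons Lang's StringEscapeUtils (which the test already uses):

String escaped = StringEscapeUtils.escapeJava("\u0002");   // printable escape sequence, length 6
String raw = StringEscapeUtils.unescapeJava(escaped);      // back to the single control character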