Java Code Examples for org.apache.parquet.example.data.simple.SimpleGroup#addGroup()

The following examples show how to use org.apache.parquet.example.data.simple.SimpleGroup#addGroup(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: TestTupleRecordConsumer.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one nested record for a document-style Pig schema (DocId, Links, Name)
 * and hands it to {@code testFromGroups} together with the schema string.
 */
@Test
public void testArtSchema() throws ExecException, ParserException {
  String pigSchemaString =
          "DocId:long, " +
          "Links:(Backward:{(long)}, Forward:{(long)}), " +
          "Name:{(Language:{(Code:chararray,Country:chararray)}, Url:chararray)}";

  SimpleGroup document = new SimpleGroup(getMessageType(pigSchemaString));
  document.add("DocId", 1L);

  // Links: one "bag" entry under Backward and one under Forward, each holding a single long.
  Group linksGroup = document.addGroup("Links");
  Group backwardBag = linksGroup.addGroup("Backward").addGroup("bag");
  backwardBag.add(0, 1L);
  Group forwardBag = linksGroup.addGroup("Forward").addGroup("bag");
  forwardBag.add(0, 1L);

  // Name: a single bag entry with one Language tuple and a Url.
  Group nameEntry = document.addGroup("Name").addGroup("bag");
  Group languageEntry = nameEntry.addGroup("Language").addGroup("bag");
  languageEntry.append("Code", "en").append("Country", "US");
  nameEntry.add("Url", "http://foo/bar");

  testFromGroups(pigSchemaString, Arrays.<Group>asList(document));
}
 
Example 2
Source File: TestPruneColumnsCommand.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code numRecord} identical example records to a new Parquet file and
 * returns the file's path.
 *
 * <p>Every record has the required fields {@code DocId}, {@code Name} and
 * {@code Gender}, plus an optional {@code Links} group with one {@code Backward}
 * and one {@code Forward} value.
 *
 * @param prefix passed to {@code createTempFile} to obtain the output location
 * @return the path of the file that was written
 * @throws IOException if the file cannot be created or written
 */
private String createParquetFile(String prefix) throws IOException {
  MessageType schema = new MessageType("schema",
    new PrimitiveType(REQUIRED, INT64, "DocId"),
    new PrimitiveType(REQUIRED, BINARY, "Name"),
    new PrimitiveType(REQUIRED, BINARY, "Gender"),
    new GroupType(OPTIONAL, "Links",
      new PrimitiveType(REPEATED, INT64, "Backward"),
      new PrimitiveType(REPEATED, INT64, "Forward")));

  // The schema is published through the configuration that the writer is built with.
  conf.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, schema.toString());

  String file = createTempFile(prefix);
  ExampleParquetWriter.Builder builder = ExampleParquetWriter.builder(new Path(file)).withConf(conf);
  // Parameterized writer type instead of the raw ParquetWriter, so write() is type-checked.
  try (ParquetWriter<Group> writer = builder.build()) {
    for (int i = 0; i < numRecord; i++) {
      SimpleGroup g = new SimpleGroup(schema);
      g.add("DocId", 1L);
      g.add("Name", "foo");
      g.add("Gender", "male");
      Group links = g.addGroup("Links");
      // Index 0 is "Backward" and index 1 is "Forward", following the Links declaration above.
      links.add(0, 2L);
      links.add(1, 3L);
      writer.write(g);
    }
  }

  return file;
}
 
Example 3
Source File: CompressionConveterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the first {@code numRecord} entries of {@code testDocs} to a new Parquet
 * file using the given writer settings and returns the file's path.
 *
 * <p>Every record has the required fields {@code DocId}, {@code Name} and
 * {@code Gender}, plus an optional {@code Links} group with one {@code Backward}
 * and one {@code Forward} value.
 *
 * @param conf          configuration that receives the example schema and is given to the writer
 * @param extraMeta     extra key/value metadata handed to the writer builder
 * @param numRecord     number of records to write; entries {@code 0..numRecord-1} of the
 *                      {@code testDocs} arrays are read
 * @param prefix        passed to {@code createTempFile} to obtain the output location
 * @param codec         name of a {@code CompressionCodecName} constant
 * @param writerVersion writer version handed to the writer builder
 * @param pageSize      page size handed to the writer builder
 * @param testDocs      source of the per-record field values
 * @return the path of the file that was written
 * @throws IOException if the file cannot be created or written
 */
private String createParquetFile(Configuration conf, Map<String, String> extraMeta, int numRecord, String prefix, String codec,
                                       ParquetProperties.WriterVersion writerVersion, int pageSize, TestDocs testDocs) throws IOException {
  MessageType schema = new MessageType("schema",
    new PrimitiveType(REQUIRED, INT64, "DocId"),
    new PrimitiveType(REQUIRED, BINARY, "Name"),
    new PrimitiveType(REQUIRED, BINARY, "Gender"),
    new GroupType(OPTIONAL, "Links",
      new PrimitiveType(REPEATED, BINARY, "Backward"),
      new PrimitiveType(REPEATED, BINARY, "Forward")));

  // The schema is published through the configuration that the writer is built with.
  conf.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, schema.toString());

  String file = createTempFile(prefix);
  ExampleParquetWriter.Builder builder = ExampleParquetWriter.builder(new Path(file))
    .withConf(conf)
    .withWriterVersion(writerVersion)
    .withExtraMetaData(extraMeta)
    .withDictionaryEncoding("DocId", true)
    .withValidation(true)
    .enablePageWriteChecksum()
    .withPageSize(pageSize)
    .withCompressionCodec(CompressionCodecName.valueOf(codec));
  // Parameterized writer type instead of the raw ParquetWriter, so write() is type-checked.
  try (ParquetWriter<Group> writer = builder.build()) {
    for (int i = 0; i < numRecord; i++) {
      SimpleGroup g = new SimpleGroup(schema);
      g.add("DocId", testDocs.docId[i]);
      g.add("Name", testDocs.name[i]);
      g.add("Gender", testDocs.gender[i]);
      Group links = g.addGroup("Links");
      // Index 0 is "Backward" and index 1 is "Forward", following the Links declaration above.
      links.add(0, testDocs.linkBackward[i]);
      links.add(1, testDocs.linkForward[i]);
      writer.write(g);
    }
  }

  return file;
}
 
Example 4
Source File: PhoneBookWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a {@code User} into a {@code SimpleGroup} built on {@code schema}.
 *
 * <p>{@code id} is always written. {@code name}, the {@code phoneNumbers} group,
 * each phone's {@code kind}, the {@code location} group and its {@code lon} /
 * {@code lat} values are written only when the corresponding getter returns a
 * non-null value.
 *
 * @param user the user to convert
 * @return a new group holding the user's data
 */
public static SimpleGroup groupFromUser(User user) {
  SimpleGroup userGroup = new SimpleGroup(schema);
  userGroup.append("id", user.getId());

  if (user.getName() != null) {
    userGroup.append("name", user.getName());
  }

  if (user.getPhoneNumbers() != null) {
    // The wrapper group is created even when the collection turns out to be empty.
    Group phonesGroup = userGroup.addGroup("phoneNumbers");
    for (PhoneNumber entry : user.getPhoneNumbers()) {
      Group phoneGroup = phonesGroup.addGroup("phone");
      phoneGroup.append("number", entry.getNumber());
      if (entry.getKind() != null) {
        phoneGroup.append("kind", entry.getKind());
      }
    }
  }

  if (user.getLocation() != null) {
    // Longitude and latitude are optional independently of each other.
    Group locationGroup = userGroup.addGroup("location");
    if (user.getLocation().getLon() != null) {
      locationGroup.append("lon", user.getLocation().getLon());
    }
    if (user.getLocation().getLat() != null) {
      locationGroup.append("lat", user.getLocation().getLat());
    }
  }

  return userGroup;
}
 
Example 5
Source File: TestTupleRecordConsumer.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a record whose field {@code a} is a bag with two single-field tuples
 * ("foo" and "bar") and hands it to {@code testFromGroups}.
 */
@Test
public void testBags() throws ExecException, ParserException {
  String pigSchemaString = "a: {(b: chararray)}";
  SimpleGroup record = new SimpleGroup(getMessageType(pigSchemaString));

  // Each addGroup("bag") call adds one more tuple to the bag.
  Group bagField = record.addGroup("a");
  Group firstTuple = bagField.addGroup("bag");
  firstTuple.append("b", "foo");
  Group secondTuple = bagField.addGroup("bag");
  secondTuple.append("b", "bar");

  testFromGroups(pigSchemaString, Arrays.<Group>asList(record));
}
 
Example 6
Source File: TestTupleRecordConsumer.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a record whose field {@code a} is a map with two entries
 * ("foo" and "bar", each mapped to a single-field tuple) and hands it to
 * {@code testFromGroups}.
 */
@Test
public void testMaps() throws ExecException, ParserException {
  String pigSchemaString = "a: [(b: chararray)]";
  SimpleGroup record = new SimpleGroup(getMessageType(pigSchemaString));
  Group mapField = record.addGroup("a");

  // Each entry is a "map" group with a "key" value and a nested "value" group.
  Group fooEntry = mapField.addGroup("map").append("key", "foo");
  fooEntry.addGroup("value").append("b", "foo");

  Group barEntry = mapField.addGroup("map").append("key", "bar");
  barEntry.addGroup("value").append("b", "bar");

  testFromGroups(pigSchemaString, Arrays.<Group>asList(record));
}