Java Code Examples for org.apache.avro.file.DataFileWriter#close()

The following examples show how to use org.apache.avro.file.DataFileWriter#close(). Each example is drawn from an open source project; the source file, project, and license are noted above the code.
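Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: build a DataFileWriter around a DatumWriter, create the container file, append records, then close(). The class name, schema, and output file name below are illustrative only, not taken from any project. Because DataFileWriter implements java.io.Closeable and close() flushes any buffered records before releasing the underlying stream, try-with-resources is a safe alternative to an explicit close() call:

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class DataFileWriterCloseExample {
  public static void main(String[] args) throws IOException {
    // Illustrative one-field schema; any record schema works the same way.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Example\","
        + "\"fields\":[{\"name\":\"foo\",\"type\":\"string\"}]}");

    // try-with-resources calls close() even if append() throws,
    // so buffered records are flushed and the stream is released.
    try (DataFileWriter<GenericRecord> writer =
             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      writer.create(schema, new File("example.avro")); // file name is illustrative
      GenericRecord record = new GenericData.Record(schema);
      record.put("foo", "bar");
      writer.append(record);
    } // close() happens here; no explicit writer.close() needed
  }
}

Note that close() already flushes, so the explicit flush() calls that appear immediately before close() in some examples below are harmless but redundant.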
Example 1
Source File: TestAvroEventDeserializer.java    From mt-flume with Apache License 2.0
private File newTestFile(boolean deleteOnExit) throws IOException {
  File tempFile = File.createTempFile("testDirectFile", "tmp");
  if (deleteOnExit) {
    tempFile.deleteOnExit();
  }

  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(
          new GenericDatumWriter<GenericRecord>(schema));
  writer.create(schema, tempFile);
  GenericRecordBuilder recordBuilder;
  recordBuilder = new GenericRecordBuilder(schema);
  recordBuilder.set("foo", "bar");
  GenericRecord record = recordBuilder.build();
  writer.append(record);
  writer.sync();
  recordBuilder = new GenericRecordBuilder(schema);
  recordBuilder.set("foo", "baz");
  record = recordBuilder.build();
  writer.append(record);
  writer.sync();
  writer.flush();
  writer.close();

  return tempFile;
}
 
Example 2
Source File: RedshiftIT.java    From digdag with Apache License 2.0
private byte[] avroTestData(List<Schema.Field> fields, List<Map<String, Object>> records)
        throws IOException
{
    Schema schema = Schema.createRecord("testdata", null, null, false);
    schema.setFields(fields);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    GenericDatumWriter<GenericData.Record> datum = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericData.Record> writer = new DataFileWriter<>(datum);
    writer.create(schema, out);
    for (Map<String, Object> record : records) {
        GenericData.Record r = new GenericData.Record(schema);
        for (Map.Entry<String, Object> item : record.entrySet()) {
            r.put(item.getKey(), item.getValue());
        }
        writer.append(r);
    }
    writer.close();

    return out.toByteArray();
}
 
Example 3
Source File: FsSpecProducer.java    From incubator-gobblin with Apache License 2.0
private void writeAvroJobSpec(AvroJobSpec jobSpec) throws IOException {
  DatumWriter<AvroJobSpec> datumWriter = new SpecificDatumWriter<>(AvroJobSpec.SCHEMA$);
  DataFileWriter<AvroJobSpec> dataFileWriter = new DataFileWriter<>(datumWriter);

  Path jobSpecPath = new Path(this.specConsumerPath, jobSpec.getUri());

  //Write the new JobSpec to a temporary path first.
  Path tmpDir = new Path(this.specConsumerPath, "_tmp");
  if (!fs.exists(tmpDir)) {
    fs.mkdirs(tmpDir);
  }

  Path tmpJobSpecPath = new Path(tmpDir, jobSpec.getUri());

  OutputStream out = fs.create(tmpJobSpecPath);

  dataFileWriter.create(AvroJobSpec.SCHEMA$, out);
  dataFileWriter.append(jobSpec);
  dataFileWriter.close();

  //Rename the JobSpec from temporary to final location.
  HadoopUtils.renamePath(fs, tmpJobSpecPath, jobSpecPath, true);
}
 
Example 4
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_empty_array_type() throws Exception {
    String testString = "Hello World";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithEmptyArray(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, emptyArray ARRAY<BOOLEAN>) STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
}
 
Example 5
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_null_type() throws Exception {
    String testString = "Hello World";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithNull(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, null BOOLEAN) STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
}
 
Example 6
Source File: AvroTestTools.java    From incubator-gobblin with Apache License 2.0
private void writeAsAvroBinary(Iterator<GenericRecord> input, Schema schema, FileSystem fs,
    Path outputPath) throws IOException {

  DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());

  writer.create(schema, fs.create(outputPath, true));
  while (input.hasNext()) {
    writer.append(input.next());
  }
  writer.close();

  log.info("Successfully wrote avro file to path " + outputPath);
}
 
Example 7
Source File: StageRunData.java    From geowave with Apache License 2.0
public synchronized void close() {
  for (final DataFileWriter dfw : cachedWriters.values()) {
    try {
      dfw.close();
    } catch (final IOException e) {
      LOGGER.warn("Unable to close sequence file stream", e);
    }
  }
  cachedWriters.clear();
}
 
Example 8
Source File: Purge.java    From Cubert with Apache License 2.0
private void purge(String src, String dst) throws IOException
{
    DataFileReader<GenericRecord> dataFileReader = createDataFileReader(src, false);
    DataFileWriter<GenericRecord> writer = createDataFileWriter(dataFileReader);

    numRecords = 0;
    recordsPurged = 0;
    remainingRecords = 0;

    // Copy
    while (dataFileReader.hasNext())
    {
        numRecords++;
        GenericRecord record = dataFileReader.next();
        if (record == null)
        {
            continue;
        }

        Number column = (Number) record.get(columnName);
        if ((column == null) || (!membersToPurge.contains(column.intValue())))
        {
            remainingRecords++;
            writer.append(record);
        }
    }

    recordsPurged = numRecords - remainingRecords;
    writer.close();
    dataFileReader.close();
}
 
Example 9
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_fixed_type() throws Exception {
    String testString = "Hello!";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithFixed(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_FAILURE, 1);
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_FAILURE).get(0);
    assertEquals("test.avro", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));

    final InputStream in = new ByteArrayInputStream(resultFlowFile.toByteArray());
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        assertTrue(dataFileReader.hasNext());
        GenericRecord testedRecord = dataFileReader.next();

        assertNotNull(testedRecord.get("fixed"));
        assertArrayEquals(testString.getBytes(StandardCharsets.UTF_8), ((GenericData.Fixed) testedRecord.get("fixed")).bytes());
    }
}
 
Example 10
Source File: LinearModelUtils.java    From ml-ease with Apache License 2.0
public static void writeLinearModel(JobConf conf, String path, Map<String, LinearModel> models) throws IOException
{
  AvroHdfsFileWriter<GenericRecord> writer =
      new AvroHdfsFileWriter<GenericRecord>(conf, path, LinearModelAvro.SCHEMA$);
  DataFileWriter<GenericRecord> recordWriter = writer.get();
  for (String k : models.keySet())
  {
    GenericRecord record = new GenericData.Record(LinearModelAvro.SCHEMA$);
    List modellist = models.get(k).toAvro(LibLinearDataset.INTERCEPT_NAME);
    record.put("key", k);
    record.put("model", modellist);
    recordWriter.append(record);
  }
  recordWriter.close();
}
 
Example 11
Source File: TestExtractAvroMetadata.java    From nifi with Apache License 2.0
@Test
public void testExtractionWithNonRecordSchema() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
    runner.setProperty(ExtractAvroMetadata.COUNT_ITEMS, "true");

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));

    final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
    final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.create(schema, out);
    dataFileWriter.append(data);
    dataFileWriter.append(data);
    dataFileWriter.close();

    runner.enqueue(out.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
    flowFile.assertAttributeExists(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR);
    flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.ARRAY.getName());
    flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "array");
    flowFile.assertAttributeEquals(ExtractAvroMetadata.ITEM_COUNT_ATTR, "2"); // number of arrays, not elements
}
 
Example 12
Source File: PartitionCollapsingExecutionPlannerTests.java    From datafu with Apache License 2.0
private void createOutput(DateRange dateRange) throws IOException
{
  DataFileWriter<GenericRecord> dataWriter;
  OutputStream outputStream;
  
  Path path = new Path(_outputPath,PathUtils.datedPathFormat.format(dateRange.getEndDate()));
  
  Schema outputSchema = Schemas.createRecordSchema(PartitionCollapsingTests.class, "Output",
                                            new Field("id", Schema.create(Type.LONG), "ID", null));
  
  outputStream = getFileSystem().create(new Path(path, "part-00000.avro"));
  
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
  dataWriter = new DataFileWriter<GenericRecord>(writer);      
  
  dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_START,
                     Long.toString(dateRange.getBeginDate().getTime()));
  
  dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_END,
                     Long.toString(dateRange.getEndDate().getTime()));
  
  dataWriter.create(outputSchema, outputStream);
      
  // empty file
  
  dataWriter.close();
  outputStream.close();
  dataWriter = null;
  outputStream = null; 
}
 
Example 13
Source File: AvroOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(
  TaskAttemptContext context) throws IOException, InterruptedException {

  boolean isMapOnly = context.getNumReduceTasks() == 0;
  Schema schema =
    isMapOnly ? AvroJob.getMapOutputSchema(context.getConfiguration())
      : AvroJob.getOutputSchema(context.getConfiguration());

  final DataFileWriter<T> WRITER =
    new DataFileWriter<T>(new ReflectDatumWriter<T>());

  configureDataFileWriter(WRITER, context);

  Path path = getDefaultWorkFile(context, EXT);
  WRITER.create(schema,
    path.getFileSystem(context.getConfiguration()).create(path));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore)
      throws IOException {
      WRITER.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext taskAttemptContext)
      throws IOException, InterruptedException {
      WRITER.close();
    }
  };
}
 
Example 14
Source File: RecordSetUtil.java    From components with Apache License 2.0
/**
 * Writes all records from the test set into a single Avro file on the file system.
 * 
 * @param fs The filesystem.
 * @param path The path of the file on the filesystem.
 * @param td The test data to write.
 * @throws IOException If there was an exception writing to the filesystem.
 */
public static void writeRandomAvroFile(FileSystem fs, String path, RecordSet td) throws IOException {
    try (OutputStream out = fs.create(new Path(path))) {
        DatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(td.getSchema());
        DataFileWriter<IndexedRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
        dataFileWriter.create(td.getSchema(), out);
        for (List<IndexedRecord> partition : td.getPartitions()) {
            for (IndexedRecord record : partition) {
                dataFileWriter.append(record);
            }
        }
        dataFileWriter.close();
    }
}
 
Example 15
Source File: TestAvroFileHdfsReader.java    From samza with Apache License 2.0
public static void writeTestEventsToFile(String path, int numEvents)
  throws Exception {
  Schema schema = Schema.parse(TestAvroFileHdfsReader.class.getResourceAsStream("/reader/TestEvent.avsc"));
  File file = new File(path);
  DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(writer);
  dataFileWriter.create(schema, file);
  for (int i = 0; i < numEvents; i++) {
    GenericRecord datum = new GenericData.Record(schema);
    datum.put(FIELD_1, i);
    datum.put(FIELD_2, "string_" + i);
    dataFileWriter.append(datum);
  }
  dataFileWriter.close();
}
 
Example 16
Source File: AvroWithoutSchemaRegistryProducer.java    From snowflake-kafka-connector with Apache License 2.0
@Override
public void send(final Enums.TestCases testCase)
{
  System.out.println("loading table: " + testCase.getTableName() +
    " in format: " + testCase.getFormatName() + " to Kafka");
  try
  {
    Scanner scanner = getFileScanner(testCase);
    Schema schema = testCase.getTable().getSchema();
    while (scanner.hasNextLine())
    {
      GenericData.Record record = new GenericData.Record(schema);
      GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
      ByteArrayOutputStream output = new ByteArrayOutputStream();
      DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(writer);
      fileWriter.create(schema, output);

      JsonNode data = Utils.MAPPER.readTree(scanner.nextLine());
      switch (testCase.getTable())
      {
        case ONE_G_TABLE:
          record.put("C_CUSTKEY", data.get("C_CUSTKEY").asLong());
          record.put("C_NAME", data.get("C_NAME").asText());
          record.put("C_ADDRESS", data.get("C_ADDRESS").asText());
          record.put("C_PHONE", data.get("C_PHONE").asText());
          record.put("C_ACCTBAL", data.get("C_ACCTBAL").asDouble());
          record.put("C_MKTSEGMENT", data.get("C_MKTSEGMENT").asText());
          record.put("C_COMMENT", data.get("C_COMMENT").asText());
          record.put("C_NATIONKEY", data.get("C_NATIONKEY").asLong());
          break;
        case THREE_HUNDRED_COLUMN_TABLE:
          for (int i = 0; i < 300; i++)
          {
            switch (i % 8)
            {
              case 0:
                record.put("C" + i, data.get("C" + i).asDouble());
                break;
              case 2:
                record.put("C" + i, data.get("C" + i).asInt());
                break;
              case 4:
                record.put("C" + i, data.get("C" + i).asLong());
                break;
              case 6:
                record.put("C" + i, data.get("C" + i).asBoolean());
                break;
              default:
                record.put("C" + i, data.get("C" + i).asText());
            }
          }
      }

      fileWriter.append(record);
      fileWriter.flush();
      fileWriter.close();
      send(Utils.TEST_TOPIC, output.toByteArray());
    }
    scanner.close();
    close();
  }
  catch (Exception e)
  {
    e.printStackTrace();
    System.exit(1);
  }
  System.out.println("finished loading");

}
 
Example 17
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_nested_complex_record() throws Exception {

    Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {{
        put("key1", Arrays.asList(1.0, 2.0));
        put("key2", Arrays.asList(3.0, 4.0));
    }};

    Map<String, String> arrayMap11 = new TreeMap<String, String>() {{
        put("key1", "v1");
        put("key2", "v2");
    }};
    Map<String, String> arrayMap12 = new TreeMap<String, String>() {{
        put("key3", "v3");
        put("key4", "v4");
    }};

    GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {{
        put("key1", Arrays.asList(-1.0, -2.0));
        put("key2", Arrays.asList(-3.0, -4.0));
    }};

    Map<String, String> arrayMap21 = new TreeMap<String, String>() {{
        put("key1", "v-1");
        put("key2", "v-2");
    }};
    Map<String, String> arrayMap22 = new TreeMap<String, String>() {{
        put("key3", "v-3");
        put("key4", "v-4");
    }};

    record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record " +
            "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);


    // check values
    Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
    assertTrue(myMapOfArray instanceof Map);
    Map map = (Map) myMapOfArray;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof List);
    assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);

    Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
    assertTrue(myArrayOfMap instanceof List);
    List list = (List) myArrayOfMap;
    Object el0 = list.get(0);
    assertNotNull(el0);
    assertTrue(el0 instanceof Map);
    assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}
 
Example 18
Source File: TestConvertAvroToParquet.java    From nifi with Apache License 2.0
@Before
public void setUp() throws Exception {
    processor = new ConvertAvroToParquet();
    runner = TestRunners.newTestRunner(processor);

    Schema schema = new Schema.Parser().parse(Resources.getResource("avro/all-minus-enum.avsc").openStream());

    DataFileWriter<Object> awriter = new DataFileWriter<Object>(new GenericDatumWriter<Object>());
    GenericData.Record nestedRecord = new GenericRecordBuilder(
            schema.getField("mynestedrecord").schema())
            .set("mynestedint", 1).build();

    GenericData.Record record = new GenericRecordBuilder(schema)
            .set("mynull", null)
            .set("myboolean", true)
            .set("myint", 1)
            .set("mylong", 2L)
            .set("myfloat", 3.1f)
            .set("mydouble", 4.1)
            .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
            .set("mystring", "hello")
            .set("mynestedrecord", nestedRecord)
            .set("myarray", new GenericData.Array<Integer>(Schema.createArray(Schema.create(Schema.Type.INT)), Arrays.asList(1, 2)))
            .set("mymap", ImmutableMap.of("a", 1, "b", 2))
            .set("myfixed", new GenericData.Fixed(Schema.createFixed("ignored", null, null, 1), new byte[] { (byte) 65 }))
            .build();

    awriter.create(schema, tmpAvro);
    awriter.append(record);
    awriter.flush();
    awriter.close();

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmpAvro, datumReader);
    GenericRecord record1 = null;
    while (dataFileReader.hasNext()) {
        record1 = dataFileReader.next(record1);
        records.add(record1);
    }

}
 
Example 19
Source File: AvroRecordInputFormatTest.java    From stratosphere with Apache License 2.0
@Before
public void createFiles() throws IOException {
	testFile = File.createTempFile("AvroInputFormatTest", null);
	
	ArrayList<CharSequence> stringArray = new ArrayList<CharSequence>();
	stringArray.add(TEST_ARRAY_STRING_1);
	stringArray.add(TEST_ARRAY_STRING_2);
	
	ArrayList<Boolean> booleanArray = new ArrayList<Boolean>();
	booleanArray.add(TEST_ARRAY_BOOLEAN_1);
	booleanArray.add(TEST_ARRAY_BOOLEAN_2);
	
	HashMap<CharSequence, Long> longMap = new HashMap<CharSequence, Long>();
	longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
	longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);
	
	
	User user1 = new User();
	user1.setName(TEST_NAME);
	user1.setFavoriteNumber(256);
	user1.setTypeDoubleTest(123.45d);
	user1.setTypeBoolTest(true);
	user1.setTypeArrayString(stringArray);
	user1.setTypeArrayBoolean(booleanArray);
	user1.setTypeEnum(TEST_ENUM_COLOR);
	user1.setTypeMap(longMap);
     
	// Construct via builder
	User user2 = User.newBuilder()
	             .setName("Charlie")
	             .setFavoriteColor("blue")
	             .setFavoriteNumber(null)
	             .setTypeBoolTest(false)
	             .setTypeDoubleTest(1.337d)
	             .setTypeNullTest(null)
	             .setTypeLongTest(1337L)
	             .setTypeArrayString(new ArrayList<CharSequence>())
	             .setTypeArrayBoolean(new ArrayList<Boolean>())
	             .setTypeNullableArray(null)
	             .setTypeEnum(Colors.RED)
	             .setTypeMap(new HashMap<CharSequence, Long>())
	             .build();
	DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
	DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
	dataFileWriter.create(user1.getSchema(), testFile);
	dataFileWriter.append(user1);
	dataFileWriter.append(user2);
	dataFileWriter.close();
}
 
Example 20
Source File: AvroRecordInputFormatTest.java    From Flink-CEPplus with Apache License 2.0
public static void writeTestFile(File testFile) throws IOException {
	ArrayList<CharSequence> stringArray = new ArrayList<>();
	stringArray.add(TEST_ARRAY_STRING_1);
	stringArray.add(TEST_ARRAY_STRING_2);

	ArrayList<Boolean> booleanArray = new ArrayList<>();
	booleanArray.add(TEST_ARRAY_BOOLEAN_1);
	booleanArray.add(TEST_ARRAY_BOOLEAN_2);

	HashMap<CharSequence, Long> longMap = new HashMap<>();
	longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
	longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);

	Address addr = new Address();
	addr.setNum(TEST_NUM);
	addr.setStreet(TEST_STREET);
	addr.setCity(TEST_CITY);
	addr.setState(TEST_STATE);
	addr.setZip(TEST_ZIP);

	User user1 = new User();

	user1.setName(TEST_NAME);
	user1.setFavoriteNumber(256);
	user1.setTypeDoubleTest(123.45d);
	user1.setTypeBoolTest(true);
	user1.setTypeArrayString(stringArray);
	user1.setTypeArrayBoolean(booleanArray);
	user1.setTypeEnum(TEST_ENUM_COLOR);
	user1.setTypeMap(longMap);
	user1.setTypeNested(addr);
	user1.setTypeBytes(ByteBuffer.allocate(10));
	user1.setTypeDate(LocalDate.parse("2014-03-01"));
	user1.setTypeTimeMillis(LocalTime.parse("12:12:12"));
	user1.setTypeTimeMicros(123456);
	user1.setTypeTimestampMillis(DateTime.parse("2014-03-01T12:12:12.321Z"));
	user1.setTypeTimestampMicros(123456L);
	// 20.00
	user1.setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
	// 20.00
	user1.setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));

	// Construct via builder
	User user2 = User.newBuilder()
			.setName("Charlie")
			.setFavoriteColor("blue")
			.setFavoriteNumber(null)
			.setTypeBoolTest(false)
			.setTypeDoubleTest(1.337d)
			.setTypeNullTest(null)
			.setTypeLongTest(1337L)
			.setTypeArrayString(new ArrayList<>())
			.setTypeArrayBoolean(new ArrayList<>())
			.setTypeNullableArray(null)
			.setTypeEnum(Colors.RED)
			.setTypeMap(new HashMap<>())
			.setTypeFixed(null)
			.setTypeUnion(null)
			.setTypeNested(
					Address.newBuilder().setNum(TEST_NUM).setStreet(TEST_STREET)
							.setCity(TEST_CITY).setState(TEST_STATE).setZip(TEST_ZIP)
							.build())
			.setTypeBytes(ByteBuffer.allocate(10))
			.setTypeDate(LocalDate.parse("2014-03-01"))
			.setTypeTimeMillis(LocalTime.parse("12:12:12"))
			.setTypeTimeMicros(123456)
			.setTypeTimestampMillis(DateTime.parse("2014-03-01T12:12:12.321Z"))
			.setTypeTimestampMicros(123456L)
			// 20.00
			.setTypeDecimalBytes(ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()))
			// 20.00
			.setTypeDecimalFixed(new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()))
			.build();
	DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
	DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter);
	dataFileWriter.create(user1.getSchema(), testFile);
	dataFileWriter.append(user1);
	dataFileWriter.append(user2);
	dataFileWriter.close();
}