Java Code Examples for org.apache.avro.file.DataFileReader#close()

The following examples show how to use org.apache.avro.file.DataFileReader#close(). All of them are drawn from open-source projects; the source file and originating project are noted above each example.
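
DataFileReader implements java.io.Closeable, so instead of calling close() by hand as the examples below do, the reader can be scoped with try-with-resources, which closes it even when iteration throws. A minimal sketch, assuming a local Avro container file named records.avro:

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadAvroFile {
  public static void main(String[] args) throws IOException {
    File file = new File("records.avro"); // hypothetical input file
    // The datum reader picks up the writer schema from the file header.
    try (DataFileReader<GenericRecord> reader =
        new DataFileReader<>(file, new GenericDatumReader<GenericRecord>())) {
      for (GenericRecord record : reader) { // DataFileReader is Iterable
        System.out.println(record);
      }
    } // reader.close() runs automatically here
  }
}
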
Example 1
Source File: AvroRecordWriterTest.java    From data-highway with Apache License 2.0
@Test
public void typical() throws Exception {
  Schema schema = SchemaBuilder
      .builder()
      .record("record")
      .fields()
      .requiredLong("id")
      .requiredString("name")
      .endRecord();
  Record value = new GenericRecordBuilder(schema).set("id", 1L).set("name", "hello").build();
  ByteArrayOutputStream output = new ByteArrayOutputStream();

  Factory factory = new Factory(CodecFactory.nullCodec());
  RecordWriter writer = factory.create(schema, output);
  writer.write(value);
  writer.close();

  SeekableInput input = new SeekableByteArrayInput(output.toByteArray());
  DatumReader<Record> datumReader = new GenericDatumReader<>(schema);
  DataFileReader<Record> dataFileReader = new DataFileReader<>(input, datumReader);
  assertThat(dataFileReader.next(), is(value));
  assertThat(dataFileReader.hasNext(), is(false));
  dataFileReader.close();
}
 
Example 2
Source File: BinaryAvroSchemaFileReader.java    From pxf with Apache License 2.0
@Override
public Schema readSchema(Configuration configuration, String schemaName, HcfsType hcfsType, AvroUtilities.FileSearcher fileSearcher) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    DataFileReader<GenericRecord> fileReader = null;

    try {
        File file = fileSearcher.searchForFile(schemaName);
        if (file == null) {
            final Path path = new Path(hcfsType.getDataUri(configuration, schemaName));
            FsInput inStream = new FsInput(path, configuration);
            fileReader = new DataFileReader<>(inStream, datumReader);
        } else {
            fileReader = new DataFileReader<>(file, datumReader);
        }
        return fileReader.getSchema();
    } finally {
        if (fileReader != null) {
            fileReader.close();
        }
    }
}
 
Example 3
Source File: Purge.java    From Cubert with Apache License 2.0
private void loadMembersToPurge(String filename) throws IOException
{
    // TODO: "memberId" column name should be configurable
    DataFileReader<GenericRecord> dataFileReader =
            createDataFileReader(filename, true);
    while (dataFileReader.hasNext())
    {
        GenericRecord record = dataFileReader.next();
        Number memberId = (Number) record.get("memberId");
        if (memberId == null)
        {
            throw new NullPointerException("memberId is null");
        }
        membersToPurge.add(memberId.intValue());
    }
    dataFileReader.close();
}
 
Example 4
Source File: AvroToRestJsonEntryConverterTest.java    From incubator-gobblin with Apache License 2.0
private void testConversion(RestEntry<JsonObject> expected, WorkUnitState actualWorkUnitState) throws DataConversionException, IOException, JSONException {
  Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  tmp.deleteOnExit();
  try {
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader);
    GenericRecord avroRecord = dataFileReader.next();

    AvroToRestJsonEntryConverter converter = new AvroToRestJsonEntryConverter();
    RestEntry<JsonObject> actual = converter.convertRecord(null, avroRecord, actualWorkUnitState).iterator().next();

    Assert.assertEquals(actual.getResourcePath(), expected.getResourcePath());
    JSONAssert.assertEquals(expected.getRestEntryVal().toString(), actual.getRestEntryVal().toString(), false);

    converter.close();
    dataFileReader.close();
  } finally {
    tmp.delete();
  }
}
 
Example 5
Source File: TestFlumeEventAvroEventSerializer.java    From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    ByteBuffer body = (ByteBuffer) record.get("body");
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode(body).toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Example 6
Source File: TestAvroEventSerializer.java    From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String bodyStr = record.get("message").toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Example 7
Source File: SinkAvroTest.java    From gcp-ingestion with Mozilla Public License 2.0
/**
 * Test for a single doctype being written out to the correct location.
 */
@Test
public void testSingleDocumentType() throws IOException, SchemaNotFoundException {
  String input = Resources.getResource("testdata/avro-message-single-doctype.ndjson").getPath();
  String schemas = Resources.getResource("avro/test-schema.tar.gz").getPath();
  String output = outputPath
      + "/${document_namespace:-NONE}.${document_type:-NONE}.${document_version:-0}";

  Sink.main(new String[] { "--inputFileFormat=json", "--inputType=file", "--input=" + input,
      "--outputType=avro", "--output=" + output, "--outputFileCompression=UNCOMPRESSED",
      "--schemasLocation=" + schemas, "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutputType=stdout" });

  assertThat("output count", getPrefixFileCount(outputPath, "namespace_0"),
      Matchers.greaterThan(0L));

  AvroSchemaStore store = AvroSchemaStore.of(schemas, null);

  List<Path> paths = Files.walk(Paths.get(outputPath)).filter(Files::isRegularFile)
      .collect(Collectors.toList());

  List<Integer> results = new ArrayList<>();
  for (Path path : paths) {
    Schema schema = store.getSchema("namespace_0/foo/foo.1.avro.json");
    DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
    DataFileReader<GenericRecord> fileReader = new DataFileReader<>(path.toFile(), reader);
    while (fileReader.hasNext()) {
      GenericRecord record = fileReader.next();
      results.add((Integer) record.get("test_int"));
    }
    fileReader.close();
  }
  results.sort(null);
  assertEquals(results, Arrays.asList(1, 2, 3));
}
 
Example 8
Source File: JdbcAvroJobTest.java    From dbeam with Apache License 2.0
private List<GenericRecord> readAvroRecords(File avroFile, Schema schema) throws IOException {
  GenericDatumReader<GenericRecord> datum = new GenericDatumReader<>(schema);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datum);
  List<GenericRecord> records =
      StreamSupport.stream(dataFileReader.spliterator(), false).collect(Collectors.toList());
  dataFileReader.close();
  return records;
}
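
Because the spliterator reads records lazily, the collect(...) call must complete before close() is invoked, as it does above. Scoping the reader with try-with-resources makes that ordering explicit and also closes the reader if the stream throws; a sketch of the same method in that style, under the same assumptions:

private List<GenericRecord> readAvroRecords(File avroFile, Schema schema) throws IOException {
  GenericDatumReader<GenericRecord> datum = new GenericDatumReader<>(schema);
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datum)) {
    // The stream is fully collected inside the try block, before close().
    return StreamSupport.stream(dataFileReader.spliterator(), false)
        .collect(Collectors.toList());
  }
}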
 
Example 9
Source File: Purge.java    From Cubert with Apache License 2.0
private void purge(String src, String dst) throws IOException
{
    DataFileReader<GenericRecord> dataFileReader = createDataFileReader(src, false);
    DataFileWriter<GenericRecord> writer = createDataFileWriter(dataFileReader);

    numRecords = 0;
    recordsPurged = 0;
    remainingRecords = 0;

    // Copy
    while (dataFileReader.hasNext())
    {
        numRecords++;
        GenericRecord record = dataFileReader.next();
        if (record == null)
        {
            continue;
        }

        Number column = (Number) record.get(columnName);
        if ((column == null) || (!membersToPurge.contains(column.intValue())))
        {
            remainingRecords++;
            writer.append(record);
        }
    }

    recordsPurged = numRecords - remainingRecords;
    writer.close();
    dataFileReader.close();
}
 
Example 10
Source File: GenerateDictionary.java    From Cubert with Apache License 2.0
public static Map<String, CodeDictionary> loadDictionary(String path,
                                                         boolean isHDFS,
                                                         Configuration conf) throws IOException
{
    Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>();
    Schema schema = getSchema();

    DatumReader<GenericRecord> datumReader =
            new GenericDatumReader<GenericRecord>(schema);
    DataFileReader<GenericRecord> dataFileReader;

    if (isHDFS)
    {
        dataFileReader =
                new DataFileReader<GenericRecord>(new FsInput(new Path(path), conf),
                                                  datumReader);
    }
    else
    {
        dataFileReader =
                new DataFileReader<GenericRecord>(new File(path), datumReader);
    }
    GenericRecord record = null;
    while (dataFileReader.hasNext())
    {
        record = dataFileReader.next();
        String colName = record.get("colname").toString();
        String colValue = record.get("colvalue").toString();
        int code = (Integer) record.get("code");

        CodeDictionary dict = dictionaries.get(colName);
        if (dict == null)
        {
            dict = new CodeDictionary();
            dictionaries.put(colName, dict);
        }

        dict.addKeyCode(colValue, code);
    }

    dataFileReader.close();

    return dictionaries;
}
 
Example 11
Source File: AvroHdfsDataWriterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testWrite() throws IOException {
  // Write all test records
  for (String record : TestConstants.JSON_RECORDS) {
    this.writer.write(convertRecord(record));
  }

  Assert.assertEquals(this.writer.recordsWritten(), 3);

  this.writer.close();
  this.writer.commit();

  File outputFile =
      new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath, TestConstants.TEST_FILE_NAME);
  DataFileReader<GenericRecord> reader =
      new DataFileReader<>(outputFile, new GenericDatumReader<GenericRecord>());
  Schema fileSchema = reader.getSchema();
  Assert.assertEquals(fileSchema.getProp(TEST_PROPERTY_KEY), TEST_PROPERTY_VALUE);

  // Read the records back and assert they are identical to the ones written
  GenericRecord user1 = reader.next();
  // Strings are in UTF8, so we have to call toString() here and below
  Assert.assertEquals(user1.get("name").toString(), "Alyssa");
  Assert.assertEquals(user1.get("favorite_number"), 256);
  Assert.assertEquals(user1.get("favorite_color").toString(), "yellow");

  GenericRecord user2 = reader.next();
  Assert.assertEquals(user2.get("name").toString(), "Ben");
  Assert.assertEquals(user2.get("favorite_number"), 7);
  Assert.assertEquals(user2.get("favorite_color").toString(), "red");

  GenericRecord user3 = reader.next();
  Assert.assertEquals(user3.get("name").toString(), "Charlie");
  Assert.assertEquals(user3.get("favorite_number"), 68);
  Assert.assertEquals(user3.get("favorite_color").toString(), "blue");

  reader.close();

  FsWriterMetrics metrics = FsWriterMetrics.fromJson(properties.getProp(FsDataWriter.FS_WRITER_METRICS_KEY));
  Assert.assertEquals(metrics.fileInfos.size(), 1);
  FsWriterMetrics.FileInfo fileInfo = metrics.fileInfos.iterator().next();

  Assert.assertEquals(fileInfo.fileName, TestConstants.TEST_FILE_NAME);
  Assert.assertEquals(fileInfo.numRecords, 3);
  Assert.assertNull(metrics.partitionInfo.partitionKey);
  Assert.assertEquals(metrics.partitionInfo.branchId, 0);
}
 
Example 12
Source File: TestSyslogAvroEventSerializer.java    From mt-flume with Apache License 2.0
@Test
public void test() throws FileNotFoundException, IOException {
  // Snappy currently broken on Mac in OpenJDK 7 per FLUME-2012
  Assume.assumeTrue(!"Mac OS X".equals(System.getProperty("os.name")) ||
    !System.getProperty("java.version").startsWith("1.7."));

  // create the file, write some data
  OutputStream out = new FileOutputStream(testFile);
  String builderName = SyslogAvroEventSerializer.Builder.class.getName();

  Context ctx = new Context();
  ctx.put("syncInterval", "4096");
  ctx.put("compressionCodec", "snappy");

  EventSerializer serializer =
      EventSerializerFactory.getInstance(builderName, ctx, out);
  serializer.afterCreate(); // must call this when a file is newly created

  List<Event> events = generateSyslogEvents();
  for (Event e : events) {
    serializer.write(e);
  }
  serializer.flush();
  serializer.beforeClose();
  out.flush();
  out.close();

  // now try to read the file back

  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(testFile, reader);

  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    int facility = (Integer) record.get("facility");
    int severity = (Integer) record.get("severity");
    long timestamp = (Long) record.get("timestamp");
    String hostname = record.get("hostname").toString();
    String message = record.get("message").toString();

    Assert.assertEquals("Facility should be 1", 1, facility);
    System.out.println(timestamp + ": " + message);
    numEvents++;
  }

  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);

  FileUtils.forceDelete(testFile);
}
 
Example 13
Source File: TestJavaAvroEventSerializer.java    From flume-plugins with MIT License
@Test
public void test() throws FileNotFoundException, IOException {

    // create the file, write some data
    OutputStream out = new FileOutputStream(testFile);
    String builderName = JavaLogAvroEventSerializer.Builder.class.getName();

    Context ctx = new Context();
    ctx.put("syncInterval", "4096");

    EventSerializer serializer =
            EventSerializerFactory.getInstance(builderName, ctx, out);
    serializer.afterCreate(); // must call this when a file is newly created

    List<Event> events = generateJavaEvents();
    for (Event e : events) {
        serializer.write(e);
    }
    serializer.flush();
    serializer.beforeClose();
    out.flush();
    out.close();

    // now try to read the file back

    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader =
            new DataFileReader<GenericRecord>(testFile, reader);

    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    int numEvents = 0;
    while (fileReader.hasNext()) {
        fileReader.next(record);
        long timestamp = (Long) record.get("timestamp");
        String datetime = record.get("datetime").toString();
        String classname = record.get("classname").toString();
        String message = record.get("message").toString();

        System.out.println(classname + ": " + message + " (at " + datetime + ")");
        numEvents++;
    }

    fileReader.close();
    Assert.assertEquals("Should have found a total of 4 events", 4, numEvents);

    FileUtils.forceDelete(testFile);
}
 
Example 14
Source File: TestSyslogAvroEventSerializer.java    From flume-plugins with MIT License
@Test
public void test() throws FileNotFoundException, IOException {

    // create the file, write some data
    OutputStream out = new FileOutputStream(testFile);
    String builderName = SyslogAvroEventSerializer.Builder.class.getName();

    Context ctx = new Context();
    ctx.put("syncInterval", "4096");
    ctx.put("path", "src/test/resources/customerToHostsFile.txt");

    EventSerializer serializer =
            EventSerializerFactory.getInstance(builderName, ctx, out);
    serializer.afterCreate(); // must call this when a file is newly created

    List<Event> events = generateSyslogEvents();
    for (Event e : events) {
        serializer.write(e);
    }
    serializer.flush();
    serializer.beforeClose();
    out.flush();
    out.close();

    // now try to read the file back

    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader =
            new DataFileReader<GenericRecord>(testFile, reader);

    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    int numEvents = 0;
    while (fileReader.hasNext()) {
        fileReader.next(record);
        long timestamp = (Long) record.get("timestamp");
        String datetime = record.get("datetime").toString();
        String hostname = record.get("hostname").toString();
        Map<String, String> headers = (Map<String, String>) record.get("headers");
        String message = record.get("message").toString();

        System.out.println(hostname + " (" + headers + ")" + ": " + message);
        numEvents++;
    }

    fileReader.close();
    Assert.assertEquals("Should have found a total of 6 events", 6, numEvents);

    FileUtils.forceDelete(testFile);
}
 
Example 15
Source File: TestApacheAvroEventSerializer.java    From flume-plugins with MIT License
@Test
public void test() throws FileNotFoundException, IOException {

    // create the file, write some data
    OutputStream out = new FileOutputStream(testFile);
    String builderName = ApacheLogAvroEventSerializer.Builder.class.getName();

    Context ctx = new Context();
    ctx.put("syncInterval", "4096");

    EventSerializer serializer =
            EventSerializerFactory.getInstance(builderName, ctx, out);
    serializer.afterCreate(); // must call this when a file is newly created

    List<Event> events = generateApacheEvents();
    for (Event e : events) {
        serializer.write(e);
    }
    serializer.flush();
    serializer.beforeClose();
    out.flush();
    out.close();

    // now try to read the file back

    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader =
            new DataFileReader<GenericRecord>(testFile, reader);

    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    int numEvents = 0;
    while (fileReader.hasNext()) {
        fileReader.next(record);
        String ip = record.get("ip").toString();
        String uri = record.get("uri").toString();
        Integer statuscode = (Integer) record.get("statuscode");
        String original = record.get("original").toString();
        String connectionstatus = record.get("connectionstatus").toString();

        Assert.assertEquals("Ip should be 80.79.194.3", "80.79.194.3", ip);
        System.out.println("IP " + ip + " requested: " + uri + " with status code " + statuscode + " and connectionstatus: " + connectionstatus);
        System.out.println("Original logline: " + original);
        numEvents++;
    }

    fileReader.close();
    Assert.assertEquals("Should have found a total of 3 events", 2, numEvents);

    FileUtils.forceDelete(testFile);
}