Java Code Examples for org.apache.avro.file.DataFileReader#next()

The following examples show how to use org.apache.avro.file.DataFileReader#next(). They are drawn from a variety of open-source projects; the source file, project, and license are noted above each example.
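Before the project-specific examples, here is a minimal sketch (with a hypothetical helper name and file argument) of the two forms of next(): the no-argument form allocates a fresh record on every call, while next(reuse) re-populates an existing object, avoiding per-record allocation in tight loops. Because an Avro data file carries its writer schema in the header, the GenericDatumReader can be constructed without a schema.

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

// Hypothetical helper: prints every record in an Avro data file.
public static void printAllRecords(File avroFile) throws IOException {
  // No schema passed: the writer schema is read from the file header.
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  // DataFileReader implements Closeable, so try-with-resources closes it.
  try (DataFileReader<GenericRecord> reader = new DataFileReader<>(avroFile, datumReader)) {
    GenericRecord record = null;
    while (reader.hasNext()) {
      record = reader.next(record); // next(reuse): 'record' is re-populated after the first pass
      System.out.println(record);
    }
  }
}

Most of the examples below follow this same hasNext()/next() pattern, in either the fresh-allocation or the reuse form.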
Example 1
Source File: AvroOutputFormatTest.java    From flink with Apache License 2.0
@Test
public void testGenericRecord() throws IOException {
	final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
	final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class);
	Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}");
	outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
	outputFormat.setSchema(schema);
	output(outputFormat, schema);

	GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
	DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader);

	while (dataFileReader.hasNext()) {
		GenericRecord record = dataFileReader.next();
		assertEquals(record.get("user_name").toString(), "testUser");
		assertEquals(record.get("favorite_number"), 1);
		assertEquals(record.get("favorite_color").toString(), "blue");
	}

	//cleanup
	FileSystem fs = FileSystem.getLocalFileSystem();
	fs.delete(outputPath, false);
}
 
Example 2
Source File: TestAvroEventSerializer.java    From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String bodyStr = record.get("message").toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Example 3
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testOverrideTypeMapping() throws IOException {
  String [] types = { "INT" };
  String [] vals = { "10" };
  createTableWithColTypes(types, vals);

  String [] extraArgs = { "--map-column-java", "DATA_COL0=String"};

  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "DATA_COL0", Schema.Type.STRING);

  GenericRecord record1 = reader.next();
  assertEquals("DATA_COL0", new Utf8("10"), record1.get("DATA_COL0"));
}
 
Example 4
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testFirstUnderscoreInColumnName() throws IOException {
  String [] names = { "_NAME" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "__NAME", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("__NAME", 1987, record1.get("__NAME"));
}
 
Example 5
Source File: ReadActivityFile.java    From big-data-lite with MIT License
/**
 * Reads the Avro file.
 * @throws IOException
 */
private void readFile() throws IOException {
    // Deserialize Activities from disk
    
    File file = new File(filename);
            
    DatumReader<Activity> activityDatumReader = new SpecificDatumReader<Activity>(Activity.class);
    DataFileReader<Activity> dataFileReader = new DataFileReader<Activity>(file, activityDatumReader);

    Activity activity = null;
    int i = 0;
    
    while (dataFileReader.hasNext() && i < numrecs) {
        i++;
        activity = dataFileReader.next(activity);
        System.out.println(activity);
    }
}
 
Example 6
Source File: TestFlumeEventAvroEventSerializer.java    From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    ByteBuffer body = (ByteBuffer) record.get("body");
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode(body).toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Example 7
Source File: Converter.java    From xml-avro with Apache License 2.0
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);

    GenericRecord record = dataFileReader.next();

    Document doc;
    try {
        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    }

    Element el = unwrapElement(record, doc);
    doc.appendChild(el);

    saveDocument(doc, xmlFile);
}
 
Example 8
Source File: AvroReader.java    From HBase-ToHDFS with Apache License 2.0
public static void main(String[] args) throws IOException {
  if (args.length == 0) {
    System.out.println("AvroReader {dataFile} {schemaFile} {max.lines.to.read.optional}");
    return; // avoid an ArrayIndexOutOfBoundsException when no arguments are given
  }
  
  String dataFile = args[0];
  String schemaFile = args[1];
  int recordsToRead = Integer.MAX_VALUE;
  if (args.length > 2) {
    recordsToRead = Integer.parseInt(args[2]);
  }
  
  Schema.Parser parser = new Schema.Parser();
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(config);
  
  Schema schema = parser.parse(fs.open(new Path(schemaFile)));
  
  Path dataFilePath = new Path(dataFile);
  FileStatus fileStatus = fs.getFileStatus(dataFilePath);
  
  AvroFSInput input = new AvroFSInput(fs.open(dataFilePath), fileStatus.getLen());
  
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
  System.out.println("Schema: " + dataFileReader.getSchema());
  System.out.println();
  int counter = 0;
  while (dataFileReader.hasNext() && counter++ < recordsToRead) {
    GenericRecord r = dataFileReader.next();
    System.out.println(counter + " : " + r);
  }
}
 
Example 9
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testAvroGeneratorDateType() throws Exception {
  Map<String, Field> map = new LinkedHashMap<>();
  map.put("d", Field.create(Field.Type.DATE, new Date(116, 0, 1)));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    DATE_SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertEquals(16801, readRecord.get("d"));
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example 10
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
private void testGenerateCompressed(String codecName) throws Exception {

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataGenerator gen = new AvroDataOutputStreamGenerator(
        false,
        baos,
        codecName,
        SCHEMA,
        AvroTypeUtil.getDefaultValuesFromSchema(SCHEMA, new HashSet<String>()),
        null,
        null,
        0
    );
    Record record = createRecord();
    gen.write(record);
    gen.close();

    //reader schema must be extracted from the data file
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
        new SeekableByteArrayInput(baos.toByteArray()), reader);
    Assert.assertEquals(codecName, dataFileReader.getMetaString("avro.codec"));
    Assert.assertTrue(dataFileReader.hasNext());
    GenericRecord readRecord = dataFileReader.next();

    Assert.assertEquals("hari", readRecord.get("name").toString());
    Assert.assertEquals(3100, readRecord.get("age"));
    Assert.assertFalse(dataFileReader.hasNext());
  }
 
Example 11
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testAvroGeneratorDecimalType() throws Exception {
  Map<String, Field> map = new LinkedHashMap<>();
  map.put("decimal", Field.create(Field.Type.DECIMAL, BigDecimal.valueOf(1.5)));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    DECIMAL_SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertArrayEquals(new byte[] {0x0F}, ((ByteBuffer)readRecord.get("decimal")).array());
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example 12
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testNullableAvroImport() throws IOException, SQLException {
  String [] types = { "INT" };
  String [] vals = { null };
  createTableWithColTypes(types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);

  GenericRecord record1 = reader.next();
  assertNull(record1.get("DATA_COL0"));

}
 
Example 13
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0 5 votes vote down vote up
@Test
public void testConvertIntToStringInUnion() throws Exception {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    true,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    null,
    null,
    null,
    null,
    0
  );

  Map<String, Field> rootField = new HashMap<>();
  rootField.put("string", Field.create(Field.Type.INTEGER, 10));

  Record r = RecordCreator.create();
  r.getHeader().setAttribute(BaseAvroDataGenerator.AVRO_SCHEMA_HEADER, STRING_UNION_SCHEMA);
  r.set(Field.create(rootField));
  gen.write(r);
  gen.close();

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertEquals(new Utf8("10"), readRecord.get("string"));
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example 14
Source File: LobAvroImportTestCase.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Import blob data that is larger than inline lob limit. The reference file
 * should be saved as Avro bytes. Blob data should be saved in LOB file
 * format.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportExternal() throws IOException, SQLException {
  String [] types = { getBlobType() };
  String data = "This is short BLOB data";
  String [] vals = { getBlobInsertStr(data) };

  createTableWithColTypes(types, vals);

  // Set inline lob limit to a small value so that blob data will be
  // written to an external file.
  runImport(getArgv("--inline-lob-limit", "1"));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  GenericRecord record = reader.next();

  // Verify that the reference file is written in Avro bytes.
  ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
  String returnVal = new String(buf.array());
  String expectedStart = "externalLob(lf,_lob/large_obj";
  String expectedEnd = getTableNum() + "_m_0000000.lob,68,"
    + data.length() + ")";

  assertNotNull(returnVal);
  assertTrue("ExpectedStart: " + expectedStart + ", value: " + returnVal, returnVal.startsWith(expectedStart));
  assertTrue("ExpectedEnd: " + expectedEnd + ", value: " + returnVal, returnVal.endsWith(expectedEnd));

  // Verify that blob data stored in the external lob file is correct.
  BlobRef br = BlobRef.parse(returnVal);
  Path lobFileDir = new Path(getWarehouseDir(), getTableName());
  InputStream in = br.getDataStream(getConf(), lobFileDir);

  byte [] bufArray = new byte[data.length()];
  int chars = in.read(bufArray);
  in.close();

  assertEquals(chars, data.length());

  returnVal = new String(bufArray);
  String expectedVal = data;

  assertEquals(getColName(0), returnVal, expectedVal);
}
 
Example 15
Source File: TestConvertAvroToParquet.java    From nifi with Apache License 2.0
@Before
public void setUp() throws Exception {
    processor = new ConvertAvroToParquet();
    runner = TestRunners.newTestRunner(processor);

    Schema schema = new Schema.Parser().parse(Resources.getResource("avro/all-minus-enum.avsc").openStream());

    DataFileWriter<Object> awriter = new DataFileWriter<Object>(new GenericDatumWriter<Object>());
    GenericData.Record nestedRecord = new GenericRecordBuilder(
            schema.getField("mynestedrecord").schema())
            .set("mynestedint", 1).build();

    GenericData.Record record = new GenericRecordBuilder(schema)
            .set("mynull", null)
            .set("myboolean", true)
            .set("myint", 1)
            .set("mylong", 2L)
            .set("myfloat", 3.1f)
            .set("mydouble", 4.1)
            .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
            .set("mystring", "hello")
            .set("mynestedrecord", nestedRecord)
            .set("myarray", new GenericData.Array<Integer>(Schema.createArray(Schema.create(Schema.Type.INT)), Arrays.asList(1, 2)))
            .set("mymap", ImmutableMap.of("a", 1, "b", 2))
            .set("myfixed", new GenericData.Fixed(Schema.createFixed("ignored", null, null, 1), new byte[] { (byte) 65 }))
            .build();

    awriter.create(schema, tmpAvro);
    awriter.append(record);
    awriter.flush();
    awriter.close();

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmpAvro, datumReader);
    GenericRecord record1 = null;
    while (dataFileReader.hasNext()) {
        record1 = dataFileReader.next(record1);
        records.add(record1);
    }

}
 
Example 16
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testAvroGeneratorShortType() throws Exception {
  final String SCHEMA_JSON = "{\n"
  +"\"type\": \"record\",\n"
  +"\"name\": \"WithDecimal\",\n"
  +"\"fields\": [\n"
  +" {\"name\": \"short\", \"type\": \"int\"}"
  +"]}";
  final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON);

  Map<String, Field> map = new LinkedHashMap<>();
  map.put("short", Field.create(Field.Type.SHORT, (short)1));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Object retrievedField = readRecord.get("short");
  Assert.assertEquals(1, retrievedField);

  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example 17
Source File: TestSyslogAvroEventSerializer.java    From flume-plugins with MIT License
@Test
public void test() throws FileNotFoundException, IOException {

    // create the file, write some data
    OutputStream out = new FileOutputStream(testFile);
    String builderName = SyslogAvroEventSerializer.Builder.class.getName();

    Context ctx = new Context();
    ctx.put("syncInterval", "4096");
    ctx.put("path", "src/test/resources/customerToHostsFile.txt");

    EventSerializer serializer =
            EventSerializerFactory.getInstance(builderName, ctx, out);
    serializer.afterCreate(); // must call this when a file is newly created

    List<Event> events = generateSyslogEvents();
    for (Event e : events) {
        serializer.write(e);
    }
    serializer.flush();
    serializer.beforeClose();
    out.flush();
    out.close();

    // now try to read the file back

    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader =
            new DataFileReader<GenericRecord>(testFile, reader);

    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    int numEvents = 0;
    while (fileReader.hasNext()) {
        fileReader.next(record);
        long timestamp = (Long) record.get("timestamp");
        String datetime = record.get("datetime").toString();
        String hostname = record.get("hostname").toString();
        Map<String, String> headers = (Map<String, String>) record.get("headers");
        String message = record.get("message").toString();

        System.out.println(hostname + " (" + headers + ")" + ": " + message);
        numEvents++;
    }

    fileReader.close();
    Assert.assertEquals("Should have found a total of 6 events", 6, numEvents);

    FileUtils.forceDelete(testFile);
}
 
Example 18
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Helper method that runs an import using Avro with optional command line
 * arguments and checks that the created file matches the expectations.
 * <p/>
 * This can be used to test various extra options that are implemented for
 * the Avro input.
 *
 * @param extraArgs extra command line arguments to pass to Sqoop in addition
 *                  to those that {@link #getOutputArgv(boolean, String[])}
 *                  returns
 */
private void avroImportTestHelper(String[] extraArgs, String codec)
  throws IOException {
  String[] types =
    {"BIT", "INTEGER", "BIGINT", "REAL", "DOUBLE", "VARCHAR(6)",
      "VARBINARY(2)", };
  String[] vals = {"true", "100", "200", "1.0", "2.0", "'s'", "'0102'", };
  createTableWithColTypes(types, vals);

  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "DATA_COL0", Schema.Type.BOOLEAN);
  checkField(fields.get(1), "DATA_COL1", Schema.Type.INT);
  checkField(fields.get(2), "DATA_COL2", Schema.Type.LONG);
  checkField(fields.get(3), "DATA_COL3", Schema.Type.FLOAT);
  checkField(fields.get(4), "DATA_COL4", Schema.Type.DOUBLE);
  checkField(fields.get(5), "DATA_COL5", Schema.Type.STRING);
  checkField(fields.get(6), "DATA_COL6", Schema.Type.BYTES);

  GenericRecord record1 = reader.next();
  assertEquals("DATA_COL0", true, record1.get("DATA_COL0"));
  assertEquals("DATA_COL1", 100, record1.get("DATA_COL1"));
  assertEquals("DATA_COL2", 200L, record1.get("DATA_COL2"));
  assertEquals("DATA_COL3", 1.0f, record1.get("DATA_COL3"));
  assertEquals("DATA_COL4", 2.0, record1.get("DATA_COL4"));
  assertEquals("DATA_COL5", new Utf8("s"), record1.get("DATA_COL5"));
  Object object = record1.get("DATA_COL6");
  assertTrue(object instanceof ByteBuffer);
  ByteBuffer b = ((ByteBuffer) object);
  assertEquals((byte) 1, b.get(0));
  assertEquals((byte) 2, b.get(1));

  if (codec != null) {
    assertEquals(codec, reader.getMetaString(DataFileConstants.CODEC));
  }

  checkSchemaFile(schema);
}
 
Example 19
Source File: AvroHdfsDataWriterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testWrite() throws IOException {
  // Write all test records
  for (String record : TestConstants.JSON_RECORDS) {
    this.writer.write(convertRecord(record));
  }

  Assert.assertEquals(this.writer.recordsWritten(), 3);

  this.writer.close();
  this.writer.commit();

  File outputFile =
      new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath, TestConstants.TEST_FILE_NAME);
  DataFileReader<GenericRecord> reader =
      new DataFileReader<>(outputFile, new GenericDatumReader<GenericRecord>());
  Schema fileSchema = reader.getSchema();
  Assert.assertEquals(fileSchema.getProp(TEST_PROPERTY_KEY), TEST_PROPERTY_VALUE);

  // Read the records back and assert they are identical to the ones written
  GenericRecord user1 = reader.next();
  // Strings are in UTF8, so we have to call toString() here and below
  Assert.assertEquals(user1.get("name").toString(), "Alyssa");
  Assert.assertEquals(user1.get("favorite_number"), 256);
  Assert.assertEquals(user1.get("favorite_color").toString(), "yellow");

  GenericRecord user2 = reader.next();
  Assert.assertEquals(user2.get("name").toString(), "Ben");
  Assert.assertEquals(user2.get("favorite_number"), 7);
  Assert.assertEquals(user2.get("favorite_color").toString(), "red");

  GenericRecord user3 = reader.next();
  Assert.assertEquals(user3.get("name").toString(), "Charlie");
  Assert.assertEquals(user3.get("favorite_number"), 68);
  Assert.assertEquals(user3.get("favorite_color").toString(), "blue");

  reader.close();

  FsWriterMetrics metrics = FsWriterMetrics.fromJson(properties.getProp(FsDataWriter.FS_WRITER_METRICS_KEY));
  Assert.assertEquals(metrics.fileInfos.size(),1);
  FsWriterMetrics.FileInfo fileInfo = metrics.fileInfos.iterator().next();

  Assert.assertEquals(fileInfo.fileName, TestConstants.TEST_FILE_NAME);
  Assert.assertEquals(fileInfo.numRecords, 3);
  Assert.assertNull(metrics.partitionInfo.partitionKey);
  Assert.assertEquals(metrics.partitionInfo.branchId, 0);
}
 
Example 20
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@SuppressWarnings("unchecked")
@Test
public void testGenerateWithDefaults() throws Exception {

  Stage.Context context = ContextInfoCreator.createTargetContext("i", false, OnRecordError.TO_ERROR);

  DataFactory dataFactory = new DataGeneratorFactoryBuilder(context, DataGeneratorFormat.AVRO)
    .setCharset(Charset.forName("UTF-16"))
    .setConfig(SCHEMA_KEY, RECORD_SCHEMA)
    .setConfig(
        DEFAULT_VALUES_KEY,
        AvroTypeUtil.getDefaultValuesFromSchema(new Schema.Parser().parse(RECORD_SCHEMA), new HashSet<String>())
    )
    .build();
  Assert.assertTrue(dataFactory instanceof AvroDataGeneratorFactory);
  AvroDataGeneratorFactory factory = (AvroDataGeneratorFactory) dataFactory;

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  AvroDataOutputStreamGenerator gen = (AvroDataOutputStreamGenerator) factory.getGenerator(baos);
  Assert.assertNotNull(gen);

  Record record = RecordCreator.create();
  Map<String, Field> employee = new HashMap<>();
  record.set(Field.create(employee));

  gen.write(record);
  gen.close();

  // reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord result = dataFileReader.next();

  Assert.assertEquals("Hello", result.get("name").toString());
  Assert.assertEquals(25, result.get("age"));
  Assert.assertEquals(false, result.get("resident"));
  Assert.assertEquals("DIAMONDS", result.get("enum").toString());

  List<Utf8> emails = (List<Utf8>) result.get("emails");
  Assert.assertEquals(4, emails.size());
  Assert.assertEquals("SPADES", emails.get(0).toString());
  Assert.assertEquals("HEARTS", emails.get(1).toString());
  Assert.assertEquals("DIAMONDS", emails.get(2).toString());
  Assert.assertEquals("CLUBS", emails.get(3).toString());

  Assert.assertEquals(null, result.get("boss"));

  Map<Utf8, Object> phones = (Map<Utf8, Object>) result.get("phones");
  Assert.assertEquals(8675309, (long)phones.get(new Utf8("home")));
  Assert.assertEquals(8675308, (long)phones.get(new Utf8("mobile")));
}