org.apache.avro.file.DataFileReader Java Examples

The following examples show how to use org.apache.avro.file.DataFileReader. Each example is drawn from an open source project; the source file, project, and license are noted above each snippet.
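Before working through the examples, it may help to see the core pattern they all share. The sketch below is a minimal, self-contained reader; the file name users.avro is a placeholder, and the schema is read from the file header, so none needs to be supplied:

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class DataFileReaderSketch {
  public static void main(String[] args) throws IOException {
    // With no schema given, GenericDatumReader uses the writer schema from the file header.
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    // "users.avro" is a placeholder; any Avro container file works.
    try (DataFileReader<GenericRecord> fileReader =
        new DataFileReader<>(new File("users.avro"), datumReader)) {
      GenericRecord record = null;
      while (fileReader.hasNext()) {
        // Passing the previous record lets Avro reuse the object, as Example #8 below does.
        record = fileReader.next(record);
        System.out.println(record);
      }
    }
  }
}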
Example #1
Source File: LobAvroImportTestCase.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/** Import blob data that is smaller than inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportInline() throws IOException, SQLException {
  String [] types = { getBlobType() };
  String expectedVal = "This is short BLOB data";
  String [] vals = { getBlobInsertStr(expectedVal) };

  createTableWithColTypes(types, vals);

  runImport(getArgv());

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  GenericRecord record = reader.next();

  // Verify that blob data is imported as Avro bytes.
  ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
  String returnVal = new String(buf.array());

  assertEquals(getColName(0), expectedVal, returnVal);
}
 
Example #2
Source File: AvroInputFormat.java    From flink with Apache License 2.0
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	DatumReader<E> datumReader;

	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else {
		datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
	}
	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
	DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example #3
Source File: AvroOutputFormatTest.java    From flink with Apache License 2.0
@Test
public void testGenericRecord() throws IOException {
	final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
	final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class);
	Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}");
	outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
	outputFormat.setSchema(schema);
	output(outputFormat, schema);

	GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
	DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader);

	while (dataFileReader.hasNext()) {
		GenericRecord record = dataFileReader.next();
		assertEquals(record.get("user_name").toString(), "testUser");
		assertEquals(record.get("favorite_number"), 1);
		assertEquals(record.get("favorite_color").toString(), "blue");
	}

	//cleanup
	FileSystem fs = FileSystem.getLocalFileSystem();
	fs.delete(outputPath, false);
}
 
Example #4
Source File: Purge.java    From Cubert with Apache License 2.0
private DataFileReader<GenericRecord> createDataFileReader(String filename,
                                                           boolean localFS) throws IOException
{
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader;

    if (localFS)
    {
        dataFileReader =
                new DataFileReader<GenericRecord>(new File(filename), datumReader);
    }
    else
    {
        Path path = new Path(filename);
        SeekableInput input = new FsInput(path, conf);
        dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
    }

    return dataFileReader;
}
 
Example #5
Source File: AvroOutputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testGenericRecord() throws IOException {
	final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
	final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class);
	Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}");
	outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
	outputFormat.setSchema(schema);
	output(outputFormat, schema);

	GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
	DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader);

	while (dataFileReader.hasNext()) {
		GenericRecord record = dataFileReader.next();
		assertEquals(record.get("user_name").toString(), "testUser");
		assertEquals(record.get("favorite_number"), 1);
		assertEquals(record.get("favorite_color").toString(), "blue");
	}

	//cleanup
	FileSystem fs = FileSystem.getLocalFileSystem();
	fs.delete(outputPath, false);
}
 
Example #6
Source File: AvroToOrcRecordConverter.java    From datacollector with Apache License 2.0
public void convert(SeekableInput avroInputFile, Path orcOutputFile) throws IOException {
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(avroInputFile, reader)) {
    Schema avroSchema = fileReader.getSchema();

    initializeWriter(avroSchema, orcOutputFile);

    while (fileReader.hasNext()) {
      GenericRecord record = fileReader.next();

      addAvroRecord(record);
    }

    closeWriter();
  }
}
 
Example #7
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * Get the latest avro schema for a directory
 * @param directory the input dir that contains avro files
 * @param fs the {@link FileSystem} for the given directory.
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory
 * @throws IOException
 */
public static Schema getDirectorySchema(Path directory, FileSystem fs, boolean latest) throws IOException {
  Schema schema = null;
  try (Closer closer = Closer.create()) {
    List<FileStatus> files = getDirectorySchemaHelper(directory, fs);
    if (files == null || files.size() == 0) {
      LOG.warn("There is no previous avro file in the directory: " + directory);
    } else {
      FileStatus file = latest ? files.get(0) : files.get(files.size() - 1);
      LOG.debug("Path to get the avro schema: " + file);
      FsInput fi = new FsInput(file.getPath(), fs.getConf());
      GenericDatumReader<GenericRecord> genReader = new GenericDatumReader<>();
      schema = closer.register(new DataFileReader<>(fi, genReader)).getSchema();
    }
  } catch (IOException ioe) {
    throw new IOException("Cannot get the schema for directory " + directory, ioe);
  }
  return schema;
}
 
Example #8
Source File: TestAvroEventSerializer.java    From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String bodyStr = record.get("message").toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Example #9
Source File: FileSystemDatasetReader.java    From kite with Apache License 2.0
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
    "A reader may not be opened more than once - current state:%s", state);

  LOG.debug("Opening reader on path:{}", path);

  try {
    reader = new DataFileReader<E>(new AvroFSInput(fileSystem.open(path),
      fileSystem.getFileStatus(path).getLen()),
        DataModelUtil.getDatumReaderForType(type, schema));
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create reader path:" + path, e);
  }

  state = ReaderWriterState.OPEN;
}
 
Example #10
Source File: AvroMorphlineTest.java    From kite with Apache License 2.0
private void runTweetContainer(String morphlineConfigFile, String[] fieldNames) throws Exception {
  File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
  morphline = createMorphline(morphlineConfigFile);    
  for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
    Record record = new Record();
    byte[] body = Files.toByteArray(file);    
    record.put(Fields.ATTACHMENT_BODY, body);
    collector.reset();
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    assertEquals(2104, collector.getRecords().size());
    
    FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
    int i = 0;
    while (reader.hasNext()) {
      Record actual = collector.getRecords().get(i);
      GenericData.Record expected = reader.next();
      assertTweetEquals(expected, actual, fieldNames, i);
      i++;
    }    
    assertEquals(collector.getRecords().size(), i);
  }
}
 
Example #11
Source File: AvroToRestJsonEntryConverterTest.java    From incubator-gobblin with Apache License 2.0
private void testConversion(RestEntry<JsonObject> expected, WorkUnitState actualWorkUnitState) throws DataConversionException, IOException, JSONException {
  Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  tmp.deleteOnExit();
  try {
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader);
    GenericRecord avroRecord = dataFileReader.next();

    AvroToRestJsonEntryConverter converter = new AvroToRestJsonEntryConverter();
    RestEntry<JsonObject> actual = converter.convertRecord(null, avroRecord, actualWorkUnitState).iterator().next();

    Assert.assertEquals(actual.getResourcePath(), expected.getResourcePath());
    JSONAssert.assertEquals(expected.getRestEntryVal().toString(), actual.getRestEntryVal().toString(), false);

    converter.close();
    dataFileReader.close();
  } finally {
    if (tmp != null) {
      tmp.delete();
    }
  }
}
 
Example #12
Source File: Display.java    From hadoop with Apache License 2.0
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
  FileContext fc = FileContext.getFileContext(new Configuration());
  fileReader =
    DataFileReader.openReader(new AvroFSInput(fc, status.getPath()),reader);
  Schema schema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(schema);
  output = new ByteArrayOutputStream();
  JsonGenerator generator =
    new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  MinimalPrettyPrinter prettyPrinter = new MinimalPrettyPrinter();
  prettyPrinter.setRootValueSeparator(System.getProperty("line.separator"));
  generator.setPrettyPrinter(prettyPrinter);
  encoder = EncoderFactory.get().jsonEncoder(schema, generator);
}
 
Example #13
Source File: Converter.java    From xml-avro with Apache License 2.0
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);

    GenericRecord record = dataFileReader.next();

    Document doc;
    try {
        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    }

    Element el = unwrapElement(record, doc);
    doc.appendChild(el);

    saveDocument(doc, xmlFile);
}
 
Example #14
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testFirstUnderscoreInColumnName() throws IOException {
  String [] names = { "_NAME" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "__NAME", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("__NAME", 1987, record1.get("__NAME"));
}
 
Example #15
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testOverrideTypeMapping() throws IOException {
  String [] types = { "INT" };
  String [] vals = { "10" };
  createTableWithColTypes(types, vals);

  String [] extraArgs = { "--map-column-java", "DATA_COL0=String"};

  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "DATA_COL0", Schema.Type.STRING);

  GenericRecord record1 = reader.next();
  assertEquals("DATA_COL0", new Utf8("10"), record1.get("DATA_COL0"));
}
 
Example #16
Source File: AvroInputFormat.java    From stratosphere with Apache License 2.0
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	DatumReader<E> datumReader;
	if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumReader = new SpecificDatumReader<E>(avroValueType);
	} else {
		datumReader = new ReflectDatumReader<E>(avroValueType);
	}
	
	LOG.info("Opening split " + split);
	
	SeekableInput in = new FSDataInputStreamWrapper(stream, (int) split.getLength());
	
	dataFileReader = DataFileReader.openReader(in, datumReader);
	dataFileReader.sync(split.getStart());
}
 
Example #17
Source File: AvroScanner.java    From tajo with Apache License 2.0
/**
 * Initializes the AvroScanner.
 */
@Override
public void init() throws IOException {
  if (targets == null) {
    targets = schema.toArray();
  }
  prepareProjection(targets);
  outTuple = new VTuple(projectionMap.length);

  Schema avroSchema = AvroUtil.getAvroSchema(meta, conf);
  avroFields = avroSchema.getFields();

  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(avroSchema);
  SeekableInput input = new FsInput(fragment.getPath(), conf);
  dataFileReader = new DataFileReader<>(input, datumReader);
  super.init();
}
 
Example #18
Source File: AvroToDdlTool.java    From DataflowTemplates with Apache License 2.0
public static void main(String[] args) throws IOException {
  if (args.length == 0) {
    System.out.println("Please specify the avro files");
    System.exit(1);
  }

  List<Schema> schemaList = new ArrayList<>();
  for (String filePath : args) {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    DataFileReader<GenericRecord> dataFileReader =
        new DataFileReader<>(new File(filePath), datumReader);
    Schema schema = dataFileReader.getSchema();
    System.out.println(schema.toString(true));
    schemaList.add(schema);
  }
  Ddl ddl = new AvroSchemaToDdlConverter().toDdl(schemaList);
  ddl.prettyPrint(System.out);
}
 
Example #19
Source File: AvroFileReader.java    From streamx with Apache License 2.0
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  SeekableInput input = new FsInput(path, conf);
  DatumReader<Object> reader = new GenericDatumReader<>();
  FileReader<Object> fileReader = DataFileReader.openReader(input, reader);
  org.apache.avro.Schema schema = fileReader.getSchema();
  fileReader.close();
  return avroData.toConnectSchema(schema);
}
 
Example #20
Source File: Purge.java    From Cubert with Apache License 2.0
private void purge(String src, String dst) throws IOException
{
    DataFileReader<GenericRecord> dataFileReader = createDataFileReader(src, false);
    DataFileWriter<GenericRecord> writer = createDataFileWriter(dataFileReader);

    numRecords = 0;
    recordsPurged = 0;
    remainingRecords = 0;

    // Copy
    while (dataFileReader.hasNext())
    {
        numRecords++;
        GenericRecord record = dataFileReader.next();
        if (record == null)
        {
            continue;
        }

        Number column = (Number) record.get(columnName);
        if ((column == null) || (!membersToPurge.contains(column.intValue())))
        {
            remainingRecords++;
            writer.append(record);
        }
    }

    recordsPurged = numRecords - remainingRecords;
    writer.close();
    dataFileReader.close();
}
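The createDataFileWriter helper called above is not part of this snippet. A minimal sketch of what such a helper might look like, assuming the destination path is held in a field and ignoring any compression or metadata handling the real project may do:

private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> reader) throws IOException
{
    // Hypothetical sketch, not the original Cubert code.
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(datumWriter);
    // "tempFileName" is a placeholder field; the output is created with the source schema.
    writer.create(reader.getSchema(), new File(tempFileName));
    return writer;
}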
 
Example #21
Source File: TestMergeContent.java    From nifi with Apache License 2.0
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException {
    // create a reader for the merged content
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    SeekableByteArrayInput input = new SeekableByteArrayInput(data);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader);

    // read all the records into a map to verify all the records are there
    Map<String,GenericRecord> records = new HashMap<>();
    while (dataFileReader.hasNext()) {
        GenericRecord user = dataFileReader.next();
        records.put(user.get(key).toString(), user);
    }
    return records;
}
 
Example #22
Source File: FileFlusherLocalHdfsTest.java    From divolte-collector with Apache License 2.0
private DataFileReader<Record> readAvroFile(final Schema schema, final File file) {
    final DatumReader<Record> dr = new GenericDatumReader<>(schema);
    try {
        return new DataFileReader<>(file, dr);
    } catch (final IOException e) {
        throw new UncheckedIOException(e);
    }
}
 
Example #23
Source File: QueryGenerator.java    From incubator-pinot with Apache License 2.0
/**
 * Helper method to read in an Avro file and add data to the storage.
 *
 * @param avroFile Avro file.
 */
private void addAvroData(File avroFile) {
  // Read in records and update the values stored.
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(avroFile, datumReader)) {
    for (GenericRecord genericRecord : fileReader) {
      for (String columnName : _columnNames) {
        Set<String> values = _columnToValueSet.get(columnName);

        // Turn the Avro value into a valid SQL String token.
        Object avroValue = genericRecord.get(columnName);
        if (avroValue != null) {
          Integer storedMaxNumElements = _multiValueColumnMaxNumElements.get(columnName);
          if (storedMaxNumElements != null) {
            // Multi-value column
            GenericData.Array array = (GenericData.Array) avroValue;
            int numElements = array.size();
            if (storedMaxNumElements < numElements) {
              _multiValueColumnMaxNumElements.put(columnName, numElements);
            }
            for (Object element : array) {
              storeAvroValueIntoValueSet(values, element);
            }
          } else {
            // Single-value column
            storeAvroValueIntoValueSet(values, avroValue);
          }
        }
      }
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example #24
Source File: ProtoGetSchemaTool.java    From gcs-tools with Apache License 2.0
@Override
public int run(InputStream in, PrintStream out, PrintStream err,
               List<String> args) throws Exception {
  if (args.size() != 1) {
    err.println("Expected 1 argument: input_file");
    return 1;
  }
  DataFileReader<Void> reader =
      new DataFileReader<>(Util.openSeekableFromFS(args.get(0)),
          new GenericDatumReader<Void>());
  out.println(reader.getMetaString("protobuf.generic.schema"));
  return 0;
}
 
Example #25
Source File: TimelineMetadataUtils.java    From hudi with Apache License 2.0
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
    throws IOException {
  DatumReader<T> reader = new SpecificDatumReader<>(clazz);
  FileReader<T> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
  ValidationUtils.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
  return fileReader.next();
}
 
Example #26
Source File: FakeJobService.java    From beam with Apache License 2.0
private List<TableRow> readAvroTableRows(String filename, TableSchema tableSchema)
    throws IOException {
  List<TableRow> tableRows = Lists.newArrayList();
  FileReader<GenericRecord> dfr =
      DataFileReader.openReader(new File(filename), new GenericDatumReader<>());

  while (dfr.hasNext()) {
    GenericRecord record = dfr.next(null);
    tableRows.add(BigQueryUtils.convertGenericRecordToTableRow(record, tableSchema));
  }
  return tableRows;
}
 
Example #27
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * Get Avro schema from an Avro data file.
 */
public static Schema getSchemaFromDataFile(Path dataFile, FileSystem fs) throws IOException {
  try (SeekableInput sin = new FsInput(dataFile, fs.getConf());
      DataFileReader<GenericRecord> reader = new DataFileReader<>(sin, new GenericDatumReader<GenericRecord>())) {
    return reader.getSchema();
  }
}
 
Example #28
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testSchemaInHeader() throws Exception {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    true,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    null,
    null,
    null,
    null,
    0
  );
  Record record = createRecord();
  record.getHeader().setAttribute(BaseAvroDataGenerator.AVRO_SCHEMA_HEADER, AVRO_SCHEMA);
  gen.write(record);
  gen.close();

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertEquals("hari", readRecord.get("name").toString());
  Assert.assertEquals(3100, readRecord.get("age"));
  Assert.assertFalse(dataFileReader.hasNext());
}