org.apache.hadoop.fs.AvroFSInput Java Examples

The following examples show how to use org.apache.hadoop.fs.AvroFSInput. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Display.java (from the hadoop project, Apache License 2.0) - 6 votes
/**
 * Opens the Avro data file referenced by {@code status} and sets up the
 * state needed to re-encode its contents as JSON.
 *
 * <p>The file is opened through a {@link FileContext} created from a fresh
 * {@link Configuration}. The JSON encoder writes UTF-8 into an in-memory
 * {@link ByteArrayOutputStream}, and root-level values are separated by the
 * value of the {@code line.separator} system property.
 *
 * @param status status of the Avro file to read; only its path is used here
 * @throws IOException if the file cannot be opened or the JSON generator or
 *         encoder cannot be created
 */
public AvroFileInputStream(FileStatus status) throws IOException {
  // Begin with an empty buffer and the position at its start.
  pos = 0;
  buffer = new byte[0];

  // Open the Avro container file; the schema comes from the file itself.
  GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>();
  FileContext fileContext = FileContext.getFileContext(new Configuration());
  AvroFSInput avroInput = new AvroFSInput(fileContext, status.getPath());
  fileReader = DataFileReader.openReader(avroInput, datumReader);
  Schema fileSchema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(fileSchema);

  // JSON output is collected in memory, UTF-8 encoded.
  output = new ByteArrayOutputStream();
  JsonFactory jsonFactory = new JsonFactory();
  JsonGenerator jsonGenerator =
    jsonFactory.createJsonGenerator(output, JsonEncoding.UTF8);

  // Separate root-level values with the line.separator property value.
  MinimalPrettyPrinter printer = new MinimalPrettyPrinter();
  String rootSeparator = System.getProperty("line.separator");
  printer.setRootValueSeparator(rootSeparator);
  jsonGenerator.setPrettyPrinter(printer);

  encoder = EncoderFactory.get().jsonEncoder(fileSchema, jsonGenerator);
}
 
Example #2
Source File: Display.java (from the big-c project, Apache License 2.0) - 6 votes
/**
 * Builds a stream over the Avro file described by {@code status}.
 *
 * <p>Steps performed, in order: reset the internal buffer, open the file via
 * a {@link FileContext} built from a default {@link Configuration}, create a
 * datum writer for the file's schema, and create a JSON encoder that writes
 * UTF-8 into an in-memory buffer with root values separated by the
 * {@code line.separator} system property.
 *
 * @param status file status whose path identifies the Avro file
 * @throws IOException if opening the file or creating the JSON generator or
 *         encoder fails
 */
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];

  // Reader side: generic records, schema taken from the file header.
  GenericDatumReader<Object> genericReader = new GenericDatumReader<Object>();
  FileContext context = FileContext.getFileContext(new Configuration());
  AvroFSInput seekableInput = new AvroFSInput(context, status.getPath());
  fileReader = DataFileReader.openReader(seekableInput, genericReader);

  // Writer side: same schema, JSON-encoded into an in-memory stream.
  Schema recordSchema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(recordSchema);
  output = new ByteArrayOutputStream();

  JsonGenerator json =
    new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  MinimalPrettyPrinter minimalPrinter = new MinimalPrettyPrinter();
  // One root value per line, using the configured line separator.
  minimalPrinter.setRootValueSeparator(System.getProperty("line.separator"));
  json.setPrettyPrinter(minimalPrinter);

  encoder = EncoderFactory.get().jsonEncoder(recordSchema, json);
}
 
Example #3
Source File: AvroFileReader.java (from the kafka-connect-fs project, Apache License 2.0) - 5 votes
/**
 * Creates a reader over the Avro file at {@code filePath}.
 *
 * <p>The file is opened through a {@link FileContext} resolved from the
 * path's URI. If {@code this.schema} is non-null after the superclass
 * constructor has run, it is passed to the datum reader; otherwise the datum
 * reader is created without a schema.
 * NOTE(review): {@code this.schema} is presumably populated from
 * {@code config} by the superclass - confirm.
 *
 * @param fs       file system passed through to the superclass
 * @param filePath location of the Avro file
 * @param config   reader configuration passed through to the superclass
 * @throws IOException if the file cannot be opened or read
 */
public AvroFileReader(FileSystem fs, Path filePath, Map<String, Object> config) throws IOException {
    super(fs, filePath, new GenericRecordToStruct(), config);

    FileContext context = FileContext.getFileContext(filePath.toUri());
    AvroFSInput avroInput = new AvroFSInput(context, filePath);
    if (this.schema != null) {
        // A schema is available: hand it to the datum reader.
        this.reader = new DataFileReader<>(avroInput, new SpecificDatumReader<>(this.schema));
    } else {
        // No schema available: create the datum reader without one.
        this.reader = new DataFileReader<>(avroInput, new SpecificDatumReader<>());
    }
    this.closed = false;
}
 
Example #4
Source File: AvroFileHdfsReader.java (from the samza project, Apache License 2.0) - 5 votes
/**
 * Opens the Avro file at {@code pathStr} for reading and then calls
 * {@code seek(singleFileOffset)}.
 *
 * <p>The file is opened through a {@link FileContext} resolved from the
 * path's URI. Any {@link IOException} raised while opening or seeking is
 * rethrown wrapped in a {@code SamzaException}.
 *
 * @param pathStr          path of the Avro file, as a string
 * @param singleFileOffset offset value passed to {@code seek}
 */
@Override
public void open(String pathStr, String singleFileOffset) {
  String openMessage = String.format("%s: Open file [%s] with file offset [%s] for read",
      systemStreamPartition, pathStr, singleFileOffset);
  LOG.info(openMessage);

  Path path = new Path(pathStr);
  try {
    FileContext fileContext = FileContext.getFileContext(path.toUri());
    fileReader = new DataFileReader<>(new AvroFSInput(fileContext, path), new GenericDatumReader<>());
    seek(singleFileOffset);
  } catch (IOException e) {
    // Surface I/O failures as the unchecked exception, keeping the cause.
    throw new SamzaException(e);
  }
}
 
Example #5
Source File: AvroReader.java (from the HBase-ToHDFS project, Apache License 2.0) - 5 votes
/**
 * Prints the schema and records of an Avro data file to standard output.
 *
 * <p>Usage: {@code AvroReader {dataFile} {schemaFile} [maxLinesToRead]}.
 * The schema is parsed from {@code schemaFile}; records are then read from
 * {@code dataFile} and printed one per line, prefixed with a 1-based counter.
 * When fewer than two arguments are supplied, the usage message is printed
 * and the method returns without touching the file system.
 *
 * @param args {@code args[0]} data file path, {@code args[1]} schema file
 *             path, optional {@code args[2]} maximum number of records to
 *             print (defaults to {@link Integer#MAX_VALUE})
 * @throws IOException if the schema or data file cannot be opened or read
 * @throws NumberFormatException if {@code args[2]} is not a valid integer
 */
public static void main(String[] args) throws IOException {
  // Both paths are mandatory. Previously the usage text was printed only for
  // zero arguments and execution fell through to args[0]/args[1] anyway.
  if (args.length < 2) {
    System.out.println("AvroReader {dataFile} {schemaFile} {max.lines.to.read.optional}");
    return;
  }

  String dataFile = args[0];
  String schemaFile = args[1];
  int recordsToRead = Integer.MAX_VALUE;
  if (args.length > 2) {
    recordsToRead = Integer.parseInt(args[2]);
  }

  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(config);

  // Close the schema stream once parsed. The type is fully qualified so this
  // block does not depend on an extra import.
  Schema schema;
  try (java.io.InputStream schemaStream = fs.open(new Path(schemaFile))) {
    schema = new Schema.Parser().parse(schemaStream);
  }

  Path dataFilePath = new Path(dataFile);
  FileStatus fileStatus = fs.getFileStatus(dataFilePath);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

  // The input is declared as its own resource so it is still closed if the
  // DataFileReader constructor fails (e.g. the file is not an Avro file).
  try (AvroFSInput input = new AvroFSInput(fs.open(dataFilePath), fileStatus.getLen());
       DataFileReader<GenericRecord> dataFileReader =
           new DataFileReader<GenericRecord>(input, datumReader)) {
    System.out.println("Schema: " + dataFileReader.getSchema());
    System.out.println();
    int counter = 0;
    // counter is incremented in the condition, so printed numbers start at 1.
    while (dataFileReader.hasNext() && counter++ < recordsToRead) {
      GenericRecord r = dataFileReader.next();
      System.out.println(counter + " : " + r);
    }
  }
}