Java Code Examples for org.apache.avro.file.FileReader#getSchema()
The following examples show how to use org.apache.avro.file.FileReader#getSchema().
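Before the project-specific examples, here is a minimal, self-contained sketch of the basic pattern: open an Avro container file with DataFileReader.openReader(), call FileReader#getSchema() to obtain the writer schema stored in the file header, and close the reader. The file name records.avro is a placeholder; substitute a real Avro data file.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class GetSchemaExample {
  public static void main(String[] args) throws IOException {
    // "records.avro" is a placeholder path for illustration only.
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(
        new File("records.avro"), new GenericDatumReader<GenericRecord>());
    try {
      // getSchema() returns the writer schema embedded in the container file header.
      Schema schema = fileReader.getSchema();
      System.out.println(schema.toString(true));
    } finally {
      fileReader.close();
    }
  }
}

The examples below follow the same pattern, differing mainly in how the input is obtained (HDFS paths via FsInput, byte arrays via SeekableByteArrayInput) and in what is done with the schema afterwards.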
Example 1
Source File: AvroUtil.java, from aliyun-maxcompute-data-collectors, Apache License 2.0
/**
 * Get the schema of AVRO files stored in a directory
 */
public static Schema getAvroSchema(Path path, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  Path fileToTest;
  if (fs.isDirectory(path)) {
    FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {
      @Override
      public boolean accept(Path p) {
        String name = p.getName();
        return !name.startsWith("_") && !name.startsWith(".");
      }
    });
    if (fileStatuses.length == 0) {
      return null;
    }
    fileToTest = fileStatuses[0].getPath();
  } else {
    fileToTest = path;
  }

  SeekableInput input = new FsInput(fileToTest, conf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);

  Schema result = fileReader.getSchema();
  fileReader.close();
  return result;
}
Example 2
Source File: AvroFileReader.java, from streamx, Apache License 2.0
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  SeekableInput input = new FsInput(path, conf);
  DatumReader<Object> reader = new GenericDatumReader<>();
  FileReader<Object> fileReader = DataFileReader.openReader(input, reader);
  org.apache.avro.Schema schema = fileReader.getSchema();
  fileReader.close();
  return avroData.toConnectSchema(schema);
}
Example 3
Source File: AvroToJsonConverter.java, from celos, Apache License 2.0
@Override
public FixFile convert(TestRun testRun, FixFile ff) throws IOException {
  byte[] bytes = IOUtils.toByteArray(ff.getContent());
  if (bytes.length == 0) {
    return ff;
  }
  ByteArrayOutputStream os = new ByteArrayOutputStream();
  GenericDatumReader<Object> reader = new GenericDatumReader<>();
  FileReader<Object> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
  try {
    Schema schema = fileReader.getSchema();
    DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
    JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, os);
    for (Object datum : fileReader) {
      writer.write(datum, encoder);
    }
    encoder.flush();
  } finally {
    fileReader.close();
  }
  return new FixFile(new ByteArrayInputStream(os.toByteArray()));
}
Example 4
Source File: AvroConversionBaseMapper.java, from datacollector, Apache License 2.0
@Override
protected void map(String input, String output, Context context) throws IOException, InterruptedException {
  FileSystem fs = FileSystem.get(context.getConfiguration());
  Configuration conf = context.getConfiguration();

  LOG.info("Converting input file: {}", input);
  LOG.info("Output directory: {}", output);
  Path inputPath = new Path(input);
  Path outputDir = new Path(output);
  fs.mkdirs(outputDir);

  Path tempFile = new Path(outputDir, getTempFilePrefix() + inputPath.getName());
  if (fs.exists(tempFile)) {
    if (conf.getBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE, false)) {
      fs.delete(tempFile, true);
    } else {
      throw new IOException("Temporary file " + tempFile + " already exists.");
    }
  }
  LOG.info("Using temp file: {}", tempFile);

  // Output file is the same as the input except for dropping the .avro extension if it exists and appending .parquet or .orc
  String outputFileName = inputPath.getName().replaceAll("\\.avro$", "") + getOutputFileSuffix();
  Path finalFile = new Path(outputDir, outputFileName);
  LOG.info("Final path will be: {}", finalFile);

  // Avro reader
  SeekableInput seekableInput = new FsInput(inputPath, conf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  FileReader<GenericRecord> fileReader = DataFileReader.openReader(seekableInput, reader);
  Schema avroSchema = fileReader.getSchema();

  initializeWriter(tempFile, avroSchema, conf, context);

  LOG.info("Started reading input file");
  long recordCount = 0;
  try {
    while (fileReader.hasNext()) {
      GenericRecord record = fileReader.next();
      handleAvroRecord(record);

      context.getCounter(Counters.PROCESSED_RECORDS).increment(1);
      recordCount++;
    }
  } catch (Exception e) {
    // Various problems can happen while converting, so we wrap the underlying exception with more details
    String message = String.format(
      "Exception at offset %d (record %d): %s",
      fileReader.tell(), recordCount, e.toString()
    );
    throw new IOException(message, e);
  }
  LOG.info("Done reading input file");
  closeWriter();

  LOG.info("Moving temporary file {} to final destination {}", tempFile, finalFile);
  fs.rename(tempFile, finalFile);

  if (!context.getConfiguration().getBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE, false)) {
    LOG.info("Removing input file {}", inputPath);
    fs.delete(inputPath, true);
  }

  LOG.info("Done converting input file into output directory {}", output);
}