Java Code Examples for org.apache.avro.file.DataFileWriter#append()
The following examples show how to use org.apache.avro.file.DataFileWriter#append(). Each example is taken from an open-source project; the source file, project, and license are noted above it.
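Before the project-specific examples, the core pattern is small: wrap a DatumWriter in a DataFileWriter, call create() with a schema and an output target, then append() each datum and close the writer. The sketch below illustrates that pattern with a hypothetical user.avsc schema containing name and favorite_number fields; the file names and field names are placeholders, not taken from the examples that follow.

import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;

public class AppendSketch {
  public static void main(String[] args) throws IOException {
    // Parse a schema from a local .avsc file (hypothetical path).
    Schema schema = new Schema.Parser().parse(new File("user.avsc"));

    // Build a generic record that matches the schema (hypothetical fields).
    GenericRecord user = new GenericData.Record(schema);
    user.put("name", "Alyssa");
    user.put("favorite_number", 256);

    // Wrap a DatumWriter in a DataFileWriter, create the container file, and append.
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(datumWriter)) {
      writer.create(schema, new File("users.avro")); // writes the file header and schema
      writer.append(user);                           // appends one datum to the container
    }
  }
}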
Example 1
Source File: SmallFilesWrite.java From hiped2 with Apache License 2.0
public static void writeToAvro(File srcPath, OutputStream outputStream) throws IOException {
  DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>())
          .setSyncInterval(100);                              //<co id="ch02_smallfilewrite_comment2"/>
  writer.setCodec(CodecFactory.snappyCodec());                //<co id="ch02_smallfilewrite_comment3"/>
  writer.create(SCHEMA, outputStream);                        //<co id="ch02_smallfilewrite_comment4"/>
  for (Object obj : FileUtils.listFiles(srcPath, null, false)) {
    File file = (File) obj;
    String filename = file.getAbsolutePath();
    byte content[] = FileUtils.readFileToByteArray(file);
    GenericRecord record = new GenericData.Record(SCHEMA);    //<co id="ch02_smallfilewrite_comment5"/>
    record.put(FIELD_FILENAME, filename);                     //<co id="ch02_smallfilewrite_comment6"/>
    record.put(FIELD_CONTENTS, ByteBuffer.wrap(content));     //<co id="ch02_smallfilewrite_comment7"/>
    writer.append(record);                                    //<co id="ch02_smallfilewrite_comment8"/>
    System.out.println(file.getAbsolutePath() + ": " + DigestUtils.md5Hex(content));
  }
  IOUtils.cleanup(null, writer);
  IOUtils.cleanup(null, outputStream);
}
Example 2
Source File: AvroKeyValueFileWrite.java From hiped2 with Apache License 2.0
public static void writeToAvro(File inputFile, OutputStream outputStream) throws IOException {
  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>());
  writer.setCodec(CodecFactory.snappyCodec());
  writer.create(SCHEMA, outputStream);
  for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
    AvroKeyValue<CharSequence, Stock> record =
        new AvroKeyValue<CharSequence, Stock>(new GenericData.Record(SCHEMA));
    record.setKey(stock.getSymbol());
    record.setValue(stock);
    writer.append(record.get());
  }
  IOUtils.closeStream(writer);
  IOUtils.closeStream(outputStream);
}
Example 3
Source File: AvroStockFileWrite.java From hiped2 with Apache License 2.0
public static void writeToAvro(File inputFile, OutputStream outputStream) throws IOException {
  DataFileWriter<Stock> writer =
      new DataFileWriter<Stock>(new SpecificDatumWriter<Stock>());
  writer.setCodec(CodecFactory.snappyCodec());
  writer.create(Stock.SCHEMA$, outputStream);
  for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
    writer.append(stock);
  }
  IOUtils.closeStream(writer);
  IOUtils.closeStream(outputStream);
}
Example 4
Source File: Hdfs.java From pxf with Apache License 2.0
@Override
public void writeAvroFile(String pathToFile, String schemaName,
                          String codecName, IAvroSchema[] data) throws Exception {
  Path path = getDatapath(pathToFile);
  OutputStream outStream = fs.create(path, true, bufferSize, replicationSize, blockSize);
  Schema schema = new Schema.Parser().parse(new FileInputStream(schemaName));
  DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(writer);
  if (!StringUtils.isEmpty(codecName)) {
    dataFileWriter.setCodec(CodecFactory.fromString(codecName));
  }
  dataFileWriter.create(schema, outStream);
  for (IAvroSchema iAvroSchema : data) {
    GenericRecord datum = iAvroSchema.serialize();
    dataFileWriter.append(datum);
  }
  dataFileWriter.close();
}
Example 5
Source File: RegressionAdmmTrain.java From ml-ease with Apache License 2.0
private void computeU(JobConf conf, String uPath, String uplusxPath,
                      Map<String, LinearModel> z) throws IOException {
  AvroHdfsFileWriter<GenericRecord> writer =
      new AvroHdfsFileWriter<GenericRecord>(conf, uPath, LinearModelAvro.SCHEMA$);
  DataFileWriter<GenericRecord> recordwriter = writer.get();
  // read u+x
  for (Path path : Util.findPartFiles(conf, new Path(uplusxPath))) {
    DataFileStream<Object> stream = AvroUtils.getAvroDataStream(conf, path);
    while (stream.hasNext()) {
      GenericData.Record record = (GenericData.Record) stream.next();
      String partitionID = Util.getStringAvro(record, "key", false);
      if (record.get("uplusx") != null) {
        String lambda = Util.getLambda(partitionID);
        LinearModel newu = new LinearModel(LibLinearDataset.INTERCEPT_NAME,
                                           (List<?>) record.get("uplusx"));
        newu.linearCombine(1.0, -1.0, z.get(lambda));
        GenericData.Record newvaluemap = new GenericData.Record(LinearModelAvro.SCHEMA$);
        List modellist = newu.toAvro(LibLinearDataset.INTERCEPT_NAME);
        newvaluemap.put("key", partitionID);
        newvaluemap.put("model", modellist);
        recordwriter.append(newvaluemap);
      }
    }
  }
  recordwriter.close();
}
Example 6
Source File: AvroTestTools.java From incubator-gobblin with Apache License 2.0
private void writeAsAvroBinary(Iterator<GenericRecord> input, Schema schema,
                               FileSystem fs, Path outputPath) throws IOException {
  DataFileWriter writer = new DataFileWriter(new GenericDatumWriter());
  writer.create(schema, fs.create(outputPath, true));
  while (input.hasNext()) {
    writer.append(input.next());
  }
  writer.close();
  log.info("Successfully wrote avro file to path " + outputPath);
}
Example 7
Source File: JsonToAvroConverter.java From celos with Apache License 2.0
@Override
public FixFile convert(TestRun tr, FixFile ff) throws Exception {
  Schema schema = new Schema.Parser().parse(schemaCreator.create(tr).getContent());
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  InputStream input = ff.getContent();
  DataFileWriter<Object> writer;
  try {
    DatumReader<Object> reader = new GenericDatumReader<>(schema);
    DataInputStream din = new DataInputStream(input);
    writer = new DataFileWriter<>(new GenericDatumWriter<>());
    writer.create(schema, baos);
    Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
    Object datum;
    while (true) {
      try {
        datum = reader.read(null, decoder);
      } catch (EOFException eofe) {
        break;
      }
      writer.append(datum);
    }
    writer.flush();
  } finally {
    input.close();
  }
  return new FixFile(new ByteArrayInputStream(baos.toByteArray()));
}
Example 8
Source File: TestUtil.java From localization_nifi with Apache License 2.0
private static byte[] bytesFor(List<Record> records) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  DataFileWriter<Record> writer = new DataFileWriter<>(
      AvroUtil.newDatumWriter(records.get(0).getSchema(), Record.class));
  writer.setCodec(CodecFactory.snappyCodec());
  writer = writer.create(records.get(0).getSchema(), out);
  for (Record record : records) {
    writer.append(record);
  }
  writer.flush();
  return out.toByteArray();
}
Example 9
Source File: TestAvroDataFileParser.java From datacollector with Apache License 2.0
@Test
public void testIncorrectOffset() throws Exception {
  File avroDataFile = SdcAvroTestUtil.createAvroDataFile();
  avroDataFile.delete();
  Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
  dataFileWriter.create(schema, avroDataFile);
  for (int i = 0; i < 5; i++) {
    GenericRecord r = new GenericData.Record(schema);
    r.put("name", NAMES[i % NAMES.length]);
    r.put("id", i);
    dataFileWriter.setSyncInterval(1073741824);
    dataFileWriter.append(r);
    dataFileWriter.sync();
  }
  dataFileWriter.flush();
  dataFileWriter.close();
  DataParserFactoryBuilder dataParserFactoryBuilder =
      new DataParserFactoryBuilder(getContext(), DataParserFormat.AVRO);
  DataParserFactory factory = dataParserFactoryBuilder
      .setMaxDataLen(1024 * 1024)
      .setOverRunLimit(1000 * 1000)
      .setConfig(SCHEMA_SOURCE_KEY, SOURCE)
      .build();
  DataParser dataParser = factory.getParser(avroDataFile, null);
  Map<String, Record> records = new HashMap<>();
  Record record;
  while ((record = dataParser.parse()) != null) {
    records.put(dataParser.getOffset(), record);
  }
  Assert.assertEquals(String.valueOf(records), 5, records.size());
  Assert.assertEquals(0, records.get("141::1").get("/id").getValueAsInteger());
  Assert.assertEquals(1, records.get("166::1").get("/id").getValueAsInteger());
  Assert.assertEquals(2, records.get("190::1").get("/id").getValueAsInteger());
  Assert.assertEquals(3, records.get("215::1").get("/id").getValueAsInteger());
  Assert.assertEquals(4, records.get("239::1").get("/id").getValueAsInteger());
}
Example 10
Source File: GenerateIds.java From datafu with Apache License 2.0
private void createDataForDate(FileSystem fs, Path outputPath, Date date) throws IOException {
  // make sure output path exists
  if (!fs.exists(outputPath)) {
    fs.mkdirs(outputPath);
  }
  Path datePath = new Path(outputPath, dateFormat.format(date));
  System.out.println("Writing to " + datePath.toString()
      + " with range " + startId + " to " + endId);
  DataFileWriter<GenericRecord> dataWriter;
  OutputStream outputStream;
  Path dailyPath = outputPath;
  Path path = new Path(dailyPath, dateFormat.format(date));
  // delete directory if it already exists
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
  outputStream = fs.create(new Path(path, "part-00000.avro"));
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
  dataWriter = new DataFileWriter<GenericRecord>(writer);
  dataWriter.create(EVENT_SCHEMA, outputStream);
  GenericRecord record = new GenericData.Record(EVENT_SCHEMA);
  // create 1000 random IDs
  for (int i = 0; i < 1000; i++) {
    long val;
    if (startId == endId) {
      val = startId;
    } else {
      val = (long) (startId + random.nextInt(endId - startId + 1));
    }
    record.put("id", val);
    dataWriter.append(record);
  }
  dataWriter.close();
  outputStream.close();
}
Example 11
Source File: AvroRecordInputFormatTest.java From flink with Apache License 2.0
public static void writeTestFile(File testFile) throws IOException {
  ArrayList<CharSequence> stringArray = new ArrayList<>();
  stringArray.add(TEST_ARRAY_STRING_1);
  stringArray.add(TEST_ARRAY_STRING_2);

  ArrayList<Boolean> booleanArray = new ArrayList<>();
  booleanArray.add(TEST_ARRAY_BOOLEAN_1);
  booleanArray.add(TEST_ARRAY_BOOLEAN_2);

  HashMap<CharSequence, Long> longMap = new HashMap<>();
  longMap.put(TEST_MAP_KEY1, TEST_MAP_VALUE1);
  longMap.put(TEST_MAP_KEY2, TEST_MAP_VALUE2);

  Address addr = new Address();
  addr.setNum(TEST_NUM);
  addr.setStreet(TEST_STREET);
  addr.setCity(TEST_CITY);
  addr.setState(TEST_STATE);
  addr.setZip(TEST_ZIP);

  User user1 = new User();
  user1.setName(TEST_NAME);
  user1.setFavoriteNumber(256);
  user1.setTypeDoubleTest(123.45d);
  user1.setTypeBoolTest(true);
  user1.setTypeArrayString(stringArray);
  user1.setTypeArrayBoolean(booleanArray);
  user1.setTypeEnum(TEST_ENUM_COLOR);
  user1.setTypeMap(longMap);
  user1.setTypeNested(addr);
  user1.setTypeBytes(ByteBuffer.allocate(10));
  user1.setTypeDate(LocalDate.parse("2014-03-01"));
  user1.setTypeTimeMillis(LocalTime.parse("12:12:12"));
  user1.setTypeTimeMicros(123456);
  user1.setTypeTimestampMillis(DateTime.parse("2014-03-01T12:12:12.321Z"));
  user1.setTypeTimestampMicros(123456L);
  // 20.00
  user1.setTypeDecimalBytes(ByteBuffer.wrap(
      BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
  // 20.00
  user1.setTypeDecimalFixed(new Fixed2(
      BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));

  // Construct via builder
  User user2 = User.newBuilder()
      .setName("Charlie")
      .setFavoriteColor("blue")
      .setFavoriteNumber(null)
      .setTypeBoolTest(false)
      .setTypeDoubleTest(1.337d)
      .setTypeNullTest(null)
      .setTypeLongTest(1337L)
      .setTypeArrayString(new ArrayList<>())
      .setTypeArrayBoolean(new ArrayList<>())
      .setTypeNullableArray(null)
      .setTypeEnum(Colors.RED)
      .setTypeMap(new HashMap<>())
      .setTypeFixed(null)
      .setTypeUnion(null)
      .setTypeNested(
          Address.newBuilder().setNum(TEST_NUM).setStreet(TEST_STREET)
              .setCity(TEST_CITY).setState(TEST_STATE).setZip(TEST_ZIP)
              .build())
      .setTypeBytes(ByteBuffer.allocate(10))
      .setTypeDate(LocalDate.parse("2014-03-01"))
      .setTypeTimeMillis(LocalTime.parse("12:12:12"))
      .setTypeTimeMicros(123456)
      .setTypeTimestampMillis(DateTime.parse("2014-03-01T12:12:12.321Z"))
      .setTypeTimestampMicros(123456L)
      // 20.00
      .setTypeDecimalBytes(ByteBuffer.wrap(
          BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()))
      // 20.00
      .setTypeDecimalFixed(new Fixed2(
          BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()))
      .build();

  DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
  DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter);
  dataFileWriter.create(user1.getSchema(), testFile);
  dataFileWriter.append(user1);
  dataFileWriter.append(user2);
  dataFileWriter.close();
}
Example 12
Source File: AvroExternalJarProgram.java From stratosphere with Apache License 2.0
public static void writeTestData(File testFile, int numRecords) throws IOException {
  DatumWriter<MyUser> userDatumWriter = new ReflectDatumWriter<MyUser>(MyUser.class);
  DataFileWriter<MyUser> dataFileWriter = new DataFileWriter<MyUser>(userDatumWriter);
  dataFileWriter.create(ReflectData.get().getSchema(MyUser.class), testFile);
  Generator generator = new Generator();
  for (int i = 0; i < numRecords; i++) {
    MyUser user = generator.nextUser();
    dataFileWriter.append(user);
  }
  dataFileWriter.close();
}
Example 13
Source File: TestConvertAvroToORC.java From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_complex_record() throws Exception {
  Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
    put("key1", 1.0);
    put("key2", 2.0);
  }};
  GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(
      10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

  DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
  DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  fileWriter.create(record.getSchema(), out);
  fileWriter.append(record);

  // Put another record in
  Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
    put("key1", 3.0);
    put("key2", 4.0);
  }};
  record = TestNiFiOrcUtils.buildComplexAvroRecord(
      null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
  fileWriter.append(record);
  fileWriter.flush();
  fileWriter.close();
  out.close();

  Map<String, String> attributes = new HashMap<String, String>() {{
    put(CoreAttributes.FILENAME.key(), "test");
  }};
  runner.enqueue(out.toByteArray(), attributes);
  runner.run();

  runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

  // Write the flow file out to disk, since the ORC Reader needs a path
  MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
  assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record "
      + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
      + " STORED AS ORC",
      resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
  assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
  assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));

  byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
  FileOutputStream fos = new FileOutputStream("target/test1.orc");
  fos.write(resultContents);
  fos.flush();
  fos.close();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Reader reader = OrcFile.createReader(new Path("target/test1.orc"),
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  Object o = rows.next(null);
  assertNotNull(o);
  assertTrue(o instanceof OrcStruct);
  TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
  StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

  // Check some fields in the first row
  Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
  assertTrue(intFieldObject instanceof IntWritable);
  assertEquals(10, ((IntWritable) intFieldObject).get());

  // This is pretty awkward and messy. The map object is a Map (not a MapWritable) but the keys are
  // writables (in this case Text) and so are the values (DoubleWritables in this case).
  Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
  assertTrue(mapFieldObject instanceof Map);
  Map map = (Map) mapFieldObject;
  Object mapValue = map.get(new Text("key1"));
  assertNotNull(mapValue);
  assertTrue(mapValue instanceof DoubleWritable);
  assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);

  mapValue = map.get(new Text("key2"));
  assertNotNull(mapValue);
  assertTrue(mapValue instanceof DoubleWritable);
  assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
Example 14
Source File: AvroWithoutSchemaRegistryProducer.java From snowflake-kafka-connector with Apache License 2.0
@Override
public void send(final Enums.TestCases testCase) {
  System.out.println("loading table: " + testCase.getTableName()
      + " in format: " + testCase.getFormatName() + " to Kafka");
  try {
    Scanner scanner = getFileScanner(testCase);
    Schema schema = testCase.getTable().getSchema();
    while (scanner.hasNextLine()) {
      GenericData.Record record = new GenericData.Record(schema);
      GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
      ByteArrayOutputStream output = new ByteArrayOutputStream();
      DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(writer);
      fileWriter.create(schema, output);
      JsonNode data = Utils.MAPPER.readTree(scanner.nextLine());
      switch (testCase.getTable()) {
        case ONE_G_TABLE:
          record.put("C_CUSTKEY", data.get("C_CUSTKEY").asLong());
          record.put("C_NAME", data.get("C_NAME").asText());
          record.put("C_ADDRESS", data.get("C_ADDRESS").asText());
          record.put("C_PHONE", data.get("C_PHONE").asText());
          record.put("C_ACCTBAL", data.get("C_ACCTBAL").asDouble());
          record.put("C_MKTSEGMENT", data.get("C_MKTSEGMENT").asText());
          record.put("C_COMMENT", data.get("C_COMMENT").asText());
          record.put("C_NATIONKEY", data.get("C_NATIONKEY").asLong());
          break;
        case THREE_HUNDRED_COLUMN_TABLE:
          for (int i = 0; i < 300; i++) {
            switch (i % 8) {
              case 0:
                record.put("C" + i, data.get("C" + i).asDouble());
                break;
              case 2:
                record.put("C" + i, data.get("C" + i).asInt());
                break;
              case 4:
                record.put("C" + i, data.get("C" + i).asLong());
                break;
              case 6:
                record.put("C" + i, data.get("C" + i).asBoolean());
                break;
              default:
                record.put("C" + i, data.get("C" + i).asText());
            }
          }
      }
      fileWriter.append(record);
      fileWriter.flush();
      fileWriter.close();
      send(Utils.TEST_TOPIC, output.toByteArray());
    }
    scanner.close();
    close();
  } catch (Exception e) {
    e.printStackTrace();
    System.exit(1);
  }
  System.out.println("finished loading");
}
Example 15
Source File: AvroTableFileAsMutationsTest.java From DataflowTemplates with Apache License 2.0
@Test
public void testAvroToMutationsTransform() throws Exception {
  DdlToAvroSchemaConverter converter = new DdlToAvroSchemaConverter("spannertest", "booleans");
  Ddl ddl = Ddl.builder()
      .createTable("Users")
      .column("id").int64().notNull().endColumn()
      .column("first_name").string().size(10).endColumn()
      .column("last_name").type(Type.string()).max().endColumn()
      .primaryKey().asc("id").desc("last_name").end()
      .endTable()
      .build();

  Collection<Schema> result = converter.convert(ddl);
  assertThat(result, hasSize(1));
  Schema usersSchema = result.iterator().next();

  GenericRecord user1 = new GenericData.Record(usersSchema);
  user1.put("id", 123L);
  user1.put("first_name", "John");
  user1.put("last_name", "Smith");

  GenericRecord user2 = new GenericData.Record(usersSchema);
  user2.put("id", 456L);
  user2.put("first_name", "Jane");
  user2.put("last_name", "Doe");

  File file = tmpFolder.newFile("users.avro");
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(usersSchema);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  dataFileWriter.create(usersSchema, file);
  dataFileWriter.append(user1);
  dataFileWriter.append(user2);
  dataFileWriter.close();

  PCollectionView<Ddl> ddlView = p.apply("ddl", Create.of(ddl)).apply(View.asSingleton());
  PCollection<Mutation> mutations =
      p.apply("files/tables", Create.of(ImmutableMap.of(file.toPath().toString(), "Users")))
          .apply(new AvroTableFileAsMutations(ddlView));

  PAssert.that(mutations)
      .containsInAnyOrder(
          Mutation.newInsertOrUpdateBuilder("Users")
              .set("id").to(123L)
              .set("first_name").to("John")
              .set("last_name").to("Smith")
              .build(),
          Mutation.newInsertOrUpdateBuilder("Users")
              .set("id").to(456L)
              .set("first_name").to("Jane")
              .set("last_name").to("Doe")
              .build());
  p.run();
}
Example 16
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_nested_complex_record() throws Exception {
  Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {{
    put("key1", Arrays.asList(1.0, 2.0));
    put("key2", Arrays.asList(3.0, 4.0));
  }};
  Map<String, String> arrayMap11 = new TreeMap<String, String>() {{
    put("key1", "v1");
    put("key2", "v2");
  }};
  Map<String, String> arrayMap12 = new TreeMap<String, String>() {{
    put("key3", "v3");
    put("key4", "v4");
  }};
  GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(
      mapData1, Arrays.asList(arrayMap11, arrayMap12));

  DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
  DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  fileWriter.create(record.getSchema(), out);
  fileWriter.append(record);

  // Put another record in
  Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {{
    put("key1", Arrays.asList(-1.0, -2.0));
    put("key2", Arrays.asList(-3.0, -4.0));
  }};
  Map<String, String> arrayMap21 = new TreeMap<String, String>() {{
    put("key1", "v-1");
    put("key2", "v-2");
  }};
  Map<String, String> arrayMap22 = new TreeMap<String, String>() {{
    put("key3", "v-3");
    put("key4", "v-4");
  }};
  record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(
      mapData2, Arrays.asList(arrayMap21, arrayMap22));
  fileWriter.append(record);
  fileWriter.flush();
  fileWriter.close();
  out.close();

  Map<String, String> attributes = new HashMap<String, String>() {{
    put(CoreAttributes.FILENAME.key(), "test");
  }};
  runner.enqueue(out.toByteArray(), attributes);
  runner.run();

  runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

  // Write the flow file out to disk, since the ORC Reader needs a path
  MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
  assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record "
      + "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)"
      + " STORED AS ORC",
      resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
  assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
  assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));

  byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
  FileOutputStream fos = new FileOutputStream("target/test1.orc");
  fos.write(resultContents);
  fos.flush();
  fos.close();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Reader reader = OrcFile.createReader(new Path("target/test1.orc"),
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  Object o = rows.next(null);
  assertNotNull(o);
  assertTrue(o instanceof OrcStruct);
  TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
  StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

  // check values
  Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
  assertTrue(myMapOfArray instanceof Map);
  Map map = (Map) myMapOfArray;
  Object mapValue = map.get(new Text("key1"));
  assertNotNull(mapValue);
  assertTrue(mapValue instanceof List);
  assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);

  Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
  assertTrue(myArrayOfMap instanceof List);
  List list = (List) myArrayOfMap;
  Object el0 = list.get(0);
  assertNotNull(el0);
  assertTrue(el0 instanceof Map);
  assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}
Example 17
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_complex_record() throws Exception {
  Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
    put("key1", 1.0);
    put("key2", 2.0);
  }};
  GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(
      10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

  DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
  DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  fileWriter.create(record.getSchema(), out);
  fileWriter.append(record);

  // Put another record in
  Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
    put("key1", 3.0);
    put("key2", 4.0);
  }};
  record = TestNiFiOrcUtils.buildComplexAvroRecord(
      null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
  fileWriter.append(record);
  fileWriter.flush();
  fileWriter.close();
  out.close();

  Map<String, String> attributes = new HashMap<String, String>() {{
    put(CoreAttributes.FILENAME.key(), "test");
  }};
  runner.enqueue(out.toByteArray(), attributes);
  runner.run();

  runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

  // Write the flow file out to disk, since the ORC Reader needs a path
  MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
  assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record "
      + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
      + " STORED AS ORC",
      resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
  assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
  assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));

  byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
  FileOutputStream fos = new FileOutputStream("target/test1.orc");
  fos.write(resultContents);
  fos.flush();
  fos.close();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Reader reader = OrcFile.createReader(new Path("target/test1.orc"),
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  Object o = rows.next(null);
  assertNotNull(o);
  assertTrue(o instanceof OrcStruct);
  TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
  StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

  // Check some fields in the first row
  Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
  assertTrue(intFieldObject instanceof IntWritable);
  assertEquals(10, ((IntWritable) intFieldObject).get());

  Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
  assertTrue(mapFieldObject instanceof Map);
  Map map = (Map) mapFieldObject;
  Object mapValue = map.get(new Text("key1"));
  assertNotNull(mapValue);
  assertTrue(mapValue instanceof DoubleWritable);
  assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);

  mapValue = map.get(new Text("key2"));
  assertNotNull(mapValue);
  assertTrue(mapValue instanceof DoubleWritable);
  assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
Example 18
Source File: AvroExternalJarProgram.java From flink with Apache License 2.0
public static void writeTestData(File testFile, int numRecords) throws IOException {
  DatumWriter<MyUser> userDatumWriter = new ReflectDatumWriter<MyUser>(MyUser.class);
  DataFileWriter<MyUser> dataFileWriter = new DataFileWriter<MyUser>(userDatumWriter);
  dataFileWriter.create(ReflectData.get().getSchema(MyUser.class), testFile);
  Generator generator = new Generator();
  for (int i = 0; i < numRecords; i++) {
    MyUser user = generator.nextUser();
    dataFileWriter.append(user);
  }
  dataFileWriter.close();
}
Example 19
Source File: AvroUserTest.java From yuzhouwan with Apache License 2.0
@Test
public void createUserTest() throws Exception {
  // 1. Creating Users
  User user1 = new User();
  user1.setName("Alyssa");
  user1.setFavoriteNumber(256);
  // Alternate constructor
  User user2 = new User("Ben", 7, "red");
  // Construct via builder
  User user3 = User.newBuilder().setName("Charlie").setFavoriteColor("blue")
      .setFavoriteNumber(null).build();

  // 2. Serializing
  // Serialize user1, user2 and user3 to disk
  DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
  DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter);
  String avroDir = DirUtils.RESOURCES_PATH.concat("/avro");
  DirUtils.makeSureExist(avroDir, false);
  File file = new File(avroDir.concat("/users.avro"));
  dataFileWriter.create(user1.getSchema(), file);
  dataFileWriter.append(user1);
  dataFileWriter.append(user2);
  dataFileWriter.append(user3);
  // Appending more user objects per file amortizes the overhead and improves performance
  dataFileWriter.close();

  // 3. Deserializing
  // Deserialize Users from disk
  DatumReader<User> userDatumReader = new SpecificDatumReader<>(User.class);
  DataFileReader<User> dataFileReader = new DataFileReader<>(file, userDatumReader);
  User user = null;
  String userStr;
  int count = 0;
  while (dataFileReader.hasNext()) {
    // Reuse the user object by passing it to next(). This saves us from
    // allocating and garbage collecting many objects for files with many items.
    user = dataFileReader.next(user);
    if ("{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}".equals(userStr = user.toString())
        || "{\"name\": \"Ben\", \"favorite_number\": 7, \"favorite_color\": \"red\"}".equals(userStr)
        || "{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\"}".equals(userStr)) {
      count++;
    }
  }
  assertEquals(3, count);
  file.deleteOnExit();
}
Example 20
Source File: BackfillPhaseMapJob.java From incubator-pinot with Apache License 2.0
private void createAvro(String dataFilePath) throws Exception {
  Path hdfsDataPath = new Path(dataFilePath);
  File dataPath = new File(currentDiskWorkDir, "data");
  if (dataPath.exists()) {
    dataPath.delete();
  }
  dataPath.mkdir();
  LOGGER.info("Creating temporary data dir {}", dataPath);

  final File avroPath = new File(currentDiskWorkDir, "avro");
  if (avroPath.exists()) {
    avroPath.delete();
  }
  avroPath.mkdir();
  LOGGER.info("Creating temporary avro dir {}", avroPath);

  String segmentName = hdfsDataPath.getName();
  final Path localFilePath = new Path(dataPath + "/" + segmentName);
  fs.copyToLocalFile(hdfsDataPath, localFilePath);
  LOGGER.info("Copying segment {} from {} to local {}", segmentName, hdfsDataPath, localFilePath);
  File segmentIndexDir = new File(localFilePath.toString());
  if (!segmentIndexDir.exists()) {
    throw new IllegalStateException("Failed to copy " + hdfsDataPath + " to " + localFilePath);
  }

  LOGGER.info("Initializing PinotSegmentRecordReader with segment index dir {}", segmentIndexDir);
  PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(segmentIndexDir);
  LOGGER.info("Schema {}", pinotSegmentRecordReader.getSchema());

  Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSegmentRecordReader.getSchema());
  GenericDatumWriter<GenericRecord> datum = new GenericDatumWriter<GenericRecord>(avroSchema);
  DataFileWriter<GenericRecord> recordWriter = new DataFileWriter<GenericRecord>(datum);
  File localAvroFile = new File(avroPath, segmentName + ThirdEyeConstants.AVRO_SUFFIX);
  recordWriter.create(avroSchema, localAvroFile);

  LOGGER.info("Converting pinot segment to avro at {}", localAvroFile);
  while (pinotSegmentRecordReader.hasNext()) {
    GenericRecord outputRecord = new Record(avroSchema);
    GenericRow row = pinotSegmentRecordReader.next();
    for (String fieldName : row.getFieldNames()) {
      outputRecord.put(fieldName, row.getValue(fieldName));
    }
    recordWriter.append(outputRecord);
  }
  LOGGER.info("Writing to avro file at {}", localAvroFile);
  recordWriter.close();
  if (!localAvroFile.exists()) {
    LOGGER.info("Failed to write avro file to {}", localAvroFile);
  }
  pinotSegmentRecordReader.close();

  LOGGER.info("Copying avro file from {} to hdfs at {}", localAvroFile, outputPath);
  fs.copyFromLocalFile(true, true, new Path(localAvroFile.toString()), new Path(outputPath));
  if (!fs.exists(new Path(outputPath))) {
    throw new IllegalStateException("Failed to copy avro file to hdfs at " + outputPath);
  }
  LOGGER.info("Successfully copied {} to {}", localAvroFile, outputPath);
}