Java Code Examples for org.apache.avro.file.DataFileWriter#flush()
The following examples show how to use org.apache.avro.file.DataFileWriter#flush().
Each example is drawn from an open-source project; the source file, project, and license are noted above the code.
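Before the project-specific examples, here is a minimal, self-contained sketch of the call sequence around flush(): open the writer with create(), append a record, then flush() to force the buffered block out to the underlying stream. The User schema, class name, and record values here are hypothetical, chosen only for illustration.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class DataFileWriterFlushSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical schema, for illustration only
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
                + "{\"name\":\"name\",\"type\":\"string\"}]}");

        GenericRecord user = new GenericData.Record(schema);
        user.put("name", "alice");

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataFileWriter<GenericRecord> writer =
                new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
        writer.create(schema, out);  // writes the container-file header
        writer.append(user);         // buffers the record in the current block
        writer.flush();              // writes the buffered block and flushes the stream
        // At this point out.toByteArray() is a complete, readable Avro container
        // file, which is why several examples below grab the bytes right after flush().
        writer.close();
    }
}

Note that flush() does not replace close(); as the examples show, most writers still call close() afterwards, while a few use flush() alone when only the in-memory bytes are needed.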
Example 1
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_empty_array_type() throws Exception {
    String testString = "Hello World";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithEmptyArray(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, emptyArray ARRAY<BOOLEAN>) STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
}
Example 2
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_null_type() throws Exception {
    String testString = "Hello World";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithNull(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, null BOOLEAN) STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
}
Example 3
Source File: TestAvroEventDeserializer.java From mt-flume with Apache License 2.0
private File newTestFile(boolean deleteOnExit) throws IOException {
    File tempFile = File.createTempFile("testDirectFile", "tmp");
    if (deleteOnExit) {
        tempFile.deleteOnExit();
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));
    writer.create(schema, tempFile);

    GenericRecordBuilder recordBuilder;
    recordBuilder = new GenericRecordBuilder(schema);
    recordBuilder.set("foo", "bar");
    GenericRecord record = recordBuilder.build();
    writer.append(record);
    writer.sync();

    recordBuilder = new GenericRecordBuilder(schema);
    recordBuilder.set("foo", "baz");
    record = recordBuilder.build();
    writer.append(record);
    writer.sync();

    writer.flush();
    writer.close();

    return tempFile;
}
Example 4
Source File: AvroUtils.java From Cubert with Apache License 2.0
public static void createFileIfNotExists(BlockSchema fileSchema, String path) throws IOException {
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(path)))
        return;

    Schema avroSchema = convertFromBlockSchema("CUBERT_MV_RECORD", fileSchema);
    System.out.println("Creating avro file with schema = " + avroSchema);
    GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(avroSchema);
    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(datumWriter);
    FSDataOutputStream fout = FileSystem.create(fs,
            new Path(path),
            new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.READ_EXECUTE));
    writer.create(avroSchema, fout);
    writer.flush();
    writer.close();
}
Example 5
Source File: TestUtil.java From localization_nifi with Apache License 2.0
private static byte[] bytesFor(List<Record> records) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataFileWriter<Record> writer = new DataFileWriter<>(
            AvroUtil.newDatumWriter(records.get(0).getSchema(), Record.class));
    writer.setCodec(CodecFactory.snappyCodec());
    writer = writer.create(records.get(0).getSchema(), out);

    for (Record record : records) {
        writer.append(record);
    }

    writer.flush();

    return out.toByteArray();
}
Example 6
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_fixed_type() throws Exception {
    String testString = "Hello!";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithFixed(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_FAILURE, 1);

    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_FAILURE).get(0);
    assertEquals("test.avro", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));

    final InputStream in = new ByteArrayInputStream(resultFlowFile.toByteArray());
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        assertTrue(dataFileReader.hasNext());
        GenericRecord testedRecord = dataFileReader.next();

        assertNotNull(testedRecord.get("fixed"));
        assertArrayEquals(testString.getBytes(StandardCharsets.UTF_8),
                ((GenericData.Fixed) testedRecord.get("fixed")).bytes());
    }
}
Example 7
Source File: JsonToAvroConverter.java From celos with Apache License 2.0
@Override
public FixFile convert(TestRun tr, FixFile ff) throws Exception {
    Schema schema = new Schema.Parser().parse(schemaCreator.create(tr).getContent());
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    InputStream input = ff.getContent();
    DataFileWriter<Object> writer;
    try {
        DatumReader<Object> reader = new GenericDatumReader<>(schema);
        DataInputStream din = new DataInputStream(input);
        writer = new DataFileWriter<>(new GenericDatumWriter<>());
        writer.create(schema, baos);
        Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
        Object datum;
        while (true) {
            try {
                datum = reader.read(null, decoder);
            } catch (EOFException eofe) {
                break;
            }
            writer.append(datum);
        }
        writer.flush();
    } finally {
        input.close();
    }
    return new FixFile(new ByteArrayInputStream(baos.toByteArray()));
}
Example 8
Source File: AvroFileGenerator.java From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
    // generate only avro file
    if (args.length == 2) {
        ordersPath = args[0];
        outputOrderAvroPath = args[1];

        // Generate file for avro test
        DatumWriter<Order> orderDatumWriter = new SpecificDatumWriter<Order>(Order.class);
        DataFileWriter<Order> dataFileWriter = new DataFileWriter<Order>(orderDatumWriter);
        dataFileWriter.create(Order.getClassSchema(), new File(outputOrderAvroPath));

        Scanner s = new Scanner(new File(ordersPath));
        while (s.hasNextLine()) {
            @SuppressWarnings("resource")
            Scanner lineScanner = new Scanner(s.nextLine()).useDelimiter("\\|");
            Order o = new Order();
            o.setOOrderkey(lineScanner.nextInt());
            o.setOCustkey(lineScanner.nextInt());
            o.setOOrderstatus(lineScanner.next());
            o.setOTotalprice(lineScanner.nextFloat());
            o.setOOrderdate(lineScanner.next());
            o.setOOrderpriority(lineScanner.next());
            o.setOClerk(lineScanner.next());
            o.setOShipproprity(lineScanner.nextInt());
            o.setOComment(lineScanner.next());
            dataFileWriter.append(o);
            lineScanner.close();
        }
        dataFileWriter.flush();
        s.close();
        dataFileWriter.close();
        return;
    } else {
        System.err.println("Usage: <inputFilePath> <outputAvroPath>");
        System.exit(1);
    }
}
Example 9
Source File: AvroSpoolDirSourceTestUtil.java From datacollector with Apache License 2.0
public static File createAvroDataFile() throws Exception {
    File f = new File(createTestDir(), "file-0.avro");
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);

    GenericRecord boss = new GenericData.Record(schema);
    boss.put("name", "boss");
    boss.put("age", 60);
    boss.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
    boss.put("boss", null);

    GenericRecord e3 = new GenericData.Record(schema);
    e3.put("name", "c");
    e3.put("age", 50);
    e3.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
    e3.put("boss", boss);

    GenericRecord e2 = new GenericData.Record(schema);
    e2.put("name", "b");
    e2.put("age", 40);
    e2.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
    e2.put("boss", boss);

    GenericRecord e1 = new GenericData.Record(schema);
    e1.put("name", "a");
    e1.put("age", 30);
    e1.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
    e1.put("boss", boss);

    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.create(schema, f);
    dataFileWriter.append(e1);
    dataFileWriter.append(e2);
    dataFileWriter.append(e3);
    dataFileWriter.flush();
    dataFileWriter.close();
    return f;
}
Example 10
Source File: SdcAvroTestUtil.java From datacollector with Apache License 2.0
public static File createAvroDataFile() throws Exception {
    File f = new File(createTestDir(), "file-0.avro");
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);

    GenericRecord boss = new GenericData.Record(schema);
    boss.put("name", "boss");
    boss.put("age", 60);
    boss.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
    boss.put("boss", null);

    GenericRecord e3 = new GenericData.Record(schema);
    e3.put("name", "c");
    e3.put("age", 50);
    e3.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
    e3.put("boss", boss);

    GenericRecord e2 = new GenericData.Record(schema);
    e2.put("name", "b");
    e2.put("age", 40);
    e2.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
    e2.put("boss", boss);

    GenericRecord e1 = new GenericData.Record(schema);
    e1.put("name", "a");
    e1.put("age", 30);
    e1.put("emails", ImmutableList.of("[email protected]", "[email protected]"));
    e1.put("boss", boss);

    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.create(schema, f);
    dataFileWriter.append(e1);
    dataFileWriter.append(e2);
    dataFileWriter.append(e3);
    dataFileWriter.flush();
    dataFileWriter.close();
    return f;
}
Example 11
Source File: TestAvroDataFileParser.java From datacollector with Apache License 2.0
@Test
public void testIncorrectOffset() throws Exception {
    File avroDataFile = SdcAvroTestUtil.createAvroDataFile();
    avroDataFile.delete();
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.create(schema, avroDataFile);
    for (int i = 0; i < 5; i++) {
        GenericRecord r = new GenericData.Record(schema);
        r.put("name", NAMES[i % NAMES.length]);
        r.put("id", i);
        dataFileWriter.setSyncInterval(1073741824);
        dataFileWriter.append(r);
        dataFileWriter.sync();
    }
    dataFileWriter.flush();
    dataFileWriter.close();

    DataParserFactoryBuilder dataParserFactoryBuilder =
            new DataParserFactoryBuilder(getContext(), DataParserFormat.AVRO);
    DataParserFactory factory = dataParserFactoryBuilder
        .setMaxDataLen(1024 * 1024)
        .setOverRunLimit(1000 * 1000)
        .setConfig(SCHEMA_SOURCE_KEY, SOURCE)
        .build();

    DataParser dataParser = factory.getParser(avroDataFile, null);
    Map<String, Record> records = new HashMap<>();
    Record record;
    while ((record = dataParser.parse()) != null) {
        records.put(dataParser.getOffset(), record);
    }
    Assert.assertEquals(String.valueOf(records), 5, records.size());
    Assert.assertEquals(0, records.get("141::1").get("/id").getValueAsInteger());
    Assert.assertEquals(1, records.get("166::1").get("/id").getValueAsInteger());
    Assert.assertEquals(2, records.get("190::1").get("/id").getValueAsInteger());
    Assert.assertEquals(3, records.get("215::1").get("/id").getValueAsInteger());
    Assert.assertEquals(4, records.get("239::1").get("/id").getValueAsInteger());
}
Example 12
Source File: TimelineMetadataUtils.java From hudi with Apache License 2.0
public static <T extends SpecificRecordBase> Option<byte[]> serializeAvroMetadata(T metadata, Class<T> clazz)
        throws IOException {
    DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
    DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    fileWriter.create(metadata.getSchema(), baos);
    fileWriter.append(metadata);
    // flush() writes the buffered block, so baos now holds a complete Avro container file
    fileWriter.flush();
    return Option.of(baos.toByteArray());
}
Example 13
Source File: JdbcAvroRecordTest.java From dbeam with Apache License 2.0
@Test
public void shouldEncodeResultSetToValidAvro()
        throws ClassNotFoundException, SQLException, IOException {
    ResultSet rs =
        DbTestHelper.createConnection(CONNECTION_URL)
            .createStatement()
            .executeQuery("SELECT * FROM COFFEES");
    Schema schema =
        JdbcAvroSchema.createAvroSchema(rs, "dbeam_generated", "connection", "doc", false);
    JdbcAvroRecordConverter converter = JdbcAvroRecordConverter.create(rs);
    DataFileWriter<GenericRecord> dataFileWriter =
        new DataFileWriter<>(new GenericDatumWriter<>(schema));
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    dataFileWriter.create(schema, outputStream);
    // convert and write
    while (rs.next()) {
        dataFileWriter.appendEncoded(converter.convertResultSetIntoAvroBytes());
    }
    dataFileWriter.flush();
    outputStream.close();
    // transform to generic record
    SeekableByteArrayInput inputStream = new SeekableByteArrayInput(outputStream.toByteArray());
    DataFileReader<GenericRecord> dataFileReader =
        new DataFileReader<>(inputStream, new GenericDatumReader<>(schema));
    final List<GenericRecord> records =
        StreamSupport.stream(dataFileReader.spliterator(), false).collect(Collectors.toList());
    Assert.assertEquals(2, records.size());
    GenericRecord record =
        records.stream()
            .filter(r -> Coffee.COFFEE1.name().equals(r.get(0).toString()))
            .findFirst()
            .orElseThrow(() -> new IllegalArgumentException("not found"));
    Assert.assertEquals(12, record.getSchema().getFields().size());
    Assert.assertEquals(schema, record.getSchema());
    Coffee actual =
        Coffee.create(
            record.get(0).toString(),
            Optional.ofNullable((Integer) record.get(1)),
            new java.math.BigDecimal(record.get(2).toString()),
            (Float) record.get(3),
            (Double) record.get(4),
            (Boolean) record.get(5),
            (Integer) record.get(6),
            (Long) record.get(7),
            new java.sql.Timestamp((Long) record.get(8)),
            Optional.ofNullable((Long) record.get(9)).map(Timestamp::new),
            TestHelper.byteBufferToUuid((ByteBuffer) record.get(10)),
            (Long) record.get(11));
    Assert.assertEquals(Coffee.COFFEE1, actual);
}
Example 14
Source File: TestConvertAvroToParquet.java From nifi with Apache License 2.0
@Before
public void setUp() throws Exception {
    processor = new ConvertAvroToParquet();
    runner = TestRunners.newTestRunner(processor);

    Schema schema = new Schema.Parser().parse(Resources.getResource("avro/all-minus-enum.avsc").openStream());

    DataFileWriter<Object> awriter = new DataFileWriter<Object>(new GenericDatumWriter<Object>());

    GenericData.Record nestedRecord = new GenericRecordBuilder(
            schema.getField("mynestedrecord").schema())
            .set("mynestedint", 1).build();

    GenericData.Record record = new GenericRecordBuilder(schema)
            .set("mynull", null)
            .set("myboolean", true)
            .set("myint", 1)
            .set("mylong", 2L)
            .set("myfloat", 3.1f)
            .set("mydouble", 4.1)
            .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
            .set("mystring", "hello")
            .set("mynestedrecord", nestedRecord)
            .set("myarray", new GenericData.Array<Integer>(Schema.createArray(Schema.create(Schema.Type.INT)), Arrays.asList(1, 2)))
            .set("mymap", ImmutableMap.of("a", 1, "b", 2))
            .set("myfixed", new GenericData.Fixed(Schema.createFixed("ignored", null, null, 1), new byte[] { (byte) 65 }))
            .build();

    awriter.create(schema, tmpAvro);
    awriter.append(record);
    awriter.flush();
    awriter.close();

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmpAvro, datumReader);
    GenericRecord record1 = null;
    while (dataFileReader.hasNext()) {
        record1 = dataFileReader.next(record1);
        records.add(record1);
    }
}
Example 15
Source File: TestConvertAvroToORC.java From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_complex_record() throws Exception {
    Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
        put("key1", 1.0);
        put("key2", 2.0);
    }};

    GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
        put("key1", 3.0);
        put("key2", 4.0);
    }};

    record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record "
            + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
            + " STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());

    // This is pretty awkward and messy. The map object is a Map (not a MapWritable) but the keys are writables
    // (in this case Text) and so are the values (DoubleWritables in this case).
    Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
    assertTrue(mapFieldObject instanceof Map);
    Map map = (Map) mapFieldObject;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);

    mapValue = map.get(new Text("key2"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
Example 16
Source File: TestConvertAvroToORC.java From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_primitive_record() throws Exception {
    GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World");

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record");
    fileWriter.append(record);

    // And one more
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!");
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
            + " STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());
    Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string"));
    assertTrue(stringFieldObject instanceof Text);
    assertEquals("World", stringFieldObject.toString());
}
Example 17
Source File: AvroWithoutSchemaRegistryProducer.java From snowflake-kafka-connector with Apache License 2.0
@Override
public void send(final Enums.TestCases testCase) {
    System.out.println("loading table: " + testCase.getTableName()
            + " in format: " + testCase.getFormatName() + " to Kafka");
    try {
        Scanner scanner = getFileScanner(testCase);
        Schema schema = testCase.getTable().getSchema();
        while (scanner.hasNextLine()) {
            GenericData.Record record = new GenericData.Record(schema);
            GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
            ByteArrayOutputStream output = new ByteArrayOutputStream();
            DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(writer);
            fileWriter.create(schema, output);
            JsonNode data = Utils.MAPPER.readTree(scanner.nextLine());
            switch (testCase.getTable()) {
                case ONE_G_TABLE:
                    record.put("C_CUSTKEY", data.get("C_CUSTKEY").asLong());
                    record.put("C_NAME", data.get("C_NAME").asText());
                    record.put("C_ADDRESS", data.get("C_ADDRESS").asText());
                    record.put("C_PHONE", data.get("C_PHONE").asText());
                    record.put("C_ACCTBAL", data.get("C_ACCTBAL").asDouble());
                    record.put("C_MKTSEGMENT", data.get("C_MKTSEGMENT").asText());
                    record.put("C_COMMENT", data.get("C_COMMENT").asText());
                    record.put("C_NATIONKEY", data.get("C_NATIONKEY").asLong());
                    break;
                case THREE_HUNDRED_COLUMN_TABLE:
                    for (int i = 0; i < 300; i++) {
                        switch (i % 8) {
                            case 0:
                                record.put("C" + i, data.get("C" + i).asDouble());
                                break;
                            case 2:
                                record.put("C" + i, data.get("C" + i).asInt());
                                break;
                            case 4:
                                record.put("C" + i, data.get("C" + i).asLong());
                                break;
                            case 6:
                                record.put("C" + i, data.get("C" + i).asBoolean());
                                break;
                            default:
                                record.put("C" + i, data.get("C" + i).asText());
                        }
                    }
            }
            fileWriter.append(record);
            fileWriter.flush();
            fileWriter.close();
            send(Utils.TEST_TOPIC, output.toByteArray());
        }
        scanner.close();
        close();
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.out.println("finished loading");
}
Example 18
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_primitive_record() throws Exception {
    GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World");

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record");
    fileWriter.append(record);

    // And one more
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!");
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
            + " STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());
    Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string"));
    assertTrue(stringFieldObject instanceof Text);
    assertEquals("World", stringFieldObject.toString());
}
Example 19
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_complex_record() throws Exception {
    Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
        put("key1", 1.0);
        put("key2", 2.0);
    }};

    GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
        put("key1", 3.0);
        put("key2", 4.0);
    }};

    record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record "
            + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
            + " STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());
    Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
    assertTrue(mapFieldObject instanceof Map);
    Map map = (Map) mapFieldObject;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);

    mapValue = map.get(new Text("key2"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
Example 20
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_nested_complex_record() throws Exception {
    Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {{
        put("key1", Arrays.asList(1.0, 2.0));
        put("key2", Arrays.asList(3.0, 4.0));
    }};

    Map<String, String> arrayMap11 = new TreeMap<String, String>() {{
        put("key1", "v1");
        put("key2", "v2");
    }};
    Map<String, String> arrayMap12 = new TreeMap<String, String>() {{
        put("key3", "v3");
        put("key4", "v4");
    }};

    GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {{
        put("key1", Arrays.asList(-1.0, -2.0));
        put("key2", Arrays.asList(-3.0, -4.0));
    }};

    Map<String, String> arrayMap21 = new TreeMap<String, String>() {{
        put("key1", "v-1");
        put("key2", "v-2");
    }};
    Map<String, String> arrayMap22 = new TreeMap<String, String>() {{
        put("key3", "v-3");
        put("key4", "v-4");
    }};

    record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record "
            + "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)"
            + " STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // check values
    Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
    assertTrue(myMapOfArray instanceof Map);
    Map map = (Map) myMapOfArray;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof List);
    assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);

    Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
    assertTrue(myArrayOfMap instanceof List);
    List list = (List) myArrayOfMap;
    Object el0 = list.get(0);
    assertNotNull(el0);
    assertTrue(el0 instanceof Map);
    assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}