org.apache.avro.generic.GenericRecord Java Examples
The following examples show how to use
org.apache.avro.generic.GenericRecord.
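Before diving into the project examples, here is a minimal, self-contained sketch of the class in action. A GenericRecord is Avro's schema-driven record container: fields are set and read by name (or by position), with GenericData.Record as the stock implementation. The User schema below is invented purely for illustration.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class GenericRecordSketch {
  public static void main(String[] args) {
    // Define a trivial schema inline; real code usually loads it from an .avsc file.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"age\",\"type\":\"int\"}]}");

    // GenericData.Record is the standard GenericRecord implementation.
    GenericRecord user = new GenericData.Record(schema);
    user.put("name", "alice");
    user.put("age", 30);

    // Fields are read back by name, or by position via get(int).
    System.out.println(user.get("name") + " is " + user.get("age"));
  }
}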
Example #1
Source Project: DataflowTemplates Author: GoogleCloudPlatform File: AvroRecordConverter.java License: Apache License 2.0

private static Optional<Double> readFloat64(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  switch (avroType) {
    case INT:
      return Optional.ofNullable((Integer) record.get(fieldName)).map(x -> (double) x);
    case LONG:
      return Optional.ofNullable((Long) record.get(fieldName)).map(x -> (double) x);
    case FLOAT:
      return Optional.ofNullable((Float) record.get(fieldName)).map(x -> (double) x);
    case DOUBLE:
      return Optional.ofNullable((Double) record.get(fieldName));
    case STRING:
      return Optional.ofNullable((Utf8) record.get(fieldName))
          .map(Utf8::toString)
          .map(Double::valueOf);
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as FLOAT64");
  }
}
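One detail worth noting before the remaining examples: Avro's generic datum readers hand string data back as org.apache.avro.util.Utf8, not java.lang.String, which is why the STRING branch above casts to Utf8 first, and why record.get(...).toString() appears throughout the examples below. A hypothetical call to the converter above (the record and its "price" field are assumptions for illustration):

// Assuming the writer schema declared "price" as an Avro string,
// record.get("price") returns a Utf8 such as new Utf8("26.23").
Optional<Double> price = readFloat64(record, Schema.Type.STRING, "price");
price.ifPresent(p -> System.out.println("price = " + p));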
Example #2
Source Project: entrada Author: SIDN File: ParquetPartitionWriter.java License: GNU General Public License v3.0

public void write(GenericRecord rec, Schema schema, Partition partition) {
  String partitionStr = FileUtil.appendPath(path, partition.toPath());

  // check if the partition already exists; if not, create a new partition
  ParquetPartition<GenericRecord> parquetPartition = partitions
      .computeIfAbsent(partitionStr, k -> new ParquetPartition<>(partitionStr, schema));

  // write the rec to the partition
  parquetPartition.write(rec);

  // check if the parquet partition has grown too big
  if (parquetPartition.getRows() >= maxRows) {
    log.info(
        "Max DNS packets reached for this Parquet partition {}, close current file and create new",
        partitionStr);
    parquetPartition.close();
    // remove the partition from the partitions map; for a possible next row for this
    // partition, a new partition object and parquet file will be created.
    partitions.remove(partitionStr);
  }
}
Example #3
Source Project: registry Author: hortonworks File: KafkaAvroSerdesTest.java License: Apache License 2.0

@Test
public void testGenericSerializedSpecificDeserialized() {
  Map<String, Object> config = new HashMap<>();
  config.put(AvroSnapshotDeserializer.SPECIFIC_AVRO_READER, true);
  KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
  kafkaAvroDeserializer.configure(config, false);

  KafkaAvroSerializer kafkaAvroSerializer = new KafkaAvroSerializer(schemaRegistryClient);
  kafkaAvroSerializer.configure(Collections.emptyMap(), false);

  GenericRecord record = new GenericRecordBuilder(schema)
      .set("field1", "some value")
      .set("field2", "some other value")
      .build();

  byte[] payload = kafkaAvroSerializer.serialize(topic, record);
  Object o = kafkaAvroDeserializer.deserialize(topic, payload);
  checkGenericSerializedSpecificDeserializedEquals(record, o);

  Headers headers = new RecordHeaders();
  payload = kafkaAvroSerializer.serialize(topic, headers, record);
  o = kafkaAvroDeserializer.deserialize(topic, headers, payload);
  checkGenericSerializedSpecificDeserializedEquals(record, o);
}
Example #4
Source Project: schema-evolution-samples Author: viniciusccarvalho File: AvroCodecTests.java License: Apache License 2.0

@Test
public void genericEncoderV2GenericDecoderV2() throws Exception {
  Schema schema = load("users_v2.schema");
  SchemaRegistryClient client = mock(SchemaRegistryClient.class);
  AvroCodec codec = new AvroCodec();
  codec.setSchemaRegistryClient(client);
  when(client.register(any())).thenReturn(2);
  when(client.fetch(eq(2))).thenReturn(schema);

  GenericRecord record = new GenericData.Record(schema);
  record.put("name", "joe");
  record.put("favoriteNumber", 42);
  record.put("favoriteColor", "blue");
  record.put("favoritePlace", "Paris");

  byte[] results = codec.encode(record);
  GenericRecord decoded = codec.decode(results, GenericRecord.class);
  Assert.assertEquals(record.get("favoritePlace").toString(),
      decoded.get("favoritePlace").toString());
}
Example #5
Source Project: kite-examples Author: kite-sdk File: ReadDataset.java License: Apache License 2.0

@Override
public int run(String[] args) throws Exception {
  // Load the events dataset
  Dataset<GenericRecord> events = Datasets.load("dataset:hive:/tmp/data/default/events");

  // Get a reader for the dataset and read all the events
  DatasetReader<GenericRecord> reader = events.newReader();
  try {
    for (GenericRecord event : reader) {
      System.out.println(event);
    }
  } finally {
    reader.close();
  }
  return 0;
}
Example #6
Source Project: parquet-mr Author: apache File: TestReflectLogicalTypes.java License: Apache License 2.0

@Test
public void testReadUUIDList() throws IOException {
  Schema uuidListSchema = SchemaBuilder.record(RecordWithUUIDList.class.getName())
      .fields()
      .name("uuids").type().array().items().stringType().noDefault()
      .endRecord();
  uuidListSchema.getField("uuids").schema().addProp(
      SpecificData.CLASS_PROP, List.class.getName());
  LogicalTypes.uuid().addToSchema(
      uuidListSchema.getField("uuids").schema().getElementType());

  UUID u1 = UUID.randomUUID();
  UUID u2 = UUID.randomUUID();

  GenericRecord r = new GenericData.Record(uuidListSchema);
  r.put("uuids", Arrays.asList(u1.toString(), u2.toString()));

  RecordWithUUIDList expected = new RecordWithUUIDList();
  expected.uuids = Arrays.asList(u1, u2);

  File test = write(uuidListSchema, r);

  Assert.assertEquals("Should convert Strings to UUIDs",
      expected, read(REFLECT, uuidListSchema, test).get(0));
}
Example #7
Source Project: kite Author: kite-sdk File: TestTransformCommandCluster.java License: Apache License 2.0

@Test
public void testTransform() throws Exception {
  command.repoURI = repoUri;
  command.transform = "org.kitesdk.cli.example.ToUpperCase";
  command.datasets = Lists.newArrayList(source, dest);

  int rc = command.run();
  Assert.assertEquals("Should return success", 0, rc);

  DatasetRepository repo = DatasetRepositories.repositoryFor("repo:" + repoUri);
  Set<GenericRecord> records = DatasetTestUtilities.materialize(
      repo.<GenericRecord>load("default", dest));
  Assert.assertEquals("Should contain copied records", 6, records.size());
  for (GenericRecord record : records) {
    Assert.assertTrue("Username should be upper case",
        UPPER_CASE.matcher(record.get("username").toString()).matches());
  }
}
Example #8
Source Project: kite Author: kite-sdk File: TestHiveDatasetURIsCompatibility.java License: Apache License 2.0

@Test
public void testLoadChangedRelativePathURICompatibility() {
  // this used to be a relative external URI, but is now a managed URI
  String uri = "dataset:hive:data/ds";

  DatasetRepository repo = DatasetRepositories
      .repositoryFor("repo:hive:/tmp/data");
  DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR)
      .location("file:/tmp/data/ds") // old location
      .build();
  Dataset<GenericRecord> expected = repo.create(
      "default", "ds", withLocation, GenericRecord.class);

  Dataset<GenericRecord> actual = Datasets.load(uri);

  Assert.assertEquals("Should load existing dataset default.ds",
      expected, actual);
  Assert.assertEquals("URI should use apparent namespace",
      "dataset:hive:data/ds", actual.getUri().toString());

  Assert.assertTrue(Datasets.delete(uri));
}
Example #9
Source Project: kareldb Author: rayokota File: KafkaValueDeserializer.java License: Apache License 2.0

private NavigableMap<Long, VersionedValue> toValue(GenericArray<GenericRecord> array) {
  NavigableMap<Long, VersionedValue> map = new TreeMap<>();
  Schema recordSchema = avroSchema.getElementType();
  List<Schema.Field> fields = recordSchema.getFields();
  int size = fields.size();
  for (GenericRecord record : array) {
    Long version = (Long) record.get(0);
    Long commit = (Long) record.get(1);
    boolean deleted = (Boolean) record.get(2);
    Comparable[] row = new Comparable[size - 3];
    for (int i = 0; i < row.length; i++) {
      Schema schema = fields.get(i + 3).schema();
      Comparable value = (Comparable) record.get(i + 3);
      row[i] = AvroSchema.fromAvroValue(schema, value);
    }
    map.put(version, new VersionedValue(version, commit, deleted, row));
  }
  return map;
}
Example #10
Source Project: aliyun-maxcompute-data-collectors Author: aliyun File: TestParquetImport.java License: Apache License 2.0

public void testIncrementalParquetImport() throws IOException, SQLException {
  String[] types = {"INT"};
  String[] vals = {"1"};
  createTableWithColTypes(types, vals);

  runImport(getOutputArgv(true, null));
  runImport(getOutputArgv(true, new String[]{"--append"}));

  DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    GenericRecord record1 = reader.next();
    assertEquals(1, record1.get("DATA_COL0"));
    record1 = reader.next();
    assertEquals(1, record1.get("DATA_COL0"));
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example #11
Source Project: samza Author: apache File: TestSamzaSqlEndToEnd.java License: Apache License 2.0

@Ignore
@Test
public void testEndToEndFanOut() throws SamzaSqlValidatorException {
  int numMessages = 20;
  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
  String sql1 = "Insert into testavro.SIMPLE2 select * from testavro.SIMPLE1";
  String sql2 = "Insert into testavro.SIMPLE3 select * from testavro.SIMPLE1";
  List<String> sqlStmts = Arrays.asList(sql1, sql2);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));

  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(sqlStmts);

  runApplication(config);

  List<Integer> outMessages = TestAvroSystemFactory.messages.stream()
      .map(x -> Integer.valueOf(((GenericRecord) x.getMessage()).get("id").toString()))
      .sorted()
      .collect(Collectors.toList());
  Assert.assertEquals(numMessages * 2, outMessages.size());
  Set<Integer> outMessagesSet = new HashSet<>(outMessages);
  Assert.assertEquals(numMessages, outMessagesSet.size());
  Assert.assertTrue(IntStream.range(0, numMessages).boxed().collect(Collectors.toList())
      .equals(new ArrayList<>(outMessagesSet)));
}
Example #12
Source Project: flink Author: apache File: AvroKeyValueSinkWriter.java License: Apache License 2.0

AvroKeyValueWriter(Schema keySchema, Schema valueSchema,
    CodecFactory compressionCodec, OutputStream outputStream,
    int syncInterval) throws IOException {
  // Create the generic record schema for the key/value pair.
  mKeyValuePairSchema = AvroKeyValue.getSchema(keySchema, valueSchema);

  // Create an Avro container file and a writer to it.
  DatumWriter<GenericRecord> genericDatumWriter =
      new GenericDatumWriter<GenericRecord>(mKeyValuePairSchema);
  mAvroFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
  mAvroFileWriter.setCodec(compressionCodec);
  mAvroFileWriter.setSyncInterval(syncInterval);
  mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

  // Create a reusable output record.
  mOutputRecord = new AvroKeyValue<Object, Object>(
      new GenericData.Record(mKeyValuePairSchema));
}
Example #13
Source Project: DataflowTemplates Author: GoogleCloudPlatform File: AvroConvertersTest.java License: Apache License 2.0

/** Tests if {@link AvroConverters.ReadAvroFile} reads an Avro file correctly. */
@Test
public void testReadAvroFile() {
  Schema schema = SchemaUtils.getAvroSchema(SCHEMA_FILE_PATH);
  GenericRecord genericRecord = new GenericData.Record(schema);
  genericRecord.put("id", "007");
  genericRecord.put("state", "CA");
  genericRecord.put("price", 26.23);

  PCollection<GenericRecord> pCollection =
      pipeline.apply(
          "ReadAvroFile",
          AvroConverters.ReadAvroFile.newBuilder()
              .withInputFileSpec(AVRO_FILE_PATH)
              .withSchema(SCHEMA_FILE_PATH)
              .build());

  PAssert.that(pCollection).containsInAnyOrder(genericRecord);

  pipeline.run();
}
Example #14
Source Project: incubator-gobblin Author: apache File: AsyncHttpJoinConverter.java License: Apache License 2.0

/**
 * Convert an input record to a future object where an output record will be filled in some time later.
 * Sequence:
 *   1. Convert input (DI) to an http request.
 *   2. Send the http request asynchronously, and register an http callback.
 *   3. Create a {@link CompletableFuture} object. When the callback is invoked, this future object
 *      is filled in by an output record which is converted from the http response.
 *   4. Return the future object.
 */
@Override
public final CompletableFuture<DO> convertRecordAsync(SO outputSchema, DI inputRecord,
    WorkUnitState workUnitState) throws DataConversionException {

  // Convert DI to HttpOperation
  HttpOperation operation = generateHttpOperation(inputRecord, workUnitState);
  BufferedRecord<GenericRecord> bufferedRecord = new BufferedRecord<>(operation, WriteCallback.EMPTY);

  // Convert HttpOperation to RQ
  Queue<BufferedRecord<GenericRecord>> buffer = new LinkedBlockingDeque<>();
  buffer.add(bufferedRecord);
  AsyncRequest<GenericRecord, RQ> request = this.requestBuilder.buildRequest(buffer);
  RQ rawRequest = request.getRawRequest();

  // Execute query and get response
  AsyncHttpJoinConverterContext context =
      new AsyncHttpJoinConverterContext(this, outputSchema, inputRecord, request);

  try {
    httpClient.sendAsyncRequest(rawRequest, context.getCallback());
  } catch (IOException e) {
    throw new DataConversionException(e);
  }

  return context.future;
}
Example #15
Source Project: Cubert Author: linkedin File: AvroUtils.java License: Apache License 2.0

public static void createFileIfNotExists(BlockSchema fileSchema, String path) throws IOException {
  Configuration conf = new JobConf();
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(new Path(path)))
    return;

  Schema avroSchema = convertFromBlockSchema("CUBERT_MV_RECORD", fileSchema);
  System.out.println("Creating avro file with schema = " + avroSchema);

  GenericDatumWriter<GenericRecord> datumWriter =
      new GenericDatumWriter<GenericRecord>(avroSchema);
  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(datumWriter);

  FSDataOutputStream fout = FileSystem.create(fs,
      new Path(path),
      new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.READ_EXECUTE));
  writer.create(avroSchema, fout);
  writer.flush();
  writer.close();
}
Example #16
Source Project: beam Author: apache File: AvroIO.java License: Apache License 2.0

@SuppressWarnings("unchecked")
private static <T> AvroSource<T> createSource(
    ValueProvider<String> filepattern,
    EmptyMatchTreatment emptyMatchTreatment,
    Class<T> recordClass,
    Schema schema,
    @Nullable AvroSource.DatumReaderFactory<T> readerFactory) {
  AvroSource<?> source =
      AvroSource.from(filepattern).withEmptyMatchTreatment(emptyMatchTreatment);

  if (readerFactory != null) {
    source = source.withDatumReaderFactory(readerFactory);
  }
  return recordClass == GenericRecord.class
      ? (AvroSource<T>) source.withSchema(schema)
      : source.withSchema(recordClass);
}
Example #17
Source Project: HBase-ToHDFS Author: tmalaska File: ParquetReader.java License: Apache License 2.0

public static void main(String[] args) throws IOException {
  if (args.length == 0) {
    System.out.println("ParquetReader {dataFile} {max.lines.to.read.optional}");
    return; // no input file given; exit instead of failing on args[0] below
  }

  String dataFile = args[0];
  int recordsToRead = Integer.MAX_VALUE;
  if (args.length > 1) {
    recordsToRead = Integer.parseInt(args[1]);
  }

  Path dataFilePath = new Path(dataFile);

  AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(dataFilePath);

  Object tmpValue;
  int counter = 0;
  while ((tmpValue = reader.read()) != null && counter++ < recordsToRead) {
    GenericRecord r = (GenericRecord) tmpValue;
    System.out.println(counter + " : " + r);
  }
}
Example #18
Source Project: beam Author: apache File: AvroIOTest.java License: Apache License 2.0

private <T extends GenericRecord> void testWriteThenReadGeneratedClass(
    AvroIO.Write<T> writeTransform, AvroIO.Read<T> readTransform) throws Exception {
  File outputFile = tmpFolder.newFile("output.avro");

  List<T> values =
      ImmutableList.of(
          (T) new AvroGeneratedUser("Bob", 256, null),
          (T) new AvroGeneratedUser("Alice", 128, null),
          (T) new AvroGeneratedUser("Ted", null, "white"));

  writePipeline
      .apply(Create.of(values))
      .apply(
          writeTransform
              .to(writePipeline.newProvider(outputFile.getAbsolutePath()))
              .withoutSharding());
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(
              "Read",
              readTransform.from(readPipeline.newProvider(outputFile.getAbsolutePath()))))
      .containsInAnyOrder(values);

  readPipeline.run();
}
Example #19
Source Project: avro-util Author: linkedin File: FastGenericSerializerGeneratorTest.java License: BSD 2-Clause "Simplified" License

@Test(groups = {"serializationTest"})
public void shouldWriteRightUnionIndex() {
  // Create two record schemas
  Schema recordSchema1 = createRecord("record1",
      createField("record1_field1", Schema.create(Schema.Type.STRING)));
  Schema recordSchema2 = createRecord("record2",
      createField("record2_field1", Schema.create(Schema.Type.STRING)));
  Schema unionSchema = createUnionSchema(recordSchema1, recordSchema2);
  Schema recordWrapperSchema = createRecord(createField("union_field", unionSchema));

  GenericData.Record objectOfRecordSchema2 = new GenericData.Record(recordSchema2);
  objectOfRecordSchema2.put("record2_field1", "abc");
  GenericData.Record wrapperObject = new GenericData.Record(recordWrapperSchema);
  wrapperObject.put("union_field", objectOfRecordSchema2);

  GenericRecord record = decodeRecord(recordWrapperSchema, dataAsBinaryDecoder(wrapperObject));

  Object unionField = record.get("union_field");
  Assert.assertTrue(unionField instanceof GenericData.Record);
  GenericData.Record unionRecord = (GenericData.Record) unionField;
  Assert.assertEquals(unionRecord.getSchema().getName(), "record2");
}
Example #20
Source Project: datafu Author: apache File: ImpressionClickPartitionPreservingJob.java License: Apache License 2.0

@Override
public void accumulate(GenericRecord value) {
  if (value.get("type").toString().equals("click")) {
    clicks++;
  } else if (value.get("type").toString().equals("impression")) {
    impressions++;
  } else {
    throw new RuntimeException("Didn't expect: " + value.get("type"));
  }
}
Example #21
Source Project: components Author: Talend File: PythonRowDoFnTest.java License: Apache License 2.0

/**
 * Compare Avro record field values.
 */
public void compareRecords(final IndexedRecord expectedRecord, final GenericRecord outputRecord) {
  // a1
  assertEquals(expectedRecord.get(0).toString(), outputRecord.get(0).toString());

  // B
  GenericRecord outputRecordB = (GenericRecord) outputRecord.get(1);
  GenericRecord expectedRecordB = (GenericRecord) expectedRecord.get(1);
  // B.b1
  assertEquals(expectedRecordB.get("b1").toString(), outputRecordB.get(0).toString());
  // B.b2
  assertEquals(expectedRecordB.get("b2").toString(), outputRecordB.get(2).toString());

  // C
  GenericRecord outputRecordC = (GenericRecord) outputRecordB.get(1);
  GenericRecord expectedRecordC = (GenericRecord) expectedRecordB.get(1);
  assertEquals(expectedRecordC.toString(), outputRecordC.toString());
}
Example #22
Source Project: Flink-CEPplus Author: ljygz File: AvroOutputFormatTest.java License: Apache License 2.0

@Test
public void testGenericRecord() throws IOException {
  final Path outputPath =
      new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
  final AvroOutputFormat<GenericRecord> outputFormat =
      new AvroOutputFormat<>(outputPath, GenericRecord.class);
  Schema schema = new Schema.Parser().parse(
      "{\"type\":\"record\", \"name\":\"user\", \"fields\": ["
          + "{\"name\":\"user_name\", \"type\":\"string\"}, "
          + "{\"name\":\"favorite_number\", \"type\":\"int\"}, "
          + "{\"name\":\"favorite_color\", \"type\":\"string\"}]}");
  outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
  outputFormat.setSchema(schema);
  output(outputFormat, schema);

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
  DataFileReader<GenericRecord> dataFileReader =
      new DataFileReader<>(new File(outputPath.getPath()), reader);

  while (dataFileReader.hasNext()) {
    GenericRecord record = dataFileReader.next();
    assertEquals(record.get("user_name").toString(), "testUser");
    assertEquals(record.get("favorite_number"), 1);
    assertEquals(record.get("favorite_color").toString(), "blue");
  }

  // cleanup
  FileSystem fs = FileSystem.getLocalFileSystem();
  fs.delete(outputPath, false);
}
Example #23
Source Project: localization_nifi Author: wangrenlei File: TransformAvroToCSV.java License: Apache License 2.0

@Override
protected Map<String, String> transform(InputStream in, OutputStream out,
    InvocationContextProperties contextProperties, Schema schema) {
  byte[] buff = null;
  try {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    IOUtils.copy(in, bos);
    buff = bos.toByteArray();
  } catch (Exception e) {
    e.printStackTrace();
  }
  ByteArrayInputStream is = new ByteArrayInputStream(buff);
  GenericRecord avroRecord = AvroUtils.read(is, schema);
  CSVUtils.write(avroRecord, this.delimiter, out);
  return Collections.singletonMap(CoreAttributes.MIME_TYPE.key(), "text/csv");
}
Example #24
Source Project: incubator-gobblin Author: apache File: ApacheHttpRequestBuilderTest.java License: Apache License 2.0

/**
 * Build a {@link HttpUriRequest} from a {@link GenericRecord}
 */
public void testBuildWriteRequest() throws IOException {
  String urlTemplate = "http://www.test.com/a/part1:${part1}/a/part2:${part2}";
  String verb = "post";
  ApacheHttpRequestBuilder builder =
      spy(new ApacheHttpRequestBuilder(urlTemplate, verb, "application/json"));
  ArgumentCaptor<RequestBuilder> requestBuilderArgument =
      ArgumentCaptor.forClass(RequestBuilder.class);

  Queue<BufferedRecord<GenericRecord>> queue = HttpTestUtils.createQueue(1, false);
  AsyncRequest<GenericRecord, HttpUriRequest> request = builder.buildRequest(queue);
  verify(builder).build(requestBuilderArgument.capture());

  RequestBuilder expected = RequestBuilder.post();
  expected.setUri("http://www.test.com/a/part1:01/a/part2:02?param1=01");
  String payloadStr = "{\"id\":\"id0\"}";
  expected.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType())
      .setEntity(new StringEntity(payloadStr, ContentType.APPLICATION_JSON));

  // Compare HttpUriRequest
  HttpTestUtils.assertEqual(requestBuilderArgument.getValue(), expected);
  Assert.assertEquals(request.getRecordCount(), 1);
  Assert.assertEquals(queue.size(), 0);
}
Example #25
Source Project: secor Author: pinterest File: AvroSplitByFieldMessageParser.java License: Apache License 2.0

protected String extractEventType(GenericRecord record) {
  Object fieldValue = record.get(mSplitFieldName);
  if (fieldValue == null) {
    throw new RuntimeException("Could not find key " + mSplitFieldName + " in Avro message");
  }
  return fieldValue.toString();
}
Example #26
Source Project: avro-util Author: linkedin File: FastGenericSerializerGeneratorTest.java License: BSD 2-Clause "Simplified" License

@Test(groups = {"serializationTest"})
public void shouldWriteMultipleChoiceUnion() {
  // given
  Schema subRecordSchema = createRecord("subRecord",
      createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));
  Schema recordSchema = createRecord(
      createUnionField("union", subRecordSchema, Schema.create(Schema.Type.STRING),
          Schema.create(Schema.Type.INT)));

  GenericData.Record subRecordBuilder = new GenericData.Record(subRecordSchema);
  subRecordBuilder.put("subField", "abc");

  GenericData.Record builder = new GenericData.Record(recordSchema);
  builder.put("union", subRecordBuilder);

  // when
  GenericRecord record = decodeRecord(recordSchema, dataAsBinaryDecoder(builder));

  // then
  Assert.assertEquals("abc",
      ((GenericData.Record) record.get("union")).get("subField").toString());

  // given
  builder = new GenericData.Record(recordSchema);
  builder.put("union", "abc");

  // when
  record = decodeRecord(recordSchema, dataAsBinaryDecoder(builder));

  // then
  Assert.assertEquals("abc", record.get("union").toString());

  // given
  builder = new GenericData.Record(recordSchema);
  builder.put("union", 1);

  // when
  record = decodeRecord(recordSchema, dataAsBinaryDecoder(builder));

  // then
  Assert.assertEquals(1, record.get("union"));
}
Example #27
Source Project: localization_nifi Author: wangrenlei File: TestMergeContent.java License: Apache License 2.0

private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key)
    throws IOException {
  // create a reader for the merged content
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
  SeekableByteArrayInput input = new SeekableByteArrayInput(data);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader);

  // read all the records into a map to verify all the records are there
  Map<String, GenericRecord> records = new HashMap<>();
  while (dataFileReader.hasNext()) {
    GenericRecord user = dataFileReader.next();
    records.put(user.get(key).toString(), user);
  }
  return records;
}
Example #28
Source Project: incubator-pinot Author: apache File: IntArraysTest.java License: Apache License 2.0

@BeforeClass
public static void before() throws Exception {
  final String filePath =
      TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }

  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  final SegmentGeneratorConfig config = SegmentTestUtils
      .getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR,
          "weeksSinceEpochSunday", TimeUnit.DAYS, "test");

  // The segment generation code in SegmentColumnarIndexCreator will throw
  // exception if start and end time in time column are not in acceptable
  // range. For this test, we first need to fix the input avro data
  // to have the time column values in allowed range. Until then, the check
  // is explicitly disabled.
  config.setSkipTimeValueCheck(true);
  driver.init(config);
  driver.build();

  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
Example #29
Source Project: incubator-gobblin Author: apache File: TimeBasedAvroWriterPartitioner.java License: Apache License 2.0

/**
 * Retrieve the value of the partition column field specified by this.partitionColumns
 */
private Optional<Object> getWriterPartitionColumnValue(GenericRecord record) {
  if (!this.partitionColumns.isPresent()) {
    return Optional.absent();
  }

  for (String partitionColumn : this.partitionColumns.get()) {
    Optional<Object> fieldValue = AvroUtils.getFieldValue(record, partitionColumn);
    if (fieldValue.isPresent()) {
      return fieldValue;
    }
  }
  return Optional.absent();
}
Example #30
Source Project: samza Author: apache File: TestSamzaSqlEndToEnd.java License: Apache License 2.0

@Test
public void testEndToEndStreamTableInnerJoinWithFilter() throws Exception {
  int numMessages = 20;
  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
  String sql =
      "Insert into testavro.enrichedPageViewTopic "
          + "select pv.pageKey as __key__, pv.pageKey as pageKey, p.name as companyName, p.name as profileName,"
          + " p.address as profileAddress "
          + "from testavro.PROFILE.`$table` as p "
          + "join testavro.PAGEVIEW as pv "
          + " on p.id = pv.profileId "
          + "where p.name = 'Mike'";
  List<String> sqlStmts = Arrays.asList(sql);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));

  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(sqlStmts);

  runApplication(config);

  List<String> outMessages = TestAvroSystemFactory.messages.stream()
      .map(x -> ((GenericRecord) x.getMessage()).get("pageKey").toString() + ","
          + (((GenericRecord) x.getMessage()).get("profileName") == null ? "null"
              : ((GenericRecord) x.getMessage()).get("profileName").toString()))
      .collect(Collectors.toList());
  Assert.assertEquals(4, outMessages.size());
  List<String> expectedOutMessages = TestAvroSystemFactory.getPageKeyProfileNameJoin(numMessages)
      .stream()
      .filter(msg -> msg.endsWith("Mike"))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedOutMessages, outMessages);
}