Java Code Examples for org.apache.avro.generic.GenericRecord

The following examples show how to use org.apache.avro.generic.GenericRecord. They are extracted from open source projects; where available, the source project, file, and license are listed above each example.
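
Before working through the examples, here is a minimal, self-contained sketch of the core GenericRecord workflow: parse a schema, build a record, and round-trip it through Avro's binary encoding. The schema and field names below are made up for illustration and are not taken from any of the examples.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class GenericRecordRoundTrip {
  public static void main(String[] args) throws IOException {
    // hypothetical schema, used only for illustration
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"age\",\"type\":\"int\"}]}");

    GenericRecord user = new GenericData.Record(schema);
    user.put("name", "alice");
    user.put("age", 42);

    // serialize to Avro binary
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(schema).write(user, encoder);
    encoder.flush();

    // deserialize; note that string fields come back as org.apache.avro.util.Utf8
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
    GenericRecord copy = new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
    System.out.println(copy.get("name") + " / " + copy.get("age"));
  }
}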
Example 1
Source Project: DataflowTemplates   Source File: AvroRecordConverter.java    License: Apache License 2.0
private static Optional<Double> readFloat64(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  switch (avroType) {
    case INT:
      return Optional.ofNullable((Integer) record.get(fieldName)).map(x -> (double) x);
    case LONG:
      return Optional.ofNullable((Long) record.get(fieldName)).map(x -> (double) x);
    case FLOAT:
      return Optional.ofNullable((Float) record.get(fieldName)).map(x -> (double) x);
    case DOUBLE:
      return Optional.ofNullable((Double) record.get(fieldName));
    case STRING:
      return Optional.ofNullable((Utf8) record.get(fieldName))
          .map(Utf8::toString)
          .map(Double::valueOf);
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as FLOAT64");
  }
}
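
A hypothetical call site for the helper above (the record and field name are made up). Note that the STRING branch goes through Utf8 because Avro's generic representation returns org.apache.avro.util.Utf8, not java.lang.String, for string fields.

// assuming 'record' holds a field "price" of Avro type LONG
Optional<Double> price = readFloat64(record, Schema.Type.LONG, "price");
double value = price.orElse(0.0);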
 
Example 2
public void write(GenericRecord rec, Schema schema, Partition partition) {

    String partitionStr = FileUtil.appendPath(path, partition.toPath());
    // check if the partition already exists; if not, create a new partition
    ParquetPartition<GenericRecord> parquetPartition =
        partitions.computeIfAbsent(partitionStr, k -> new ParquetPartition<>(partitionStr, schema));

    // write the rec to the partition
    parquetPartition.write(rec);

    // check if the Parquet partition has grown too large
    if (parquetPartition.getRows() >= maxRows) {
      log.info(
          "Max DNS packets reached for this Parquet partition {}, closing current file and creating a new one",
          partitionStr);

      parquetPartition.close();
      // remove the partition from the partitions map; when the next row arrives for this
      // partition, a new partition object and Parquet file will be created.
      partitions.remove(partitionStr);
    }
  }
 
Example 3
Source Project: registry   Source File: KafkaAvroSerdesTest.java    License: Apache License 2.0
@Test
public void testGenericSerializedSpecificDeserialized() {
    Map<String, Object> config = new HashMap<>();
    config.put(AvroSnapshotDeserializer.SPECIFIC_AVRO_READER, true);
    KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
    kafkaAvroDeserializer.configure(config, false);

    KafkaAvroSerializer kafkaAvroSerializer = new KafkaAvroSerializer(schemaRegistryClient);
    kafkaAvroSerializer.configure(Collections.emptyMap(), false);

    GenericRecord record = new GenericRecordBuilder(schema).set("field1", "some value").set("field2", "some other value").build();

    byte[] payload = kafkaAvroSerializer.serialize(topic, record);
    Object o = kafkaAvroDeserializer.deserialize(topic, payload);
    checkGenericSerializedSpecificDeserializedEquals(record, o);

    Headers headers = new RecordHeaders();
    payload = kafkaAvroSerializer.serialize(topic, headers, record);
    o = kafkaAvroDeserializer.deserialize(topic, headers, payload);
    checkGenericSerializedSpecificDeserializedEquals(record, o);
}
 
Example 4
Source Project: schema-evolution-samples   Source File: AvroCodecTests.java    License: Apache License 2.0
@Test
public void genericEncoderV2GenericDecoderV2() throws Exception{
	Schema schema = load("users_v2.schema");
	SchemaRegistryClient client = mock(SchemaRegistryClient.class);
	AvroCodec codec = new AvroCodec();
	codec.setSchemaRegistryClient(client);
	when(client.register(any())).thenReturn(2);
	when(client.fetch(eq(2))).thenReturn(schema);
	GenericRecord record = new GenericData.Record(schema);
	record.put("name","joe");
	record.put("favoriteNumber",42);
	record.put("favoriteColor","blue");
	record.put("favoritePlace","Paris");
	byte[] results = codec.encode(record);
	GenericRecord decoded = codec.decode(results,GenericRecord.class);
	Assert.assertEquals(record.get("favoritePlace").toString(),decoded.get("favoritePlace").toString());
}
 
Example 5
Source Project: kite-examples   Source File: ReadDataset.java    License: Apache License 2.0
@Override
public int run(String[] args) throws Exception {

  // Load the events dataset
  Dataset<GenericRecord> events = Datasets.load("dataset:hive:/tmp/data/default/events");

  // Get a reader for the dataset and read all the events
  DatasetReader<GenericRecord> reader = events.newReader();
  try {
    for (GenericRecord event : reader) {
      System.out.println(event);
    }
  } finally {
    reader.close();
  }

  return 0;
}
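
Kite's DatasetReader implements both Iterable and java.io.Closeable, so on Java 7+ the try/finally above can be collapsed into try-with-resources; a sketch of the same loop:

  try (DatasetReader<GenericRecord> reader = events.newReader()) {
    for (GenericRecord event : reader) {
      System.out.println(event);
    }
  }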
 
Example 6
Source Project: parquet-mr   Source File: TestReflectLogicalTypes.java    License: Apache License 2.0
@Test
public void testReadUUIDList() throws IOException {
  Schema uuidListSchema = SchemaBuilder.record(RecordWithUUIDList.class.getName())
      .fields()
      .name("uuids").type().array().items().stringType().noDefault()
      .endRecord();
  uuidListSchema.getField("uuids").schema().addProp(
      SpecificData.CLASS_PROP, List.class.getName());
  LogicalTypes.uuid().addToSchema(
      uuidListSchema.getField("uuids").schema().getElementType());

  UUID u1 = UUID.randomUUID();
  UUID u2 = UUID.randomUUID();

  GenericRecord r = new GenericData.Record(uuidListSchema);
  r.put("uuids", Arrays.asList(u1.toString(), u2.toString()));

  RecordWithUUIDList expected = new RecordWithUUIDList();
  expected.uuids = Arrays.asList(u1, u2);

  File test = write(uuidListSchema, r);

  Assert.assertEquals("Should convert Strings to UUIDs",
      expected, read(REFLECT, uuidListSchema, test).get(0));
}
 
Example 7
Source Project: kite   Source File: TestTransformCommandCluster.java    License: Apache License 2.0
@Test
public void testTransform() throws Exception {
  command.repoURI = repoUri;
  command.transform = "org.kitesdk.cli.example.ToUpperCase";
  command.datasets = Lists.newArrayList(source, dest);

  int rc = command.run();
  Assert.assertEquals("Should return success", 0, rc);

  DatasetRepository repo = DatasetRepositories.repositoryFor("repo:" + repoUri);
  Set<GenericRecord> records = DatasetTestUtilities.materialize(
      repo.<GenericRecord>load("default", dest));
  Assert.assertEquals("Should contain copied records", 6, records.size());
  for (GenericRecord record : records) {
    Assert.assertTrue("Username should be upper case",
        UPPER_CASE.matcher(record.get("username").toString()).matches());
  }
}
 
Example 8
Source Project: kite   Source File: TestHiveDatasetURIsCompatibility.java    License: Apache License 2.0
@Test
public void testLoadChangedRelativePathURICompatibility() {
  // this used to be a relative external URI, but is now a managed URI
  String uri = "dataset:hive:data/ds";

  DatasetRepository repo = DatasetRepositories
      .repositoryFor("repo:hive:/tmp/data");
  DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR)
      .location("file:/tmp/data/ds") // old location
      .build();
  Dataset<GenericRecord> expected = repo.create(
      "default", "ds", withLocation, GenericRecord.class);

  Dataset<GenericRecord> actual = Datasets.load(uri);
  Assert.assertEquals("Should load existing dataset default.ds",
      expected, actual);

  Assert.assertEquals("URI should use apparent namespace",
      "dataset:hive:data/ds", actual.getUri().toString());

  Assert.assertTrue(Datasets.delete(uri));
}
 
Example 9
Source Project: kareldb   Source File: KafkaValueDeserializer.java    License: Apache License 2.0
private NavigableMap<Long, VersionedValue> toValue(GenericArray<GenericRecord> array) {
    NavigableMap<Long, VersionedValue> map = new TreeMap<>();
    Schema recordSchema = avroSchema.getElementType();
    List<Schema.Field> fields = recordSchema.getFields();
    int size = fields.size();
    for (GenericRecord record : array) {
        Long version = (Long) record.get(0);
        Long commit = (Long) record.get(1);
        boolean deleted = (Boolean) record.get(2);
        Comparable[] row = new Comparable[size - 3];
        for (int i = 0; i < row.length; i++) {
            Schema schema = fields.get(i + 3).schema();
            Comparable value = (Comparable) record.get(i + 3);
            row[i] = AvroSchema.fromAvroValue(schema, value);
        }
        map.put(version, new VersionedValue(version, commit, deleted, row));
    }
    return map;
}
 
Example 10
public void testIncrementalParquetImport() throws IOException, SQLException {
  String [] types = { "INT" };
  String [] vals = { "1" };
  createTableWithColTypes(types, vals);

  runImport(getOutputArgv(true, null));
  runImport(getOutputArgv(true, new String[]{"--append"}));

  DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    GenericRecord record1 = reader.next();
    assertEquals(1, record1.get("DATA_COL0"));
    record1 = reader.next();
    assertEquals(1, record1.get("DATA_COL0"));
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
 
Example 11
Source Project: samza   Source File: TestSamzaSqlEndToEnd.java    License: Apache License 2.0
@Ignore
@Test
public void testEndToEndFanOut() throws SamzaSqlValidatorException {
  int numMessages = 20;
  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
  String sql1 = "Insert into testavro.SIMPLE2 select * from testavro.SIMPLE1";
  String sql2 = "Insert into testavro.SIMPLE3 select * from testavro.SIMPLE1";
  List<String> sqlStmts = Arrays.asList(sql1, sql2);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));

  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(sqlStmts);

  runApplication(config);

  List<Integer> outMessages = TestAvroSystemFactory.messages.stream()
      .map(x -> Integer.valueOf(((GenericRecord) x.getMessage()).get("id").toString()))
      .sorted()
      .collect(Collectors.toList());
  Assert.assertEquals(numMessages * 2, outMessages.size());
  Set<Integer> outMessagesSet = new HashSet<>(outMessages);
  Assert.assertEquals(numMessages, outMessagesSet.size());
  Assert.assertTrue(IntStream.range(0, numMessages).boxed().collect(Collectors.toList()).equals(new ArrayList<>(outMessagesSet)));
}
 
Example 12
Source Project: flink   Source File: AvroKeyValueSinkWriter.java    License: Apache License 2.0
AvroKeyValueWriter(Schema keySchema, Schema valueSchema,
		CodecFactory compressionCodec, OutputStream outputStream,
		int syncInterval) throws IOException {
	// Create the generic record schema for the key/value pair.
	mKeyValuePairSchema = AvroKeyValue
			.getSchema(keySchema, valueSchema);

	// Create an Avro container file and a writer to it.
	DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(
			mKeyValuePairSchema);
	mAvroFileWriter = new DataFileWriter<GenericRecord>(
			genericDatumWriter);
	mAvroFileWriter.setCodec(compressionCodec);
	mAvroFileWriter.setSyncInterval(syncInterval);
	mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

	// Create a reusable output record.
	mOutputRecord = new AvroKeyValue<Object, Object>(
			new GenericData.Record(mKeyValuePairSchema));
}
 
Example 13
Source Project: DataflowTemplates   Source File: AvroConvertersTest.java    License: Apache License 2.0
/** Tests if {@link AvroConverters.ReadAvroFile} reads an Avro file correctly. */
@Test
public void testReadAvroFile() {
  Schema schema = SchemaUtils.getAvroSchema(SCHEMA_FILE_PATH);

  GenericRecord genericRecord = new GenericData.Record(schema);
  genericRecord.put("id", "007");
  genericRecord.put("state", "CA");
  genericRecord.put("price", 26.23);

  PCollection<GenericRecord> pCollection =
      pipeline.apply(
          "ReadAvroFile",
          AvroConverters.ReadAvroFile.newBuilder()
              .withInputFileSpec(AVRO_FILE_PATH)
              .withSchema(SCHEMA_FILE_PATH)
              .build());

  PAssert.that(pCollection).containsInAnyOrder(genericRecord);

  pipeline.run();
}
 
Example 14
Source Project: incubator-gobblin   Source File: AsyncHttpJoinConverter.java    License: Apache License 2.0
/**
 * Convert an input record to a future object that is later completed with an output record.
 * Sequence:
 *    Convert the input (DI) to an http request
 *    Send the http request asynchronously and register an http callback
 *    Create a {@link CompletableFuture} object. When the callback is invoked, the future is
 *    completed with an output record converted from the http response.
 *    Return the future object.
 */
@Override
public final CompletableFuture<DO> convertRecordAsync(SO outputSchema, DI inputRecord, WorkUnitState workUnitState)
    throws DataConversionException {

  // Convert DI to HttpOperation
  HttpOperation operation = generateHttpOperation(inputRecord, workUnitState);
  BufferedRecord<GenericRecord> bufferedRecord = new BufferedRecord<>(operation, WriteCallback.EMPTY);

  // Convert HttpOperation to RQ
  Queue<BufferedRecord<GenericRecord>> buffer = new LinkedBlockingDeque<>();
  buffer.add(bufferedRecord);
  AsyncRequest<GenericRecord, RQ> request = this.requestBuilder.buildRequest(buffer);
  RQ rawRequest = request.getRawRequest();

  // Execute query and get response
  AsyncHttpJoinConverterContext context = new AsyncHttpJoinConverterContext(this, outputSchema, inputRecord, request);

  try {
    httpClient.sendAsyncRequest(rawRequest, context.getCallback());
  } catch (IOException e) {
    throw new DataConversionException(e);
  }

  return context.future;
}
 
Example 15
Source Project: Cubert   Source File: AvroUtils.java    License: Apache License 2.0
public static void createFileIfNotExists(BlockSchema fileSchema, String path) throws IOException
{
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(path)))
        return;

    Schema avroSchema = convertFromBlockSchema("CUBERT_MV_RECORD", fileSchema);
    System.out.println("Creating avro file with schema = " + avroSchema);
    GenericDatumWriter<GenericRecord> datumWriter =
            new GenericDatumWriter<GenericRecord>(avroSchema);
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(datumWriter);

    FSDataOutputStream fout =
            FileSystem.create(fs,
                              new Path(path),
                              new FsPermission(FsAction.ALL,
                                               FsAction.READ_EXECUTE,
                                               FsAction.READ_EXECUTE));
    writer.create(avroSchema, fout);
    writer.flush();
    writer.close();

}
 
Example 16
Source Project: beam   Source File: AvroIO.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private static <T> AvroSource<T> createSource(
    ValueProvider<String> filepattern,
    EmptyMatchTreatment emptyMatchTreatment,
    Class<T> recordClass,
    Schema schema,
    @Nullable AvroSource.DatumReaderFactory<T> readerFactory) {
  AvroSource<?> source =
      AvroSource.from(filepattern).withEmptyMatchTreatment(emptyMatchTreatment);

  if (readerFactory != null) {
    source = source.withDatumReaderFactory(readerFactory);
  }
  return recordClass == GenericRecord.class
      ? (AvroSource<T>) source.withSchema(schema)
      : source.withSchema(recordClass);
}
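
For context, callers never invoke this private createSource helper directly; reading generic records goes through AvroIO's public builders. A sketch of typical caller-side usage (the pipeline variable and file pattern are made up):

  PCollection<GenericRecord> records =
      pipeline.apply(AvroIO.readGenericRecords(schema).from("/path/to/records-*.avro"));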
 
Example 17
Source Project: beam   Source File: AvroIOTest.java    License: Apache License 2.0
private <T extends GenericRecord> void testWriteThenReadGeneratedClass(
    AvroIO.Write<T> writeTransform, AvroIO.Read<T> readTransform) throws Exception {
  File outputFile = tmpFolder.newFile("output.avro");

  List<T> values =
      ImmutableList.of(
          (T) new AvroGeneratedUser("Bob", 256, null),
          (T) new AvroGeneratedUser("Alice", 128, null),
          (T) new AvroGeneratedUser("Ted", null, "white"));

  writePipeline
      .apply(Create.of(values))
      .apply(
          writeTransform
              .to(writePipeline.newProvider(outputFile.getAbsolutePath()))
              .withoutSharding());
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(
              "Read",
              readTransform.from(readPipeline.newProvider(outputFile.getAbsolutePath()))))
      .containsInAnyOrder(values);

  readPipeline.run();
}
 
Example 18
@Test(groups = {"serializationTest"})
public void shouldWriteRightUnionIndex() {
  // Create two record schemas
  Schema recordSchema1 = createRecord("record1", createField("record1_field1", Schema.create(Schema.Type.STRING)));
  Schema recordSchema2 = createRecord("record2", createField("record2_field1", Schema.create(Schema.Type.STRING)));
  Schema unionSchema = createUnionSchema(recordSchema1, recordSchema2);
  Schema recordWrapperSchema = createRecord(createField("union_field", unionSchema));

  GenericData.Record objectOfRecordSchema2 = new GenericData.Record(recordSchema2);
  objectOfRecordSchema2.put("record2_field1", "abc");
  GenericData.Record wrapperObject = new GenericData.Record(recordWrapperSchema);
  wrapperObject.put("union_field", objectOfRecordSchema2);

  GenericRecord record = decodeRecord(recordWrapperSchema, dataAsBinaryDecoder(wrapperObject));

  Object unionField = record.get("union_field");
  Assert.assertTrue(unionField instanceof GenericData.Record);
  GenericData.Record unionRecord = (GenericData.Record)unionField;
  Assert.assertEquals(unionRecord.getSchema().getName(), "record2");
}
 
Example 19
@Override
public void accumulate(GenericRecord value)
{
  if (value.get("type").toString().equals("click"))
  {
    clicks++;
  }
  else if (value.get("type").toString().equals("impression"))
  {
    impressions++;
  }
  else
  {
    throw new RuntimeException("Didn't expect: " + value.get("type"));
  }
}
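
The repeated value.get("type").toString() calls are deliberate: Avro's generic representation typically returns Utf8 for string fields, so comparing the raw field value to a String with equals would fail. A slightly tighter variant that fetches the field once (a sketch, not the project's code):

  Object type = value.get("type"); // typically org.apache.avro.util.Utf8
  String typeStr = String.valueOf(type);
  if ("click".equals(typeStr)) {
    clicks++;
  } else if ("impression".equals(typeStr)) {
    impressions++;
  } else {
    throw new RuntimeException("Didn't expect: " + typeStr);
  }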
 
Example 20
Source Project: components   Source File: PythonRowDoFnTest.java    License: Apache License 2.0
/**
 * Compare Avro record field values.
 */
public void compareRecords(final IndexedRecord expectedRecord, final GenericRecord outputRecord) {
    // a1
    assertEquals(expectedRecord.get(0).toString(), outputRecord.get(0).toString());

    // B
    GenericRecord outputRecordB = (GenericRecord) outputRecord.get(1);
    GenericRecord expectedRecordB = (GenericRecord) expectedRecord.get(1);
    // B.b1
    assertEquals(expectedRecordB.get("b1").toString(), outputRecordB.get(0).toString());
    // B.b2
    assertEquals(expectedRecordB.get("b2").toString(), outputRecordB.get(2).toString());

    // C
    GenericRecord outputRecordC = (GenericRecord) outputRecordB.get(1);
    GenericRecord expectedRecordC = (GenericRecord) expectedRecordB.get(1);
    assertEquals(expectedRecordC.toString(), outputRecordC.toString());
}
 
Example 21
Source Project: Flink-CEPplus   Source File: AvroOutputFormatTest.java    License: Apache License 2.0
@Test
public void testGenericRecord() throws IOException {
	final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
	final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class);
	Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}");
	outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
	outputFormat.setSchema(schema);
	output(outputFormat, schema);

	GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
	DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader);

	while (dataFileReader.hasNext()) {
		GenericRecord record = dataFileReader.next();
		assertEquals(record.get("user_name").toString(), "testUser");
		assertEquals(record.get("favorite_number"), 1);
		assertEquals(record.get("favorite_color").toString(), "blue");
	}

	//cleanup
	FileSystem fs = FileSystem.getLocalFileSystem();
	fs.delete(outputPath, false);
}
 
Example 22
Source Project: localization_nifi   Source File: TransformAvroToCSV.java    License: Apache License 2.0
/**
 * Reads the buffered Avro payload and writes it back out as CSV.
 */
@Override
protected Map<String, String> transform(InputStream in, OutputStream out, InvocationContextProperties contextProperties, Schema schema) {
    byte[] buff;
    try {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        IOUtils.copy(in, bos);
        buff = bos.toByteArray();
    } catch (Exception e) {
        // fail fast: continuing with a null buffer would only NPE on the next line
        throw new IllegalStateException("Failed to buffer the incoming Avro payload", e);
    }
    ByteArrayInputStream is = new ByteArrayInputStream(buff);
    GenericRecord avroRecord = AvroUtils.read(is, schema);
    CSVUtils.write(avroRecord, this.delimiter, out);
    return Collections.singletonMap(CoreAttributes.MIME_TYPE.key(), "text/csv");
}
 
Example 23
/**
 * Build a {@link HttpUriRequest} from a {@link GenericRecord}
 */
public void testBuildWriteRequest()
    throws IOException {
  String urlTemplate = "http://www.test.com/a/part1:${part1}/a/part2:${part2}";
  String verb = "post";
  ApacheHttpRequestBuilder builder = spy(new ApacheHttpRequestBuilder(urlTemplate, verb, "application/json"));
  ArgumentCaptor<RequestBuilder> requestBuilderArgument = ArgumentCaptor.forClass(RequestBuilder.class);

  Queue<BufferedRecord<GenericRecord>> queue = HttpTestUtils.createQueue(1, false);
  AsyncRequest<GenericRecord, HttpUriRequest> request = builder.buildRequest(queue);
  verify(builder).build(requestBuilderArgument.capture());

  RequestBuilder expected = RequestBuilder.post();
  expected.setUri("http://www.test.com/a/part1:01/a/part2:02?param1=01");
  String payloadStr = "{\"id\":\"id0\"}";
  expected.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType())
      .setEntity(new StringEntity(payloadStr, ContentType.APPLICATION_JSON));

  // Compare HttpUriRequest
  HttpTestUtils.assertEqual(requestBuilderArgument.getValue(), expected);
  Assert.assertEquals(request.getRecordCount(), 1);
  Assert.assertEquals(queue.size(), 0);
}
 
Example 24
Source Project: secor   Source File: AvroSplitByFieldMessageParser.java    License: Apache License 2.0
protected String extractEventType(GenericRecord record) {
    Object fieldValue = record.get(mSplitFieldName);
    if (fieldValue == null) {
        throw new RuntimeException("Could not find key " + mSplitFieldName + " in Avro message");
    }
    return fieldValue.toString();
}
 
Example 25
@Test(groups = {"serializationTest"})
public void shouldWriteMultipleChoiceUnion() {
  // given
  Schema subRecordSchema = createRecord("subRecord", createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));

  Schema recordSchema = createRecord(
      createUnionField("union", subRecordSchema, Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));

  GenericData.Record subRecordBuilder = new GenericData.Record(subRecordSchema);
  subRecordBuilder.put("subField", "abc");

  GenericData.Record builder = new GenericData.Record(recordSchema);
  builder.put("union", subRecordBuilder);

  // when
  GenericRecord record = decodeRecord(recordSchema, dataAsBinaryDecoder(builder));

  // then
  Assert.assertEquals("abc", ((GenericData.Record) record.get("union")).get("subField").toString());

  // given
  builder = new GenericData.Record(recordSchema);
  builder.put("union", "abc");

  // when
  record = decodeRecord(recordSchema, dataAsBinaryDecoder(builder));

  // then
  Assert.assertEquals("abc", record.get("union").toString());

  // given
  builder = new GenericData.Record(recordSchema);
  builder.put("union", 1);

  // when
  record = decodeRecord(recordSchema, dataAsBinaryDecoder(builder));

  // then
  Assert.assertEquals(1, record.get("union"));
}
 
Example 26
Source Project: localization_nifi   Source File: TestMergeContent.java    License: Apache License 2.0
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException {
    // create a reader for the merged content
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    SeekableByteArrayInput input = new SeekableByteArrayInput(data);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader);

    // read all the records into a map to verify all the records are there
    Map<String,GenericRecord> records = new HashMap<>();
    while (dataFileReader.hasNext()) {
        GenericRecord user = dataFileReader.next();
        records.put(user.get(key).toString(), user);
    }
    return records;
}
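
DataFileReader also implements java.io.Closeable, and the method above never closes it. A sketch of the same body with try-with-resources, so the reader and its underlying input are always released:

    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader)) {
        Map<String, GenericRecord> records = new HashMap<>();
        while (dataFileReader.hasNext()) {
            GenericRecord user = dataFileReader.next();
            records.put(user.get(key).toString(), user);
        }
        return records;
    }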
 
Example 27
Source Project: incubator-pinot   Source File: IntArraysTest.java    License: Apache License 2.0
@BeforeClass
public static void before()
    throws Exception {
  final String filePath =
      TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }

  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);

  final SegmentGeneratorConfig config = SegmentTestUtils
      .getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "weeksSinceEpochSunday",
          TimeUnit.DAYS, "test");
  // The segment generation code in SegmentColumnarIndexCreator will throw an
  // exception if the start and end times in the time column are not in the
  // acceptable range. For this test, we would first need to fix the input Avro
  // data to have time column values in the allowed range. Until then, the
  // check is explicitly disabled.
  config.setSkipTimeValueCheck(true);
  driver.init(config);
  driver.build();

  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
 
Example 28
/**
 * Retrieve the value of the partition column field specified by this.partitionColumns
 */
private Optional<Object> getWriterPartitionColumnValue(GenericRecord record) {
  if (!this.partitionColumns.isPresent()) {
    return Optional.absent();
  }

  for (String partitionColumn : this.partitionColumns.get()) {
    Optional<Object> fieldValue = AvroUtils.getFieldValue(record, partitionColumn);
    if (fieldValue.isPresent()) {
      return fieldValue;
    }
  }
  return Optional.absent();
}
 
Example 29
Source Project: samza   Source File: TestSamzaSqlEndToEnd.java    License: Apache License 2.0
@Test
public void testEndToEndStreamTableInnerJoinWithFilter() throws Exception {
  int numMessages = 20;

  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
  String sql =
      "Insert into testavro.enrichedPageViewTopic "
          + "select pv.pageKey as __key__, pv.pageKey as pageKey, p.name as companyName, p.name as profileName,"
          + "       p.address as profileAddress "
          + "from testavro.PROFILE.`$table` as p "
          + "join testavro.PAGEVIEW as pv "
          + " on p.id = pv.profileId "
          + "where p.name = 'Mike'";

  List<String> sqlStmts = Arrays.asList(sql);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));

  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(sqlStmts);

  runApplication(config);

  List<String> outMessages = TestAvroSystemFactory.messages.stream()
      .map(x -> ((GenericRecord) x.getMessage()).get("pageKey").toString() + ","
          + (((GenericRecord) x.getMessage()).get("profileName") == null ? "null" :
          ((GenericRecord) x.getMessage()).get("profileName").toString()))
      .collect(Collectors.toList());
  Assert.assertEquals(4, outMessages.size());
  List<String> expectedOutMessages =
      TestAvroSystemFactory.getPageKeyProfileNameJoin(numMessages)
          .stream()
          .filter(msg -> msg.endsWith("Mike"))
          .collect(Collectors.toList());
  Assert.assertEquals(expectedOutMessages, outMessages);
}
 
Example 30
private DataWriter<GenericRecord> getWriter(Schema schema, State state)
    throws IOException {
  // Build a writer to write test records
  DataWriterBuilder<Schema, GenericRecord> builder = new AvroDataWriterBuilder()
      .writeTo(Destination.of(Destination.DestinationType.HDFS, state)).writeInFormat(WriterOutputFormat.AVRO)
      .withWriterId(WRITER_ID).withSchema(schema).withBranches(1).forBranch(0);
  return new PartitionedDataWriter<Schema, GenericRecord>(builder, state);
}