org.apache.avro.generic.GenericRecord Java Examples

The following examples show how to use org.apache.avro.generic.GenericRecord. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AvroRecordConverter.java    From DataflowTemplates with Apache License 2.0 7 votes vote down vote up
/**
 * Reads {@code fieldName} from {@code record} and converts the value to a {@code double}.
 *
 * <p>INT, LONG and FLOAT values are widened, DOUBLE values are returned unchanged, and STRING
 * values are parsed with {@link Double#valueOf(String)}.
 *
 * @param record the Avro record to read from
 * @param avroType the Avro type of the field; selects which conversion is applied
 * @param fieldName the name of the field to read
 * @return the converted value, or an empty {@link Optional} when the field value is {@code null}
 * @throws IllegalArgumentException if {@code avroType} is not one of the supported types
 * @throws NumberFormatException if a STRING value cannot be parsed as a double
 */
private static Optional<Double> readFloat64(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  switch (avroType) {
    case INT:
      return Optional.ofNullable((Integer) record.get(fieldName)).map(Integer::doubleValue);
    case LONG:
      return Optional.ofNullable((Long) record.get(fieldName)).map(Long::doubleValue);
    case FLOAT:
      return Optional.ofNullable((Float) record.get(fieldName)).map(Float::doubleValue);
    case DOUBLE:
      return Optional.ofNullable((Double) record.get(fieldName));
    case STRING:
      // Avro string values are not always Utf8: depending on how the record was produced they
      // can also be java.lang.String. Casting to CharSequence accepts every representation
      // instead of failing with a ClassCastException on non-Utf8 values.
      return Optional.ofNullable((CharSequence) record.get(fieldName))
          .map(CharSequence::toString)
          .map(Double::valueOf);
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as FLOAT64");
  }
}
 
Example #2
Source File: ApacheHttpRequestBuilderTest.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@link ApacheHttpRequestBuilder} turns a queued {@link GenericRecord} into the
 * expected {@link HttpUriRequest}.
 */
public void testBuildWriteRequest()
    throws IOException {
  String urlTemplate = "http://www.test.com/a/part1:${part1}/a/part2:${part2}";
  String verb = "post";
  ApacheHttpRequestBuilder builder = spy(new ApacheHttpRequestBuilder(urlTemplate, verb, "application/json"));
  ArgumentCaptor<RequestBuilder> captor = ArgumentCaptor.forClass(RequestBuilder.class);

  // Build a request from a one-element queue and capture what was handed to build().
  Queue<BufferedRecord<GenericRecord>> pending = HttpTestUtils.createQueue(1, false);
  AsyncRequest<GenericRecord, HttpUriRequest> built = builder.buildRequest(pending);
  verify(builder).build(captor.capture());

  // Assemble the request we expect the builder to have produced.
  String payloadStr = "{\"id\":\"id0\"}";
  RequestBuilder expected = RequestBuilder.post();
  expected.setUri("http://www.test.com/a/part1:01/a/part2:02?param1=01");
  expected.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
  expected.setEntity(new StringEntity(payloadStr, ContentType.APPLICATION_JSON));

  // The captured request must match, one record must be counted, and the queue must be empty.
  HttpTestUtils.assertEqual(captor.getValue(), expected);
  Assert.assertEquals(built.getRecordCount(), 1);
  Assert.assertEquals(pending.size(), 0);
}
 
Example #3
Source File: AvroCodecTests.java    From schema-evolution-samples with Apache License 2.0 6 votes vote down vote up
/** Encodes a generic record with the v2 user schema and checks it decodes back unchanged. */
@Test
public void genericEncoderV2GenericDecoderV2() throws Exception{
	Schema userSchema = load("users_v2.schema");

	// Codec backed by a mocked registry that maps id 2 to the v2 schema.
	SchemaRegistryClient registry = mock(SchemaRegistryClient.class);
	AvroCodec codec = new AvroCodec();
	codec.setSchemaRegistryClient(registry);
	when(registry.register(any())).thenReturn(2);
	when(registry.fetch(eq(2))).thenReturn(userSchema);

	GenericRecord original = new GenericData.Record(userSchema);
	original.put("name","joe");
	original.put("favoriteNumber",42);
	original.put("favoriteColor","blue");
	original.put("favoritePlace","Paris");

	// Round-trip through the codec.
	byte[] encoded = codec.encode(original);
	GenericRecord roundTripped = codec.decode(encoded,GenericRecord.class);

	String expectedPlace = original.get("favoritePlace").toString();
	String actualPlace = roundTripped.get("favoritePlace").toString();
	Assert.assertEquals(expectedPlace,actualPlace);
}
 
Example #4
Source File: ReadDataset.java    From kite-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the events dataset and prints each of its records to standard output.
 *
 * @param args command-line arguments; not read by this method
 * @return {@code 0} after all records have been printed
 */
@Override
public int run(String[] args) throws Exception {
  Dataset<GenericRecord> eventDataset = Datasets.load("dataset:hive:/tmp/data/default/events");

  // Iterate the whole dataset; the reader is closed even if printing fails part-way.
  DatasetReader<GenericRecord> eventReader = eventDataset.newReader();
  try {
    for (GenericRecord next : eventReader) {
      System.out.println(next);
    }
  } finally {
    eventReader.close();
  }

  return 0;
}
 
Example #5
Source File: KafkaAvroSerdesTest.java    From registry with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a generic record and deserializes it with the specific-reader flag enabled, once
 * without and once with Kafka headers.
 */
@Test
public void testGenericSerializedSpecificDeserialized() {
    // Deserializer configured to use the specific Avro reader.
    Map<String, Object> deserializerConfig = new HashMap<>();
    deserializerConfig.put(AvroSnapshotDeserializer.SPECIFIC_AVRO_READER, true);
    KafkaAvroDeserializer deserializer = new KafkaAvroDeserializer(schemaRegistryClient);
    deserializer.configure(deserializerConfig, false);

    // Serializer with no extra configuration.
    KafkaAvroSerializer serializer = new KafkaAvroSerializer(schemaRegistryClient);
    serializer.configure(Collections.emptyMap(), false);

    GenericRecord input = new GenericRecordBuilder(schema)
            .set("field1", "some value")
            .set("field2", "some other value")
            .build();

    // Round trip without headers.
    byte[] plainBytes = serializer.serialize(topic, input);
    Object plainResult = deserializer.deserialize(topic, plainBytes);
    checkGenericSerializedSpecificDeserializedEquals(input, plainResult);

    // Round trip with headers.
    Headers headers = new RecordHeaders();
    byte[] headerBytes = serializer.serialize(topic, headers, input);
    Object headerResult = deserializer.deserialize(topic, headers, headerBytes);
    checkGenericSerializedSpecificDeserializedEquals(input, headerResult);
}
 
Example #6
Source File: TestReflectLogicalTypes.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/** Writes a record holding UUID strings and checks the reflect reader returns them as UUIDs. */
@Test
public void testReadUUIDList() throws IOException {
  Schema listRecordSchema = SchemaBuilder.record(RecordWithUUIDList.class.getName())
      .fields()
      .name("uuids").type().array().items().stringType().noDefault()
      .endRecord();

  // Mark the array as a java.util.List and its elements as the uuid logical type.
  Schema uuidsSchema = listRecordSchema.getField("uuids").schema();
  uuidsSchema.addProp(SpecificData.CLASS_PROP, List.class.getName());
  LogicalTypes.uuid().addToSchema(uuidsSchema.getElementType());

  UUID first = UUID.randomUUID();
  UUID second = UUID.randomUUID();

  // The written record carries the UUIDs in string form.
  GenericRecord written = new GenericData.Record(listRecordSchema);
  written.put("uuids", Arrays.asList(first.toString(), second.toString()));

  RecordWithUUIDList expected = new RecordWithUUIDList();
  expected.uuids = Arrays.asList(first, second);

  File dataFile = write(listRecordSchema, written);

  Assert.assertEquals("Should convert Strings to UUIDs",
      expected, read(REFLECT, listRecordSchema, dataFile).get(0));
}
 
Example #7
Source File: TestTransformCommandCluster.java    From kite with Apache License 2.0 6 votes vote down vote up
/** Runs the transform command and checks the destination dataset holds upper-cased usernames. */
@Test
public void testTransform() throws Exception {
  command.repoURI = repoUri;
  command.transform = "org.kitesdk.cli.example.ToUpperCase";
  command.datasets = Lists.newArrayList(source, dest);

  int exitCode = command.run();
  Assert.assertEquals("Should return success", 0, exitCode);

  // Read back everything that was written to the destination dataset.
  DatasetRepository repository = DatasetRepositories.repositoryFor("repo:" + repoUri);
  Set<GenericRecord> copied = DatasetTestUtilities.materialize(
      repository.<GenericRecord>load("default", dest));
  Assert.assertEquals("Should contain copied records", 6, copied.size());

  for (GenericRecord row : copied) {
    String username = row.get("username").toString();
    Assert.assertTrue("Username should be upper case",
        UPPER_CASE.matcher(username).matches());
  }
}
 
Example #8
Source File: TestHiveDatasetURIsCompatibility.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a dataset with an explicit old location, then checks that it can be loaded and
 * deleted through the relative-path dataset URI.
 */
@Test
public void testLoadChangedRelativePathURICompatibility() {
  // this used to be a relative external URI, but is now a managed URI
  String uri = "dataset:hive:data/ds";

  // Create default.ds pointing at the old location.
  DatasetDescriptor oldLocationDescriptor = new DatasetDescriptor.Builder(DESCRIPTOR)
      .location("file:/tmp/data/ds") // old location
      .build();
  DatasetRepository repository = DatasetRepositories
      .repositoryFor("repo:hive:/tmp/data");
  Dataset<GenericRecord> created = repository.create(
      "default", "ds", oldLocationDescriptor, GenericRecord.class);

  Dataset<GenericRecord> loaded = Datasets.load(uri);
  Assert.assertEquals("Should load existing dataset default.ds",
      created, loaded);

  String loadedUri = loaded.getUri().toString();
  Assert.assertEquals("URI should use apparent namespace",
      "dataset:hive:data/ds", loadedUri);

  boolean deleted = Datasets.delete(uri);
  Assert.assertTrue(deleted);
}
 
Example #9
Source File: KafkaValueDeserializer.java    From kareldb with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an Avro array of records into a map of {@link VersionedValue}s keyed by version.
 *
 * <p>In each record, positions 0, 1 and 2 are read as the version, the commit and the deleted
 * flag; every remaining position is converted with {@code AvroSchema.fromAvroValue} and stored
 * in the row.
 *
 * @param array the records to convert
 * @return a sorted map from version to the value built from the matching record
 */
private NavigableMap<Long, VersionedValue> toValue(GenericArray<GenericRecord> array) {
    NavigableMap<Long, VersionedValue> byVersion = new TreeMap<>();
    List<Schema.Field> schemaFields = avroSchema.getElementType().getFields();
    int fieldCount = schemaFields.size();
    for (GenericRecord entry : array) {
        Long version = (Long) entry.get(0);
        Long commit = (Long) entry.get(1);
        boolean deleted = (Boolean) entry.get(2);

        // Row columns start right after the three leading positions read above.
        Comparable[] row = new Comparable[fieldCount - 3];
        for (int pos = 3; pos < fieldCount; pos++) {
            Schema columnSchema = schemaFields.get(pos).schema();
            Comparable rawValue = (Comparable) entry.get(pos);
            row[pos - 3] = AvroSchema.fromAvroValue(columnSchema, rawValue);
        }
        byVersion.put(version, new VersionedValue(version, commit, deleted, row));
    }
    return byVersion;
}
 
Example #10
Source File: TestParquetImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Imports a single-row table twice, the second time with {@code --append}, and checks that
 * exactly two records with the same value can be read back.
 */
public void testIncrementalParquetImport() throws IOException, SQLException {
  String [] columnTypes = { "INT" };
  String [] columnValues = { "1" };
  createTableWithColTypes(columnTypes, columnValues);

  // First a plain import, then an appending one.
  runImport(getOutputArgv(true, null));
  runImport(getOutputArgv(true, new String[]{"--append"}));

  DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());

    GenericRecord first = reader.next();
    assertEquals(1, first.get("DATA_COL0"));

    GenericRecord second = reader.next();
    assertEquals(1, second.get("DATA_COL0"));

    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
 
Example #11
Source File: TestSamzaSqlEndToEnd.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Runs two insert statements that read from the same source and checks that every input id
 * shows up exactly twice in the collected output.
 */
@Ignore
@Test
public void testEndToEndFanOut() throws SamzaSqlValidatorException {
  int numMessages = 20;
  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);

  // Two statements reading from the same source stream.
  String toSimple2 = "Insert into testavro.SIMPLE2 select * from testavro.SIMPLE1";
  String toSimple3 = "Insert into testavro.SIMPLE3 select * from testavro.SIMPLE1";
  List<String> statements = Arrays.asList(toSimple2, toSimple3);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(statements));

  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(statements);

  runApplication(config);

  // Collect the "id" field of every message that was produced.
  List<Integer> producedIds = TestAvroSystemFactory.messages.stream()
      .map(x -> (GenericRecord) x.getMessage())
      .map(rec -> rec.get("id").toString())
      .map(Integer::valueOf)
      .sorted()
      .collect(Collectors.toList());
  Assert.assertEquals(numMessages * 2, producedIds.size());

  Set<Integer> distinctIds = new HashSet<>(producedIds);
  Assert.assertEquals(numMessages, distinctIds.size());

  List<Integer> expectedIds = IntStream.range(0, numMessages).boxed().collect(Collectors.toList());
  List<Integer> actualIds = new ArrayList<>(distinctIds);
  Assert.assertTrue(expectedIds.equals(actualIds));
}
 
Example #12
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up an Avro container-file writer for key/value pairs on the given stream.
 *
 * @param keySchema schema of the keys
 * @param valueSchema schema of the values
 * @param compressionCodec codec set on the file writer
 * @param outputStream stream the container file is created on
 * @param syncInterval sync interval set on the file writer
 * @throws IOException if the container file cannot be created
 */
AvroKeyValueWriter(Schema keySchema, Schema valueSchema,
		CodecFactory compressionCodec, OutputStream outputStream,
		int syncInterval) throws IOException {
	// Schema of the generic record that wraps one key/value pair.
	mKeyValuePairSchema = AvroKeyValue.getSchema(keySchema, valueSchema);

	// Configure the container-file writer, then create the file on the stream.
	DatumWriter<GenericRecord> pairDatumWriter = new GenericDatumWriter<>(mKeyValuePairSchema);
	mAvroFileWriter = new DataFileWriter<GenericRecord>(pairDatumWriter);
	mAvroFileWriter.setCodec(compressionCodec);
	mAvroFileWriter.setSyncInterval(syncInterval);
	mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

	// One output record, created here and kept in a field.
	GenericData.Record backingRecord = new GenericData.Record(mKeyValuePairSchema);
	mOutputRecord = new AvroKeyValue<Object, Object>(backingRecord);
}
 
Example #13
Source File: AvroConvertersTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/** Verifies that {@link AvroConverters.ReadAvroFile} produces the record stored in the file. */
@Test
public void testReadAvroFile() {
  Schema avroSchema = SchemaUtils.getAvroSchema(SCHEMA_FILE_PATH);

  // The single record the pipeline is expected to emit.
  GenericRecord expectedRecord = new GenericData.Record(avroSchema);
  expectedRecord.put("id", "007");
  expectedRecord.put("state", "CA");
  expectedRecord.put("price", 26.23);

  PCollection<GenericRecord> readRecords =
      pipeline.apply(
          "ReadAvroFile",
          AvroConverters.ReadAvroFile.newBuilder()
              .withInputFileSpec(AVRO_FILE_PATH)
              .withSchema(SCHEMA_FILE_PATH)
              .build());

  PAssert.that(readRecords).containsInAnyOrder(expectedRecord);

  pipeline.run();
}
 
Example #14
Source File: AvroUtils.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an Avro container file with no records at {@code path}, unless the path already
 * exists.
 *
 * <p>The Avro schema is derived from {@code fileSchema}, and the file is created with
 * permissions ALL / READ_EXECUTE / READ_EXECUTE.
 *
 * @param fileSchema block schema converted to the Avro schema of the new file
 * @param path location of the file on the configured file system
 * @throws IOException if the existence check or the file creation fails
 */
public static void createFileIfNotExists(BlockSchema fileSchema, String path) throws IOException
{
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(conf);
    Path filePath = new Path(path);
    if (fs.exists(filePath))
    {
        return;
    }

    Schema avroSchema = convertFromBlockSchema("CUBERT_MV_RECORD", fileSchema);
    System.out.println("Creating avro file with schema = " + avroSchema);
    GenericDatumWriter<GenericRecord> datumWriter =
            new GenericDatumWriter<GenericRecord>(avroSchema);
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(datumWriter);

    FSDataOutputStream fout =
            FileSystem.create(fs,
                              filePath,
                              new FsPermission(FsAction.ALL,
                                               FsAction.READ_EXECUTE,
                                               FsAction.READ_EXECUTE));
    try
    {
        writer.create(avroSchema, fout);
        writer.flush();
        writer.close();
    }
    finally
    {
        // Release the stream even when create/flush/close throws. On the success path the
        // writer is expected to have closed it already, making this a repeated close.
        fout.close();
    }
}
 
Example #15
Source File: AvroIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@link AvroSource} for the given file pattern.
 *
 * @param filepattern pattern of the files to read
 * @param emptyMatchTreatment how an empty match is treated
 * @param recordClass class of the records to produce
 * @param schema schema applied when {@code recordClass} is {@link GenericRecord}
 * @param readerFactory optional datum reader factory; ignored when {@code null}
 * @return the source, configured with {@code schema} for generic records and with
 *     {@code recordClass} otherwise
 */
@SuppressWarnings("unchecked")
private static <T> AvroSource<T> createSource(
    ValueProvider<String> filepattern,
    EmptyMatchTreatment emptyMatchTreatment,
    Class<T> recordClass,
    Schema schema,
    @Nullable AvroSource.DatumReaderFactory<T> readerFactory) {
  AvroSource<?> source =
      AvroSource.from(filepattern).withEmptyMatchTreatment(emptyMatchTreatment);

  if (readerFactory != null) {
    source = source.withDatumReaderFactory(readerFactory);
  }

  // Generic records are configured from the explicit schema; the cast is unchecked.
  if (recordClass == GenericRecord.class) {
    return (AvroSource<T>) source.withSchema(schema);
  }
  return source.withSchema(recordClass);
}
 
Example #16
Source File: ParquetReader.java    From HBase-ToHDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Prints the records of a Parquet file to standard output, one per line, each prefixed with
 * its 1-based position.
 *
 * @param args {@code args[0]} is the data file; the optional {@code args[1]} is the maximum
 *     number of records to print (all records when absent)
 * @throws IOException if the file cannot be opened or read
 * @throws NumberFormatException if {@code args[1]} is present but not an integer
 */
public static void main(String[] args) throws IOException {
  if (args.length == 0) {
    System.out.println("AvroReader {dataFile} {max.lines.to.read.optional}");
    // Nothing to read without a data file; continuing would fail on args[0].
    return;
  }

  String dataFile = args[0];
  int recordsToRead = Integer.MAX_VALUE;
  if (args.length > 1) {
    recordsToRead = Integer.parseInt(args[1]);
  }

  Path dataFilePath = new Path(dataFile);

  AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(dataFilePath);
  try {
    GenericRecord r;
    int counter = 0;
    // Check the limit first so no record is read beyond the requested count.
    while (counter < recordsToRead && (r = reader.read()) != null) {
      counter++;
      System.out.println(counter + " : " + r);
    }
  } finally {
    reader.close();
  }
}
 
Example #17
Source File: AvroIOTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Writes three generated-class records with {@code writeTransform}, reads them back with
 * {@code readTransform}, and asserts the same records come out in any order.
 *
 * @param writeTransform the write transform under test
 * @param readTransform the read transform under test
 */
private <T extends GenericRecord> void testWriteThenReadGeneratedClass(
    AvroIO.Write<T> writeTransform, AvroIO.Read<T> readTransform) throws Exception {
  File outputFile = tmpFolder.newFile("output.avro");
  String outputPath = outputFile.getAbsolutePath();

  List<T> users =
      ImmutableList.of(
          (T) new AvroGeneratedUser("Bob", 256, null),
          (T) new AvroGeneratedUser("Alice", 128, null),
          (T) new AvroGeneratedUser("Ted", null, "white"));

  // Write phase: a single unsharded file at the temp location.
  writePipeline
      .apply(Create.of(users))
      .apply(writeTransform.to(writePipeline.newProvider(outputPath)).withoutSharding());
  writePipeline.run();

  // Read phase: the same file must yield the same records.
  PAssert.that(
          readPipeline.apply("Read", readTransform.from(readPipeline.newProvider(outputPath))))
      .containsInAnyOrder(users);

  readPipeline.run();
}
 
Example #18
Source File: FastGenericSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/** Checks that a union field holding the second of two record types decodes as that type. */
@Test(groups = {"serializationTest"})
public void shouldWriteRightUnionIndex() {
  // Two record schemas combined into a union, wrapped by an outer record.
  Schema firstSchema = createRecord("record1", createField("record1_field1", Schema.create(Schema.Type.STRING)));
  Schema secondSchema = createRecord("record2", createField("record2_field1", Schema.create(Schema.Type.STRING)));
  Schema union = createUnionSchema(firstSchema, secondSchema);
  Schema wrapperSchema = createRecord(createField("union_field", union));

  // Populate the union with an instance of the second record type.
  GenericData.Record inner = new GenericData.Record(secondSchema);
  inner.put("record2_field1", "abc");
  GenericData.Record wrapper = new GenericData.Record(wrapperSchema);
  wrapper.put("union_field", inner);

  GenericRecord decoded = decodeRecord(wrapperSchema, dataAsBinaryDecoder(wrapper));

  Object decodedUnion = decoded.get("union_field");
  Assert.assertTrue(decodedUnion instanceof GenericData.Record);
  String decodedName = ((GenericData.Record) decodedUnion).getSchema().getName();
  Assert.assertEquals(decodedName, "record2");
}
 
Example #19
Source File: ImpressionClickPartitionPreservingJob.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the record as a click or an impression according to its {@code "type"} field.
 *
 * @param value the record to count
 * @throws RuntimeException if the type is neither {@code "click"} nor {@code "impression"}
 */
@Override
public void accumulate(GenericRecord value)
{
  String type = value.get("type").toString();
  if (type.equals("click"))
  {
    clicks++;
    return;
  }
  if (type.equals("impression"))
  {
    impressions++;
    return;
  }
  throw new RuntimeException("Didn't expect: " + value.get("type"));
}
 
Example #20
Source File: AsyncHttpJoinConverter.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an input record into a future that will later hold the output record.
 *
 * <p>Steps:
 * <ol>
 *   <li>Turn the input record (DI) into an {@link HttpOperation} and build a raw request from it.
 *   <li>Send the request asynchronously with the callback of a new converter context.
 *   <li>Return the context's {@link CompletableFuture}.
 * </ol>
 *
 * @throws DataConversionException if sending the request fails with an {@link IOException}
 */
@Override
public final CompletableFuture<DO> convertRecordAsync(SO outputSchema, DI inputRecord, WorkUnitState workUnitState)
    throws DataConversionException {

  // DI -> HttpOperation, wrapped in a buffered record with a no-op write callback
  HttpOperation httpOperation = generateHttpOperation(inputRecord, workUnitState);
  BufferedRecord<GenericRecord> wrapped = new BufferedRecord<>(httpOperation, WriteCallback.EMPTY);

  // HttpOperation -> RQ, via a single-element queue handed to the request builder
  Queue<BufferedRecord<GenericRecord>> singleRecordQueue = new LinkedBlockingDeque<>();
  singleRecordQueue.add(wrapped);
  AsyncRequest<GenericRecord, RQ> asyncRequest = this.requestBuilder.buildRequest(singleRecordQueue);
  RQ rawRequest = asyncRequest.getRawRequest();

  // Send the request with the context's callback
  AsyncHttpJoinConverterContext context =
      new AsyncHttpJoinConverterContext(this, outputSchema, inputRecord, asyncRequest);

  try {
    httpClient.sendAsyncRequest(rawRequest, context.getCallback());
  } catch (IOException ioe) {
    throw new DataConversionException(ioe);
  }

  return context.future;
}
 
Example #21
Source File: PythonRowDoFnTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the output record matches the expected record, comparing nested values by
 * their string form.
 *
 * @param expectedRecord the record holding the expected values
 * @param outputRecord the record produced by the code under test
 */
public void compareRecords(final IndexedRecord expectedRecord, final GenericRecord outputRecord) {
    // a1: first field of the top-level record
    String expectedA1 = expectedRecord.get(0).toString();
    String actualA1 = outputRecord.get(0).toString();
    assertEquals(expectedA1, actualA1);

    // B: nested record at position 1
    GenericRecord actualB = (GenericRecord) outputRecord.get(1);
    GenericRecord expectedB = (GenericRecord) expectedRecord.get(1);
    // B.b1: expected is read by name, actual by position 0
    assertEquals(expectedB.get("b1").toString(), actualB.get(0).toString());
    // B.b2: expected is read by name, actual by position 2
    assertEquals(expectedB.get("b2").toString(), actualB.get(2).toString());

    // C: record nested inside B at position 1, compared as a whole
    GenericRecord actualC = (GenericRecord) actualB.get(1);
    GenericRecord expectedC = (GenericRecord) expectedB.get(1);
    assertEquals(expectedC.toString(), actualC.toString());
}
 
Example #22
Source File: AvroOutputFormatTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Writes generic records through {@link AvroOutputFormat} and checks the fields of every
 * record read back from the resulting file.
 */
@Test
public void testGenericRecord() throws IOException {
	final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
	try {
		final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class);
		Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}");
		outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
		outputFormat.setSchema(schema);
		output(outputFormat, schema);

		GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
		// Close the reader before cleanup so the file handle is released.
		try (DataFileReader<GenericRecord> dataFileReader =
				new DataFileReader<>(new File(outputPath.getPath()), reader)) {
			while (dataFileReader.hasNext()) {
				GenericRecord record = dataFileReader.next();
				assertEquals("testUser", record.get("user_name").toString());
				assertEquals(1, record.get("favorite_number"));
				assertEquals("blue", record.get("favorite_color").toString());
			}
		}
	} finally {
		// cleanup runs even when writing, reading or an assertion fails
		FileSystem fs = FileSystem.getLocalFileSystem();
		fs.delete(outputPath, false);
	}
}
 
Example #23
Source File: TransformAvroToCSV.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one Avro record from {@code in} using {@code schema} and writes it to {@code out} as
 * CSV, separated by this instance's delimiter.
 *
 * @param in stream holding the Avro record; fully buffered in memory before decoding
 * @param out stream the CSV form is written to
 * @param contextProperties not read by this method
 * @param schema schema used to decode the record
 * @return a single-entry map setting the MIME type attribute to {@code text/csv}
 * @throws IllegalStateException if the input stream cannot be read
 */
@Override
protected Map<String, String> transform(InputStream in, OutputStream out, InvocationContextProperties contextProperties, Schema schema) {
    byte[] buff;
    try {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        IOUtils.copy(in, bos);
        buff = bos.toByteArray();
    } catch (Exception e) {
        // Fail here with the real cause instead of continuing with a null buffer.
        throw new IllegalStateException("Failed to read AVRO record", e);
    }
    ByteArrayInputStream is = new ByteArrayInputStream(buff);
    GenericRecord avroRecord = AvroUtils.read(is, schema);
    CSVUtils.write(avroRecord, this.delimiter, out);
    return Collections.singletonMap(CoreAttributes.MIME_TYPE.key(), "text/csv");
}
 
Example #24
Source File: ParquetPartitionWriter.java    From entrada with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Writes a record to the Parquet partition it belongs to, creating the partition on first use
 * and closing it once it holds {@code maxRows} rows or more.
 *
 * @param rec the record to write
 * @param schema schema used when a new partition has to be created
 * @param partition the partition the record belongs to
 */
public void write(GenericRecord rec, Schema schema, Partition partition) {

    String partitionPath = FileUtil.appendPath(path, partition.toPath());

    // reuse the open partition for this path, or create one if there is none yet
    ParquetPartition<GenericRecord> target =
        partitions.computeIfAbsent(partitionPath, k -> new ParquetPartition<>(partitionPath, schema));

    target.write(rec);

    // below the row limit there is nothing more to do
    if (target.getRows() < maxRows) {
      return;
    }

    log
        .info(
            "Max DNS packets reached for this Parquet parition {}, close current file and create new",
            partitionPath);

    target.close();
    // drop the closed partition from the map, so that the next row for this path
    // gets a new partition object and parquet file
    partitions.remove(partitionPath);
}
 
Example #25
Source File: TestSamzaSqlEndToEnd.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a stream-table inner join filtered on the profile name and checks the produced
 * "pageKey,profileName" pairs against the expected join result.
 */
@Test
public void testEndToEndStreamTableInnerJoinWithFilter() throws Exception {
  int numMessages = 20;

  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
  String joinSql =
      "Insert into testavro.enrichedPageViewTopic "
          + "select pv.pageKey as __key__, pv.pageKey as pageKey, p.name as companyName, p.name as profileName,"
          + "       p.address as profileAddress "
          + "from testavro.PROFILE.`$table` as p "
          + "join testavro.PAGEVIEW as pv "
          + " on p.id = pv.profileId "
          + "where p.name = 'Mike'";

  List<String> statements = Arrays.asList(joinSql);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(statements));

  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(statements);

  runApplication(config);

  // Render each output message as "pageKey,profileName", using "null" for a missing name.
  List<String> actualPairs = TestAvroSystemFactory.messages.stream()
      .map(x -> {
        GenericRecord joined = (GenericRecord) x.getMessage();
        String pageKey = joined.get("pageKey").toString();
        Object profileName = joined.get("profileName");
        return pageKey + "," + (profileName == null ? "null" : profileName.toString());
      })
      .collect(Collectors.toList());
  Assert.assertEquals(4, actualPairs.size());

  // Only the expected join rows ending in "Mike" pass the filter.
  List<String> expectedPairs =
      TestAvroSystemFactory.getPageKeyProfileNameJoin(numMessages)
          .stream()
          .filter(msg -> msg.endsWith("Mike"))
          .collect(Collectors.toList());
  Assert.assertEquals(expectedPairs, actualPairs);
}
 
Example #26
Source File: FastGenericSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Checks that a three-way union (sub-record, string, int) round-trips correctly for a value
 * of each branch.
 */
@Test(groups = {"serializationTest"})
public void shouldWriteMultipleChoiceUnion() {
  Schema subRecordSchema = createRecord("subRecord", createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));
  Schema recordSchema = createRecord(
      createUnionField("union", subRecordSchema, Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));

  // Branch 1: the union holds a sub-record.
  GenericData.Record subRecord = new GenericData.Record(subRecordSchema);
  subRecord.put("subField", "abc");
  GenericData.Record withSubRecord = new GenericData.Record(recordSchema);
  withSubRecord.put("union", subRecord);

  GenericRecord decodedSubRecord = decodeRecord(recordSchema, dataAsBinaryDecoder(withSubRecord));

  Assert.assertEquals("abc", ((GenericData.Record) decodedSubRecord.get("union")).get("subField").toString());

  // Branch 2: the union holds a string.
  GenericData.Record withString = new GenericData.Record(recordSchema);
  withString.put("union", "abc");

  GenericRecord decodedString = decodeRecord(recordSchema, dataAsBinaryDecoder(withString));

  Assert.assertEquals("abc", decodedString.get("union").toString());

  // Branch 3: the union holds an int.
  GenericData.Record withInt = new GenericData.Record(recordSchema);
  withInt.put("union", 1);

  GenericRecord decodedInt = decodeRecord(recordSchema, dataAsBinaryDecoder(withInt));

  Assert.assertEquals(1, decodedInt.get("union"));
}
 
Example #27
Source File: AvroUtils.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Binary-encodes the given {@link GenericRecord} with its own schema and writes it to the
 * given {@link OutputStream}.
 *
 * @param record the record to write
 * @param out the stream to write to
 * @throws IllegalStateException if writing or flushing fails
 */
public static void write(GenericRecord record, OutputStream out) {
    BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(out, null);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(record.getSchema());
    try {
        datumWriter.write(record, binaryEncoder);
        // flush the encoder so the written bytes are pushed to the stream
        binaryEncoder.flush();
    } catch (Exception cause) {
        throw new IllegalStateException("Failed to write AVRO record", cause);
    }
}
 
Example #28
Source File: AvroSerializerSnapshotTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Snapshots a generic serializer, then checks that a specific serializer for the same schema
 * is reported as compatible as-is.
 */
@Test
@SuppressWarnings({"unchecked", "rawtypes"})
public void changingFromGenericToSpecificWithCompatibleSchemaShouldResultInCompatibleSerializers() {
	// snapshot taken from the generic serializer
	AvroSerializer<Object> genericSerializer = new AvroSerializer(GenericRecord.class, User.SCHEMA$);
	TypeSerializerSnapshot<Object> snapshotOfGeneric = genericSerializer.snapshotConfiguration();

	// the serializer we upgrade to; its own snapshot is taken and discarded
	AvroSerializer<Object> upgraded = new AvroSerializer(User.class);
	upgraded.snapshotConfiguration();

	assertThat(snapshotOfGeneric.resolveSchemaCompatibility(upgraded), isCompatibleAsIs());
}
 
Example #29
Source File: InputFormatTestUtil.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Generates {@code numberOfRecords} Avro records, passing the indices
 * {@code 0 .. numberOfRecords - 1} to {@code SchemaTestUtil.generateAvroRecordFromJson}.
 *
 * @param schema schema passed to the record generator
 * @param numberOfRecords how many records to generate
 * @param instantTime instant time passed to the record generator
 * @param fileId file id passed to the record generator
 * @return the generated records, in index order
 * @throws IOException if generating a record fails
 */
private static Iterable<? extends GenericRecord> generateAvroRecords(Schema schema, int numberOfRecords,
    String instantTime, String fileId) throws IOException {
  List<GenericRecord> generated = new ArrayList<>(numberOfRecords);
  int index = 0;
  while (index < numberOfRecords) {
    generated.add(SchemaTestUtil.generateAvroRecordFromJson(schema, index, instantTime, fileId));
    index++;
  }
  return generated;
}
 
Example #30
Source File: AvroSplitByFieldMessageParser.java    From secor with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the string form of the split field's value in the given record.
 *
 * @param record the Avro message to inspect
 * @return the value of the split field, converted with {@code toString()}
 * @throws RuntimeException if the record has no value for the split field
 */
protected String extractEventType(GenericRecord record) {
    Object splitValue = record.get(mSplitFieldName);
    if (splitValue != null) {
        return splitValue.toString();
    }
    throw new RuntimeException("Could not find key " + mSplitFieldName + " in Avro message");
}