Java Code Examples for org.apache.avro.generic.GenericData#Record

The following examples show how to use org.apache.avro.generic.GenericData#Record. Each example is drawn from an open-source project; the source file, originating project, and license are noted above the code.
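
Before the examples, a minimal sketch of the core API: a GenericData.Record is constructed from a Schema and populated by field name or by field position (the class implements IndexedRecord). The schema string below is illustrative only and is not taken from any of the projects.

// Assumes org.apache.avro.Schema and org.apache.avro.generic.GenericData are imported
Schema schema = new Schema.Parser().parse(
    "{\"type\": \"record\", \"name\": \"User\", \"fields\": ["
        + "{\"name\": \"name\", \"type\": \"string\"},"
        + "{\"name\": \"age\", \"type\": \"int\"}]}");

GenericData.Record record = new GenericData.Record(schema);
record.put("name", "alice"); // set by field name
record.put(1, 30);           // set by field position (age)
Object name = record.get("name");
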
Example 1
Source File: TestJsonUtil.java    From kite with Apache License 2.0
@Test
public void testSchemaInferenceMap() throws Exception {
  Schema recordSchema = SchemaBuilder.record("Test").fields()
      .requiredString("aString")
      .name("aMap").type().map().values().stringType().noDefault()
      .endRecord();

  String jsonSample = "{" +
      "\"aString\": \"triangle\"," +
      "\"aMap\": { \"left\": \"timid\", \"right\": \"dictionary\" }" +
      "}";

  JsonNode datum = JsonUtil.parse(jsonSample);
  Assert.assertEquals("Should produce expected schema",
      recordSchema, JsonUtil.inferSchemaWithMaps(datum, "Test"));

  Map<String, Object> aMap = Maps.newLinkedHashMap();
  aMap.put("left", "timid");
  aMap.put("right", "dictionary");
  GenericData.Record expected = new GenericData.Record(recordSchema);
  expected.put("aString", "triangle");
  expected.put("aMap", aMap);
  Assert.assertEquals("Should convert to record",
      expected, convertGeneric(datum, recordSchema));
}
 
Example 2
Source File: TestTransformCommandCluster.java    From kite with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testCopyWithNumWriters() throws Exception {
  Assume.assumeTrue(setLocalReducerMax(getConfiguration(), 3));

  command.repoURI = repoUri;
  command.numWriters = 3;
  command.datasets = Lists.newArrayList(source, dest);

  int rc = command.run();
  Assert.assertEquals("Should return success", 0, rc);

  DatasetRepository repo = DatasetRepositories.repositoryFor("repo:" + repoUri);
  FileSystemDataset<GenericData.Record> ds =
      (FileSystemDataset<GenericData.Record>) repo.<GenericData.Record>
          load("default", dest);
  int size = DatasetTestUtilities.datasetSize(ds);
  Assert.assertEquals("Should contain copied records", 6, size);

  Assert.assertEquals("Should produce 3 files",
      3, Iterators.size(ds.pathIterator()));

  verify(console).info("Added {} records to \"{}\"", 6L, dest);
  verifyNoMoreInteractions(console);
}
 
Example 3
Source File: AggregateCombineFnTest.java    From components with Apache License 2.0
@Test
public void MinDoubleAccumulatorFnTest() {
    List<GenericData.Record> testData = genRecords(Arrays.asList(3.3, 2.2, 10.10, 1.1, 5.5), AvroUtils._double());

    AggregateCombineFn.MinDoubleAccumulatorFn fn1 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn1.createAccumulator();
    AggregateCombineFn.MinDoubleAccumulatorFn fn2 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn2.createAccumulator();
    AggregateCombineFn.MinDoubleAccumulatorFn fn3 = new AggregateCombineFn.MinDoubleAccumulatorFn();
    fn3.createAccumulator();
    double delta = 0.0;
    fn1.addInput(testData.get(0));
    Assert.assertEquals(3.3, fn1.extractOutput(), delta);
    fn1.addInput(testData.get(1));
    Assert.assertEquals(2.2, fn1.getAccumulators(), delta);

    fn2.addInput(testData.get(2));
    fn2.addInput(testData.get(3));
    Assert.assertEquals(1.1, fn2.extractOutput(), delta);

    fn3.addInput(testData.get(4));
    Assert.assertEquals(5.5, fn3.extractOutput(), delta);

    fn1.mergeAccumulators(Arrays.asList(fn2.getAccumulators(), fn3.getAccumulators()));
    Assert.assertEquals(1.1, fn1.extractOutput(), delta);
}
 
Example 4
Source File: ValueReaders.java    From iceberg with Apache License 2.0
@Override
protected GenericData.Record reuseOrCreate(Object reuse) {
  if (reuse instanceof GenericData.Record) {
    return (GenericData.Record) reuse;
  } else {
    return new GenericData.Record(recordSchema);
  }
}
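
The reuseOrCreate hook above follows Avro's DatumReader contract, in which a previously decoded record may be passed back in so its fields are overwritten instead of allocating a new object. A minimal sketch of that pattern, assuming a Schema and a BinaryDecoder are already set up (process is a hypothetical consumer):

GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(schema);
GenericData.Record reuse = null;
while (!decoder.isEnd()) {
    reuse = reader.read(reuse, decoder); // Avro fills the passed-in record when it can
    process(reuse);                      // hypothetical per-record consumer
}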
 
Example 5
Source File: MarketoInputReaderTest.java    From components with Apache License 2.0
@Test
public void testStart() throws Exception {
    MarketoRecordResult mkto = new MarketoRecordResult();
    mkto.setSuccess(false);
    mkto.setErrors(Arrays.asList(new MarketoError("REST", "error")));

    when(client.bulkImport(any())).thenReturn(mkto);
    when(client.getLead(any(), any())).thenReturn(mkto);
    when(client.getMultipleLeads(any(), any())).thenReturn(mkto);
    when(client.getLeadActivity(any(), any())).thenReturn(mkto);
    when(client.getLeadChanges(any(), any())).thenReturn(mkto);
    when(client.describeCustomObject(any())).thenReturn(mkto);
    when(client.listCustomObjects(any())).thenReturn(mkto);
    when(client.getCustomObjects(any(), any())).thenReturn(mkto);

    try {
        assertFalse(reader.start());
        fail("Should not be here");
    } catch (Exception e) {
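        // Expected path: reader.start() throws while the client returns an error result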
    }

    IndexedRecord record = new GenericData.Record(MarketoConstants.getEmptySchema());
    mkto.setSuccess(true);
    mkto.setRecords(Arrays.asList(record));

    when(client.bulkImport(any())).thenReturn(mkto);
    when(client.getLead(any(), any())).thenReturn(mkto);
    when(client.getMultipleLeads(any(), any())).thenReturn(mkto);
    when(client.getLeadActivity(any(), any())).thenReturn(mkto);
    when(client.getLeadChanges(any(), any())).thenReturn(mkto);
    when(client.describeCustomObject(any())).thenReturn(mkto);
    when(client.listCustomObjects(any())).thenReturn(mkto);
    when(client.getCustomObjects(any(), any())).thenReturn(mkto);

    assertFalse(reader.start());

}
 
Example 6
Source File: SchemaRegistryProducer.java    From blog with MIT License
public static void main(String[] args) {

    // Parse the Avro schema
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(userSchema);

    // Set producer properties
    Properties properties = new Properties();
    // Kafka bootstrap servers
    properties.put(
        ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "node-160:9092,node-161:9092,node-162:9092");
    // Key serializer class
    properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    // Value serializer class
    properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);

    // Schema Registry URL
    properties.put("schema.registry.url", "http://node-160:8081");

    // Create the producer
    KafkaProducer<Object, Object> producer = new KafkaProducer<>(properties);

    // Build a record and send it
    GenericData.Record record = new GenericData.Record(schema);
    record.put("name", "hvkcoder");
    producer.send(new ProducerRecord<>("topic01", record));

    // Close the producer
    producer.close();
  }
 
Example 7
Source File: NetSuiteOutputWriterIT.java    From components with Apache License 2.0
private static List<IndexedRecord> makeRecordRefIndexedRecords(Schema schema, List<RecordRef> refList) {
    List<IndexedRecord> indexedRecordList = new ArrayList<>(refList.size());
    for (RecordRef ref : refList) {
        GenericRecord indexedRecord = new GenericData.Record(schema);
        indexedRecord.put("InternalId", ref.getInternalId());
        indexedRecordList.add(indexedRecord);
    }
    return indexedRecordList;
}
 
Example 8
Source File: RegressionAdmmTrain.java    From ml-ease with Apache License 2.0
private void updateLogLikBestModel(JobConf conf, int niter,  Map<String, LinearModel> z, String testPath, 
                                  boolean ignoreValue, MutableFloat bestTestLoglik, String outBasePath, 
                                  int  numClickReplicates) throws IOException
{   
  Map<String, Double> loglik;
  loglik = testloglik(conf, z, testPath, 1, ignoreValue);
  
  AvroHdfsFileWriter<GenericRecord> writer =
      new AvroHdfsFileWriter<GenericRecord>(conf, outBasePath
          + "/sample-test-loglik/iteration-"+niter +".avro", SampleTestLoglik.SCHEMA$);
  DataFileWriter<GenericRecord> testRecordWriter = writer.get();  

  for (String k : z.keySet())
  {     
    GenericData.Record valuemap = new GenericData.Record(SampleTestLoglik.SCHEMA$);
    valuemap.put("iter", niter);
    valuemap.put("testLoglik", loglik.get(k).floatValue());
    valuemap.put("lambda", k);
    testRecordWriter.append(valuemap);
    _logger.info("Sample test loglik for lambda=" + k + " is: "
        + String.valueOf(loglik.get(k)));
   
    // output best model up to now
    if (loglik.get(k) > bestTestLoglik.floatValue() && niter>0)
    {
      String bestModelPath = outBasePath + "/best-model/best-iteration-" + niter + ".avro";
      FileSystem fs = FileSystem.get(conf);
      fs.delete(new Path(outBasePath + "/best-model"), true);
      LinearModelUtils.writeLinearModel(conf, bestModelPath, z.get(k), k);
      bestTestLoglik.setValue(loglik.get(k).floatValue());
    }
  }
  testRecordWriter.close();
}
 
Example 9
Source File: MapTypeClusterIntegrationTest.java    From incubator-pinot with Apache License 2.0
private File createAvroFile()
    throws Exception {
  org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord("myRecord", null, null, false);
  org.apache.avro.Schema stringKeyMapAvroSchema =
      org.apache.avro.Schema.createMap(org.apache.avro.Schema.create(Type.INT));
  org.apache.avro.Schema intKeyMapAvroSchema =
      org.apache.avro.Schema.createMap(org.apache.avro.Schema.create(Type.STRING));
  List<Field> fields = Arrays.asList(new Field(STRING_KEY_MAP_FIELD_NAME, stringKeyMapAvroSchema, null, null),
      new Field(INT_KEY_MAP_FIELD_NAME, intKeyMapAvroSchema, null, null));
  avroSchema.setFields(fields);

  File avroFile = new File(_tempDir, "data.avro");
  try (DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(new GenericDatumWriter<>(avroSchema))) {
    fileWriter.create(avroSchema, avroFile);
    for (int i = 0; i < NUM_DOCS; i++) {
      Map<String, Integer> stringKeyMap = new HashMap<>();
      stringKeyMap.put("k1", i);
      stringKeyMap.put("k2", NUM_DOCS + i);
      Map<Integer, String> intKeyMap = new HashMap<>();
      intKeyMap.put(95, Integer.toString(i));
      intKeyMap.put(717, Integer.toString(NUM_DOCS + i));
      GenericData.Record record = new GenericData.Record(avroSchema);
      record.put(STRING_KEY_MAP_FIELD_NAME, stringKeyMap);
      record.put(INT_KEY_MAP_FIELD_NAME, intKeyMap);
      fileWriter.append(record);
    }
  }

  return avroFile;
}
 
Example 10
Source File: ParquetFileReader.java    From kafka-connect-fs with Apache License 2.0
@Override
protected GenericRecord nextRecord() {
    GenericRecord record;
    if (this.projection != null) {
        record = new GenericData.Record(this.projection);
        this.projection.getFields().forEach(field -> record.put(field.name(), currentRecord.get(field.name())));
    } else {
        record = currentRecord;
    }
    currentRecord = null;
    incrementOffset();
    return record;
}
 
Example 11
Source File: ClusterIntegrationTestUtils.java    From incubator-pinot with Apache License 2.0
/**
 * Pushes randomly generated Avro records into the given Kafka topic.
 *
 * @param avroFile Sample Avro file used to extract the Avro schema
 * @param kafkaBroker Kafka broker config
 * @param kafkaTopic Kafka topic
 * @param numKafkaMessagesToPush Number of Kafka messages to push
 * @param maxNumKafkaMessagesPerBatch Maximum number of Kafka messages per batch
 * @param header Optional Kafka message header
 * @param partitionColumn Optional partition column
 * @throws Exception
 */
@SuppressWarnings("unused")
public static void pushRandomAvroIntoKafka(File avroFile, String kafkaBroker, String kafkaTopic,
    int numKafkaMessagesToPush, int maxNumKafkaMessagesPerBatch, @Nullable byte[] header,
    @Nullable String partitionColumn)
    throws Exception {
  Properties properties = new Properties();
  properties.put("metadata.broker.list", kafkaBroker);
  properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
  properties.put("request.required.acks", "1");
  properties.put("partitioner.class", "kafka.producer.ByteArrayPartitioner");

  StreamDataProducer producer =
      StreamDataProvider.getStreamDataProducer(KafkaStarterUtils.KAFKA_PRODUCER_CLASS_NAME, properties);
  try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536)) {
    try (DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile)) {
      BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
      Schema avroSchema = reader.getSchema();
      GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
      GenericRecord genericRecord = new GenericData.Record(avroSchema);

      while (numKafkaMessagesToPush > 0) {
        generateRandomRecord(genericRecord, avroSchema);

        outputStream.reset();
        if (header != null && 0 < header.length) {
          outputStream.write(header);
        }
        datumWriter.write(genericRecord, binaryEncoder);
        binaryEncoder.flush();

        byte[] keyBytes = (partitionColumn == null) ? Longs.toByteArray(System.currentTimeMillis())
            : (genericRecord.get(partitionColumn)).toString().getBytes();
        byte[] bytes = outputStream.toByteArray();

        producer.produce(kafkaTopic, keyBytes, bytes);
        numKafkaMessagesToPush--;
      }
    }
  }
}
 
Example 12
Source File: TestMergeContent.java    From nifi with Apache License 2.0
@Test
public void testAvroConcatWithDifferentMetadataDoNotMerge() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);
    runner.setProperty(MergeContent.METADATA_STRATEGY, MergeContent.METADATA_STRATEGY_DO_NOT_MERGE);

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);
    final Map<String, String> userMeta1 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 1");
    }};

    final GenericRecord user2 = new GenericData.Record(schema);
    user2.put("name", "Ben");
    user2.put("favorite_number", 7);
    user2.put("favorite_color", "red");
    final Map<String, String> userMeta2 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 2"); // Test non-matching values
    }};

    final GenericRecord user3 = new GenericData.Record(schema);
    user3.put("name", "John");
    user3.put("favorite_number", 5);
    user3.put("favorite_color", "blue");
    final Map<String, String> userMeta3 = new HashMap<String, String>() {{
        put("test_metadata1", "Test 1");
        put("test_metadata2", "Test"); // Test unique
    }};

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, user1, datumWriter, userMeta1);
    final ByteArrayOutputStream out2 = serializeAvroRecord(schema, user2, datumWriter, userMeta2);
    final ByteArrayOutputStream out3 = serializeAvroRecord(schema, user3, datumWriter, userMeta3);

    runner.enqueue(out1.toByteArray());
    runner.enqueue(out2.toByteArray());
    runner.enqueue(out3.toByteArray());

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 2);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    // create a reader for the merged content
    byte[] data = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(data, schema, "name");

    Assert.assertEquals(1, users.size());
    Assert.assertTrue(users.containsKey("Alyssa"));
}
 
Example 13
Source File: JDBCSPTestIT.java    From components with Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public void test_basic_as_output_and_input() throws Exception {
    TJDBCSPDefinition definition = new TJDBCSPDefinition();
    TJDBCSPProperties properties = DBTestUtils.createCommonJDBCSPProperties(allSetting, definition);

    properties.spName.setValue("SYSCS_UTIL.SYSCS_DISABLE_LOG_ARCHIVE_MODE");
    Schema schema = DBTestUtils.createSPSchema3(tablename);
    properties.main.schema.setValue(schema);
    properties.schemaFlow.schema.setValue(schema);
    properties.spParameterTable.parameterTypes.setValue(Arrays.asList(SPParameterTable.ParameterType.IN.name()));
    properties.spParameterTable.schemaColumns.setValue(Arrays.asList("PARAMETER1"));

    JDBCSPSink sink = new JDBCSPSink();

    sink.initialize(null, properties);
    ValidationResult result = sink.validate(null);
    Assert.assertTrue(result.getStatus() == ValidationResult.Result.OK);

    WriteOperation operation = sink.createWriteOperation();
    JDBCSPWriter writer = (JDBCSPWriter) operation.createWriter(null);

    try {
        writer.open("wid");

        IndexedRecord r1 = new GenericData.Record(properties.main.schema.getValue());
        r1.put(0, 0);
        r1.put(1, "wangwei");
        
        writer.write(r1);
        
        List<IndexedRecord> writeResult = writer.getSuccessfulWrites();
        Assert.assertEquals(1, writeResult.size());
        
        IndexedRecord record = writeResult.get(0);
        Assert.assertEquals(Integer.valueOf(0), record.get(0));
        Assert.assertEquals("wangwei", record.get(1));
        
        writer.cleanWrites();
    } finally {
        writer.close();
    }
}
 
Example 14
Source File: SalesforceWriter.java    From components with Apache License 2.0
private void handleReject(IndexedRecord input, Error[] resultErrors, String[] changedItemKeys, int batchIdx)
        throws IOException {
    String changedItemKey = null;
    if (batchIdx < changedItemKeys.length) {
        if (changedItemKeys[batchIdx] != null) {
            changedItemKey = changedItemKeys[batchIdx];
        } else {
            changedItemKey = String.valueOf(batchIdx + 1);
        }
    } else {
        changedItemKey = "Batch index out of bounds";
    }
    StringBuilder errors = SalesforceRuntime.addLog(resultErrors, changedItemKey, logWriter);
    if (exceptionForErrors) {
        if (errors.toString().length() > 0) {
            if (logWriter != null) {
                logWriter.close();
            }
            throw new IOException(errors.toString());
        }
    } else {
        rejectCount++;
        Schema outSchema = sprops.schemaReject.schema.getValue();
        if (outSchema == null || outSchema.getFields().size() == 0) {
            return;
        }
        if (input.getSchema().equals(outSchema)) {
            rejectedWrites.add(input);
        } else {
            IndexedRecord reject = null;
            if (AvroUtils.isIncludeAllFields(outSchema)) {
                Schema runtimeSchema = input.getSchema();
                List<Schema.Field> addedFields = new ArrayList<>();
                // Check whether design schema has additional field
                Schema.Field errorCodeField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_CODE);
                Schema.Field errorField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_FIELDS);
                Schema.Field errorMsgField = outSchema.getField(TSalesforceOutputProperties.FIELD_ERROR_MESSAGE);
                if (errorCodeField != null) {
                    addedFields.add(new Schema.Field(errorCodeField.name(), errorCodeField.schema(),
                            errorCodeField.doc(), errorCodeField.defaultVal()));
                }
                if (errorField != null) {
                    addedFields.add(new Schema.Field(errorField.name(), errorField.schema(), errorField.doc(),
                            errorField.defaultVal()));
                }
                if (errorMsgField != null) {
                    addedFields.add(new Schema.Field(errorMsgField.name(), errorMsgField.schema(),
                            errorMsgField.doc(), errorMsgField.defaultVal()));
                }
                if (addedFields.size() > 0) {
                    // Append additional fields to the runtime schema
                    runtimeSchema = AvroUtils.appendFields(runtimeSchema,
                            addedFields.toArray(new Schema.Field[addedFields.size()]));
                }
                reject = new GenericData.Record(runtimeSchema);
            } else {
                reject = new GenericData.Record(outSchema);
            }
            for (Schema.Field outField : reject.getSchema().getFields()) {
                Object outValue = null;
                Schema.Field inField = input.getSchema().getField(outField.name());
                if (inField != null) {
                    outValue = input.get(inField.pos());
                } else if (resultErrors.length > 0) {
                    Error error = resultErrors[0];
                    if (TSalesforceOutputProperties.FIELD_ERROR_CODE.equals(outField.name())) {
                        outValue = error.getStatusCode() != null ? error.getStatusCode().toString() : null;
                    } else if (TSalesforceOutputProperties.FIELD_ERROR_FIELDS.equals(outField.name())) {
                        StringBuffer fields = new StringBuffer();
                        for (String field : error.getFields()) {
                            fields.append(field);
                            fields.append(",");
                        }
                        if (fields.length() > 0) {
                            fields.deleteCharAt(fields.length() - 1);
                        }
                        outValue = fields.toString();
                    } else if (TSalesforceOutputProperties.FIELD_ERROR_MESSAGE.equals(outField.name())) {
                        outValue = error.getMessage();
                    }
                }
                reject.put(outField.pos(), outValue);
            }
            rejectedWrites.add(reject);
        }
        Property<OutputAction> outputAction = sprops.outputAction;
        LOGGER.info(MESSAGES.getMessage("info.rejectedRecord",
                sprops.outputAction.getPossibleValuesDisplayName(outputAction.getValue()).toLowerCase(), dataCount));
    }
}
 
Example 15
Source File: ConverterTest.java    From xml-avro with Apache License 2.0
@Test
public void recordWithWildcardField() {
    String xsd =
            "<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>" +
            "  <xs:complexType name='type'>" +
            "    <xs:sequence>" +
            "      <xs:element name='field' type='xs:string'/>" +
            "      <xs:any/>" +
            "    </xs:sequence>" +
            "  </xs:complexType>" +
            "  <xs:element name='root' type='type'/>" +
            "</xs:schema>";

    Schema schema = Converter.createSchema(xsd);
    assertEquals(2, schema.getFields().size());

    Schema.Field wildcardField = schema.getField(Source.WILDCARD);
    assertEquals(Schema.Type.MAP, wildcardField.schema().getType());

    // Two wildcard-matched elements
    String xml =
            "<root>" +
            "  <field>field</field>" +
            "  <field0>field0</field0>" +
            "  <field1>field1</field1>" +
            "</root>";

    GenericData.Record record = Converter.createDatum(schema, xml);
    assertEquals("field", record.get("field"));

    @SuppressWarnings("unchecked")
    java.util.Map<String, String> map = (java.util.Map<String, String>) record.get(Source.WILDCARD);

    assertEquals(2, map.size());
    assertEquals("field0", map.get("field0"));
    assertEquals("field1", map.get("field1"));

    // No wildcard-matched element
    xml = "<root><field>field</field></root>";
    record = Converter.createDatum(schema, xml);

    assertEquals("field", record.get("field"));
    assertEquals(Collections.emptyMap(), record.get(Source.WILDCARD));
}
 
Example 16
Source File: SimpleAvroJob.java    From datafu with Apache License 2.0
public TheMapper()
{
  key = new GenericData.Record(KEY_SCHEMA);
  value = new GenericData.Record(VALUE_SCHEMA);
  value.put("count", 1L);
}
 
Example 17
Source File: FastGenericDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License
@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadArrayOfRecords(Implementation implementation) {
  // given
  Schema recordSchema = createRecord("record", createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));

  Schema arrayRecordSchema = Schema.createArray(recordSchema);

  GenericData.Record subRecordBuilder = new GenericData.Record(recordSchema);
  subRecordBuilder.put("field", "abc");

  GenericData.Array<GenericData.Record> recordsArray = new GenericData.Array<>(0, arrayRecordSchema);
  recordsArray.add(subRecordBuilder);
  recordsArray.add(subRecordBuilder);

  // when
  GenericData.Array<GenericRecord> array = implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray));

  // then
  Assert.assertEquals(2, array.size());
  Assert.assertEquals(new Utf8("abc"), array.get(0).get("field"));
  Assert.assertEquals(new Utf8("abc"), array.get(1).get("field"));

  // given

  arrayRecordSchema = Schema.createArray(createUnionSchema(recordSchema));

  subRecordBuilder = new GenericData.Record(recordSchema);
  subRecordBuilder.put("field", "abc");

  recordsArray = new GenericData.Array<>(0, arrayRecordSchema);
  recordsArray.add(subRecordBuilder);
  recordsArray.add(subRecordBuilder);

  // when
  array = implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray));

  // then
  Assert.assertEquals(2, array.size());
  Assert.assertEquals(new Utf8("abc"), array.get(0).get("field"));
  Assert.assertEquals(new Utf8("abc"), array.get(1).get("field"));
}
 
Example 18
Source File: ItemModelTrain.java    From ml-ease with Apache License 2.0
@Override
public void reduce(Utf8 key,
                   Iterable<RegressionPrepareOutput> values,
                   AvroCollector<GenericData.Record> collector,
                   Reporter reporter) throws IOException
{
  // Prepare the data set
  LibLinearDataset dataset;
  if (_binaryFeature)
  {
    dataset = new LibLinearBinaryDataset(1.0, _shortFeatureIndex);
  }
  else
  {
    dataset = new LibLinearDataset(1.0);
  }
  for (RegressionPrepareOutput value : values)
  {
    dataset.addInstanceAvro(value);
  }
  dataset.finish();
  // First determine the prior mean for the intercept
  Map<String, Double> priorMeanMap = new HashMap<String, Double>();
  double interceptPriorMean = _interceptDefaultPriorMean;
  if (_interceptPriorMeanMap.containsKey(key.toString()))
  {
    interceptPriorMean = _interceptPriorMeanMap.get(key.toString());
    reporter.incrCounter("ItemModelTrainV3", "Found intercept prior mean in intercept prior mean map", 1);
  }
  priorMeanMap.put(LibLinearDataset.INTERCEPT_NAME, interceptPriorMean);
  
  // now cross product the lambdas for intercept and default
  for (float interceptLambda : _interceptLambdas)
    for (float defaultLambda : _defaultLambdas)
    {
      _priorVarMap.put(LibLinearDataset.INTERCEPT_NAME, 1.0/interceptLambda);
      GenericData.Record output = new GenericData.Record(LinearModelWithVarAvro.SCHEMA$);
      // Run liblinear
      LibLinear liblinear = new LibLinear();
      liblinear.setReporter(reporter, _reportfreq);
      String option = "epsilon=" + String.valueOf(_liblinearEpsilon);
      try
      {
        liblinear.train(dataset, null, priorMeanMap, _priorVarMap, 0, 1.0 / defaultLambda, option, _computeVar);
        LinearModel model = liblinear.getLinearModel();
        
        output.put("key", String.valueOf(interceptLambda) + ":" + String.valueOf(defaultLambda)+ "#" + key);
        output.put("model", model.toAvro(LIBLINEAR_INTERCEPT_KEY));
        if (_computeVar)
        {
          LinearModel posteriorVar = new LinearModel(LIBLINEAR_INTERCEPT_KEY,liblinear.getPostVarMap());
          output.put("posteriorVar", posteriorVar.toAvro(LIBLINEAR_INTERCEPT_KEY));
        } else
        {
          output.put("posteriorVar", new LinearModel().toAvro(LIBLINEAR_INTERCEPT_KEY));
        }
      }
      catch (Exception e)
      {
        // output everything to debug
        _logger.info("Dataset size=" + dataset.y.length);
        _logger.info("Number of features=" + dataset.nFeatures());
        _logger.info("Model size=" + liblinear.getParamMap().size());
        _logger.info("bias=" + liblinear.bias);
        _logger.info("Model:");
        for (String k : liblinear.getParamMap().keySet())
        {
          _logger.info(k + " " + liblinear.getParamMap().get(k).toString());
        }
        throw new IOException("Model fitting error!", e);
      }
      collector.collect(output);
    }     
}
 
Example 19
Source File: FastGenericDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License
@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadSubRecordComplexCollectionsField(Implementation implementation) {
  // given
  Schema subRecordSchema = createRecord("subRecord", createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));
  Schema recordSchema = createRecord(
      createArrayFieldSchema("recordsArrayMap", Schema.createMap(createUnionSchema(subRecordSchema))),
      createMapFieldSchema("recordsMapArray", Schema.createArray(createUnionSchema(subRecordSchema))),
      createUnionField("recordsArrayMapUnion",
          Schema.createArray(Schema.createMap(createUnionSchema(subRecordSchema)))),
      createUnionField("recordsMapArrayUnion",
          Schema.createMap(Schema.createArray(createUnionSchema(subRecordSchema)))));

  GenericData.Record subRecordBuilder = new GenericData.Record(subRecordSchema);
  subRecordBuilder.put("subField", "abc");

  GenericData.Record builder = new GenericData.Record(recordSchema);
  List<Map<String, GenericRecord>> recordsArrayMap = new ArrayList<>();
  Map<String, GenericRecord> recordMap = new HashMap<>();
  recordMap.put("1", subRecordBuilder);
  recordsArrayMap.add(recordMap);

  builder.put("recordsArrayMap", recordsArrayMap);
  builder.put("recordsArrayMapUnion", recordsArrayMap);

  Map<String, List<GenericRecord>> recordsMapArray = new HashMap<>();
  List<GenericRecord> recordList = new ArrayList<>();
  recordList.add(subRecordBuilder);
  recordsMapArray.put("1", recordList);

  builder.put("recordsMapArray", recordsMapArray);
  builder.put("recordsMapArrayUnion", recordsMapArray);

  // when
  GenericRecord record = implementation.decode(recordSchema, recordSchema, genericDataAsDecoder(builder));

  // then
  Assert.assertEquals(new Utf8("abc"),
      ((List<Map<String, GenericRecord>>) record.get("recordsArrayMap")).get(0).get(new Utf8("1")).get("subField"));
  Assert.assertEquals(new Utf8("abc"),
      ((Map<String, List<GenericRecord>>) record.get("recordsMapArray")).get(new Utf8("1")).get(0).get("subField"));
  Assert.assertEquals(new Utf8("abc"), ((List<Map<String, GenericRecord>>) record.get("recordsArrayMapUnion")).get(0)
      .get(new Utf8("1"))
      .get("subField"));
  Assert.assertEquals(new Utf8("abc"),
      ((Map<String, List<GenericRecord>>) record.get("recordsMapArrayUnion")).get(new Utf8("1"))
          .get(0)
          .get("subField"));
}
 
Example 20
Source File: TestMergeContent.java    From nifi with Apache License 2.0
@Test
public void testSimpleAvroConcat() throws IOException, InterruptedException {
    final TestRunner runner = TestRunners.newTestRunner(new MergeContent());
    runner.setProperty(MergeContent.MAX_ENTRIES, "3");
    runner.setProperty(MergeContent.MIN_ENTRIES, "3");
    runner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_AVRO);

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/TestMergeContent/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final GenericRecord user2 = new GenericData.Record(schema);
    user2.put("name", "Ben");
    user2.put("favorite_number", 7);
    user2.put("favorite_color", "red");

    final GenericRecord user3 = new GenericData.Record(schema);
    user3.put("name", "John");
    user3.put("favorite_number", 5);
    user3.put("favorite_color", "blue");

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, user1, datumWriter);
    final ByteArrayOutputStream out2 = serializeAvroRecord(schema, user2, datumWriter);
    final ByteArrayOutputStream out3 = serializeAvroRecord(schema, user3, datumWriter);

    runner.enqueue(out1.toByteArray());
    runner.enqueue(out2.toByteArray());
    runner.enqueue(out3.toByteArray());

    runner.run();
    runner.assertQueueEmpty();
    runner.assertTransferCount(MergeContent.REL_MERGED, 1);
    runner.assertTransferCount(MergeContent.REL_FAILURE, 0);
    runner.assertTransferCount(MergeContent.REL_ORIGINAL, 3);

    final MockFlowFile bundle = runner.getFlowFilesForRelationship(MergeContent.REL_MERGED).get(0);
    bundle.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/avro-binary");

    // create a reader for the merged content
    byte[] data = runner.getContentAsByteArray(bundle);
    final Map<String, GenericRecord> users = getGenericRecordMap(data, schema, "name");

    Assert.assertEquals(3, users.size());
    Assert.assertTrue(users.containsKey("Alyssa"));
    Assert.assertTrue(users.containsKey("Ben"));
    Assert.assertTrue(users.containsKey("John"));
}