Java Code Examples for org.apache.avro.generic.IndexedRecord

The following examples show how to use org.apache.avro.generic.IndexedRecord. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: components   Source File: ExcelFileInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the record reader that matches the Excel format named in the job configuration.
 *
 * <p>All settings are read from {@code context.getConfiguration()}; the format falls back to
 * {@code "EXCEL2007"} when it is not configured.
 *
 * @param split the input split (not consulted by this method)
 * @param context task context carrying the Talend Excel settings
 * @return a reader for EXCEL2007, EXCEL97 or HTML input
 * @throws IOException if the configured format is none of the three supported values
 */
@Override
public RecordReader<Void, IndexedRecord> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
  final String charset = context.getConfiguration().get(TALEND_ENCODING);
  final String sheetName = context.getConfiguration().get(TALEND_EXCEL_SHEET_NAME);
  final long headerSize = context.getConfiguration().getLong(TALEND_HEADER, 0L);
  final long footerSize = context.getConfiguration().getLong(TALEND_FOOTER, 0L);
  final String format = context.getConfiguration().get(TALEND_EXCEL_FORMAT, "EXCEL2007");
  final long rowLimit = context.getConfiguration().getLong(TALEND_EXCEL_LIMIT, -1);

  // The three format names are mutually exclusive, so each match returns immediately.
  if ("HTML".equals(format)) {
    // Only the HTML reader takes an encoding instead of a sheet name.
    return new ExcelHTMLFileRecordReader(charset, headerSize, footerSize, rowLimit);
  }
  if ("EXCEL97".equals(format)) {
    return new Excel97FileRecordReader(sheetName, headerSize, footerSize, rowLimit);
  }
  if ("EXCEL2007".equals(format)) {
    return new Excel2007FileRecordReader(sheetName, headerSize, footerSize, rowLimit);
  }

  throw new IOException("not a valid excel format");
}
 
Example 2
/**
 * Looks up a custom object by the compound key (customerId, VIN) and verifies that exactly one
 * record is returned with the expected customerId, VIN and brand values.
 */
@Test
public void testGetCustomObjectWithCompoundKey() throws Exception {
    // Configure a "get" on the car custom object, addressed through a compound key.
    irProps.customObjectAction.setValue(CustomObjectAction.get);
    irProps.customObjectName.setValue(TEST_CO_NAME_CAR);
    irProps.validateFetchCustomObjectSchema();
    irProps.useCompoundKey.setValue(true);
    // "searchableFields": "[[\"customerId\",\"VIN\"],[\"marketoGUID\"],[\"customerId\"]]"
    irProps.compoundKey.keyName.setValue(Arrays.asList("customerId", "VIN"));
    irProps.compoundKey.keyValue.setValue(Arrays.asList("4137181", "WBA4R7C30HK896061"));// WBA4R7C55HK895912

    MarketoSource marketoSource = new MarketoSource();
    marketoSource.initialize(null, irProps);
    MarketoRESTClient restClient = (MarketoRESTClient) marketoSource.getClientService(null);

    MarketoRecordResult fetched = restClient.getCustomObjects(irProps, null);
    LOG.debug("result = {}.", fetched);
    assertNotNull(fetched.getRecords());
    assertEquals(1, fetched.getRecords().size());

    // Resolve each field by name so the assertions do not depend on column order.
    IndexedRecord car = fetched.getRecords().get(0);
    Schema carSchema = car.getSchema();
    assertEquals(4137181, car.get(carSchema.getField("customerId").pos()));
    assertEquals("WBA4R7C30HK896061", car.get(carSchema.getField("VIN").pos()));
    assertEquals("FIT", car.get(carSchema.getField("brand").pos()));
}
 
Example 3
Source Project: components   Source File: NsObjectInputTransducer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@code IndexedRecord} for the runtime schema from a NetSuite data model object.
 *
 * <p>Each schema field is mapped to its NetSuite field name; fields for which the type
 * descriptor has no entry are left unset in the returned record.
 *
 * @param data NetSuite data object
 * @return record populated from {@code data}
 */
public IndexedRecord read(Object data) {
    prepare();

    Map<String, FieldDesc> descriptorsByName = typeDesc.getFieldMap();
    Map<String, Object> dataView = getMapView(data, runtimeSchema, typeDesc);

    GenericRecord target = new GenericData.Record(runtimeSchema);

    for (Schema.Field schemaField : runtimeSchema.getFields()) {
        FieldDesc descriptor = descriptorsByName.get(NetSuiteDatasetRuntimeImpl.getNsFieldName(schemaField));
        if (descriptor != null) {
            // Only fields known to the type descriptor can be read from the data view.
            target.put(schemaField.name(), readField(dataView, descriptor));
        }
    }

    return target;
}
 
Example 4
Source Project: components   Source File: MarketoSOAPClientTestIT.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Requests lead changes for the configured creation-date window and checks that records are
 * returned, that more remain to be fetched, and that column 0 of every record is a non-null
 * {@code Long}.
 */
@Test
public void testGetLeadsChanges() throws Exception {
    // Select the operation and let the properties react to it before tuning the request.
    inputProperties.inputOperation.setValue(getLeadChanges);
    inputProperties.afterInputOperation();
    inputProperties.beforeMappingInput();
    inputProperties.batchSize.setValue(1000);
    inputProperties.oldestCreateDate.setValue(DATE_OLDEST_CREATE);
    inputProperties.latestCreateDate.setValue(DATE_LATEST_CREATE);

    MarketoSource marketoSource = new MarketoSource();
    marketoSource.initialize(null, inputProperties);
    MarketoClientService service = marketoSource.getClientService(null);

    MarketoRecordResult response = service.getLeadChanges(inputProperties, null);
    List<IndexedRecord> leadChanges = response.getRecords();
    assertTrue(leadChanges.size() > 0);
    assertTrue(response.getRemainCount() > 0);

    List<IndexedRecord> fetched = response.getRecords();
    assertTrue(fetched.size() > 0);
    for (IndexedRecord change : fetched) {
        Object firstColumn = change.get(0);
        assertNotNull(firstColumn);
        assertTrue(firstColumn instanceof Long);
    }
}
 
Example 5
/**
 * Writes field 0 of {@code data} as a two-branch union: index 0 followed by a null when the
 * field is {@code null}, otherwise index 1 followed by the field's string value.
 *
 * @param data record whose field 0 is a {@code CharSequence} or {@code null}
 * @param encoder destination encoder
 * @throws IOException if the encoder fails to write
 */
@SuppressWarnings("unchecked")
public void serializeRecord0(IndexedRecord data, Encoder encoder)
    throws IOException
{
    final CharSequence text = (CharSequence) data.get(0);
    if (text == null) {
        encoder.writeIndex(0);
        encoder.writeNull();
        return;
    }

    // A non-null value that survived the cast above is necessarily a CharSequence.
    encoder.writeIndex(1);
    if (text instanceof Utf8) {
        // Hand Utf8 instances to the encoder as they are.
        encoder.writeString((Utf8) text);
    } else {
        encoder.writeString(text.toString());
    }
}
 
Example 6
Source Project: components   Source File: MarketoDatasetRuntimeTestIT.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Collects up to {@code limit} sample records from the runtime and verifies that between 1 and
 * {@code limit} records arrived, each with a non-null value in the named field.
 *
 * @param limit maximum number of samples requested
 * @param checkFieldExists name of the field that must be non-null in every sample
 * @throws Exception if initialisation or sampling fails
 */
private void checkSamples(int limit, String checkFieldExists) throws Exception {
    runtime.initialize(null, dataset);

    final List<IndexedRecord> collected = new ArrayList<>();
    Consumer<IndexedRecord> collector = new Consumer<IndexedRecord>() {

        @Override
        public void accept(IndexedRecord sample) {
            collected.add(sample);
        }
    };
    runtime.getSample(limit, collector);

    assertThat(collected.size(), Matchers.greaterThan(0));
    assertThat(collected.size(), Matchers.lessThan(limit + 1));
    for (IndexedRecord sample : collected) {
        int fieldPosition = sample.getSchema().getField(checkFieldExists).pos();
        assertNotNull(sample.get(fieldPosition));
    }
}
 
Example 7
/**
 * Converts an Avro record into a {@code TableRow}, omitting fields whose value is {@code null}.
 *
 * @param indexedRecord incoming record
 * @return row holding the converted non-null field values, keyed by field name
 */
@Override
public TableRow convertToDatum(IndexedRecord indexedRecord) {
    // No schema was set up front: adopt the one carried by the first incoming record.
    if (schema == null) {
        schema = indexedRecord.getSchema();
        initFieldConverters();
    }

    final TableRow result = new TableRow();
    for (Schema.Field column : schema.getFields()) {
        final Object raw = indexedRecord.get(column.pos());
        if (raw == null) {
            continue;
        }
        final String columnName = column.name();
        result.set(columnName, fieldConverters.get(columnName).convertToDatum(raw));
    }
    return result;
}
 
Example 8
Source Project: components   Source File: KeyValueUtilsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Input record: {"a": "a", "b": "b", "c": "c"}, with an empty list of key fields.
 *
 * Expected key/value form:
 *
 * key: empty
 *
 * value: {"a": "a", "b": "b", "c": "c"}
 *
 * Merging the key/value form back must reproduce the input record.
 */
@Test
public void test_EverythingIsAValue() throws Exception {
    GenericRecord source = new GenericRecordBuilder(inputSimpleSchema) //
            .set("a", "a") //
            .set("b", "b") //
            .set("c", "c") //
            .build();

    // Expected strings are written with single quotes for readability, then converted.
    String expectedKeyValue = ("{'key': {}, " + "'value': {'a': 'a', 'b': 'b', 'c': 'c'}}").replaceAll("\\'", "\"");
    IndexedRecord keyValue = KeyValueUtils.transformToKV(source,
            SchemaGeneratorUtils.extractKeyValues(source.getSchema(), new ArrayList<String>()));
    assertEquals(expectedKeyValue, keyValue.toString());

    // Round trip: merge key and value back into a flat record.
    Schema flatSchema = SchemaGeneratorUtils.mergeKeyValues(keyValue.getSchema());
    String expectedFlat = ("{'a': 'a', 'b': 'b', 'c': 'c'}").replaceAll("\\'", "\"");
    assertEquals(expectedFlat, KeyValueUtils.transformFromKV(keyValue, flatSchema).toString());
}
 
Example 9
/**
 * Writes a map whose values are a nullable record union.
 *
 * <p>A {@code null} or empty map is written as a map with an item count of 0. For every entry
 * the key is written as a string, then the value: index 0 plus a null for a {@code null} value,
 * or index 1 plus the record (via {@code serializeRecord0}) when its schema full name is
 * {@code com.adpilot.utils.generated.avro.record}.
 *
 * @param data map to serialize; may be {@code null}
 * @param encoder destination encoder
 * @throws IOException if the encoder fails to write
 */
public void serialize(Map<CharSequence, IndexedRecord> data, Encoder encoder)
    throws IOException
{
    (encoder).writeMapStart();
    if ((data == null)||data.isEmpty()) {
        (encoder).setItemCount(0);
    } else {
        (encoder).setItemCount(data.size());
        // Walk the entries directly so each value is obtained without a second map lookup.
        for (Map.Entry<CharSequence, IndexedRecord> entry0 : data.entrySet()) {
            (encoder).startItem();
            (encoder).writeString(entry0.getKey());
            IndexedRecord union0 = entry0.getValue();
            if (union0 == null) {
                (encoder).writeIndex(0);
                (encoder).writeNull();
            } else if ("com.adpilot.utils.generated.avro.record".equals(union0.getSchema().getFullName())) {
                (encoder).writeIndex(1);
                serializeRecord0(union0, (encoder));
            }
            // NOTE(review): a non-null value with any other schema name writes nothing after the
            // key, exactly as before this change; confirm whether that case should be rejected.
        }
    }
    (encoder).writeMapEnd();
}
 
Example 10
Source Project: components   Source File: MarketoSOAPClientTestIT.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Looks up a lead by an e-mail key value ({@code EMAIL_INEXISTANT}) and expects an empty record
 * list back.
 */
@Test
public void testGetLeadFail() throws Exception {
    // Configure a getLead call keyed by e-mail.
    inputProperties.inputOperation.setValue(getLead);
    inputProperties.leadKeyTypeSOAP.setValue(EMAIL);
    inputProperties.afterInputOperation();
    inputProperties.leadKeyValue.setValue(EMAIL_INEXISTANT);

    MarketoSource marketoSource = new MarketoSource();
    marketoSource.initialize(null, inputProperties);
    MarketoClientService service = marketoSource.getClientService(null);

    MarketoRecordResult response = service.getLead(inputProperties, null);
    List<IndexedRecord> found = response.getRecords();
    assertEquals(emptyList(), found);
    LOG.debug("record = " + found);
}
 
Example 11
/**
 * Writes a map of records.
 *
 * <p>A {@code null} or empty map is written as a map with an item count of 0. For every entry
 * the key is written as a string and the value is passed to {@code serializeRecord0}.
 *
 * @param data map to serialize; may be {@code null}
 * @param encoder destination encoder
 * @throws IOException if the encoder fails to write
 */
public void serialize(Map<CharSequence, IndexedRecord> data, Encoder encoder)
    throws IOException
{
    (encoder).writeMapStart();
    if ((data == null)||data.isEmpty()) {
        (encoder).setItemCount(0);
    } else {
        (encoder).setItemCount(data.size());
        // Walk the entries directly so each value is obtained without a second map lookup.
        for (Map.Entry<CharSequence, IndexedRecord> entry0 : data.entrySet()) {
            (encoder).startItem();
            (encoder).writeString(entry0.getKey());
            serializeRecord0(entry0.getValue(), (encoder));
        }
    }
    (encoder).writeMapEnd();
}
 
Example 12
Source Project: components   Source File: GeneratorFunctionsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a record generator for the given schema and asserts that it is repeatable: the same
 * generator with the same context, and a fresh generator for the same schema, must both produce
 * a record with the same string form as the first one.
 *
 * @param schema schema to generate records for
 * @return the generator, for further testing by the caller
 */
public static GeneratorFunction<IndexedRecord> generatorOfRecord(Schema schema) {
    GeneratorFunction<IndexedRecord> generator = GeneratorFunctions.ofRecord(schema);
    assertThat(generator, notNullValue());

    // Baseline record produced from row 0.
    GeneratorContext context = generatorContextOf(0, 0L);
    IndexedRecord baseline = generator.apply(context);

    // Same generator, context rewound to row 0: the output must match the baseline.
    context.setRowId(0);
    IndexedRecord replayed = generator.apply(context);
    assertThat(replayed.toString(), equalTo(baseline.toString()));

    // Fresh generator and fresh context for the same schema: the output must match as well.
    IndexedRecord regenerated = GeneratorFunctions.ofRecord(schema).apply(generatorContextOf(0, 0L));
    assertThat(regenerated.toString(), equalTo(baseline.toString()));

    return generator;
}
 
Example 13
Source Project: spork   Source File: AvroTupleWrapper.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Maps an Avro runtime value onto the wrapper type used on the Pig side.
 *
 * <ul>
 *   <li>{@code Utf8} and {@code GenericEnumSymbol} become their string form</li>
 *   <li>{@code IndexedRecord} becomes an {@code AvroTupleWrapper}</li>
 *   <li>{@code GenericArray} becomes an {@code AvroBagWrapper}</li>
 *   <li>{@code Map} becomes an {@code AvroMapWrapper}</li>
 *   <li>{@code GenericData.Fixed} and {@code ByteBuffer} become a {@code DataByteArray}</li>
 *   <li>anything else (including {@code null}) is returned unchanged</li>
 * </ul>
 *
 * @param o value to convert; may be {@code null}
 * @return the converted value
 */
public static Object unionResolver(Object o) {
  if (o instanceof org.apache.avro.util.Utf8) {
    return o.toString();
  } else if (o instanceof IndexedRecord) {
    return new AvroTupleWrapper<IndexedRecord>((IndexedRecord) o);
  } else if (o instanceof GenericArray) {
    return new AvroBagWrapper<GenericData.Record>(
        (GenericArray<GenericData.Record>) o);
  } else if (o instanceof Map) {
    return new AvroMapWrapper((Map<CharSequence, Object>) o);
  } else if (o instanceof GenericData.Fixed) {
    return new DataByteArray(((GenericData.Fixed) o).bytes());
  } else if (o instanceof ByteBuffer) {
    // Copy only the readable window (position..limit). ByteBuffer.array() would expose the whole
    // backing array regardless of position, limit and array offset, and it throws for read-only
    // or direct buffers. The duplicate keeps the caller's buffer position untouched.
    ByteBuffer readable = ((ByteBuffer) o).duplicate();
    byte[] content = new byte[readable.remaining()];
    readable.get(content);
    return new DataByteArray(content);
  } else if (o instanceof GenericEnumSymbol) {
    return o.toString();
  } else {
    return o;
  }
}
 
Example 14
Source Project: samza   Source File: AvroRelConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts the nested avro object in SamzaMessage to relational message corresponding to
 * the tableName with relational schema.
 */
/**
 * Converts the Avro payload of a Samza message into a relational message.
 *
 * <p>An {@code IndexedRecord} payload is flattened into field names and values; a {@code null}
 * payload yields every field of the payload schema with a {@code null} value.
 *
 * @param samzaMessage key/value pair whose value is an {@code IndexedRecord} or {@code null}
 * @return relational message carrying the original key and the payload fields
 * @throws SamzaException if the value is of any other type
 */
@Override
public SamzaSqlRelMessage convertToRelMessage(KV<Object, Object> samzaMessage) {
  final List<String> payloadFieldNames = new ArrayList<>();
  final List<Object> payloadFieldValues = new ArrayList<>();
  final Object payload = samzaMessage.getValue();

  if (payload == null) {
    // No payload: emit one null value for each field of the payload schema.
    for (Schema.Field schemaField : payloadSchema.getFields()) {
      payloadFieldNames.add(schemaField.name());
      payloadFieldValues.add(null);
    }
  } else if (payload instanceof IndexedRecord) {
    fetchFieldNamesAndValuesFromIndexedRecord((IndexedRecord) payload, payloadFieldNames, payloadFieldValues,
        payloadSchema);
  } else {
    String msg = "Avro message converter doesn't support messages of type " + payload.getClass();
    LOG.error(msg);
    throw new SamzaException(msg);
  }

  return new SamzaSqlRelMessage(samzaMessage.getKey(), payloadFieldNames, payloadFieldValues,
      new SamzaSqlRelMsgMetadata(0L, 0L));
}
 
Example 15
Source Project: beam   Source File: AvroCoder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks every field of a record type for determinism.
 *
 * <p>A field annotated with {@code @AvroEncode} is reported as an error, as is a field annotated
 * with {@code @AvroSchema} whose type is not an {@code IndexedRecord} subtype. Every other field
 * is checked recursively against its field schema.
 *
 * @param type descriptor of the Java type backing the record
 * @param schema Avro record schema whose fields are inspected
 */
private void checkRecord(TypeDescriptor<?> type, Schema schema) {
  final Class<?> rawClass = type.getRawType();
  for (Schema.Field avroField : schema.getFields()) {
    final Field javaField = getField(rawClass, avroField.name());
    final String where = javaField.getDeclaringClass().getName() + "#" + javaField.getName();

    if (javaField.isAnnotationPresent(AvroEncode.class)) {
      reportError(
          where, "Custom encoders may be non-deterministic -- remove @AvroEncode");
    } else if (!IndexedRecord.class.isAssignableFrom(javaField.getType())
        && javaField.isAnnotationPresent(AvroSchema.class)) {
      // TODO: We should be able to support custom schemas on POJO fields, but we shouldn't
      // need to, so we just allow it in the case of IndexedRecords.
      reportError(
          where, "Custom schemas are only supported for subtypes of IndexedRecord.");
    } else {
      TypeDescriptor<?> resolved = type.resolveType(javaField.getGenericType());
      recurse(where, resolved, avroField.schema());
    }
  }
}
 
Example 16
Source Project: components   Source File: SimpleFileIODatasetRuntimeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Samples 100 records from the "sales-force.html" file read as Excel/HTML with one header line,
 * then checks the schema (7 fields, first "UID", last "Hire_Date") and two values of the first
 * record.
 */
@Test
public void testGetSampleExcelHtml() throws Exception {
    String htmlFile = sourceFilePrepare("sales-force.html");

    // Point the dataset at the file and declare it as HTML-flavoured Excel with a header line.
    SimpleFileIODatasetProperties datasetProps = createDatasetProperties();
    datasetProps.path.setValue(htmlFile);
    datasetProps.format.setValue(SimpleFileIOFormat.EXCEL);
    datasetProps.excelFormat.setValue(ExcelFormat.HTML);
    datasetProps.setHeaderLine.setValue(true);
    datasetProps.headerLine.setValue(1);

    final List<IndexedRecord> sample = getSample(datasetProps,100);
    assertThat(sample, hasSize(100));

    final IndexedRecord firstRow = sample.get(0);
    List<Field> columns = firstRow.getSchema().getFields();
    assertThat(columns, hasSize(7));
    assertThat("UID", equalTo(columns.get(0).name()));
    assertThat("Hire_Date", equalTo(columns.get(6).name()));

    assertThat("000001", equalTo(firstRow.get(0)));
    assertThat("France", equalTo(firstRow.get(5)));
}
 
Example 17
Source Project: components   Source File: JiraUpdateWriterTestIT.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that writing a record whose JSON body is malformed makes the writer throw an
 * {@link IOException} whose message contains "Reason: record update failed", the offending
 * record, and the error text expected from the server.
 *
 * @throws IOException expected; declared through the {@code thrown} rule
 */
@Test
public void testWriteBadRequest() throws IOException {
    // The JSON is deliberately malformed: the comma between the two members is missing.
    String malformedJson = "{\"name\":\"Updated Integration Test Project\"\"assigneeType\":\"PROJECT_LEAD\"}";
    IndexedRecord malformedRecord = new GenericData.Record(UPDATE_SCHEMA);
    malformedRecord.put(0, "TP");
    malformedRecord.put(1, malformedJson);

    thrown.expect(IOException.class);
    thrown.expectMessage("Reason: record update failed");
    thrown.expectMessage("Record: " + malformedJson);
    thrown.expectMessage("Error: ");
    thrown.expectMessage("{\"errorMessages\":[\"Unexpected character (\'\\\"\' (code 34)):");

    JiraWriter projectUpdater = JiraTestsHelper.createWriter(HOST_PORT, USER, PASS, Resource.PROJECT, Action.UPDATE);

    projectUpdater.open("upd");
    try {
        projectUpdater.write(malformedRecord);
    } finally {
        // Close the writer even though write() is expected to throw.
        projectUpdater.close();
    }
}
 
Example 18
Source Project: components   Source File: NetSuiteMockTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Produces {@code count} records by composing NetSuite objects and converting each of them with
 * an {@code NsObjectInputTransducer}.
 *
 * <p>Each converted record is copied, field by field, into a new {@code GenericData.Record}
 * built on the converted record's schema.
 *
 * @param clientService client service handed to the transducer
 * @param schema schema handed to the transducer and used to enumerate the fields to copy
 * @param objectComposer supplier of NetSuite objects
 * @param count number of records to produce; a non-positive value yields an empty list
 * @return the produced records
 * @throws Exception if composing or converting an object fails
 */
public static <T> List<IndexedRecord> makeIndexedRecords(NetSuiteClientService<?> clientService, Schema schema,
        ObjectComposer<T> objectComposer, int count) throws Exception {

    NsObjectInputTransducer transducer = new NsObjectInputTransducer(clientService, schema, schema.getName());

    List<IndexedRecord> produced = new ArrayList<>();

    for (int i = 0; i < count; i++) {
        T composed = objectComposer.composeObject();
        IndexedRecord converted = transducer.read(composed);

        GenericRecord copy = new GenericData.Record(converted.getSchema());
        for (Schema.Field schemaField : schema.getFields()) {
            int position = schemaField.pos();
            copy.put(position, converted.get(position));
        }

        produced.add(copy);
    }

    return produced;
}
 
Example 19
/**
 * Reads a record whose field 0 is a two-branch union: index 0 is a null, index 1 is a string.
 *
 * <p>{@code reuse} is recycled when it is an {@code IndexedRecord} carrying exactly the expected
 * schema instance; otherwise a new {@code GenericData.Record} is created.
 *
 * @param reuse candidate object to reuse; may be {@code null}
 * @param decoder source decoder
 * @return the record that was filled in
 * @throws IOException if the decoder fails to read
 */
public IndexedRecord deserializerecord0(Object reuse, Decoder decoder)
    throws IOException
{
    // instanceof is false for null, so no separate null check is needed.
    final boolean recyclable = (reuse instanceof IndexedRecord)
        && (((IndexedRecord) reuse).getSchema() == arrayArrayElemSchema0);
    final IndexedRecord target = recyclable
        ? (IndexedRecord) reuse
        : new org.apache.avro.generic.GenericData.Record(arrayArrayElemSchema0);

    switch (decoder.readIndex()) {
        case 0:
            decoder.readNull();
            break;
        case 1:
            // Offer the previous Utf8 value, if any, to the decoder for reuse.
            final Object previous = target.get(0);
            final Utf8 recycled = (previous instanceof Utf8) ? (Utf8) previous : null;
            target.put(0, decoder.readString(recycled));
            break;
        default:
            // Any other index leaves the record untouched.
            break;
    }
    return target;
}
 
Example 20
Source Project: components   Source File: MarketoDatasetRuntime.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prepares a getLeads request for the leads found in a sample of lead changes.
 *
 * <p>The lead ids are collected by reading up to {@code limit} lead-change records; the dataset
 * is then switched to the getLeads operation with the ids as a comma-separated key value, and
 * the source is re-initialised.
 *
 * @param limit maximum number of lead-change records to read
 * @param consumer not used inside this method
 */
public void getLeadsSample(int limit, Consumer<IndexedRecord> consumer) {
    final List<String> leadIds = new ArrayList<>();
    Consumer<IndexedRecord> idCollector = new Consumer<IndexedRecord>() {

        @Override
        public void accept(IndexedRecord change) {
            int leadIdPosition = change.getSchema().getField("leadId").pos();
            leadIds.add(String.valueOf(change.get(leadIdPosition)));
        }
    };

    // Phase 1: read lead changes to harvest lead ids.
    dataset.operation.setValue(Operation.getLeadChanges);
    dataset.afterOperation();
    getLeadChangesSample();
    ReaderDataProvider<IndexedRecord> changeProvider = new ReaderDataProvider<>(source.createReader(container), limit,
            idCollector);
    changeProvider.retrieveData();

    // Phase 2: reconfigure for a getLeads call keyed by the harvested ids.
    dataset.operation.setValue(Operation.getLeads);
    dataset.afterOperation();
    properties.leadKeyType.setValue("id");
    properties.leadKeyValue.setValue(String.join(",", leadIds));
    source.initialize(container, properties);
}
 
Example 21
Source Project: components   Source File: MarketoCustomObjectClientTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code syncCustomObjects} against three stubbed outcomes of
 * {@code executePostRequest}: an exception, an empty {@code SyncResult}, and the result built by
 * {@code getListOperationResult(true, "deleted")}.
 */
@Test
public void testSyncCustomObjects() throws Exception {
    oprops.customObjectSyncAction.setValue(CustomObjectSyncAction.createOrUpdate);
    oprops.customObjectDedupeBy.setValue("marketoGUID");
    // Case 1: the POST request throws, so the sync must report failure with an error string.
    doThrow(new MarketoException("REST", "error")).when(client).executePostRequest(eq(SyncResult.class),
            any(JsonObject.class));
    List<IndexedRecord> records = new ArrayList<>();
    IndexedRecord record = new Record(MarketoConstants.getCustomObjectRecordSchema());
    record.put(0, "mkto-123456");
    records.add(record);
    mktoSR = client.syncCustomObjects(oprops, records);
    assertFalse(mktoSR.isSuccess());
    assertFalse(mktoSR.getErrorsString().isEmpty());
    // Case 2: the POST request returns an empty SyncResult, which must still count as a failure.
    doReturn(new SyncResult()).when(client).executePostRequest(eq(SyncResult.class), any(JsonObject.class));
    mktoSR = client.syncCustomObjects(oprops, records);
    assertFalse(mktoSR.isSuccess());
    // Case 3: the POST request returns the helper-built result; the sync must succeed without errors.
    doReturn(getListOperationResult(true, "deleted")).when(client).executePostRequest(eq(SyncResult.class),
            any(JsonObject.class));
    mktoSR = client.syncCustomObjects(oprops, records);
    assertTrue(mktoSR.isSuccess());
    assertTrue(mktoSR.getErrorsString().isEmpty());
}
 
Example 22
Source Project: components   Source File: BigQueryDatasetTestIT.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a legacy-SQL query limited to one row through a sandboxed dataset runtime and checks that
 * a schema is available and that exactly one sample record is consumed.
 */
@Test
public void testBasic() throws Exception {
    // Query-based dataset; the bracketed table reference is used together with useLegacySql=true.
    BigQueryDatasetProperties props = createDatasetProperties();
    props.sourceType.setValue(SourceType.QUERY);
    props.query.setValue("SELECT * FROM [bigquery-public-data:samples.shakespeare] LIMIT 1");
    props.useLegacySql.setValue(true);

    // Filled by the consumer callback below; must be final to be captured by the anonymous class.
    final List<IndexedRecord> consumed = new ArrayList<>();

    RuntimeInfo ri = def.getRuntimeInfo(props);
    // The sandboxed instance is closed automatically when the try block exits.
    try (SandboxedInstance si = RuntimeUtil.createRuntimeClass(ri, getClass().getClassLoader())) {

        DatasetRuntime runtime = (DatasetRuntime) si.getInstance();
        runtime.initialize(null, props);
        assertThat(runtime, not(nullValue()));

        Schema s = runtime.getSchema();
        assertThat(s, not(nullValue()));

        // Request up to 100 samples; the query itself limits the result to a single row.
        runtime.getSample(100, new Consumer<IndexedRecord>() {

            @Override
            public void accept(IndexedRecord ir) {
                consumed.add(ir);
            }
        });
    }

    assertThat(consumed, hasSize(1));
}
 
Example 23
/**
 * Converts an Avro record into a {@code Row}, one position per schema field.
 *
 * @param schema record schema; its field order defines the row layout
 * @param typeInfo row type information supplying the target type of each position
 * @param record record to convert
 * @return row with one converted value per schema field
 */
private Row convertAvroRecordToRow(Schema schema, RowTypeInfo typeInfo, IndexedRecord record) {
	final List<Schema.Field> avroFields = schema.getFields();
	final TypeInformation<?>[] targetTypes = typeInfo.getFieldTypes();
	final int arity = avroFields.size();
	final Row result = new Row(arity);
	// Positions are shared by the schema fields, the type array and the record.
	for (int pos = 0; pos < arity; pos++) {
		final Schema fieldSchema = avroFields.get(pos).schema();
		result.setField(pos, convertAvroType(fieldSchema, targetTypes[pos], record.get(pos)));
	}
	return result;
}
 
Example 24
/**
 * Test fixture: writes one record per entry of {@code messages} to the test queue through an
 * {@code AzureStorageQueueSink} writer, then checks the queue's approximate message count.
 *
 * @throws Throwable if the sink, the writer or the queue access fails
 */
@Before
public void fillInQueue() throws Throwable {
    // Build output properties with the shared connection settings and the test queue name.
    TAzureStorageQueueOutputProperties properties = new TAzureStorageQueueOutputProperties("tests");
    properties = (TAzureStorageQueueOutputProperties) setupConnectionProperties(
            (AzureStorageProvideConnectionProperties) properties);
    properties.setupProperties();
    properties.queueName.setValue(TEST_QUEUE_NAME);
    AzureStorageQueueSink sink = new AzureStorageQueueSink();
    sink.initialize(null, properties);
    sink.validate(null);
    Writer<?> writer = sink.createWriteOperation().createWriter(null);
    writer.open("test-uid");
    // Each message is written with a "SIMPLE" suffix as field 0 of a fresh record.
    for (String m : messages) {
        IndexedRecord entity = new GenericData.Record(properties.schema.schema.getValue());
        entity.put(0, m + "SIMPLE");
        writer.write(entity);
    }
    // NOTE(review): the writer is not closed if a write throws; confirm whether that matters here.
    writer.close();
    // Refresh the queue attributes before reading the approximate message count.
    queue.downloadAttributes();
    assertTrue(queue.getApproximateMessageCount() > 3);
}
 
Example 25
Source Project: components   Source File: WindowRuntimeTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Applies a {@code WindowRuntime} configured with window length 2, slide length -1 and no session
 * to three timestamped records, counts the elements per window, and expects every record to be
 * counted exactly once.
 */
@Test
public void testFixedWindow() {

    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Three input records carrying the timestamps 1, 2 and 3.

    List<TimestampedValue<IndexedRecord>> data = Arrays.asList(TimestampedValue.of(irA, new Instant(1L)),
            TimestampedValue.of(irB, new Instant(2L)), TimestampedValue.of(irC, new Instant(3L)));

    PCollection<IndexedRecord> input = (PCollection<IndexedRecord>) p
            .apply(Create.timestamped(data).withCoder(LazyAvroCoder.of()));

    // Window configuration, set first through the typed property fields...
    WindowProperties windowProperties = new WindowProperties("window");
    windowProperties.windowLength.setValue(2);
    windowProperties.windowSlideLength.setValue(-1);
    windowProperties.windowSession.setValue(false);

    // ...and then again, with the same values, through the name-based setter.
    // NOTE(review): the second group looks redundant with the first; confirm before removing.
    windowProperties.setValue("windowLength", 2);
    windowProperties.setValue("windowSlideLength", -1);
    windowProperties.setValue("windowSession", false);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> test = windowRun.expand(input);

    // Count the occurrences of each element after windowing.
    PCollection<KV<IndexedRecord, Long>> windowed_counts = test.apply(Count.<IndexedRecord> perElement());

    // Expected result for the configured window length of 2: each record is counted once.

    PAssert.that(windowed_counts).containsInAnyOrder(KV.of(irA, 1L), KV.of(irB, 1L), KV.of(irC, 1L));

    p.run();
}
 
Example 26
Source Project: components   Source File: SnowflakeWritersTestIT.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Populates the table with 100 rows, updates column 1 of the first two rows, and verifies that
 * only those two rows changed and that the row count is still 100.
 */
@Test
public void testOutputModify() throws Throwable {
    SnowflakeConnectionTableProperties tableProps = populateOutput(100);

    // Change column 1 of two rows and push them with the UPDATE action.
    List<IndexedRecord> updates = makeRows(2);
    IndexedRecord firstUpdate = updates.get(0);
    firstUpdate.put(1, "modified1");
    IndexedRecord secondUpdate = updates.get(1);
    secondUpdate.put(1, "modified2");
    handleRows(updates, tableProps, TSnowflakeOutputProperties.OutputAction.UPDATE);

    List<IndexedRecord> stored = readRows(tableProps);
    assertEquals("modified1", stored.get(0).get(1));
    assertEquals("modified2", stored.get(1).get(1));
    // The third row was not part of the update and must keep its original value.
    assertEquals("foo_2", stored.get(2).get(1));
    assertEquals(100, stored.size());
}
 
Example 27
Source Project: components   Source File: RowGeneratorIOTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads 95 rows from {@code RowGeneratorIO} using seed 0 and 13 partitions, and asserts that the
 * global count of the output is 95.
 */
@Test
@Category(ValidatesRunner.class)
public void testBasicDeterministic() throws Exception {
    // Only the number of generated rows is checked; the record contents are not inspected.
    // NOTE(review): this comment previously read "Non-deterministic read", which seems at odds
    // with the test name and the fixed seed -- confirm the intended wording.
    PCollection<IndexedRecord> output = pipeline.apply(RowGeneratorIO.read()
            .withSchema(SampleSchemas.recordCompositesRequired()).withSeed(0L).withRows(95L).withPartitions(13));
    PAssert.thatSingleton(output.apply("Count", Count.<IndexedRecord> globally())).isEqualTo(95L);
    pipeline.run();

    // TODO: we could test the generated records here.
}
 
Example 28
Source Project: hudi   Source File: TestHoodieFileReaderFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the factory returns a {@code HoodieParquetReader} for a parquet path and throws
 * {@code UnsupportedOperationException} for a log file path.
 */
@Test
public void testGetFileReader() throws IOException {
  final Configuration hadoopConf = new Configuration();

  // Parquet path: a parquet reader is expected.
  final Path parquetPath = new Path("/partition/path/f1_1-0-1_000.parquet");
  HoodieFileReader<IndexedRecord> parquetReader = HoodieFileReaderFactory.getFileReader(hadoopConf, parquetPath);
  assertTrue(parquetReader instanceof HoodieParquetReader);

  // Log path: the factory is expected to reject the format.
  final Path logPath = new Path("/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
  final Throwable rejection = assertThrows(
      UnsupportedOperationException.class,
      () -> HoodieFileReaderFactory.getFileReader(hadoopConf, logPath),
      "should fail since log storage reader is not supported yet.");
  assertTrue(rejection.getMessage().contains("format not supported yet."));
}
 
Example 29
/**
 * Verifies that the reader, in peek mode and backed by a mocked queue service returning three
 * messages, exposes them in order through {@code getCurrent()} with a schema and the expected
 * message content.
 */
@Test
public void testGetCurrent() {
    try {
        properties.peekMessages.setValue(true);

        AzureStorageQueueSource source = new AzureStorageQueueSource();
        ValidationResult vr = source.initialize(getDummyRuntimeContiner(), properties);
        assertNotNull(vr);
        assertEquals(ValidationResult.OK.getStatus(), vr.getStatus());

        reader = (AzureStorageQueueInputReader) source.createReader(getDummyRuntimeContiner());
        reader.queueService = queueService; // inject mocked service

        final List<CloudQueueMessage> messages = new ArrayList<>();
        messages.add(new CloudQueueMessage("message-1"));
        messages.add(new CloudQueueMessage("message-2"));
        messages.add(new CloudQueueMessage("message-3"));
        // A new iterator is handed out on every iterator() call of the stubbed Iterable.
        when(queueService.peekMessages(anyString(), anyInt())).thenReturn(new Iterable<CloudQueueMessage>() {

            @Override
            public Iterator<CloudQueueMessage> iterator() {
                return new DummyCloudQueueMessageIterator(messages);
            }
        });
        boolean startable = reader.start();
        assertTrue(startable);
        int i = 1;
        do {
            IndexedRecord current = reader.getCurrent();
            assertNotNull(current);
            assertNotNull(current.getSchema());
            Field msgField = current.getSchema().getField(TAzureStorageQueueInputProperties.FIELD_MESSAGE_CONTENT);
            // assertEquals reports the actual content on a mismatch and copes with a null value,
            // where assertTrue(x.equals(y)) would only say "false" or throw a NullPointerException.
            assertEquals("message-" + i, current.get(msgField.pos()));
            i++;
        } while (reader.advance());

    } catch (IOException | InvalidKeyException | URISyntaxException | StorageException e) {
        fail("should not throw " + e.getMessage());
    }
}
 
Example 30
Source Project: components   Source File: PythonRowDoFnTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds a single {@code null} element to a {@code PythonRowDoFn} whose script appends the input
 * to the output list, and expects no output.
 */
@Test
public void test_NullInput() throws Exception {

    PythonRowProperties pythonProps = new PythonRowProperties("test");
    pythonProps.init();
    pythonProps.pythonCode.setValue("outputList.append(input)");

    PythonRowDoFn doFn = new PythonRowDoFn();
    assertEquals(ValidationResult.OK, doFn.initialize(null, pythonProps));

    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(doFn);
    // The cast picks a bundle holding one null element rather than a null varargs array.
    List<IndexedRecord> emitted = tester.processBundle((IndexedRecord) null);
    assertEquals(0, emitted.size());
}