Java Code Examples for org.apache.avro.generic.GenericRecord#get()
The following examples show how to use org.apache.avro.generic.GenericRecord#get(). These examples are extracted from open source projects; the source project, file, and license are noted above each example.
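GenericRecord offers two lookup overloads: get(String key) returns a field by name, while get(int i), inherited from IndexedRecord, returns a field by its position in the schema's field order. As a quick orientation before the project examples, here is a minimal, self-contained sketch of both overloads; the "User" schema and its field names are invented for illustration.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class GenericRecordGetSketch {
    public static void main(String[] args) {
        // Hypothetical two-field schema, inlined for illustration.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
                + "{\"name\":\"name\",\"type\":\"string\"},"
                + "{\"name\":\"age\",\"type\":\"int\"}]}");

        GenericRecord record = new GenericData.Record(schema);
        record.put("name", "alice");
        record.put("age", 42);

        // get(String key): look a field up by name. The return type is Object,
        // so callers cast or call toString(), as the examples below do.
        String name = record.get("name").toString();

        // get(int i), from IndexedRecord: look a field up by its position
        // in the schema's field order (compare Example 2 below).
        int age = (Integer) record.get(1);

        System.out.println(name + " is " + age);
    }
}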
Example 1
Source Project: secor File: AvroMessageParser.java License: Apache License 2.0
@Override
public long extractTimestampMillis(final Message message) {
    try {
        GenericRecord record = schemaRegistry.deserialize(message.getTopic(), message.getPayload());
        if (record != null) {
            Object fieldValue = record.get(mConfig.getMessageTimestampName());
            if (fieldValue != null) {
                return toMillis(Double.valueOf(fieldValue.toString()).longValue());
            }
        } else if (m_timestampRequired) {
            throw new RuntimeException("Missing timestamp field for message: " + message);
        }
    } catch (Exception e) {
        LOG.error("Failed to parse record", e);
    }
    return 0;
}
Example 2
Source Project: kareldb File: KafkaValueDeserializer.java License: Apache License 2.0
private NavigableMap<Long, VersionedValue> toValue(GenericArray<GenericRecord> array) {
    NavigableMap<Long, VersionedValue> map = new TreeMap<>();
    Schema recordSchema = avroSchema.getElementType();
    List<Schema.Field> fields = recordSchema.getFields();
    int size = fields.size();
    for (GenericRecord record : array) {
        // Positional get(int): fields 0-2 are version, commit, and deleted.
        Long version = (Long) record.get(0);
        Long commit = (Long) record.get(1);
        boolean deleted = (Boolean) record.get(2);
        // The remaining fields hold the row values.
        Comparable[] row = new Comparable[size - 3];
        for (int i = 0; i < row.length; i++) {
            Schema schema = fields.get(i + 3).schema();
            Comparable value = (Comparable) record.get(i + 3);
            row[i] = AvroSchema.fromAvroValue(schema, value);
        }
        map.put(version, new VersionedValue(version, commit, deleted, row));
    }
    return map;
}
Example 3
Source Project: incubator-pinot File: DefaultJoinKeyExtractor.java License: Apache License 2.0
@Override
public String extractJoinKey(String sourceName, GenericRecord record) {
    String joinKey = defaultJoinKey;
    if (joinKeyMap != null && joinKeyMap.containsKey(sourceName)) {
        joinKey = joinKeyMap.get(sourceName);
    }
    String ret = "INVALID";
    if (joinKey != null) {
        Object object = record.get(joinKey);
        if (object != null) {
            ret = object.toString();
        }
    }
    LOGGER.info("source:{} JoinKey:{} value:{}", sourceName, joinKey, ret);
    return ret;
}
Example 4
Source Project: simplesource File: AvroGenericUtils.java License: Apache License 2.0
static <K> CommandResponse<GenericRecord> fromCommandResponse(final GenericRecord record) {
    final GenericRecord aggregateKey = (GenericRecord) record.get(AGGREGATE_KEY);
    final Sequence readSequence = Sequence.position((Long) record.get(READ_SEQUENCE));
    final UUID commandId = UUID.fromString(String.valueOf(record.get(COMMAND_ID)));
    final GenericRecord genericResult = (GenericRecord) record.get(RESULT);

    final Result<CommandError, Sequence> result;
    if (nonNull(genericResult.get(WRITE_SEQUENCE))) {
        final Sequence writeSequence = Sequence.position((Long) genericResult.get(WRITE_SEQUENCE));
        result = Result.success(writeSequence);
    } else {
        final CommandError commandError = toCommandError((GenericRecord) genericResult.get(REASON));
        final List<CommandError> additionalCommandErrors =
            ((List<GenericRecord>) genericResult.get(ADDITIONAL_REASONS))
                .stream()
                .map(AggregateUpdateResultAvroHelper::toCommandError)
                .collect(Collectors.toList());
        result = Result.failure(new NonEmptyList<>(commandError, additionalCommandErrors));
    }
    return CommandResponse.of(CommandId.of(commandId), aggregateKey, readSequence, result);
}
Example 5
Source Project: aliyun-maxcompute-data-collectors File: GenericRecordExportMapper.java License: Apache License 2.0
protected SqoopRecord toSqoopRecord(GenericRecord record) throws IOException {
    Schema avroSchema = record.getSchema();
    for (Map.Entry<Writable, Writable> e : columnTypes.entrySet()) {
        String columnName = e.getKey().toString();
        String columnType = e.getValue().toString();
        String cleanedCol = ClassWriter.toIdentifier(columnName);
        Schema.Field field = getFieldIgnoreCase(avroSchema, cleanedCol);
        if (null == field) {
            throw new IOException("Cannot find field " + cleanedCol + " in Avro schema " + avroSchema);
        }
        Object avroObject = record.get(field.name());
        Object fieldVal = AvroUtil.fromAvro(avroObject, field.schema(), columnType);
        recordImpl.setField(cleanedCol, fieldVal);
    }
    return recordImpl;
}
Example 6
Source Project: datafu File: ImpressionClickPartitionPreservingJob.java License: Apache License 2.0
@Override
public void accumulate(GenericRecord value) {
    if (value.get("type").toString().equals("click")) {
        clicks++;
    } else if (value.get("type").toString().equals("impression")) {
        impressions++;
    } else {
        throw new RuntimeException("Didn't expect: " + value.get("type"));
    }
}
Example 7
Source Project: avro-util File: FastGenericSerializerGeneratorTest.java License: BSD 2-Clause "Simplified" License
@Test(groups = {"serializationTest"}) public void shouldWriteRightUnionIndex() { // Create two record schemas Schema recordSchema1 = createRecord("record1", createField("record1_field1", Schema.create(Schema.Type.STRING))); Schema recordSchema2 = createRecord("record2", createField("record2_field1", Schema.create(Schema.Type.STRING))); Schema unionSchema = createUnionSchema(recordSchema1, recordSchema2); Schema recordWrapperSchema = createRecord(createField("union_field", unionSchema)); GenericData.Record objectOfRecordSchema2 = new GenericData.Record(recordSchema2); objectOfRecordSchema2.put("record2_field1", "abc"); GenericData.Record wrapperObject = new GenericData.Record(recordWrapperSchema); wrapperObject.put("union_field", objectOfRecordSchema2); GenericRecord record = decodeRecord(recordWrapperSchema, dataAsBinaryDecoder(wrapperObject)); Object unionField = record.get("union_field"); Assert.assertTrue(unionField instanceof GenericData.Record); GenericData.Record unionRecord = (GenericData.Record)unionField; Assert.assertEquals(unionRecord.getSchema().getName(), "record2"); }
Example 8
Source Project: Cubert File: Purge.java License: Apache License 2.0
private void loadMembersToPurge(String filename) throws IOException {
    // TODO: "memberId" column name should be configurable
    DataFileReader<GenericRecord> dataFileReader = createDataFileReader(filename, true);
    while (dataFileReader.hasNext()) {
        GenericRecord record = dataFileReader.next();
        Integer memberId = (Integer) record.get("memberId");
        if (memberId == null) {
            throw new NullPointerException("memberId is null");
        }
        membersToPurge.add(((Number) record.get("memberId")).intValue());
    }
    dataFileReader.close();
}
Example 9
Source Project: aliyun-maxcompute-data-collectors File: LobAvroImportTestCase.java License: Apache License 2.0
/**
 * Import blob data that is smaller than inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportInline() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };

    createTableWithColTypes(types, vals);
    runImport(getArgv());

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();

    // Verify that blob data is imported as Avro bytes.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());

    assertEquals(getColName(0), expectedVal, returnVal);
}
Example 10
Source Project: incubator-gobblin File: EnvelopeSchemaConverter.java License: Apache License 2.0
/**
 * Get payload field from GenericRecord and convert to byte array
 */
public byte[] getPayload(GenericRecord inputRecord, String payloadFieldName) {
    ByteBuffer bb = (ByteBuffer) inputRecord.get(payloadFieldName);
    byte[] payloadBytes;
    if (bb.hasArray()) {
        payloadBytes = bb.array();
    } else {
        payloadBytes = new byte[bb.remaining()];
        bb.get(payloadBytes);
    }
    // The payload bytes are hex-encoded text; decode them into the raw payload.
    String hexString = new String(payloadBytes, StandardCharsets.UTF_8);
    return DatatypeConverter.parseHexBinary(hexString);
}
Example 11
Source Project: secor File: AvroSplitByFieldMessageParser.java License: Apache License 2.0
protected long extractTimestampMillis(GenericRecord record) {
    try {
        if (record != null) {
            Object fieldValue = record.get(mConfig.getMessageTimestampName());
            if (fieldValue != null) {
                return toMillis(Double.valueOf(fieldValue.toString()).longValue());
            }
        } else if (m_timestampRequired) {
            // Note: record is null in this branch, so string concatenation is
            // used instead of record.toString(), which would throw an NPE.
            throw new RuntimeException("Missing timestamp field for message: " + record);
        }
    } catch (SerializationException e) {
        LOG.error("Failed to parse record", e);
    }
    return 0;
}
Example 12
Source Project: incubator-pinot File: ThirdeyeAvroUtils.java License: Apache License 2.0
public static Number getMetricFromRecord(GenericRecord record, String metricName, MetricType metricType) {
    Number metricValue = (Number) record.get(metricName);
    if (metricValue == null) {
        metricValue = metricType.getDefaultNullValue();
    }
    return metricValue;
}
Example 13
Source Project: incubator-pinot File: GenericPartitioner.java License: Apache License 2.0
@Override
public int getPartition(T genericRecordAvroKey, AvroValue<GenericRecord> genericRecordAvroValue, int numPartitions) {
    final GenericRecord inputRecord = genericRecordAvroValue.datum();
    final Object partitionColumnValue = inputRecord.get(_partitionColumn);
    return _partitionFunction.getPartition(partitionColumnValue);
}
Example 14
Source Project: simplesource File: AvroGenericUtils.java License: Apache License 2.0
public static ValueWithSequence<GenericRecord> fromGenericRecord(final GenericRecord record) {
    final GenericRecord genericValue = (GenericRecord) record.get(VALUE);
    final Sequence sequence = Sequence.position((Long) record.get(SEQUENCE));
    return new ValueWithSequence<>(genericValue, sequence);
}
Example 15
Source Project: datacollector File: TestAvroDataGenerator.java License: Apache License 2.0
@Test
public void testAvroGeneratorShortType() throws Exception {
    final String SCHEMA_JSON = "{\n"
        + "\"type\": \"record\",\n"
        + "\"name\": \"WithDecimal\",\n"
        + "\"fields\": [\n"
        + " {\"name\": \"short\", \"type\": \"int\"}"
        + "]}";
    final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON);

    Map<String, Field> map = new LinkedHashMap<>();
    map.put("short", Field.create(Field.Type.SHORT, (short) 1));
    Record record = RecordCreator.create();
    record.set(Field.create(map));

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataGenerator gen = new AvroDataOutputStreamGenerator(
        false,
        baos,
        COMPRESSION_CODEC_DEFAULT,
        SCHEMA,
        new HashMap<String, Object>(),
        null,
        null,
        0
    );
    gen.write(record);
    gen.close();

    // reader schema must be extracted from the data file
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
        new SeekableByteArrayInput(baos.toByteArray()), reader);
    Assert.assertTrue(dataFileReader.hasNext());
    GenericRecord readRecord = dataFileReader.next();
    Object retrievedField = readRecord.get("short");
    Assert.assertEquals(1, retrievedField);
    Assert.assertFalse(dataFileReader.hasNext());
}
Example 16
Source Project: flume-plugins File: TestApacheAvroEventSerializer.java License: MIT License
@Test
public void test() throws FileNotFoundException, IOException {
    // create the file, write some data
    OutputStream out = new FileOutputStream(testFile);
    String builderName = ApacheLogAvroEventSerializer.Builder.class.getName();
    Context ctx = new Context();
    ctx.put("syncInterval", "4096");

    EventSerializer serializer = EventSerializerFactory.getInstance(builderName, ctx, out);
    serializer.afterCreate(); // must call this when a file is newly created

    List<Event> events = generateApacheEvents();
    for (Event e : events) {
        serializer.write(e);
    }
    serializer.flush();
    serializer.beforeClose();
    out.flush();
    out.close();

    // now try to read the file back
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(testFile, reader);

    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    int numEvents = 0;
    while (fileReader.hasNext()) {
        fileReader.next(record);

        String ip = record.get("ip").toString();
        String uri = record.get("uri").toString();
        Integer statuscode = (Integer) record.get("statuscode");
        String original = record.get("original").toString();
        String connectionstatus = record.get("connectionstatus").toString();

        Assert.assertEquals("Ip should be 80.79.194.3", "80.79.194.3", ip);
        System.out.println("IP " + ip + " requested: " + uri + " with status code " + statuscode
            + " and connectionstatus: " + connectionstatus);
        System.out.println("Original logline: " + original);
        numEvents++;
    }
    fileReader.close();
    Assert.assertEquals("Should have found a total of 2 events", 2, numEvents);

    FileUtils.forceDelete(testFile);
}
Example 17
Source Project: incubator-samoa File: AvroLoader.java License: Apache License 2.0
/**
 * Method to read Sparse Instances from Avro File
 *
 * @return Instance
 */
protected Instance readInstanceSparse(GenericRecord record) {
    Instance instance = new SparseInstance(1.0, null);
    int numAttribute = -1;
    ArrayList<Double> attributeValues = new ArrayList<Double>();
    List<Integer> indexValues = new ArrayList<Integer>();

    for (Attribute attribute : attributes) {
        numAttribute++;
        Object value = record.get(attribute.name);
        boolean isNumeric = attributes.get(numAttribute).isNumeric();
        boolean isNominal = attributes.get(numAttribute).isNominal();

        // If value is empty/null iterate to the next attribute.
        if (value == null)
            continue;

        if (isNumeric) {
            if (value instanceof Double) {
                Double v = (double) value;
                // if (Double.isFinite(v))
                if (!Double.isNaN(v) && !Double.isInfinite(v))
                    this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (double) value);
            } else if (value instanceof Long)
                this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (long) value);
            else if (value instanceof Integer)
                this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (int) value);
            else
                throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name);
        } else if (isNominal) {
            double valueAttribute;
            if (!(value instanceof EnumSymbol))
                throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name);

            EnumSymbol enumSymbolValue = (EnumSymbol) value;
            String stringValue = enumSymbolValue.toString();

            if (stringValue == null || "?".equals(stringValue)) {
                valueAttribute = Double.NaN;
            } else {
                valueAttribute = this.instanceInformation.attribute(numAttribute).indexOfValue(stringValue);
            }
            this.setSparseValue(instance, indexValues, attributeValues, numAttribute, valueAttribute);
        }
    }

    int[] arrayIndexValues = new int[attributeValues.size()];
    double[] arrayAttributeValues = new double[attributeValues.size()];
    for (int i = 0; i < arrayIndexValues.length; i++) {
        arrayIndexValues[i] = indexValues.get(i).intValue();
        arrayAttributeValues[i] = attributeValues.get(i).doubleValue();
    }
    instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes());
    return instance;
}
Example 18
Source Project: sqoop-on-spark File: AVROIntermediateDataFormat.java License: Apache License 2.0
@SuppressWarnings("unchecked") private String toCSV(GenericRecord record) { Column[] columns = this.schema.getColumnsArray(); StringBuilder csvString = new StringBuilder(); for (int i = 0; i < columns.length; i++) { Object obj = record.get(columns[i].getName()); if (obj == null && !columns[i].isNullable()) { throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005, columns[i].getName() + " does not support null values"); } if (obj == null) { csvString.append(NULL_VALUE); } else { switch (columns[i].getType()) { case ARRAY: case SET: List<Object> objList = (List<Object>) obj; csvString.append(toCSVList(toObjectArray(objList), columns[i])); break; case MAP: Map<Object, Object> objMap = (Map<Object, Object>) obj; csvString.append(toCSVMap(objMap, columns[i])); break; case ENUM: case TEXT: csvString.append(toCSVString(obj.toString())); break; case BINARY: case UNKNOWN: csvString.append(toCSVByteArray(getBytesFromByteBuffer(obj))); break; case FIXED_POINT: csvString.append(toCSVFixedPoint(obj, columns[i])); break; case FLOATING_POINT: csvString.append(toCSVFloatingPoint(obj, columns[i])); break; case DECIMAL: // stored as string csvString.append(toCSVDecimal(obj)); break; case DATE: // stored as long Long dateInMillis = (Long) obj; csvString.append(toCSVDate(new org.joda.time.LocalDate(dateInMillis))); break; case TIME: // stored as long Long timeInMillis = (Long) obj; csvString.append(toCSVTime(new org.joda.time.LocalTime(timeInMillis), columns[i])); break; case DATE_TIME: // stored as long Long dateTimeInMillis = (Long) obj; csvString.append(toCSVDateTime(new org.joda.time.DateTime(dateTimeInMillis), columns[i])); break; case BIT: csvString.append(toCSVBit(obj)); break; default: throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001, "Column type from schema was not recognized for " + columns[i].getType()); } } if (i < columns.length - 1) { csvString.append(CSV_SEPARATOR_CHARACTER); } } return csvString.toString(); }
Example 19
Source Project: incubator-gobblin File: JsonIntermediateToAvroConverterTest.java License: Apache License 2.0
@Test
public void testConverter() throws Exception {
    initResources("/converter/schema.json");
    JsonIntermediateToAvroConverter converter = new JsonIntermediateToAvroConverter();
    Schema avroSchema = converter.convertSchema(jsonSchema, state);
    GenericRecord record = converter.convertRecord(avroSchema, jsonRecord, state).iterator().next();

    // testing output values are expected types and values
    Assert.assertEquals(jsonRecord.get("Id").getAsString(), record.get("Id").toString());
    Assert.assertEquals(jsonRecord.get("IsDeleted").getAsBoolean(), record.get("IsDeleted"));

    if (!(record.get("Salutation") instanceof GenericArray)) {
        Assert.fail("expected array, found " + record.get("Salutation").getClass().getName());
    }
    if (!(record.get("MapAccount") instanceof Map)) {
        Assert.fail("expected map, found " + record.get("MapAccount").getClass().getName());
    }
    Assert.assertEquals(jsonRecord.get("Industry").getAsString(), record.get("Industry").toString());

    DateTimeFormatter format = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss")
        .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
    Assert.assertEquals(jsonRecord.get("LastModifiedDate").getAsString(),
        new DateTime(record.get("LastModifiedDate")).toString(format));
    Assert.assertEquals(jsonRecord.get("date_type").getAsString(),
        new DateTime(record.get("date_type")).toString(format));

    format = DateTimeFormat.forPattern("HH:mm:ss")
        .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
    Assert.assertEquals(jsonRecord.get("time_type").getAsString(),
        new DateTime(record.get("time_type")).toString(format));
    Assert.assertEquals(jsonRecord.get("bytes_type").getAsString().getBytes(),
        ((ByteBuffer) record.get("bytes_type")).array());
    Assert.assertEquals(jsonRecord.get("int_type").getAsInt(), record.get("int_type"));
    Assert.assertEquals(jsonRecord.get("long_type").getAsLong(), record.get("long_type"));
    Assert.assertEquals(jsonRecord.get("float_type").getAsFloat(), record.get("float_type"));
    Assert.assertEquals(jsonRecord.get("double_type").getAsDouble(), record.get("double_type"));

    // Testing timezone
    state.setProp(ConfigurationKeys.CONVERTER_AVRO_DATE_TIMEZONE, "EST");
    avroSchema = converter.convertSchema(jsonSchema, state);
    GenericRecord record2 = converter.convertRecord(avroSchema, jsonRecord, state).iterator().next();

    Assert.assertNotEquals(record.get("LastModifiedDate"), record2.get("LastModifiedDate"));
}
Example 20
Source Project: flink File: AvroRecordInputFormatTest.java License: Apache License 2.0
/**
 * Helper method to test GenericRecord serialisation.
 *
 * @param format the format to test
 * @param parameters the configuration to use
 * @throws IOException thrown if there is an issue
 */
@SuppressWarnings("unchecked")
private void doTestDeserializationGenericRecord(final AvroInputFormat<GenericRecord> format,
        final Configuration parameters) throws IOException {
    try {
        format.configure(parameters);
        FileInputSplit[] splits = format.createInputSplits(1);
        assertEquals(splits.length, 1);
        format.open(splits[0]);

        GenericRecord u = format.nextRecord(null);
        assertNotNull(u);
        assertEquals("The schemas should be equal", userSchema, u.getSchema());

        String name = u.get("name").toString();
        assertNotNull("empty record", name);
        assertEquals("name not equal", TEST_NAME, name);

        // check arrays
        List<CharSequence> sl = (List<CharSequence>) u.get("type_array_string");
        assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
        assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());

        List<Boolean> bl = (List<Boolean>) u.get("type_array_boolean");
        assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
        assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));

        // check enums
        GenericData.EnumSymbol enumValue = (GenericData.EnumSymbol) u.get("type_enum");
        assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), enumValue.toString());

        // check maps
        Map<CharSequence, Long> lm = (Map<CharSequence, Long>) u.get("type_map");
        assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
        assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());

        assertFalse("expecting second element", format.reachedEnd());
        assertNotNull("expecting second element", format.nextRecord(u));

        assertNull(format.nextRecord(u));
        assertTrue(format.reachedEnd());
    } finally {
        format.close();
    }
}