Java Code Examples for org.apache.avro.generic.GenericRecord#get()

The following examples show how to use org.apache.avro.generic.GenericRecord#get(). They are extracted from open source projects. GenericRecord#get() reads a field value from an Avro record, either by name (get(String key)) or by position (get(int index), inherited from IndexedRecord).
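Before the project examples, a minimal self-contained sketch of both overloads may help. This snippet is not taken from any project below; the User schema and its name/age fields are hypothetical, invented only to illustrate the call.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class GenericRecordGetSketch {
    public static void main(String[] args) {
        // Hypothetical schema, defined inline for illustration only.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"age\",\"type\":\"int\"}]}");

        GenericRecord record = new GenericData.Record(schema);
        record.put("name", "alice");
        record.put("age", 42);

        // Look a field up by name ...
        Object name = record.get("name");
        // ... or by position, via the IndexedRecord super-interface.
        Object age = record.get(1);

        // After deserialization, string fields typically come back as
        // org.apache.avro.util.Utf8, so prefer toString() over a String cast.
        System.out.println(name + " is " + age);
    }
}

Several of the examples that follow null-check the result of get(), since an unset or missing field commonly comes back as null.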
Example 1
Source Project: secor   File: AvroMessageParser.java    License: Apache License 2.0
@Override
public long extractTimestampMillis(final Message message) {
    try {
        GenericRecord record = schemaRegistry.deserialize(message.getTopic(), message.getPayload());
        if (record != null) {
            Object fieldValue = record.get(mConfig.getMessageTimestampName());
            if (fieldValue != null) {
                return toMillis(Double.valueOf(fieldValue.toString()).longValue());
            }
        } else if (m_timestampRequired) {
            throw new RuntimeException("Missing timestamp field for message: " + message);
        }
    } catch (Exception e) {
        LOG.error("Failed to parse record", e);
    }
    return 0;
}
 
Example 2
Source Project: kareldb   File: KafkaValueDeserializer.java    License: Apache License 2.0
private NavigableMap<Long, VersionedValue> toValue(GenericArray<GenericRecord> array) {
    NavigableMap<Long, VersionedValue> map = new TreeMap<>();
    Schema recordSchema = avroSchema.getElementType();
    List<Schema.Field> fields = recordSchema.getFields();
    int size = fields.size();
    for (GenericRecord record : array) {
        // The first three fields are fixed metadata: version, commit, and a deletion flag.
        Long version = (Long) record.get(0);
        Long commit = (Long) record.get(1);
        boolean deleted = (Boolean) record.get(2);
        // The remaining fields hold the row values.
        Comparable[] row = new Comparable[size - 3];
        for (int i = 0; i < row.length; i++) {
            Schema schema = fields.get(i + 3).schema();
            Comparable value = (Comparable) record.get(i + 3);
            row[i] = AvroSchema.fromAvroValue(schema, value);
        }
        map.put(version, new VersionedValue(version, commit, deleted, row));
    }
    return map;
}
 
Example 3
@Override
public String extractJoinKey(String sourceName, GenericRecord record) {

  String joinKey = defaultJoinKey;
  if (joinKeyMap != null && joinKeyMap.containsKey(sourceName)) {
    joinKey = joinKeyMap.get(sourceName);
  }
  String ret = "INVALID";
  if (joinKey != null) {
    Object object = record.get(joinKey);
    if (object != null) {
      ret = object.toString();
    }
  }
  LOGGER.info("source:{} JoinKey:{} value:{}", sourceName, joinKey, ret);
  return ret;
}
 
Example 4
Source Project: simplesource   File: AvroGenericUtils.java    License: Apache License 2.0
static <K> CommandResponse<GenericRecord> fromCommandResponse(
        final GenericRecord record) {
    final GenericRecord aggregateKey = (GenericRecord) record.get(AGGREGATE_KEY);
    final Sequence readSequence = Sequence.position((Long) record.get(READ_SEQUENCE));
    final UUID commandId = UUID.fromString(String.valueOf(record.get(COMMAND_ID)));
    final GenericRecord genericResult = (GenericRecord) record.get(RESULT);
    final Result<CommandError, Sequence> result;
    if (nonNull(genericResult.get(WRITE_SEQUENCE))) {
        final Sequence writeSequence = Sequence.position((Long) genericResult.get(WRITE_SEQUENCE));
        result = Result.success(writeSequence);
    } else {
        final CommandError commandError = toCommandError((GenericRecord) genericResult.get(REASON));
        final List<CommandError> additionalCommandErrors = ((List<GenericRecord>) genericResult.get(ADDITIONAL_REASONS))
                .stream()
                .map(AggregateUpdateResultAvroHelper::toCommandError)
                .collect(Collectors.toList());
        result = Result.failure(new NonEmptyList<>(commandError, additionalCommandErrors));
    }

    return CommandResponse.of(CommandId.of(commandId), aggregateKey, readSequence, result);
}
 
Example 5
protected SqoopRecord toSqoopRecord(GenericRecord record) throws IOException {
  Schema avroSchema = record.getSchema();
  for (Map.Entry<Writable, Writable> e : columnTypes.entrySet()) {
    String columnName = e.getKey().toString();
    String columnType = e.getValue().toString();
    String cleanedCol = ClassWriter.toIdentifier(columnName);
    Schema.Field field = getFieldIgnoreCase(avroSchema, cleanedCol);
    if (null == field) {
      throw new IOException("Cannot find field " + cleanedCol
          + " in Avro schema " + avroSchema);
    }

    Object avroObject = record.get(field.name());
    Object fieldVal = AvroUtil.fromAvro(avroObject, field.schema(), columnType);
    recordImpl.setField(cleanedCol, fieldVal);
  }
  return recordImpl;
}
 
Example 6
@Override
public void accumulate(GenericRecord value)
{
  // Fetch the discriminator field once and branch on its string value.
  String type = value.get("type").toString();
  if (type.equals("click"))
  {
    clicks++;
  }
  else if (type.equals("impression"))
  {
    impressions++;
  }
  else
  {
    throw new RuntimeException("Didn't expect: " + type);
  }
}
 
Example 7
@Test(groups = {"serializationTest"})
public void shouldWriteRightUnionIndex() {
  // Create two record schemas
  Schema recordSchema1 = createRecord("record1", createField("record1_field1", Schema.create(Schema.Type.STRING)));
  Schema recordSchema2 = createRecord("record2", createField("record2_field1", Schema.create(Schema.Type.STRING)));
  Schema unionSchema = createUnionSchema(recordSchema1, recordSchema2);
  Schema recordWrapperSchema = createRecord(createField("union_field", unionSchema));

  GenericData.Record objectOfRecordSchema2 = new GenericData.Record(recordSchema2);
  objectOfRecordSchema2.put("record2_field1", "abc");
  GenericData.Record wrapperObject = new GenericData.Record(recordWrapperSchema);
  wrapperObject.put("union_field", objectOfRecordSchema2);

  GenericRecord record = decodeRecord(recordWrapperSchema, dataAsBinaryDecoder(wrapperObject));

  Object unionField = record.get("union_field");
  Assert.assertTrue(unionField instanceof GenericData.Record);
  GenericData.Record unionRecord = (GenericData.Record)unionField;
  Assert.assertEquals(unionRecord.getSchema().getName(), "record2");
}
 
Example 8
Source Project: Cubert   File: Purge.java    License: Apache License 2.0
private void loadMembersToPurge(String filename) throws IOException
{
    // TODO: "memberId" column name should be configurable
    DataFileReader<GenericRecord> dataFileReader =
            createDataFileReader(filename, true);
    while (dataFileReader.hasNext())
    {
        GenericRecord record = dataFileReader.next();
        Number memberId = (Number) record.get("memberId");
        if (memberId == null)
        {
            throw new NullPointerException("memberId is null");
        }
        membersToPurge.add(memberId.intValue());
    }
    dataFileReader.close();
}
 
Example 9
/** Import blob data that is smaller than the inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportInline() throws IOException, SQLException {
  String [] types = { getBlobType() };
  String expectedVal = "This is short BLOB data";
  String [] vals = { getBlobInsertStr(expectedVal) };

  createTableWithColTypes(types, vals);

  runImport(getArgv());

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  GenericRecord record = reader.next();

  // Verify that blob data is imported as Avro bytes.
  ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
  String returnVal = new String(buf.array());

  assertEquals(getColName(0), expectedVal, returnVal);
}
 
Example 10
/**
 * Get the payload field from a GenericRecord; the payload bytes hold a hex
 * string, which is decoded into the returned byte array
 */
public byte[] getPayload(GenericRecord inputRecord, String payloadFieldName) {
  ByteBuffer bb = (ByteBuffer) inputRecord.get(payloadFieldName);
  byte[] payloadBytes;
  if (bb.hasArray()) {
    payloadBytes = bb.array();
  } else {
    payloadBytes = new byte[bb.remaining()];
    bb.get(payloadBytes);
  }
  String hexString = new String(payloadBytes, StandardCharsets.UTF_8);
  return DatatypeConverter.parseHexBinary(hexString);
}
 
Example 11
protected long extractTimestampMillis(GenericRecord record) {
    try {
        if (record != null) {
            Object fieldValue = record.get(mConfig.getMessageTimestampName());
            if (fieldValue != null) {
                return toMillis(Double.valueOf(fieldValue.toString()).longValue());
            }
        } else if (m_timestampRequired) {
            throw new RuntimeException("Missing timestamp field for message: " + record.toString());
        }
    } catch (SerializationException e) {
        LOG.error("Failed to parse record", e);
    }
    return 0;
}
 
Example 12
public static Number getMetricFromRecord(GenericRecord record, String metricName, MetricType metricType) {
  Number metricValue = (Number) record.get(metricName);
  if (metricValue == null) {
    metricValue = metricType.getDefaultNullValue();
  }
  return metricValue;
}
 
Example 13
@Override
public int getPartition(T genericRecordAvroKey, AvroValue<GenericRecord> genericRecordAvroValue, int numPartitions) {
  final GenericRecord inputRecord = genericRecordAvroValue.datum();
  final Object partitionColumnValue = inputRecord.get(_partitionColumn);
  return _partitionFunction.getPartition(partitionColumnValue);
}
 
Example 14
Source Project: simplesource   File: AvroGenericUtils.java    License: Apache License 2.0
public static ValueWithSequence<GenericRecord> fromGenericRecord(final GenericRecord record) {
    final GenericRecord genericValue = (GenericRecord) record.get(VALUE);
    final Sequence sequence = Sequence.position((Long) record.get(SEQUENCE));

    return new ValueWithSequence<>(genericValue, sequence);
}
 
Example 15
@Test
public void testAvroGeneratorShortType() throws Exception {
  final String SCHEMA_JSON = "{\n"
  +"\"type\": \"record\",\n"
  +"\"name\": \"WithDecimal\",\n"
  +"\"fields\": [\n"
  +" {\"name\": \"short\", \"type\": \"int\"}"
  +"]}";
  final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON);

  Map<String, Field> map = new LinkedHashMap<>();
  map.put("short", Field.create(Field.Type.SHORT, (short)1));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Object retrievedField = readRecord.get("short");
  Assert.assertEquals(1, retrievedField);

  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example 16
@Test
public void test() throws FileNotFoundException, IOException {

    // create the file, write some data
    OutputStream out = new FileOutputStream(testFile);
    String builderName = ApacheLogAvroEventSerializer.Builder.class.getName();

    Context ctx = new Context();
    ctx.put("syncInterval", "4096");

    EventSerializer serializer =
            EventSerializerFactory.getInstance(builderName, ctx, out);
    serializer.afterCreate(); // must call this when a file is newly created

    List<Event> events = generateApacheEvents();
    for (Event e : events) {
        serializer.write(e);
    }
    serializer.flush();
    serializer.beforeClose();
    out.flush();
    out.close();

    // now try to read the file back

    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader =
            new DataFileReader<GenericRecord>(testFile, reader);

    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    int numEvents = 0;
    while (fileReader.hasNext()) {
        fileReader.next(record);
        String ip = record.get("ip").toString();
        String uri = record.get("uri").toString();
        Integer statuscode = (Integer) record.get("statuscode");
        String original = record.get("original").toString();
        String connectionstatus = record.get("connectionstatus").toString();

        Assert.assertEquals("Ip should be 80.79.194.3", "80.79.194.3", ip);
        System.out.println("IP " + ip + " requested: " + uri + " with status code " + statuscode + " and connectionstatus: " + connectionstatus);
        System.out.println("Original logline: " + original);
        numEvents++;
    }

    fileReader.close();
    Assert.assertEquals("Should have found a total of 3 events", 2, numEvents);

    FileUtils.forceDelete(testFile);
}
 
Example 17
Source Project: incubator-samoa   File: AvroLoader.java    License: Apache License 2.0
/**
 * Method to read Sparse Instances from Avro File
 * 
 * @return Instance
 */
protected Instance readInstanceSparse(GenericRecord record) {

  Instance instance = new SparseInstance(1.0, null);
  int numAttribute = -1;
  ArrayList<Double> attributeValues = new ArrayList<Double>();
  List<Integer> indexValues = new ArrayList<Integer>();

  for (Attribute attribute : attributes) {
    numAttribute++;
    Object value = record.get(attribute.name);

    boolean isNumeric = attributes.get(numAttribute).isNumeric();
    boolean isNominal = attributes.get(numAttribute).isNominal();

    // If the value is empty/null, skip to the next attribute.
    if (value == null)
      continue;

    if (isNumeric)
    {
      if (value instanceof Double) {
        Double v = (double) value;
        //if (Double.isFinite(v))
        if (!Double.isNaN(v) && !Double.isInfinite(v))
          this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (double) value);
      }
      else if (value instanceof Long)
        this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (long) value);
      else if (value instanceof Integer)
        this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (int) value);
      else
        throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name);
    }
    else if (isNominal)
    {
      double valueAttribute;

      if (!(value instanceof EnumSymbol))
        throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name);

      EnumSymbol enumSymbolValue = (EnumSymbol) value;

      String stringValue = enumSymbolValue.toString();

      if (("?".equals(stringValue)) || (stringValue == null)) {
        valueAttribute = Double.NaN;
      } else {
        valueAttribute = this.instanceInformation.attribute(numAttribute).indexOfValue(stringValue);
      }

      this.setSparseValue(instance, indexValues, attributeValues, numAttribute, valueAttribute);
    }
  }

  int[] arrayIndexValues = new int[attributeValues.size()];
  double[] arrayAttributeValues = new double[attributeValues.size()];

  for (int i = 0; i < arrayIndexValues.length; i++) {
    arrayIndexValues[i] = indexValues.get(i).intValue();
    arrayAttributeValues[i] = attributeValues.get(i).doubleValue();
  }

  instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes());
  return instance;

}
 
Example 18
@SuppressWarnings("unchecked")
private String toCSV(GenericRecord record) {
  Column[] columns = this.schema.getColumnsArray();

  StringBuilder csvString = new StringBuilder();
  for (int i = 0; i < columns.length; i++) {

    Object obj = record.get(columns[i].getName());
    if (obj == null && !columns[i].isNullable()) {
      throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005,
          columns[i].getName() + " does not support null values");
    }
    if (obj == null) {
      csvString.append(NULL_VALUE);
    } else {

      switch (columns[i].getType()) {
      case ARRAY:
      case SET:
        List<Object> objList = (List<Object>) obj;
        csvString.append(toCSVList(toObjectArray(objList), columns[i]));
        break;
      case MAP:
        Map<Object, Object> objMap = (Map<Object, Object>) obj;
        csvString.append(toCSVMap(objMap, columns[i]));
        break;
      case ENUM:
      case TEXT:
        csvString.append(toCSVString(obj.toString()));
        break;
      case BINARY:
      case UNKNOWN:
        csvString.append(toCSVByteArray(getBytesFromByteBuffer(obj)));
        break;
      case FIXED_POINT:
        csvString.append(toCSVFixedPoint(obj, columns[i]));
        break;
      case FLOATING_POINT:
        csvString.append(toCSVFloatingPoint(obj, columns[i]));
        break;
      case DECIMAL:
        // stored as string
        csvString.append(toCSVDecimal(obj));
        break;
      case DATE:
        // stored as long
        Long dateInMillis = (Long) obj;
        csvString.append(toCSVDate(new org.joda.time.LocalDate(dateInMillis)));
        break;
      case TIME:
        // stored as long
        Long timeInMillis = (Long) obj;
        csvString.append(toCSVTime(new org.joda.time.LocalTime(timeInMillis), columns[i]));
        break;
      case DATE_TIME:
        // stored as long
        Long dateTimeInMillis = (Long) obj;
        csvString.append(toCSVDateTime(new org.joda.time.DateTime(dateTimeInMillis), columns[i]));
        break;
      case BIT:
        csvString.append(toCSVBit(obj));
        break;
      default:
        throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001,
            "Column type from schema was not recognized for " + columns[i].getType());
      }
    }
    if (i < columns.length - 1) {
      csvString.append(CSV_SEPARATOR_CHARACTER);
    }

  }

  return csvString.toString();
}
 
Example 19
@Test
public void testConverter()
    throws Exception {
  initResources("/converter/schema.json");
  JsonIntermediateToAvroConverter converter = new JsonIntermediateToAvroConverter();

  Schema avroSchema = converter.convertSchema(jsonSchema, state);
  GenericRecord record = converter.convertRecord(avroSchema, jsonRecord, state).iterator().next();

  //testing that output values have the expected types and values
  Assert.assertEquals(jsonRecord.get("Id").getAsString(), record.get("Id").toString());
  Assert.assertEquals(jsonRecord.get("IsDeleted").getAsBoolean(), record.get("IsDeleted"));

  if (!(record.get("Salutation") instanceof GenericArray)) {
    Assert.fail("expected array, found " + record.get("Salutation").getClass().getName());
  }

  if (!(record.get("MapAccount") instanceof Map)) {
    Assert.fail("expected map, found " + record.get("MapAccount").getClass().getName());
  }

  Assert.assertEquals(jsonRecord.get("Industry").getAsString(), record.get("Industry").toString());

  DateTimeFormatter format = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss")
      .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));

  Assert.assertEquals(jsonRecord.get("LastModifiedDate").getAsString(),
      new DateTime(record.get("LastModifiedDate")).toString(format));
  Assert.assertEquals(jsonRecord.get("date_type").getAsString(),
      new DateTime(record.get("date_type")).toString(format));

  format = DateTimeFormat.forPattern("HH:mm:ss").withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
  Assert.assertEquals(jsonRecord.get("time_type").getAsString(),
      new DateTime(record.get("time_type")).toString(format));
  Assert.assertEquals(jsonRecord.get("bytes_type").getAsString().getBytes(),
      ((ByteBuffer) record.get("bytes_type")).array());
  Assert.assertEquals(jsonRecord.get("int_type").getAsInt(), record.get("int_type"));
  Assert.assertEquals(jsonRecord.get("long_type").getAsLong(), record.get("long_type"));
  Assert.assertEquals(jsonRecord.get("float_type").getAsFloat(), record.get("float_type"));
  Assert.assertEquals(jsonRecord.get("double_type").getAsDouble(), record.get("double_type"));

  //Testing timezone
  state.setProp(ConfigurationKeys.CONVERTER_AVRO_DATE_TIMEZONE, "EST");
  avroSchema = converter.convertSchema(jsonSchema, state);
  GenericRecord record2 = converter.convertRecord(avroSchema, jsonRecord, state).iterator().next();

  Assert.assertNotEquals(record.get("LastModifiedDate"), record2.get("LastModifiedDate"));
}
 
Example 20
Source Project: flink   File: AvroRecordInputFormatTest.java    License: Apache License 2.0
/**
 * Helper method to test GenericRecord serialisation.
 *
 * @param format
 *            the format to test
 * @param parameters
 *            the configuration to use
 * @throws IOException
 *             thrown if there is an issue
 */
@SuppressWarnings("unchecked")
private void doTestDeserializationGenericRecord(final AvroInputFormat<GenericRecord> format,
		final Configuration parameters) throws IOException {
	try {
		format.configure(parameters);
		FileInputSplit[] splits = format.createInputSplits(1);
		assertEquals(splits.length, 1);
		format.open(splits[0]);

		GenericRecord u = format.nextRecord(null);
		assertNotNull(u);
		assertEquals("The schemas should be equal", userSchema, u.getSchema());

		String name = u.get("name").toString();
		assertNotNull("empty record", name);
		assertEquals("name not equal", TEST_NAME, name);

		// check arrays
		List<CharSequence> sl = (List<CharSequence>) u.get("type_array_string");
		assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
		assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());

		List<Boolean> bl = (List<Boolean>) u.get("type_array_boolean");
		assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
		assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));

		// check enums
		GenericData.EnumSymbol enumValue = (GenericData.EnumSymbol) u.get("type_enum");
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), enumValue.toString());

		// check maps
		Map<CharSequence, Long> lm = (Map<CharSequence, Long>) u.get("type_map");
		assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
		assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());

		assertFalse("expecting second element", format.reachedEnd());
		assertNotNull("expecting second element", format.nextRecord(u));

		assertNull(format.nextRecord(u));
		assertTrue(format.reachedEnd());
	} finally {
		format.close();
	}
}