Java Code Examples for org.apache.avro.generic.GenericRecord#get()

The following examples show how to use org.apache.avro.generic.GenericRecord#get(). Each example is drawn from an open-source project; the source file and license are noted above each snippet.
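
GenericRecord offers two accessors: get(String key) resolves a field by name through the record's schema, while get(int i), inherited from IndexedRecord, reads by field position. Before the project examples, here is a minimal self-contained sketch of both (the schema and field names are illustrative):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class GenericRecordGetBasics {
    public static void main(String[] args) {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
                + "{\"name\":\"id\",\"type\":\"long\"},"
                + "{\"name\":\"name\",\"type\":\"string\"}]}");

        GenericRecord record = new GenericData.Record(schema);
        record.put("id", 42L);
        record.put("name", "alice");

        Long id = (Long) record.get("id"); // lookup by field name
        Object name = record.get(1);       // lookup by field position (0-based)

        // Records read back from serialized data typically return
        // org.apache.avro.util.Utf8 for string fields, so convert explicitly.
        System.out.println(id + " " + name.toString());
    }
}
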
Example 1
Source File: LobAvroImportTestCase.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/** Import blob data that is smaller than inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportInline() throws IOException, SQLException {
  String [] types = { getBlobType() };
  String expectedVal = "This is short BLOB data";
  String [] vals = { getBlobInsertStr(expectedVal) };

  createTableWithColTypes(types, vals);

  runImport(getArgv());

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  GenericRecord record = reader.next();

  // Verify that blob data is imported as Avro bytes.
  ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
  String returnVal = new String(buf.array());

  assertEquals(getColName(0), expectedVal, returnVal);
  reader.close();
}
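
A caveat on Example 1: buf.array() exposes the buffer's entire backing array, which is only correct when the buffer spans the whole array, and it also ignores the charset. Example 11 below shows the defensive hasArray() pattern from a real project; the stand-alone sketch here copies just the readable region (the helper names are ours):

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

class ByteBufferBytes {
    // Copy exactly the readable region instead of exposing the backing array.
    static byte[] toBytes(ByteBuffer buf) {
        byte[] bytes = new byte[buf.remaining()];
        buf.duplicate().get(bytes); // duplicate() leaves the caller's position untouched
        return bytes;
    }

    static String toUtf8String(ByteBuffer buf) {
        return new String(toBytes(buf), StandardCharsets.UTF_8);
    }
}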
 
Example 2
Source File: KafkaValueDeserializer.java    From kareldb with Apache License 2.0
private NavigableMap<Long, VersionedValue> toValue(GenericArray<GenericRecord> array) {
    NavigableMap<Long, VersionedValue> map = new TreeMap<>();
    Schema recordSchema = avroSchema.getElementType();
    List<Schema.Field> fields = recordSchema.getFields();
    int size = fields.size();
    for (GenericRecord record : array) {
        Long version = (Long) record.get(0);
        Long commit = (Long) record.get(1);
        boolean deleted = (Boolean) record.get(2);
        Comparable[] row = new Comparable[size - 3];
        for (int i = 0; i < row.length; i++) {
            Schema schema = fields.get(i + 3).schema();
            Comparable value = (Comparable) record.get(i + 3);
            row[i] = AvroSchema.fromAvroValue(schema, value);
        }
        map.put(version, new VersionedValue(version, commit, deleted, row));
    }
    return map;
}
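
Example 2 reads by position: get(0), get(1), get(2). Positional access avoids name lookups in a tight loop but silently breaks if the schema's field order changes. A hedged sketch of resolving positions from names once up front (the field name "version" mirrors the example's first column):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;

class PositionalAccess {
    // Resolve a field's position once, then use the faster get(int).
    static Object versionOf(GenericRecord record) {
        Schema schema = record.getSchema();
        int versionPos = schema.getField("version").pos(); // NPE if the field is absent
        return record.get(versionPos);
    }
}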
 
Example 3
Source File: Purge.java    From Cubert with Apache License 2.0
private void loadMembersToPurge(String filename) throws IOException
{
    // TODO: "memberId" column name should be configurable
    DataFileReader<GenericRecord> dataFileReader =
            createDataFileReader(filename, true);
    while (dataFileReader.hasNext())
    {
        GenericRecord record = dataFileReader.next();
        Integer memberId = (Integer) record.get("memberId");
        if (memberId == null)
        {
            throw new NullPointerException("memberId is null");
        }
        membersToPurge.add(memberId.intValue());
    }
    dataFileReader.close();
}
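
Worth noting for Example 3: GenericData.Record.get(String) returns null both when the field holds a null value and when no field of that name exists, so the null check above conflates a missing column with a null one. A sketch that separates the two cases by consulting the schema first:

import org.apache.avro.generic.GenericRecord;

class RequiredField {
    // Distinguish "field not in schema" from "field value is null".
    static Object requireField(GenericRecord record, String name) {
        if (record.getSchema().getField(name) == null) {
            throw new IllegalArgumentException("No field '" + name + "' in schema");
        }
        Object value = record.get(name);
        if (value == null) {
            throw new NullPointerException("Field '" + name + "' is null");
        }
        return value;
    }
}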
 
Example 4
Source File: DefaultJoinKeyExtractor.java    From incubator-pinot with Apache License 2.0
@Override
public String extractJoinKey(String sourceName, GenericRecord record) {

  String joinKey = defaultJoinKey;
  if (joinKeyMap != null && joinKeyMap.containsKey(sourceName)) {
    joinKey = joinKeyMap.get(sourceName);
  }
  String ret = "INVALID";
  if (joinKey != null) {
    Object object = record.get(joinKey);
    if (object != null) {
      ret = object.toString();
    }
  }
  LOGGER.info("source:{} JoinKey:{} value:{}", sourceName, joinKey, ret);
  return ret;
}
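
Example 4 converts the field with object.toString() rather than a (String) cast. That is deliberate: string fields deserialized into generic records typically arrive as org.apache.avro.util.Utf8, so a direct cast can throw ClassCastException. A minimal sketch of the safe conversion:

import org.apache.avro.generic.GenericRecord;

class StringFieldAccess {
    // get() may return Utf8, String, or null for a string field;
    // toString() handles the first two uniformly.
    static String asString(GenericRecord record, String name) {
        Object value = record.get(name);
        return value == null ? null : value.toString();
    }
}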
 
Example 5
Source File: AvroGenericUtils.java    From simplesource with Apache License 2.0
static <K> CommandResponse<GenericRecord> fromCommandResponse(
        final GenericRecord record) {
    final GenericRecord aggregateKey = (GenericRecord) record.get(AGGREGATE_KEY);
    final Sequence readSequence = Sequence.position((Long) record.get(READ_SEQUENCE));
    final UUID commandId = UUID.fromString(String.valueOf(record.get(COMMAND_ID)));
    final GenericRecord genericResult = (GenericRecord) record.get(RESULT);
    final Result<CommandError, Sequence> result;
    if (nonNull(genericResult.get(WRITE_SEQUENCE))) {
        final Sequence writeSequence = Sequence.position((Long) genericResult.get(WRITE_SEQUENCE));
        result = Result.success(writeSequence);
    } else {
        final CommandError commandError = toCommandError((GenericRecord) genericResult.get(REASON));
        final List<CommandError> additionalCommandErrors = ((List<GenericRecord>) genericResult.get(ADDITIONAL_REASONS))
                .stream()
                .map(AggregateUpdateResultAvroHelper::toCommandError)
                .collect(Collectors.toList());
        result = Result.failure(new NonEmptyList<>(commandError, additionalCommandErrors));
    }

    return CommandResponse.of(CommandId.of(commandId), aggregateKey, readSequence, result);
}
 
Example 6
Source File: GenericRecordExportMapper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
protected SqoopRecord toSqoopRecord(GenericRecord record) throws IOException {
  Schema avroSchema = record.getSchema();
  for (Map.Entry<Writable, Writable> e : columnTypes.entrySet()) {
    String columnName = e.getKey().toString();
    String columnType = e.getValue().toString();
    String cleanedCol = ClassWriter.toIdentifier(columnName);
    Schema.Field field = getFieldIgnoreCase(avroSchema, cleanedCol);
    if (null == field) {
      throw new IOException("Cannot find field " + cleanedCol
          + " in Avro schema " + avroSchema);
    }

    Object avroObject = record.get(field.name());
    Object fieldVal = AvroUtil.fromAvro(avroObject, field.schema(), columnType);
    recordImpl.setField(cleanedCol, fieldVal);
  }
  return recordImpl;
}
 
Example 7
Source File: AvroMessageParser.java    From secor with Apache License 2.0
@Override
public long extractTimestampMillis(final Message message) {
    try {
        GenericRecord record = schemaRegistry.deserialize(message.getTopic(), message.getPayload());
        if (record != null) {
            Object fieldValue = record.get(mConfig.getMessageTimestampName());
            if (fieldValue != null) {
                return toMillis(Double.valueOf(fieldValue.toString()).longValue());
            }
        } else if (m_timestampRequired) {
            throw new RuntimeException("Missing timestamp field for message: " + message);
        }
    } catch (Exception e) {
        LOG.error("Failed to parse record", e);
    }
    return 0;
}
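
Examples 7 and 12 coerce the timestamp with Double.valueOf(fieldValue.toString()).longValue(), a string round-trip that tolerates int, long, float, and double fields alike. When the schema guarantees a numeric field, a Number cast is more direct; a sketch, not a drop-in replacement:

class TimestampCoercion {
    // Cast to Number when the field is known to be numeric;
    // fall back to the string round-trip otherwise.
    static long toLongValue(Object fieldValue) {
        if (fieldValue instanceof Number) {
            return ((Number) fieldValue).longValue();
        }
        return Double.valueOf(fieldValue.toString()).longValue();
    }
}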
 
Example 8
Source File: ImpressionClickPartitionPreservingJob.java    From datafu with Apache License 2.0
@Override
public void accumulate(GenericRecord value)
{
  if (value.get("type").toString().equals("click"))
  {
    clicks++;
  }
  else if (value.get("type").toString().equals("impression"))
  {
    impressions++;
  }
  else
  {
    throw new RuntimeException("Didn't expect: " + value.get("type"));
  }
}
 
Example 9
Source File: FastGenericSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License
@Test(groups = {"serializationTest"})
public void shouldWriteRightUnionIndex() {
  // Create two record schemas
  Schema recordSchema1 = createRecord("record1", createField("record1_field1", Schema.create(Schema.Type.STRING)));
  Schema recordSchema2 = createRecord("record2", createField("record2_field1", Schema.create(Schema.Type.STRING)));
  Schema unionSchema = createUnionSchema(recordSchema1, recordSchema2);
  Schema recordWrapperSchema = createRecord(createField("union_field", unionSchema));

  GenericData.Record objectOfRecordSchema2 = new GenericData.Record(recordSchema2);
  objectOfRecordSchema2.put("record2_field1", "abc");
  GenericData.Record wrapperObject = new GenericData.Record(recordWrapperSchema);
  wrapperObject.put("union_field", objectOfRecordSchema2);

  GenericRecord record = decodeRecord(recordWrapperSchema, dataAsBinaryDecoder(wrapperObject));

  Object unionField = record.get("union_field");
  Assert.assertTrue(unionField instanceof GenericData.Record);
  GenericData.Record unionRecord = (GenericData.Record)unionField;
  Assert.assertEquals(unionRecord.getSchema().getName(), "record2");
}
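
Example 9 identifies the union branch with an instanceof check plus the record's schema name. When the branch index itself is needed, GenericData.resolveUnion computes it from the union schema and the value; a sketch (the field name is illustrative):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

class UnionBranch {
    // Returns the index of the union branch the value belongs to;
    // throws UnresolvedUnionException if no branch matches.
    static int branchOf(GenericRecord record, String fieldName) {
        Schema unionSchema = record.getSchema().getField(fieldName).schema();
        return GenericData.get().resolveUnion(unionSchema, record.get(fieldName));
    }
}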
 
Example 10
Source File: ThirdeyeAvroUtils.java    From incubator-pinot with Apache License 2.0
public static Number getMetricFromRecord(GenericRecord record, String metricName, MetricType metricType) {
  Number metricValue = (Number) record.get(metricName);
  if (metricValue == null) {
    metricValue = metricType.getDefaultNullValue();
  }
  return metricValue;
}
 
Example 11
Source File: EnvelopeSchemaConverter.java    From incubator-gobblin with Apache License 2.0
/**
 * Get payload field from GenericRecord and convert to byte array
 */
public byte[] getPayload(GenericRecord inputRecord, String payloadFieldName) {
  ByteBuffer bb = (ByteBuffer) inputRecord.get(payloadFieldName);
  byte[] payloadBytes;
  if (bb.hasArray()) {
    payloadBytes = bb.array();
  } else {
    payloadBytes = new byte[bb.remaining()];
    bb.get(payloadBytes);
  }
  String hexString = new String(payloadBytes, StandardCharsets.UTF_8);
  return DatatypeConverter.parseHexBinary(hexString);
}
 
Example 12
Source File: AvroSplitByFieldMessageParser.java    From secor with Apache License 2.0
protected long extractTimestampMillis(GenericRecord record) {
    try {
        if (record != null) {
            Object fieldValue = record.get(mConfig.getMessageTimestampName());
            if (fieldValue != null) {
                return toMillis(Double.valueOf(fieldValue.toString()).longValue());
            }
        } else if (m_timestampRequired) {
            throw new RuntimeException("Missing timestamp field for message: " + record.toString());
        }
    } catch (SerializationException e) {
        LOG.error("Failed to parse record", e);
    }
    return 0;
}
 
Example 13
Source File: AvroLoader.java    From incubator-samoa with Apache License 2.0
/**
 * Method to read Sparse Instances from Avro File
 * 
 * @return Instance
 */
protected Instance readInstanceSparse(GenericRecord record) {

  Instance instance = new SparseInstance(1.0, null);
  int numAttribute = -1;
  ArrayList<Double> attributeValues = new ArrayList<Double>();
  List<Integer> indexValues = new ArrayList<Integer>();

  for (Attribute attribute : attributes) {
    numAttribute++;
    Object value = record.get(attribute.name);

    boolean isNumeric = attributes.get(numAttribute).isNumeric();
    boolean isNominal = attributes.get(numAttribute).isNominal();

    /** If value is empty/null iterate to the next attribute. **/
    if (value == null)
      continue;

    if (isNumeric)
    {
      if (value instanceof Double) {
        Double v = (double) value;
        //if (Double.isFinite(v))
        if (!Double.isNaN(v) && !Double.isInfinite(v))
          this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (double) value);
      }
      else if (value instanceof Long)
        this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (long) value);
      else if (value instanceof Integer)
        this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (int) value);
      else
        throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name);
    }
    else if (isNominal)
    {
      double valueAttribute;

      if (!(value instanceof EnumSymbol))
        throw new RuntimeException(AVRO_LOADER_INVALID_TYPE_ERROR + " : " + attribute.name);

      EnumSymbol enumSymbolValue = (EnumSymbol) value;

      String stringValue = enumSymbolValue.toString();

      if (("?".equals(stringValue)) || (stringValue == null)) {
        valueAttribute = Double.NaN;
      } else {
        valueAttribute = this.instanceInformation.attribute(numAttribute).indexOfValue(stringValue);
      }

      this.setSparseValue(instance, indexValues, attributeValues, numAttribute, valueAttribute);
    }
  }

  int[] arrayIndexValues = new int[attributeValues.size()];
  double[] arrayAttributeValues = new double[attributeValues.size()];

  for (int i = 0; i < arrayIndexValues.length; i++) {
    arrayIndexValues[i] = indexValues.get(i).intValue();
    arrayAttributeValues[i] = attributeValues.get(i).doubleValue();
  }

  instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes());
  return instance;

}
 
Example 14
Source File: AvroRecordInputFormatTest.java    From flink with Apache License 2.0
/**
 * Helper method to test GenericRecord serialisation.
 *
 * @param format
 *            the format to test
 * @param parameters
 *            the configuration to use
 * @throws IOException
 *             thrown if there is an issue
 */
@SuppressWarnings("unchecked")
private void doTestDeserializationGenericRecord(final AvroInputFormat<GenericRecord> format,
		final Configuration parameters) throws IOException {
	try {
		format.configure(parameters);
		FileInputSplit[] splits = format.createInputSplits(1);
		assertEquals(splits.length, 1);
		format.open(splits[0]);

		GenericRecord u = format.nextRecord(null);
		assertNotNull(u);
		assertEquals("The schemas should be equal", userSchema, u.getSchema());

		String name = u.get("name").toString();
		assertNotNull("empty record", name);
		assertEquals("name not equal", TEST_NAME, name);

		// check arrays
		List<CharSequence> sl = (List<CharSequence>) u.get("type_array_string");
		assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
		assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());

		List<Boolean> bl = (List<Boolean>) u.get("type_array_boolean");
		assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
		assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));

		// check enums
		GenericData.EnumSymbol enumValue = (GenericData.EnumSymbol) u.get("type_enum");
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), enumValue.toString());

		// check maps
		Map<CharSequence, Long> lm = (Map<CharSequence, Long>) u.get("type_map");
		assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
		assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());

		assertFalse("expecting second element", format.reachedEnd());
		assertNotNull("expecting second element", format.nextRecord(u));

		assertNull(format.nextRecord(u));
		assertTrue(format.reachedEnd());
	} finally {
		format.close();
	}
}
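
Note the new Utf8(TEST_MAP_KEY1) wrapping in the map assertions above: maps deserialized into generic records are keyed by org.apache.avro.util.Utf8, so looking them up with a plain String returns null. A minimal sketch of the lookup:

import java.util.Map;
import org.apache.avro.util.Utf8;

class AvroMapLookup {
    // Deserialized map keys are Utf8; wrap the String key before lookup.
    static Long valueFor(Map<CharSequence, Long> map, String key) {
        return map.get(new Utf8(key));
    }
}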
 
Example 15
Source File: JsonIntermediateToAvroConverterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testConverter()
    throws Exception {
  initResources("/converter/schema.json");
  JsonIntermediateToAvroConverter converter = new JsonIntermediateToAvroConverter();

  Schema avroSchema = converter.convertSchema(jsonSchema, state);
  GenericRecord record = converter.convertRecord(avroSchema, jsonRecord, state).iterator().next();

  //testing output values are expected types and values
  Assert.assertEquals(jsonRecord.get("Id").getAsString(), record.get("Id").toString());
  Assert.assertEquals(jsonRecord.get("IsDeleted").getAsBoolean(), record.get("IsDeleted"));

  if (!(record.get("Salutation") instanceof GenericArray)) {
    Assert.fail("expected array, found " + record.get("Salutation").getClass().getName());
  }

  if (!(record.get("MapAccount") instanceof Map)) {
    Assert.fail("expected map, found " + record.get("MapAccount").getClass().getName());
  }

  Assert.assertEquals(jsonRecord.get("Industry").getAsString(), record.get("Industry").toString());

  DateTimeFormatter format = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss")
      .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));

  Assert.assertEquals(jsonRecord.get("LastModifiedDate").getAsString(),
      new DateTime(record.get("LastModifiedDate")).toString(format));
  Assert.assertEquals(jsonRecord.get("date_type").getAsString(),
      new DateTime(record.get("date_type")).toString(format));

  format = DateTimeFormat.forPattern("HH:mm:ss").withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
  Assert.assertEquals(jsonRecord.get("time_type").getAsString(),
      new DateTime(record.get("time_type")).toString(format));
  Assert.assertEquals(jsonRecord.get("bytes_type").getAsString().getBytes(),
      ((ByteBuffer) record.get("bytes_type")).array());
  Assert.assertEquals(jsonRecord.get("int_type").getAsInt(), record.get("int_type"));
  Assert.assertEquals(jsonRecord.get("long_type").getAsLong(), record.get("long_type"));
  Assert.assertEquals(jsonRecord.get("float_type").getAsFloat(), record.get("float_type"));
  Assert.assertEquals(jsonRecord.get("double_type").getAsDouble(), record.get("double_type"));

  //Testing timezone
  state.setProp(ConfigurationKeys.CONVERTER_AVRO_DATE_TIMEZONE, "EST");
  avroSchema = converter.convertSchema(jsonSchema, state);
  GenericRecord record2 = converter.convertRecord(avroSchema, jsonRecord, state).iterator().next();

  Assert.assertNotEquals(record.get("LastModifiedDate"), record2.get("LastModifiedDate"));
}
 
Example 16
Source File: AVROIntermediateDataFormat.java    From sqoop-on-spark with Apache License 2.0
@SuppressWarnings("unchecked")
private String toCSV(GenericRecord record) {
  Column[] columns = this.schema.getColumnsArray();

  StringBuilder csvString = new StringBuilder();
  for (int i = 0; i < columns.length; i++) {

    Object obj = record.get(columns[i].getName());
    if (obj == null && !columns[i].isNullable()) {
      throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005,
          columns[i].getName() + " does not support null values");
    }
    if (obj == null) {
      csvString.append(NULL_VALUE);
    } else {

      switch (columns[i].getType()) {
      case ARRAY:
      case SET:
        List<Object> objList = (List<Object>) obj;
        csvString.append(toCSVList(toObjectArray(objList), columns[i]));
        break;
      case MAP:
        Map<Object, Object> objMap = (Map<Object, Object>) obj;
        csvString.append(toCSVMap(objMap, columns[i]));
        break;
      case ENUM:
      case TEXT:
        csvString.append(toCSVString(obj.toString()));
        break;
      case BINARY:
      case UNKNOWN:
        csvString.append(toCSVByteArray(getBytesFromByteBuffer(obj)));
        break;
      case FIXED_POINT:
        csvString.append(toCSVFixedPoint(obj, columns[i]));
        break;
      case FLOATING_POINT:
        csvString.append(toCSVFloatingPoint(obj, columns[i]));
        break;
      case DECIMAL:
        // stored as string
        csvString.append(toCSVDecimal(obj));
        break;
      case DATE:
        // stored as long
        Long dateInMillis = (Long) obj;
        csvString.append(toCSVDate(new org.joda.time.LocalDate(dateInMillis)));
        break;
      case TIME:
        // stored as long
        Long timeInMillis = (Long) obj;
        csvString.append(toCSVTime(new org.joda.time.LocalTime(timeInMillis), columns[i]));
        break;
      case DATE_TIME:
        // stored as long
        Long dateTimeInMillis = (Long) obj;
        csvString.append(toCSVDateTime(new org.joda.time.DateTime(dateTimeInMillis), columns[i]));
        break;
      case BIT:
        csvString.append(toCSVBit(obj));
        break;
      default:
        throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001,
            "Column type from schema was not recognized for " + columns[i].getType());
      }
    }
    if (i < columns.length - 1) {
      csvString.append(CSV_SEPARATOR_CHARACTER);
    }

  }

  return csvString.toString();
}
 
Example 17
Source File: TestApacheAvroEventSerializer.java    From flume-plugins with MIT License
@Test
public void test() throws FileNotFoundException, IOException {

    // create the file, write some data
    OutputStream out = new FileOutputStream(testFile);
    String builderName = ApacheLogAvroEventSerializer.Builder.class.getName();

    Context ctx = new Context();
    ctx.put("syncInterval", "4096");

    EventSerializer serializer =
            EventSerializerFactory.getInstance(builderName, ctx, out);
    serializer.afterCreate(); // must call this when a file is newly created

    List<Event> events = generateApacheEvents();
    for (Event e : events) {
        serializer.write(e);
    }
    serializer.flush();
    serializer.beforeClose();
    out.flush();
    out.close();

    // now try to read the file back

    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader =
            new DataFileReader<GenericRecord>(testFile, reader);

    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    int numEvents = 0;
    while (fileReader.hasNext()) {
        fileReader.next(record);
        String ip = record.get("ip").toString();
        String uri = record.get("uri").toString();
        Integer statuscode = (Integer) record.get("statuscode");
        String original = record.get("original").toString();
        String connectionstatus = record.get("connectionstatus").toString();

        Assert.assertEquals("Ip should be 80.79.194.3", "80.79.194.3", ip);
        System.out.println("IP " + ip + " requested: " + uri + " with status code " + statuscode + " and connectionstatus: " + connectionstatus);
        System.out.println("Original logline: " + original);
        numEvents++;
    }

    fileReader.close();
    Assert.assertEquals("Should have found a total of 3 events", 2, numEvents);

    FileUtils.forceDelete(testFile);
}
 
Example 18
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testAvroGeneratorShortType() throws Exception {
  final String SCHEMA_JSON = "{\n"
  +"\"type\": \"record\",\n"
  +"\"name\": \"WithDecimal\",\n"
  +"\"fields\": [\n"
  +" {\"name\": \"short\", \"type\": \"int\"}"
  +"]}";
  final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON);

  Map<String, Field> map = new LinkedHashMap<>();
  map.put("short", Field.create(Field.Type.SHORT, (short)1));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Object retrievedField = readRecord.get("short");
  Assert.assertEquals(1, retrievedField);

  Assert.assertFalse(dataFileReader.hasNext());
}
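
The assertion compares against the int literal 1 because Avro has no 16-bit primitive: the SHORT field is declared as an Avro int in the schema, and get() returns it as a java.lang.Integer. A sketch of the explicit narrowing, should a short be needed downstream:

class ShortFieldAccess {
    // Avro stores the value as int; narrow explicitly on the way out.
    static short asShort(Object avroValue) {
        return ((Integer) avroValue).shortValue();
    }
}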
 
Example 19
Source File: AvroGenericUtils.java    From simplesource with Apache License 2.0
public static ValueWithSequence<GenericRecord> fromGenericRecord(final GenericRecord record) {
    final GenericRecord genericValue = (GenericRecord) record.get(VALUE);
    final Sequence sequence = Sequence.position((Long) record.get(SEQUENCE));

    return new ValueWithSequence<>(genericValue, sequence);
}
 
Example 20
Source File: GenericPartitioner.java    From incubator-pinot with Apache License 2.0
@Override
public int getPartition(T genericRecordAvroKey, AvroValue<GenericRecord> genericRecordAvroValue, int numPartitions) {
  final GenericRecord inputRecord = genericRecordAvroValue.datum();
  final Object partitionColumnValue = inputRecord.get(_partitionColumn);
  return _partitionFunction.getPartition(partitionColumnValue);
}