Java Code Examples for org.apache.avro.util.Utf8

The following examples show how to use org.apache.avro.util.Utf8. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Deserializes a record whose field 0 is encoded as a union: index 0 is
 * read as null, index 1 is read as a string.
 *
 * @param reuse   candidate instance to populate; it is used only when it is an
 *                {@link IndexedRecord} whose schema is the same object as
 *                {@code mapMapValueSchema0}, otherwise a new record is created
 * @param decoder source of the encoded data
 * @return the populated record (either {@code reuse} or a new instance)
 * @throws IOException if the decoder fails to read
 */
public IndexedRecord deserializerecord0(Object reuse, Decoder decoder)
    throws IOException
{
    IndexedRecord record;
    // instanceof is already false for null, so no separate null check is needed.
    if ((reuse instanceof IndexedRecord) && (((IndexedRecord) reuse).getSchema() == mapMapValueSchema0)) {
        record = (IndexedRecord) reuse;
    } else {
        record = new org.apache.avro.generic.GenericData.Record(mapMapValueSchema0);
    }
    int unionIndex0 = decoder.readIndex();
    if (unionIndex0 == 0) {
        decoder.readNull();
        // A reused record may still hold the value of a previously decoded
        // datum; reset the field so the null that was just read is visible.
        record.put(0, null);
    } else if (unionIndex0 == 1) {
        Object previous = record.get(0);
        // Pass an existing Utf8 back to the decoder so it may be reused.
        record.put(0, decoder.readString((previous instanceof Utf8) ? ((Utf8) previous) : null));
    }
    return record;
}
 
Example 2
Source Project: big-c   Source File: TaskFailedEvent.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the datum for this event. The datum is built from the event's
 * fields the first time it is requested and stored for later calls.
 *
 * @return the stored {@code TaskFailed} datum
 */
public Object getDatum() {
  if (datum != null) {
    return datum;
  }

  datum = new TaskFailed();
  datum.taskid = new Utf8(id.toString());
  datum.error = new Utf8(error);
  datum.finishTime = finishTime;
  datum.taskType = new Utf8(taskType.name());
  // The failing attempt is optional; keep null when there is none.
  if (failedDueToAttempt == null) {
    datum.failedDueToAttempt = null;
  } else {
    datum.failedDueToAttempt = new Utf8(failedDueToAttempt.toString());
  }
  datum.status = new Utf8(status);
  datum.counters = EventWriter.toAvro(counters);
  return datum;
}
 
Example 3
/**
 * Deserializes a record with a string in field 0 and, in field 1, a union
 * whose index 0 is read as null and whose index 1 is read as a nested
 * record of the same kind (decoded recursively).
 *
 * @param reuse   candidate instance to populate; it is used only when it is an
 *                {@link IndexedRecord} whose schema is the same object as
 *                {@code readerSchema}, otherwise a new record is created
 * @param decoder source of the encoded data
 * @return the populated record (either {@code reuse} or a new instance)
 * @throws IOException if the decoder fails to read
 */
public IndexedRecord deserializerecordName0(Object reuse, Decoder decoder)
    throws IOException
{
    IndexedRecord recordName;
    // instanceof is already false for null, so no separate null check is needed.
    if ((reuse instanceof IndexedRecord) && (((IndexedRecord) reuse).getSchema() == readerSchema)) {
        recordName = (IndexedRecord) reuse;
    } else {
        recordName = new org.apache.avro.generic.GenericData.Record(readerSchema);
    }

    Object previousText = recordName.get(0);
    // Pass an existing Utf8 back to the decoder so it may be reused.
    recordName.put(0, decoder.readString((previousText instanceof Utf8) ? ((Utf8) previousText) : null));

    int unionIndex0 = decoder.readIndex();
    if (unionIndex0 == 0) {
        decoder.readNull();
        // A reused record may still reference the nested record of a previously
        // decoded datum; reset the field so the null that was just read is visible.
        recordName.put(1, null);
    } else if (unionIndex0 == 1) {
        // The current nested value (if any) is offered as the reuse candidate.
        recordName.put(1, deserializerecordName0(recordName.get(1), decoder));
    }
    return recordName;
}
 
Example 4
/**
 * Writes field 0 of {@code data} as a union: index 0 followed by null when the
 * field is absent, otherwise index 1 followed by the string value.
 *
 * @param data    record whose field 0 is read as a {@link CharSequence}
 * @param encoder destination of the encoded data
 * @throws IOException if the encoder fails to write
 */
@SuppressWarnings("unchecked")
public void serializeRecord0(IndexedRecord data, Encoder encoder)
    throws IOException
{
    final CharSequence text = (CharSequence) data.get(0);
    if (text == null) {
        encoder.writeIndex(0);
        encoder.writeNull();
        return;
    }

    // A non-null value of static type CharSequence always takes the string branch.
    encoder.writeIndex(1);
    if (text instanceof Utf8) {
        encoder.writeString((Utf8) text);
        return;
    }
    encoder.writeString(text.toString());
}
 
Example 5
Source Project: pulsar   Source File: InfluxDBSinkTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Encodes {@code cpu} with its Avro schema, decodes it through a generic
 * schema derived from an auto-consume schema, and checks the decoded fields.
 */
@Test
public void testAvroSchema() {
    AvroSchema<Cpu> cpuSchema = AvroSchema.of(Cpu.class);

    AutoConsumeSchema autoConsume = new AutoConsumeSchema();
    autoConsume.setSchema(GenericSchemaImpl.of(cpuSchema.getSchemaInfo()));
    GenericSchema<GenericRecord> genericSchema = GenericSchemaImpl.of(autoConsume.getSchemaInfo());

    assertTrue(genericSchema instanceof GenericAvroSchema);

    byte[] encoded = cpuSchema.encode(cpu);
    GenericRecord decoded = genericSchema.decode(encoded);

    assertEquals("cpu", decoded.getField("measurement"));
    assertEquals(timestamp, decoded.getField("timestamp"));

    // Map entries are looked up with Utf8 keys.
    Map tags = (Map) decoded.getField("tags");
    assertEquals("server-1", tags.get(new Utf8("host")).toString());

    Map fields = (Map) decoded.getField("fields");
    assertEquals(10, fields.get(new Utf8("value")));
}
 
Example 6
Source Project: ml-ease   Source File: ItemModelTestLoglik.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Combines all values of one key into a single output: the count is the sum
 * of the input counts and the log-likelihood is the sum of the input
 * log-likelihoods divided by that total count.
 *
 * @param key       key shared by all {@code values}; copied to the output
 * @param values    partial results to combine
 * @param collector receives the single combined output
 * @param reporter  not used by this method
 * @throws IOException declared by the overridden method
 */
@Override
public void reduce(Utf8 key,
                   Iterable<RegressionTestLoglikOutput> values,
                   AvroCollector<RegressionTestLoglikOutput> collector,
                   Reporter reporter) throws IOException
{
  double loglikTotal = 0;
  double countTotal = 0;
  for (RegressionTestLoglikOutput partial : values)
  {
    // Read through a float local first, as the accumulation is done on the float value.
    float partialLoglik = partial.testLoglik;
    loglikTotal += partialLoglik;
    countTotal += partial.count;
  }

  RegressionTestLoglikOutput combined = new RegressionTestLoglikOutput();
  combined.key = key;
  combined.testLoglik = (float) (loglikTotal / countTotal);
  combined.count = countTotal;
  collector.collect(combined);
}
 
Example 7
Source Project: DBus   Source File: OracleGenericSchemaDecoder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Decodes every record contained in {@code input} and converts each one into
 * an {@link OracleGenericMessage}.
 *
 * @param input encoded bytes holding zero or more consecutive records
 * @return the decoded messages, in the order they were read
 * @throws IOException if decoding fails
 */
public List<IGenericMessage> unwrap(byte[] input) throws IOException {
    List<IGenericMessage> list = new LinkedList<>();

    BinaryDecoder decoder = getBinaryDecoder(input);
    while (!decoder.isEnd()) {
        GenericRecord record = datumReader.read(null, decoder);

        OracleGenericMessage msg = new OracleGenericMessage();

        Utf8 utf8 = (Utf8) record.get(OracleGenericMessage.NAMESAPCE);
        msg.setNameSpace(utf8.toString());
        msg.setSchemaHash((Integer) record.get(OracleGenericMessage.SCHEMA_HASH));

        // Copy exactly the readable bytes. ByteBuffer.array() would expose the
        // whole backing array (ignoring position, limit and array offset) and
        // throws for read-only or direct buffers. duplicate() leaves the
        // record's own buffer position untouched.
        ByteBuffer buffer = (ByteBuffer) record.get(OracleGenericMessage.PAYLOAD);
        byte[] payload = new byte[buffer.remaining()];
        buffer.duplicate().get(payload);
        msg.setPayload(payload);

        logger.debug(String.format("TAble: %s, HASH: %d\n", msg.getNameSpace(), msg.getSchemaHash()));

        list.add((IGenericMessage) msg);
    }

    return list;
}
 
Example 8
/**
 * Deserializes a record whose field 0 is encoded as a union: index 0 is
 * read as null, index 1 is read as a string.
 *
 * @param reuse   candidate instance to populate; it is used only when it is an
 *                {@link IndexedRecord} whose schema is the same object as
 *                {@code mapValueOptionSchema0}, otherwise a new record is created
 * @param decoder source of the encoded data
 * @return the populated record (either {@code reuse} or a new instance)
 * @throws IOException if the decoder fails to read
 */
public IndexedRecord deserializerecord0(Object reuse, Decoder decoder)
    throws IOException
{
    IndexedRecord record;
    // instanceof is already false for null, so no separate null check is needed.
    if ((reuse instanceof IndexedRecord) && (((IndexedRecord) reuse).getSchema() == mapValueOptionSchema0)) {
        record = (IndexedRecord) reuse;
    } else {
        record = new org.apache.avro.generic.GenericData.Record(mapValueOptionSchema0);
    }
    int unionIndex1 = decoder.readIndex();
    if (unionIndex1 == 0) {
        decoder.readNull();
        // A reused record may still hold the value of a previously decoded
        // datum; reset the field so the null that was just read is visible.
        record.put(0, null);
    } else if (unionIndex1 == 1) {
        Object previous = record.get(0);
        // Pass an existing Utf8 back to the decoder so it may be reused.
        record.put(0, decoder.readString((previous instanceof Utf8) ? ((Utf8) previous) : null));
    }
    return record;
}
 
Example 9
Source Project: DataflowTemplates   Source File: AvroRecordConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the field {@code fieldName} of {@code record} as a list of booleans.
 *
 * <p>For {@code BOOLEAN} the stored list is returned as is. For {@code STRING}
 * every non-null element is converted with {@link Boolean#valueOf(String)};
 * null elements stay null.
 *
 * @param record    record to read from
 * @param avroType  type that selects how the stored list is interpreted
 * @param fieldName name of the field to read
 * @return the list, or an empty {@link Optional} when the field value is null
 * @throws IllegalArgumentException if {@code avroType} is neither
 *     {@code BOOLEAN} nor {@code STRING}
 */
@VisibleForTesting
@SuppressWarnings("unchecked")
static Optional<List<Boolean>> readBoolArray(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  switch (avroType) {
    case BOOLEAN:
      return Optional.ofNullable((List<Boolean>) record.get(fieldName));
    case STRING:
      {
        Optional<List<Utf8>> texts = Optional.ofNullable((List<Utf8>) record.get(fieldName));
        return texts.map(
            list ->
                list.stream()
                    .map(text -> text == null ? null : Boolean.valueOf(text.toString()))
                    .collect(Collectors.toList()));
      }
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as BOOL");
  }
}
 
Example 10
Source Project: big-c   Source File: JobUnsuccessfulCompletionEvent.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Create an event to record unsuccessful completion (killed/failed) of jobs
 * @param id Job ID
 * @param finishTime Finish time of the job
 * @param finishedMaps Number of finished maps
 * @param finishedReduces Number of finished reduces
 * @param status Status of the job
 * @param diagnostics job runtime diagnostics
 */
public JobUnsuccessfulCompletionEvent(JobID id, long finishTime,
    int finishedMaps,
    int finishedReduces,
    String status,
    Iterable<String> diagnostics) {
  datum.setJobid(new Utf8(id.toString()));
  datum.setFinishTime(finishTime);
  datum.setFinishedMaps(finishedMaps);
  datum.setFinishedReduces(finishedReduces);
  datum.setJobStatus(new Utf8(status));
  if (diagnostics == null) {
    diagnostics = NODIAGS_LIST;
  }
  datum.setDiagnostics(new Utf8(Joiner.on('\n').skipNulls()
      .join(diagnostics)));
}
 
Example 11
Source Project: pulsar   Source File: GenericAvroRecord.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the value of the named field, converted where needed: a
 * {@link Utf8} becomes a {@link String}, a nested Avro generic record is
 * wrapped in a new {@code GenericAvroRecord}, and any other value is returned
 * unchanged.
 *
 * @param fieldName name of the field to look up in the underlying record
 * @return the converted field value
 */
@Override
public Object getField(String fieldName) {
    final Object raw = record.get(fieldName);
    if (raw instanceof Utf8) {
        return ((Utf8) raw).toString();
    }
    if (!(raw instanceof org.apache.avro.generic.GenericRecord)) {
        return raw;
    }

    final org.apache.avro.generic.GenericRecord nested =
        (org.apache.avro.generic.GenericRecord) raw;
    final org.apache.avro.Schema nestedSchema = nested.getSchema();
    // Describe the nested record's fields by name and position.
    final List<Field> nestedFields = nestedSchema.getFields()
        .stream()
        .map(avroField -> new Field(avroField.name(), avroField.pos()))
        .collect(Collectors.toList());
    return new GenericAvroRecord(schemaVersion, schema, nestedFields, nested);
}
 
Example 12
/**
 * Round-trips a record holding the same sub-record in a plain field and in a
 * union field, and checks that the sub-field reads back as {@code Utf8("abc")}.
 *
 * @param whetherUseFastDeserializer selects {@code decodeRecordFast} when true,
 *     {@code decodeRecordSlow} otherwise
 */
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer")
public void shouldReadSubRecordField(Boolean whetherUseFastDeserializer) {
  // given
  TestRecord record = emptyTestRecord();
  SubRecord nested = new SubRecord();
  nested.subField = "abc";

  record.subRecordUnion = nested;
  record.subRecord = nested;

  // when
  if (!whetherUseFastDeserializer) {
    record = decodeRecordSlow(TestRecord.SCHEMA$, TestRecord.SCHEMA$, specificDataAsDecoder(record));
  } else {
    record = decodeRecordFast(TestRecord.SCHEMA$, TestRecord.SCHEMA$, specificDataAsDecoder(record));
  }

  // then
  final Utf8 expected = new Utf8("abc");
  Assert.assertEquals(expected, record.subRecordUnion.subField);
  Assert.assertEquals(expected, record.subRecord.subField);
}
 
Example 13
/**
 * Serializes a map whose values are a string, a null and an int, deserializes
 * it with the fast deserializer, and checks all three entries.
 */
@Test
public void shouldDeserializeNullElementInMap() {
    // given
    Schema valueUnion = Schema.createUnion(
            Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT));
    Schema mapSchema = Schema.createMap(valueUnion);

    Map<String, Object> input = new HashMap<>();
    input.put("0", "0");
    input.put("1", null);
    input.put("2", 2);

    // when
    Map<Utf8, Object> decoded = deserializeGenericFast(mapSchema, mapSchema,
            serializeGeneric(input, mapSchema));

    // then
    Assert.assertEquals(3, decoded.size());
    Assert.assertEquals("0", decoded.get(new Utf8("0")).toString());
    Assert.assertNull(decoded.get(new Utf8("1")));
    Assert.assertEquals(2, decoded.get(new Utf8("2")));
}
 
Example 14
/**
 * Deserializes a record whose field 0 is encoded as a union: index 0 is
 * read as null, index 1 is read as a string.
 *
 * @param reuse   candidate instance to populate; it is used only when it is an
 *                {@link IndexedRecord} whose schema is the same object as
 *                {@code arrayArrayElemSchema0}, otherwise a new record is created
 * @param decoder source of the encoded data
 * @return the populated record (either {@code reuse} or a new instance)
 * @throws IOException if the decoder fails to read
 */
public IndexedRecord deserializerecord0(Object reuse, Decoder decoder)
    throws IOException
{
    IndexedRecord record;
    // instanceof is already false for null, so no separate null check is needed.
    if ((reuse instanceof IndexedRecord) && (((IndexedRecord) reuse).getSchema() == arrayArrayElemSchema0)) {
        record = (IndexedRecord) reuse;
    } else {
        record = new org.apache.avro.generic.GenericData.Record(arrayArrayElemSchema0);
    }
    int unionIndex0 = decoder.readIndex();
    if (unionIndex0 == 0) {
        decoder.readNull();
        // A reused record may still hold the value of a previously decoded
        // datum; reset the field so the null that was just read is visible.
        record.put(0, null);
    } else if (unionIndex0 == 1) {
        Object previous = record.get(0);
        // Pass an existing Utf8 back to the decoder so it may be reused.
        record.put(0, decoder.readString((previous instanceof Utf8) ? ((Utf8) previous) : null));
    }
    return record;
}
 
Example 15
/**
 * Puts one sub-record into the array and map fields (plain and union
 * variants), round-trips the record, and checks the sub-field in each
 * collection.
 */
@Test(groups = {"serializationTest"})
public void shouldWriteSubRecordCollectionsField() {

  // given
  TestRecord record = emptyTestRecord();
  SubRecord nested = new SubRecord();
  nested.subField = "abc";

  List<SubRecord> nestedList = new ArrayList<>();
  nestedList.add(nested);
  Map<CharSequence, SubRecord> nestedByKey = new HashMap<>();
  nestedByKey.put("1", nested);

  record.recordsArray = nestedList;
  record.recordsArrayUnion = nestedList;
  record.recordsMap = nestedByKey;
  record.recordsMapUnion = nestedByKey;

  // when
  record = decodeRecordFast(TestRecord.SCHEMA$, dataAsDecoder(record));

  // then
  Assert.assertEquals("abc", record.recordsArray.get(0).subField.toString());
  Assert.assertEquals("abc", record.recordsArrayUnion.get(0).subField.toString());
  // Decoded maps are queried with Utf8 keys.
  Assert.assertEquals("abc", record.recordsMap.get(new Utf8("1")).subField.toString());
  Assert.assertEquals("abc", record.recordsMapUnion.get(new Utf8("1")).subField.toString());
}
 
Example 16
Source Project: iceberg   Source File: TestBucketing.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the bucket function's hash of a {@link Utf8} equals the hash of
 * the UTF-8 bytes of the same text.
 */
@Test
public void testUtf8() {
  Utf8 text = new Utf8("string to test murmur3 hash");
  String asString = text.toString();
  byte[] utf8Bytes = asString.getBytes(Charsets.UTF_8);

  Bucket<CharSequence> bucket = Bucket.get(Types.StringType.get(), 100);

  Assert.assertEquals("String hash should match hash of UTF-8 bytes",
      hashBytes(utf8Bytes), bucket.hash(text));
}
 
Example 17
Source Project: kite   Source File: TestCharSequences.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a set from strings (with one duplicate) and checks that it contains
 * the {@link Utf8} form of every string as well as all the strings themselves.
 */
@Test
public void testStringCharSequenceSetContains() {
  List<String> names = Lists.newArrayList(
      "orange", "green", "blue", "red", "purple", "red");
  ImmutableCharSequenceSet nameSet = new ImmutableCharSequenceSet(names);

  for (String name : names) {
    Utf8 asUtf8 = new Utf8(name);
    Assert.assertTrue("Should contain Utf8(value)", nameSet.contains(asUtf8));
  }
  Assert.assertTrue("Should contain all values", nameSet.containsAll(names));
}
 
Example 18
Source Project: Flink-CEPplus   Source File: DataInputDecoder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads a length followed by that many bytes into a {@link Utf8}.
 *
 * @param old instance to fill; when {@code null} a new {@link Utf8} is created
 * @return {@code old} if it was given, otherwise the new instance
 * @throws IOException if reading from the input fails
 */
@Override
public Utf8 readString(Utf8 old) throws IOException {
	final int byteCount = readInt();

	final Utf8 target;
	if (old == null) {
		target = new Utf8();
	} else {
		target = old;
	}
	target.setByteLength(byteCount);

	// Nothing to read unless the length is positive.
	if (byteCount <= 0) {
		return target;
	}
	in.readFully(target.getBytes(), 0, byteCount);
	return target;
}
 
Example 19
Source Project: hadoop-connectors   Source File: DirectBigQueryWordCount.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Emits the row's {@code "word"} and {@code "word_count"} columns as a
 * key/value pair.
 *
 * @param unusedKey not used
 * @param row       record providing the two columns
 * @param context   receives the {@code word}/{@code count} pair
 */
@Override
public void map(NullWritable unusedKey, GenericRecord row, Context context)
    throws IOException, InterruptedException {
  final Utf8 rawWord = (Utf8) row.get("word");
  word.set(rawWord.toString());

  final Long occurrences = (Long) row.get("word_count");
  count.set(occurrences);

  context.write(word, count);
}
 
Example 20
Source Project: flink   Source File: DataInputDecoder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a string: first its length, then the bytes themselves, stored
 * directly in the returned {@link Utf8}.
 *
 * @param old optional instance to populate instead of creating a new one
 * @return the populated instance ({@code old} when it is non-null)
 * @throws IOException if the underlying input cannot be read
 */
@Override
public Utf8 readString(Utf8 old) throws IOException {
	final int numBytes = readInt();
	final Utf8 utf8 = (old == null) ? new Utf8() : old;
	utf8.setByteLength(numBytes);

	final boolean hasContent = numBytes > 0;
	if (hasContent) {
		final byte[] destination = utf8.getBytes();
		in.readFully(destination, 0, numBytes);
	}

	return utf8;
}
 
Example 21
Source Project: divolte-collector   Source File: DslRecordMapperTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Starts the server with {@code when-mapping.groovy}, sends one request, and
 * checks the fields of the resulting event record.
 */
@Test
public void shouldApplyActionsInClosureWhenEqualToConditionHolds() throws IOException, InterruptedException {
    setupServer("when-mapping.groovy");

    final String rootUrl = "http://www.example.com/";
    final String somePageUrl = "http://www.example.com/somepage.html";
    final EventPayload event = request(rootUrl, somePageUrl);

    // Fields expected to compare equal to plain strings.
    assertEquals("locationmatch", event.record.get("eventType"));
    assertEquals("referermatch", event.record.get("client"));

    // This field is compared against a Utf8 value.
    assertEquals(new Utf8("not set"), event.record.get("queryparam"));

    assertEquals("absent", event.record.get("event"));
    assertEquals("present", event.record.get("pageview"));
}
 
Example 22
Source Project: ml-ease   Source File: PartitionIdAssigner.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Emits the key paired with the current partition id, then advances the id
 * for the next key.
 *
 * @param key       key to assign a partition id to
 * @param values    not used
 * @param collector receives the key/partition-id pair
 * @param reporter  not used
 * @throws IOException declared by the overridden method
 */
@Override
public void reduce(Utf8 key,
                   Iterable<Integer> values,
                   AvroCollector<Pair<String, Integer>> collector,
                   Reporter reporter) throws IOException
{
  final Pair<String, Integer> assignment = new Pair<String, Integer>(key, _partitionId);
  collector.collect(assignment);
  // Advance only after the pair has been collected.
  _partitionId++;
}
 
Example 23
/**
 * Serializes a row that contains a null column, deserializes it again, and
 * checks every column of the resulting generic record.
 */
@Test
public void shouldSerializeRowWithNullCorrectly() {
  final String reason = "Incorrect serialization.";

  SchemaRegistryClient registryClient = new MockSchemaRegistryClient();
  KsqlGenericRowAvroSerializer serializer = new KsqlGenericRowAvroSerializer
      (schema, registryClient, new KsqlConfig(new HashMap<>()));

  // The third column is the null under test.
  List columns = Arrays.asList(1511897796092L, 1L, null, 10.0, new Double[]{100.0},
                               Collections.singletonMap("key1", 100.0));

  GenericRow row = new GenericRow(columns);
  byte[] rowBytes = serializer.serialize("t1", row);
  KafkaAvroDeserializer deserializer = new KafkaAvroDeserializer(registryClient);
  GenericRecord decoded = (GenericRecord) deserializer.deserialize("t1", rowBytes);
  Assert.assertNotNull(decoded);

  assertThat(reason, decoded.get("ordertime".toUpperCase()), equalTo
      (1511897796092L));
  assertThat(reason, decoded.get("orderid".toUpperCase()), equalTo
      (1L));
  assertThat(reason, decoded.get("itemid".toUpperCase()), equalTo
      (null));
  assertThat(reason, decoded.get("orderunits".toUpperCase()), equalTo
      (10.0));

  GenericData.Array arrayColumn = (GenericData.Array) decoded.get("arraycol".toUpperCase());
  Map mapColumn = (Map) decoded.get("mapcol".toUpperCase());

  assertThat(reason, arrayColumn.size(), equalTo(1));
  assertThat(reason, arrayColumn.get(0), equalTo(100.0));
  // The decoded map is expected to be keyed by Utf8.
  assertThat(reason, mapColumn,
             equalTo(Collections.singletonMap(new Utf8("key1"), 100.0)));

}
 
Example 24
Source Project: kite   Source File: TestRangeCharSequence.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an open-closed range built from strings accepts {@link Utf8}
 * values: one strictly inside is contained, the open lower bound is not.
 */
@Test
public void testStringRangeAcceptsUtf8() {
  Range<CharSequence> openClosed = Ranges.<CharSequence>openClosed("ab", "cd");
  Assert.assertEquals(openClosed.toString(), "(ab, cd]");

  Utf8 inside = new Utf8("ac");
  Assert.assertTrue("Should contain inner Utf8", openClosed.contains(inside));

  Utf8 lowerBound = new Utf8("ab");
  Assert.assertFalse("Should not contain outside Utf8", openClosed.contains(lowerBound));
}
 
Example 25
/**
 * Converts a (nested) Flink Row into Avro's {@link GenericRecord}.
 * Strings are converted into Avro's {@link Utf8}; any other non-Row value is
 * returned unchanged.
 *
 * <p>For a Row, the schema must be a record, or a two-branch union of null
 * and a record (in either order); otherwise a {@link RuntimeException} is
 * thrown.
 */
private static Object convertToRecord(Schema schema, Object rowObj) {
    // Leaf values need no schema inspection.
    if (rowObj instanceof String) {
        return new Utf8((String) rowObj);
    }
    if (!(rowObj instanceof Row)) {
        return rowObj;
    }

    // records can be wrapped in a union
    final Schema recordSchema;
    if (schema.getType() == Schema.Type.UNION) {
        final List<Schema> branches = schema.getTypes();
        final boolean twoBranches = branches.size() == 2;
        if (twoBranches && branches.get(0).getType() == Schema.Type.NULL && branches.get(1).getType() == Schema.Type.RECORD) {
            recordSchema = branches.get(1);
        } else if (twoBranches && branches.get(0).getType() == Schema.Type.RECORD && branches.get(1).getType() == Schema.Type.NULL) {
            recordSchema = branches.get(0);
        } else {
            throw new RuntimeException("Currently we only support schemas of the following form: UNION[null, RECORD] or UNION[RECORD, NULL] Given: " + schema);
        }
    } else if (schema.getType() == Schema.Type.RECORD) {
        recordSchema = schema;
    } else {
        throw new RuntimeException("Record type for row type expected. But is: " + schema);
    }

    final List<Schema.Field> recordFields = recordSchema.getFields();
    final GenericRecord converted = new GenericData.Record(recordSchema);
    final Row source = (Row) rowObj;
    // Row values are taken by list index and stored at the field's own position.
    for (int index = 0; index < recordFields.size(); index++) {
        final Schema.Field current = recordFields.get(index);
        converted.put(current.pos(), convertToRecord(current.schema(), source.getField(index)));
    }
    return converted;
}
 
Example 26
Source Project: iceberg   Source File: SparkValueWriters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the string to the encoder as a {@link Utf8} built from its bytes.
 *
 * @param s       string to write
 * @param encoder destination of the encoded data
 * @throws IOException if the encoder fails to write
 */
@Override
public void write(UTF8String s, Encoder encoder) throws IOException {
  // Go through getBytes rather than toString: per the original author's note,
  // getBytes may return the backing byte array if available and otherwise
  // copies to a new byte array, which is still cheaper than Avro calling
  // toString, which incurs encoding costs.
  final byte[] utf8Bytes = s.getBytes();
  final Utf8 wrapped = new Utf8(utf8Bytes);
  encoder.writeString(wrapped);
}
 
Example 27
Source Project: iceberg   Source File: ParquetValueWriters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes a character sequence to the column as binary. A {@link Utf8} is
 * written from its own byte array; any other sequence goes through
 * {@code toString()}.
 *
 * @param repetitionLevel repetition level passed through to the column
 * @param value           character sequence to write
 */
@Override
public void write(int repetitionLevel, CharSequence value) {
  final Binary binary;
  if (!(value instanceof Utf8)) {
    binary = Binary.fromString(value.toString());
  } else {
    final Utf8 asUtf8 = (Utf8) value;
    // Only the first getByteLength() bytes of the array are passed on.
    binary = Binary.fromReusedByteArray(asUtf8.getBytes(), 0, asUtf8.getByteLength());
  }
  column.writeBinary(repetitionLevel, binary);
}
 
Example 28
Source Project: iceberg   Source File: PartitionData.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stores {@code value} at position {@code pos}. A {@link Utf8} is stored as a
 * {@link String} and a {@link ByteBuffer} as a copy of its remaining bytes;
 * every other value is stored as is.
 *
 * @param pos   index into {@code data}
 * @param value value to store
 */
@Override
public <T> void set(int pos, T value) {
  if (value instanceof Utf8) {
    // Utf8 is not Serializable
    data[pos] = value.toString();
    return;
  }

  if (value instanceof ByteBuffer) {
    // ByteBuffer is not Serializable
    final ByteBuffer source = (ByteBuffer) value;
    final byte[] copy = new byte[source.remaining()];
    // Read through a duplicate so the caller's buffer position is not moved.
    source.duplicate().get(copy);
    data[pos] = copy;
    return;
  }

  data[pos] = value;
}
 
Example 29
Source Project: funcj   Source File: AvroMapCodecs.java    License: MIT License 5 votes vote down vote up
/**
 * Encodes a string-keyed map into a map keyed by {@link Utf8}, encoding each
 * value with {@code valueCodec} against the map schema's value type.
 *
 * @param core  codec core passed through to the value codec
 * @param value map to encode
 * @param out   expected to be a {@link Schema}; checked to be of type MAP
 * @return a new map holding the encoded entries
 */
@Override
public Object encode(CodecCoreEx<WithSchema, Object, Config> core, Map<String, V> value, Object out) {
    final Schema mapSchema = checkSchemaType((Schema)out, Schema.Type.MAP);
    final Schema elementSchema = mapSchema.getValueType();

    final Map<CharSequence, Object> encoded = new HashMap<>();
    for (Map.Entry<String, V> entry : value.entrySet()) {
        final Utf8 encodedKey = new Utf8(entry.getKey());
        encoded.put(encodedKey, valueCodec.encodeWithCheck(core, entry.getValue(), elementSchema));
    }

    return encoded;
}
 
Example 30
Source Project: parquet-mr   Source File: TestGenericLogicalTypes.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes two records holding UUID values and checks that reading them back
 * with {@code GenericData.get()} yields records holding the UUIDs' string
 * form as {@link Utf8}.
 */
@Test
public void testWriteUUIDReadStringMissingLogicalType() throws IOException {
  Schema uuidSchema = record("R",
      field("uuid", LogicalTypes.uuid().addToSchema(Schema.create(STRING))));
  GenericRecord firstUuid = instance(uuidSchema, "uuid", UUID.randomUUID());
  GenericRecord secondUuid = instance(uuidSchema, "uuid", UUID.randomUUID());

  // Expected records carry the textual form of the same UUIDs.
  GenericRecord firstText = instance(uuidSchema, "uuid", new Utf8(firstUuid.get("uuid").toString()));
  GenericRecord secondText = instance(uuidSchema, "uuid", new Utf8(secondUuid.get("uuid").toString()));

  File written = write(GENERIC, uuidSchema, firstUuid, secondUuid);
  Assert.assertEquals("Should read UUIDs as Strings",
      Arrays.asList(firstText, secondText), read(GenericData.get(), uuidSchema, written));
}