org.apache.avro.util.Utf8 Java Examples

The following examples show how to use org.apache.avro.util.Utf8. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Array_of_record_GenericDeserializer_1629046702287533603_1629046702287533603.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Reads one record from {@code decoder}.
 *
 * <p>{@code reuse} is recycled only when it is an {@link IndexedRecord} whose schema is the very
 * same instance as {@code arrayArrayElemSchema0}; otherwise a fresh generic record is created.
 * Field 0 is encoded as a union: branch 0 is a null (the field is left as it was), branch 1 is
 * a string.
 *
 * @param reuse   candidate object to populate, may be {@code null}
 * @param decoder source of the encoded data
 * @return the populated record
 * @throws IOException if the decoder fails
 */
public IndexedRecord deserializerecord0(Object reuse, Decoder decoder)
    throws IOException
{
    final boolean reusable = (reuse instanceof IndexedRecord)
        && (((IndexedRecord) reuse).getSchema() == arrayArrayElemSchema0);
    final IndexedRecord record = reusable
        ? (IndexedRecord) reuse
        : new org.apache.avro.generic.GenericData.Record(arrayArrayElemSchema0);

    final int branch = decoder.readIndex();
    if (branch == 0) {
        decoder.readNull();
    } else if (branch == 1) {
        // Pass the existing Utf8 (if there is one) to readString as the object to reuse.
        final Object previous = record.get(0);
        final Utf8 recycled = (previous instanceof Utf8) ? (Utf8) previous : null;
        record.put(0, decoder.readString(recycled));
    }
    return record;
}
 
Example #2
Source File: GenericAvroRecord.java    From pulsar with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up {@code fieldName} on the wrapped Avro record and adapts the value: a {@link Utf8}
 * is returned as a {@code String}, a nested Avro generic record is wrapped in a new
 * {@link GenericAvroRecord}, and any other value is returned unchanged.
 */
@Override
public Object getField(String fieldName) {
    final Object raw = record.get(fieldName);
    if (raw instanceof Utf8) {
        return raw.toString();
    }
    if (!(raw instanceof org.apache.avro.generic.GenericRecord)) {
        return raw;
    }

    final org.apache.avro.generic.GenericRecord nested =
        (org.apache.avro.generic.GenericRecord) raw;
    // Describe the nested record's fields by name and position.
    final List<Field> nestedFields = nested.getSchema()
        .getFields()
        .stream()
        .map(f -> new Field(f.name(), f.pos()))
        .collect(Collectors.toList());
    return new GenericAvroRecord(schemaVersion, schema, nestedFields, nested);
}
 
Example #3
Source File: InfluxDBSinkTest.java    From pulsar with Apache License 2.0 6 votes vote down vote up
/**
 * Encodes the {@code cpu} fixture with its Avro schema, decodes the bytes through a generic
 * schema obtained via {@link AutoConsumeSchema}, and checks the decoded fields.
 */
@Test
public void testAvroSchema() {
    AvroSchema<Cpu> cpuSchema = AvroSchema.of(Cpu.class);

    AutoConsumeSchema autoConsume = new AutoConsumeSchema();
    autoConsume.setSchema(GenericSchemaImpl.of(cpuSchema.getSchemaInfo()));
    GenericSchema<GenericRecord> genericSchema = GenericSchemaImpl.of(autoConsume.getSchemaInfo());
    assertTrue(genericSchema instanceof GenericAvroSchema);

    GenericRecord decoded = genericSchema.decode(cpuSchema.encode(cpu));

    assertEquals("cpu", decoded.getField("measurement"));
    assertEquals(timestamp, decoded.getField("timestamp"));

    // The map-typed fields are looked up with Utf8 keys rather than String keys.
    Map tags = (Map) decoded.getField("tags");
    assertEquals("server-1", tags.get(new Utf8("host")).toString());
    Map fields = (Map) decoded.getField("fields");
    assertEquals(10, fields.get(new Utf8("value")));
}
 
Example #4
Source File: ItemModelTestLoglik.java    From ml-ease with Apache License 2.0 6 votes vote down vote up
/**
 * Merges all partial results for {@code key} into a single output record: {@code count} is the
 * sum of the incoming counts and {@code testLoglik} is the summed log-likelihood divided by that
 * count. The merged record is handed to {@code collector}.
 */
@Override
public void reduce(Utf8 key,
                   Iterable<RegressionTestLoglikOutput> values,
                   AvroCollector<RegressionTestLoglikOutput> collector,
                   Reporter reporter) throws IOException
{
  double loglikTotal = 0;
  double countTotal = 0;
  for (RegressionTestLoglikOutput partial : values)
  {
    loglikTotal += partial.testLoglik;
    countTotal += partial.count;
  }

  RegressionTestLoglikOutput merged = new RegressionTestLoglikOutput();
  merged.key = key;
  merged.testLoglik = (float) (loglikTotal / countTotal);
  merged.count = countTotal;
  collector.collect(merged);
}
 
Example #5
Source File: AvroRecordConverter.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code fieldName} from {@code record} as a list of booleans.
 *
 * <p>For {@code BOOLEAN} the stored list is returned as is. For {@code STRING} every non-null
 * element is converted with {@link Boolean#valueOf(String)} and null elements stay null. A null
 * field value yields an empty {@link Optional} in both cases.
 *
 * @throws IllegalArgumentException if {@code avroType} is neither {@code BOOLEAN} nor {@code STRING}
 */
@VisibleForTesting
@SuppressWarnings("unchecked")
static Optional<List<Boolean>> readBoolArray(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  switch (avroType) {
    case BOOLEAN:
      return Optional.ofNullable((List<Boolean>) record.get(fieldName));
    case STRING:
      return Optional.ofNullable((List<Utf8>) record.get(fieldName))
          .map(
              texts ->
                  texts
                      .stream()
                      .map(text -> text == null ? null : Boolean.valueOf(text.toString()))
                      .collect(Collectors.toList()));
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as BOOL");
  }
}
 
Example #6
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a map whose values are a string/null/int union and checks that the fast generic
 * deserializer returns all three entries, including the null one.
 */
@Test
public void shouldDeserializeNullElementInMap() {
    // given
    Schema valueUnion = Schema.createUnion(
            Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT));
    Schema mapSchema = Schema.createMap(valueUnion);

    Map<String, Object> input = new HashMap<>();
    input.put("0", "0");
    input.put("1", null);
    input.put("2", 2);

    // when
    Map<Utf8, Object> decoded = deserializeGenericFast(mapSchema, mapSchema,
            serializeGeneric(input, mapSchema));

    // then
    Assert.assertEquals(3, decoded.size());
    Assert.assertEquals("0", decoded.get(new Utf8("0")).toString());
    Assert.assertNull(decoded.get(new Utf8("1")));
    Assert.assertEquals(2, decoded.get(new Utf8("2")));
}
 
Example #7
Source File: Array_of_UNION_GenericSerializer_585074122056792963.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Writes field 0 of {@code data} as a two-branch union: branch 0 with a null when the field is
 * null, branch 1 with the string otherwise.
 *
 * @throws IOException if the encoder fails
 */
@SuppressWarnings("unchecked")
public void serializeRecord0(IndexedRecord data, Encoder encoder)
    throws IOException
{
    final CharSequence text = (CharSequence) data.get(0);
    if (text == null) {
        encoder.writeIndex(0);
        encoder.writeNull();
        return;
    }

    // A non-null value that passed the cast above is always a CharSequence.
    encoder.writeIndex(1);
    if (text instanceof Utf8) {
        encoder.writeString((Utf8) text);
    } else {
        encoder.writeString(text.toString());
    }
}
 
Example #8
Source File: FastSpecificSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Round-trips a record whose array and map fields (plain and union variants) contain a
 * sub-record, then checks that the sub-record's string field survives in each collection.
 */
@Test(groups = {"serializationTest"})
public void shouldWriteSubRecordCollectionsField() {

  // given
  TestRecord record = emptyTestRecord();
  SubRecord element = new SubRecord();
  element.subField = "abc";

  List<SubRecord> asArray = new ArrayList<>();
  asArray.add(element);
  Map<CharSequence, SubRecord> asMap = new HashMap<>();
  asMap.put("1", element);

  record.recordsArray = asArray;
  record.recordsArrayUnion = asArray;
  record.recordsMap = asMap;
  record.recordsMapUnion = asMap;

  // when
  record = decodeRecordFast(TestRecord.SCHEMA$, dataAsDecoder(record));

  // then
  Assert.assertEquals("abc", record.recordsArray.get(0).subField.toString());
  Assert.assertEquals("abc", record.recordsArrayUnion.get(0).subField.toString());
  // After decoding, the maps are queried with Utf8 keys.
  Utf8 decodedKey = new Utf8("1");
  Assert.assertEquals("abc", record.recordsMap.get(decodedKey).subField.toString());
  Assert.assertEquals("abc", record.recordsMapUnion.get(decodedKey).subField.toString());
}
 
Example #9
Source File: JobUnsuccessfulCompletionEvent.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Create an event to record unsuccessful completion (killed/failed) of jobs
 * @param id Job ID
 * @param finishTime Finish time of the job
 * @param finishedMaps Number of finished maps
 * @param finishedReduces Number of finished reduces
 * @param status Status of the job
 * @param diagnostics job runtime diagnostics
 */
public JobUnsuccessfulCompletionEvent(JobID id, long finishTime,
    int finishedMaps,
    int finishedReduces,
    String status,
    Iterable<String> diagnostics) {
  datum.setJobid(new Utf8(id.toString()));
  datum.setFinishTime(finishTime);
  datum.setFinishedMaps(finishedMaps);
  datum.setFinishedReduces(finishedReduces);
  datum.setJobStatus(new Utf8(status));
  if (diagnostics == null) {
    diagnostics = NODIAGS_LIST;
  }
  datum.setDiagnostics(new Utf8(Joiner.on('\n').skipNulls()
      .join(diagnostics)));
}
 
Example #10
Source File: recordName_GenericDeserializer_6897301803194779359_6897301803194779359.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Reads one record from {@code decoder}: field 0 is a string, field 1 is a union whose branch 1
 * is decoded recursively by this same method (branch 0 is a null and leaves field 1 untouched).
 *
 * <p>{@code reuse} is recycled only when it is an {@link IndexedRecord} whose schema is the very
 * same instance as {@code readerSchema}.
 *
 * @throws IOException if the decoder fails
 */
public IndexedRecord deserializerecordName0(Object reuse, Decoder decoder)
    throws IOException
{
    final IndexedRecord target;
    if ((reuse instanceof IndexedRecord) && (((IndexedRecord) reuse).getSchema() == readerSchema)) {
        target = (IndexedRecord) reuse;
    } else {
        target = new org.apache.avro.generic.GenericData.Record(readerSchema);
    }

    // Field 0: pass the existing Utf8 (if any) to readString as the object to reuse.
    final Object previousText = target.get(0);
    target.put(0, decoder.readString((previousText instanceof Utf8) ? (Utf8) previousText : null));

    // Field 1: union of null and a nested record.
    switch (decoder.readIndex()) {
        case 0:
            decoder.readNull();
            break;
        case 1:
            target.put(1, deserializerecordName0(target.get(1), decoder));
            break;
        default:
            break;
    }
    return target;
}
 
Example #11
Source File: Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Reads one record from {@code decoder}. Field 0 is a union: branch 0 is a null (the field is
 * left as it was), branch 1 is a string.
 *
 * <p>{@code reuse} is recycled only when it is an {@link IndexedRecord} whose schema is the very
 * same instance as {@code mapValueOptionSchema0}.
 *
 * @throws IOException if the decoder fails
 */
public IndexedRecord deserializerecord0(Object reuse, Decoder decoder)
    throws IOException
{
    IndexedRecord result = null;
    if (reuse instanceof IndexedRecord) {
        IndexedRecord candidate = (IndexedRecord) reuse;
        if (candidate.getSchema() == mapValueOptionSchema0) {
            result = candidate;
        }
    }
    if (result == null) {
        result = new org.apache.avro.generic.GenericData.Record(mapValueOptionSchema0);
    }

    switch (decoder.readIndex()) {
        case 0:
            decoder.readNull();
            break;
        case 1: {
            // Pass the existing Utf8 (if any) to readString as the object to reuse.
            Object previous = result.get(0);
            Utf8 recycled = (previous instanceof Utf8) ? (Utf8) previous : null;
            result.put(0, decoder.readString(recycled));
            break;
        }
        default:
            break;
    }
    return result;
}
 
Example #12
Source File: OracleGenericSchemaDecoder.java    From DBus with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes every record readable from {@code input} into an {@link OracleGenericMessage}.
 *
 * @param input bytes handed to {@code getBinaryDecoder}
 * @return the decoded messages in the order they were read; empty when the decoder is already
 *     at its end
 * @throws IOException if reading from the decoder fails
 */
public List<IGenericMessage> unwrap(byte[] input) throws IOException {
    List<IGenericMessage> list = new LinkedList<>();

    BinaryDecoder decoder = getBinaryDecoder(input);
    while (!decoder.isEnd()) {
        GenericRecord record = datumReader.read(null, decoder);

        OracleGenericMessage msg = new OracleGenericMessage();

        Utf8 utf8 = (Utf8) record.get(OracleGenericMessage.NAMESAPCE);
        msg.setNameSpace(utf8.toString());
        msg.setSchemaHash((Integer) record.get(OracleGenericMessage.SCHEMA_HASH));

        // Copy only the readable bytes. ByteBuffer.array() would expose the whole backing array
        // (ignoring position, limit and array offset) and throws for read-only or direct
        // buffers. Reading through a duplicate leaves the record's own buffer position untouched.
        ByteBuffer payloadView = ((ByteBuffer) record.get(OracleGenericMessage.PAYLOAD)).duplicate();
        byte[] payload = new byte[payloadView.remaining()];
        payloadView.get(payload);
        msg.setPayload(payload);

        logger.debug(String.format("TAble: %s, HASH: %d\n", msg.getNameSpace(), msg.getSchemaHash()));

        list.add((IGenericMessage) msg);
    }

    return list;
}
 
Example #13
Source File: FastSpecificDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Round-trips a record carrying a sub-record in both its plain and its union field, using the
 * fast or the slow deserializer depending on {@code whetherUseFastDeserializer}, and checks the
 * decoded string field.
 */
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer")
public void shouldReadSubRecordField(Boolean whetherUseFastDeserializer) {
  // given
  SubRecord nested = new SubRecord();
  nested.subField = "abc";

  TestRecord record = emptyTestRecord();
  record.subRecordUnion = nested;
  record.subRecord = nested;

  // when
  if (whetherUseFastDeserializer) {
    record = decodeRecordFast(TestRecord.SCHEMA$, TestRecord.SCHEMA$, specificDataAsDecoder(record));
  } else {
    record = decodeRecordSlow(TestRecord.SCHEMA$, TestRecord.SCHEMA$, specificDataAsDecoder(record));
  }

  // then
  Utf8 expected = new Utf8("abc");
  Assert.assertEquals(expected, record.subRecordUnion.subField);
  Assert.assertEquals(expected, record.subRecord.subField);
}
 
Example #14
Source File: TaskFailedEvent.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the Avro datum for this event, building and caching it in {@code datum} on the first
 * call.
 */
public Object getDatum() {
  if (datum != null) {
    return datum;
  }

  datum = new TaskFailed();
  datum.taskid = new Utf8(id.toString());
  datum.error = new Utf8(error);
  datum.finishTime = finishTime;
  datum.taskType = new Utf8(taskType.name());
  // The failing attempt is optional and stays null when absent.
  if (failedDueToAttempt == null) {
    datum.failedDueToAttempt = null;
  } else {
    datum.failedDueToAttempt = new Utf8(failedDueToAttempt.toString());
  }
  datum.status = new Utf8(status);
  datum.counters = EventWriter.toAvro(counters);
  return datum;
}
 
Example #15
Source File: Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Reads one record from {@code decoder}. Field 0 is a union: branch 0 is a null (the field is
 * left as it was), branch 1 is a string.
 *
 * <p>{@code reuse} is recycled only when it is an {@link IndexedRecord} whose schema is the very
 * same instance as {@code mapMapValueSchema0}.
 *
 * @throws IOException if the decoder fails
 */
public IndexedRecord deserializerecord0(Object reuse, Decoder decoder)
    throws IOException
{
    final IndexedRecord record;
    if ((reuse instanceof IndexedRecord) && (((IndexedRecord) reuse).getSchema() == mapMapValueSchema0)) {
        record = (IndexedRecord) reuse;
    } else {
        record = new org.apache.avro.generic.GenericData.Record(mapMapValueSchema0);
    }

    final int branch = decoder.readIndex();
    if (branch == 0) {
        decoder.readNull();
        return record;
    }
    if (branch == 1) {
        // Pass the existing Utf8 (if any) to readString as the object to reuse.
        final Object previous = record.get(0);
        record.put(0, decoder.readString((previous instanceof Utf8) ? (Utf8) previous : null));
    }
    return record;
}
 
Example #16
Source File: GobblinMetricsPinotFlattenerConverter.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Flattens one input record into one output record per entry of its {@code "metrics"} list.
 *
 * <p>Every output record shares the input's {@code "timestamp"} and a {@code "tags"} list in
 * which each map entry is rendered as {@code key:value}; the metric's {@code "name"} and
 * {@code "value"} are copied to {@code "metricName"} and {@code "metricValue"}.
 */
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, GenericRecord inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  Map<Utf8, Utf8> tags = (Map<Utf8, Utf8>) inputRecord.get("tags");
  List<String> renderedTags = Lists.newArrayList();
  for (Map.Entry<Utf8, Utf8> tag : tags.entrySet()) {
    renderedTags.add(tag.getKey().toString() + ":" + tag.getValue().toString());
  }

  // Values common to every flattened metric.
  GenericRecordBuilder shared = new GenericRecordBuilder(this.schema);
  shared.set("tags", renderedTags);
  shared.set("timestamp", inputRecord.get("timestamp"));

  List<GenericRecord> flattened = Lists.newArrayList();
  for (GenericRecord metric : (List<GenericRecord>) inputRecord.get("metrics")) {
    GenericRecordBuilder single = new GenericRecordBuilder(shared);
    single.set("metricName", metric.get("name"));
    single.set("metricValue", metric.get("value"));
    flattened.add(single.build());
  }
  return flattened;
}
 
Example #17
Source File: Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Reads a map of records from {@code decoder}.
 *
 * <p>When the first block reported by the decoder is empty, {@link Collections#emptyMap()} is
 * returned and {@code reuse} is not touched. Otherwise {@code reuse} (if non-null) is cleared and
 * refilled, or a new {@link HashMap} is created.
 *
 * @throws IOException if the decoder fails
 */
public Map<Utf8, IndexedRecord> deserialize(Map<Utf8, IndexedRecord> reuse, Decoder decoder)
    throws IOException
{
    long remaining = decoder.readMapStart();
    if (remaining <= 0) {
        return Collections.emptyMap();
    }

    final Map<Utf8, IndexedRecord> result;
    if (reuse != null) {
        reuse.clear();
        result = reuse;
    } else {
        result = new HashMap<Utf8, IndexedRecord>();
    }

    // Entries arrive in blocks; mapNext() gives the size of the next block.
    while (remaining > 0) {
        for (int i = 0; i < remaining; i++) {
            Utf8 key = decoder.readString(null);
            result.put(key, deserializerecord0(null, decoder));
        }
        remaining = decoder.mapNext();
    }
    return result;
}
 
Example #18
Source File: AvroParquetConvertIT.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Generates an Avro input file, runs the executor over it with a static configuration, and
 * checks that no error records were produced, that the Parquet output matches the input data,
 * and that the input file no longer exists.
 */
@Test
public void testStaticConfiguration() throws Exception {
  File avroInput = new File(getInputDir(), "input.avro");

  Map<String, Object> row = new ImmutableMap.Builder<String, Object>()
    .put("id", new Utf8("monitor"))
    .put("price", 10)
    .build();
  List<Map<String, Object>> data = ImmutableList.of(row);

  generateAvroFile(AVRO_SCHEMA, avroInput, data);

  AvroConversionCommonConfig commonConfig = new AvroConversionCommonConfig();
  commonConfig.inputFile = avroInput.getAbsolutePath();
  commonConfig.outputDirectory = getOutputDir();
  AvroParquetConfig parquetConfig = new AvroParquetConfig();

  MapReduceExecutor executor = generateExecutor(commonConfig, parquetConfig, Collections.emptyMap());

  ExecutorRunner runner = new ExecutorRunner.Builder(MapReduceDExecutor.class, executor)
    .setOnRecordError(OnRecordError.TO_ERROR)
    .build();
  runner.runInit();

  // A single record with an empty map as its root field is written to the executor.
  Record trigger = RecordCreator.create();
  trigger.set(Field.create(Collections.<String, Field>emptyMap()));

  runner.runWrite(ImmutableList.of(trigger));
  Assert.assertEquals(0, runner.getErrorRecords().size());
  runner.runDestroy();

  validateParquetFile(new Path(getOutputDir(), "input.parquet"), data);
  Assert.assertFalse(avroInput.exists());
}
 
Example #19
Source File: TestGenericLogicalTypes.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes two records holding {@link UUID} values and checks that reading them back with
 * {@code GenericData.get()} yields records holding the UUIDs' string forms as {@link Utf8}.
 */
@Test
public void testWriteUUIDReadStringMissingLogicalType() throws IOException {
  Schema uuidString = LogicalTypes.uuid().addToSchema(Schema.create(STRING));
  Schema uuidSchema = record("R",
      field("uuid", uuidString));

  GenericRecord written1 = instance(uuidSchema, "uuid", UUID.randomUUID());
  GenericRecord written2 = instance(uuidSchema, "uuid", UUID.randomUUID());

  // Expected records carry the textual form of the same UUIDs.
  GenericRecord expected1 = instance(uuidSchema, "uuid", new Utf8(written1.get("uuid").toString()));
  GenericRecord expected2 = instance(uuidSchema, "uuid", new Utf8(written2.get("uuid").toString()));

  File test = write(GENERIC, uuidSchema, written1, written2);
  Assert.assertEquals("Should read UUIDs as Strings",
      Arrays.asList(expected1, expected2), read(GenericData.get(), uuidSchema, test));
}
 
Example #20
Source File: MapAttemptFinishedEvent.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the Avro datum for this event, building and caching it in {@code datum} on the first
 * call. {@code rackname} is only set when {@code rackName} is non-null.
 */
public Object getDatum() {
  if (datum != null) {
    return datum;
  }

  datum = new MapAttemptFinished();
  datum.taskid = new Utf8(attemptId.getTaskID().toString());
  datum.attemptId = new Utf8(attemptId.toString());
  datum.taskType = new Utf8(taskType.name());
  datum.taskStatus = new Utf8(taskStatus);
  datum.mapFinishTime = mapFinishTime;
  datum.finishTime = finishTime;
  datum.hostname = new Utf8(hostname);
  datum.port = port;
  if (rackName != null) {
    datum.rackname = new Utf8(rackName);
  }
  datum.state = new Utf8(state);
  datum.counters = EventWriter.toAvro(counters);

  // Per-resource series extracted from allSplits.
  datum.clockSplits =
      AvroArrayUtils.toAvro(ProgressSplitsBlock.arrayGetWallclockTime(allSplits));
  datum.cpuUsages =
      AvroArrayUtils.toAvro(ProgressSplitsBlock.arrayGetCPUTime(allSplits));
  datum.gpuUsages =
      AvroArrayUtils.toAvro(ProgressSplitsBlock.arrayGetGPUTime(allSplits));
  datum.vMemKbytes =
      AvroArrayUtils.toAvro(ProgressSplitsBlock.arrayGetVMemKbytes(allSplits));
  datum.physMemKbytes =
      AvroArrayUtils.toAvro(ProgressSplitsBlock.arrayGetPhysMemKbytes(allSplits));
  return datum;
}
 
Example #21
Source File: AllAvroTypesIT.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the rows for this parameterized test. Each row has three elements; the first one is
 * substituted into the run name ({@code type({0})}).
 *
 * <p>NOTE(review): the three elements look like (1) an Avro schema as JSON text, (2) a sample
 * value for that schema, and (3) strings expected to occur in the resulting Parquet schema.
 * Confirm against the test constructor and assertions, which are not visible here.
 */
@Parameterized.Parameters(name = "type({0})")
public static Collection<Object[]> data() throws Exception {
  return Arrays.asList(new Object[][]{
    // Primitive types
    // Skipping null
    {"\"boolean\"",   true,                                                  ImmutableList.of("required boolean value;")},
    {"\"int\"",       Integer.MIN_VALUE,                                     ImmutableList.of("required int32 value;")},
    {"\"long\"",      Long.MAX_VALUE,                                        ImmutableList.of("required int64 value;")},
    {"\"float\"",     Float.NaN,                                             ImmutableList.of("required float value;")},
    {"\"double\"",    Double.NEGATIVE_INFINITY,                              ImmutableList.of("required double value;")},
    {"\"bytes\"",     ByteBuffer.wrap(new byte[]{(byte)0x00, (byte)0xFF}),   ImmutableList.of("required binary value;")},
    {"\"string\"",    new Utf8(""),                                          ImmutableList.of("required binary value (UTF8);")},

    // Complex types
    {RECORD.toString(),   RECORD_DATA,                                       ImmutableList.of("required group value", "required binary name (UTF8);", "required int64 value;")},
    {ENUM.toString(),     new GenericData.EnumSymbol(ENUM, "SPADES"),        ImmutableList.of("required binary value (ENUM);")},
    {ARRAY.toString(),    new ArrayList<>(ImmutableList.of(new Utf8("a"), new Utf8("b"), new Utf8("c"))), ImmutableList.of("repeated binary array (UTF8);")},
    {MAP.toString(),      ImmutableMap.of(new Utf8("key"), 1L),              ImmutableList.of("repeated group map (MAP_KEY_VALUE)", "required binary key (UTF8);", "required int64 value;")},
    {UNION.toString(),    new Utf8("union"),                                 ImmutableList.of("optional int64 member0;", "optional binary member1 (UTF8);")},
    {UNION_WITH_NULL.toString(),    null,                                 ImmutableList.of("optional binary value (UTF8);")},
    {FIXED.toString(),    new GenericData.Fixed(FIXED, new byte[]{(byte)0x00, (byte)0xFF}), ImmutableList.of("required fixed_len_byte_array(2) value;")},

    // Logical types
    // NOTE(review): unlike the other rows, the DECIMAL string below has no closing ")" or ";".
    // Presumably it is matched as a substring; confirm before "fixing" it.
    {DECIMAL.toString(), ByteBuffer.wrap(new byte[]{(byte)0x0F}),            ImmutableList.of("required binary value (DECIMAL(2,1)")},
    {DATE.toString(),               35000,                                   ImmutableList.of("required int32 value (DATE);")},
    {TIME_MILLIS.toString(),        35000,                                   ImmutableList.of("required int32 value (TIME_MILLIS);")},
    {TIMESTAMP_MILLIS.toString(),  35000L,                                   ImmutableList.of("required int96 value;")},
  });
}
 
Example #22
Source File: FastGenericSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Round-trips a generic record whose array and map fields (plain and union variants) contain a
 * sub-record, then checks that the sub-record's {@code subField} survives in each collection.
 */
@Test(groups = {"serializationTest"})
public void shouldWriteSubRecordCollectionsField() {
  // given
  Schema subRecordSchema = createRecord("subRecord", createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));
  Schema recordSchema = createRecord(
      createArrayFieldSchema("recordsArray", subRecordSchema),
      createMapFieldSchema("recordsMap", subRecordSchema),
      createUnionField("recordsArrayUnion", Schema.createArray(createUnionSchema(subRecordSchema))),
      createUnionField("recordsMapUnion", Schema.createMap(createUnionSchema(subRecordSchema))));

  GenericData.Record child = new GenericData.Record(subRecordSchema);
  child.put("subField", "abc");

  List<GenericData.Record> childList = new ArrayList<>();
  childList.add(child);
  Map<String, GenericData.Record> childMap = new HashMap<>();
  childMap.put("1", child);

  GenericData.Record parent = new GenericData.Record(recordSchema);
  parent.put("recordsArray", childList);
  parent.put("recordsArrayUnion", childList);
  parent.put("recordsMap", childMap);
  parent.put("recordsMapUnion", childMap);

  // when
  GenericRecord record = decodeRecord(recordSchema, dataAsBinaryDecoder(parent));

  // then
  List<GenericData.Record> decodedArray = (List<GenericData.Record>) record.get("recordsArray");
  Assert.assertEquals("abc", decodedArray.get(0).get("subField").toString());

  List<GenericData.Record> decodedArrayUnion = (List<GenericData.Record>) record.get("recordsArrayUnion");
  Assert.assertEquals("abc", decodedArrayUnion.get(0).get("subField").toString());

  // The decoded maps are queried with Utf8 keys.
  Map<String, GenericData.Record> decodedMap = (Map<String, GenericData.Record>) record.get("recordsMap");
  Assert.assertEquals("abc", decodedMap.get(new Utf8("1")).get("subField").toString());

  Map<String, GenericData.Record> decodedMapUnion =
      (Map<String, GenericData.Record>) record.get("recordsMapUnion");
  Assert.assertEquals("abc", decodedMapUnion.get(new Utf8("1")).get("subField").toString());
}
 
Example #23
Source File: AvroWriteSupportInt96Avro18.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Avro string value to a {@link Binary}. A {@link Utf8} is wrapped using its byte
 * array and byte length directly; any other value is cast to {@link CharSequence} and converted.
 */
private Binary fromAvroString(Object value) {
  if (!(value instanceof Utf8)) {
    return Binary.fromCharSequence((CharSequence) value);
  }
  Utf8 text = (Utf8) value;
  // Only the first getByteLength() bytes of the array are passed on.
  return Binary.fromReusedByteArray(text.getBytes(), 0, text.getByteLength());
}
 
Example #24
Source File: Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Reads a map of records from {@code decoder}.
 *
 * <p>When the first block reported by the decoder is empty, {@link Collections#emptyMap()} is
 * returned and {@code reuse} is not touched. Otherwise {@code reuse} (if non-null) is cleared and
 * refilled, or a new {@link HashMap} is created.
 *
 * @throws IOException if the decoder fails
 */
public Map<Utf8, IndexedRecord> deserialize(Map<Utf8, IndexedRecord> reuse, Decoder decoder)
    throws IOException
{
    long blockSize = decoder.readMapStart();
    if (blockSize <= 0) {
        return Collections.emptyMap();
    }

    final Map<Utf8, IndexedRecord> entries;
    if (reuse == null) {
        entries = new HashMap<Utf8, IndexedRecord>();
    } else {
        reuse.clear();
        entries = reuse;
    }

    // Entries arrive in blocks; mapNext() gives the size of the next block.
    while (blockSize > 0) {
        for (int read = 0; read < blockSize; read++) {
            Utf8 entryKey = decoder.readString(null);
            entries.put(entryKey, deserializerecord0(null, decoder));
        }
        blockSize = decoder.mapNext();
    }
    return entries;
}
 
Example #25
Source File: TestAvroTypeUtil.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Avro map of ints to an SDC field and back, checking both entries in each
 * direction, and finally checks that a string field is rejected for the map schema.
 */
@Test
public void testCreateMapPrimitiveField() throws Exception {
  String schemaJson = "{\"type\":\"map\",\"values\":\"int\"}";
  Schema avroSchema = new Schema.Parser().parse(schemaJson);

  Record record = RecordCreator.create();
  Field field = AvroTypeUtil.avroToSdcField(record, avroSchema, ImmutableMap.of(new Utf8("Hari"), 1, new Utf8("Kiran"), 2), false);
  Assert.assertEquals(Field.Type.MAP, field.getType());
  Map<String, Field> valueAsMap = field.getValueAsMap();

  // Avro -> SDC: the Utf8 keys are visible as String keys.
  Assert.assertTrue(valueAsMap.containsKey("Hari"));
  Field hariField = valueAsMap.get("Hari");
  Assert.assertEquals(Field.Type.INTEGER, hariField.getType());
  Assert.assertEquals(1, hariField.getValueAsInteger());

  Assert.assertTrue(valueAsMap.containsKey("Kiran"));
  Field kiranField = valueAsMap.get("Kiran");
  Assert.assertEquals(Field.Type.INTEGER, kiranField.getType());
  Assert.assertEquals(2, kiranField.getValueAsInteger());

  // SDC -> Avro
  record.set(field);
  Object avroObject = AvroTypeUtil.sdcRecordToAvro(record, avroSchema, new HashMap<String, Object>());
  Assert.assertTrue(avroObject instanceof Map<?, ?>);

  Map<String, Integer> roundTripped = (Map<String, Integer>) avroObject;
  Assert.assertTrue(roundTripped.containsKey("Hari"));
  Assert.assertEquals(1, (int) roundTripped.get("Hari"));
  Assert.assertTrue(roundTripped.containsKey("Kiran"));
  Assert.assertEquals(2, (int) roundTripped.get("Kiran"));

  //Check invalid type - String to Map
  makeBadType(Field.create("notMicroseconds"), record, avroSchema);
}
 
Example #26
Source File: FastSpecificDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Round-trips a record whose {@code union} field holds, in turn, a sub-record, a string and an
 * int, and checks the decoded value each time.
 *
 * <p>All three round trips honour {@code whetherUseFastDeserializer}. Previously the sub-record
 * case always used the fast deserializer, so the slow path was never exercised for it.
 *
 * @param whetherUseFastDeserializer {@code true} to decode with the fast deserializer,
 *     {@code false} to decode with the slow one
 */
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer")
public void shouldReadMultipleChoiceUnion(Boolean whetherUseFastDeserializer) {
  // given
  TestRecord record = emptyTestRecord();
  SubRecord subRecord = new SubRecord();
  subRecord.subField = "abc";
  record.union = subRecord;

  // when
  record = decodeUnionRecord(record, whetherUseFastDeserializer);

  // then
  Assert.assertEquals(new Utf8("abc"), ((SubRecord) record.union).subField);

  // given
  record.union = "abc";

  // when
  record = decodeUnionRecord(record, whetherUseFastDeserializer);

  // then
  Assert.assertEquals(new Utf8("abc"), record.union);

  // given
  record.union = 1;

  // when
  record = decodeUnionRecord(record, whetherUseFastDeserializer);

  // then
  Assert.assertEquals(1, record.union);
}

/** Serializes {@code record} and decodes it again with the fast or the slow deserializer. */
private TestRecord decodeUnionRecord(TestRecord record, boolean useFastDeserializer) {
  if (useFastDeserializer) {
    return decodeRecordFast(TestRecord.SCHEMA$, TestRecord.SCHEMA$, specificDataAsDecoder(record));
  }
  return decodeRecordSlow(TestRecord.SCHEMA$, TestRecord.SCHEMA$, specificDataAsDecoder(record));
}
 
Example #27
Source File: AvroMapCodecs.java    From funcj with MIT License 5 votes vote down vote up
/**
 * Encodes {@code value} as a map keyed by {@link Utf8}. {@code out} must be a {@link Schema}
 * that passes the {@code MAP} type check; each map value is encoded by {@code valueCodec}
 * against that schema's value type.
 *
 * @return a new {@link HashMap} holding the encoded entries
 */
@Override
public Object encode(CodecCoreEx<WithSchema, Object, Config> core, Map<String, V> value, Object out) {
    final Schema mapSchema = checkSchemaType((Schema)out, Schema.Type.MAP);
    final Schema elementSchema = mapSchema.getValueType();

    final Map<CharSequence, Object> encoded = new HashMap<>();
    for (Map.Entry<String, V> entry : value.entrySet()) {
        encoded.put(
                new Utf8(entry.getKey()),
                valueCodec.encodeWithCheck(core, entry.getValue(), elementSchema));
    }
    return encoded;
}
 
Example #28
Source File: PartitionData.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Stores {@code value} at {@code pos}, first converting the two types that are not
 * {@code Serializable}: a {@link Utf8} is stored as its {@code String} form and a
 * {@link ByteBuffer} as a copy of its remaining bytes. The buffer's own position is not moved.
 */
@Override
public <T> void set(int pos, T value) {
  final Object stored;
  if (value instanceof Utf8) {
    // Utf8 is not Serializable
    stored = value.toString();
  } else if (value instanceof ByteBuffer) {
    // ByteBuffer is not Serializable; read through a duplicate so the caller's buffer is untouched
    ByteBuffer view = ((ByteBuffer) value).duplicate();
    byte[] copy = new byte[view.remaining()];
    view.get(copy);
    stored = copy;
  } else {
    stored = value;
  }
  data[pos] = stored;
}
 
Example #29
Source File: ParquetValueWriters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code value} to the column as binary. A {@link Utf8} is wrapped using its byte array
 * and byte length directly; any other {@link CharSequence} goes through {@code toString()}.
 */
@Override
public void write(int repetitionLevel, CharSequence value) {
  final Binary encoded;
  if (value instanceof Utf8) {
    Utf8 text = (Utf8) value;
    encoded = Binary.fromReusedByteArray(text.getBytes(), 0, text.getByteLength());
  } else {
    encoded = Binary.fromString(value.toString());
  }
  column.writeBinary(repetitionLevel, encoded);
}
 
Example #30
Source File: SparkValueWriters.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code s} to {@code encoder} as an Avro string, wrapping its bytes in a {@link Utf8}.
 *
 * @throws IOException if the encoder fails
 */
@Override
public void write(UTF8String s, Encoder encoder) throws IOException {
  // getBytes may hand back the backing array when one is available; otherwise it copies,
  // which is still cheaper than letting Avro call toString and pay the encoding cost.
  final byte[] utf8Bytes = s.getBytes();
  encoder.writeString(new Utf8(utf8Bytes));
}