Java Code Examples for org.apache.avro.Schema#getValueType()

The following examples show how to use org.apache.avro.Schema#getValueType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroMapCodecs.java    From funcj with MIT License 6 votes vote down vote up
/**
 * Decodes an Avro map value into a {@code Map<String, V>}.
 *
 * <p>The schema carried by {@code in} is passed through {@code checkSchemaType} with
 * {@link Schema.Type#MAP}; every entry value is then decoded with {@code valueCodec}
 * against the map's value schema, and every key is converted with {@code toString()}.
 *
 * @param core the codec core handed on to the value codec
 * @param in   the map value paired with its schema
 * @return a new {@link HashMap} holding the decoded entries
 */
@Override
public Map<String, V> decode(CodecCoreEx<WithSchema, Object, Config> core, WithSchema in) {
    final Schema mapSchema = checkSchemaType(in.schema(), Schema.Type.MAP);
    final Schema elementSchema = mapSchema.getValueType();
    final Map<CharSequence, Object> source = in.value();

    final Map<String, V> decoded = new HashMap<>();
    for (Map.Entry<CharSequence, Object> entry : source.entrySet()) {
        decoded.put(
                entry.getKey().toString(),
                valueCodec.decodeWithCheck(core, WithSchema.of(entry.getValue(), elementSchema)));
    }
    return decoded;
}
 
Example 2
Source File: PigAvroDatumReader.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Called to read a map instance. Overridden to read a pig map.
 *
 * <p>Entries arrive in blocks: {@code readMapStart()} gives the size of the first block and
 * {@code mapNext()} the size of each following one; reading stops at the first block size
 * that is not positive.
 *
 * @param old      value handed to {@code newMap} together with the first block size
 * @param expected the map schema; its value type is used to read each entry value
 * @param in       decoder to read from
 * @return the object produced by {@code newMap}, populated through {@code addToMap}
 * @throws IOException if reading from the decoder fails
 */
protected Object readMap(Object old, Schema expected, ResolvingDecoder in) throws IOException {
    final Schema valueSchema = expected.getValueType();
    long blockSize = in.readMapStart();
    final Object result = newMap(old, (int) blockSize);
    while (blockSize > 0) {
        for (int i = 0; i < blockSize; i++) {
            addToMap(result,
                     readString(null, AvroStorageUtils.StringSchema, in),
                     read(null, valueSchema, in));
        }
        blockSize = in.mapNext();
    }
    return result;
}
 
Example 3
Source File: MercifulJsonConverter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the processor used for map-typed fields.
 *
 * <p>The returned processor casts the incoming value to {@code Map<String, Object>} and
 * converts every entry value with {@code convertJsonToAvroField} against the map schema's
 * value type, keeping the keys unchanged. It always reports success ({@code true}).
 */
private static JsonToAvroFieldProcessor generateMapTypeHandler() {
  return new JsonToAvroFieldProcessor() {
    @Override
    public Pair<Boolean, Object> convert(Object value, String name, Schema schema) {
      final Schema elementSchema = schema.getValueType();
      final Map<String, Object> converted = new HashMap<>();
      final Map<String, Object> source = (Map<String, Object>) value;
      for (Map.Entry<String, Object> entry : source.entrySet()) {
        converted.put(entry.getKey(), convertJsonToAvroField(entry.getValue(), name, elementSchema));
      }
      return Pair.of(true, converted);
    }
  };
}
 
Example 4
Source File: AvroResolver.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves an Avro map field into output fields.
 * <p>
 * For each map entry, the key (as an Avro string) and the value (using the map's
 * value schema) are populated into a temporary record; that record is rendered with
 * {@code HdfsUtilities.toString} using {@code mapkeyDelim} and appended to
 * {@code record} as a single {@code DataType.TEXT} field.
 * <p>
 * The unchecked warning is suppressed so that {@code fieldValue} can be cast to a Map
 * (the caller is expected to have identified the schema as being of type map).
 *
 * @param record     list of fields to be populated
 * @param fieldValue field value
 * @param mapSchema  map schema
 * @return number of populated fields, i.e. the size of the map
 */
@SuppressWarnings("unchecked")
int setMapField(List<OneField> record, Object fieldValue, Schema mapSchema) {
    final Schema stringKeySchema = Schema.create(Schema.Type.STRING);
    final Schema entryValueSchema = mapSchema.getValueType();
    final Map<String, ?> entries = (Map<String, ?>) fieldValue;

    for (Map.Entry<String, ?> entry : entries.entrySet()) {
        // One temporary record per entry: key first, then value.
        final List<OneField> keyValuePair = new LinkedList<>();
        populateRecord(keyValuePair, entry.getKey(), stringKeySchema);
        populateRecord(keyValuePair, entry.getValue(), entryValueSchema);
        addOneFieldToRecord(
                record,
                DataType.TEXT,
                HdfsUtilities.toString(keyValuePair, mapkeyDelim));
    }
    return entries.size();
}
 
Example 5
Source File: PigAvroDatumReader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Called to read a map instance. Overridden to read a pig map.
 *
 * <p>Entries arrive in blocks: {@code readMapStart()} gives the size of the first block and
 * {@code mapNext()} the size of each following one; reading stops at the first block size
 * that is not positive.
 *
 * @param old      value handed to {@code newMap} together with the first block size
 * @param expected the map schema; its value type is used to read each entry value
 * @param in       decoder to read from
 * @return the object produced by {@code newMap}, populated through {@code addToMap}
 * @throws IOException if reading from the decoder fails
 */
protected Object readMap(Object old, Schema expected, ResolvingDecoder in) throws IOException {
    final Schema valueSchema = expected.getValueType();
    long blockSize = in.readMapStart();
    final Object result = newMap(old, (int) blockSize);
    while (blockSize > 0) {
        for (int i = 0; i < blockSize; i++) {
            addToMap(result,
                     readString(null, AvroStorageUtils.StringSchema, in),
                     read(null, valueSchema, in));
        }
        blockSize = in.mapNext();
    }
    return result;
}
 
Example 6
Source File: ParquetRecordReaderTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one record whose "nestedMap" field holds a single nested record under "testKey",
 * reads it back through {@code ParquetRecordReader} and checks the resulting row.
 */
@Test
public void testNestedMapGroup() throws IOException {
	final Schema mapSchema = unWrapSchema(NESTED_SCHEMA.getField("nestedMap").schema());
	Preconditions.checkState(mapSchema.getType().equals(Schema.Type.MAP));

	// The single map entry: a nested record built against the map's value schema.
	final Schema nestedSchema = mapSchema.getValueType();
	final GenericRecord nestedValue = new GenericRecordBuilder(nestedSchema)
		.set("type", "nested")
		.set("value", "nested_value")
		.build();

	final ImmutableMap.Builder<String, GenericRecord> mapBuilder = ImmutableMap.builder();
	mapBuilder.put("testKey", nestedValue);

	final GenericRecord written = new GenericRecordBuilder(NESTED_SCHEMA)
		.set("nestedMap", mapBuilder.build())
		.set("foo", 34L)
		.build();

	final Path parquetFile =
		createTempParquetFile(tempRoot.getRoot(), NESTED_SCHEMA, Collections.singletonList(written));
	final MessageType parquetSchema = (new AvroSchemaConverter()).convert(NESTED_SCHEMA);
	final ParquetRecordReader<Row> reader =
		new ParquetRecordReader<>(new RowReadSupport(), parquetSchema);

	final InputFile input =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(parquetFile.toUri()), testConfig);
	final ParquetReadOptions readOptions = ParquetReadOptions.builder().build();
	final ParquetFileReader parquetReader = new ParquetFileReader(input, readOptions);

	reader.initialize(parquetReader, testConfig);
	assertFalse(reader.reachEnd());

	final Row row = reader.nextRecord();
	assertEquals(7, row.getArity());
	assertEquals(34L, row.getField(0));

	// Field 5 carries the map; its only entry must round-trip unchanged.
	final Map nestedResult = (Map) row.getField(5);
	final Row nestedRow = (Row) nestedResult.get("testKey");
	assertEquals("nested", nestedRow.getField(0));
	assertEquals("nested_value", nestedRow.getField(1));
}
 
Example 7
Source File: ParquetRecordReaderTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one record whose "nestedMap" field holds a single nested record under "testKey",
 * reads it back through {@code ParquetRecordReader} and checks the resulting row.
 */
@Test
public void testNestedMapGroup() throws IOException {
	final Schema mapSchema = unWrapSchema(NESTED_SCHEMA.getField("nestedMap").schema());
	Preconditions.checkState(mapSchema.getType().equals(Schema.Type.MAP));

	// The single map entry: a nested record built against the map's value schema.
	final Schema nestedSchema = mapSchema.getValueType();
	final GenericRecord nestedValue = new GenericRecordBuilder(nestedSchema)
		.set("type", "nested")
		.set("value", "nested_value")
		.build();

	final ImmutableMap.Builder<String, GenericRecord> mapBuilder = ImmutableMap.builder();
	mapBuilder.put("testKey", nestedValue);

	final GenericRecord written = new GenericRecordBuilder(NESTED_SCHEMA)
		.set("nestedMap", mapBuilder.build())
		.set("foo", 34L)
		.build();

	final Path parquetFile =
		createTempParquetFile(tempRoot.getRoot(), NESTED_SCHEMA, Collections.singletonList(written));
	final MessageType parquetSchema = (new AvroSchemaConverter()).convert(NESTED_SCHEMA);
	final ParquetRecordReader<Row> reader =
		new ParquetRecordReader<>(new RowReadSupport(), parquetSchema);

	final InputFile input =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(parquetFile.toUri()), testConfig);
	final ParquetReadOptions readOptions = ParquetReadOptions.builder().build();
	final ParquetFileReader parquetReader = new ParquetFileReader(input, readOptions);

	reader.initialize(parquetReader, testConfig);
	assertFalse(reader.reachEnd());

	final Row row = reader.nextRecord();
	assertEquals(7, row.getArity());
	assertEquals(34L, row.getField(0));

	// Field 5 carries the map; its only entry must round-trip unchanged.
	final Map nestedResult = (Map) row.getField(5);
	final Row nestedRow = (Row) nestedResult.get("testKey");
	assertEquals("nested", nestedRow.getField(0));
	assertEquals("nested_value", nestedRow.getField(1));
}
 
Example 8
Source File: Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.mapMapValueSchema0 = readerSchema.getValueType();
    this.mapValueOptionSchema0 = mapMapValueSchema0 .getTypes().get(1);
    this.field0 = mapValueOptionSchema0 .getField("field").schema();
}
 
Example 9
Source File: AvroUtils.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Convert an Avro schema into its associated DataType.
 *
 * <p>A nullable union with exactly two branches is first collapsed to its non-null branch.
 * The logical types {@code date}, {@code timestamp-millis} and {@code decimal} take
 * precedence over the underlying Avro type; any other logical type is logged and the base
 * type is converted instead. Records, arrays, maps and remaining unions are converted
 * recursively.
 *
 * @param schemaType Avro schema to convert
 * @return DataType representation
 * @throws RuntimeException if the Avro type has no conversion
 */
public static DataType dataTypeFor(Schema schemaType) {
  LOG.trace("Converting Schema[{}] to DataType", schemaType);

  // Only a two-branch nullable union is "unwrapped"; anything larger is a full union
  // and is rendered as such further down.
  if (isNullable(schemaType) && schemaType.getTypes().size() == 2) {
    LOG.trace("Unwrapping simple 'optional' union for {}", schemaType);
    for (Schema branch : schemaType.getTypes()) {
      if (!branch.getType().equals(NULL)) {
        schemaType = branch;
        break;
      }
    }
  }

  // Supported logical types win over the base type.
  final LogicalType logicalType = schemaType.getLogicalType();
  if (logicalType != null) {
    switch (logicalType.getName()) {
      case "date":
        return DataTypes.DateType;
      case "timestamp-millis":
        return DataTypes.TimestampType;
      case "decimal": {
        final LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
        return DataTypes.createDecimalType(decimalType.getPrecision(), decimalType.getScale());
      }
      default:
        // Not an error: fall out of the switch and convert the base type below.
        LOG.warn("Unsupported LogicalType[{}], continuing with underlying base type", logicalType.getName());
    }
  }

  switch (schemaType.getType()) {
    case RECORD: {
      // One struct field per record field.
      final List<Field> recordFields = schemaType.getFields();
      final List<StructField> structFields = Lists.newArrayListWithCapacity(recordFields.size());
      for (Field field : recordFields) {
        final Schema fieldSchema = field.schema();
        structFields.add(
            DataTypes.createStructField(field.name(), dataTypeFor(fieldSchema), isNullable(fieldSchema)));
      }
      return DataTypes.createStructType(structFields);
    }
    case ARRAY: {
      final Schema itemSchema = schemaType.getElementType();
      return DataTypes.createArrayType(dataTypeFor(itemSchema), isNullable(itemSchema));
    }
    case MAP: {
      final Schema entrySchema = schemaType.getValueType();
      return DataTypes.createMapType(DataTypes.StringType, dataTypeFor(entrySchema), isNullable(entrySchema));
    }
    case UNION: {
      // One struct field per union branch, named member0, member1, ...
      final List<Schema> branches = schemaType.getTypes();
      final List<StructField> memberFields = Lists.newArrayListWithCapacity(branches.size());
      for (int index = 0; index < branches.size(); index++) {
        final Schema branch = branches.get(index);
        memberFields.add(
            DataTypes.createStructField("member" + index, dataTypeFor(branch), isNullable(branch)));
      }
      return DataTypes.createStructType(memberFields);
    }
    case FIXED:
    case BYTES:
      return DataTypes.BinaryType;
    case ENUM:
    case STRING:
      return DataTypes.StringType;
    case INT:
      return DataTypes.IntegerType;
    case LONG:
      return DataTypes.LongType;
    case FLOAT:
      return DataTypes.FloatType;
    case DOUBLE:
      return DataTypes.DoubleType;
    case BOOLEAN:
      return DataTypes.BooleanType;
    case NULL:
      return DataTypes.NullType;
    default:
      throw new RuntimeException(String.format("Unrecognized or unsupported Avro Type conversion: %s", schemaType));
  }
}
 
Example 10
Source File: AvroFieldsGenerator.java    From registry with Apache License 2.0 4 votes vote down vote up
/**
 * Walks a schema recursively, handing every field of every record to {@code parseField}.
 *
 * <p>Maps, arrays and unions are descended into through their value type, element type and
 * branch types respectively. Enum, fixed and primitive schemas are leaves and produce
 * nothing here. A record is expanded only the first time its full name is seen.
 *
 * @param schema           the schema to visit
 * @param schemaFieldInfos accumulator passed on to {@code parseField}
 * @param visitedRecords   full names of the records already expanded
 * @throws RuntimeException if the schema type is not one of the handled types
 */
private void parseSchema(Schema schema, List<SchemaFieldInfo> schemaFieldInfos, Set<String> visitedRecords) {
    final Schema.Type schemaKind = schema.getType();
    LOG.debug("Visiting type: [{}]", schemaKind);

    switch (schemaKind) {
        case RECORD:
            // add() returns false when the name is already present, in which case the
            // record was parsed before and can be ignored.
            if (visitedRecords.add(schema.getFullName())) {
                for (Schema.Field recordField : schema.getFields()) {
                    parseField(recordField, schemaFieldInfos, visitedRecords);
                }
            }
            break;

        case MAP:
            parseSchema(schema.getValueType(), schemaFieldInfos, visitedRecords);
            break;

        case ARRAY:
            parseSchema(schema.getElementType(), schemaFieldInfos, visitedRecords);
            break;

        case UNION:
            for (Schema branch : schema.getTypes()) {
                parseSchema(branch, schemaFieldInfos, visitedRecords);
            }
            break;

        // Leaf types: nothing to descend into.
        case ENUM:
        case STRING:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case FIXED:
        case BOOLEAN:
        case BYTES:
        case NULL:
            break;

        default:
            throw new RuntimeException("Unsupported type: " + schemaKind);
    }
}
 
Example 11
Source File: FastSerializerGenerator.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Appends to {@code body} the generated statements that serialize a map.
 *
 * <p>The statements are emitted in this order: a {@code writeMapStart} call on the encoder,
 * an if/else that sets the item count (0 when the map expression is null or empty, its
 * {@code size} otherwise) and, in the non-empty branch, a for-each over the map's key set
 * that writes each key and its value, and finally a {@code writeMapEnd} call.
 *
 * @param mapSchema schema of the map being serialized; supplies the map class, key class
 *                  and value schema
 * @param mapExpr   expression that evaluates to the map in the generated code
 * @param body      block the generated statements are appended to
 */
private void processMap(final Schema mapSchema, JExpression mapExpr, JBlock body) {

        final JClass mapClass = schemaAssistant.classFromSchema(mapSchema);
        JClass keyClass = schemaAssistant.keyClassFromMapSchema(mapSchema);

        body.invoke(JExpr.direct(ENCODER), "writeMapStart");

        // Generated condition: the map is null, or isEmpty() holds
        // (cor is presumably the code model's short-circuit OR -- confirm).
        final JExpression emptyMapCondition = mapExpr.eq(JExpr._null())
                .cor(JExpr.invoke(mapExpr, "isEmpty"));
        final JConditional emptyMapIf = body._if(emptyMapCondition);
        // then-branch: a null or empty map is written with an item count of 0
        final JBlock emptyMapBlock = emptyMapIf._then();
        emptyMapBlock.invoke(JExpr.direct(ENCODER), "setItemCount").arg(JExpr.lit(0));

        // else-branch: item count is the map's size, followed by the per-key loop
        final JBlock nonEmptyMapBlock = emptyMapIf._else();
        nonEmptyMapBlock.invoke(JExpr.direct(ENCODER), "setItemCount")
                .arg(JExpr.invoke(mapExpr, "size"));

        // The loop iterates keySet() of the map expression cast to the schema's map class.
        final JForEach mapKeysLoop = nonEmptyMapBlock.forEach(keyClass, getVariableName("key"),
                JExpr.invoke(JExpr.cast(mapClass, mapExpr), "keySet"));

        final JBlock forBody = mapKeysLoop.body();
        forBody.invoke(JExpr.direct(ENCODER), "startItem");

        // For stringable keys a separate variable holding key.toString() is declared and
        // written; otherwise the loop variable itself is written.
        JVar keyStringVar;
        if (SchemaAssistant.hasStringableKey(mapSchema)) {
            keyStringVar = forBody.decl(string, getVariableName("keyString"),
                    mapKeysLoop.var().invoke("toString"));
        } else {
            keyStringVar = mapKeysLoop.var();
        }

        final Schema valueSchema = mapSchema.getValueType();

        forBody.invoke(JExpr.direct(ENCODER), "writeString").arg(keyStringVar);

        // The value is always looked up with the original loop variable, not keyStringVar.
        JVar containerVar;
        if (SchemaAssistant.isComplexType(valueSchema)) {
            // Complex values are first stored in a declared variable, then processed.
            containerVar = declareValueVar(valueSchema.getName(), valueSchema, forBody);
            forBody.assign(containerVar, JExpr.invoke(JExpr.cast(mapClass, mapExpr), "get").arg(mapKeysLoop.var()));

            processComplexType(valueSchema, containerVar, forBody);
        } else {
            // Simple values are processed directly from the get(key) expression
            // (invoked on mapExpr without the cast used above).
            processSimpleType(valueSchema, mapExpr.invoke("get").arg(mapKeysLoop.var()), forBody);
        }
        // Emitted on the outer body, so it follows the if/else for both branches.
        body.invoke(JExpr.direct(ENCODER), "writeMapEnd");
    }
 
Example 12
Source File: FastDeserializerGenerator.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Appends to {@code parentBody} the generated statements that read a map from the decoder.
 *
 * <p>The generated code reads the first chunk length with {@code readMapStart()}, then runs
 * a do/while over chunks with a counted for loop over the entries of each chunk, asking the
 * decoder for the next chunk length with {@code mapNext()} after every chunk. When the
 * action says the map should be read, a map variable is declared, filled through
 * {@code put} and finally handed to {@code putMapIntoParent}; otherwise no map variable is
 * declared and no put callback is passed on.
 *
 * @param mapSchemaVar     generated variable holding the map schema; used to derive the
 *                         value-schema variable when generic types are in use
 * @param name             base name for generated variables and for error messages
 * @param mapSchema        map schema whose value type drives the per-entry processing
 * @param readerMapSchema  reader-side map schema; used for the map variable, the map class
 *                         and, when reading, the key class and reader value schema
 * @param parentBody       block the generated statements are appended to
 * @param action           field action for the map; replaced inside this method by an action
 *                         for the map's values
 * @param putMapIntoParent callback invoked with {@code parentBody} and the map variable when
 *                         the map is read
 * @throws FastDeserializerGeneratorException if the map should be read but no repeater
 *                                            symbol ending in "map-end" is found
 */
private void processMap(JVar mapSchemaVar, final String name, final Schema mapSchema, final Schema readerMapSchema,
        JBlock parentBody, FieldAction action, BiConsumer<JBlock, JExpression> putMapIntoParent) {

    if (action.getShouldRead()) {
        // Look through the action's production for the repeater whose end symbol prints
        // as "map-end"; it becomes the symbol of the values action.
        Symbol valuesActionSymbol = null;
        for (Symbol symbol : action.getSymbol().production) {
            if (Symbol.Kind.REPEATER.equals(symbol.kind)
                    && "map-end".equals(getSymbolPrintName(((Symbol.Repeater) symbol).end))) {
                valuesActionSymbol = symbol;
                break;
            }
        }

        if (valuesActionSymbol == null) {
            throw new FastDeserializerGeneratorException("unable to determine action for map: " + name);
        }

        // From here on, action describes the map's values rather than the map itself.
        action = FieldAction.fromValues(mapSchema.getValueType().getType(), action.getShouldRead(),
                valuesActionSymbol);
    } else {
        // Not reading: values action built with shouldRead=false and the empty symbol.
        action = FieldAction.fromValues(mapSchema.getValueType().getType(), false, EMPTY_SYMBOL);
    }

    // The map variable only exists when reading; it stays null otherwise.
    final JVar mapVar = action.getShouldRead() ? declareValueVar(name, readerMapSchema, parentBody) : null;
    JVar chunkLen = parentBody.decl(codeModel.LONG, getVariableName("chunkLen"),
            JExpr.direct(DECODER + ".readMapStart()"));

    JConditional conditional = parentBody._if(chunkLen.gt(JExpr.lit(0)));
    JBlock ifBlock = conditional._then();

    if (action.getShouldRead()) {
        // Non-empty first chunk: a new instance of the reader map class.
        // Otherwise: the result of Collections.emptyMap().
        ifBlock.assign(mapVar, JExpr._new(schemaAssistant.classFromSchema(readerMapSchema, false)));
        JBlock elseBlock = conditional._else();
        elseBlock.assign(mapVar, codeModel.ref(Collections.class).staticInvoke("emptyMap"));
    }

    // Generated shape: do { for (counter = 0; counter < chunkLen; counter++) {...} ... } while (chunkLen > 0)
    JDoLoop doLoop = ifBlock._do(chunkLen.gt(JExpr.lit(0)));
    JForLoop forLoop = doLoop.body()._for();
    JVar counter = forLoop.init(codeModel.INT, getVariableName("counter"), JExpr.lit(0));
    forLoop.test(counter.lt(chunkLen));
    forLoop.update(counter.incr());
    JBlock forBody = forLoop.body();

    // Key class comes from the reader schema when reading, from mapSchema otherwise.
    // A key class equal to `string` is read with readString(), any other with readString(null).
    JClass keyClass = schemaAssistant.keyClassFromMapSchema(action.getShouldRead() ? readerMapSchema : mapSchema);
    JExpression keyValueExpression = (string.equals(keyClass)) ?
            JExpr.direct(DECODER + ".readString()")
            : JExpr.direct(DECODER + ".readString(null)");

    if (SchemaAssistant.hasStringableKey(mapSchema)) {
        // Stringable keys are constructed from the string form of what was read.
        keyValueExpression = JExpr._new(keyClass).arg(keyValueExpression.invoke("toString"));
    }

    JVar key = forBody.decl(keyClass, getVariableName("key"), keyValueExpression);
    // A schema variable for the map's values is only declared when reading with generic types.
    JVar mapValueSchemaVar = null;
    if (action.getShouldRead() && useGenericTypes) {
        mapValueSchemaVar = declareSchemaVar(mapSchema.getValueType(), name + "MapValueSchema",
                mapSchemaVar.invoke("getValueType"));
    }

    // Callback that emits mapVar.put(key, value); left null when the map is not read.
    BiConsumer<JBlock, JExpression> putValueInMap = null;
    if (action.getShouldRead()) {
        putValueInMap = (block, expression) -> block.invoke(mapVar, "put").arg(key).arg(expression);
    }

    if (SchemaAssistant.isComplexType(mapSchema.getValueType())) {
        String valueName = name + "Value";
        // The reader value schema is only supplied when the value is actually read.
        Schema readerMapValueSchema = null;
        if (action.getShouldRead()) {
            readerMapValueSchema = readerMapSchema.getValueType();
        }
        processComplexType(mapValueSchemaVar, valueName, mapSchema.getValueType(), readerMapValueSchema, forBody,
                action, putValueInMap);
    } else {
        // to preserve reader string specific options use reader map schema
        if (action.getShouldRead() && Schema.Type.STRING.equals(mapSchema.getValueType().getType())) {
            processSimpleType(readerMapSchema.getValueType(), forBody, action, putValueInMap);
        } else {
            processSimpleType(mapSchema.getValueType(), forBody, action, putValueInMap);
        }
    }
    // Appended to the do-loop body after the for loop: fetch the next chunk length.
    doLoop.body().assign(chunkLen, JExpr.direct(DECODER + ".mapNext()"));

    if (action.getShouldRead()) {
        putMapIntoParent.accept(parentBody, mapVar);
    }
}
 
Example 13
Source File: Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.mapMapValueSchema0 = readerSchema.getValueType();
    this.field0 = mapMapValueSchema0 .getField("field").schema();
}
 
Example 14
Source File: Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.mapMapValueSchema0 = readerSchema.getValueType();
    this.field0 = mapMapValueSchema0 .getField("field").schema();
}
 
Example 15
Source File: Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.mapMapValueSchema0 = readerSchema.getValueType();
    this.mapValueOptionSchema0 = mapMapValueSchema0 .getTypes().get(1);
    this.field0 = mapValueOptionSchema0 .getField("field").schema();
}
 
Example 16
Source File: AvroSchemaManager.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Initialize given a schema.
 *
 * <p>Named schemas are registered in {@code typeName2Schema} under their name, and record
 * fields in {@code name2Schema} under their namespace-qualified field name unless
 * {@code ignoreNameMap} is set. In both maps the first registration wins and a later
 * duplicate only produces a warning. Nested schemas are then visited recursively.
 *
 * @param namespace     prefix for field names, or null for none
 * @param schema        the schema to register
 * @param ignoreNameMap true to skip registering field names in {@code name2Schema}
 */
protected void init(String namespace, Schema schema,
                                boolean ignoreNameMap) {

    /* put to map[type name]=>schema */
    if (isNamedSchema(schema)) {
        final String typeName = schema.getName();
        if (!typeName2Schema.containsKey(typeName)) {
            AvroStorageLog.details("add " + typeName + "=" + schema
                    + " to type2Schema");
            typeName2Schema.put(typeName, schema);
        } else {
            AvroStorageLog.warn("Duplicate schemas defined for type:"
                    + typeName
                    + ". will ignore the second one:"
                    + schema);
        }
    }

    switch (schema.getType()) {
        case RECORD:
            /* put field schema to map[field name]=>schema */
            for (Field field : schema.getFields()) {
                final Schema fieldSchema = field.schema();
                final String alias = (namespace == null) ? field.name() : namespace + "." + field.name();

                if (!ignoreNameMap) {
                    if (!name2Schema.containsKey(alias)) {
                        AvroStorageLog.details("add " + alias + "=" + fieldSchema + " to name2Schema");
                        name2Schema.put(alias, fieldSchema);
                    } else {
                        AvroStorageLog.warn("Duplicate schemas defined for alias:" + alias
                                          + ". Will ignore the second one:" + fieldSchema);
                    }
                }

                // The qualified field name becomes the namespace of the nested schema.
                init(alias, fieldSchema, ignoreNameMap);
            }
            break;

        case UNION:
            if (AvroStorageUtils.isAcceptableUnion(schema)) {
                // An acceptable union is treated as its accepted type.
                init(namespace, AvroStorageUtils.getAcceptedType(schema), ignoreNameMap);
            } else {
                for (Schema member : schema.getTypes()) {
                    init(namespace, member, true);
                }
            }
            break;

        case ARRAY:
            init(namespace, schema.getElementType(), true);
            break;

        case MAP:
            init(namespace, schema.getValueType(), true);
            break;

        default:
            // Every other type has nothing nested to visit.
            break;
    }
}
 
Example 17
Source File: AvroNestedReader.java    From pentaho-hadoop-shims with Apache License 2.0 4 votes vote down vote up
/**
 * Processes a map at this point in the path.
 *
 * <p>The next path part is taken off the field's temporary part list and must have the form
 * {@code [key]}; anything after the closing bracket is pushed back onto the front of the
 * list for the next level. The value stored under {@code key} is then converted according
 * to the map's value type, with a union value type first narrowed to one concrete schema.
 *
 * @param avroInputField the field being resolved; its temporary path parts are consumed here
 * @param map            the map to process
 * @param s              the current schema at this point in the path
 * @param defaultSchema  passed through unchanged to the nested conversions
 * @param ignoreMissing  true if null is to be returned for user fields that don't appear in the schema
 * @return the converted value, or null if the map is null or holds no value for the key
 * @throws KettleException if the path is exhausted, the part does not start with '[', or no
 *                         map branch can be found in a union for a map value
 */
public Object convertToKettleValue( AvroInputField avroInputField,
                                    Map<Utf8, Object> map, Schema s, Schema defaultSchema, boolean ignoreMissing )
  throws KettleException {

  if ( map == null ) {
    return null;
  }

  if ( avroInputField.getTempParts().size() == 0 ) {
    throw new KettleException( BaseMessages.getString( PKG, "AvroInput.Error.MalformedPathMap" ) );
  }

  final String pathPart = avroInputField.getTempParts().remove( 0 );
  if ( pathPart.charAt( 0 ) != '[' ) {
    throw new KettleException( BaseMessages.getString( PKG, "AvroInput.Error.MalformedPathMap2", pathPart ) );
  }

  final int closingBracket = pathPart.indexOf( ']' );
  final String mapKey = pathPart.substring( 1, closingBracket );

  if ( closingBracket < pathPart.length() - 1 ) {
    // more dimensions to the array/map: re-queue what follows the bracket
    avroInputField.getTempParts().add( 0, pathPart.substring( closingBracket + 1 ) );
  }

  final Object entryValue = map.get( new Utf8( mapKey ) );
  if ( entryValue == null ) {
    return null;
  }

  Schema resolvedType = s.getValueType();

  if ( resolvedType.getType() == Schema.Type.UNION ) {
    if ( entryValue instanceof GenericContainer ) {
      // we can ask these things for their schema (covers
      // records, arrays, enums and fixed)
      resolvedType = ( (GenericContainer) entryValue ).getSchema();
    } else if ( entryValue instanceof Map ) {
      // a map does not carry its schema, so look for the map branch of the union
      Schema mapBranch = null;
      for ( Schema branch : resolvedType.getTypes() ) {
        if ( branch.getType() == Schema.Type.MAP ) {
          mapBranch = branch;
          break;
        }
      }
      if ( mapBranch == null ) {
        throw new KettleException( BaseMessages.getString( PKG,
          "AvroInput.Error.UnableToFindSchemaForUnionMap" ) );
      }
      resolvedType = mapBranch;
    } else if ( avroInputField.getTempValueMeta().getType() != ValueMetaInterface.TYPE_STRING ) {
      // we have a two element union, where one element is the type
      // "null". So in this case we actually have just one type and can
      // output specific values of it (instead of using String as a
      // catch all for varying primitive types in the union)
      resolvedType = checkUnion( resolvedType );
    } else {
      // use the string representation of the value
      resolvedType = Schema.create( Schema.Type.STRING );
    }
  }

  // what have we got?
  final Schema.Type resolvedKind = resolvedType.getType();
  if ( resolvedKind == Schema.Type.RECORD ) {
    return convertToKettleValue( avroInputField, (GenericData.Record) entryValue, resolvedType, defaultSchema,
      ignoreMissing );
  }
  if ( resolvedKind == Schema.Type.ARRAY ) {
    return convertToKettleValue( avroInputField, (GenericData.Array) entryValue, resolvedType, defaultSchema,
      ignoreMissing );
  }
  if ( resolvedKind == Schema.Type.MAP ) {
    return convertToKettleValue( avroInputField, (Map<Utf8, Object>) entryValue, resolvedType, defaultSchema,
      ignoreMissing );
  }
  // assume a primitive
  return getPrimitive( avroInputField, entryValue, resolvedType );
}
 
Example 18
Source File: AvroNestedFieldGetter.java    From pentaho-hadoop-shims with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a list of field objects holding paths corresponding to the leaf primitives in an Avro schema.
 *
 * <p>Top-level arrays and maps are unwrapped first, adding {@code [0]} or {@code KEY} to the
 * root path for each level. Field names are then made unique: a field whose name also
 * occurs later in the list gets the first {@code -N} suffix that no later field uses.
 *
 * @param s the schema to process
 * @return a List of field objects, or null if {@code s} is null
 * @throws KettleException if a problem occurs
 */
public static List<? extends IAvroInputField> getLeafFields( Schema s ) throws KettleException {
  if ( s == null ) {
    return null;
  }

  List<AvroInputField> leafFields = new ArrayList<>();

  // Descend through any nesting of top-level arrays/maps, building the path as we go.
  String rootPath = "";
  while ( s.getType() == Schema.Type.ARRAY || s.getType() == Schema.Type.MAP ) {
    if ( s.getType() == Schema.Type.ARRAY ) {
      rootPath = rootPath + "[0]";
      s = s.getElementType();
    } else {
      rootPath = rootPath + KEY;
      s = s.getValueType();
    }
  }

  if ( s.getType() == Schema.Type.RECORD ) {
    processRecord( rootPath, s, leafFields );
  } else if ( s.getType() == Schema.Type.UNION ) {
    processUnion( rootPath, s, leafFields );
  } else {
    // our top-level array/map structure bottoms out with primitive types
    // we'll create one zero-indexed path through to a primitive - the
    // user can copy and paste the path if they want to extract other
    // indexes out to separate Kettle fields
    AvroInputField primitiveField = createAvroField( rootPath, s );
    if ( primitiveField != null ) {
      leafFields.add( primitiveField );
    }
  }

  // Rename each field (except the last) until its name differs from every later field.
  for ( int i = 0; i < leafFields.size() - 1; i++ ) {
    AvroInputField current = leafFields.get( i );
    String candidate;
    boolean clash;
    int suffix = 0;
    do {
      candidate = current.getPentahoFieldName();
      if ( suffix > 0 ) {
        candidate = candidate + "-" + suffix;
      }
      clash = false;
      for ( int j = i + 1; j < leafFields.size(); j++ ) {
        if ( candidate.equals( leafFields.get( j ).getPentahoFieldName() ) ) {
          clash = true;
          break;
        }
      }
      suffix++;
    } while ( clash );

    current.setPentahoFieldName( candidate );
  }
  return leafFields;
}
 
Example 19
Source File: AvroSchemaManager.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Initialize given a schema.
 *
 * <p>Named schemas are registered in {@code typeName2Schema} under their name, and record
 * fields in {@code name2Schema} under their namespace-qualified field name unless
 * {@code ignoreNameMap} is set. In both maps the first registration wins and a later
 * duplicate only produces a warning. Nested schemas are then visited recursively.
 *
 * @param namespace     prefix for field names, or null for none
 * @param schema        the schema to register
 * @param ignoreNameMap true to skip registering field names in {@code name2Schema}
 */
protected void init(String namespace, Schema schema,
                                boolean ignoreNameMap) {

    /* put to map[type name]=>schema */
    if (isNamedSchema(schema)) {
        final String typeName = schema.getName();
        if (!typeName2Schema.containsKey(typeName)) {
            AvroStorageLog.details("add " + typeName + "=" + schema
                    + " to type2Schema");
            typeName2Schema.put(typeName, schema);
        } else {
            AvroStorageLog.warn("Duplicate schemas defined for type:"
                    + typeName
                    + ". will ignore the second one:"
                    + schema);
        }
    }

    switch (schema.getType()) {
        case RECORD:
            /* put field schema to map[field name]=>schema */
            for (Field field : schema.getFields()) {
                final Schema fieldSchema = field.schema();
                final String alias = (namespace == null) ? field.name() : namespace + "." + field.name();

                if (!ignoreNameMap) {
                    if (!name2Schema.containsKey(alias)) {
                        AvroStorageLog.details("add " + alias + "=" + fieldSchema + " to name2Schema");
                        name2Schema.put(alias, fieldSchema);
                    } else {
                        AvroStorageLog.warn("Duplicate schemas defined for alias:" + alias
                                          + ". Will ignore the second one:" + fieldSchema);
                    }
                }

                // The qualified field name becomes the namespace of the nested schema.
                init(alias, fieldSchema, ignoreNameMap);
            }
            break;

        case UNION:
            if (AvroStorageUtils.isAcceptableUnion(schema)) {
                // An acceptable union is treated as its accepted type.
                init(namespace, AvroStorageUtils.getAcceptedType(schema), ignoreNameMap);
            } else {
                for (Schema member : schema.getTypes()) {
                    init(namespace, member, true);
                }
            }
            break;

        case ARRAY:
            init(namespace, schema.getElementType(), true);
            break;

        case MAP:
            init(namespace, schema.getValueType(), true);
            break;

        default:
            // Every other type has nothing nested to visit.
            break;
    }
}
 
Example 20
Source File: AvroTypeSystem.java    From transport with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Returns the value type of the given Avro map schema.
 *
 * @param dataType the schema whose value type is requested
 * @return the result of {@code dataType.getValueType()}
 */
@Override
protected Schema getMapValueType(Schema dataType) {
  final Schema valueSchema = dataType.getValueType();
  return valueSchema;
}