Java Code Examples for org.apache.avro.Schema#Type

The following examples show how to use org.apache.avro.Schema#Type. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: AvroRecordConverter.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the named field from an Avro record and renders it as a string.
 *
 * <p>INT, LONG, FLOAT and DOUBLE values are rendered with {@code String.valueOf}; STRING values
 * are rendered with {@code toString()}.
 *
 * @param record the record to read from
 * @param avroType the Avro type the field is expected to hold
 * @param fieldName the name of the field to read
 * @return the rendered value, or an empty {@code Optional} when the field value is null
 * @throws IllegalArgumentException if {@code avroType} is not one of the types listed above
 * @throws ClassCastException if the stored value does not match {@code avroType}
 */
private static Optional<String> readString(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  switch (avroType) {
    case INT:
      return Optional.ofNullable((Integer) record.get(fieldName)).map(String::valueOf);
    case LONG:
      return Optional.ofNullable((Long) record.get(fieldName)).map(String::valueOf);
    case FLOAT:
      return Optional.ofNullable((Float) record.get(fieldName)).map(String::valueOf);
    case DOUBLE:
      return Optional.ofNullable((Double) record.get(fieldName)).map(String::valueOf);
    case STRING:
      // Avro yields Utf8 by default but java.lang.String when the schema carries the
      // "avro.java.string" property; both are CharSequences, so accept either instead of
      // failing with a ClassCastException on the latter.
      return Optional.ofNullable((CharSequence) record.get(fieldName))
          .map(CharSequence::toString);
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as STRING");
  }
}
 
Example 2
Source File: AvroRecordConverter.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the named list-valued field from an Avro record as a list of booleans.
 *
 * <p>For BOOLEAN the stored list is returned as is. For STRING each non-null element is parsed
 * with {@link Boolean#valueOf(String)}; null elements stay null.
 *
 * @param record the record to read from
 * @param avroType the Avro type of the list elements (presumably the array's item type; confirm
 *     against the caller)
 * @param fieldName the name of the field to read
 * @return the boolean list, or an empty {@code Optional} when the field value is null
 * @throws IllegalArgumentException if {@code avroType} is neither BOOLEAN nor STRING
 */
@VisibleForTesting
@SuppressWarnings("unchecked")
static Optional<List<Boolean>> readBoolArray(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  switch (avroType) {
    case BOOLEAN:
      return Optional.ofNullable((List<Boolean>) record.get(fieldName));
    case STRING:
      {
        // Elements are Utf8 by default but java.lang.String when the schema carries the
        // "avro.java.string" property; treating them as CharSequence accepts both instead of
        // failing with a ClassCastException on the latter.
        List<CharSequence> value = (List<CharSequence>) record.get(fieldName);
        if (value == null) {
          return Optional.empty();
        }
        List<Boolean> result =
            value
                .stream()
                .map(x -> x == null ? null : Boolean.valueOf(x.toString()))
                .collect(Collectors.toList());
        return Optional.of(result);
      }
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as BOOL");
  }
}
 
Example 3
Source File: ClusterIntegrationTestUtils.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a random value whose Java type matches the given Avro field type.
 *
 * <p>NOTE(review): if {@code RANDOM} is a {@code java.util.Random}, {@code nextFloat()} and
 * {@code nextDouble()} are already below 1 so the modulo has no effect, and
 * {@code nextLong() % 1000000} can be negative. Confirm this is intended.
 *
 * @param fieldType Avro type to generate a value for
 * @return a boxed Boolean, Integer, Long, Float or Double, or a String
 * @throws IllegalStateException if the field type is not one of the supported primitives
 */
private static Object generateRandomValue(Schema.Type fieldType) {
  final Object randomValue;
  switch (fieldType) {
    case BOOLEAN:
      randomValue = RANDOM.nextBoolean();
      break;
    case INT:
      randomValue = RANDOM.nextInt(100000);
      break;
    case LONG:
      randomValue = RANDOM.nextLong() % 1000000;
      break;
    case FLOAT:
      randomValue = RANDOM.nextFloat() % 100000;
      break;
    case DOUBLE:
      randomValue = RANDOM.nextDouble() % 1000000;
      break;
    case STRING:
      randomValue = "potato" + RANDOM.nextInt(1000);
      break;
    default:
      throw new IllegalStateException("Unsupported field type: " + fieldType);
  }
  return randomValue;
}
 
Example 4
Source File: HiveAvroORCQueryGenerator.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the Hive type string for logical types that Hive supports but Avro does not
 * (e.g. VARCHAR). See
 * org.apache.hadoop.hive.serde2.avro.SchemaToTypeInfo#generateTypeInfo(org.apache.avro.Schema)
 * for the reference behaviour.
 *
 * <p>Only STRING schemas whose logical-type property equals the varchar type name
 * (case-insensitively) are recognised; everything else yields an empty string.
 *
 * @param schema Avro schema to inspect
 * @return {@code "varchar(<maxLength>)"} for a varchar-annotated STRING schema, otherwise an
 *     empty string
 * @throws AvroSerdeException if the max-length property cannot be read as an int
 */
public static String generateHiveSpecificLogicalType(Schema schema) throws AvroSerdeException {
  final boolean isVarchar =
      schema.getType() == Schema.Type.STRING
          && AvroSerDe.VARCHAR_TYPE_NAME.equalsIgnoreCase(
              schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE));
  if (!isVarchar) {
    return StringUtils.EMPTY;
  }

  int maxLength;
  try {
    maxLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
  } catch (Exception ex) {
    // Covers a missing property (null lookup) as well as a malformed value.
    throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex);
  }
  return String.format("varchar(%s)", maxLength);
}
 
Example 5
Source File: NiFiOrcUtils.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code CREATE EXTERNAL TABLE IF NOT EXISTS ... STORED AS ORC} statement whose columns
 * mirror the fields of the given Avro record schema.
 *
 * @param avroSchema the Avro schema; must be of type RECORD
 * @param tableName the table name, inserted into the statement verbatim
 * @return the DDL statement
 * @throws IllegalArgumentException if the schema is not a RECORD
 */
public static String generateHiveDDL(Schema avroSchema, String tableName) {
    final Schema.Type schemaType = avroSchema.getType();
    if (!Schema.Type.RECORD.equals(schemaType)) {
        throw new IllegalArgumentException("Avro schema is of type " + schemaType.getName() + ", not RECORD");
    }

    // One "<name> <hive type>" entry per record field, in schema order.
    final List<String> columnDefinitions = new ArrayList<>();
    final List<Schema.Field> recordFields = avroSchema.getFields();
    if (recordFields != null) {
        for (Schema.Field recordField : recordFields) {
            columnDefinitions.add(recordField.name() + " " + getHiveTypeFromAvroType(recordField.schema()));
        }
    }

    return "CREATE EXTERNAL TABLE IF NOT EXISTS "
            + tableName
            + " ("
            + StringUtils.join(columnDefinitions, ", ")
            + ") STORED AS ORC";
}
 
Example 6
Source File: ThirdeyeAvroUtils.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up a field in the Avro schema by name and returns the name of its data type.
 *
 * @param fieldname name of the field to look up
 * @param schema Avro schema expected to contain the field
 * @return string form of the {@code DataType} matching the field's (union-unwrapped) Avro type
 * @throws IllegalStateException if the schema has no field with that name
 * @throws RuntimeException if the field is an ARRAY, which is not handled yet
 */
public static String getDataTypeForField(String fieldname, Schema schema) {
  final Field target = schema.getField(fieldname);
  if (target == null) {
    throw new IllegalStateException("Field " + fieldname + " does not exist in schema");
  }

  final Schema targetSchema = target.schema();
  if (targetSchema.getType() == Schema.Type.ARRAY) {
    throw new RuntimeException("TODO: validate correctness after commit b19a0965044d3e3f4f1541cc4cd9ea60b96a4b99");
  }

  final Schema unwrapped = extractSchemaFromUnionIfNeeded(targetSchema);
  return DataType.valueOf(unwrapped.getType()).toString();
}
 
Example 7
Source File: ConvertAvroTypeToSQL.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Maps an Avro type to a {@code java.sql.Types} constant.
 *
 * <p>A mapping configured in {@code CONVERT_AVROTYPE_TO_SQLTYPE} takes precedence; the built-in
 * defaults below are used only when no configured entry exists.
 *
 * <p>NOTE(review): LONG defaults to INTEGER and FLOAT/DOUBLE default to NUMERIC; confirm these
 * defaults are intended rather than BIGINT / REAL / DOUBLE.
 *
 * @param type the Avro type to convert
 * @return the SQL type constant
 * @throws UnsupportedOperationException for ENUM, RECORD, MAP, FIXED, ARRAY, NULL and any other
 *     type without a default mapping
 */
private int convertRawAvroType(Schema.Type type) {
    final Integer configured = this.config.CONVERT_AVROTYPE_TO_SQLTYPE.get(type);
    if (configured != null) {
        return configured;
    }

    switch (type) {
    case STRING:
        return Types.VARCHAR;
    case BYTES:
        return Types.BLOB;
    case INT:
    case LONG:
        return Types.INTEGER;
    case FLOAT:
    case DOUBLE:
        return Types.NUMERIC;
    case BOOLEAN:
        return Types.BOOLEAN;
    default:
        // ignored types ENUM, RECORD, MAP, FIXED, ARRAY, NULL
        throw new UnsupportedOperationException(type + " Avro type not supported");
    }
}
 
Example 8
Source File: SalesforceAvroRegistryTest.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@link SalesforceAvroRegistry#inferSchema(Object)} returns a {@link Schema} of type
 * {@link Type#DOUBLE} when it is given a field of type percent.
 *
 * This test case is related to the bug https://jira.talendforge.org/browse/TDI-37479
 */
@Test
public void testInferSchemaFieldPercent() {
    // Arrange: a field whose only configured attribute is its percent type.
    Field percent = new Field();
    percent.setType(FieldType.percent);

    // Act
    Schema inferred = sRegistry.inferSchema(percent);

    // Assert
    assertThat(inferred.getType(), is(Schema.Type.DOUBLE));
}
 
Example 9
Source File: AvroTypeFactoryImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Ensures the top-level Avro schema is a RECORD.
 *
 * @param schema the top-level schema to check
 * @throws SamzaException if the schema's type is anything other than RECORD; the message is
 *     also logged at error level
 */
private void validateTopLevelAvroType(Schema schema) {
  final Schema.Type topLevelType = schema.getType();
  if (topLevelType == Schema.Type.RECORD) {
    return;
  }

  final String msg =
      String.format(
          "Samza Sql supports only RECORD as top level avro type, But the Schema's type is %s",
          topLevelType);
  LOG.error(msg);
  throw new SamzaException(msg);
}
 
Example 10
Source File: CSVUtil.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Guesses an Avro type for a column from one example value.
 *
 * <p>The patterns are tried in the order LONG, DOUBLE, FLOAT; the first one matching the whole
 * example wins, and anything else is treated as a STRING.
 *
 * @param example a sample value, may be null or empty
 * @return the inferred type, or {@code null} when the example is null or empty
 */
private static Schema.Type inferFieldType(String example) {
  if (example == null || example.isEmpty()) {
    // not enough information
    return null;
  }
  if (LONG.matcher(example).matches()) {
    return Schema.Type.LONG;
  }
  if (DOUBLE.matcher(example).matches()) {
    return Schema.Type.DOUBLE;
  }
  if (FLOAT.matcher(example).matches()) {
    return Schema.Type.FLOAT;
  }
  return Schema.Type.STRING;
}
 
Example 11
Source File: AvroRecordReader.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Processes one root container, which must be described by a RECORD schema.
 *
 * @param container the root object to process
 * @param schema the schema describing {@code container}
 * @throws DrillRuntimeException if the schema's type is not RECORD
 */
private void processRecord(final GenericContainer container, final Schema schema) {
  final Schema.Type rootType = schema.getType();
  if (rootType != Schema.Type.RECORD) {
    throw new DrillRuntimeException("Root object must be record type. Found: " + rootType);
  }

  process(container, schema, null, new MapOrListWriterImpl(writer.rootAsMap()), fieldSelection);
}
 
Example 12
Source File: AvroGenerators.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Generates a random Avro schema.
 *
 * <p>Once the nesting recorded in {@code status} reaches {@code MAX_NESTING} only primitive
 * types are chosen; otherwise any type may be chosen. For a non-primitive choice the nesting
 * counter is incremented before the schema is built.
 */
@Override
public Schema generate(SourceOfRandomness random, GenerationStatus status) {
  final boolean depthExhausted = nesting(status) >= MAX_NESTING;
  final Schema.Type chosen =
      depthExhausted ? random.choose(PRIMITIVE_TYPES) : random.choose(ALL_TYPES);

  if (PRIMITIVE_TYPES.contains(chosen)) {
    return Schema.create(chosen);
  }

  nestingInc(status);

  switch (chosen) {
    case FIXED:
      {
        int size = random.choose(Arrays.asList(1, 5, 12));
        return Schema.createFixed("fixed_" + branch(status), "", "", size);
      }
    case UNION:
      // only nullable fields, everything else isn't supported in row conversion code
      return UnionSchemaGenerator.INSTANCE.generate(random, status);
    case ENUM:
      return EnumSchemaGenerator.INSTANCE.generate(random, status);
    case RECORD:
      return RecordSchemaGenerator.INSTANCE.generate(random, status);
    case MAP:
      return Schema.createMap(generate(random, status));
    case ARRAY:
      return Schema.createArray(generate(random, status));
    default:
      throw new AssertionError("Unexpected AVRO type: " + chosen);
  }
}
 
Example 13
Source File: FilterRowProperties.java    From components with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the given Avro type is STRING.
 *
 * TODO: This method will be used once field autocompletion is implemented
 *
 * @param type the Avro type to test, may be null
 * @return {@code true} only for {@code Schema.Type.STRING}
 */
private Boolean isString(Schema.Type type) {
    // Enum constants are singletons, so an identity comparison is equivalent to equals().
    return type == Schema.Type.STRING;
}
 
Example 14
Source File: FastDeserializerGeneratorBase.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates a field action that simply stores the three given values.
 *
 * @param type the Avro type associated with this action
 * @param shouldRead value stored in the {@code shouldRead} flag
 * @param symbol the symbol associated with this action
 */
private FieldAction(Schema.Type type, boolean shouldRead, Symbol symbol) {
  this.symbol = symbol;
  this.shouldRead = shouldRead;
  this.type = type;
}
 
Example 15
Source File: AvroSchema2Pig.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an Avro schema into a Pig field schema carrying the given field name.
 *
 * <p>Records become tuples (or a bytearray when the record is already being expanded further up
 * the call chain), arrays become bags, maps become Pig maps, and acceptable unions are replaced
 * by the conversion of their accepted type. Primitive types map to the matching Pig type.
 *
 * @param in the Avro schema to convert
 * @param fieldName name given to the resulting Pig field; null is passed for union members
 * @param visitedRecords record schemas currently being expanded, used to cut off recursion
 * @return the Pig field schema
 * @throws IOException for a union that is not acceptable or an unknown Avro type
 */
private static ResourceFieldSchema inconvert(Schema in, String fieldName, Set<Schema> visitedRecords)
        throws IOException {

    AvroStorageLog.details("InConvert avro schema with field name " + fieldName);

    final Schema.Type kind = in.getType();
    final ResourceFieldSchema pigField = new ResourceFieldSchema();
    pigField.setName(fieldName);

    switch (kind) {
    case RECORD: {
        AvroStorageLog.details("convert to a pig tuple");

        if (visitedRecords.contains(in)) {
            // Recursive reference to a record that is still being expanded.
            pigField.setType(DataType.BYTEARRAY);
            break;
        }

        visitedRecords.add(in);
        pigField.setType(DataType.TUPLE);
        final ResourceSchema tuple = new ResourceSchema();
        final List<Schema.Field> members = in.getFields();
        final ResourceFieldSchema[] converted = new ResourceFieldSchema[members.size()];
        for (int i = 0; i < converted.length; i++) {
            final Schema.Field member = members.get(i);
            converted[i] = inconvert(member.schema(), member.name(), visitedRecords);
        }

        tuple.setFields(converted);
        pigField.setSchema(tuple);
        // Done with this record; siblings may legitimately reuse the same record type.
        visitedRecords.remove(in);
        break;
    }
    case ARRAY: {
        AvroStorageLog.details("convert array to a pig bag");
        pigField.setType(DataType.BAG);
        final Schema element = in.getElementType();
        final ResourceFieldSchema elementField = inconvert(element, ARRAY_FIELD, visitedRecords);
        add2BagSchema(pigField, elementField);
        break;
    }
    case MAP:
        AvroStorageLog.details("convert map to a pig map");
        pigField.setType(DataType.MAP);
        break;
    case UNION: {
        if (!AvroStorageUtils.isAcceptableUnion(in)) {
            throw new IOException("Do not support generic union:" + in);
        }
        final Schema accepted = AvroStorageUtils.getAcceptedType(in);
        final ResourceFieldSchema acceptedField = inconvert(accepted, null, visitedRecords);
        pigField.setType(acceptedField.getType());
        pigField.setSchema(acceptedField.getSchema());
        break;
    }
    case FIXED:
    case BYTES:
        pigField.setType(DataType.BYTEARRAY);
        break;
    case BOOLEAN:
        pigField.setType(DataType.BOOLEAN);
        break;
    case DOUBLE:
        pigField.setType(DataType.DOUBLE);
        break;
    case ENUM:
    case STRING:
        pigField.setType(DataType.CHARARRAY);
        break;
    case FLOAT:
        pigField.setType(DataType.FLOAT);
        break;
    case INT:
        pigField.setType(DataType.INTEGER);
        break;
    case LONG:
        pigField.setType(DataType.LONG);
        break;
    case NULL:
        // value of NULL is always NULL
        pigField.setType(DataType.INTEGER);
        break;
    default:
        throw new IOException("Unsupported avro type:" + kind);
    }
    return pigField;
}
 
Example 16
Source File: AvroSchemaProvider.java    From registry with Apache License 2.0 4 votes vote down vote up
/**
 * Recursively appends a JSON-like textual form of {@code schema} to {@code appendable}.
 *
 * <p>Primitive types are written as their quoted type name, unions as a bracketed list, arrays
 * and maps as an object with "items" / "values". For the named types (enum, fixed, record) the
 * full name is first looked up in {@code env}; if an entry exists, its stored text is appended
 * instead of expanding the type again.
 *
 * <p>NOTE(review): {@code addNameType} is not visible here; presumably it registers the name in
 * {@code env} and writes the opening name/type part of the object. Confirm against its
 * definition.
 *
 * @param env map from a type's full name to the text appended when that name is seen again
 * @param schema the schema to render
 * @param appendable the sink the text is written to
 * @return the {@code Appendable} returned by the final append call
 * @throws IOException if appending to {@code appendable} fails
 */
private static Appendable build(Map<String, String> env,
                                Schema schema,
                                Appendable appendable) throws IOException {
    // Tracks whether a separator is needed in the comma-separated lists written below.
    boolean firstTime = true;
    Schema.Type schemaType = schema.getType();
    String fullName = schema.getFullName();
    switch (schemaType) {
        default: // boolean, bytes, double, float, int, long, null, string
            return appendable.append('"').append(schemaType.getName()).append('"');

        case UNION:
            // Branches are rendered in declaration order, separated by commas.
            appendable.append('[');
            for (Schema b : schema.getTypes()) {
                if (!firstTime) appendable.append(',');
                else firstTime = false;
                build(env, b, appendable);
            }
            return appendable.append(']');

        case ARRAY:
        case MAP:
            // Both share the same envelope; only the key naming the nested schema differs.
            appendable.append("{\"type\":\"").append(schemaType.getName()).append("\"");
            if (schemaType == Schema.Type.ARRAY)
                build(env, schema.getElementType(), appendable.append(",\"items\":"));
            else build(env, schema.getValueType(), appendable.append(",\"values\":"));
            return appendable.append("}");

        case ENUM:
            // A name already present in env is written as its stored text only.
            if (env.get(fullName) != null) {
                return appendable.append(env.get(fullName));
            }
            addNameType(env, appendable, schemaType, fullName);

            appendable.append(",\"symbols\":[");
            for (String enumSymbol : schema.getEnumSymbols()) {
                if (!firstTime) appendable.append(',');
                else firstTime = false;
                appendable.append('"').append(enumSymbol).append('"');
            }
            return appendable.append("]").append("}");

        case FIXED:
            // A name already present in env is written as its stored text only.
            if (env.get(fullName) != null) {
                return appendable.append(env.get(fullName));
            }
            addNameType(env, appendable, schemaType, fullName);

            return appendable.append(",\"size\":").append(Integer.toString(schema.getFixedSize())).append("}");

        case RECORD:
            // A name already present in env is written as its stored text only.
            if (env.get(fullName) != null) {
                return appendable.append(env.get(fullName));
            }
            addNameType(env, appendable, schemaType, fullName);

            // avro resolution parsing does not handle aliases and default attributes
            // handle aliases
            // NOTE(review): the aliases entry is written without a leading separator and ends
            // with ",", while the following append starts with ",", so a record with aliases
            // gets two consecutive commas before "fields". Confirm whether consumers rely on
            // this exact text before changing it.
            Set<String> aliases = schema.getAliases();
            if (aliases != null && !aliases.isEmpty()) {
                appendable.append("\"aliases\":")
                        .append("[")
                        .append(Joiner.on(",").join(aliases.stream()
                                                            .map(x -> "\"" + x + "\"")
                                                            .collect(Collectors.toList())))
                        .append("]")
                        .append(",");
            }

            appendable.append(",\"fields\":[");
            for (Schema.Field field : schema.getFields()) {
                if (!firstTime) {
                    appendable.append(',');
                } else {
                    firstTime = false;
                }
                appendable.append("{\"name\":\"").append(field.name()).append("\"").append(",\"type\":");

                // handle default value
                // NOTE(review): the default's string form is written directly after "type":
                // and immediately before the field's schema text, with no key or separator.
                Object defaultValue = field.defaultVal();
                if (defaultValue != null) {
                    appendable.append(defaultValue.toString());
                }

                build(env, field.schema(), appendable).append("}");
            }
            return appendable.append("]").append("}");
    }
}
 
Example 17
Source File: AvroSchemaConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one Avro field schema into the corresponding Parquet type.
 *
 * <p>Records, arrays, maps and unions are returned as soon as they are converted. All other
 * types produce a primitive builder that is annotated from the Avro logical type, if any, and
 * then named after the field.
 *
 * @param fieldName name given to the resulting Parquet type
 * @param schema Avro schema of the field
 * @param repetition repetition of the resulting Parquet type
 * @return the Parquet type for the field
 * @throws UnsupportedOperationException if the Avro type is not handled below
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
  final Schema.Type avroType = schema.getType();
  final LogicalType logicalType = schema.getLogicalType();

  // Case labels name Avro Schema.Type constants. The unqualified names inside the case bodies
  // (BOOLEAN, FLOAT, ...) resolve through the file's ordinary scope, exactly as they did in the
  // if/else form of this method.
  Types.PrimitiveBuilder<PrimitiveType> builder;
  switch (avroType) {
    case BOOLEAN:
      builder = Types.primitive(BOOLEAN, repetition);
      break;
    case INT:
      builder = Types.primitive(INT32, repetition);
      break;
    case LONG:
      builder = Types.primitive(INT64, repetition);
      break;
    case FLOAT:
      builder = Types.primitive(FLOAT, repetition);
      break;
    case DOUBLE:
      builder = Types.primitive(DOUBLE, repetition);
      break;
    case BYTES:
      builder = Types.primitive(BINARY, repetition);
      break;
    case STRING:
      if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) {
        builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
            .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES);
      } else {
        builder = Types.primitive(BINARY, repetition).as(stringType());
      }
      break;
    case RECORD:
      return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
    case ENUM:
      builder = Types.primitive(BINARY, repetition).as(enumType());
      break;
    case ARRAY:
      if (writeOldListStructure) {
        return ConversionPatterns.listType(repetition, fieldName,
            convertField("array", schema.getElementType(), REPEATED));
      }
      return ConversionPatterns.listOfElements(repetition, fieldName,
          convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
    case MAP: {
      Type valType = convertField("value", schema.getValueType());
      // avro map key type is always string
      return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType);
    }
    case FIXED:
      builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
          .length(schema.getFixedSize());
      break;
    case UNION:
      return convertUnion(fieldName, schema, repetition);
    default:
      throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
  }

  // schema translation can only be done for known logical types because this
  // creates an equivalence
  if (logicalType instanceof LogicalTypes.Decimal) {
    LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
    builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision()));
  } else if (logicalType != null) {
    LogicalTypeAnnotation annotation = convertLogicalType(logicalType);
    if (annotation != null) {
      builder.as(annotation);
    }
  }

  return builder.named(fieldName);
}
 
Example 18
Source File: TestAvroExport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an Avro field of the given primitive type.
 *
 * @param name the field name
 * @param type the Avro type passed to {@code Schema.create}
 * @return a new field; the two remaining constructor arguments (presumably doc and default
 *     value) are passed as null
 */
private Field buildAvroField(String name, Schema.Type type) {
  final Schema fieldSchema = Schema.create(type);
  return new Field(name, fieldSchema, null, null);
}
 
Example 19
Source File: TestCSVSchemaInference.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience wrapper around {@code Schema.create}.
 *
 * @param type the Avro type to create a schema for
 * @return the schema created for {@code type}
 */
public Schema schema(Schema.Type type) {
  final Schema created = Schema.create(type);
  return created;
}
 
Example 20
Source File: JsonElementConversionWithAvroSchemaFactory.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the type reported by the object that {@code schema()} provides.
 *
 * @return the result of {@code schema().getType()}
 */
@Override
public Schema.Type getTargetType() {
  return schema().getType();
}