Java Code Examples for org.apache.avro.Schema#getType()

The following examples show how to use org.apache.avro.Schema#getType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HiveAvroORCQueryGenerator.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the Hive-specific logical type for an Avro schema, covering logical types that are
 * supported by Hive but not by Avro (e.g. VARCHAR). See
 * org.apache.hadoop.hive.serde2.avro.SchemaToTypeInfo#generateTypeInfo(org.apache.avro.Schema)
 * for the approach this follows.
 *
 * @param schema Avro schema
 * @return {@code varchar(<maxLength>)} when the schema is a STRING carrying the varchar logical
 *         type; an empty string for every other schema
 * @throws AvroSerdeException if the max-length property cannot be obtained from the schema
 */
public static String generateHiveSpecificLogicalType(Schema schema) throws AvroSerdeException {
  // Only STRING schemas can carry the varchar logical type.
  if (schema.getType() != Schema.Type.STRING) {
    return StringUtils.EMPTY;
  }

  String logicalTypeName = schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE);
  if (!AvroSerDe.VARCHAR_TYPE_NAME.equalsIgnoreCase(logicalTypeName)) {
    return StringUtils.EMPTY;
  }

  final int varcharLength;
  try {
    varcharLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
  } catch (Exception ex) {
    throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex);
  }
  return String.format("varchar(%s)", varcharLength);
}
 
Example 2
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Recursive worker behind {@link #getFieldSchema(Schema, String)}: walks {@code pathList} one
 * segment at a time, descending through records, maps and nullable unions.
 *
 * @param schema schema to resolve the current path segment against
 * @param pathList path segments, as passed from {@link #getFieldSchema(Schema, String)}
 * @param field index of the current segment within {@code pathList}
 * @return the schema of the addressed field, or absent when a record has no field named after
 *         the current segment
 */
private static Optional<Schema> getFieldSchemaHelper(Schema schema, List<String> pathList, int field) {
  switch (schema.getType()) {
    case RECORD: {
      Schema.Field child = schema.getField(pathList.get(field));
      if (child == null) {
        return Optional.absent();
      }
      Schema childSchema = child.schema();
      if (field + 1 == pathList.size()) {
        // Last path segment: this field is the answer.
        return Optional.fromNullable(childSchema);
      }
      return AvroUtils.getFieldSchemaHelper(childSchema, pathList, field + 1);
    }
    case MAP: {
      // The current segment is consumed without being looked up; the walk continues into the value type.
      Schema valueSchema = schema.getValueType();
      if (field + 1 == pathList.size()) {
        return Optional.fromNullable(valueSchema);
      }
      return AvroUtils.getFieldSchemaHelper(valueSchema, pathList, field + 1);
    }
    case UNION:
      if (!AvroSerdeUtils.isNullableType(schema)) {
        throw new AvroRuntimeException("Union of complex types cannot be handled : " + schema);
      }
      // Nullable union: continue with the other branch, same path position.
      return AvroUtils.getFieldSchemaHelper(AvroSerdeUtils.getOtherTypeFromNullableType(schema), pathList, field);
    default:
      throw new AvroRuntimeException("Invalid type in schema : " + schema);
  }
}
 
Example 3
Source File: SchemaUtil.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a value described by the schema may be null.
 *
 * @param schema a Schema
 * @return true if the schema is NULL, or is a UNION with at least one branch that (recursively)
 *         allows null; false otherwise
 */
public static boolean nullOk(Schema schema) {
  final Schema.Type kind = schema.getType();
  if (kind == Schema.Type.NULL) {
    return true;
  }
  if (kind != Schema.Type.UNION) {
    return false;
  }
  // A union is nullable as soon as any one of its branches is.
  for (Schema branch : schema.getTypes()) {
    if (nullOk(branch)) {
      return true;
    }
  }
  return false;
}
 
Example 4
Source File: AvroGenericRecordMapper.java    From divolte-collector with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves a union schema to the single schema that values should be read as.
 *
 * Only a one-branch union, or a two-branch union where one branch is NULL, is accepted.
 * The alternative would be to replace the JsonParser with a TokenBuffer and try to read as
 * each possible schema type until something succeeds; that would be very expensive, so for
 * now it's not supported.
 *
 * @param targetSchema a schema of type UNION (checked as a precondition)
 * @return the resolved branch, or empty when the union shape is not acceptable
 */
private static Optional<Schema> resolveUnion(final Schema targetSchema) {
    Preconditions.checkArgument(targetSchema.getType() == Schema.Type.UNION);
    final List<Schema> branches = targetSchema.getTypes();
    final int branchCount = branches.size();

    if (branchCount == 1) {
        return Optional.of(branches.get(0));
    }
    if (branchCount != 2) {
        // Not acceptable.
        return Optional.empty();
    }

    final Schema head = branches.get(0);
    final Schema tail = branches.get(1);
    if (head.getType() == Schema.Type.NULL) {
        // NULL comes first, so the second branch is the one to use.
        return Optional.of(tail);
    }
    if (tail.getType() == Schema.Type.NULL) {
        return Optional.of(head);
    }
    return Optional.empty();
}
 
Example 5
Source File: AvroTypeFactoryImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the top-level Avro type is RECORD.
 *
 * @param schema the top-level Avro schema
 * @throws SamzaException if the schema's type is anything other than RECORD; the same message
 *         is logged at error level before throwing
 */
private void validateTopLevelAvroType(Schema schema) {
  final Schema.Type topLevelType = schema.getType();
  if (topLevelType == Schema.Type.RECORD) {
    return;
  }

  final String message =
      String.format("Samza Sql supports only RECORD as top level avro type, But the Schema's type is %s", topLevelType);
  LOG.error(message);
  throw new SamzaException(message);
}
 
Example 6
Source File: FastSerializerGenerator.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Emits, into {@code body}, the encoder call that writes a primitive value.
 * STRING schemas are delegated to {@code processString}; every other supported type becomes a
 * single {@code write<Type>} invocation on the encoder.
 *
 * @param primitiveSchema schema of the value being written
 * @param primitiveValueExpression expression yielding the value
 * @param body block that receives the generated invocation
 * @param cast whether the value expression is cast to the schema's class before being passed
 * @throws FastSerdeGeneratorException if the schema type is not one of the handled primitives
 */
private void processPrimitive(final Schema primitiveSchema, JExpression primitiveValueExpression, JBlock body, boolean cast) {
  final JClass targetClass = schemaAssistant.classFromSchema(primitiveSchema);
  final JExpression encoderArgument;
  if (cast) {
    encoderArgument = JExpr.cast(targetClass, primitiveValueExpression);
  } else {
    encoderArgument = primitiveValueExpression;
  }

  final String encoderMethod;
  switch (primitiveSchema.getType()) {
    case STRING:
      // Strings have their own code path; note that it receives the uncast expression.
      processString(primitiveSchema, primitiveValueExpression, body);
      return;
    case BOOLEAN:
      encoderMethod = "writeBoolean";
      break;
    case INT:
      encoderMethod = "writeInt";
      break;
    case LONG:
      encoderMethod = "writeLong";
      break;
    case FLOAT:
      encoderMethod = "writeFloat";
      break;
    case DOUBLE:
      encoderMethod = "writeDouble";
      break;
    case BYTES:
      encoderMethod = "writeBytes";
      break;
    default:
      throw new FastSerdeGeneratorException(
          "Unsupported primitive schema of type: " + primitiveSchema.getType());
  }

  body.invoke(JExpr.direct(ENCODER), encoderMethod).arg(encoderArgument);
}
 
Example 7
Source File: DslRecordMapping.java    From divolte-collector with Apache License 2.0 5 votes vote down vote up
/**
 * Unwraps a two-branch union to its first non-NULL branch.
 *
 * @param source any schema
 * @return {@code source} itself when it is not a UNION; empty when it is a UNION that does not
 *         have exactly two branches; otherwise the first branch whose type is not NULL
 */
private static Optional<Schema> unpackNullableUnion(final Schema source) {
    if (source.getType() != Type.UNION) {
        return Optional.of(source);
    }
    if (source.getTypes().size() != 2) {
        return Optional.empty();
    }
    for (final Schema branch : source.getTypes()) {
        if (branch.getType() != Type.NULL) {
            return Optional.of(branch);
        }
    }
    return Optional.empty();
}
 
Example 8
Source File: SalesforceAvroRegistry.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the converter that turns the String representation of a datum in the Salesforce system
 * into the Avro type matching the Schema generated for it.
 *
 * @param f the field whose schema (unwrapped if nullable) selects the converter
 * @return the converter for the field's type
 * @throws UnsupportedOperationException if the field's type matches none of the handled types
 */
public AvroConverter<String, ?> getConverterFromString(org.apache.avro.Schema.Field f) {
    final Schema unwrapped = AvroUtils.unwrapIfNullable(f.schema());
    // FIXME use avro type to decide the converter is not correct if the user change the avro type, Date to String
    // for instance
    if (AvroUtils.isSameType(unwrapped, AvroUtils._boolean())) {
        return new StringToBooleanConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._decimal())) {
        return new StringToDecimalConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._double())) {
        return new StringToDoubleConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._float())) {
        return new StringToFloatConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._int())) {
        return new StringToIntegerConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._byte())) {
        return new StringToByteConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._short())) {
        return new StringToShortConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._long())) {
        return new StringToLongConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._date())) {
        return new StringToDateConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._bytes())) {
        return new StringToBytesConverter(f);
    }
    if (AvroUtils.isSameType(unwrapped, AvroUtils._string())) {
        // Plain strings fall back to the parent registry's String converter.
        return super.getConverter(String.class);
    }
    throw new UnsupportedOperationException("The type " + unwrapped.getType() + " is not supported."); //$NON-NLS-1$ //$NON-NLS-2$
}
 
Example 9
Source File: AvroSchemaUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Returns an optional (nullable) form of the given schema.
 *
 * @param schema any schema
 * @return {@code schema} unchanged when it is already a union accepted by
 *         {@code isOptionSchema}; otherwise a new union of NULL and {@code schema}
 * @throws IllegalArgumentException if {@code schema} is a union that is not an option schema
 */
static Schema toOption(Schema schema) {
  if (schema.getType() == UNION) {
    // Template form defers stringifying the schema until the check actually fails,
    // instead of building the message on every call.
    // NOTE(review): assumes Preconditions is Guava's (template + args overload) - confirm.
    Preconditions.checkArgument(isOptionSchema(schema),
        "Union schemas are not supported: %s", schema);
    return schema;
  } else {
    return Schema.createUnion(NULL, schema);
  }
}
 
Example 10
Source File: AvroRecordReader.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Processes a root-level container, which must be described by a RECORD schema.
 *
 * @param container the datum to process
 * @param schema schema of {@code container}
 * @throws DrillRuntimeException if the schema's type is not RECORD
 */
private void processRecord(final GenericContainer container, final Schema schema) {
    final Schema.Type rootType = schema.getType();
    if (rootType != Schema.Type.RECORD) {
        throw new DrillRuntimeException("Root object must be record type. Found: " + rootType);
    }
    process(container, schema, null, new MapOrListWriterImpl(writer.rootAsMap()), fieldSelection);
}
 
Example 11
Source File: SchemaAssistant.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Tells whether the schema's type is one of BOOLEAN, DOUBLE, FLOAT, INT or LONG.
 *
 * @param schema the schema to classify
 * @return true for the five types listed above, false for every other type
 */
public static boolean isPrimitive(Schema schema) {
  final Schema.Type kind = schema.getType();
  return kind == Schema.Type.BOOLEAN
      || kind == Schema.Type.DOUBLE
      || kind == Schema.Type.FLOAT
      || kind == Schema.Type.INT
      || kind == Schema.Type.LONG;
}
 
Example 12
Source File: FastDeserializerGenerator.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether a method has already been registered for the given record schema.
 *
 * @param schema a RECORD schema; its full name is the lookup key
 * @param read true to look in the deserialize-method map, false to look in the skip-method map
 * @return true if the selected map contains the schema's full name
 * @throws FastDeserializerGeneratorException if the schema is not a RECORD
 */
private boolean methodAlreadyDefined(final Schema schema, boolean read) {
    if (schema.getType() != Schema.Type.RECORD) {
        throw new FastDeserializerGeneratorException(
                "Methods are defined only for records, not for " + schema.getType());
    }

    if (read) {
        return deserializeMethodMap.containsKey(schema.getFullName());
    }
    return skipMethodMap.containsKey(schema.getFullName());
}
 
Example 13
Source File: AvroCoder.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Dispatches the determinism check for one schema node according to its Avro type, reporting
 * an error for anything that cannot be shown to encode deterministically.
 *
 * @param context description of the location being checked, passed through to error reports
 * @param type the Java type corresponding to {@code schema}
 * @param schema the Avro schema node to check
 */
private void doCheck(String context, TypeDescriptor<?> type, Schema schema) {
  switch (schema.getType()) {
    case BOOLEAN:
    case BYTES:
    case DOUBLE:
    case INT:
    case FLOAT:
    case LONG:
    case NULL:
      // For types that Avro encodes using one of the above primitives, we assume they are
      // deterministic.
      return;
    case ENUM:
      // Enums should be deterministic, since they depend only on the ordinal.
      return;
    case STRING:
      checkString(context, type);
      return;
    case ARRAY:
      checkArray(context, type, schema);
      return;
    case MAP:
      checkMap(context, type, schema);
      return;
    case UNION:
      checkUnion(context, type, schema);
      return;
    case RECORD:
      if (type.getType() instanceof Class) {
        checkRecord(type, schema);
      } else {
        reportError(context, "Cannot determine type from generic %s due to erasure", type);
      }
      return;
    case FIXED:
      // Depending on the implementation of GenericFixed, we don't know how
      // the given field will be encoded. So, we assume that it isn't
      // deterministic.
      reportError(context, "FIXED encodings are not guaranteed to be deterministic");
      return;
    default:
      // In any other case (eg., new types added to Avro) we cautiously report
      // an error.
      reportError(context, "Unknown schema type %s may be non-deterministic", schema.getType());
      return;
  }
}
 
Example 14
Source File: AvroRecordConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the converter for a value described by the given Avro schema, dispatching on the
 * schema's type.
 *
 * @param schema Avro schema of the value
 * @param type the Parquet type paired with {@code schema}; used as a group or primitive type
 *        depending on the Avro type
 * @param model data model used to look up conversions (and passed on to the converters)
 * @param knownClass the datum class if already known; may be null
 * @param setter container that receives the converted values
 * @return a converter matching the schema's type
 * @throws UnsupportedOperationException if the schema's type is not handled by the switch
 */
private static Converter newConverter(Schema schema, Type type,
    GenericData model, Class<?> knownClass, ParentValueContainer setter) {
  LogicalType logicalType = schema.getLogicalType();
  Conversion<?> conversion;

  // With a known datum class, look the conversion up by class and logical type;
  // otherwise by logical type alone.
  if (knownClass != null) {
    conversion = model.getConversionByClass(knownClass, logicalType);
  } else {
    conversion = model.getConversionFor(logicalType);
  }

  // All converters below write to this container rather than to the raw setter.
  ParentValueContainer parent = ParentValueContainer
      .getConversionContainer(setter, conversion, schema);

  switch (schema.getType()) {
  case BOOLEAN:
    return new AvroConverters.FieldBooleanConverter(parent);
  case INT:
    // An INT may back a byte, char or short datum class; any other class (or none)
    // gets the plain integer converter.
    Class<?> intDatumClass = getDatumClass(conversion, knownClass, schema, model);
    if (intDatumClass == null) {
      return new AvroConverters.FieldIntegerConverter(parent);
    }
    if (intDatumClass == byte.class || intDatumClass == Byte.class) {
      return new AvroConverters.FieldByteConverter(parent);
    }
    if (intDatumClass == char.class || intDatumClass == Character.class) {
      return new AvroConverters.FieldCharConverter(parent);
    }
    if (intDatumClass == short.class || intDatumClass == Short.class) {
      return new AvroConverters.FieldShortConverter(parent);
    }
    return new AvroConverters.FieldIntegerConverter(parent);
  case LONG:
    return new AvroConverters.FieldLongConverter(parent);
  case FLOAT:
    return new AvroConverters.FieldFloatConverter(parent);
  case DOUBLE:
    return new AvroConverters.FieldDoubleConverter(parent);
  case BYTES:
    // A byte[] datum class gets the byte-array converter; everything else uses ByteBuffer.
    Class<?> byteDatumClass = getDatumClass(conversion, knownClass, schema, model);
    if (byteDatumClass == null) {
      return new AvroConverters.FieldByteBufferConverter(parent);
    }
    if (byteDatumClass.isArray() && byteDatumClass.getComponentType() == byte.class) {
      return new AvroConverters.FieldByteArrayConverter(parent);
    }
    return new AvroConverters.FieldByteBufferConverter(parent);
  case STRING:
    // Strings carrying the uuid logical type get a dedicated converter.
    if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName())) {
      return new AvroConverters.FieldUUIDConverter(parent, type.asPrimitiveType());
    }
    return newStringConverter(schema, model, parent);
  case RECORD:
    return new AvroRecordConverter(parent, type.asGroupType(), schema, model);
  case ENUM:
    return new AvroConverters.FieldEnumConverter(parent, schema, model);
  case ARRAY:
    // A Java array datum class uses the array converter; otherwise a collection converter
    // is used (the datum class, possibly null, is passed along).
    Class<?> arrayDatumClass = getDatumClass(conversion, knownClass, schema, model);
    if (arrayDatumClass != null && arrayDatumClass.isArray()) {
      return new AvroArrayConverter(parent, type.asGroupType(), schema, model,
          arrayDatumClass);
    }
    return new AvroCollectionConverter(parent, type.asGroupType(), schema,
        model, arrayDatumClass);
  case MAP:
    return new MapConverter(parent, type.asGroupType(), schema, model);
  case UNION:
    return new AvroUnionConverter(parent, type, schema, model);
  case FIXED:
    return new AvroConverters.FieldFixedConverter(parent, schema, model);
  default:
    throw new UnsupportedOperationException(String.format(
        "Cannot convert Avro type: %s to Parquet type: %s", schema, type));
  }
}
 
Example 15
Source File: AvroSchemaManager.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the schema's type is RECORD, ENUM or FIXED.
 *
 * @param schema the schema to classify
 * @return true for RECORD, ENUM and FIXED; false for every other type
 */
private boolean isNamedSchema(Schema schema) {
    switch (schema.getType()) {
        case RECORD:
        case ENUM:
        case FIXED:
            return true;
        default:
            return false;
    }
}
 
Example 16
Source File: AvroSchemaUtil.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the schema has the shape of a key/value pair: a RECORD with exactly two fields.
 *
 * @param schema the schema to inspect
 * @return true if the schema is a RECORD with two fields, false otherwise
 */
public static boolean isKeyValueSchema(Schema schema) {
  if (schema.getType() != RECORD) {
    return false;
  }
  return schema.getFields().size() == 2;
}
 
Example 17
Source File: AvroTypeSystem.java    From transport with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Treats a schema as the unknown type exactly when its Avro type is NULL.
 *
 * @param dataType the schema to inspect
 * @return true if the schema's type is NULL
 */
@Override
protected boolean isUnknownType(Schema dataType) {
  final boolean typeIsNull = NULL == dataType.getType();
  return typeIsNull;
}
 
Example 18
Source File: AvroFieldsGenerator.java    From registry with Apache License 2.0 4 votes vote down vote up
/**
 * Recursively walks a schema, handing every record field to {@code parseField} and descending
 * into map values, array elements and union branches.
 *
 * @param schema the schema node to visit
 * @param schemaFieldInfos accumulator passed through to {@code parseField}
 * @param visitedRecords full names of records already visited; a record is expanded only once
 * @throws RuntimeException if the schema's type is not one of the handled types
 */
private void parseSchema(Schema schema, List<SchemaFieldInfo> schemaFieldInfos, Set<String> visitedRecords) {
    final Schema.Type type = schema.getType();
    LOG.debug("Visiting type: [{}]", type);

    switch (type) {
        case RECORD:
            final String recordName = schema.getFullName();
            // Since we are only interested in primitive data types, a record that was already
            // parsed can be ignored.
            if (visitedRecords.contains(recordName)) {
                break;
            }
            visitedRecords.add(recordName);
            for (Schema.Field recordField : schema.getFields()) {
                parseField(recordField, schemaFieldInfos, visitedRecords);
            }
            break;

        case MAP:
            parseSchema(schema.getValueType(), schemaFieldInfos, visitedRecords);
            break;

        case ARRAY:
            parseSchema(schema.getElementType(), schemaFieldInfos, visitedRecords);
            break;

        case UNION:
            for (Schema branch : schema.getTypes()) {
                parseSchema(branch, schemaFieldInfos, visitedRecords);
            }
            break;

        case ENUM:
        case STRING:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case FIXED:
        case BOOLEAN:
        case BYTES:
        case NULL:
            // Leaf types: nothing to descend into.
            break;

        default:
            throw new RuntimeException("Unsupported type: " + type);
    }
}
 
Example 19
Source File: AvroStorageDataConversionUtilities.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Packs a Pig Tuple into an Avro record, field by field, using each field's position to read
 * from the tuple and write to the record.
 *
 * @param t the Pig tuple to pack into the avro object
 * @param s the avro record schema whose fields drive the conversion
 * @return the avro record corresponding to the input tuple
 * @throws IOException wrapping any exception raised while packing
 */
public static GenericData.Record packIntoAvro(final Tuple t, final Schema s)
    throws IOException {

  try {
    final GenericData.Record packed = new GenericData.Record(s);
    for (Field field : s.getFields()) {
      final int position = field.pos();
      final Object pigValue = t.get(position);
      Schema fieldSchema = field.schema();

      if (AvroStorageSchemaConversionUtilities.isNullableUnion(fieldSchema)) {
        if (pigValue == null) {
          packed.put(position, null);
          continue;
        }
        // Non-null value: convert against the union's non-null branch.
        fieldSchema = AvroStorageSchemaConversionUtilities
            .removeSimpleUnion(fieldSchema);
      }

      final Object avroValue;
      switch (fieldSchema.getType()) {
      case RECORD:
        avroValue = packIntoAvro((Tuple) pigValue, fieldSchema);
        break;
      case ARRAY:
        avroValue = packIntoAvro((DataBag) pigValue, fieldSchema);
        break;
      case BYTES:
        avroValue = ByteBuffer.wrap(((DataByteArray) pigValue).get());
        break;
      case FIXED:
        avroValue = new GenericData.Fixed(
            fieldSchema, ((DataByteArray) pigValue).get());
        break;
      default:
        if (t.getType(position) == DataType.DATETIME) {
          // Pig DATETIME values are stored as their millisecond value.
          avroValue = ((DateTime) pigValue).getMillis();
        } else {
          avroValue = pigValue;
        }
      }
      packed.put(position, avroValue);
    }
    return packed;
  } catch (Exception e) {
    throw new IOException(
        "exception in AvroStorageDataConversionUtilities.packIntoAvro", e);
  }
}
 
Example 20
Source File: RecordBuilder.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the value as the first matching schema type, or null.
 *
 * Note that the returned value may be null even if the schema does not allow the
 * value to be null.
 *
 * For a UNION, branches are tried in declaration order. A branch that rejects the
 * string with a {@link NumberFormatException} is skipped so that a later branch
 * (e.g. STRING after INT) can still match; if no branch yields a value, the first
 * such failure is rethrown.
 *
 * @param string a String representation of the value
 * @param schema a Schema
 * @return the string coerced to the correct type from the schema or null
 * @throws NumberFormatException if a non-empty string cannot be parsed as the
 *         numeric type (or enum ordinal) required by the schema
 * @throws RecordException if the schema type is not supported
 */
private static Object makeValue(String string, Schema schema) {
  if (string == null) {
    return null;
  }

  try {
    switch (schema.getType()) {
      case BOOLEAN:
        return Boolean.valueOf(string);
      case STRING:
        return string;
      case FLOAT:
        return Float.valueOf(string);
      case DOUBLE:
        return Double.valueOf(string);
      case INT:
        return Integer.valueOf(string);
      case LONG:
        return Long.valueOf(string);
      case ENUM:
        // TODO: translate to enum class
        if (schema.hasEnumSymbol(string)) {
          return string;
        } else {
          // Not a symbol name: interpret the string as an index into the symbol list.
          try {
            return schema.getEnumSymbols().get(Integer.parseInt(string));
          } catch (IndexOutOfBoundsException ex) {
            return null;
          }
        }
      case UNION: {
        // Remember the first parse failure instead of aborting, so a numeric branch
        // that rejects the string does not hide a later branch that accepts it.
        NumberFormatException firstFailure = null;
        for (Schema possible : schema.getTypes()) {
          try {
            Object value = makeValue(string, possible);
            if (value != null) {
              return value;
            }
          } catch (NumberFormatException branchFailure) {
            if (firstFailure == null) {
              firstFailure = branchFailure;
            }
          }
        }
        if (firstFailure != null) {
          // No branch matched and at least one could not parse the input: surface that.
          throw firstFailure;
        }
        return null;
      }
      case NULL:
        return null;
      default:
        // FIXED, BYTES, MAP, ARRAY, RECORD are not supported
        throw new RecordException(
            "Unsupported field type:" + schema.getType());
    }
  } catch (NumberFormatException e) {
    // empty string is considered null for numeric types
    if (string.isEmpty()) {
      return null;
    } else {
      throw e;
    }
  }
}