Java Code Examples for org.apache.avro.Schema#getTypes()

The following examples show how to use org.apache.avro.Schema#getTypes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroScanner.java    From tajo with Apache License 2.0 6 votes vote down vote up
/**
 * Unwraps a two-branch nullable union to its non-null branch.
 * <p>
 * A schema that is not a union, a union whose size is not exactly two, or a
 * two-branch union without a NULL branch is returned unchanged.
 *
 * @param schema the schema to unwrap
 * @return the non-null branch of a {@code [null, X]} / {@code [X, null]}
 *         union, otherwise {@code schema} itself
 */
private static Schema getNonNull(Schema schema) {
  if (schema.getType() != Schema.Type.UNION) {
    return schema;
  }
  List<Schema> branches = schema.getTypes();
  if (branches.size() != 2) {
    return schema;
  }
  Schema first = branches.get(0);
  Schema second = branches.get(1);
  if (first.getType() == Schema.Type.NULL) {
    return second;
  }
  // Only unwrap when the other branch is the NULL one.
  return second.getType() == Schema.Type.NULL ? first : schema;
}
 
Example 2
Source File: AvroSchemaUtil.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Looks up a field on a parent schema and returns that field's schema,
 * unwrapping a union to its single non-null branch.
 *
 * @param parent parent schema containing the field
 * @param fieldName name of the field to look up
 * @return the field's schema if it is not a union, the only non-null branch
 *         if it is a union, or {@code null} if the parent has no such field
 * @throws IllegalArgumentException if an argument is null/empty, or if the
 *         union does not have exactly one non-null branch
 */
public static Schema findNonNullUnionBranch(Schema parent, String fieldName) {
  if (parent == null || fieldName == null || fieldName.isEmpty()) {
    throw new IllegalArgumentException("arguments must not be null/empty");
  }
  Schema.Field field = parent.getField(fieldName);
  if (field == null) {
    return null;
  }
  Schema fieldSchema = field.schema();
  if (fieldSchema.getType() != Schema.Type.UNION) {
    return fieldSchema; // plain (non-union) field
  }

  // Count the non-null branches, remembering the first one encountered.
  Schema firstNonNull = null;
  int nonNullCount = 0;
  for (Schema branch : fieldSchema.getTypes()) {
    if (branch.getType() != Schema.Type.NULL) {
      if (nonNullCount == 0) {
        firstNonNull = branch;
      }
      nonNullCount++;
    }
  }
  if (nonNullCount != 1) {
    throw new IllegalArgumentException(String.format("field %s has %d non-null union branches, where exactly 1 is expected in %s",
      fieldName, nonNullCount, parent));
  }
  return firstNonNull;
}
 
Example 3
Source File: AvroStorageUtils.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the accepted branch from a nullable union.
 * <p>
 * A single-branch union yields that branch. A two-branch union yields the
 * second branch when the first is NULL, and the first branch otherwise.
 * An empty union or one with more than two branches yields {@code null}.
 *
 * @param in the union schema; must pass {@code isAcceptableUnion}
 * @return the accepted branch, or {@code null} as described above
 * @throws RuntimeException if {@code in} is not an acceptable union
 */
public static Schema getAcceptedType(Schema in) {
    if (!isAcceptableUnion(in)) {
        throw new RuntimeException("Cannot call this function on a unacceptable union");
    }

    List<Schema> branches = in.getTypes();
    int branchCount = branches.size();
    if (branchCount == 1) {
        return branches.get(0);
    }
    if (branchCount == 2) {
        Schema first = branches.get(0);
        if (first.getType().equals(Schema.Type.NULL)) {
            return branches.get(1);
        }
        return first;
    }
    // Empty union, or more than two branches.
    return null;
}
 
Example 4
Source File: TestAvroUtils.java    From envelope with Apache License 2.0 6 votes vote down vote up
// Checks that the Avro schema produced for a string->int map type is a
// two-branch union of MAP and NULL, and that the MAP branch has INT values.
@Test
public void toTypeSchemaMapsNullable() throws Exception {
  Schema schema = AvroUtils.typeFor(DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, false));

  assertEquals("Invalid type", Schema.Type.UNION, schema.getType());
  assertEquals("Invalid union size", 2, schema.getTypes().size());

  for (Schema branch : schema.getTypes()) {
    Schema.Type branchType = branch.getType();
    assertThat("Invalid union types", branchType, anyOf(is(Schema.Type.MAP), is(Schema.Type.NULL)));
    if (branchType == Schema.Type.MAP) {
      Schema.Type valueType = branch.getValueType().getType();
      assertEquals("Invalid value type", Schema.Type.INT, valueType);
    }
  }
}
 
Example 5
Source File: AvroRecordHelper.java    From attic-apex-malhar with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a string value using the first branch of the named field's union
 * schema that accepts it.
 * <p>
 * Branches are tried in declaration order. A NULL branch matches only when
 * the value is {@code null} or the literal text "null". For any other branch
 * the value is handed to {@code convertValueToAvroPrimitive}; a
 * {@link RuntimeException} from that attempt is logged and the next branch
 * is tried.
 *
 * @param schema schema containing the field
 * @param key name of the field whose schema is read as a union
 * @param value textual value to convert
 * @return the converted value, or {@code null} if the NULL branch matched or
 *         no branch could convert the value
 */
private static Object convertAndResolveUnionToPrimitive(Schema schema, String key, String value) throws ParseException
{
  List<Schema> candidates = schema.getField(key).schema().getTypes();
  for (Schema candidate : candidates) {
    try {
      Type candidateType = candidate.getType();
      if (candidateType == Type.NULL) {
        if (value == null || value.equals("null")) {
          return null;
        }
        // A non-null value cannot resolve to the NULL branch.
        continue;
      }
      return convertValueToAvroPrimitive(candidateType, key, value);
    } catch (RuntimeException e) {
      LOG.error("Could not handle schema resolution", e);
    }
  }

  // No branch accepted the value.
  return null;
}
 
Example 6
Source File: AvroNestedReader.java    From pentaho-hadoop-shims with Apache License 2.0 5 votes vote down vote up
/**
 * Converts raw Avro bytes into the value form required by the given field type.
 * <p>
 * For {@code TYPE_BIGNUMBER} the bytes are decoded as a decimal whose precision
 * and scale are read from the field schema (or, for a union, from the first
 * branch not named "null"). Note that the decimal conversion itself is given
 * {@code m_schemaToUse}, not the resolved field schema. For {@code TYPE_BINARY}
 * the remaining bytes of the buffer are copied into a new array. Other types
 * are not handled here.
 *
 * @param pentahoType field descriptor whose Pentaho type selects the conversion
 * @param avroData the raw bytes; may be {@code null}
 * @param fieldSchema Avro schema of the field, consulted for decimal properties
 * @return the converted value, or {@code null} when {@code avroData} is
 *         {@code null}, the type is not handled, or the conversion failed
 */
public Object convertToKettleValue( AvroInputField pentahoType, ByteBuffer avroData, Schema fieldSchema ) {
  if ( avroData == null ) {
    return null;
  }
  Object pentahoData = null;
  try {
    switch ( pentahoType.getPentahoType() ) {
      case ValueMetaInterface.TYPE_BIGNUMBER: {
        // Resolve a union down to its first branch that is not named "null".
        Schema decimalSchema = fieldSchema;
        if ( fieldSchema.getType() == Schema.Type.UNION ) {
          for ( Schema branch : fieldSchema.getTypes() ) {
            if ( !branch.getName().equalsIgnoreCase( "null" ) ) {
              decimalSchema = branch;
              break;
            }
          }
        }
        int precision = Integer.parseInt( decimalSchema.getObjectProp( AvroSpec.DECIMAL_PRECISION ).toString() );
        int scale = Integer.parseInt( decimalSchema.getObjectProp( AvroSpec.DECIMAL_SCALE ).toString() );
        LogicalTypes.Decimal decimalType = LogicalTypes.decimal( precision, scale );
        pentahoData = new Conversions.DecimalConversion().fromBytes( avroData, m_schemaToUse, decimalType );
        break;
      }
      case ValueMetaInterface.TYPE_BINARY: {
        pentahoData = new byte[ avroData.remaining() ];
        avroData.get( (byte[]) pentahoData );
        break;
      }
    }
  } catch ( Exception ignored ) {
    // If unable to do the type conversion just ignore. null will be returned.
  }
  return pentahoData;
}
 
Example 7
Source File: DefaultSQLCreateTableAction.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a schema admits null, i.e. it is a union with a NULL branch.
 *
 * @param schema the schema to inspect
 * @return {@code true} only for a union containing a NULL branch
 */
private boolean isNullable(Schema schema){
    if (schema.getType() != Schema.Type.UNION) {
        return false;
    }
    for (Schema branch : schema.getTypes()) {
        if (branch.getType() == Schema.Type.NULL) {
            return true;
        }
    }
    return false;
}
 
Example 8
Source File: TestAvroUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
// Checks that the Avro schema produced for a string type is a two-branch
// union whose branches are STRING and/or NULL.
@Test
public void toTypeSchemaStringNullable() throws Exception {
  Schema schema = AvroUtils.typeFor(DataTypes.StringType);

  assertEquals("Invalid type", Schema.Type.UNION, schema.getType());
  assertEquals("Invalid union size", 2, schema.getTypes().size());

  for (Schema branch : schema.getTypes()) {
    Schema.Type branchType = branch.getType();
    assertThat("Invalid union types", branchType, anyOf(is(Schema.Type.STRING), is(Schema.Type.NULL)));
  }
}
 
Example 9
Source File: SchemaUtil.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Walks a schema depth-first and folds it into a result through the visitor.
 * <p>
 * Children (record fields, union branches, array elements, map values) are
 * visited before their parent, and their results are passed to the matching
 * visitor callback. Every other schema type goes to
 * {@code visitor.primitive}. Record names are tracked on
 * {@code visitor.recordLevels} while a record is being visited so that a
 * recursive record is rejected.
 *
 * @param schema the schema to walk
 * @param visitor receives the per-node callbacks
 * @return the visitor's result for {@code schema}
 * @throws IllegalStateException if a record is nested inside itself
 */
public static <T> T visit(Schema schema, SchemaVisitor<T> visitor) {
  switch (schema.getType()) {
    case RECORD: {
      // A name already on the stack means this record contains itself.
      String recordName = schema.getFullName();
      Preconditions.checkState(!visitor.recordLevels.contains(recordName),
          "Cannot process recursive Avro record %s", recordName);

      visitor.recordLevels.push(recordName);

      List<Schema.Field> fields = schema.getFields();
      int fieldCount = fields.size();
      List<String> fieldNames = Lists.newArrayListWithExpectedSize(fieldCount);
      List<T> fieldResults = Lists.newArrayListWithExpectedSize(fieldCount);
      for (Schema.Field field : fields) {
        fieldNames.add(field.name());
        fieldResults.add(visit(field.schema(), visitor));
      }

      visitor.recordLevels.pop();

      return visitor.record(schema, fieldNames, fieldResults);
    }

    case UNION: {
      List<Schema> branches = schema.getTypes();
      List<T> branchResults = Lists.newArrayListWithExpectedSize(branches.size());
      for (Schema branch : branches) {
        branchResults.add(visit(branch, visitor));
      }
      return visitor.union(schema, branchResults);
    }

    case ARRAY: {
      T elementResult = visit(schema.getElementType(), visitor);
      return visitor.array(schema, elementResult);
    }

    case MAP: {
      T valueResult = visit(schema.getValueType(), visitor);
      return visitor.map(schema, valueResult);
    }

    default:
      return visitor.primitive(schema);
  }
}
 
Example 10
Source File: AvroSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Avro union into the corresponding type.
 * <p>
 * NULL branches are dropped; if one is present a REQUIRED repetition is
 * relaxed to OPTIONAL. A union of NULL plus exactly one other branch is
 * converted as a plain field of that branch. Everything else (including a
 * single-branch union with no NULL) becomes a union group.
 *
 * @param fieldName name for the resulting type
 * @param schema the union schema
 * @param repetition the requested repetition
 * @return the converted type
 * @throws UnsupportedOperationException if the union has no non-null branch
 */
private Type convertUnion(String fieldName, Schema schema, Type.Repetition repetition) {
  List<Schema> branches = schema.getTypes();
  List<Schema> nonNullSchemas = new ArrayList<Schema>(branches.size());
  // Remember whether a NULL branch was seen: it decides how a union with a
  // single remaining type is converted.
  boolean foundNullSchema = false;
  for (Schema branch : branches) {
    if (!branch.getType().equals(Schema.Type.NULL)) {
      nonNullSchemas.add(branch);
      continue;
    }
    foundNullSchema = true;
    if (repetition == Type.Repetition.REQUIRED) {
      repetition = Type.Repetition.OPTIONAL;
    }
  }

  if (nonNullSchemas.isEmpty()) {
    throw new UnsupportedOperationException("Cannot convert Avro union of only nulls");
  }
  // NULL plus exactly one other type is just an optional field.
  if (foundNullSchema && nonNullSchemas.size() == 1) {
    return convertField(fieldName, nonNullSchemas.get(0), repetition);
  }
  // Otherwise build a union container.
  return convertUnionToGroupType(fieldName, repetition, nonNullSchemas);
}
 
Example 11
Source File: EntityAccessor.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a (possibly nested) field value from an entity by name.
 * <p>
 * The resolved chain of fields for each name is cached. On a cache miss the
 * name is split with {@code SchemaUtil.NAME_SPLITTER} and resolved one level
 * at a time against the schema, reading the value along the way.
 *
 * @param object the entity to read from
 * @param name the field name, as understood by {@code SchemaUtil.NAME_SPLITTER}
 * @return the value found at that name
 */
public Object get(E object, String name) {
  List<Schema.Field> cachedPath = cache.get(name);
  if (cachedPath != null) {
    return get(object, cachedPath);
  }

  Object current = object;
  List<Schema.Field> path = Lists.newArrayList();
  Schema nested = schema;
  for (String level : SchemaUtil.NAME_SPLITTER.split(name)) {
    // assume that the nested schemas are Records or nullable Records
    // this is checked by SchemaUtil.fieldSchema(Schema, String)
    if (nested.getType() == Schema.Type.UNION) {
      // nullable Records are not allowed in partition fields, but the read
      // schema may contain nullable records when using reflection.
      List<Schema> branches = nested.getTypes();
      Schema first = branches.get(0);
      nested = (first.getType() == Schema.Type.NULL) ? branches.get(1) : first;
    }
    Schema.Field field = nested.getField(level);
    path.add(field);
    nested = field.schema();
    current = model.getField(current, level, field.pos());
  }
  cache.put(name, path);

  return current;
}
 
Example 12
Source File: SchemaTraverser.java    From data-highway with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively walks a schema, notifying the visitor of every schema and
 * record field together with a snapshot of the current breadcrumb path.
 * <p>
 * Record fields push their name onto the breadcrumb, array elements and map
 * values push "*", and union branches push their zero-based position.
 *
 * @param schema the schema to walk
 * @param visitor receives the notifications
 * @param breadcrumb path from the root to {@code schema}; restored on return
 * @return the visitor's result when the breadcrumb is empty on completion,
 *         otherwise {@code null}
 */
private static <T> T traverse(Schema schema, Visitor<T> visitor, Stack<String> breadcrumb) {
  visitor.onVisit(schema, ImmutableList.copyOf(breadcrumb));
  switch (schema.getType()) {
  case RECORD: {
    for (Field recordField : schema.getFields()) {
      breadcrumb.push(recordField.name());
      visitor.onVisit(recordField, ImmutableList.copyOf(breadcrumb));
      traverse(recordField.schema(), visitor, breadcrumb);
      breadcrumb.pop();
    }
    break;
  }
  case ARRAY: {
    breadcrumb.push("*");
    traverse(schema.getElementType(), visitor, breadcrumb);
    breadcrumb.pop();
    break;
  }
  case MAP: {
    breadcrumb.push("*");
    traverse(schema.getValueType(), visitor, breadcrumb);
    breadcrumb.pop();
    break;
  }
  case UNION: {
    int branchIndex = 0;
    for (Schema branch : schema.getTypes()) {
      breadcrumb.push(Integer.toString(branchIndex));
      branchIndex++;
      traverse(branch, visitor, breadcrumb);
      breadcrumb.pop();
    }
    break;
  }
  default:
    break;
  }
  // Only a call that ends with an empty breadcrumb hands back the result.
  return breadcrumb.isEmpty() ? visitor.getResult() : null;
}
 
Example 13
Source File: AvroUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a schema is a union that contains a NULL branch.
 *
 * @param schema the schema to inspect
 * @return {@code true} only for a union with a NULL branch
 */
private static boolean isNullable(Schema schema) {
  if (!schema.getType().equals(UNION)) {
    return false;
  }
  for (Schema branch : schema.getTypes()) {
    if (branch.getType().equals(NULL)) {
      return true;
    }
  }
  return false;
}
 
Example 14
Source File: SchemaUtil.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a union of two {@link Schema} instances.
 * <p>
 * When exactly one side is a union, the other schema is merged (via
 * {@code mergeOnly}) into the first branch that accepts it; if no branch
 * accepts it, it is appended as a new branch. When both sides are unions,
 * each branch of {@code right} is folded into {@code left} this way, one at
 * a time.
 * <p>
 * When neither side is a union, no merge is attempted and a two-branch union
 * is produced.
 *
 * @param left a {@code Schema}
 * @param right a {@code Schema}
 * @return a UNION schema of the two {@code Schema} instances
 */
private static Schema union(Schema left, Schema right) {
  boolean leftIsUnion = left.getType() == Schema.Type.UNION;
  boolean rightIsUnion = right.getType() == Schema.Type.UNION;

  if (!leftIsUnion) {
    if (rightIsUnion) {
      // Normalize so the union is always on the left.
      return union(right, left);
    }
    return Schema.createUnion(ImmutableList.of(left, right));
  }

  if (rightIsUnion) {
    // combine the unions by adding each type in right individually
    Schema combined = left;
    for (Schema branch : right.getTypes()) {
      combined = union(combined, branch);
    }
    return combined;
  }

  // Union on the left, plain schema on the right: merge right into the first
  // branch that accepts it and copy every other branch through untouched.
  List<Schema> branches = Lists.newArrayList();
  boolean merged = false;
  for (Schema branch : left.getTypes()) {
    if (!merged) {
      Schema mergedBranch = mergeOnly(branch, right);
      if (mergedBranch != null) {
        branches.add(mergedBranch);
        merged = true;
        continue;
      }
    }
    branches.add(branch);
  }
  if (!merged) {
    branches.add(right);
  }

  return Schema.createUnion(branches);
}
 
Example 15
Source File: ParquetRecordReaderTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the non-null side of a two-branch union: the second branch when the
 * first is NULL, otherwise the first branch.
 *
 * @param o a union schema with exactly two branches
 * @return the selected branch
 */
private Schema unWrapSchema(Schema o) {
	List<Schema> branches = o.getTypes();
	Preconditions.checkArgument(branches.size() == 2, "Invalid union type");
	Schema first = branches.get(0);
	if (first.getType() == NULL) {
		return branches.get(1);
	}
	return first;
}
 
Example 16
Source File: AvroRowSerializationSchema.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a value into the object form matching the given Avro schema.
 * <p>
 * Containers (records, arrays, maps) are converted recursively. A union is
 * resolved to its single branch, or to the non-null branch of a two-branch
 * nullable union; any other union leaves the value untouched. Decimal, date,
 * time and timestamp values are routed to their dedicated converters.
 *
 * @param schema the Avro schema describing the target form
 * @param object the value to convert; may be {@code null}
 * @return the converted value, or {@code null} when {@code object} is null
 * @throws IllegalStateException if a RECORD schema is paired with a non-Row
 * @throws RuntimeException if the schema type is not handled
 */
private Object convertFlinkType(Schema schema, Object object) {
	if (object == null) {
		return null;
	}
	switch (schema.getType()) {
		case RECORD: {
			if (!(object instanceof Row)) {
				throw new IllegalStateException("Row expected but was: " + object.getClass());
			}
			return convertRowToAvroRecord(schema, (Row) object);
		}
		case ENUM:
			return new GenericData.EnumSymbol(schema, object.toString());
		case ARRAY: {
			final Schema itemSchema = schema.getElementType();
			final Object[] items = (Object[]) object;
			final GenericData.Array<Object> avroArray = new GenericData.Array<>(items.length, schema);
			for (Object item : items) {
				avroArray.add(convertFlinkType(itemSchema, item));
			}
			return avroArray;
		}
		case MAP: {
			final Map<?, ?> source = (Map<?, ?>) object;
			final Schema valueSchema = schema.getValueType();
			final Map<Utf8, Object> avroMap = new HashMap<>();
			for (Map.Entry<?, ?> entry : source.entrySet()) {
				final Utf8 avroKey = new Utf8(entry.getKey().toString());
				avroMap.put(avroKey, convertFlinkType(valueSchema, entry.getValue()));
			}
			return avroMap;
		}
		case UNION: {
			final List<Schema> branches = schema.getTypes();
			final int branchCount = branches.size();
			if (branchCount == 1) {
				return convertFlinkType(branches.get(0), object);
			}
			if (branchCount == 2) {
				if (branches.get(0).getType() == Schema.Type.NULL) {
					return convertFlinkType(branches.get(1), object);
				}
				if (branches.get(1).getType() == Schema.Type.NULL) {
					return convertFlinkType(branches.get(0), object);
				}
			}
			// generic type
			return object;
		}
		case FIXED: {
			// a BigDecimal indicates a decimal logical type
			final byte[] fixedBytes = (object instanceof BigDecimal)
				? convertFromDecimal(schema, (BigDecimal) object)
				: (byte[]) object;
			return new GenericData.Fixed(schema, fixedBytes);
		}
		case STRING:
			return new Utf8(object.toString());
		case BYTES: {
			// a BigDecimal indicates a decimal logical type
			final byte[] rawBytes = (object instanceof BigDecimal)
				? convertFromDecimal(schema, (BigDecimal) object)
				: (byte[]) object;
			return ByteBuffer.wrap(rawBytes);
		}
		case INT: {
			// Date and Time values indicate logical types
			if (object instanceof Date) {
				return convertFromDate(schema, (Date) object);
			}
			if (object instanceof Time) {
				return convertFromTime(schema, (Time) object);
			}
			return object;
		}
		case LONG: {
			// a Timestamp value indicates a logical type
			if (object instanceof Timestamp) {
				return convertFromTimestamp(schema, (Timestamp) object);
			}
			return object;
		}
		case FLOAT:
		case DOUBLE:
		case BOOLEAN:
			return object;
	}
	throw new RuntimeException("Unsupported Avro type:" + schema);
}
 
Example 17
Source File: AvroKeySerDe.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a partition key to bytes by writing it as an Avro record through
 * a {@code MemcmpEncoder}.
 * <p>
 * A key as long as the full schema uses that schema; a shorter key uses the
 * entry of {@code partialSchemas} for its length. A null key part is accepted
 * only when the matching field is of NULL type or is a union with a NULL
 * branch.
 *
 * @param key the partition key to serialize
 * @return the encoded bytes
 * @throws DatasetException if a key part is null but its field cannot hold null
 */
@Override
public byte[] serialize(PartitionKey key) {
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  Encoder encoder = new MemcmpEncoder(outputStream);

  final int keyLength = key.getLength();
  Schema schemaToUse = (keyLength == schema.getFields().size())
      ? schema
      : partialSchemas[keyLength - 1];
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(
      schemaToUse);
  GenericRecord record = new GenericData.Record(schemaToUse);
  for (int i = 0; i < keyLength; i++) {
    Object keyPart = key.get(i);
    if (keyPart == null) {
      // keyPart is null, let's make sure we check that the key can support a
      // null value so we can throw a friendly exception if it can't.
      Schema fieldSchema = schemaToUse.getFields().get(i).schema();
      Schema.Type fieldType = fieldSchema.getType();
      if (fieldType == Schema.Type.UNION) {
        boolean hasNullBranch = false;
        for (Schema branch : fieldSchema.getTypes()) {
          if (branch.getType() == Schema.Type.NULL) {
            hasNullBranch = true;
            break;
          }
        }
        if (!hasNullBranch) {
          throw new DatasetException(
              "Null key field only supported in union type that has a null type.");
        }
      } else if (fieldType != Schema.Type.NULL) {
        throw new DatasetException(
            "Null key field only supported in null type or union type that has a null type.");
      }
    }
    record.put(i, keyPart);
  }
  AvroUtils.writeAvroEntity(record, encoder, datumWriter);
  return outputStream.toByteArray();
}
 
Example 18
Source File: MercifulJsonConverter.java    From hudi with Apache License 2.0 4 votes vote down vote up
/**
 * Picks a branch from a union: the second branch when the first is NULL,
 * otherwise the first branch.
 *
 * @param schema a union schema
 * @return the selected branch
 */
private static Schema getNonNull(Schema schema) {
  List<Schema> branches = schema.getTypes();
  Schema first = branches.get(0);
  if (first.getType().equals(Schema.Type.NULL)) {
    return branches.get(1);
  }
  return first;
}
 
Example 19
Source File: AvroUtil.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 4 votes vote down vote up
/**
 * Convert from Avro type to Sqoop's java representation of the SQL type
 * see SqlManager#toJavaType
 * <p>
 * The Avro schema type selects the conversion, and {@code type} refines it
 * for LONG, BYTES and STRING values. A union is supported only in the form
 * of exactly two branches, one of which is NULL; the value is then converted
 * against the other branch.
 *
 * @param avroObject the Avro value; may be {@code null}
 * @param schema the Avro schema describing {@code avroObject}
 * @param type the target type name, compared against the *_TYPE constants
 * @return the converted value, or {@code null} for a null input or NULL schema
 * @throws UnsupportedOperationException for BYTES values targeting a BlobRef
 * @throws IllegalArgumentException for unsupported unions and for RECORD,
 *         ARRAY, MAP or any other unhandled schema type
 */
public static Object fromAvro(Object avroObject, Schema schema, String type) {
  if (avroObject == null) {
    return null;
  }

  switch (schema.getType()) {
    case NULL:
      return null;
    case BOOLEAN:
    case INT:
    case FLOAT:
    case DOUBLE:
      return avroObject;
    case LONG: {
      if (type.equals(DATE_TYPE)) {
        return new Date((Long) avroObject);
      }
      if (type.equals(TIME_TYPE)) {
        return new Time((Long) avroObject);
      }
      if (type.equals(TIMESTAMP_TYPE)) {
        return new Timestamp((Long) avroObject);
      }
      return avroObject;
    }
    case BYTES: {
      ByteBuffer buffer = (ByteBuffer) avroObject;
      BytesWritable writable = new BytesWritable();
      writable.set(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
      if (type.equals(BLOB_REF_TYPE)) {
        // TODO: Should convert BytesWritable to BlobRef properly. (SQOOP-991)
        throw new UnsupportedOperationException("BlobRef not supported");
      }
      return writable;
    }
    case STRING: {
      if (type.equals(BIG_DECIMAL_TYPE)) {
        return new BigDecimal(avroObject.toString());
      }
      if (type.equals(DATE_TYPE)) {
        return Date.valueOf(avroObject.toString());
      }
      if (type.equals(TIME_TYPE)) {
        return Time.valueOf(avroObject.toString());
      }
      if (type.equals(TIMESTAMP_TYPE)) {
        return Timestamp.valueOf(avroObject.toString());
      }
      return avroObject.toString();
    }
    case ENUM:
      return avroObject.toString();
    case UNION: {
      List<Schema> branches = schema.getTypes();
      if (branches.size() != 2) {
        throw new IllegalArgumentException("Only support union with null");
      }
      Schema first = branches.get(0);
      Schema second = branches.get(1);
      // Convert against whichever branch is not the NULL one.
      if (first.getType() == Schema.Type.NULL) {
        return fromAvro(avroObject, second, type);
      }
      if (second.getType() == Schema.Type.NULL) {
        return fromAvro(avroObject, first, type);
      }
      throw new IllegalArgumentException("Only support union with null");
    }
    case FIXED:
      return new BytesWritable(((GenericFixed) avroObject).bytes());
    case RECORD:
    case ARRAY:
    case MAP:
    default:
      throw new IllegalArgumentException("Cannot convert Avro type "
          + schema.getType());
  }
}
 
Example 20
Source File: SchemaMerge.java    From funcj with MIT License 4 votes vote down vote up
/**
 * Merges two Avro schemas into one schema that covers both.
 * <p>
 * If the two schemas are considered the same, {@code lhs} is returned as is:
 * <ul>
 *   <li>primitive types: the schema types are identical;</li>
 *   <li>ENUM, FIXED, RECORD: the schemas are {@code equals};</li>
 *   <li>ARRAY: both are arrays with equal element types;</li>
 *   <li>MAP: both are maps with equal value types.</li>
 * </ul>
 * Otherwise a flat union is produced. Branches of a union operand are added
 * individually, never as a nested union. Duplicates are dropped and the
 * order is deterministic: branches of {@code lhs} first, then those of
 * {@code rhs}.
 *
 * @param lhs the left-hand schema
 * @param rhs the right-hand schema
 * @return {@code lhs} when both schemas match, otherwise a union of the two
 * @throws CodecException if {@code lhs} has an unexpected schema type
 */
public static Schema merge(Schema lhs, Schema rhs) {
    final Schema.Type lhsType = lhs.getType();
    final Schema.Type rhsType = rhs.getType();

    switch (lhsType) {
        case STRING:
        case BYTES:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case BOOLEAN:
        case NULL:
            if (lhsType == rhsType) {
                return lhs;
            }
            break;
        case ENUM:
        case FIXED:
        case RECORD:
            if (lhs.equals(rhs)) {
                return lhs;
            }
            break;
        case ARRAY:
            // Arrays expose their item schema through getElementType();
            // getValueType() is only valid on maps. The type check guards
            // against calling the accessor on a non-array rhs.
            if (rhsType == Schema.Type.ARRAY
                    && lhs.getElementType().equals(rhs.getElementType())) {
                return lhs;
            }
            break;
        case MAP:
            // Only inspect the value type once rhs is known to be a map.
            if (rhsType == Schema.Type.MAP
                    && lhs.getValueType().equals(rhs.getValueType())) {
                return lhs;
            }
            break;
        case UNION:
            // Handled by the flattening logic below.
            break;
        default:
            throw new CodecException("Unexpected schema type - " + lhsType);
    }

    // Build a flat union. An insertion-ordered set keeps the branch order
    // stable while removing duplicates. Fully qualified so the block does
    // not depend on an import that is not visible here.
    final Set<Schema> branches = new java.util.LinkedHashSet<>();
    if (lhsType == Schema.Type.UNION) {
        branches.addAll(lhs.getTypes());
    } else {
        branches.add(lhs);
    }
    if (rhsType == Schema.Type.UNION) {
        branches.addAll(rhs.getTypes());
    } else {
        branches.add(rhs);
    }
    return Schema.createUnion(new ArrayList<>(branches));
}