Java Code Examples for org.apache.avro.Schema.Type#RECORD

The following examples show how to use org.apache.avro.Schema.Type#RECORD. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BigQueryAvroUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Filters a BigQuery field schema against an Avro schema.
 *
 * <p>Yields an empty stream when {@code avroSchema} has no field with the same name, the field
 * schema itself when the matching Avro field is not of type RECORD, and otherwise a new
 * {@link TableFieldSchema} that copies the field's attributes and keeps only the sub-fields
 * that survive the same filtering against the nested Avro record schema.
 *
 * @param fieldSchema the BigQuery field schema to filter
 * @param avroSchema the Avro schema in which the field is looked up by name
 * @return a stream holding zero or one field schema
 */
private static Stream<TableFieldSchema> mapTableFieldSchema(
    TableFieldSchema fieldSchema, Schema avroSchema) {
  Field matchingAvroField = avroSchema.getField(fieldSchema.getName());
  if (matchingAvroField == null) {
    return Stream.empty();
  }

  Schema nestedAvroSchema = matchingAvroField.schema();
  if (nestedAvroSchema.getType() != Type.RECORD) {
    return Stream.of(fieldSchema);
  }

  // Recurse into the record: each sub-field is filtered against the nested Avro schema.
  List<TableFieldSchema> keptSubFields =
      fieldSchema.getFields().stream()
          .flatMap(child -> mapTableFieldSchema(child, nestedAvroSchema))
          .collect(Collectors.toList());

  TableFieldSchema filtered = new TableFieldSchema();
  filtered.setCategories(fieldSchema.getCategories());
  filtered.setDescription(fieldSchema.getDescription());
  filtered.setFields(keptSubFields);
  filtered.setMode(fieldSchema.getMode());
  filtered.setName(fieldSchema.getName());
  filtered.setType(fieldSchema.getType());
  return Stream.of(filtered);
}
 
Example 2
Source File: RootSchemaUtils.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether the given schema is a Root schema.
 * <p>
 * A schema qualifies when it is non-null, is named "Root", has type RECORD and
 * contains both the {@code MAIN_FIELD_NAME} and {@code OUTOFBAND_FIELD_NAME} fields.
 * The schema types of those two fields are not checked.
 *
 * @param schema avro schema to inspect; may be null
 * @return true if the given schema is Root; false otherwise
 */
public static boolean isRootSchema(final Schema schema) {
    // The type check must come before the field lookups below.
    if (schema == null || !"Root".equals(schema.getName()) || Type.RECORD != schema.getType()) {
        return false;
    }
    return schema.getField(MAIN_FIELD_NAME) != null
            && schema.getField(OUTOFBAND_FIELD_NAME) != null;
}
 
Example 3
Source File: AvroUtils.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a column {@code DataType} to the corresponding Avro {@code Type}.
 *
 * TUPLE maps to RECORD, BAG to ARRAY and MAP to MAP. Every other data type is
 * resolved by upper-casing its {@code toString()} form and looking it up with
 * {@code Type.valueOf}.
 *
 * @param colType the column data type to convert
 * @return the matching Avro type
 * @throws IllegalArgumentException if the upper-cased name is not an Avro type constant
 */
private static Type convertToAvroType(DataType colType)
{
    if (colType == DataType.TUPLE)
    {
        /* Pig converts RECORD to TUPLE. Converting it back. */
        return Type.RECORD;
    }
    if (colType == DataType.BAG)
    {
        return Type.ARRAY;
    }
    if (colType == DataType.MAP)
    {
        return Type.MAP;
    }
    // Upper-case with a fixed locale: under e.g. a Turkish default locale "int"
    // would become "İNT" and the enum lookup would fail.
    return Type.valueOf(colType.toString().toUpperCase(java.util.Locale.ROOT));
}
 
Example 4
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Recursive worker behind {@link #getFieldSchema(Schema, String)}: walks {@code pathList}
 * through the given schema, one path element per MAP or RECORD level.
 *
 * @param schema passed from {@link #getFieldSchema(Schema, String)}
 * @param pathList passed from {@link #getFieldSchema(Schema, String)}
 * @param field index of the element of pathList that is resolved at this level
 * @return the schema of the field, or absent if a record on the path lacks the named field
 */
private static Optional<Schema> getFieldSchemaHelper(Schema schema, List<String> pathList, int field) {
  switch (schema.getType()) {
    case UNION: {
      if (!AvroSerdeUtils.isNullableType(schema)) {
        throw new AvroRuntimeException("Union of complex types cannot be handled : " + schema);
      }
      // A nullable union is transparent: resolve the same path element against its other type.
      return getFieldSchemaHelper(AvroSerdeUtils.getOtherTypeFromNullableType(schema), pathList, field);
    }
    case MAP: {
      Schema valueSchema = schema.getValueType();
      if (field + 1 == pathList.size()) {
        return Optional.fromNullable(valueSchema);
      }
      return getFieldSchemaHelper(valueSchema, pathList, field + 1);
    }
    case RECORD: {
      Schema.Field child = schema.getField(pathList.get(field));
      if (child == null) {
        return Optional.absent();
      }
      Schema childSchema = child.schema();
      if (field + 1 == pathList.size()) {
        return Optional.fromNullable(childSchema);
      }
      return getFieldSchemaHelper(childSchema, pathList, field + 1);
    }
    default:
      throw new AvroRuntimeException("Invalid type in schema : " + schema);
  }
}
 
Example 5
Source File: AvroStorageDataConversionUtilities.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Packs a Pig DataBag into an Avro array.
 *
 * Each tuple of the bag becomes one array element: when the element type of
 * the schema is RECORD the tuple is packed with {@code packIntoAvro(Tuple, Schema)},
 * otherwise the tuple must hold exactly one value, which is added as is.
 *
 * @param db the Pig databag to pack into the avro array
 * @param s The avro schema for which to determine the type
 * @return the avro array corresponding to the input bag
 * @throws IOException if a tuple cannot be packed, or wrapping any other
 *     exception raised during the conversion
 */
public static GenericData.Array<Object> packIntoAvro(
    final DataBag db, final Schema s) throws IOException {

  try {
    // Plain narrowing cast; same result as the deprecated new Long(..).intValue().
    GenericData.Array<Object> array
      = new GenericData.Array<Object>((int) db.size(), s);
    for (Tuple t : db) {
      if (s.getElementType() != null
          && s.getElementType().getType() == Type.RECORD) {
        array.add(packIntoAvro(t, s.getElementType()));
      } else if (t.size() == 1) {
        array.add(t.get(0));
      } else {
        throw new IOException(
            "AvroStorageDataConversionUtilities.packIntoAvro: Can't pack "
                + t + " into schema " + s);
      }
    }
    return array;
  } catch (Exception e) {
    // Note: this also catches the IOException thrown above and wraps it as the cause.
    throw new IOException(
        "exception in AvroStorageDataConversionUtilities.packIntoAvro", e);
  }
}
 
Example 6
Source File: SegmentTestUtils.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the column {@code DataType} for an Avro field.
 *
 * The field schema is first passed through {@code extractSchemaFromUnionIfNeeded}. For a
 * non-array schema the type is converted directly. For an array, the element schema is
 * used instead; an element of type RECORD must have exactly one field, whose schema
 * then supplies the type.
 *
 * @param field the Avro field to inspect
 * @return the data type produced by {@code AvroSchemaUtil.valueOf}
 * @throws RuntimeException if an array element record does not have exactly one field
 */
public static DataType getColumnType(Field field) {
  final org.apache.avro.Schema unwrapped = extractSchemaFromUnionIfNeeded(field.schema());
  final Type type = unwrapped.getType();
  if (type != Type.ARRAY) {
    return AvroSchemaUtil.valueOf(type);
  }

  org.apache.avro.Schema elementSchema = extractSchemaFromUnionIfNeeded(unwrapped.getElementType());
  if (elementSchema.getType() == Type.RECORD) {
    if (elementSchema.getFields().size() != 1) {
      throw new RuntimeException("More than one schema in Multi-value column!");
    }
    // Single-field wrapper record: the column type comes from that one field.
    elementSchema = extractSchemaFromUnionIfNeeded(elementSchema.getFields().get(0).schema());
  }
  return AvroSchemaUtil.valueOf(elementSchema.getType());
}
 
Example 7
Source File: BigQueryAvroUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Avro {@code Field} for a BigQuery field schema.
 *
 * The element schema comes from the first Avro type registered for the BigQuery type in
 * {@code BIG_QUERY_TO_AVRO_TYPES} (a RECORD is expanded through {@code toGenericAvroSchema}).
 * The mode then decides the wrapping: null or NULLABLE gives a union with NULL, REQUIRED
 * uses the element schema as is, REPEATED gives an array of the element schema.
 *
 * @param bigQueryField the BigQuery field to convert
 * @return an Avro field carrying the BigQuery field's name and description
 * @throws IllegalArgumentException if the BigQuery type has no Avro mapping or the mode is unknown
 */
private static Field convertField(TableFieldSchema bigQueryField) {
  ImmutableCollection<Type> candidateTypes =
      BIG_QUERY_TO_AVRO_TYPES.get(bigQueryField.getType());
  if (candidateTypes.isEmpty()) {
    throw new IllegalArgumentException(
        "Unable to map BigQuery field type " + bigQueryField.getType() + " to avro type.");
  }

  Type primaryType = candidateTypes.iterator().next();
  Schema elementSchema =
      primaryType == Type.RECORD
          ? toGenericAvroSchema(bigQueryField.getName(), bigQueryField.getFields())
          : Schema.create(primaryType);

  String mode = bigQueryField.getMode();
  Schema fieldSchema;
  // A missing mode is treated the same as NULLABLE.
  switch (mode == null ? "NULLABLE" : mode) {
    case "NULLABLE":
      fieldSchema = Schema.createUnion(Schema.create(Type.NULL), elementSchema);
      break;
    case "REQUIRED":
      fieldSchema = elementSchema;
      break;
    case "REPEATED":
      fieldSchema = Schema.createArray(elementSchema);
      break;
    default:
      throw new IllegalArgumentException(
          String.format("Unknown BigQuery Field Mode: %s", mode));
  }

  return new Field(
      bigQueryField.getName(),
      fieldSchema,
      bigQueryField.getDescription(),
      (Object) null /* Cast to avoid deprecated JsonNode constructor. */);
}
 
Example 8
Source File: AvroRecordInputFormat.java    From stratosphere with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that the schema is of a supported shape and returns its effective type.
 *
 * RECORD schemas are rejected. A UNION is accepted only if it has one member, or two
 * members of which exactly one is NULL; the type of the non-null (or only) member is
 * returned. Any other schema type is returned unchanged.
 *
 * @param schema the Avro schema to check
 * @return the schema type, or for a supported union the type of its non-null member
 * @throws RuntimeException if the schema is a record, or a union that is empty, nested,
 *     has more than two members, or has two non-null members
 */
private final Type checkTypeConstraintsAndGetType(final Schema schema) {
	final Type type = schema.getType();
	if (type == Type.RECORD) {
		throw new RuntimeException("The given Avro file contains complex data types which are not supported right now");
	}
	if (type != Type.UNION) {
		return type;
	}

	final List<Schema> types = schema.getTypes();
	// Reject an empty union explicitly rather than failing later on types.get(0).
	if (types.isEmpty()) {
		throw new RuntimeException("The given Avro file contains an empty union");
	}
	if (types.size() > 2) {
		throw new RuntimeException("The given Avro file contains a union that has more than two elements");
	}
	// Checking every member also covers a single-member union whose member is a union,
	// which previously fell through to types.get(1) and failed with an index error.
	for (Schema member : types) {
		if (member.getType() == Type.UNION) {
			throw new RuntimeException("The given Avro file contains a nested union");
		}
	}
	if (types.size() == 1) {
		return types.get(0).getType();
	}

	final Type first = types.get(0).getType();
	final Type second = types.get(1).getType();
	if (first == Type.NULL) {
		return second;
	}
	if (second != Type.NULL) {
		throw new RuntimeException("The given Avro file is contains a union with two non-null types.");
	}
	return first;
}
 
Example 9
Source File: DataHighwaySchemaValidator.java    From data-highway with Apache License 2.0 4 votes vote down vote up
/**
 * Ensures that the given schema is of type RECORD.
 *
 * @param schema the schema whose type is checked
 * @throws SchemaValidationException if the schema type is anything other than RECORD
 */
private static void validateIsRecord(Schema schema) throws SchemaValidationException {
  if (schema.getType() == Type.RECORD) {
    return;
  }
  String message =
      String.format("Unexpected schema root type '%s', expected '%s'", schema.getType(), Type.RECORD);
  throw new SchemaValidationException(message);
}
 
Example 10
Source File: CSVUtils.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Stores a value for {@code field} in {@code avroRecord}, converting the provided
 * string according to the given Avro type.
 *
 * For INT, BOOLEAN, DOUBLE, FLOAT, LONG and STRING the string is parsed to the matching
 * Java type; when {@code providedValue} is null the value comes from
 * {@code possiblyGetDefaultValue} instead. BYTES values go through
 * {@code encodeLogicalType}. For a UNION the method is applied once per member type of
 * the field's schema. NULL is a no-op.
 *
 * @param field the Avro field being populated
 * @param type the Avro type to convert to (for unions, one of the member types)
 * @param providedValue the textual value, or null to use the field default
 * @param avroRecord the record that receives the value
 * @throws IllegalArgumentException for ARRAY, ENUM, FIXED, MAP and RECORD types
 */
private static void updateRecord(Field field, Type type, String providedValue, Record avroRecord) {
    if (Type.NULL == type) {
        return;
    }

    Object value;
    if (Type.INT == type) {
        value = null == providedValue ? possiblyGetDefaultValue(field, IntNode.class).getIntValue()
                : Integer.parseInt(providedValue);
        avroRecord.put(field.name(), value);
    } else if (Type.BOOLEAN == type) {
        value = null == providedValue
                ? possiblyGetDefaultValue(field, BooleanNode.class).getBooleanValue()
                : Boolean.parseBoolean(providedValue);
        avroRecord.put(field.name(), value);
    } else if (Type.DOUBLE == type) {
        value = null == providedValue ? possiblyGetDefaultValue(field, DoubleNode.class).getDoubleValue()
                : Double.parseDouble(providedValue);
        avroRecord.put(field.name(), value);
    } else if (Type.FLOAT == type) {
        // Cast the default to float: a double/float ternary is promoted to double, which
        // would store a Double in a float field.
        value = null == providedValue
                ? (float) possiblyGetDefaultValue(field, DoubleNode.class).getDoubleValue()
                : Float.parseFloat(providedValue);
        avroRecord.put(field.name(), value);
    } else if (Type.LONG == type) {
        value = null == providedValue ? possiblyGetDefaultValue(field, LongNode.class).getLongValue()
                : Long.parseLong(providedValue);
        avroRecord.put(field.name(), value);
    } else if (Type.STRING == type) {
        value = null == providedValue ? possiblyGetDefaultValue(field, TextNode.class).getTextValue()
                : providedValue;
        avroRecord.put(field.name(), value);
    } else if (Type.BYTES == type) {
        value = encodeLogicalType(field, providedValue);
        avroRecord.put(field.name(), value);
    } else if (Type.UNION == type) {
        // Apply the conversion once per member type; NULL members are skipped by the guard above.
        field.schema().getTypes()
                .forEach(schema -> updateRecord(field, schema.getType(), providedValue, avroRecord));
    } else if (Type.ARRAY == type || Type.ENUM == type || Type.FIXED == type || Type.MAP == type
            || Type.RECORD == type) {
        throw new IllegalArgumentException("The field type '" + type + "' is not supported at the moment");
    } else {
        avroRecord.put(field.name(), providedValue);
    }
}