Java Code Examples for org.apache.avro.Schema#getName()

The following examples show how to use org.apache.avro.Schema#getName(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroWriteSupportInt96Avro17.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the elements of {@code collection} to the record consumer as a
 * repeated list field.
 *
 * <p>An empty collection emits nothing. Otherwise every element is wrapped in
 * its own group inside the {@code LIST_REPEATED_NAME} field; a {@code null}
 * element leaves that group empty, which is only accepted when the element
 * type is {@code OPTIONAL}.
 *
 * @param type group type whose first child is the repeated group that holds the element type
 * @param schema Avro schema supplying the element schema and the name used in the error message
 * @param collection values to write
 * @throws RuntimeException if an element is {@code null} and the element type is not optional
 */
@Override
protected void writeCollection(GroupType type, Schema schema, Collection collection) {
  if (collection.size() == 0) {
    return;
  }

  recordConsumer.startField(LIST_REPEATED_NAME, 0);
  final GroupType middleLayer = type.getType(0).asGroupType();
  final Type itemType = middleLayer.getType(0);
  for (Object item : collection) {
    // one wrapper group per entry of the repeated field
    recordConsumer.startGroup();
    if (item == null) {
      if (!itemType.isRepetition(Type.Repetition.OPTIONAL)) {
        throw new RuntimeException(
            "Null list element for " + schema.getName());
      }
    } else {
      recordConsumer.startField(LIST_ELEMENT_NAME, 0);
      writeValue(itemType, schema.getElementType(), item);
      recordConsumer.endField(LIST_ELEMENT_NAME, 0);
    }
    recordConsumer.endGroup();
  }
  recordConsumer.endField(LIST_REPEATED_NAME, 0);
}
 
Example 2
Source File: AvroWriteSupportInt96Avro18.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the entries of {@code array} to the record consumer as a repeated
 * list field.
 *
 * <p>A zero-length array emits nothing. Each entry is wrapped in its own group
 * inside the {@code LIST_REPEATED_NAME} field; a {@code null} entry produces an
 * empty group and is rejected unless the element type is {@code OPTIONAL}.
 *
 * @param type group type whose first child is the repeated group that holds the element type
 * @param schema Avro schema supplying the element schema and the name used in the error message
 * @param array values to write
 * @throws RuntimeException if an entry is {@code null} and the element type is not optional
 */
@Override
protected void writeObjectArray(GroupType type, Schema schema,
    Object[] array) {
  if (array.length == 0) {
    return;
  }

  recordConsumer.startField(LIST_REPEATED_NAME, 0);
  final Type entryType = type.getType(0).asGroupType().getType(0);
  for (Object entry : array) {
    // wrapper group for a single entry of the repeated field
    recordConsumer.startGroup();
    if (entry == null) {
      if (!entryType.isRepetition(Type.Repetition.OPTIONAL)) {
        throw new RuntimeException(
            "Null list element for " + schema.getName());
      }
    } else {
      recordConsumer.startField(LIST_ELEMENT_NAME, 0);
      writeValue(entryType, schema.getElementType(), entry);
      recordConsumer.endField(LIST_ELEMENT_NAME, 0);
    }
    recordConsumer.endGroup();
  }
  recordConsumer.endField(LIST_REPEATED_NAME, 0);
}
 
Example 3
Source File: AvroWriteSupport.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Emits {@code array} as a repeated list field on the record consumer.
 *
 * <p>Nothing is written for a zero-length array. Otherwise each slot of the
 * array becomes one group inside {@code LIST_REPEATED_NAME}: non-null values
 * are written as the {@code LIST_ELEMENT_NAME} field of that group, while a
 * {@code null} value leaves the group empty and is only tolerated for an
 * {@code OPTIONAL} element type.
 *
 * @param type group type whose first child is the repeated group that holds the element type
 * @param schema Avro schema supplying the element schema and the name used in the error message
 * @param array values to write
 * @throws RuntimeException if a slot is {@code null} and the element type is not optional
 */
@Override
protected void writeObjectArray(GroupType type, Schema schema,
                                Object[] array) {
  if (array.length == 0) {
    return;
  }

  recordConsumer.startField(LIST_REPEATED_NAME, 0);
  final GroupType repeatedGroup = type.getType(0).asGroupType();
  final Type valueType = repeatedGroup.getType(0);
  for (Object value : array) {
    recordConsumer.startGroup(); // middle layer: one group per array slot
    final boolean missing = value == null;
    if (!missing) {
      recordConsumer.startField(LIST_ELEMENT_NAME, 0);
      writeValue(valueType, schema.getElementType(), value);
      recordConsumer.endField(LIST_ELEMENT_NAME, 0);
    } else if (!valueType.isRepetition(Type.Repetition.OPTIONAL)) {
      throw new RuntimeException(
          "Null list element for " + schema.getName());
    }
    recordConsumer.endGroup();
  }
  recordConsumer.endField(LIST_REPEATED_NAME, 0);
}
 
Example 4
Source File: AvroWriteSupportInt96Avro17.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code array} through the record consumer as a repeated list field.
 *
 * <p>Zero-length arrays produce no output. For any other array, every item is
 * enclosed in a group of the {@code LIST_REPEATED_NAME} field. A {@code null}
 * item results in an empty group, which is an error unless the element type is
 * {@code OPTIONAL}.
 *
 * @param type group type whose first child is the repeated group that holds the element type
 * @param schema Avro schema supplying the element schema and the name used in the error message
 * @param array values to write
 * @throws RuntimeException if an item is {@code null} and the element type is not optional
 */
@Override
protected void writeObjectArray(GroupType type, Schema schema,
    Object[] array) {
  if (array.length == 0) {
    return;
  }

  recordConsumer.startField(LIST_REPEATED_NAME, 0);
  final GroupType wrapperType = type.getType(0).asGroupType();
  final Type itemType = wrapperType.getType(0);
  for (int index = 0; index < array.length; index++) {
    final Object item = array[index];
    // each item lives in its own group of the repeated field
    recordConsumer.startGroup();
    if (item == null) {
      if (!itemType.isRepetition(Type.Repetition.OPTIONAL)) {
        throw new RuntimeException(
            "Null list element for " + schema.getName());
      }
    } else {
      recordConsumer.startField(LIST_ELEMENT_NAME, 0);
      writeValue(itemType, schema.getElementType(), item);
      recordConsumer.endField(LIST_ELEMENT_NAME, 0);
    }
    recordConsumer.endGroup();
  }
  recordConsumer.endField(LIST_REPEATED_NAME, 0);
}
 
Example 5
Source File: AvroTypeUtil.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link RecordSchema} that mirrors the given Avro schema.
 *
 * <p>The result carries the Avro schema's name and namespace and one record
 * field per Avro field. Before the fields are converted, the record's own data
 * type is registered in a lookup map under {@code namespace + "." + name}, and
 * that map is handed to the per-field type resolution.
 *
 * @param avroSchema the Avro Schema to convert; must not be {@code null}
 * @param schemaText the textual representation of the schema, or {@code null} if none is available
 * @param schemaId the identifier of the schema
 * @return the corresponding Record Schema
 * @throws IllegalArgumentException if {@code avroSchema} is {@code null}
 */
public static RecordSchema createSchema(final Schema avroSchema, final String schemaText, final SchemaIdentifier schemaId) {
    if (avroSchema == null) {
        throw new IllegalArgumentException("Avro Schema cannot be null");
    }

    final String name = avroSchema.getName();
    final String namespace = avroSchema.getNamespace();
    final String qualifiedName = namespace + "." + name;

    final SimpleRecordSchema result;
    if (schemaText == null) {
        result = new SimpleRecordSchema(schemaId);
    } else {
        result = new SimpleRecordSchema(schemaText, AVRO_SCHEMA_FORMAT, schemaId);
    }
    result.setSchemaName(name);
    result.setSchemaNamespace(namespace);

    // Seed the lookup with this record itself before any field is resolved.
    final Map<String, DataType> knownRecords = new HashMap<>();
    knownRecords.put(qualifiedName, RecordFieldType.RECORD.getRecordDataType(result));

    final List<Field> avroFields = avroSchema.getFields();
    final List<RecordField> converted = new ArrayList<>(avroFields.size());
    for (final Field avroField : avroFields) {
        final String fieldName = avroField.name();
        final Schema fieldSchema = avroField.schema();
        final DataType fieldType = AvroTypeUtil.determineDataType(fieldSchema, knownRecords);
        final boolean fieldNullable = isNullable(fieldSchema);
        addFieldToList(converted, avroField, fieldName, fieldSchema, fieldType, fieldNullable);
    }

    result.setFields(converted);
    return result;
}
 
Example 6
Source File: AvroWriteSupport.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Emits {@code collection} as a repeated list field on the record consumer.
 *
 * <p>An empty collection writes nothing. Each element otherwise occupies one
 * group of the {@code LIST_REPEATED_NAME} field: non-null elements are written
 * as that group's {@code LIST_ELEMENT_NAME} field, and a {@code null} element
 * leaves the group empty, which requires the element type to be
 * {@code OPTIONAL}.
 *
 * @param type group type whose first child is the repeated group that holds the element type
 * @param schema Avro schema supplying the element schema and the name used in the error message
 * @param collection values to write
 * @throws RuntimeException if an element is {@code null} and the element type is not optional
 */
@Override
protected void writeCollection(GroupType type, Schema schema, Collection<?> collection) {
  if (collection.size() == 0) {
    return;
  }

  recordConsumer.startField(LIST_REPEATED_NAME, 0);
  final Type memberType = type.getType(0).asGroupType().getType(0);
  for (Object member : collection) {
    recordConsumer.startGroup(); // middle layer: one group per element
    final boolean present = member != null;
    if (present) {
      recordConsumer.startField(LIST_ELEMENT_NAME, 0);
      writeValue(memberType, schema.getElementType(), member);
      recordConsumer.endField(LIST_ELEMENT_NAME, 0);
    } else if (!memberType.isRepetition(Type.Repetition.OPTIONAL)) {
      throw new RuntimeException(
          "Null list element for " + schema.getName());
    }
    recordConsumer.endGroup();
  }
  recordConsumer.endField(LIST_REPEATED_NAME, 0);
}
 
Example 7
Source File: NetSuiteMockTestBase.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Produces {@code count} generic records by composing NetSuite objects and
 * passing each one through an {@code NsObjectInputTransducer}.
 *
 * <p>Every composed object is read by the transducer, and the values of the
 * converted record are copied by position into a new {@code GenericData.Record}
 * that uses the converted record's schema. The positions copied are those of
 * the fields of {@code schema}.
 *
 * @param clientService client service handed to the transducer
 * @param schema schema handed to the transducer; its name is used as the type name
 * @param objectComposer source of the NetSuite objects to convert
 * @param count number of records to create; non-positive values yield an empty list
 * @param <T> type of the composed NetSuite objects
 * @return the created records, in creation order
 * @throws Exception if composing or converting an object fails
 */
public static <T> List<IndexedRecord> makeIndexedRecords(
        NetSuiteClientService<?> clientService, Schema schema,
        ObjectComposer<T> objectComposer, int count) throws Exception {

    NsObjectInputTransducer transducer = new NsObjectInputTransducer(clientService, schema, schema.getName());

    List<IndexedRecord> records = new ArrayList<>();

    for (int remaining = count; remaining > 0; remaining--) {
        T composed = objectComposer.composeObject();
        IndexedRecord converted = transducer.read(composed);

        // Copy position by position into a plain generic record.
        GenericRecord copy = new GenericData.Record(converted.getSchema());
        for (Schema.Field field : schema.getFields()) {
            int position = field.pos();
            copy.put(position, converted.get(position));
        }

        records.add(copy);
    }

    return records;
}
 
Example 8
Source File: CodeTransformationsAvro16Test.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Compiles {@code schema} with Avro's {@code SpecificCompiler} into a fresh
 * temporary directory and returns the generated Java source as a string.
 *
 * <p>The generated file is looked up under the output root at the path formed
 * from the schema namespace (dots turned into directory separators) and the
 * schema name plus {@code ".java"}; the method asserts that this file exists.
 *
 * @param schema schema to generate code for
 * @return the UTF-8 decoded contents of the generated source file
 * @throws Exception if the temporary directory cannot be created, compilation
 *     fails, or the generated file cannot be read
 */
private String runNativeCodegen(Schema schema) throws Exception {
  File outputRoot = Files.createTempDirectory(null).toFile();
  SpecificCompiler compiler = new SpecificCompiler(schema);
  compiler.compileToDestination(null, outputRoot);

  String namespace = schema.getNamespace();
  String fileName = schema.getName() + ".java";
  File javaFile;
  if (namespace == null || namespace.isEmpty()) {
    // NOTE(review): assumes the compiler writes namespace-less classes directly
    // under the output root - confirm against the Avro version under test.
    javaFile = new File(outputRoot, fileName);
  } else {
    // Literal replacement on purpose: replaceAll() would treat the Windows
    // separator "\" as an escape character in the replacement string and throw.
    String packagePath = namespace.replace('.', File.separatorChar);
    javaFile = new File(outputRoot, packagePath + File.separator + fileName);
  }
  Assert.assertTrue(javaFile.exists());

  String sourceCode;
  try (FileInputStream fis = new FileInputStream(javaFile)) {
    sourceCode = IOUtils.toString(fis, StandardCharsets.UTF_8);
  }

  return sourceCode;
}
 
Example 9
Source File: AvroWriteSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code map} to the record consumer as a map group.
 *
 * <p>The outer group is always started and ended. When the map has entries,
 * each one becomes a group inside the {@code MAP_REPEATED_NAME} field holding
 * the key and, if non-null, the value. A {@code null} value is only accepted
 * when the value type is {@code OPTIONAL}.
 *
 * @param schema group type whose first child is the repeated key/value group
 * @param avroSchema Avro schema supplying the value schema and the name used in the error message
 * @param map entries to write
 * @param <V> type of the map values
 * @throws RuntimeException if a value is {@code null} and the value type is not optional
 */
private <V> void writeMap(GroupType schema, Schema avroSchema,
                          Map<CharSequence, V> map) {
  final GroupType keyValueGroup = schema.getType(0).asGroupType();
  final Type keyType = keyValueGroup.getType(0);
  final Type valueType = keyValueGroup.getType(1);

  recordConsumer.startGroup(); // outer wrapper group of the map
  final boolean hasEntries = map.size() > 0;
  if (hasEntries) {
    recordConsumer.startField(MAP_REPEATED_NAME, 0);

    for (Map.Entry<CharSequence, V> pair : map.entrySet()) {
      recordConsumer.startGroup(); // one key_value group per entry

      recordConsumer.startField(MAP_KEY_NAME, 0);
      writeValue(keyType, MAP_KEY_SCHEMA, pair.getKey());
      recordConsumer.endField(MAP_KEY_NAME, 0);

      final V mapped = pair.getValue();
      if (mapped == null) {
        if (!valueType.isRepetition(Type.Repetition.OPTIONAL)) {
          throw new RuntimeException("Null map value for " + avroSchema.getName());
        }
      } else {
        recordConsumer.startField(MAP_VALUE_NAME, 1);
        writeValue(valueType, avroSchema.getValueType(), mapped);
        recordConsumer.endField(MAP_VALUE_NAME, 1);
      }

      recordConsumer.endGroup();
    }

    recordConsumer.endField(MAP_REPEATED_NAME, 0);
  }
  recordConsumer.endGroup();
}
 
Example 10
Source File: SchemaAssistant.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Derives a descriptive type name for a schema.
 *
 * <p>Records yield their schema name. Arrays and maps yield
 * {@code "Array_of_"} / {@code "Map_of_"} followed by the name derived
 * recursively from their element / value schema. Every other schema yields the
 * name of its {@code Schema.Type} constant.
 *
 * @param schema schema to name
 * @return the derived type name
 */
public static String getTypeName (Schema schema) {
  final Schema.Type kind = schema.getType();
  switch (kind) {
    case RECORD:
      return schema.getName();
    case ARRAY:
      return "Array_of_" + getTypeName(schema.getElementType());
    case MAP:
      return "Map_of_" + getTypeName(schema.getValueType());
    default:
      return kind.name();
  }
}
 
Example 11
Source File: SpecificAvroDao.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the name of a schema, looking through a two-branch union with null.
 *
 * <p>A non-union schema yields its own name. A union of exactly two branches
 * where one branch is of type {@code NULL} yields the name of the other
 * branch. Any other union is rejected.
 *
 * @param schema schema to name
 * @return the schema name, or the name of the non-null branch of the union
 * @throws IllegalArgumentException if {@code schema} is a union that is not a
 *     two-branch union containing a null branch
 */
private static String getSchemaName(Schema schema) {
  if (schema.getType() != Schema.Type.UNION) {
    return schema.getName();
  }

  List<Schema> branches = schema.getTypes();
  if (branches.size() == 2) {
    Schema first = branches.get(0);
    Schema second = branches.get(1);
    if (first.getType() == Schema.Type.NULL) {
      return second.getName();
    }
    if (second.getType() == Schema.Type.NULL) {
      return first.getName();
    }
  }
  throw new IllegalArgumentException("Unsupported union schema: " + schema);
}
 
Example 12
Source File: FastDeserializerGeneratorBase.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the class name for a deserializer from the reader schema's kind, the
 * given description, and the absolute values of the writer and reader schema ids.
 *
 * <p>The name starts with the reader schema's name for records, or with
 * {@code "Array"} / {@code "Map"} for those types, followed by
 * {@code description}, {@code "Deserializer"}, the writer id, an underscore
 * and the reader id.
 *
 * @param writerSchema schema the data was written with
 * @param readerSchema schema the data is read with; determines the name prefix
 * @param description text inserted between the prefix and {@code "Deserializer"}
 * @return the composed class name
 * @throws FastDeserializerGeneratorException if the reader schema is not a
 *     record, array or map
 */
public static String getClassName(Schema writerSchema, Schema readerSchema, String description) {
    // Widen before taking the absolute value: Math.abs(Integer.MIN_VALUE) is
    // still negative and would put a '-' into the class name.
    final long writerSchemaId = Math.abs((long) getSchemaId(writerSchema));
    final long readerSchemaId = Math.abs((long) getSchemaId(readerSchema));
    if (Schema.Type.RECORD.equals(readerSchema.getType())) {
        return readerSchema.getName() + description + "Deserializer"
                + writerSchemaId + "_" + readerSchemaId;
    } else if (Schema.Type.ARRAY.equals(readerSchema.getType())) {
        return "Array" + description + "Deserializer"
                + writerSchemaId + "_" + readerSchemaId;
    } else if (Schema.Type.MAP.equals(readerSchema.getType())) {
        return "Map" + description + "Deserializer"
                + writerSchemaId + "_" + readerSchemaId;
    }
    throw new FastDeserializerGeneratorException("Unsupported return type: " + readerSchema.getType());
}
 
Example 13
Source File: FastSerializerGeneratorBase.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the class name for a serializer from the schema's kind, the given
 * description, and the absolute value of the schema id.
 *
 * <p>The name starts with the schema's name for records, or with
 * {@code "Array"} / {@code "Map"} for those types, followed by
 * {@code description}, {@code "Serializer"}, an underscore and the id.
 *
 * @param schema schema to serialize; determines the name prefix
 * @param description text inserted between the prefix and {@code "Serializer"}
 * @return the composed class name
 * @throws FastSerializerGeneratorException if the schema is not a record,
 *     array or map
 */
public static String getClassName(Schema schema, String description) {
    // Widen before taking the absolute value: Math.abs(Integer.MIN_VALUE) is
    // still negative and would put a '-' into the class name.
    final long schemaId = Math.abs((long) getSchemaId(schema));
    if (Schema.Type.RECORD.equals(schema.getType())) {
        return schema.getName() + description + "Serializer"
                + "_" + schemaId;
    } else if (Schema.Type.ARRAY.equals(schema.getType())) {
        return "Array" + description + "Serializer"
                + "_" + schemaId;
    } else if (Schema.Type.MAP.equals(schema.getType())) {
        return "Map" + description + "Serializer"
                + "_" + schemaId;
    }
    throw new FastSerializerGeneratorException("Unsupported return type: " + schema.getType());
}
 
Example 14
Source File: SchemaUtil.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Attempts to merge two {@link Schema} instances into one, returning
 * {@code null} when no merge is possible.
 * <p>
 * Equal schemas are returned unchanged. An INT/LONG pair resolves to the LONG
 * schema and a FLOAT/DOUBLE pair to the DOUBLE schema; integers are never
 * promoted to floating point. Any other pair of differing types cannot be
 * merged. Schemas of the same type merge as follows: unions are combined,
 * records merge when their names are equal (two unnamed records must also
 * reach the field-similarity threshold), maps and arrays merge their value or
 * element types, and enums with equal names combine their symbols with the
 * left schema's symbols first.
 * <p>
 * See {@link #mergeOrUnion} to get a union back when a merge is not possible.
 *
 * @param left a {@code Schema}
 * @param right a {@code Schema}
 * @return a merged {@code Schema} or {@code null} if merging is not possible
 * @throws UnsupportedOperationException if both schemas share a type that is
 *     none of union, record, map, array or enum
 */
private static Schema mergeOnly(Schema left, Schema right) {
  if (Objects.equal(left, right)) {
    return left;
  }

  final Schema.Type leftType = left.getType();
  final Schema.Type rightType = right.getType();

  // Primitive widening: the wider of the two schemas wins.
  if (leftType == Schema.Type.INT && rightType == Schema.Type.LONG) {
    return right;
  }
  if (leftType == Schema.Type.LONG && rightType == Schema.Type.INT) {
    return left;
  }
  if (leftType == Schema.Type.FLOAT && rightType == Schema.Type.DOUBLE) {
    return right;
  }
  if (leftType == Schema.Type.DOUBLE && rightType == Schema.Type.FLOAT) {
    return left;
  }

  // Remaining type mismatches are left to the caller to resolve with a union.
  if (leftType != rightType) {
    return null;
  }

  switch (leftType) {
    case UNION:
      return union(left, right);

    case RECORD: {
      final String leftName = left.getName();
      final String rightName = right.getName();
      if (!Objects.equal(leftName, rightName)) {
        return null;
      }
      // Equal names with a null left name means both records are unnamed.
      if (leftName == null && fieldSimilarity(left, right) < SIMILARITY_THRESH) {
        return null;
      }

      Schema merged = Schema.createRecord(
          coalesce(leftName, rightName),
          coalesce(left.getDoc(), right.getDoc()),
          coalesce(left.getNamespace(), right.getNamespace()),
          false
      );
      merged.setFields(mergeFields(left, right));
      return merged;
    }

    case MAP:
      return Schema.createMap(
          mergeOrUnion(left.getValueType(), right.getValueType()));

    case ARRAY:
      return Schema.createArray(
          mergeOrUnion(left.getElementType(), right.getElementType()));

    case ENUM: {
      final String enumName = left.getName();
      if (!Objects.equal(enumName, right.getName())) {
        return null;
      }
      // Insertion-ordered set: left symbols first, then new ones from right.
      Set<String> allSymbols = Sets.newLinkedHashSet();
      allSymbols.addAll(left.getEnumSymbols());
      allSymbols.addAll(right.getEnumSymbols());
      return Schema.createEnum(
          enumName,
          coalesce(left.getDoc(), right.getDoc()),
          coalesce(left.getNamespace(), right.getNamespace()),
          ImmutableList.copyOf(allSymbols)
      );
    }

    default:
      // Same-typed primitives are equal and returned by the first check, so
      // only unknown non-primitive types can reach this point.
      throw new UnsupportedOperationException(
          "Unknown schema type: " + left.getType());
  }
}
 
Example 15
Source File: FastDeserializerGenerator.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Generates the code that reads an enum value from the decoder.
 *
 * <p>When the action says the value should not be read, only a bare
 * {@code readEnum()} statement is emitted. Otherwise the enum adjust action is
 * taken from the field action's symbol (or from that symbol's production, in
 * which case the last matching entry is used). If every adjustment equals its
 * own index, the enum value is looked up directly from the decoded index;
 * if not, the decoded index is stored in a variable and one conditional
 * assignment per adjustment is generated. The resulting expression is handed
 * to {@code putEnumIntoParent}.
 *
 * @param schema enum schema being processed
 * @param body code block that receives the generated statements
 * @param action field action describing whether and how to read the value
 * @param putEnumIntoParent callback that stores the resulting expression
 * @throws FastDeserializerGeneratorException if an adjustment is a {@code String}
 */
private void processEnum(final Schema schema, final JBlock body, FieldAction action,
        BiConsumer<JBlock, JExpression> putEnumIntoParent) {

    if (!action.getShouldRead()) {
        body.directStatement(DECODER + ".readEnum();");
        return;
    }

    final Symbol actionSymbol = action.getSymbol();
    Symbol.EnumAdjustAction enumAdjustAction = null;
    if (actionSymbol instanceof Symbol.EnumAdjustAction) {
        enumAdjustAction = (Symbol.EnumAdjustAction) actionSymbol;
    } else {
        // No early exit: the last matching production entry is the one used.
        for (Symbol candidate : actionSymbol.production) {
            if (candidate instanceof Symbol.EnumAdjustAction) {
                enumAdjustAction = (Symbol.EnumAdjustAction) candidate;
            }
        }
    }

    boolean identityMapping = true;
    for (int index = 0; index < enumAdjustAction.adjustments.length; index++) {
        Object adjustment = enumAdjustAction.adjustments[index];
        if (adjustment instanceof String) {
            throw new FastDeserializerGeneratorException(
                    schema.getName() + " enum label impossible to deserialize: " + adjustment.toString());
        }
        if (!adjustment.equals(index)) {
            identityMapping = false;
        }
    }

    final JExpression decodedIndex = JExpr.direct(DECODER + ".readEnum()");
    final JExpression result;

    if (identityMapping) {
        result = schemaAssistant.getEnumValueByIndex(schema, decodedIndex, getSchemaExpr(schema));
    } else {
        JVar enumIndex = body.decl(codeModel.INT, getVariableName("enumIndex"), decodedIndex);
        JClass enumClass = schemaAssistant.classFromSchema(schema);
        JVar enumVar = body.decl(enumClass, getVariableName("enumValue"), JExpr._null());

        for (int index = 0; index < enumAdjustAction.adjustments.length; index++) {
            JExpression mapped = schemaAssistant
                    .getEnumValueByIndex(schema, JExpr.lit((Integer) enumAdjustAction.adjustments[index]),
                            getSchemaExpr(schema));
            body._if(enumIndex.eq(JExpr.lit(index)))._then().assign(enumVar, mapped);
        }
        result = enumVar;
    }
    putEnumIntoParent.accept(body, result);
}
 
Example 16
Source File: AvroSchemaManager.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Registers a schema and, recursively, the schemas nested inside it.
 *
 * <p>A named schema is stored in {@code typeName2Schema} under its name unless
 * that name is already present. For records, each field schema is stored in
 * {@code name2Schema} under the field name prefixed with {@code namespace}
 * (unless {@code ignoreNameMap} is set or the key already exists) and then
 * visited with that key as the new namespace. Unions, arrays and maps are
 * descended into without changing the namespace.
 *
 * @param namespace prefix for field names, or {@code null} for none
 * @param schema schema to register
 * @param ignoreNameMap when {@code true}, field schemas are not added to {@code name2Schema}
 */
protected void init(String namespace, Schema schema,
                                boolean ignoreNameMap) {

    /* map[type name] => schema */
    if (isNamedSchema(schema)) {
        final String typeName = schema.getName();
        if (!typeName2Schema.containsKey(typeName)) {
            AvroStorageLog.details("add " + typeName + "=" + schema
                    + " to type2Schema");
            typeName2Schema.put(typeName, schema);
        } else {
            AvroStorageLog.warn("Duplicate schemas defined for type:"
                    + typeName
                    + ". will ignore the second one:"
                    + schema);
        }
    }

    switch (schema.getType()) {
    case RECORD:
        /* map[field name] => field schema */
        for (Field field : schema.getFields()) {
            final Schema fieldSchema = field.schema();
            final String name;
            if (namespace == null) {
                name = field.name();
            } else {
                name = namespace + "." + field.name();
            }

            if (!ignoreNameMap) {
                if (!name2Schema.containsKey(name)) {
                    AvroStorageLog.details("add " + name + "=" + fieldSchema + " to name2Schema");
                    name2Schema.put(name, fieldSchema);
                } else {
                    AvroStorageLog.warn("Duplicate schemas defined for alias:" + name
                                      + ". Will ignore the second one:"+ fieldSchema);
                }
            }

            init(name, fieldSchema, ignoreNameMap);
        }
        break;

    case UNION:
        if (AvroStorageUtils.isAcceptableUnion(schema)) {
            init(namespace, AvroStorageUtils.getAcceptedType(schema), ignoreNameMap);
        } else {
            // Branches of any other union never contribute to name2Schema.
            for (Schema branch : schema.getTypes()) {
                init(namespace, branch, true);
            }
        }
        break;

    case ARRAY:
        init(namespace, schema.getElementType(), true);
        break;

    case MAP:
        init(namespace, schema.getValueType(), true);
        break;

    default:
        break;
    }
}
 
Example 17
Source File: AvroSchemaManager.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Walks a schema and records it, and the schemas nested in it, in the lookup maps.
 *
 * <p>Named schemas go into {@code typeName2Schema} keyed by their name; an
 * existing entry is kept and a warning is logged. Record fields go into
 * {@code name2Schema} keyed by {@code namespace + "." + fieldName} (or just the
 * field name when {@code namespace} is {@code null}), again keeping existing
 * entries, and are skipped entirely when {@code ignoreNameMap} is set. Each
 * field schema is then visited with its key as the namespace. Union branches,
 * array elements and map values are visited with the current namespace.
 *
 * @param namespace prefix for field names, or {@code null} for none
 * @param schema schema to walk
 * @param ignoreNameMap when {@code true}, field schemas are not added to {@code name2Schema}
 */
protected void init(String namespace, Schema schema,
                                boolean ignoreNameMap) {

    /* type name => schema */
    if (isNamedSchema(schema)) {
        final String typeName = schema.getName();
        final boolean alreadyKnown = typeName2Schema.containsKey(typeName);
        if (alreadyKnown) {
            AvroStorageLog.warn("Duplicate schemas defined for type:"
                    + typeName
                    + ". will ignore the second one:"
                    + schema);
        } else {
            AvroStorageLog.details("add " + typeName + "=" + schema
                    + " to type2Schema");
            typeName2Schema.put(typeName, schema);
        }
    }

    switch (schema.getType()) {
    case RECORD: {
        /* field name => field schema */
        final String prefix = (namespace == null) ? "" : namespace + ".";
        for (Field field : schema.getFields()) {
            final Schema fieldSchema = field.schema();
            final String name = prefix + field.name();

            if (!ignoreNameMap) {
                final boolean aliasTaken = name2Schema.containsKey(name);
                if (aliasTaken) {
                    AvroStorageLog.warn("Duplicate schemas defined for alias:" + name
                                      + ". Will ignore the second one:"+ fieldSchema);
                } else {
                    AvroStorageLog.details("add " + name + "=" + fieldSchema + " to name2Schema");
                    name2Schema.put(name, fieldSchema);
                }
            }

            init(name, fieldSchema, ignoreNameMap);
        }
        break;
    }

    case UNION: {
        if (AvroStorageUtils.isAcceptableUnion(schema)) {
            Schema accepted = AvroStorageUtils.getAcceptedType(schema);
            init(namespace, accepted, ignoreNameMap);
        } else {
            // Members of any other union are walked with the name map disabled.
            for (Schema member : schema.getTypes()) {
                init(namespace, member, true);
            }
        }
        break;
    }

    case ARRAY:
        init(namespace, schema.getElementType(), true);
        break;

    case MAP:
        init(namespace, schema.getValueType(), true);
        break;

    default:
        break;
    }
}
 
Example 18
Source File: GenericAvroSerializer.java    From geowave with Apache License 2.0 4 votes vote down vote up
/**
 * Joins the schema's namespace and name with a dot.
 *
 * <p>The two parts are concatenated as-is, so a {@code null} namespace shows
 * up as the literal text {@code "null"} in the result.
 *
 * @param schema schema to name
 * @return {@code namespace + "." + name}
 */
private static String getSchemaName(final Schema schema) {
  final String namespace = schema.getNamespace();
  final String simpleName = schema.getName();
  return namespace + "." + simpleName;
}
 
Example 19
Source File: Schemas.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Merges two {@link Schema} instances, or returns {@code null} if they cannot
 * be merged.
 * <p>
 * Schemas that are equal are returned unchanged. INT merges with LONG into the
 * LONG schema and FLOAT merges with DOUBLE into the DOUBLE schema; integers
 * are not promoted to floats. Otherwise both schemas must have the same type:
 * unions are combined, records are merged when they have the same name (or
 * when both are unnamed and share enough fields to reach the similarity
 * threshold), maps and arrays merge their value or element types, and enums
 * with the same name combine their symbols, left symbols first.
 * <p>
 * See {@link #mergeOrUnion} to return a union when a merge is not possible.
 *
 * @param left a {@code Schema}
 * @param right a {@code Schema}
 * @return a merged {@code Schema} or {@code null} if merging is not possible
 * @throws UnsupportedOperationException if both schemas share a type that is
 *     none of union, record, map, array or enum
 */
private static Schema mergeOnly(Schema left, Schema right) {
  if (Objects.equal(left, right)) {
    return left;
  }

  final Schema.Type leftType = left.getType();
  final Schema.Type rightType = right.getType();

  // Primitive promotion: return whichever side is the wider type.
  final boolean rightIsWider =
      (leftType == Schema.Type.INT && rightType == Schema.Type.LONG)
          || (leftType == Schema.Type.FLOAT && rightType == Schema.Type.DOUBLE);
  if (rightIsWider) {
    return right;
  }
  final boolean leftIsWider =
      (leftType == Schema.Type.LONG && rightType == Schema.Type.INT)
          || (leftType == Schema.Type.DOUBLE && rightType == Schema.Type.FLOAT);
  if (leftIsWider) {
    return left;
  }

  // Every other mismatch has to be expressed as a union by the caller.
  if (leftType != rightType) {
    return null;
  }

  if (leftType == Schema.Type.UNION) {
    return union(left, right);
  }

  if (leftType == Schema.Type.RECORD) {
    final String leftName = left.getName();
    final String rightName = right.getName();
    final boolean bothUnnamed = leftName == null && rightName == null;
    if (bothUnnamed) {
      // Unnamed records are only merged when their fields overlap enough.
      if (fieldSimilarity(left, right) < SIMILARITY_THRESH) {
        return null;
      }
    } else if (!Objects.equal(leftName, rightName)) {
      return null;
    }

    Schema mergedRecord = Schema.createRecord(
        coalesce(leftName, rightName),
        coalesce(left.getDoc(), right.getDoc()),
        coalesce(left.getNamespace(), right.getNamespace()),
        false
    );
    mergedRecord.setFields(mergeFields(left, right));
    return mergedRecord;
  }

  if (leftType == Schema.Type.MAP) {
    return Schema.createMap(
        mergeOrUnion(left.getValueType(), right.getValueType()));
  }

  if (leftType == Schema.Type.ARRAY) {
    return Schema.createArray(
        mergeOrUnion(left.getElementType(), right.getElementType()));
  }

  if (leftType == Schema.Type.ENUM) {
    final String enumName = left.getName();
    if (!Objects.equal(enumName, right.getName())) {
      return null;
    }
    // Insertion-ordered set keeps left symbols ahead of new right symbols.
    Set<String> mergedSymbols = Sets.newLinkedHashSet();
    mergedSymbols.addAll(left.getEnumSymbols());
    mergedSymbols.addAll(right.getEnumSymbols());
    return Schema.createEnum(
        enumName,
        coalesce(left.getDoc(), right.getDoc()),
        coalesce(left.getNamespace(), right.getNamespace()),
        ImmutableList.copyOf(mergedSymbols)
    );
  }

  // Same-typed primitives are equal and were returned by the first check, so
  // anything arriving here is an unknown non-primitive type.
  throw new UnsupportedOperationException(
      "Unknown schema type: " + left.getType());
}
 
Example 20
Source File: PigSchema2Avro.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Checks a Pig tuple schema against an Avro schema and builds the resulting
 * Avro record schema.
 *
 * <p>If the Avro schema is not a record, the Pig tuple is treated as a wrapper:
 * it must contain exactly one field, which is validated against the Avro
 * schema directly (e.g. Avro "int" is compatible with Pig "T:(int)").
 *
 * <p>For a record, the Avro fields and Pig fields must match in number unless
 * the Avro schema is a user-defined partial schema. Pig fields without a
 * user-defined Avro field, or whose Avro field has no schema, are converted
 * from the Pig schema; all others are validated against the Avro field schema.
 *
 * @param avroSchema Avro schema to validate against
 * @param pigFields fields of the Pig tuple schema
 * @return the schema resulting from validation and conversion
 * @throws IOException if the field counts do not match
 */
protected static Schema validateAndConvertRecord(Schema avroSchema, ResourceFieldSchema[] pigFields) throws IOException {

    /* Unwrap a single-field Pig tuple when Avro expects a non-record. */
    if (!avroSchema.getType().equals(Schema.Type.RECORD)) {
        if (pigFields.length != 1) {
            throw new IOException("Expect only one field in Pig tuple schema. Avro schema is " + avroSchema.getType());
        }
        return validateAndConvert(avroSchema, pigFields[0]);
    }

    /* From here on: Pig tuple versus Avro record. */
    final boolean partial = AvroStorageUtils.isUDPartialRecordSchema(avroSchema);
    AvroStorageLog.details("isPartialSchema=" + partial);

    final String typeName;
    if (partial) {
        typeName = getRecordName();
    } else {
        typeName = avroSchema.getName();
    }
    Schema result = Schema.createRecord(typeName, avroSchema.getDoc(), avroSchema.getNamespace(), false);

    List<Schema.Field> avroFields = avroSchema.getFields();
    if (!partial && avroFields.size() != pigFields.length) {
        throw new IOException("Expect " + avroFields.size() + " fields in pig schema." + " But there are " + pigFields.length);
    }

    List<Schema.Field> resultFields = new ArrayList<Schema.Field>();

    for (int i = 0; i < pigFields.length; i++) {
        final ResourceFieldSchema pigField = pigFields[i];

        /* user-defined Avro field for this position; may be null for a partial schema */
        final Field inputField;
        if (partial) {
            inputField = AvroStorageUtils.getUDField(avroSchema, i);
        } else {
            inputField = avroFields.get(i);
        }

        final Schema fieldSchema;
        if (inputField == null) {
            /* nothing defined for this position: convert Pig schema (nullable) */
            fieldSchema = convert(pigField, true);
        } else if (inputField.schema() == null) {
            /* field defined without a schema: convert Pig schema (not-null) */
            fieldSchema = convert(pigField, false);
        } else {
            /* validate the Pig field against the given Avro schema */
            fieldSchema = validateAndConvert(inputField.schema(),
                                            pigField);
        }

        /* name and doc come from Pig for partial schemas, otherwise from Avro */
        String outName;
        final String outDoc;
        if (partial) {
            outName = pigField.getName();
            outDoc = pigField.getDescription();
        } else {
            outName = inputField.name();
            outDoc = inputField.doc();
        }
        if (outName == null) {
            outName = FIELD_NAME + "_" + i; // field name cannot be null
        }

        JsonNode defaultValue = null;
        if (inputField != null) {
            defaultValue = inputField.defaultValue();
        }

        resultFields.add(new Field(outName, fieldSchema, outDoc, defaultValue));
    }

    result.setFields(resultFields);
    return result;

}