Java Code Examples for org.apache.avro.Schema#equals()

The following examples show how to use org.apache.avro.Schema#equals(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MergeJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up {@code job} to merge the Avro files found under two input paths.
 *
 * <p>The schema of each path is looked up via {@code AvroUtil.getAvroSchema};
 * both must be present and equal, otherwise the merge is rejected. The shared
 * schema is then registered as the job's output schema.
 *
 * @param conf configuration used to read the schemas
 * @param job the job to configure
 * @param oldPath path holding the existing data
 * @param newPath path holding the new data
 * @throws IOException if either schema is missing or the two schemas differ
 */
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");

  final Schema existingSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema incomingSchema = AvroUtil.getAvroSchema(newPath, conf);

  final boolean schemasMatch = existingSchema != null
      && incomingSchema != null
      && existingSchema.equals(incomingSchema);
  if (!schemasMatch) {
    final String message = "Invalid schema for input directories. Schema for old data: ["
        + existingSchema + "]. Schema for new data: [" + incomingSchema + "]";
    throw new IOException(message);
  }

  LOG.debug("Avro Schema:" + existingSchema);

  // Wire up the Avro input/output formats and the merge mapper/reducer.
  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  AvroJob.setOutputSchema(job.getConfiguration(), existingSchema);
}
 
Example 2
Source File: Log4jAppender.java    From mt-flume with Apache License 2.0 6 votes vote down vote up
/**
 * Encodes {@code datum} with the Avro binary encoder and returns the bytes.
 *
 * <p>The output buffer, datum writer and encoder are kept in fields and are
 * rebuilt only when no schema has been seen yet or when {@code datumSchema}
 * differs from the one currently held.
 *
 * @param datum the object to encode
 * @param datumSchema the Avro schema describing {@code datum}
 * @return the encoded bytes
 * @throws FlumeException wrapping any {@link IOException} raised while writing
 */
private byte[] serialize(Object datum, Schema datumSchema) throws FlumeException {
  final boolean schemaChanged = (schema == null) || !datumSchema.equals(schema);
  if (schemaChanged) {
    // Writer and encoder are bound to a schema/stream, so rebuild them all.
    schema = datumSchema;
    out = new ByteArrayOutputStream();
    writer = new ReflectDatumWriter<Object>(schema);
    encoder = EncoderFactory.get().binaryEncoder(out, null);
  }

  // Drop whatever a previous call left in the buffer.
  out.reset();

  final byte[] encoded;
  try {
    writer.write(datum, encoder);
    encoder.flush();
    encoded = out.toByteArray();
  } catch (IOException e) {
    throw new FlumeException(e);
  }
  return encoded;
}
 
Example 3
Source File: DatasetKeyInputFormat.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Populates the configuration so that {@code DatasetKeyInputFormat} reads
 * from the supplied {@link Dataset} or {@link View}.
 *
 * <p>Copies every descriptor property into the configuration, records the
 * descriptor location when the view is a {@code FileSystemDataset}, selects
 * either a read schema or a read type, and finally stores the view URI.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder readFrom(View<?> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();

  // file-system backed datasets also publish their location
  if (view instanceof FileSystemDataset) {
    conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation()));
  }

  // mirror all descriptor properties into the config
  for (String key : descriptor.listProperties()) {
    conf.set(key, descriptor.getProperty(key));
  }

  if (!DataModelUtil.isGeneric(view.getType())) {
    withType(view.getType());
  } else {
    Schema storedSchema = view.getDataset().getDescriptor().getSchema();
    // a read schema is only needed when the view differs from the stored
    // schema, i.e. when the view is a projection
    boolean isProjection = !storedSchema.equals(view.getSchema());
    if (isProjection) {
      withSchema(view.getSchema());
    }
  }

  conf.set(KITE_INPUT_URI, view.getUri().toString());
  return this;
}
 
Example 4
Source File: Log4jAppender.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes {@code datum} to Avro binary form.
 *
 * <p>The cached schema, buffer, writer and encoder fields are reused while
 * the incoming schema equals the cached one; otherwise they are recreated
 * for {@code datumSchema} before writing.
 *
 * @param datum the object to serialize
 * @param datumSchema the Avro schema of {@code datum}
 * @return the serialized bytes
 * @throws FlumeException if writing or flushing fails with an {@link IOException}
 */
private byte[] serialize(Object datum, Schema datumSchema) throws FlumeException {
  final boolean canReuse = (schema != null) && datumSchema.equals(schema);
  if (!canReuse) {
    // First call or schema switch: start over with fresh state.
    schema = datumSchema;
    out = new ByteArrayOutputStream();
    writer = new ReflectDatumWriter<Object>(schema);
    encoder = EncoderFactory.get().binaryEncoder(out, null);
  }

  // The buffer is shared between calls, so clear it before each write.
  out.reset();
  try {
    writer.write(datum, encoder);
    encoder.flush();
  } catch (IOException e) {
    throw new FlumeException(e);
  }
  return out.toByteArray();
}
 
Example 5
Source File: NetSuiteOutputModuleProperties.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Reacts to a change of the main schema property.
 *
 * <p>When no previous schema has been recorded, the call only remembers the
 * current schema (initial setup after materialization/deserialization).
 * On later calls the outgoing schema is rebuilt and both forms are refreshed,
 * but only if the schema actually differs from the remembered one.
 *
 * <p>A {@code TalendRuntimeException} raised along the way is rethrown via
 * {@code ComponentExceptions.asComponentExceptionWithValidationResult}.
 */
private void afterMainSchema() {
    try {
        final Schema currentSchema = main.schema.getValue();
        final boolean initialUpdate = (lastMainSchema == null);

        if (!initialUpdate) {
            // Unchanged schema: skip the needless update of outgoing flow(s).
            if (currentSchema.equals(lastMainSchema)) {
                return;
            }

            setupOutgoingSchema();

            refreshLayout(getForm(Form.MAIN));
            refreshLayout(getForm(Form.ADVANCED));
        }

        // Keep the schema around for the next comparison.
        lastMainSchema = currentSchema;

    } catch (TalendRuntimeException e) {
        throw ComponentExceptions.asComponentExceptionWithValidationResult(e);
    }
}
 
Example 6
Source File: AvroStorage.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the Avro schema for an input path.
 *
 * <ol>
 * <li>A path that does not exist, or that the path filter rejects, yields
 * {@code null}.</li>
 * <li>A file yields its own schema.</li>
 * <li>A directory whose filtered listing contains no sub-directories yields
 * the schema obtained from {@code getSchema} for that directory.</li>
 * <li>Otherwise each entry is resolved recursively. With {@code checkSchema}
 * disabled the schema of the first entry is returned immediately; with it
 * enabled every non-null schema must equal the first one found.</li>
 * </ol>
 *
 * @param path input path
 * @param fs file system
 * @return avro schema of data, or {@code null} if none could be determined
 * @throws IOException if sub-directories carry different schemas, or if the
 *         file system access fails
 */
@SuppressWarnings("deprecation")
protected Schema getAvroSchema(Path path, FileSystem fs) throws IOException {
    final boolean usable = fs.exists(path) && AvroStorageUtils.PATH_FILTER.accept(path);
    if (!usable) {
        return null;
    }

    /* a plain file carries its own schema */
    if (!fs.isDirectory(path)) {
        return getSchema(path, fs);
    }

    final FileStatus[] children = fs.listStatus(path, AvroStorageUtils.PATH_FILTER);
    Schema agreed = null;
    if (children.length > 0) {
        /* only files below this directory: no recursion needed */
        if (AvroStorageUtils.noDir(children)) {
            return getSchema(path, fs);
        }

        /* otherwise every sub-path has to agree on one schema */
        for (FileStatus child : children) {
            final Schema candidate = getAvroSchema(child.getPath(), fs);

            if (agreed != null) {
                final boolean conflict = candidate != null && !agreed.equals(candidate);
                if (conflict) {
                    throw new IOException( "Input path is " + path + ". Sub-direcotry " + child.getPath()
                                         + " contains different schema " + candidate + " than " + agreed);
                }
                continue;
            }

            agreed = candidate;
            if (!checkSchema) {
                System.out.println("Do not check schema; use schema of " + child.getPath());
                return agreed;
            }
        }
    }

    if (agreed == null) {
        System.err.println("Cannot get avro schema! Input path " + path + " might be empty.");
    }

    return agreed;
}
 
Example 7
Source File: AvroStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the Avro schema of the data stored at {@code path}.
 *
 * <p>Returns {@code null} when the path is missing or excluded by the path
 * filter. A file, or a directory whose filtered listing has no
 * sub-directories, is handed to {@code getSchema}. For a directory with
 * sub-directories the entries are examined recursively: if
 * {@code checkSchema} is off, the first entry's schema is returned as is;
 * if it is on, all non-null schemas found must be equal.
 *
 * @param path input path
 * @param fs file system
 * @return avro schema of data; {@code null} when no schema was found
 * @throws IOException if two entries below {@code path} have different
 *         schemas, or if the file system access fails
 */
@SuppressWarnings("deprecation")
protected Schema getAvroSchema(Path path, FileSystem fs) throws IOException {
    if (!fs.exists(path)) {
        return null;
    }
    if (!AvroStorageUtils.PATH_FILTER.accept(path)) {
        return null;
    }

    /* files are read directly */
    if (!fs.isDirectory(path)) {
        return getSchema(path, fs);
    }

    FileStatus[] entries = fs.listStatus(path, AvroStorageUtils.PATH_FILTER);
    Schema reference = null;

    if (entries.length > 0) {
        if (AvroStorageUtils.noDir(entries)) {
            return getSchema(path, fs);
        }

        /* compare the schema of every entry against the first one found */
        for (int idx = 0; idx < entries.length; idx++) {
            FileStatus entry = entries[idx];
            Schema found = getAvroSchema(entry.getPath(), fs);

            if (reference == null) {
                reference = found;
                if (!checkSchema) {
                    System.out.println("Do not check schema; use schema of " + entry.getPath());
                    return reference;
                }
            } else {
                if (found != null && !reference.equals(found)) {
                    throw new IOException( "Input path is " + path + ". Sub-direcotry " + entry.getPath()
                                         + " contains different schema " + found + " than " + reference);
                }
            }
        }
    }

    if (reference == null) {
        System.err.println("Cannot get avro schema! Input path " + path + " might be empty.");
    }

    return reference;
}
 
Example 8
Source File: DatasetSink.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the dataset at {@code uri} under the given login and opens a writer
 * for it.
 *
 * <p>Besides creating the writer, this refreshes several fields: the target
 * schema (invalidating all cached readers when it changed), the datum-reuse
 * flag (off only for the "parquet" format) and the dataset name.
 *
 * @param login the user to load the dataset as
 * @param uri the dataset or view URI
 * @return a new writer for the loaded view
 * @throws IllegalArgumentException if the dataset's format is not one of
 *         {@code allowedFormats()}
 */
private DatasetWriter<GenericRecord> newWriter(
    final UserGroupInformation login, final URI uri) {
  final PrivilegedExceptionAction<Dataset<GenericRecord>> loadAction =
      new PrivilegedExceptionAction<Dataset<GenericRecord>>() {
        @Override
        public Dataset<GenericRecord> run() {
          return Datasets.load(uri);
        }
      };
  View<GenericRecord> target = KerberosUtil.runPrivileged(login, loadAction);

  DatasetDescriptor targetDescriptor = target.getDataset().getDescriptor();
  String format = targetDescriptor.getFormat().getName();
  Preconditions.checkArgument(allowedFormats().contains(format),
      "Unsupported format: " + format);

  Schema latestSchema = targetDescriptor.getSchema();
  boolean schemaChanged = (targetSchema == null) || !latestSchema.equals(targetSchema);
  if (schemaChanged) {
    this.targetSchema = targetDescriptor.getSchema();
    // readers were built against the old target schema, so drop them all
    readers.invalidateAll();
  }

  this.reuseDatum = !"parquet".equals(format);
  this.datasetName = target.getDataset().getName();

  return target.newWriter();
}
 
Example 9
Source File: SchemaMerge.java    From funcj with MIT License 4 votes vote down vote up
/**
 * Merges two Avro schemas into a single schema covering both.
 *
 * <ul>
 * <li>Primitive types: {@code lhs} is returned when both have the same type.</li>
 * <li>Enum, fixed and record types: {@code lhs} is returned when the two
 * schemas are equal.</li>
 * <li>Arrays: {@code lhs} is returned when {@code rhs} is also an array with
 * an equal element type.</li>
 * <li>Maps: {@code lhs} is returned when {@code rhs} is also a map with an
 * equal value type.</li>
 * <li>Unions: the branches of {@code lhs} are extended with {@code rhs}
 * (or with its branches when it is a union itself), without duplicates.</li>
 * <li>Anything else: a union of {@code lhs} and {@code rhs} is created; a
 * union {@code rhs} is flattened so that no nested union is built.</li>
 * </ul>
 *
 * <p>NOTE(review): two different arrays, two different maps, or two
 * different definitions of the same named type still end up in one union,
 * which Avro may reject - confirm how callers expect those to be handled.
 *
 * @param lhs the first schema; its branches come first in a resulting union
 * @param rhs the second schema
 * @return the merged schema
 * @throws CodecException if {@code lhs} has a type this method does not know
 */
public static Schema merge(Schema lhs, Schema rhs) {

        switch (lhs.getType()) {
            case STRING:
            case BYTES:
            case INT:
            case LONG:
            case FLOAT:
            case DOUBLE:
            case BOOLEAN:
            case NULL:
                if (lhs.getType() == rhs.getType()) {
                    return lhs;
                }
                break;
            case ENUM:
            case FIXED:
            case RECORD:
                if (lhs.equals(rhs)) {
                    return lhs;
                }
                break;
            case ARRAY:
                // Arrays expose getElementType(); getValueType() is only
                // defined for maps and throws for every other schema type.
                if (rhs.getType() == Schema.Type.ARRAY
                        && lhs.getElementType().equals(rhs.getElementType())) {
                    return lhs;
                }
                break;
            case MAP:
                // Check the type first: getValueType() throws on a non-map rhs.
                if (rhs.getType() == Schema.Type.MAP
                        && lhs.getValueType().equals(rhs.getValueType())) {
                    return lhs;
                }
                break;
            case UNION:
                // Insertion-ordered set: removes duplicate branches while
                // keeping the branch order stable from run to run.
                final Set<Schema> subSchemas = new java.util.LinkedHashSet<>(lhs.getTypes());
                if (rhs.getType() == Schema.Type.UNION) {
                    subSchemas.addAll(rhs.getTypes());
                } else {
                    subSchemas.add(rhs);
                }
                return Schema.createUnion(new ArrayList<>(subSchemas));
            default:
                throw new CodecException("Unexpected schema type - " + lhs.getType());
        }

        if (rhs.getType() == Schema.Type.UNION) {
            // Avro forbids unions nested directly inside unions, so put lhs
            // in front of the branches of rhs instead of wrapping rhs.
            final Set<Schema> flattened = new java.util.LinkedHashSet<>();
            flattened.add(lhs);
            flattened.addAll(rhs.getTypes());
            return Schema.createUnion(new ArrayList<>(flattened));
        }

        return Schema.createUnion(lhs, rhs);
    }
 
Example 10
Source File: AvroUtils.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Checks whether two avro schemas describe the same types. Unlike
 * {@code Schema.equals()}, this does not look at things such as custom
 * field properties.
 *
 * <p>Enum and fixed schemas are compared with {@code Schema.equals()}.
 * Arrays, maps, unions and records are compared structurally by recursing
 * into their element, value, branch and field schemas. All remaining types
 * are equal as soon as their types match.
 *
 * @param schema1
 *          The first schema to compare
 * @param schema2
 *          The second schema to compare
 * @return True if the types are equal, otherwise false.
 */
public static boolean avroSchemaTypesEqual(Schema schema1, Schema schema2) {
  final Schema.Type kind = schema1.getType();
  if (kind != schema2.getType()) {
    // different kinds of schema can never match
    return false;
  }

  switch (kind) {
    case ENUM:
    case FIXED:
      // Enum and Fixed schemas are compared with Schema.equals.
      return schema1.equals(schema2);

    case ARRAY:
      // Arrays match when their element schemas match.
      return avroSchemaTypesEqual(schema1.getElementType(),
          schema2.getElementType());

    case MAP:
      // Maps match when their value schemas match.
      return avroSchemaTypesEqual(schema1.getValueType(),
          schema2.getValueType());

    case UNION: {
      // Unions match when they have the same number of branches and the
      // branches match position by position.
      final int branchCount = schema1.getTypes().size();
      if (branchCount != schema2.getTypes().size()) {
        return false;
      }
      for (int idx = 0; idx < branchCount; idx++) {
        final boolean sameBranch = avroSchemaTypesEqual(
            schema1.getTypes().get(idx), schema2.getTypes().get(idx));
        if (!sameBranch) {
          return false;
        }
      }
      return true;
    }

    case RECORD: {
      // Records match when they have the same number of fields and every
      // field of the first has a same-named, matching field in the second.
      if (schema1.getFields().size() != schema2.getFields().size()) {
        return false;
      }
      for (Field left : schema1.getFields()) {
        final Field right = schema2.getField(left.name());
        if (right == null || !avroSchemaTypesEqual(left.schema(), right.schema())) {
          return false;
        }
      }
      return true;
    }

    default:
      // Everything else is primitive; matching types are sufficient.
      return true;
  }
}