org.apache.avro.Schema.Parser Java Examples

The following examples show how to use org.apache.avro.Schema.Parser. Each example is taken from an open-source project; the source file, project, and license are noted in the heading above it.
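Before the project-specific examples, here is a minimal, self-contained sketch of the two most common ways to use Schema.Parser: parsing a schema from an inline JSON string and from an .avsc file on disk. The User record and the user.avsc path are illustrative placeholders, not taken from any of the projects below.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Parser;

public class SchemaParserSketch {
    public static void main(String[] args) throws IOException {
        // Parse a schema from an inline JSON definition (illustrative record).
        Schema fromString = new Parser().parse(
                "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
                + "{\"name\":\"name\",\"type\":\"string\"},"
                + "{\"name\":\"age\",\"type\":\"int\"}]}");
        System.out.println(fromString.getFullName()); // prints: User

        // Parse a schema from an .avsc file; parse(File) throws IOException on failure.
        Schema fromFile = new Parser().parse(new File("user.avsc"));
        System.out.println(fromFile.getFields());
    }
}

Note that a Parser remembers the named types it has already parsed, so feeding the same named schema to one Parser twice fails with a "Can't redefine" SchemaParseException; this is why the examples below generally create a fresh new Parser() per schema.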
Example #1
Source File: ConvertCsvToParquetFileExpressionProcessor.java    From vividus with Apache License 2.0
private void write(File file, String avroSchemaPath, List<Map<String, String>> data) throws IOException
{
    Schema schema = new Parser().parse(ResourceUtils.loadResource(avroSchemaPath));
    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
            .<GenericRecord>builder(new Path(file.toURI()))
            .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
            .withDataModel(GenericData.get())
            .withSchema(schema)
            .build())
    {
        for (Map<String, String> map : data)
        {
            GenericRecord record = new GenericData.Record(schema);
            map.forEach(record::put);
            writer.write(record);
        }
    }
}
 
Example #2
Source File: ReadAvroBuilder.java    From kite with Apache License 2.0
public ReadAvro(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  
  String schemaString = getConfigs().getString(config, "writerSchemaString", null);
  if (schemaString != null) {
    this.writerSchema = new Parser().parse(schemaString);
  } else {        
    String schemaFile = getConfigs().getString(config, "writerSchemaFile", null);
    if (schemaFile != null) {
      try { 
        this.writerSchema = new Parser().parse(new File(schemaFile));
      } catch (IOException e) {
        throw new MorphlineCompilationException("Cannot parse external Avro writer schema file: " + schemaFile, config, e);
      }
    } else {
      this.writerSchema = null;
    }
  }
  
  this.isJson = getConfigs().getBoolean(config, "isJson", false);
  validateArguments();      
}
 
Example #3
Source File: ValidateRecord.java    From nifi with Apache License 2.0
protected RecordSchema getValidationSchema(final ProcessContext context, final FlowFile flowFile, final RecordReader reader)
    throws MalformedRecordException, IOException, SchemaNotFoundException {
    final String schemaAccessStrategy = context.getProperty(SCHEMA_ACCESS_STRATEGY).getValue();
    if (schemaAccessStrategy.equals(READER_SCHEMA.getValue())) {
        return reader.getSchema();
    } else if (schemaAccessStrategy.equals(SCHEMA_NAME_PROPERTY.getValue())) {
        final SchemaRegistry schemaRegistry = context.getProperty(SCHEMA_REGISTRY).asControllerService(SchemaRegistry.class);
        final String schemaName = context.getProperty(SCHEMA_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final SchemaIdentifier schemaIdentifier = SchemaIdentifier.builder().name(schemaName).build();
        return schemaRegistry.retrieveSchema(schemaIdentifier);
    } else if (schemaAccessStrategy.equals(SCHEMA_TEXT_PROPERTY.getValue())) {
        final String schemaText = context.getProperty(SCHEMA_TEXT).evaluateAttributeExpressions(flowFile).getValue();
        final Parser parser = new Schema.Parser();
        final Schema avroSchema = parser.parse(schemaText);
        return AvroTypeUtil.createSchema(avroSchema);
    } else {
        throw new ProcessException("Invalid Schema Access Strategy: " + schemaAccessStrategy);
    }
}
 
Example #4
Source File: FileWriter.java    From SPADE with GNU General Public License v3.0
public FileWriter(String schemaFile, String outputFile) throws IOException {
    Parser parser = new Schema.Parser();
    Schema schema = parser.parse(new File(schemaFile));
    DatumWriter<Object> datumWriter = new SpecificDatumWriter<Object>(schema);
    fileWriter = new DataFileWriter<>(datumWriter);
    fileWriter.create(schema, new File(outputFile));
}
 
Example #5
Source File: ReadAvroContainerBuilder.java    From kite with Apache License 2.0
public ReadAvroContainer(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {   
  super(builder, config, parent, child, context);

  String schemaString = getConfigs().getString(config, "readerSchemaString", null);
  if (schemaString != null) {
    this.readerSchema = new Parser().parse(schemaString);
  } else {        
    String schemaFile = getConfigs().getString(config, "readerSchemaFile", null);
    if (schemaFile != null) {
      try { 
        this.readerSchema = new Parser().parse(new File(schemaFile));
      } catch (IOException e) {
        throw new MorphlineCompilationException("Cannot parse external Avro reader schema file: " + schemaFile, config, e);
      }
    } else {
      this.readerSchema = null;
    }
  }
  
  if (getClass() == ReadAvroContainer.class) {
    resolverCache = new BoundedLRUHashMap<ByteArrayKey, ResolvingDecoder>(
        getConfigs().getInt(config, "schemaCacheCapacity", 100));
    
    validateArguments();
  } else {
    resolverCache = null;
  }
}
 
Example #6
Source File: DirectBigQueryRecordReaderTest.java    From hadoop-connectors with Apache License 2.0
@Before
public void setUp() {
  // Manually creating the parsed Schema object is a pain, so duplicate the parsing logic here instead
  Parser parser = new Parser();
  parsedSchema = parser.parse(RAW_SCHEMA);

  MockitoAnnotations.initMocks(this);
  when(bqClient.readRowsCallable()).thenReturn(readRows);
  when(readRows.call(any(ReadRowsRequest.class))).thenReturn(rowsStream);

  reader = new TestDirectBigQueryRecordReader();
}
 
Example #7
Source File: SerializableAvroSchema.java    From flink with Apache License 2.0
private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException {
	if (ois.readBoolean()) {
		String schema = ois.readUTF();
		this.schema = new Parser().parse(schema);
	}
	else {
		this.schema = null;
	}
}
 
Example #8
Source File: JsonFileWriter.java    From SPADE with GNU General Public License v3.0
public JsonFileWriter(String schemaFile, String outputFilePath) throws Exception{
	
	File outputFile = new File(outputFilePath);
	if(outputFile == null || outputFile.getParentFile() == null || !outputFile.getParentFile().exists()){
		throw new Exception("Invalid file path: " + outputFilePath);
	}
	
	Parser parser = new Schema.Parser();
	Schema schema = parser.parse(new File(schemaFile));
	datumWriter = new SpecificDatumWriter<Object>(schema);
	OutputStream outputStream = new FileOutputStream(outputFile);
	jsonEncoder = EncoderFactory.get().jsonEncoder(schema, outputStream);
}
 
Example #9
Source File: SchemaDeserializer.java    From data-highway with Apache License 2.0
@Override
public Schema deserialize(JsonParser parser, DeserializationContext context)
  throws IOException, JsonProcessingException {
  Parser schemaParser = new Schema.Parser();
  // Validate any default values provided
  schemaParser.setValidateDefaults(true);
  // Validate all names.
  schemaParser.setValidate(true);
  return schemaParser.parse(parser.readValueAsTree().toString());
}
 
Example #10
Source File: CDM.java    From SPADE with GNU General Public License v3.0
public BinaryReader(String dataFilepath, String schemaFilepath) throws Exception{
	this.filepath = dataFilepath;
	Parser parser = new Schema.Parser();
	Schema schema = parser.parse(new File(schemaFilepath));
	DatumReader<Object> datumReader = new SpecificDatumReader<Object>(schema);
	this.dataFileReader = new DataFileReader<>(new File(dataFilepath), datumReader);
}
 
Example #11
Source File: CDM.java    From SPADE with GNU General Public License v3.0
public JsonReader(String dataFilepath, String schemaFilepath) throws Exception{
	this.filepath = dataFilepath;
	Parser parser = new Schema.Parser();
	Schema schema = parser.parse(new File(schemaFilepath));
	this.datumReader = new SpecificDatumReader<Object>(schema);
	this.decoder = DecoderFactory.get().jsonDecoder(schema, 
			new FileInputStream(new File(dataFilepath)));
}
 
Example #12
Source File: SimpleSchemaTest.java    From pulsar with Apache License 2.0
@Test
public void newProducerForMessageSchemaOnTopicInitialWithNoSchema() throws Exception {
    String topic = "my-property/my-ns/schema-test";
    Schema<V1Data> v1Schema = Schema.AVRO(V1Data.class);
    byte[] v1SchemaBytes = v1Schema.getSchemaInfo().getSchema();
    AvroWriter<V1Data> v1Writer = new AvroWriter<>(
            new Parser().parse(new ByteArrayInputStream(v1SchemaBytes)));
    Schema<V2Data> v2Schema = Schema.AVRO(V2Data.class);
    byte[] v2SchemaBytes = v2Schema.getSchemaInfo().getSchema();
    AvroWriter<V2Data> v2Writer = new AvroWriter<>(
            new Parser().parse(new ByteArrayInputStream(v2SchemaBytes)));
    try (Producer<byte[]> p = pulsarClient.newProducer()
                                          .topic(topic).create();
         Consumer<byte[]> c = pulsarClient.newConsumer()
                                          .topic(topic)
                                          .subscriptionName("sub1").subscribe()) {
        for (int i = 0; i < 2; ++i) {
            V1Data dataV1 = new V1Data(i);
            V2Data dataV2 = new V2Data(i, -i);
            byte[] contentV1 = v1Writer.write(dataV1);
            byte[] contentV2 = v2Writer.write(dataV2);
            p.newMessage(Schema.AUTO_PRODUCE_BYTES(v1Schema)).value(contentV1).send();
            Message<byte[]> msg1 = c.receive();
            Assert.assertEquals(msg1.getSchemaVersion(), new LongSchemaVersion(0).bytes());
            Assert.assertEquals(msg1.getData(), contentV1);
            p.newMessage(Schema.AUTO_PRODUCE_BYTES(v2Schema)).value(contentV2).send();
            Message<byte[]> msg2 = c.receive();
            Assert.assertEquals(msg2.getSchemaVersion(), new LongSchemaVersion(1).bytes());
            Assert.assertEquals(msg2.getData(), contentV2);
        }
    }

    List<SchemaInfo> allSchemas = admin.schemas().getAllSchemas(topic);
    Assert.assertEquals(allSchemas, Arrays.asList(v1Schema.getSchemaInfo(),
                                                  v2Schema.getSchemaInfo()));
}
 
Example #13
Source File: SerializableAvroSchema.java    From Flink-CEPplus with Apache License 2.0
private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException {
	if (ois.readBoolean()) {
		String schema = ois.readUTF();
		this.schema = new Parser().parse(schema);
	}
	else {
		this.schema = null;
	}
}
 
Example #14
Source File: StructSchema.java    From pulsar with Apache License 2.0
protected static org.apache.avro.Schema parseAvroSchema(String schemaJson) {
    final Parser parser = new Parser();
    parser.setValidateDefaults(false);
    return parser.parse(schemaJson);
}
 
Example #15
Source File: MappingTestServer.java    From divolte-collector with Apache License 2.0
private Schema loadSchema(final String schemaFilename) throws IOException {
    final Parser parser = new Schema.Parser();
    return parser.parse(new File(schemaFilename));
}
 
Example #16
Source File: SimpleSchemaTest.java    From pulsar with Apache License 2.0
@Test
public void newProducerForMessageSchemaOnTopicWithMultiVersionSchema() throws Exception {
    String topic = "my-property/my-ns/schema-test";
    Schema<V1Data> v1Schema = Schema.AVRO(V1Data.class);
    byte[] v1SchemaBytes = v1Schema.getSchemaInfo().getSchema();
    AvroWriter<V1Data> v1Writer = new AvroWriter<>(
            new Parser().parse(new ByteArrayInputStream(v1SchemaBytes)));
    Schema<V2Data> v2Schema = Schema.AVRO(V2Data.class);
    byte[] v2SchemaBytes = v2Schema.getSchemaInfo().getSchema();
    AvroWriter<V2Data> v2Writer = new AvroWriter<>(
            new Parser().parse(new ByteArrayInputStream(v2SchemaBytes)));
    try (Producer<V1Data> ignored = pulsarClient.newProducer(v1Schema)
                                                .topic(topic).create()) {
    }
    try (Producer<V2Data> p = pulsarClient.newProducer(Schema.AVRO(V2Data.class))
                                          .topic(topic).create()) {
        p.send(new V2Data(-1, -1));
    }
    V1Data dataV1 = new V1Data(2);
    V2Data dataV2 = new V2Data(3, 5);
    byte[] contentV1 = v1Writer.write(dataV1);
    byte[] contentV2 = v2Writer.write(dataV2);
    try (Producer<byte[]> p = pulsarClient.newProducer(Schema.AUTO_PRODUCE_BYTES())
                                          .topic(topic).create();
            Consumer<V2Data> c = pulsarClient.newConsumer(v2Schema)
                                             .topic(topic)
                                             .subscriptionName("sub1").subscribe()) {
        Assert.expectThrows(SchemaSerializationException.class, () -> p.send(contentV1));

        p.newMessage(Schema.AUTO_PRODUCE_BYTES(Schema.AVRO(V1Data.class)))
         .value(contentV1).send();
        p.send(contentV2);
        Message<V2Data> msg1 = c.receive();
        V2Data msg1Value = msg1.getValue();
        Assert.assertEquals(dataV1.i, msg1Value.i);
        Assert.assertNull(msg1Value.j);
        Assert.assertEquals(msg1.getSchemaVersion(), new LongSchemaVersion(0).bytes());

        Message<V2Data> msg2 = c.receive();
        Assert.assertEquals(dataV2, msg2.getValue());
        Assert.assertEquals(msg2.getSchemaVersion(), new LongSchemaVersion(1).bytes());

        try {
            p.newMessage(Schema.BYTES).value(contentV1).send();
            if (schemaValidationEnforced) {
                Assert.fail("Shouldn't be able to send to a schema'd topic with no schema"
                                    + " if SchemaValidationEnabled is enabled");
            }
            Message<V2Data> msg3 = c.receive();
            Assert.assertEquals(msg3.getSchemaVersion(), SchemaVersion.Empty.bytes());
        } catch (PulsarClientException e) {
            if (schemaValidationEnforced) {
                Assert.assertTrue(e instanceof IncompatibleSchemaException);
            } else {
                Assert.fail("Shouldn't throw IncompatibleSchemaException"
                                    + " if SchemaValidationEnforced is disabled");
            }
        }
    }
}
 
Example #17
Source File: ToAvroBuilder.java    From kite with Apache License 2.0
public ToAvro(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  
  String schemaFile = getConfigs().getString(config, "schemaFile", null);
  String schemaString = getConfigs().getString(config, "schemaString", null);
  this.schemaField = getConfigs().getString(config, "schemaField", null);
  
  int numDefinitions = 0;
  if (schemaFile != null) {
    numDefinitions++;
  }
  if (schemaString != null) {
    numDefinitions++;
  }
  if (schemaField != null) {
    numDefinitions++;
  }
  if (numDefinitions == 0) {
    throw new MorphlineCompilationException(
      "Either schemaFile or schemaString or schemaField must be defined", config);
  }
  if (numDefinitions > 1) {
    throw new MorphlineCompilationException(
      "Must define only one of schemaFile or schemaString or schemaField at the same time", config);
  }

  if (schemaString != null) {
    this.fixedSchema = new Parser().parse(schemaString);
  } else if (schemaFile != null) {
    try { 
      this.fixedSchema = new Parser().parse(new File(schemaFile));
    } catch (IOException e) {
      throw new MorphlineCompilationException(
        "Cannot parse external Avro schema file: " + schemaFile, config, e);
    }
  } else {
    this.fixedSchema = null;
  }
  
  Config mappingsConfig = getConfigs().getConfig(config, "mappings", ConfigFactory.empty());
  for (Map.Entry<String, Object> entry : new Configs().getEntrySet(mappingsConfig)) {
    mappings.put(entry.getKey(), entry.getValue().toString());
  }
  validateArguments();
}
 
Example #18
Source File: AvroMorphlineTest.java    From kite with Apache License 2.0
@Test
public void testToAvroBasic() throws Exception {
  Schema schema = new Parser().parse(new File("src/test/resources/test-avro-schemas/interop.avsc"));
  morphline = createMorphline("test-morphlines/toAvroWithSchemaFile");
  
  byte[] bytes = new byte[] {47, 13};
  byte[] fixed = new byte[16];
  Record jdoc1 = new Record();     
  jdoc1.put("_dataset_descriptor_schema", schema);
  collector.reset();
  assertFalse(morphline.process(jdoc1)); // "has no default value"

  jdoc1.put("intField", "notAnInteger");
  collector.reset();
  assertFalse(morphline.process(jdoc1)); // can't convert

  jdoc1.replaceValues("intField", "20");
  jdoc1.put("longField", "200");
  jdoc1.put("stringField", "abc");
  jdoc1.put("boolField", "true");
  jdoc1.put("floatField", "200");
  jdoc1.put("doubleField","200");
  jdoc1.put("bytesField", bytes);
  jdoc1.put("nullField", null);
  jdoc1.getFields().putAll("arrayField", Arrays.asList(10.0, 20.0));
  jdoc1.put("mapField", 
      new HashMap(ImmutableMap.of("myMap", 
        ImmutableMap.of("label", "car")
      ))
  );
  jdoc1.put("unionField", new ArrayList(Arrays.asList(bytes)));
  jdoc1.put("enumField", "B");
  jdoc1.put("fixedField", fixed);
  jdoc1.put("recordField", 
      ImmutableMap.of(  
          "label", "house",
          "children", new ArrayList(Arrays.asList(bytes)))
  );    
  collector.reset();
  assertTrue(morphline.process(jdoc1));
  
  GenericData.Record actual = (GenericData.Record) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
  assertEquals(20, actual.get("intField"));
  assertEquals(123, actual.get("defaultIntField"));    
  assertEquals(200L, actual.get("longField"));
  assertEquals("abc", actual.get("stringField"));
  assertEquals(Boolean.TRUE, actual.get("boolField"));
  assertEquals(200.0f, actual.get("floatField"));
  assertEquals(200.0, actual.get("doubleField"));
  assertEquals(ByteBuffer.wrap(bytes), actual.get("bytesField"));
  assertNull(actual.get("nullField"));
  assertEquals(Arrays.asList(10.0, 20.0), actual.get("arrayField"));
  GenericData.Record expected = new GenericData.Record(schema.getField("mapField").schema().getValueType());
  expected.put("label", "car");
  assertEquals(ImmutableMap.of("myMap", expected), actual.get("mapField"));
  assertEquals(Arrays.asList(ByteBuffer.wrap(bytes)), actual.get("unionField"));
  assertEquals("B", actual.get("enumField"));
  assertEquals(
      new GenericData.Fixed(schema.getField("fixedField").schema(), fixed), 
      actual.get("fixedField"));
  expected = new GenericData.Record(schema.getField("recordField").schema());
  expected.put("label", "house");
  expected.put("children", new ArrayList(Arrays.asList(ByteBuffer.wrap(bytes))));
  assertEquals(expected, actual.get("recordField"));
}