org.apache.avro.Schema.Field Java Examples

The following examples show how to use org.apache.avro.Schema.Field. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DslRecordMapping.java    From divolte-collector with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a mapping action that writes a constant literal into the named Avro field.
 *
 * <p>Three checks run up front, each failing with a {@code SchemaMappingException}: the class of
 * the literal must be a key of {@code COMPATIBLE_PRIMITIVES}, the field must exist in the schema,
 * and the schema returned by {@code unpackNullableUnion} for the field must have the Avro type
 * that {@code COMPATIBLE_PRIMITIVES} associates with the literal's class.
 *
 * @param fieldName name of the target field in the Avro schema
 * @param literal   constant value set on the record by the registered action
 */
public <T> void map(final String fieldName, final T literal) {
    final Class<?> literalClass = literal.getClass();
    if (!COMPATIBLE_PRIMITIVES.containsKey(literalClass)) {
        throw new SchemaMappingException("Type error. Cannot map literal %s of type %s. Only primitive types are allowed.", literal.toString(), literalClass);
    }

    final Field target = schema.getField(fieldName);
    if (target == null) {
        throw new SchemaMappingException("Field %s does not exist in Avro schema; error in mapping %s onto %s", fieldName, literal, fieldName);
    }

    // An empty Optional from unpackNullableUnion is treated the same as a type mismatch.
    final boolean typesAgree = unpackNullableUnion(target.schema())
            .map(unpacked -> unpacked.getType() == COMPATIBLE_PRIMITIVES.get(literalClass))
            .orElse(false);
    if (!typesAgree) {
        throw new SchemaMappingException("Type mismatch. Cannot map literal %s of type %s onto a field of type %s (type of value and schema of field do not match).", literal.toString(), literalClass, target.schema());
    }

    stack.getLast().add((e, c, r) -> {
        r.set(target, literal);
        return MappingAction.MappingResult.CONTINUE;
    });
}
 
Example #2
Source File: BulkResultAdapterFactory.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the Avro-converted value for the schema field at position {@code i}.
 *
 * <p>Field names and their converters are built once, on first access, from the schema. The value
 * is looked up by the full field name; if that yields {@code null}, a second lookup is made with
 * the part of the name following its first underscore. When {@code returnNullForEmpty} is set, an
 * empty string is replaced by {@code null} before conversion.
 *
 * @param i position of the field in the schema
 * @return the result of the field's converter applied to the looked-up value
 */
@SuppressWarnings("unchecked")
@Override
public Object get(int i) {
    if (names == null) {
        // First access: cache every field name together with its converter.
        names = new String[getSchema().getFields().size()];
        fieldConverter = new AvroConverter[names.length];
        int slot = 0;
        while (slot < names.length) {
            Field schemaField = getSchema().getFields().get(slot);
            names[slot] = schemaField.name();
            fieldConverter[slot] = SalesforceAvroRegistry.get().getConverterFromString(schemaField);
            slot++;
        }
    }

    final String fieldName = names[i];
    Object raw = value.getValue(fieldName);
    if (raw == null) {
        // Fall back to the name with everything up to the first '_' stripped.
        raw = value.getValue(fieldName.substring(fieldName.indexOf("_") + 1));
    }
    // "".equals(null) is false, so no separate null check is required here.
    if (returnNullForEmpty && "".equals(raw)) {
        raw = null;
    }
    return fieldConverter[i].convertToAvro(raw);
}
 
Example #3
Source File: AbstractRealtimeRecordReader.java    From hudi with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a record schema whose fields appear in the order of the Hive table's columns.
 *
 * <p>The comma-separated column list is read from the job configuration under
 * {@code hive_metastoreConstants.META_TABLE_COLUMNS}. Each column is looked up in
 * {@code schemaFieldsMap} by its lower-cased name; matching fields are copied into the new schema,
 * unmatched columns are skipped.
 *
 * @param writerSchema    schema supplying the name, doc, namespace and error flag of the result
 * @param schemaFieldsMap fields keyed by lower-cased field name
 * @return a new record schema containing the matched fields in Hive column order
 */
private Schema constructHiveOrderedSchema(Schema writerSchema, Map<String, Field> schemaFieldsMap) {
  // Get all column names of hive table
  String hiveColumnString = jobConf.get(hive_metastoreConstants.META_TABLE_COLUMNS);
  // Logged once; the original emitted this identical message twice.
  LOG.info("Hive Columns : " + hiveColumnString);
  String[] hiveColumns = hiveColumnString.split(",");
  List<Field> hiveSchemaFields = new ArrayList<>();

  for (String columnName : hiveColumns) {
    Field field = schemaFieldsMap.get(columnName.toLowerCase());

    if (field != null) {
      // A fresh Field instance is created rather than reusing the one from the map.
      hiveSchemaFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultVal()));
    } else {
      // Hive has some extra virtual columns like BLOCK__OFFSET__INSIDE__FILE which do not exist in table schema.
      // They will get skipped as they won't be found in the original schema.
      LOG.debug("Skipping Hive Column => " + columnName);
    }
  }

  Schema hiveSchema = Schema.createRecord(writerSchema.getName(), writerSchema.getDoc(), writerSchema.getNamespace(),
      writerSchema.isError());
  hiveSchema.setFields(hiveSchemaFields);
  LOG.info("HIVE Schema is :" + hiveSchema.toString(true));
  return hiveSchema;
}
 
Example #4
Source File: PigAvroDatumReader.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one record from the decoder and returns it as a Pig tuple instead of an Avro record.
 *
 * <p>The decoder reports the order in which field values will be delivered; each value is read
 * with the field's schema and stored in the tuple at the field's position.
 */
@Override
protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException {
    // Order in which the ResolvingDecoder will hand over the field values.
    final Field[] deliveryOrder = in.readFieldOrder();

    // One tuple slot per delivered field.
    final Tuple result = TupleFactory.getInstance().newTuple(deliveryOrder.length);

    for (int k = 0; k < deliveryOrder.length; k++) {
        final Field current = deliveryOrder[k];
        // Values arrive in delivery order but are stored at the field's own position.
        result.set(current.pos(), read(old, current.schema(), in));
    }

    return result;
}
 
Example #5
Source File: DslRecordMapping.java    From divolte-collector with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a mapping action that writes the value produced by {@code producer} into the named
 * Avro field.
 *
 * <p>Fails with a {@code SchemaMappingException} if the field is missing from the schema or if
 * {@code producer.validateTypes} reports an error for it. At mapping time the produced value is
 * passed through {@code producer.mapToGenericRecord} and set on the record only when present.
 *
 * @param fieldName name of the target field in the Avro schema
 * @param producer  source of the value to map
 */
public <T> void map(final String fieldName, final ValueProducer<T> producer) {
    final Field target = schema.getField(fieldName);
    if (target == null) {
        throw new SchemaMappingException("Field %s does not exist in Avro schema; error in mapping %s onto %s", fieldName, producer.identifier, fieldName);
    }

    final Optional<ValidationError> typeCheck = producer.validateTypes(target);
    if (typeCheck.isPresent()) {
        final ValidationError problem = typeCheck.get();
        throw new SchemaMappingException("Cannot map the result of %s onto field %s: %s",
                                         producer.identifier, fieldName, problem.message);
    }

    stack.getLast().add((e, c, r) -> {
        producer.produce(e, c)
                .flatMap(produced -> producer.mapToGenericRecord(produced, target.schema()))
                .ifPresent(mapped -> r.set(target, mapped));
        return MappingAction.MappingResult.CONTINUE;
    });
}
 
Example #6
Source File: MarketoBaseRESTClient.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Converts records into a JSON tree holding one name-to-value object per record.
 *
 * <p>Fields named {@code FIELD_STATUS} or {@code FIELD_ERROR_MSG} are left out. Non-null values of
 * fields that {@code MarketoClientUtils.isDateTypeField} accepts are written as formatted date
 * strings; every other value is copied unchanged.
 *
 * @param records records to convert
 * @return the Gson JSON tree of the resulting list of maps
 */
public JsonElement convertIndexedRecordsToJson(List<IndexedRecord> records) {
    List<Map<String, Object>> rows = new ArrayList<>();
    for (IndexedRecord record : records) {
        Map<String, Object> row = new HashMap<>();
        for (Field column : record.getSchema().getFields()) {
            Object cell = record.get(column.pos());
            String columnName = column.name();
            // skip status & error fields
            boolean bookkeeping = FIELD_STATUS.equals(columnName) || FIELD_ERROR_MSG.equals(columnName);
            if (bookkeeping) {
                continue;
            }
            if (MarketoClientUtils.isDateTypeField(column) && cell != null) {
                // Date fields carry a long value; render it as a date string.
                row.put(columnName, MarketoClientUtils.formatLongToDateString(Long.valueOf(String.valueOf(cell))));
            } else {
                row.put(columnName, cell);
            }
        }
        rows.add(row);
    }
    return new Gson().toJsonTree(rows);
}
 
Example #7
Source File: SimpleFileIODatasetRuntimeTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Samples "basic.xlsx" with sheet "Sheet1" and header line 1, then checks that two records with
 * three fields come back and that the first record holds the expected cell values.
 */
@Test
public void testGetSampleExcel() throws Exception {
    final String excelPath = sourceFilePrepare("basic.xlsx");

    // Excel dataset reading sheet "Sheet1" with the header on line 1.
    final SimpleFileIODatasetProperties props = createDatasetProperties();
    props.path.setValue(excelPath);
    props.format.setValue(SimpleFileIOFormat.EXCEL);
    props.sheet.setValue("Sheet1");
    props.setHeaderLine.setValue(true);
    props.headerLine.setValue(1);

    final List<IndexedRecord> sample = getSample(props, 100);
    assertThat(sample, hasSize(2));

    final IndexedRecord firstRow = sample.get(0);
    final List<Field> columns = firstRow.getSchema().getFields();
    assertThat(columns, hasSize(3));

    assertThat("2", equalTo(firstRow.get(0)));
    assertThat("gaoyan", equalTo(firstRow.get(1)));
    assertThat("Shunyi", equalTo(firstRow.get(2)));
}
 
Example #8
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a one-field record whose "amount" field is BYTES with a decimal(18, 8) logical type,
 * stores an encoded BigDecimal in it, and hands schema, expected value and record to the shared
 * conversion check.
 */
@Test
public void testConvertAvroRecordToMapWithFieldTypeOfBinaryAndLogicalTypeDecimal() {
    // Field schema: BYTES carrying the decimal logical type with precision 18 and scale 8.
    final LogicalTypes.Decimal decimal = LogicalTypes.decimal(18, 8);
    final Schema amountSchema = Schema.create(Type.BYTES);
    decimal.addToSchema(amountSchema);

    // Record schema containing just the "amount" field.
    final Schema recordSchema = Schema.createRecord(Collections.singletonList(
            new Schema.Field("amount", amountSchema, null, (Object) null)));

    // Record with the decimal value encoded to bytes for that field.
    final BigDecimal expected = new BigDecimal("1234567890.12345678");
    final GenericRecord avroRecord = new GenericData.Record(recordSchema);
    avroRecord.put("amount", new Conversions.DecimalConversion().toBytes(expected, amountSchema, decimal));

    // Convert the Avro schema to a Record schema
    thenConvertAvroSchemaToRecordSchema(recordSchema, expected, avroRecord);
}
 
Example #9
Source File: JDBCAvroRegistry.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Derives a record schema named "DYNAMIC" from JDBC result-set metadata, with one field per
 * column built by {@code sqlType2Avro}.
 *
 * @param metadata metadata of the result set to describe
 * @return the record schema holding one field per column, in column order
 * @throws SQLException if the metadata cannot be read
 */
protected Schema inferSchemaResultSetMetaData(ResultSetMetaData metadata) throws SQLException {
    final List<Field> avroFields = new ArrayList<>();

    final int columnCount = metadata.getColumnCount();
    // JDBC column indexes start at 1.
    for (int column = 1; column <= columnCount; column++) {
        final int precision = metadata.getPrecision(column);
        final int scale = metadata.getScale(column);
        final boolean nullable = metadata.isNullable(column) == ResultSetMetaData.columnNullable;

        final int sqlType = metadata.getColumnType(column);
        final String label = metadata.getColumnLabel(column);
        final String columnName = metadata.getColumnName(column);

        // Key information is not necessary for the result schema from the query statement,
        // hence the constant false.
        avroFields.add(sqlType2Avro(precision, scale, sqlType, nullable, label, columnName, null, false));
    }

    return Schema.createRecord("DYNAMIC", null, null, false, avroFields);
}
 
Example #10
Source File: AvroEntitySerDe.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the serialized bytes of a single column into its Avro value.
 *
 * @param fieldName name of the field the bytes belong to
 * @param bytes     serialized column value
 * @return the value read by the field's datum reader
 * @throws ValidationException if the schema has no such field, or no datum reader is registered
 *                             for it
 */
@Override
public Object deserializeColumnValueFromBytes(String fieldName, byte[] bytes) {
  final Field target = avroSchema.getAvroSchema().getField(fieldName);
  if (target == null) {
    throw new ValidationException("Invalid field name " + fieldName
        + " for schema " + avroSchema.toString());
  }

  final DatumReader<Object> reader = fieldDatumReaders.get(fieldName);
  if (reader == null) {
    throw new ValidationException("No datum reader for field name: "
        + fieldName);
  }

  final Decoder columnDecoder = getColumnDecoder(target.schema(), new ByteArrayInputStream(bytes));
  return AvroUtils.readAvroEntity(columnDecoder, reader);
}
 
Example #11
Source File: SnowflakeWriter.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Fills {@code row} from the input record and submits it to the loader.
 *
 * <p>The two field lists are walked in parallel. When the record-side entry is {@code null}, the
 * remote field's default value is used (an empty string becomes {@code null}), unless the remote
 * field is flagged as auto-incremented, in which case no row slot is consumed. Otherwise the
 * input value at the record field's position is converted with {@code getFieldValue}.
 *
 * @param input        record supplying the values
 * @param recordFields record-side fields, index-aligned with {@code remoteFields}; entries may be
 *                     {@code null}
 * @param remoteFields fields of the remote table
 */
protected void populateRowData(IndexedRecord input,
        List<Schema.Field> recordFields, List<Schema.Field> remoteFields) {
    int slot = 0;
    for (int column = 0; slot < row.length && column < remoteFields.size(); column++) {
        final Field localField = recordFields.get(column);
        final Field remoteField = remoteFields.get(column);

        if (localField != null) {
            final Object inputValue = input.get(localField.pos());
            row[slot] = getFieldValue(inputValue, remoteField);
            slot++;
        } else if (!Boolean.valueOf(remoteField.getProp(SnowflakeAvroRegistry.TALEND_FIELD_AUTOINCREMENTED))) {
            // No input for this column: fall back to the remote default.
            final Object fallback = remoteField.defaultVal();
            row[slot] = StringUtils.EMPTY.equals(fallback) ? null : fallback;
            slot++;
        }
        // Auto-incremented remote columns without input leave the row slot untouched.
    }

    loader.submitRow(row);
}
 
Example #12
Source File: SimpleFileIODatasetRuntimeTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Samples "basic.xlsx" without setting a sheet name (header line 1) and checks that two records
 * with three fields come back and that the first record holds the expected cell values.
 */
@Test
public void testGetSampleExcel_no_sheet() throws Exception {
    final String excelPath = sourceFilePrepare("basic.xlsx");

    // Excel dataset with the header on line 1; the sheet property is deliberately left unset.
    final SimpleFileIODatasetProperties props = createDatasetProperties();
    props.path.setValue(excelPath);
    props.format.setValue(SimpleFileIOFormat.EXCEL);
    props.setHeaderLine.setValue(true);
    props.headerLine.setValue(1);

    final List<IndexedRecord> sample = getSample(props, 100);
    assertThat(sample, hasSize(2));

    final IndexedRecord firstRow = sample.get(0);
    final List<Field> columns = firstRow.getSchema().getFields();
    assertThat(columns, hasSize(3));

    assertThat("2", equalTo(firstRow.get(0)));
    assertThat("gaoyan", equalTo(firstRow.get(1)));
    assertThat("Shunyi", equalTo(firstRow.get(2)));
}
 
Example #13
Source File: FieldDescription.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a record schema from field descriptions, tagging key fields.
 *
 * <p>Each description is turned into an Avro field via {@code toAvroField()}. For every entry of
 * {@code keys} equal to the description's name, the property
 * {@code SchemaConstants.TALEND_COLUMN_IS_KEY} is set to "true" on the field.
 *
 * @param schemaName name of the record schema
 * @param fields     descriptions to convert; {@code null} makes the method return {@code null}
 * @param keys       names of the key fields
 * @return the record schema, or {@code null} when {@code fields} is {@code null}
 */
public static Schema getSchemaForThisFields(String schemaName, FieldDescription[] fields, String[] keys) {
    // The record is created before the null check, exactly as callers have always seen it.
    final Schema record = Schema.createRecord(schemaName, "", "", false);
    if (fields == null) {
        return null;
    }

    final List<Field> avroFields = new ArrayList<>();
    for (int idx = 0; idx < fields.length; idx++) {
        final FieldDescription description = fields[idx];
        final Field avroField = description.toAvroField();
        for (String key : keys) {
            if (description.getName().equals(key)) {
                avroField.addProp(SchemaConstants.TALEND_COLUMN_IS_KEY, "true");
            }
        }
        avroFields.add(avroField);
    }
    record.setFields(avroFields);

    return record;
}
 
Example #14
Source File: DelimitedStringConverter.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one string converter per schema field and stores them in {@code converters}.
 *
 * Fields whose unwrapped schema is a timestamp-millis logical type get a timestamp converter
 * configured with the field's {@code TALEND_COLUMN_PATTERN} property; all other fields get the
 * converter registered for their Avro type.
 *
 * @param schema
 *            design schema
 */
private void initConverters(Schema schema) {
	converters = new StringConverter[size];
	List<Field> schemaFields = schema.getFields();
	int position = 0;
	while (position < size) {
		Field current = schemaFields.get(position);
		// Look through a nullable wrapper to reach the actual value schema.
		Schema valueSchema = AvroUtils.unwrapIfNullable(current.schema());
		if (LogicalTypeUtils.isLogicalTimestampMillis(valueSchema)) {
			converters[position] = new StringTimestampConverter(
					current.getProp(SchemaConstants.TALEND_COLUMN_PATTERN));
		} else {
			converters[position] = converterRegistry.get(valueSchema.getType());
		}
		position++;
	}
}
 
Example #15
Source File: AvroTupleWrapper.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Appends {@code o} to the wrapped Avro object as a new trailing field.
 *
 * <p>The value is stored at index {@code fields.size()}, then a field named
 * {@code "FIELD_" + fields.size()} is added to the list obtained from the schema and that list is
 * set back on the schema. The field's schema is chosen from the runtime type of {@code o}.
 *
 * <p>NOTE(review): if {@code o} is none of the types tested below, {@code fieldSchema} stays
 * {@code null} and is passed to the {@code Field} constructor as is — confirm that is acceptable.
 *
 * @param o value to append; may be {@code null}
 */
@Override
public void append(final Object o) {
  List<Field> fields = avroObject.getSchema().getFields();
  // The value is written before the schema gains the matching field.
  // NOTE(review): presumably avroObject.put tolerates an index one past the current fields — verify.
  avroObject.put(fields.size(), o);
  Schema fieldSchema = null;
  if (o instanceof String) {
    fieldSchema = Schema.create(Type.STRING);
  } else if (o instanceof Integer) {
    fieldSchema = Schema.create(Type.INT);
  } else if (o instanceof Long) {
    fieldSchema = Schema.create(Type.LONG);
  } else if (o instanceof Double) {
    fieldSchema = Schema.create(Type.DOUBLE);
  } else if (o instanceof Float) {
    fieldSchema = Schema.create(Type.FLOAT);
  } else if (o == null) {
    fieldSchema = Schema.create(Type.NULL);
  } else if (o instanceof Boolean) {
    fieldSchema = Schema.create(Type.BOOLEAN);
  } else if (o instanceof Map) {
    // NOTE(review): Schema.create is documented for primitive types — confirm it accepts Type.MAP.
    fieldSchema = Schema.create(Type.MAP);
  }
  Field newField = new Field("FIELD_" + fields.size(), fieldSchema, "", null);
  // NOTE(review): this mutates the list returned by getFields() and calls setFields on a schema
  // that already has fields — confirm both are permitted by the Avro version in use.
  fields.add(newField);
  avroObject.getSchema().setFields(fields);
}
 
Example #16
Source File: FieldSelectorDoFnTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the field selector with an avpath expression that picks the years of Toyota automobiles
 * and checks that the single output record has one field, "yearOfToyota", containing 2016 and
 * 2017.
 */
@Test
public void testHierarchicalWithSelector() throws Exception {
    final FieldSelectorProperties properties =
            addSelector(null, "yearOfToyota", ".automobiles{.maker === \"Toyota\"}.year");
    final IndexedRecord input = SampleAvpathSchemas.Vehicles.getDefaultVehicleCollection();

    final FieldSelectorDoFn selector = new FieldSelectorDoFn().withProperties(properties);
    final DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(selector);
    final List<IndexedRecord> outputs = tester.processBundle(input);

    assertEquals(1, outputs.size());

    final IndexedRecord onlyOutput = outputs.get(0);
    final List<Field> outputFields = onlyOutput.getSchema().getFields();
    assertEquals(1, outputFields.size());
    assertEquals("yearOfToyota", outputFields.get(0).name());
    assertThat(((List<Integer>) onlyOutput.get(0)), hasItems(2016, 2017));
}
 
Example #17
Source File: MarketoLeadClientTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code getAllLeadFields} against three stubbed responses of the describe request: an
 * exception, a default-constructed result, and a successful result with one field description.
 */
@Test
public void testGetAllLeadFields() throws Exception {
    // Case 1: the request throws -> an empty list is expected.
    doThrow(new MarketoException("REST", "error")).when(client).executeGetRequest(DescribeFieldsResult.class);
    List<Field> leadFields = client.getAllLeadFields();
    assertTrue(leadFields.isEmpty());

    // Case 2: the request returns a default-constructed result -> still an empty list.
    doReturn(new DescribeFieldsResult()).when(client).executeGetRequest(DescribeFieldsResult.class);
    leadFields = client.getAllLeadFields();
    assertTrue(leadFields.isEmpty());

    // Case 3: a successful result carrying one description -> a non-empty list.
    final FieldDescription description = new FieldDescription();
    description.setName("test");
    description.setDataType("string");
    description.setDisplayName("test");
    description.setId(124566);
    description.setLength(10);
    description.setUpdateable(true);
    final List<FieldDescription> descriptions = new ArrayList<>();
    descriptions.add(description);

    final DescribeFieldsResult populated = new DescribeFieldsResult();
    populated.setSuccess(true);
    populated.setResult(descriptions);
    doReturn(populated).when(client).executeGetRequest(DescribeFieldsResult.class);
    leadFields = client.getAllLeadFields();
    assertFalse(leadFields.isEmpty());
}
 
Example #18
Source File: TestParquetImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Imports an INT column with "--map-column-java DATA_COL0=String" and checks that the resulting
 * schema field is a STRING and that the single imported record holds "10".
 */
public void testOverrideTypeMapping() throws IOException {
  final String[] columnTypes = {"INT"};
  final String[] columnValues = {"10"};
  createTableWithColTypes(columnTypes, columnValues);

  final String[] mappingArgs = {"--map-column-java", "DATA_COL0=String"};
  runImport(getOutputArgv(true, mappingArgs));

  final Schema importedSchema = getSchema();
  assertEquals(Type.RECORD, importedSchema.getType());
  final List<Field> importedFields = importedSchema.getFields();
  assertEquals(columnTypes.length, importedFields.size());
  checkField(importedFields.get(0), "DATA_COL0", Type.STRING);

  final DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    final GenericRecord firstRow = reader.next();
    assertEquals("DATA_COL0", "10", firstRow.get("DATA_COL0"));
    // Exactly one record is expected.
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
 
Example #19
Source File: TestParquetImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Imports a column named "test_p-a+r/quet" and checks that the schema field and the record key
 * are both "TEST_P_A_R_QUET" and that the single imported record holds 2015.
 */
public void testNonIdentCharactersInColumnName() throws IOException {
  final String[] columnNames = {"test_p-a+r/quet"};
  final String[] columnTypes = {"INT"};
  final String[] columnValues = {"2015"};
  createTableWithColTypesAndNames(columnNames, columnTypes, columnValues);

  runImport(getOutputArgv(true, null));

  final Schema importedSchema = getSchema();
  assertEquals(Type.RECORD, importedSchema.getType());
  final List<Field> importedFields = importedSchema.getFields();
  assertEquals(columnTypes.length, importedFields.size());
  checkField(importedFields.get(0), "TEST_P_A_R_QUET", Type.INT);

  final DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    final GenericRecord firstRow = reader.next();
    assertEquals("TEST_P_A_R_QUET", 2015, firstRow.get("TEST_P_A_R_QUET"));
    // Exactly one record is expected.
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
 
Example #20
Source File: DynamicSchemaUtilsTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * With a remote schema of (name, age, year) and an input schema of (name, year), checks that the
 * common-field list is as long as the remote schema and that each entry is either {@code null}
 * or a field named "name" or "year".
 */
@Test
public void testGetColumnsWhenRemoteSchemaHasMoreColumns() {
    final Schema remoteSchema = SchemaBuilder.builder().record("record").fields()
            .requiredString("name")
            .requiredInt("age")
            .requiredInt("year")
            .endRecord();
    final Schema inputSchema = SchemaBuilder.builder().record("record").fields()
            .requiredString("name")
            .requiredInt("year")
            .endRecord();

    final List<Field> commonFields = DynamicSchemaUtils.getCommonFieldsForDynamicSchema(remoteSchema, inputSchema);

    Assert.assertEquals(remoteSchema.getFields().size(), commonFields.size());

    // null is an allowed entry in the result list.
    final Set<String> expectedFieldNames = new HashSet<>(Arrays.asList(null, "name", "year"));
    for (Field candidate : commonFields) {
        final String candidateName = (candidate == null) ? null : candidate.name();
        Assert.assertTrue(expectedFieldNames.contains(candidateName));
    }
}
 
Example #21
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Imports a column named "_NAME" and checks that the Avro schema field and the record key are
 * both "__NAME" and that the first record holds 1987.
 *
 * <p>The Avro file reader is closed in a {@code finally} block so that the underlying file is
 * released even when an assertion fails.
 */
public void testFirstUnderscoreInColumnName() throws IOException {
  String [] names = { "_NAME" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());
    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());

    checkField(fields.get(0), "__NAME", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("__NAME", 1987, record1.get("__NAME"));
  } finally {
    // Previously the reader was never closed, leaking the open data file.
    reader.close();
  }
}
 
Example #22
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Imports a column named "avro\uC3A11" and checks that the Avro schema field and the record key
 * are both "AVRO1" and that the first record holds 1987.
 *
 * <p>The Avro file reader is closed in a {@code finally} block so that the underlying file is
 * released even when an assertion fails.
 */
public void testNonstandardCharactersInColumnName() throws IOException {
  String [] names = { "avro\uC3A11" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());
    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());

    checkField(fields.get(0), "AVRO1", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("AVRO1", 1987, record1.get("AVRO1"));
  } finally {
    // Previously the reader was never closed, leaking the open data file.
    reader.close();
  }
}
 
Example #23
Source File: TestAvroImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Imports a column named "test_a-v+r/o" and checks that the Avro schema field and the record key
 * are both "TEST_A_V_R_O" and that the first record holds 2015.
 *
 * <p>The Avro file reader is closed in a {@code finally} block so that the underlying file is
 * released even when an assertion fails.
 */
public void testNonIdentCharactersInColumnName() throws IOException {
  String [] names = { "test_a-v+r/o" };
  String [] types = { "INT" };
  String [] vals = { "2015" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());
    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());

    checkField(fields.get(0), "TEST_A_V_R_O", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("TEST_A_V_R_O", 2015, record1.get("TEST_A_V_R_O"));
  } finally {
    // Previously the reader was never closed, leaking the open data file.
    reader.close();
  }
}
 
Example #24
Source File: TSplunkEventCollectorWriter.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the default "Main" record schema with one nullable, default-less field per Splunk
 * metadata field.
 *
 * <p>The TIME field additionally receives a {@code TALEND_COLUMN_PATTERN} property. The pattern
 * is taken from the same-named field of the design schema when that field exists, otherwise from
 * the design schema itself; a missing or empty pattern becomes "dd-MM-yyyy".
 *
 * @param designSchema schema consulted for the date pattern
 * @return the assembled record schema
 */
private Schema initDefaultSchema(Schema designSchema) {
    final AvroRegistry registry = new AvroRegistry();
    final FieldAssembler<Schema> assembler = SchemaBuilder.record("Main").fields();

    for (SplunkJSONEventField metadata : SplunkJSONEventField.getMetadataFields()) {
        final Schema baseSchema = registry.getConverter(metadata.getDataType()).getSchema();
        final FieldBuilder<Schema> builder = assembler.name(metadata.getName());

        if (metadata.getName().equals(SplunkJSONEventField.TIME.getName())) {
            final Field designField = designSchema.getField(metadata.getName());
            // Prefer the pattern declared on the design field; fall back to the schema-level one.
            String pattern = (designField != null)
                    ? designField.getProp(SchemaConstants.TALEND_COLUMN_PATTERN)
                    : designSchema.getProp(SchemaConstants.TALEND_COLUMN_PATTERN);
            if (pattern == null || pattern.isEmpty()) {
                pattern = "dd-MM-yyyy";
            }
            builder.prop(SchemaConstants.TALEND_COLUMN_PATTERN, pattern);
        }

        builder.type(AvroUtils.wrapAsNullable(baseSchema)).noDefault();
    }

    return assembler.endRecord();
}
 
Example #25
Source File: BigQueryAvroUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an Avro record into a {@code TableRow} according to the given table field schemas.
 *
 * <p>For each table field, the Avro field with the same name is looked up in the record's schema
 * and its value is converted with {@code getTypedCellValue}. Null conversion results are left
 * out of the row.
 *
 * @param record Avro record to convert
 * @param fields table field schemas describing the row
 * @return the populated row
 */
private static TableRow convertGenericRecordToTableRow(
    GenericRecord record, List<TableFieldSchema> fields) {
  final TableRow result = new TableRow();
  for (TableFieldSchema column : fields) {
    // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the name field
    // is required, so it may not be null.
    final Field avroField = record.getSchema().getField(column.getName());
    final String avroName = avroField.name();
    final Object cell = getTypedCellValue(avroField.schema(), column, record.get(avroName));
    if (cell == null) {
      // To match the JSON files exported by BigQuery, do not include null values in the output.
      continue;
    }
    result.set(avroName, cell);
  }

  return result;
}
 
Example #26
Source File: TSnowflakeRowProperties.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code SELECT} statement listing every schema field, each qualified with the table
 * name and separated by ", ", followed by {@code FROM <tableName>}.
 *
 * @param tableName table name used both as column qualifier and in the FROM clause
 * @param schema    schema whose field names become the selected columns
 * @return the SQL text
 */
private String generateSqlQuery(String tableName, Schema schema) {
    final StringBuilder query = new StringBuilder("SELECT ");
    // Empty before the first column, ", " before every later one.
    String separator = "";
    for (Schema.Field column : schema.getFields()) {
        query.append(separator).append(tableName).append(".").append(column.name());
        separator = ", ";
    }
    return query.append(" FROM ").append(tableName).toString();
}
 
Example #27
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a copy of the given record {@link org.apache.avro.Schema} under a different name.
 *
 * <p>The copy keeps the doc, namespace and error flag of the original. Each field is recreated
 * from its name, schema, doc, default value and order. When the schema already carries the
 * requested name, the schema itself is returned.
 *
 * @param schema {@link org.apache.avro.Schema} to copy.
 * @param newName name for the copied {@link org.apache.avro.Schema}.
 * @return {@code schema} itself if it is already named {@code newName}, otherwise the renamed copy.
 */
public static Schema switchName(Schema schema, String newName) {
  if (schema.getName().equals(newName)) {
    return schema;
  }

  final Schema renamed = Schema.createRecord(newName, schema.getDoc(), schema.getNamespace(), schema.isError());

  final List<Field> copies = Lists.newArrayList();
  for (Field original : schema.getFields()) {
    // A null entry should never occur, but is passed through unchanged if it does.
    copies.add(original == null
        ? null
        : new Field(original.name(), original.schema(), original.doc(), original.defaultValue(), original.order()));
  }

  renamed.setFields(copies);
  return renamed;
}
 
Example #28
Source File: JDBCAvroRegistryString.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Derives a record schema named "DYNAMIC" from JDBC result-set metadata, with one field per
 * column built by {@code sqlType2Avro}.
 *
 * <p>Each column label is passed through {@code NameUtil.correct} together with its zero-based
 * position and the names already handed out; the returned name is used for the field.
 *
 * @param metadata metadata of the result set to describe
 * @return the record schema holding one field per column, in column order
 * @throws SQLException if the metadata cannot be read
 */
protected Schema inferSchemaResultSetMetaData(ResultSetMetaData metadata) throws SQLException {
    final List<Field> avroFields = new ArrayList<>();
    final Set<String> usedNames = new HashSet<String>();

    final int columnCount = metadata.getColumnCount();
    // JDBC column indexes start at 1; the name corrector receives the zero-based position.
    for (int column = 1; column <= columnCount; column++) {
        final int precision = metadata.getPrecision(column);
        final int scale = metadata.getScale(column);
        final boolean nullable = metadata.isNullable(column) == ResultSetMetaData.columnNullable;

        final int sqlType = metadata.getColumnType(column);
        final String label = metadata.getColumnLabel(column);
        final String columnName = metadata.getColumnName(column);

        final String avroName = NameUtil.correct(label, column - 1, usedNames);
        usedNames.add(avroName);

        // Key information is not necessary for the result schema from the query statement,
        // hence the constant false.
        avroFields.add(sqlType2Avro(precision, scale, sqlType, nullable, avroName, columnName, null, false));
    }
    return Schema.createRecord("DYNAMIC", null, null, false, avroFields);
}
 
Example #29
Source File: SnowflakeRowWriter.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Records the outcome of a successful statement for the given input.
 *
 * <p>Nothing happens when {@code mainSchema} is {@code null} or has no fields. When
 * {@code resultSetValidation} is false and {@code validateResultSet()} returns false, the input
 * record itself is counted and stored as a successful write. Otherwise every row of {@code rs}
 * is converted to an Avro record, counted, and stored either as is (when
 * {@code AvroUtils.isIncludeAllFields(mainSchema)} holds) or projected onto {@code mainSchema}.
 *
 * @param input record that was written
 * @throws SQLException if reading the result set fails
 */
private void handleSuccess(IndexedRecord input) throws SQLException {
    if (mainSchema == null || mainSchema.getFields().isEmpty()) {
        return;
    }

    if (!resultSetValidation && !validateResultSet()) {
        result.totalCount++;
        result.successCount++;
        successfulWrites.add(input);
        return;
    }

    // The converter is created on first use and reused afterwards.
    if (resultSetFactory == null) {
        resultSetFactory = new SnowflakeResultSetIndexedRecordConverter();
        resultSetFactory.setSchema(mainSchema);
    }

    while (rs.next()) {
        final IndexedRecord converted = resultSetFactory.convertToAvro(rs);
        // Since we're sending dynamic record further, only on this step we know exact remote
        // schema value, so such records are forwarded untouched.
        successfulWrites.add(AvroUtils.isIncludeAllFields(mainSchema)
                ? converted
                : projectOntoMainSchema(converted));
        result.totalCount++;
        result.successCount++;
    }
}

/** Copies into a new {@code mainSchema} record every value whose field name exists in {@code source}. */
private IndexedRecord projectOntoMainSchema(IndexedRecord source) {
    final IndexedRecord projected = new GenericData.Record(mainSchema);
    for (Field outField : mainSchema.getFields()) {
        final Field sourceField = source.getSchema().getField(outField.name());
        if (sourceField != null) {
            projected.put(outField.pos(), source.get(sourceField.pos()));
        }
    }
    return projected;
}
 
Example #30
Source File: AvroMorphlineTest.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Renders an Avro record as the string form of a {@code Record} populated with one
 * name-to-value entry per schema field.
 */
private String toString(GenericData.Record avroRecord) {
  final Record byName = new Record();
  for (Field column : avroRecord.getSchema().getFields()) {
    final String key = column.name();
    final Object cell = avroRecord.get(column.pos());
    byName.put(key, cell);
  }
  // prints sorted by key for human readability
  return byName.toString();
}