org.apache.spark.sql.types.MapType Java Examples

The following examples show how to use org.apache.spark.sql.types.MapType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AvroWithSparkSchemaVisitor.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Walks an Avro schema together with the Spark type it is paired with and dispatches
 * each node to the visitor.
 *
 * <p>Records, unions and arrays are handed to {@code visitRecord}, {@code visitUnion} and
 * {@code visitArray}. Avro maps are handled here and require a Spark {@code MapType} whose
 * key type is {@code StringType}. Every other schema type goes to
 * {@code visitor.primitive}.
 *
 * @param type Spark type paired with {@code schema}
 * @param schema Avro schema node to visit
 * @param visitor callbacks invoked for the node
 * @return the visitor's result for this node
 */
public static <T> T visit(DataType type, Schema schema, AvroWithSparkSchemaVisitor<T> visitor) {
  switch (schema.getType()) {
    case RECORD: {
      boolean isStruct = type instanceof StructType;
      Preconditions.checkArgument(isStruct, "Invalid struct: %s is not a struct", type);
      StructType struct = (StructType) type;
      return visitRecord(struct, schema, visitor);
    }

    case UNION:
      return visitUnion(type, schema, visitor);

    case ARRAY:
      return visitArray(type, schema, visitor);

    case MAP: {
      boolean isMap = type instanceof MapType;
      Preconditions.checkArgument(isMap, "Invalid map: %s is not a map", type);
      MapType sparkMap = (MapType) type;

      // Only string keys are accepted for an Avro map schema.
      DataType keyType = sparkMap.keyType();
      Preconditions.checkArgument(keyType instanceof StringType,
          "Invalid map: %s is not a string", keyType);

      T valueResult = visit(sparkMap.valueType(), schema.getValueType(), visitor);
      return visitor.map(sparkMap, schema, valueResult);
    }

    default:
      return visitor.primitive(type, schema);
  }
}
 
Example #2
Source File: AvroWithSparkSchemaVisitor.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Visits an Avro array schema, which represents either a real list or a map encoded as an
 * array of key/value records.
 *
 * <p>The map path is taken when the schema carries a {@code LogicalMap} logical type or the
 * Spark type is a {@code MapType}; otherwise the Spark type must be an {@code ArrayType}.
 *
 * @param type Spark type paired with {@code array}
 * @param array Avro array schema
 * @param visitor callbacks invoked for the node
 * @return the visitor's result for the map or the array
 */
private static <T> T visitArray(DataType type, Schema array, AvroWithSparkSchemaVisitor<T> visitor) {
  boolean treatAsMap = array.getLogicalType() instanceof LogicalMap || type instanceof MapType;

  if (!treatAsMap) {
    Preconditions.checkArgument(type instanceof ArrayType, "Invalid array: %s is not an array", type);
    ArrayType sparkList = (ArrayType) type;
    T elementResult = visit(sparkList.elementType(), array.getElementType(), visitor);
    return visitor.array(sparkList, array, elementResult);
  }

  // Map encoded as an array: the element schema must be a key/value record.
  Schema keyValueSchema = array.getElementType();
  Preconditions.checkState(
      AvroSchemaUtil.isKeyValueSchema(keyValueSchema),
      "Cannot visit invalid logical map type: %s", array);
  Preconditions.checkArgument(type instanceof MapType, "Invalid map: %s is not a map", type);

  MapType sparkMap = (MapType) type;
  List<Schema.Field> keyValueFields = keyValueSchema.getFields();
  T keyResult = visit(sparkMap.keyType(), keyValueFields.get(0).schema(), visitor);
  T valueResult = visit(sparkMap.valueType(), keyValueFields.get(1).schema(), visitor);
  return visitor.map(sparkMap, array, keyResult, valueResult);
}
 
Example #3
Source File: TestHelpers.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that two maps hold equivalent entries, matching keys with the type-aware
 * {@code assertEqualsSafe} comparison rather than {@code equals}.
 *
 * <p>For every expected key, each actual key is tried; if several actual keys match, the
 * last one encountered is used. The values stored under the matched keys are then compared.
 *
 * @param map map type supplying the key and value types
 * @param expected expected entries
 * @param actual entries to check
 */
private static void assertEqualsSafe(Types.MapType map,
                                     Map<?, ?> expected, Map<?, ?> actual) {
  Type keyType = map.keyType();
  Type valueType = map.valueType();

  for (Object wantedKey : expected.keySet()) {
    Object foundKey = null;

    for (Object candidate : actual.keySet()) {
      try {
        assertEqualsSafe(keyType, wantedKey, candidate);
      } catch (AssertionError e) {
        // this candidate is not the key we are looking for; try the next one
        continue;
      }
      foundKey = candidate;
    }

    Assert.assertNotNull("Should have a matching key", foundKey);
    Object wantedValue = expected.get(wantedKey);
    Object foundValue = actual.get(foundKey);
    assertEqualsSafe(valueType, wantedValue, foundValue);
  }
}
 
Example #4
Source File: TestHelpers.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that a Spark {@code MapData} holds the same entries as an expected Java map,
 * comparing them position by position.
 *
 * <p>The expected entries are taken in the iteration order of {@code expected.entrySet()}
 * and compared against the key and value arrays of {@code actual} at the same index.
 *
 * @param map map type supplying the key and value types
 * @param expected expected entries
 * @param actual Spark map data to check
 */
private static void assertEqualsUnsafe(Types.MapType map, Map<?, ?> expected, MapData actual) {
  Type keyType = map.keyType();
  Type valueType = map.valueType();

  List<Map.Entry<?, ?>> expectedElements = Lists.newArrayList(expected.entrySet());
  ArrayData actualKeys = actual.keyArray();
  ArrayData actualValues = actual.valueArray();

  for (int i = 0; i < expectedElements.size(); i += 1) {
    Map.Entry<?, ?> expectedPair = expectedElements.get(i);
    Object actualKey = actualKeys.get(i, convert(keyType));
    // Read the value with the VALUE type. Using the key type here decodes the value
    // incorrectly whenever the key and value types differ.
    Object actualValue = actualValues.get(i, convert(valueType));

    assertEqualsUnsafe(keyType, expectedPair.getKey(), actualKey);
    assertEqualsUnsafe(valueType, expectedPair.getValue(), actualValue);
  }
}
 
Example #5
Source File: TestHelpers.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that two {@code MapData} instances have the same number of elements and that
 * their keys and values are equal at every position.
 *
 * @param context prefix for failure messages; ".key" or ".value" is appended per comparison
 * @param map Spark map type supplying the key and value types
 * @param expected expected map data
 * @param actual map data to check
 */
private static void assertEquals(String context, MapType map, MapData expected, MapData actual) {
  int expectedSize = expected.numElements();
  int actualSize = actual.numElements();
  Assert.assertEquals("Should have the same number of elements", expectedSize, actualSize);

  DataType keyType = map.keyType();
  DataType valueType = map.valueType();

  ArrayData expectedKeys = expected.keyArray();
  ArrayData expectedValues = expected.valueArray();
  ArrayData actualKeys = actual.keyArray();
  ArrayData actualValues = actual.valueArray();

  String keyContext = context + ".key";
  String valueContext = context + ".value";

  int pos = 0;
  while (pos < actualSize) {
    Object expectedKey = expectedKeys.get(pos, keyType);
    Object actualKey = actualKeys.get(pos, keyType);
    assertEquals(keyContext, keyType, expectedKey, actualKey);

    Object expectedValue = expectedValues.get(pos, valueType);
    Object actualValue = actualValues.get(pos, valueType);
    assertEquals(valueContext, valueType, expectedValue, actualValue);

    pos += 1;
  }
}
 
Example #6
Source File: SparkTypeVisitor.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively visits a Spark type, visiting children before their parent.
 *
 * <p>Structs visit every field, maps visit key then value, arrays visit their element type,
 * and any other type is passed to {@code visitor.atomic}.
 *
 * @param type Spark type to traverse
 * @param visitor callbacks invoked for each node
 * @return the visitor's result for {@code type}
 * @throws UnsupportedOperationException if {@code type} is a {@code UserDefinedType}
 */
static <T> T visit(DataType type, SparkTypeVisitor<T> visitor) {
  if (type instanceof StructType) {
    StructType struct = (StructType) type;
    StructField[] fields = struct.fields();
    List<T> fieldResults = Lists.newArrayListWithExpectedSize(fields.length);

    for (int i = 0; i < fields.length; i += 1) {
      StructField field = fields[i];
      T typeResult = visit(field.dataType(), visitor);
      fieldResults.add(visitor.field(field, typeResult));
    }

    return visitor.struct(struct, fieldResults);
  }

  if (type instanceof MapType) {
    MapType map = (MapType) type;
    T keyResult = visit(map.keyType(), visitor);
    T valueResult = visit(map.valueType(), visitor);
    return visitor.map(map, keyResult, valueResult);
  }

  if (type instanceof ArrayType) {
    ArrayType array = (ArrayType) type;
    T elementResult = visit(array.elementType(), visitor);
    return visitor.array(array, elementResult);
  }

  if (type instanceof UserDefinedType) {
    throw new UnsupportedOperationException(
        "User-defined types are not supported");
  }

  return visitor.atomic(type);
}
 
Example #7
Source File: StructInternalRow.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the value at {@code ordinal} through the typed getter that matches {@code dataType}.
 *
 * @param ordinal position of the field to read
 * @param dataType Spark type that selects which typed getter is used
 * @return the value from the matching getter, or {@code null} when {@code dataType} is
 *     none of the types handled here
 */
@Override
@SuppressWarnings("checkstyle:CyclomaticComplexity")
public Object get(int ordinal, DataType dataType) {
  if (dataType instanceof IntegerType) {
    return getInt(ordinal);
  }
  if (dataType instanceof LongType) {
    return getLong(ordinal);
  }
  if (dataType instanceof StringType) {
    return getUTF8String(ordinal);
  }
  if (dataType instanceof FloatType) {
    return getFloat(ordinal);
  }
  if (dataType instanceof DoubleType) {
    return getDouble(ordinal);
  }
  if (dataType instanceof DecimalType) {
    // Decimals need precision and scale from the type itself.
    DecimalType decimal = (DecimalType) dataType;
    int precision = decimal.precision();
    int scale = decimal.scale();
    return getDecimal(ordinal, precision, scale);
  }
  if (dataType instanceof BinaryType) {
    return getBinary(ordinal);
  }
  if (dataType instanceof StructType) {
    int fieldCount = ((StructType) dataType).size();
    return getStruct(ordinal, fieldCount);
  }
  if (dataType instanceof ArrayType) {
    return getArray(ordinal);
  }
  if (dataType instanceof MapType) {
    return getMap(ordinal);
  }
  if (dataType instanceof BooleanType) {
    return getBoolean(ordinal);
  }
  if (dataType instanceof ByteType) {
    return getByte(ordinal);
  }
  if (dataType instanceof ShortType) {
    return getShort(ordinal);
  }

  // No typed getter for this type.
  return null;
}
 
Example #8
Source File: StructInternalRow.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Java map into an {@code ArrayBasedMapData} built from parallel key and value
 * arrays.
 *
 * @param mapType map type supplying the key and value types used for conversion
 * @param map entries to convert
 * @return map data whose key and value arrays follow the same entry order
 */
private MapData mapToMapData(Types.MapType mapType, Map<?, ?> map) {
  // Snapshot the entries so keys and values are read from the same fixed sequence.
  List<Map.Entry<?, ?>> snapshot = ImmutableList.copyOf(map.entrySet());

  List<?> keys = Lists.transform(snapshot, Map.Entry::getKey);
  List<?> values = Lists.transform(snapshot, Map.Entry::getValue);

  return new ArrayBasedMapData(
      collectionToArrayData(mapType.keyType(), keys),
      collectionToArrayData(mapType.valueType(), values));
}
 
Example #9
Source File: TestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that two values of the given Spark type are equal, dispatching to the
 * type-specific comparison for structs, arrays, maps and binary values.
 *
 * <p>Two {@code null} values are considered equal. For nested types, both values are first
 * checked to be of the expected Spark internal class before being compared.
 *
 * @param context description appended to failure messages
 * @param type Spark type of the values
 * @param expected expected value
 * @param actual value to check
 */
private static void assertEquals(String context, DataType type, Object expected, Object actual) {
  if (expected == null && actual == null) {
    return;
  }

  if (type instanceof StructType) {
    Assert.assertTrue("Expected should be an InternalRow: " + context,
        expected instanceof InternalRow);
    Assert.assertTrue("Actual should be an InternalRow: " + context,
        actual instanceof InternalRow);
    StructType struct = (StructType) type;
    assertEquals(context, struct, (InternalRow) expected, (InternalRow) actual);
    return;
  }

  if (type instanceof ArrayType) {
    Assert.assertTrue("Expected should be an ArrayData: " + context,
        expected instanceof ArrayData);
    Assert.assertTrue("Actual should be an ArrayData: " + context,
        actual instanceof ArrayData);
    ArrayType array = (ArrayType) type;
    assertEquals(context, array, (ArrayData) expected, (ArrayData) actual);
    return;
  }

  if (type instanceof MapType) {
    Assert.assertTrue("Expected should be a MapData: " + context,
        expected instanceof MapData);
    Assert.assertTrue("Actual should be a MapData: " + context,
        actual instanceof MapData);
    MapType map = (MapType) type;
    assertEquals(context, map, (MapData) expected, (MapData) actual);
    return;
  }

  if (type instanceof BinaryType) {
    assertEqualBytes(context, (byte[]) expected, (byte[]) actual);
    return;
  }

  Assert.assertEquals("Value should match expected: " + context, expected, actual);
}
 
Example #10
Source File: SparkTypeVisitor.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Traverses a Spark type depth-first and reports each node to the visitor.
 *
 * <p>Child results are computed first and passed to the matching visitor callback:
 * {@code field}/{@code struct} for structs, {@code map} for maps, {@code array} for arrays
 * and {@code atomic} for everything else.
 *
 * @param type Spark type to traverse
 * @param visitor callbacks invoked for each node
 * @return the visitor's result for {@code type}
 * @throws UnsupportedOperationException if {@code type} is a {@code UserDefinedType}
 */
static <T> T visit(DataType type, SparkTypeVisitor<T> visitor) {
  if (type instanceof StructType) {
    StructType structType = (StructType) type;
    StructField[] structFields = structType.fields();
    List<T> results = Lists.newArrayListWithExpectedSize(structFields.length);

    for (StructField structField : structFields) {
      T fieldTypeResult = visit(structField.dataType(), visitor);
      T fieldResult = visitor.field(structField, fieldTypeResult);
      results.add(fieldResult);
    }

    return visitor.struct(structType, results);
  }

  if (type instanceof MapType) {
    MapType mapType = (MapType) type;
    T keyResult = visit(mapType.keyType(), visitor);
    T valueResult = visit(mapType.valueType(), visitor);
    return visitor.map(mapType, keyResult, valueResult);
  }

  if (type instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) type;
    T elementResult = visit(arrayType.elementType(), visitor);
    return visitor.array(arrayType, elementResult);
  }

  if (type instanceof UserDefinedType) {
    throw new UnsupportedOperationException(
        "User-defined types are not supported");
  }

  return visitor.atomic(type);
}
 
Example #11
Source File: TestHelpers.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Asserts that a Spark {@code MapData} and a Java map hold the same entries.
 *
 * <p>If either side is {@code null} the two are compared directly. Otherwise the sizes must
 * match, and for every expected key the value found in {@code actual} is compared according
 * to the map's value type. A missing or {@code null} actual value is accepted only when the
 * expected value at that position is null.
 *
 * @param prefix description prepended to failure messages
 * @param type map type supplying the key and value types
 * @param expected expected Spark map data
 * @param actual Java map to check
 * @throws IllegalArgumentException if the value type is not one of the handled type ids
 */
private static void assertEqualsMaps(String prefix, Types.MapType type,
                                     MapData expected, Map<?, ?> actual) {
  if (expected == null || actual == null) {
    Assert.assertEquals(prefix, expected, actual);
    return;
  }

  Type keyType = type.keyType();
  Type valueType = type.valueType();
  ArrayData expectedKeyArray = expected.keyArray();
  ArrayData expectedValueArray = expected.valueArray();
  Assert.assertEquals(prefix + " length", expected.numElements(), actual.size());

  for (int pos = 0; pos < expected.numElements(); ++pos) {
    Object expectedKey = getValue(expectedKeyArray, pos, keyType);
    Object actualValue = actual.get(expectedKey);
    String entryContext = prefix + ".key=" + expectedKey;

    if (actualValue == null) {
      Assert.assertEquals(entryContext + " has null", true,
          expected.valueArray().isNullAt(pos));
      continue;
    }

    switch (valueType.typeId()) {
      case BOOLEAN:
      case INTEGER:
      case LONG:
      case FLOAT:
      case DOUBLE:
      case STRING:
      case DECIMAL:
      case DATE:
      case TIMESTAMP:
        // directly comparable values
        Assert.assertEquals(entryContext + " - " + valueType,
            getValue(expectedValueArray, pos, valueType),
            actualValue);
        break;
      case UUID:
      case FIXED:
      case BINARY:
        // byte-array backed values
        assertEqualBytes(entryContext,
            (byte[]) getValue(expectedValueArray, pos, valueType),
            (byte[]) actualValue);
        break;
      case STRUCT: {
        Types.StructType structType = (Types.StructType) valueType;
        int fieldCount = structType.fields().size();
        assertEquals(entryContext, structType,
            expectedValueArray.getStruct(pos, fieldCount),
            (Row) actualValue);
        break;
      }
      case LIST:
        assertEqualsLists(entryContext,
            valueType.asListType(),
            expectedValueArray.getArray(pos),
            toList((Seq<?>) actualValue));
        break;
      case MAP:
        assertEqualsMaps(entryContext, valueType.asMapType(),
            expectedValueArray.getMap(pos),
            toJavaMap((scala.collection.Map<?, ?>) actualValue));
        break;
      default:
        throw new IllegalArgumentException("Unhandled type " + valueType);
    }
  }
}
 
Example #12
Source File: JsonSchema.java    From sylph with Apache License 2.0 4 votes vote down vote up
/**
 * Deserializes a JSON message into a {@code Row} laid out according to {@code rowTypeInfo}.
 *
 * <p>The columns {@code _topic}, {@code _message}, {@code _key}, {@code _partition} and
 * {@code _offset} are filled from the record metadata. All other columns are looked up by
 * name in the parsed JSON object; a column with no JSON value stays {@code null}.
 *
 * @param messageKey raw record key; may be {@code null}, in which case {@code _key} is null
 * @param message raw JSON payload
 * @param topic topic the record came from
 * @param partition partition the record came from
 * @param offset offset of the record
 * @return a row carrying the schema {@code rowTypeInfo}
 * @throws IOException propagated from {@code MAPPER.readValue} when the payload cannot be read
 */
public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset)
        throws IOException
{
    @SuppressWarnings("unchecked")
    Map<String, Object> map = MAPPER.readValue(message, Map.class);
    String[] names = rowTypeInfo.names();
    Object[] values = new Object[names.length];
    for (int i = 0; i < names.length; i++) {
        String key = names[i];
        switch (key) {
            case "_topic":
                values[i] = topic;
                continue;
            case "_message":
                values[i] = new String(message, UTF_8);
                continue;
            case "_key":
                // A record may have no key; keep the column null instead of failing with an NPE.
                values[i] = messageKey == null ? null : new String(messageKey, UTF_8);
                continue;
            case "_partition":
                values[i] = partition;
                continue;
            case "_offset":
                values[i] = offset;
                continue;
        }

        Object value = map.get(key);
        if (value == null) {
            continue;
        }
        DataType type = rowTypeInfo.apply(i).dataType();

        if (type instanceof MapType && ((MapType) type).valueType() == DataTypes.StringType) {
            // must be a Scala map
            scala.collection.mutable.Map convertValue = new scala.collection.mutable.HashMap();
            for (Map.Entry entry : ((Map<?, ?>) value).entrySet()) {
                convertValue.put(entry.getKey(), entry.getValue() == null ? null : entry.getValue().toString());
            }
            values[i] = convertValue;
        }
        else if (type instanceof ArrayType) {
            // Test the declared column type: the parsed JSON value is never a Spark ArrayType.
            // TODO: convert the parsed JSON list into a Spark array; the value is passed through for now.
            values[i] = value;
        }
        else if (type == DataTypes.LongType) {
            // Widen whatever Number the JSON parser produced to the declared long column.
            values[i] = ((Number) value).longValue();
        }
        else {
            values[i] = value;
        }
    }
    return new GenericRowWithSchema(values, rowTypeInfo);
}
 
Example #13
Source File: JsonSchema.java    From sylph with Apache License 2.0 4 votes vote down vote up
/**
 * Deserializes a JSON message into a schema-less {@code GenericRow} whose values are ordered
 * by the column names of {@code rowTypeInfo}.
 *
 * <p>The columns {@code _topic}, {@code _message}, {@code _key}, {@code _partition} and
 * {@code _offset} are filled from the record metadata. All other columns are looked up by
 * name in the parsed JSON object; a column with no JSON value stays {@code null}.
 *
 * @param messageKey raw record key; may be {@code null}, in which case {@code _key} is null
 * @param message raw JSON payload
 * @param topic topic the record came from
 * @param partition partition the record came from
 * @param offset offset of the record
 * @return a row holding one value per column of {@code rowTypeInfo}
 * @throws IOException propagated from {@code MAPPER.readValue} when the payload cannot be read
 */
public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset)
        throws IOException
{
    @SuppressWarnings("unchecked")
    Map<String, Object> map = MAPPER.readValue(message, Map.class);
    String[] names = rowTypeInfo.names();
    Object[] values = new Object[names.length];
    for (int i = 0; i < names.length; i++) {
        String key = names[i];
        switch (key) {
            case "_topic":
                values[i] = topic;
                continue;
            case "_message":
                values[i] = new String(message, UTF_8);
                continue;
            case "_key":
                // A record may have no key; keep the column null instead of failing with an NPE.
                values[i] = messageKey == null ? null : new String(messageKey, UTF_8);
                continue;
            case "_partition":
                values[i] = partition;
                continue;
            case "_offset":
                values[i] = offset;
                continue;
        }

        Object value = map.get(key);
        if (value == null) {
            continue;
        }
        DataType type = rowTypeInfo.apply(i).dataType();

        if (type instanceof MapType && ((MapType) type).valueType() == DataTypes.StringType) {
            // must be a Scala map
            scala.collection.mutable.Map convertValue = new scala.collection.mutable.HashMap();
            for (Map.Entry entry : ((Map<?, ?>) value).entrySet()) {
                convertValue.put(entry.getKey(), entry.getValue() == null ? null : entry.getValue().toString());
            }
            values[i] = convertValue;
        }
        else if (type instanceof ArrayType) {
            // Test the declared column type: the parsed JSON value is never a Spark ArrayType.
            // TODO: convert the parsed JSON list into a Spark array; the value is passed through for now.
            values[i] = value;
        }
        else if (type == DataTypes.LongType) {
            // Widen whatever Number the JSON parser produced to the declared long column.
            values[i] = ((Number) value).longValue();
        }
        else {
            values[i] = value;
        }
    }
    return new GenericRow(values);
}
 
Example #14
Source File: AvroUtils.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the Avro schema for a single Spark data type.
 *
 * <p>The conversion is selected by {@code dataType.typeName()}. Arrays and maps recurse on
 * their element and value types, structs are delegated to {@code schemaFor}, and any type
 * name starting with "decimal" becomes a decimal logical type over bytes. When
 * {@code isOptional} is set and the result is not already the null type, the schema is
 * wrapped as nullable.
 *
 * @param dataType Spark type to convert
 * @param isOptional whether the resulting schema should be made nullable
 * @param recordCount passed through unchanged to nested conversions
 * @return the Avro schema for {@code dataType}
 * @throws RuntimeException if the type name is not handled
 */
private static Schema typeFor(DataType dataType, boolean isOptional, int recordCount) {
  LOG.trace("Converting {} to Avro, optional[{}]", dataType, isOptional);

  SchemaBuilder.BaseTypeBuilder<Schema> avro = SchemaBuilder.builder();
  String sparkTypeName = dataType.typeName();
  Schema converted;

  switch (sparkTypeName) {
    case "binary":
      // Avro bytes
      converted = avro.bytesType();
      break;
    case "boolean":
      converted = avro.booleanType();
      break;
    case "date":
      // date logical type over int
      converted = LogicalTypes.date().addToSchema(avro.intType());
      break;
    case "timestamp":
      // timestamp-millis logical type over long
      converted = LogicalTypes.timestampMillis().addToSchema(avro.longType());
      break;
    case "double":
      converted = avro.doubleType();
      break;
    case "float":
      converted = avro.floatType();
      break;
    case "integer":
    case "byte":
    case "short":
      // all three are written as Avro int
      converted = avro.intType();
      break;
    case "long":
      converted = avro.longType();
      break;
    case "null":
      converted = avro.nullType();
      break;
    case "string":
      converted = avro.stringType();
      break;
    case "array":
      ArrayType sparkArray = (ArrayType) dataType;
      Schema elementSchema = typeFor(sparkArray.elementType(), sparkArray.containsNull(), recordCount);
      converted = avro.array().items(elementSchema);
      break;
    case "map":
      // Keys must be strings, so only the value type is converted.
      MapType sparkMap = (MapType) dataType;
      Schema valueSchema = typeFor(sparkMap.valueType(), sparkMap.valueContainsNull(), recordCount);
      converted = avro.map().values(valueSchema);
      break;
    case "struct":
      // Nested "anonymous" record
      converted = schemaFor((StructType) dataType, null, null, recordCount);
      break;
    default:
      if (!sparkTypeName.startsWith("decimal")) {
        throw new RuntimeException(String.format("DataType[%s] - DataType unrecognized or not yet implemented",
            dataType));
      }
      // decimal logical type over bytes
      DecimalType sparkDecimal = (DecimalType) dataType;
      converted = LogicalTypes.decimal(sparkDecimal.precision(), sparkDecimal.scale())
          .addToSchema(avro.bytesType());
  }

  boolean wrapAsNullable = isOptional && !converted.getType().equals(NULL);
  return wrapAsNullable ? SchemaBuilder.builder().nullable().type(converted) : converted;
}
 
Example #15
Source File: MapStreamReader.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Advances the map column's underlying streams for the next batch and returns the supplied
 * vector.
 *
 * <p>The active code opens the row group if needed, applies any pending {@code readOffset}
 * skip to the key and value readers, and reads the per-row entry lengths (and null flags)
 * for {@code nextBatchSize} rows.
 *
 * <p>NOTE(review): the section that would read keys and values and assemble the result is
 * commented out. As written, nothing in the active code writes to {@code vector}, and
 * {@code readOffset} / {@code nextBatchSize} are not reset here because those resets are
 * inside the commented-out section. Confirm whether this is intentional.
 *
 * @param type expected to be a {@code MapType}; the cast below fails otherwise
 * @param vector column vector handed in by the caller; returned as-is
 * @return the same {@code vector} instance that was passed in
 * @throws IOException declared by the method; {@code OrcCorruptionException} is thrown below
 *     when lengths are required but {@code lengthStream} is absent
 */
@Override
public ColumnVector readBlock(DataType type, ColumnVector vector)
        throws IOException {
    // mapType is not used by the active code; the cast still rejects a non-MapType argument.
    MapType mapType = (MapType)type;

    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Apply any pending skip before reading this batch.
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the data reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            // The summed lengths of the skipped rows give the number of key/value entries to skip.
            long entrySkipSize = lengthStream.sum(readOffset);
            keyStreamReader.prepareNextRead(toIntExact(entrySkipSize));
            valueStreamReader.prepareNextRead(toIntExact(entrySkipSize));
        }
    }

    // The length vector could be reused, but this simplifies the code below by
    // taking advantage of null entries being initialized to zero.  The vector
    // could be reinitialized for each loop, but that is likely just as expensive
    // as allocating a new array
    int[] lengthVector = new int[nextBatchSize];
    boolean[] nullVector = new boolean[nextBatchSize];
    if (presentStream == null) {
        // No present stream: lengths are read for every row in the batch.
        if (lengthStream == null) {
            throw new OrcCorruptionException("Value is not null but data stream is not present");
        }
        lengthStream.nextIntVector(nextBatchSize, lengthVector);
    }
    else {
        // nullVector is filled from the present stream and passed on to the length read.
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        if (nullValues != nextBatchSize) {
            if (lengthStream == null) {
                throw new OrcCorruptionException("Value is not null but data stream is not present");
            }
            lengthStream.nextIntVector(nextBatchSize, lengthVector, nullVector);
        } else {
            // All Nulls
            // iterate over structs
        }
    }

    // Total number of key/value entries in this batch; only the commented-out code below uses it.
    int entryCount = 0;
    for (int length : lengthVector) {
        entryCount += length;
    }
    // NOTE(review): everything from here to the return is disabled.
    /*
    //Convert map to array
    if (entryCount > 0) {
        keyStreamReader.prepareNextRead(entryCount);
        valueStreamReader.prepareNextRead(entryCount);
        keys = keyStreamReader.readBlock(keyType);
        values = valueStreamReader.readBlock(valueType);
    }
    else {
        keys = keyType.createBlockBuilder(new BlockBuilderStatus(), 0).build();
        values = valueType.createBlockBuilder(new BlockBuilderStatus(), 1).build();
    }

    InterleavedBlock keyValueBlock = createKeyValueBlock(nextBatchSize, keys, values, lengthVector);

    // convert lengths into offsets into the keyValueBlock (e.g., two positions per entry)
    int[] offsets = new int[nextBatchSize + 1];
    for (int i = 1; i < offsets.length; i++) {
        int length = lengthVector[i - 1] * 2;
        offsets[i] = offsets[i - 1] + length;
    }
    ArrayBlock arrayBlock = new ArrayBlock(nextBatchSize, nullVector, offsets, keyValueBlock);

    readOffset = 0;
    nextBatchSize = 0;
*/
    return vector;
}