Java Code Examples for org.apache.pig.data.DataType#CHARARRAY

The following examples show how to use org.apache.pig.data.DataType#CHARARRAY . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SchemaUtil.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the type id of {@code type} into the matching Pig {@code DataType} byte.
 *
 * <p>Timestamps, dates and strings all map to {@code CHARARRAY}; fixed and binary
 * values both map to {@code BYTEARRAY}.
 *
 * @param type the type whose {@code typeId()} is inspected
 * @return the Pig {@code DataType} constant chosen for that type id
 * @throws IOException (as a {@code FrontendException}) when the type id has no mapping
 */
private static byte convertType(Type type) throws IOException {
  switch (type.typeId()) {
    case BOOLEAN:
      return DataType.BOOLEAN;
    case INTEGER:
      return DataType.INTEGER;
    case LONG:
      return DataType.LONG;
    case FLOAT:
      return DataType.FLOAT;
    case DOUBLE:
      return DataType.DOUBLE;
    case TIMESTAMP:
    case DATE:
    case STRING:
      return DataType.CHARARRAY;
    case FIXED:
    case BINARY:
      return DataType.BYTEARRAY;
    case DECIMAL:
      return DataType.BIGDECIMAL;
    case STRUCT:
      return DataType.TUPLE;
    case LIST:
      return DataType.BAG;
    case MAP:
      return DataType.MAP;
    default:
      throw new FrontendException("Unsupported primitive type:" + type);
  }
}
 
Example 2
Source File: POPartialAgg.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Pulls the next result from {@code op}, requesting it with the operator's own result type.
 *
 * @param op the expression operator to evaluate
 * @return whatever {@code op.getNext(...)} yields for its result type
 * @throws ExecException with error code 2270 when the result type is not one of the listed types
 */
private Result getResult(ExpressionOperator op) throws ExecException {
    switch (op.getResultType()) {
    case DataType.BOOLEAN:
    case DataType.INTEGER:
    case DataType.LONG:
    case DataType.FLOAT:
    case DataType.DOUBLE:
    case DataType.BIGINTEGER:
    case DataType.BIGDECIMAL:
    case DataType.DATETIME:
    case DataType.BYTEARRAY:
    case DataType.CHARARRAY:
    case DataType.TUPLE:
    case DataType.BAG:
    case DataType.MAP:
        // Supported type: delegate straight to the operator.
        return op.getNext(op.getResultType());
    default:
        throw new ExecException(
                "Invalid result type: " + DataType.findType(op.getResultType()),
                2270,
                PigException.BUG);
    }
}
 
Example 3
Source File: SchemaTupleClassGenerator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Gives the Java type name, as source text, that corresponds to a Pig type byte.
 *
 * @param type a Pig {@code DataType} constant
 * @return the matching Java type name, e.g. {@code "int"} for {@code DataType.INTEGER}
 * @throws RuntimeException if {@code type} is not one of the handled constants
 */
public String typeName(byte type) {
    final String javaName;
    switch (type) {
        case DataType.INTEGER:
            javaName = "int";
            break;
        case DataType.LONG:
            javaName = "long";
            break;
        case DataType.FLOAT:
            javaName = "float";
            break;
        case DataType.DOUBLE:
            javaName = "double";
            break;
        case DataType.BYTEARRAY:
            javaName = "byte[]";
            break;
        case DataType.CHARARRAY:
            javaName = "String";
            break;
        case DataType.BOOLEAN:
            javaName = "boolean";
            break;
        case DataType.DATETIME:
            javaName = "DateTime";
            break;
        case DataType.BIGDECIMAL:
            javaName = "BigDecimal";
            break;
        case DataType.BIGINTEGER:
            javaName = "BigInteger";
            break;
        case DataType.TUPLE:
            javaName = "Tuple";
            break;
        case DataType.BAG:
            javaName = "DataBag";
            break;
        case DataType.MAP:
            javaName = "Map";
            break;
        default:
            throw new RuntimeException("Can't return String for given type " + DataType.findTypeName(type));
    }
    return javaName;
}
 
Example 4
Source File: STRSPLIT.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the argument schemas this function accepts, each bound to this class:
 * one chararray; two chararrays; and two chararrays followed by an integer.
 *
 * @return the three function specs, in that order
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    final String implClass = this.getClass().getName();

    Schema oneArg = new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));

    Schema twoArgs = new Schema();
    twoArgs.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    twoArgs.add(new Schema.FieldSchema(null, DataType.CHARARRAY));

    Schema threeArgs = new Schema();
    threeArgs.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    threeArgs.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    threeArgs.add(new Schema.FieldSchema(null, DataType.INTEGER));

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(implClass, oneArg));
    mappings.add(new FuncSpec(implClass, twoArgs));
    mappings.add(new FuncSpec(implClass, threeArgs));
    return mappings;
}
 
Example 5
Source File: PhysicalOperator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Dispatches to the typed {@code getNextXxx()} method that matches the given
 * Pig type byte, so callers do not have to repeat the same switch themselves.
 *
 * @param dataType a byte from DataType selecting which typed getter to invoke
 * @return the Result produced by the selected typed getter
 * @throws ExecException if {@code dataType} is not handled here, or wrapping any
 *         RuntimeException raised while producing the result
 */
public Result getNext(byte dataType) throws ExecException {
    try {
        final Result next;
        switch (dataType) {
        case DataType.BAG:
            next = getNextDataBag();
            break;
        case DataType.BOOLEAN:
            next = getNextBoolean();
            break;
        case DataType.BYTEARRAY:
            next = getNextDataByteArray();
            break;
        case DataType.CHARARRAY:
            next = getNextString();
            break;
        case DataType.DOUBLE:
            next = getNextDouble();
            break;
        case DataType.FLOAT:
            next = getNextFloat();
            break;
        case DataType.INTEGER:
            next = getNextInteger();
            break;
        case DataType.LONG:
            next = getNextLong();
            break;
        case DataType.BIGINTEGER:
            next = getNextBigInteger();
            break;
        case DataType.BIGDECIMAL:
            next = getNextBigDecimal();
            break;
        case DataType.DATETIME:
            next = getNextDateTime();
            break;
        case DataType.MAP:
            next = getNextMap();
            break;
        case DataType.TUPLE:
            next = getNextTuple();
            break;
        default:
            throw new ExecException("Unsupported type for getNext: " + DataType.findTypeName(dataType));
        }
        return next;
    } catch (RuntimeException e) {
        // Attach the operator's description so the failure can be traced to it.
        throw new ExecException("Exception while executing " + this.toString() + ": " + e.toString(), e);
    }
}
 
Example 6
Source File: TestResourceSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a ResourceSchema created from a Pig schema together with a SortInfo
 * converts back to an equal Pig schema and exposes the sort key indexes and orders.
 */
@Test
public void testResourceFlatSchemaCreationWithSortInfo() 
throws ExecException, SchemaMergeException, FrontendException {
    String[] fieldAliases = {"f1", "f2"};
    byte[] fieldTypes = {DataType.CHARARRAY, DataType.INTEGER};

    // Bag "t1" holding tuple "t0", which carries the flat (f1, f2) schema.
    Schema.FieldSchema tupleField = new Schema.FieldSchema(
            "t0",
            TypeCheckingTestUtil.genFlatSchema(fieldAliases, fieldTypes),
            DataType.TUPLE);
    Schema.FieldSchema bagField =
            new Schema.FieldSchema("t1", new Schema(tupleField), DataType.BAG);
    Schema origSchema = new Schema(bagField);

    // NOTE(review): the second sort column uses index 1 but is named "f1",
    // while index 1 is aliased "f2" above — confirm whether that is intended.
    List<SortColInfo> sortColumns = new ArrayList<SortColInfo>();
    sortColumns.add(new SortColInfo("f1", 0, SortColInfo.Order.ASCENDING));
    sortColumns.add(new SortColInfo("f1", 1, SortColInfo.Order.DESCENDING));
    SortInfo sortInfo = new SortInfo(sortColumns);

    ResourceSchema rsSchema = new ResourceSchema(origSchema, sortInfo);

    // Round trip back to a Pig schema.
    Schema roundTripped = Schema.getPigSchema(rsSchema);
    assertTrue("generated schema equals original", 
            Schema.equals(roundTripped, origSchema, true, false));

    assertTrue(rsSchema.getSortKeys()[0]==0);
    assertTrue(rsSchema.getSortKeys()[1]==1);
    assertTrue(rsSchema.getSortKeyOrders()[0]==ResourceSchema.Order.ASCENDING);
    assertTrue(rsSchema.getSortKeyOrders()[1]==ResourceSchema.Order.DESCENDING);
}
 
Example 7
Source File: ExtractHour.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the single-column output schema: a chararray field whose name comes from
 * {@code getSchemaName} applied to this class's lower-cased name and the input schema.
 *
 * @param input schema of the input data
 * @return schema of the output data
 */
@Override
public Schema outputSchema(Schema input) {
    String lowerCasedClassName = this.getClass().getName().toLowerCase();
    Schema.FieldSchema outputField =
            new Schema.FieldSchema(getSchemaName(lowerCasedClassName, input), DataType.CHARARRAY);
    return new Schema(outputField);
}
 
Example 8
Source File: TestTypeCheckingValidatorNewLP.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Plans a foreach containing a bincond whose branches are a UDF call and the grouped
 * bag, then checks that the foreach schema equals a bag of (chararray, int, float) tuples.
 */
@Test
public void testBincond() throws Throwable {
    String udfName = TestBinCondFieldSchema.class.getName();
    String query = "a= load 'a' as (name: chararray, age: int, gpa: float);"
            + "b = group a by name;"
            + "c = foreach b generate (IsEmpty(a) ? " + udfName + "(*): a);";

    LOForEach foreach = getForeachFromPlan(query);

    // Expected inner tuple: three unnamed fields.
    Schema tupleSchema = new Schema();
    tupleSchema.add(new FieldSchema(null, DataType.CHARARRAY));
    tupleSchema.add(new FieldSchema(null, DataType.INTEGER));
    tupleSchema.add(new FieldSchema(null, DataType.FLOAT));

    // Wrap the tuple in a bag schema.
    Schema bagSchema = new Schema();
    bagSchema.add(new FieldSchema(null, tupleSchema, DataType.TUPLE));

    Schema.FieldSchema bagFs = null;
    try {
        bagFs = new Schema.FieldSchema(null, bagSchema, DataType.BAG);
    } catch (FrontendException fee) {
        fail("Did not expect an error");
    }

    Schema expectedSchema = new Schema(bagFs);
    Schema actualSchema = org.apache.pig.newplan.logical.Util.translateSchema(foreach.getSchema());
    assertTrue(Schema.equals(actualSchema, expectedSchema, false, true));
}
 
Example 9
Source File: TestSchemaUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises every {@code SchemaUtil.newTupleSchema} overload (array and list forms)
 * and compares the string form of each resulting schema with the expected text.
 */
@Test
public void testTupleSchema() throws Exception {
    String tupleName = "mytuple";
    String[] fieldNames = new String[] { "field_0", "field_1" };
    Byte[] dataTypes = new Byte[] { DataType.LONG, DataType.CHARARRAY };

    // Explicit tuple name and explicit field names.
    String expectedNamed = "{mytuple: (field_0: long,field_1: chararray)}";
    assertEquals(expectedNamed,
            SchemaUtil.newTupleSchema(tupleName, fieldNames, dataTypes).toString());
    assertEquals(expectedNamed,
            SchemaUtil.newTupleSchema(tupleName, Arrays.asList(fieldNames),
                    Arrays.asList(dataTypes)).toString());

    // No tuple name given, explicit field names.
    String expectedDefaultTuple = "{t: (field_0: long,field_1: chararray)}";
    assertEquals(expectedDefaultTuple,
            SchemaUtil.newTupleSchema(fieldNames, dataTypes).toString());
    assertEquals(expectedDefaultTuple,
            SchemaUtil.newTupleSchema(Arrays.asList(fieldNames),
                    Arrays.asList(dataTypes)).toString());

    // Neither tuple name nor field names given.
    String expectedAllDefaults = "{t: (f0: long,f1: chararray)}";
    assertEquals(expectedAllDefaults,
            SchemaUtil.newTupleSchema(dataTypes).toString());
    assertEquals(expectedAllDefaults,
            SchemaUtil.newTupleSchema(Arrays.asList(dataTypes)).toString());
}
 
Example 10
Source File: TestPigStreamingUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes an encoded bag of two (chararray, int) tuples through a
 * PigStreamingUDF built for that bag schema, and compares the outcome with a
 * tuple wrapping the expected bag.
 */
@Test
public void testDeserialize__bag() throws IOException {
    byte[] input = "|{_|(_A|,_1|)_|,_|(_B|,_2|)_|}_|_".getBytes();

    // Inner tuple schema: (chararray, int), all aliases empty.
    List<FieldSchema> tupleFields = new ArrayList<FieldSchema>();
    tupleFields.add(new FieldSchema("", DataType.CHARARRAY));
    tupleFields.add(new FieldSchema("", DataType.INTEGER));
    Schema tupleSchema = new Schema(tupleFields);
    FieldSchema tupleField = new FieldSchema("", tupleSchema, DataType.TUPLE);

    // Bag schema wrapping that tuple.
    List<FieldSchema> bagFields = new ArrayList<FieldSchema>();
    bagFields.add(tupleField);
    Schema bagSchema = new Schema(bagFields);
    FieldSchema bagField = new FieldSchema("", bagSchema, DataType.BAG);

    PigStreamingUDF sp = new PigStreamingUDF(bagField);

    // Expected content: {(A,1),(B,2)}
    Tuple firstTuple = tf.newTuple(2);
    firstTuple.set(0, "A");
    firstTuple.set(1, 1);

    Tuple secondTuple = tf.newTuple(2);
    secondTuple.set(0, "B");
    secondTuple.set(1, 2);

    List<Tuple> expectedTuples = new ArrayList<Tuple>();
    expectedTuples.add(firstTuple);
    expectedTuples.add(secondTuple);
    DataBag expectedBag = DefaultBagFactory.getInstance().newDefaultBag(expectedTuples);

    Object actual = sp.deserialize(input, 0, input.length);
    Assert.assertEquals(tf.newTuple(expectedBag), actual);
}
 
Example 11
Source File: TestHBaseStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that a Put created by an HBaseStorage constructed with only the column
 * list (no further options) reports that it writes to the WAL.
 *
 * @throws IOException
 * @throws ParseException
 */
@Test
public void testWIthWAL() throws IOException, ParseException {
    HBaseStorage storage = new HBaseStorage(TESTCOLUMN_A);

    // Declared as Object / byte so the same createPut overload is selected.
    Object rowKey = "somekey";
    byte rowKeyType = DataType.CHARARRAY;

    boolean writesToWal = storage.createPut(rowKey, rowKeyType).getWriteToWAL();
    Assert.assertTrue(writesToWal);
}
 
Example 12
Source File: BinCond.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the output as a single chararray column named via {@code getSchemaName}
 * from this class's lower-cased name and the input schema.
 *
 * @param input schema of the input data
 * @return the one-field output schema, or {@code null} if building it raised any exception
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        String columnName = getSchemaName(this.getClass().getName().toLowerCase(), input);
        Schema.FieldSchema column = new Schema.FieldSchema(columnName, DataType.CHARARRAY);
        return new Schema(column);
    } catch (Exception e) {
        // Any failure is reported to the caller as "no schema".
        return null;
    }
}
 
Example 13
Source File: POIsNull.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates the operand expression and, when it returns STATUS_OK, replaces its
 * result with {@code true} if that result was null and {@code false} otherwise.
 * Results with any other status are passed through unchanged.
 *
 * @return the operand's Result, with {@code result} rewritten as described above
 * @throws ExecException with error code 2067 if the operand type is not handled
 */
@Override
public Result getNextBoolean() throws ExecException {
    // Reject unsupported operand types before evaluating anything.
    switch (operandType) {
    case DataType.BOOLEAN:
    case DataType.INTEGER:
    case DataType.LONG:
    case DataType.FLOAT:
    case DataType.DOUBLE:
    case DataType.BIGINTEGER:
    case DataType.BIGDECIMAL:
    case DataType.DATETIME:
    case DataType.BYTEARRAY:
    case DataType.CHARARRAY:
    case DataType.TUPLE:
    case DataType.BAG:
    case DataType.MAP:
        break;
    default:
        throw new ExecException(
                this.getClass().getSimpleName() + " does not know how to "
                        + "handle type: " + DataType.findTypeName(operandType),
                2067,
                PigException.BUG);
    }

    Result res = expr.getNext(operandType);
    if (res.returnStatus != POStatus.STATUS_OK) {
        return res;
    }

    res.result = (res.result == null);
    illustratorMarkup(null, res.result, (Boolean) res.result ? 0 : 1);
    return res;
}
 
Example 14
Source File: ISOToDay.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Produces a one-column chararray schema; the column name is obtained from
 * {@code getSchemaName} using this class's lower-cased name and the input schema.
 *
 * @param input schema of the input data
 * @return schema of the output data
 */
@Override
public Schema outputSchema(Schema input) {
    final String classKey = this.getClass().getName().toLowerCase();
    final String fieldName = getSchemaName(classKey, input);
    return new Schema(new Schema.FieldSchema(fieldName, DataType.CHARARRAY));
}
 
Example 15
Source File: TypeUtil.java    From phoenix with Apache License 2.0 4 votes vote down vote up
/**
 * Picks the PDataType that corresponds to an incoming Pig type byte. Pig
 * DATETIME maps to DATE; BIGDECIMAL shares DOUBLE's mapping and BIGINTEGER
 * shares LONG's.
 * 
 * The result is later used when casting to the target Phoenix type. See
 * {@link #castPigTypeToPhoenix(Object, byte, PDataType)}
 * 
 * @param obj the value being typed; only checked for null and used in the error message
 * @param type the Pig DataType byte describing {@code obj}
 * @return the matching PDataType, or {@code null} when {@code obj} is null
 * @throws RuntimeException if {@code type} is not one of the handled Pig types
 */
public static PDataType getType(Object obj, byte type) {
	if (obj == null) {
		return null;
	}

	switch (type) {
	case DataType.BYTEARRAY:
		return PVarbinary.INSTANCE;
	case DataType.CHARARRAY:
		return PVarchar.INSTANCE;
	case DataType.DOUBLE:
	case DataType.BIGDECIMAL:
		return PDouble.INSTANCE;
	case DataType.FLOAT:
		return PFloat.INSTANCE;
	case DataType.INTEGER:
		return PInteger.INSTANCE;
	case DataType.LONG:
	case DataType.BIGINTEGER:
		return PLong.INSTANCE;
	case DataType.BOOLEAN:
		return PBoolean.INSTANCE;
	case DataType.DATETIME:
		return PDate.INSTANCE;
	case DataType.BYTE:
		return PTinyint.INSTANCE;
	default:
		throw new RuntimeException("Unknown type " + obj.getClass().getName()
				+ " passed to PhoenixHBaseStorage");
	}
}
 
Example 16
Source File: LTRIM.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Declares the output as a single unnamed chararray column.
 *
 * @param input schema of the input data (not consulted)
 * @return a schema with one chararray field whose alias is null
 */
@Override
public Schema outputSchema(Schema input) {
    Schema.FieldSchema unnamedChararray = new Schema.FieldSchema(null, DataType.CHARARRAY);
    return new Schema(unnamedChararray);
}
 
Example 17
Source File: FixedWidthLoader.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Cuts one fixed-width column out of {@code line} and converts the trimmed text
 * to the Java value matching the field's Pig type.
 *
 * @param line the full input line
 * @param field schema entry whose type selects the conversion
 * @param column start/end offsets of the column within the line
 * @return the converted value; {@code null} when the column lies past the end of
 *         the line, is empty after clamping to the line length, or (for unknown,
 *         bytearray and chararray fields) contains only whitespace
 * @throws IOException declared for callers; not raised directly in this body
 * @throws IllegalArgumentException for map, tuple and bag fields and for
 *         unrecognized type codes
 */
private Object readField(String line, ResourceFieldSchema field, FixedWidthField column) 
                         throws IOException, IllegalArgumentException {

    final int from = column.start;
    final int to = Math.min(column.end, line.length());

    // Nothing to read when the column starts beyond the line or is empty.
    if (from > line.length() || to <= from) {
        return null;
    }

    final String text = line.substring(from, to).trim();

    switch (field.getType()) {
        case DataType.UNKNOWN:
        case DataType.BYTEARRAY:
        case DataType.CHARARRAY:
            return text.length() == 0 ? null : text;

        case DataType.BOOLEAN:
            return Boolean.parseBoolean(text);

        case DataType.INTEGER:
            return Integer.parseInt(text);

        case DataType.LONG:
            return Long.parseLong(text);

        case DataType.FLOAT:
            return Float.parseFloat(text);

        case DataType.DOUBLE:
            return Double.parseDouble(text);

        case DataType.DATETIME:
            return (new DateTime(text)).toDateTime(DateTimeZone.UTC);

        case DataType.MAP:
        case DataType.TUPLE:
        case DataType.BAG:
            throw new IllegalArgumentException("Object types (map, tuple, bag) are not supported by FixedWidthLoader");

        default:
            throw new IllegalArgumentException(
                "Unknown type in input schema: " + field.getType());
    }
}
 
Example 18
Source File: AvroStorageSchemaConversionUtilities.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Maps an Avro schema to the byte code of the Pig type used to represent it.
 *
 * <p>A union resolves to the type of its only branch, or of its non-null branch
 * when it has exactly two branches and one of them is NULL; a union accepted by
 * {@code isUnionOfSimpleTypes} becomes a bytearray.
 *
 * @param s The avro schema for which to determine the type
 * @return the byte representing the schema type
 * @throws ExecException for any other union shape or an unhandled Avro type
 * @see org.apache.avro.Schema.Type
 */
public static byte getPigType(final Schema s) throws ExecException {
  switch (s.getType()) {
  case BOOLEAN:
    return DataType.BOOLEAN;
  case INT:
    return DataType.INTEGER;
  case LONG:
    return DataType.LONG;
  case FLOAT:
    return DataType.FLOAT;
  case DOUBLE:
    return DataType.DOUBLE;
  case ENUM:
  case STRING:
    return DataType.CHARARRAY;
  case BYTES:
  case FIXED:
    return DataType.BYTEARRAY;
  case ARRAY:
    return DataType.BAG;
  case MAP:
    return DataType.MAP;
  case RECORD:
    return DataType.TUPLE;
  case NULL:
    return DataType.NULL;
  case UNION: {
    final List<Schema> branches = s.getTypes();
    final int branchCount = branches.size();
    if (branchCount == 1) {
      return getPigType(branches.get(0));
    }
    if (branchCount == 2) {
      // A (null, T) or (T, null) union takes the type of T.
      if (branches.get(0).getType() == Type.NULL) {
        return getPigType(branches.get(1));
      }
      if (branches.get(1).getType() == Type.NULL) {
        return getPigType(branches.get(0));
      }
    }
    if (isUnionOfSimpleTypes(s)) {
      return DataType.BYTEARRAY;
    }
    throw new ExecException(
        "Currently only supports element unions of a type and null (" + s.toString() +")");
  }
  default:
    throw new ExecException("Unknown type: " + s.getType().toString());
  }
}
 
Example 19
Source File: UPPER.java    From spork with Apache License 2.0 2 votes vote down vote up
/**
 * Names the output column: yields a one-field chararray schema whose field name is
 * derived by {@code getSchemaName} from this class's lower-cased name and the input schema.
 *
 * @param input - schema of the input data
 * @return schema of the output data
 */
@Override
public Schema outputSchema(Schema input) {
    String columnName = getSchemaName(this.getClass().getName().toLowerCase(), input);
    Schema.FieldSchema column = new Schema.FieldSchema(columnName, DataType.CHARARRAY);
    return new Schema(column);
}
 
Example 20
Source File: Stuff.java    From spork with Apache License 2.0 2 votes vote down vote up
/**
 * Reports the output as one chararray column with a null alias.
 *
 * @param input schema of the input data (not consulted)
 * @return output schema
 */
@Override
public Schema outputSchema(Schema input) {
	final Schema.FieldSchema chararrayField = new Schema.FieldSchema(null, DataType.CHARARRAY);
	return new Schema(chararrayField);
}