Java Code Examples for org.apache.pig.data.DataType#MAP

The following examples show how to use org.apache.pig.data.DataType#MAP. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SchemaTupleClassGenerator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Emits the source text of a {@code getDummy_<fieldPos>()} method whose single
 * statement returns a placeholder value chosen by the field's Pig type.
 *
 * @param fieldPos position of the field; becomes the suffix of the generated method name
 * @param fs       field schema whose {@code type} selects the placeholder return statement
 * @throws RuntimeException if {@code fs.type} is not one of the types handled below
 */
@Override
public void process(int fieldPos, Schema.FieldSchema fs) {
    // The method header is emitted before the type is inspected, so it has
    // already been added when an unsupported type makes the switch throw.
    add("public "+typeName()+" getDummy_"+fieldPos+"() {");

    final String returnStatement;
    switch (fs.type) {
    case DataType.INTEGER:
        returnStatement = "    return 0;";
        break;
    case DataType.LONG:
        returnStatement = "    return 0L;";
        break;
    case DataType.FLOAT:
        returnStatement = "    return 0.0f;";
        break;
    case DataType.DOUBLE:
        returnStatement = "    return 0.0;";
        break;
    case DataType.BOOLEAN:
        returnStatement = "    return true;";
        break;
    case DataType.DATETIME:
        returnStatement = "    return new DateTime();";
        break;
    case DataType.BIGDECIMAL:
        returnStatement = "    return (BigDecimal)null;";
        break;
    case DataType.BIGINTEGER:
        returnStatement = "    return (BigInteger)null;";
        break;
    case DataType.BYTEARRAY:
        returnStatement = "    return (byte[])null;";
        break;
    case DataType.CHARARRAY:
        returnStatement = "    return (String)null;";
        break;
    case DataType.TUPLE:
        returnStatement = "    return (Tuple)null;";
        break;
    case DataType.BAG:
        returnStatement = "    return (DataBag)null;";
        break;
    case DataType.MAP:
        returnStatement = "    return (Map<String,Object>)null;";
        break;
    default:
        throw new RuntimeException("Unsupported type");
    }

    add(returnStatement);
    add("}");
    addBreak();
}
 
Example 2
Source File: POPartialAgg.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Pulls the next result from {@code op}, requesting it as the operator's own result type.
 *
 * @param op expression operator to evaluate
 * @return the value returned by {@code op.getNext(...)} for the operator's result type
 * @throws ExecException with error code 2270 when the result type is not one of the
 *         types listed in the switch, or whatever {@code op.getNext} itself throws
 */
private Result getResult(ExpressionOperator op) throws ExecException {
    switch (op.getResultType()) {
    case DataType.BAG:
    case DataType.BOOLEAN:
    case DataType.BYTEARRAY:
    case DataType.CHARARRAY:
    case DataType.DOUBLE:
    case DataType.FLOAT:
    case DataType.INTEGER:
    case DataType.LONG:
    case DataType.BIGINTEGER:
    case DataType.BIGDECIMAL:
    case DataType.DATETIME:
    case DataType.MAP:
    case DataType.TUPLE:
        // Every accepted type is handled the same way.
        return op.getNext(op.getResultType());
    default:
        throw new ExecException(
                "Invalid result type: " + DataType.findType(op.getResultType()),
                2270,
                PigException.BUG);
    }
}
 
Example 3
Source File: RubySchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Assigns the given name as the alias of the first field of the wrapped schema.
 * Only fields of type tuple, bag, or map may be named; for any other type a
 * {@link RuntimeException} is thrown and the alias is left untouched.
 *
 * @param name the alias to store on the first field of the encapsulated Schema
 * @throws RuntimeException if the first field cannot be read from the schema, or if
 *         its type is not tuple, bag, or map
 */
private void setName(String name) {
    final Schema.FieldSchema firstField;
    try {
        firstField = internalSchema.getField(0);
    } catch (FrontendException e) {
        throw new RuntimeException("Error getting field from schema: " + internalSchema, e);
    }

    final byte fieldType = firstField.type;
    final boolean nameable = fieldType == DataType.TUPLE
            || fieldType == DataType.BAG
            || fieldType == DataType.MAP;

    if (!nameable) {
        throw new RuntimeException("setName cannot be set on Schema: " + internalSchema);
    }
    firstField.alias = name;
}
 
Example 4
Source File: MapLookupExpression.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the field schema of this map lookup.
 * <p>
 * A previously stored {@code fieldSchema} is returned as-is. Otherwise the schema of
 * the first successor in the plan is consulted:
 * <ul>
 *   <li>successor has no schema: returns {@code null};</li>
 *   <li>successor is a map with an inner schema: returns that inner schema's first
 *       field (this result is not stored in {@code fieldSchema});</li>
 *   <li>anything else: creates a bytearray field schema, stores it in
 *       {@code fieldSchema}, merges its uid into {@code uidOnlyFieldSchema}, and
 *       returns it.</li>
 * </ul>
 *
 * @return the resolved field schema, or {@code null} if the successor has none
 * @throws FrontendException propagated from the schema lookups
 */
public LogicalFieldSchema getFieldSchema() throws FrontendException {
    if (fieldSchema != null) {
        return fieldSchema;
    }

    LogicalExpression mapSource = (LogicalExpression) plan.getSuccessors(this).get(0);
    LogicalFieldSchema sourceFS = mapSource.getFieldSchema();
    if (sourceFS == null) {
        return null;
    }

    boolean mapWithValueSchema = sourceFS.type == DataType.MAP && sourceFS.schema != null;
    if (mapWithValueSchema) {
        return sourceFS.schema.getField(0);
    }

    fieldSchema = new LogicalSchema.LogicalFieldSchema(null, null, DataType.BYTEARRAY);
    uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema);
    return fieldSchema;
}
 
Example 5
Source File: HiveRCSchemaUtil.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the Pig DataType for the given Hive type name.
 * <p>
 * Matching is case-insensitive. Scalar names are matched exactly
 * ("string", "int", "bigint"/"long", "float", "double", "boolean", "byte");
 * any name containing "array" maps to TUPLE and any other name containing
 * "map" maps to MAP. The array check runs first, so a name containing both
 * words maps to TUPLE.
 *
 * @param hiveType Hive type name; must not be null
 * @return byte constant from DataType, or {@code DataType.ERROR} if the name is not recognised
 */
public static byte findPigDataType(String hiveType) {
    // Lower-case with Locale.ROOT so the result does not depend on the JVM's
    // default locale (e.g. in a Turkish locale "INT" would otherwise become
    // "ınt" with a dotless i and fail to match).
    final String normalized = hiveType.toLowerCase(java.util.Locale.ROOT);

    if (normalized.equals("string")) {
        return DataType.CHARARRAY;
    } else if (normalized.equals("int")) {
        return DataType.INTEGER;
    } else if (normalized.equals("bigint") || normalized.equals("long")) {
        return DataType.LONG;
    } else if (normalized.equals("float")) {
        return DataType.FLOAT;
    } else if (normalized.equals("double")) {
        return DataType.DOUBLE;
    } else if (normalized.equals("boolean")) {
        return DataType.BOOLEAN;
    } else if (normalized.equals("byte")) {
        // Hive byte is mapped to a Pig integer.
        return DataType.INTEGER;
    } else if (normalized.contains("array")) {
        return DataType.TUPLE;
    } else if (normalized.contains("map")) {
        return DataType.MAP;
    } else {
        return DataType.ERROR;
    }
}
 
Example 6
Source File: CastUtils.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Converts raw bytes into a Pig value of the requested type by delegating to the
 * matching {@code bytesToXxx} method of the supplied caster. A bytearray is not
 * sent through the caster; the bytes are wrapped directly in a {@code DataByteArray}.
 *
 * @param caster      LoadCaster used to perform the conversion
 * @param bytes       raw bytes to convert
 * @param fieldSchema schema handed to the caster for bag, map and tuple conversions;
 *                    pass in null if a simple type
 * @param dataType    target type, one of the constants from DataType
 * @return the converted object
 * @throws IOException if {@code dataType} is not handled here, or if the caster fails
 */
public static Object convertToType(LoadCaster caster, byte[] bytes,
        ResourceFieldSchema fieldSchema, byte dataType) throws IOException {
    switch (dataType) {
    // Complex types: the caster needs the field schema.
    case DataType.BAG:
        return caster.bytesToBag(bytes, fieldSchema);
    case DataType.MAP:
        return caster.bytesToMap(bytes, fieldSchema);
    case DataType.TUPLE:
        return caster.bytesToTuple(bytes, fieldSchema);

    // Bytes are wrapped as-is, no caster involved.
    case DataType.BYTEARRAY:
        return new DataByteArray(bytes);

    // Simple types.
    case DataType.CHARARRAY:
        return caster.bytesToCharArray(bytes);
    case DataType.BOOLEAN:
        return caster.bytesToBoolean(bytes);
    case DataType.INTEGER:
        return caster.bytesToInteger(bytes);
    case DataType.LONG:
        return caster.bytesToLong(bytes);
    case DataType.FLOAT:
        return caster.bytesToFloat(bytes);
    case DataType.DOUBLE:
        return caster.bytesToDouble(bytes);
    case DataType.BIGINTEGER:
        return caster.bytesToBigInteger(bytes);
    case DataType.BIGDECIMAL:
        return caster.bytesToBigDecimal(bytes);
    case DataType.DATETIME:
        return caster.bytesToDateTime(bytes);

    default:
        throw new IOException("Unknown type " + dataType);
    }
}
 
Example 7
Source File: EqualToExpr.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates the equality comparison for the configured operand type.
 * <p>
 * For a supported operand type, {@code accumChild} is consulted first and its
 * non-null result is returned directly; otherwise both operands are fetched and
 * passed to {@code doComparison}. Any {@link RuntimeException} raised along the way
 * is wrapped in an {@link ExecException} with error code 2067.
 *
 * @return the result of {@code accumChild} when it is non-null, otherwise the
 *         result of {@code doComparison}
 * @throws ExecException if the operand type is not supported (code 2067) or a
 *         runtime exception occurred during evaluation
 */
@Override
public Result getNextBoolean() throws ExecException {
    try {
        // Reject unsupported operand types before touching the operands.
        switch (operandType) {
        case DataType.BYTEARRAY:
        case DataType.DOUBLE:
        case DataType.FLOAT:
        case DataType.BOOLEAN:
        case DataType.INTEGER:
        case DataType.BIGINTEGER:
        case DataType.BIGDECIMAL:
        case DataType.LONG:
        case DataType.DATETIME:
        case DataType.CHARARRAY:
        case DataType.TUPLE:
        case DataType.MAP:
            break;
        default:
            throw new ExecException(
                    this.getClass().getSimpleName() + " does not know how to handle type: "
                            + DataType.findTypeName(operandType),
                    2067,
                    PigException.BUG);
        }

        Result accumulated = accumChild(null, operandType);
        if (accumulated != null) {
            return accumulated;
        }

        Result left = lhs.getNext(operandType);
        Result right = rhs.getNext(operandType);
        return doComparison(left, right);
    } catch (RuntimeException e) {
        throw new ExecException("exception while executing " + this.toString() + ": " + e.toString(), 2067, PigException.BUG, e);
    }
}
 
Example 8
Source File: ResourceSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a resource field schema from a
 * {@link org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema}: type and alias
 * are copied, the description is set to a fixed "autogenerated" text, and the inner
 * schema is converted when present on a bag, tuple, or map field.
 *
 * @param fieldSchema fieldSchema to copy from
 */
public ResourceFieldSchema(FieldSchema fieldSchema) {
    type = fieldSchema.type;
    name = fieldSchema.alias;
    description = "autogenerated from Pig Field Schema";

    final Schema innerSchema = fieldSchema.schema;
    final boolean complexType =
            type == DataType.BAG || type == DataType.TUPLE || type == DataType.MAP;

    // Partial schemas are allowed: a complex field without an inner schema,
    // like any non-complex field, simply gets a null schema.
    schema = (complexType && innerSchema != null) ? new ResourceSchema(innerSchema) : null;
}
 
Example 9
Source File: HBaseStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a Pig value to bytes according to its declared type.
 * A bytearray is unwrapped directly; every other supported type is handed to the
 * matching {@code toBytes} overload of the configured caster.
 *
 * @param o    value to serialize; a null value yields null
 * @param type Pig type constant from DataType describing {@code o}
 * @return the serialized bytes, or null when {@code o} is null or {@code type} is NULL
 * @throws IOException if the type is ERROR, if no converter exists for the type, or
 *         if the caster fails
 */
@SuppressWarnings("unchecked")
private byte[] objToBytes(Object o, byte type) throws IOException {
    // The caster is cast before the null check, so a caster of the wrong
    // class fails here even for a null value.
    LoadStoreCaster storeCaster = (LoadStoreCaster) caster_;
    if (o == null) {
        return null;
    }

    switch (type) {
    case DataType.NULL:
        return null;

    case DataType.BYTEARRAY:
        return ((DataByteArray) o).get();

    case DataType.CHARARRAY:
        return storeCaster.toBytes((String) o);
    case DataType.BOOLEAN:
        return storeCaster.toBytes((Boolean) o);
    case DataType.INTEGER:
        return storeCaster.toBytes((Integer) o);
    case DataType.LONG:
        return storeCaster.toBytes((Long) o);
    case DataType.FLOAT:
        return storeCaster.toBytes((Float) o);
    case DataType.DOUBLE:
        return storeCaster.toBytes((Double) o);
    case DataType.BIGINTEGER:
        return storeCaster.toBytes((BigInteger) o);
    case DataType.BIGDECIMAL:
        return storeCaster.toBytes((BigDecimal) o);
    case DataType.DATETIME:
        return storeCaster.toBytes((DateTime) o);

    case DataType.BAG:
        return storeCaster.toBytes((DataBag) o);
    case DataType.TUPLE:
        return storeCaster.toBytes((Tuple) o);
    case DataType.MAP:
        // Unchecked cast: the key/value types cannot be verified here.
        // Relying on DataType.findType to do the right thing.
        return storeCaster.toBytes((Map<String, Object>) o);

    case DataType.ERROR:
        throw new IOException("Unable to determine type of " + o.getClass());
    default:
        throw new IOException("Unable to find a converter for tuple field " + o);
    }
}
 
Example 10
Source File: NotEqualToExpr.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates the inequality comparison for the configured operand type.
 * <p>
 * For a supported operand type, {@code accumChild} is consulted first and its
 * non-null result is returned directly; otherwise the left and right operands are
 * fetched, in that order, and passed to {@code doComparison}.
 *
 * @return the result of {@code accumChild} when it is non-null, otherwise the
 *         result of {@code doComparison}
 * @throws ExecException with error code 2067 if the operand type is not supported,
 *         or whatever the operand evaluation throws
 */
@Override
public Result getNextBoolean() throws ExecException {
    // Reject unsupported operand types before touching the operands.
    switch (operandType) {
    case DataType.BYTEARRAY:
    case DataType.DOUBLE:
    case DataType.FLOAT:
    case DataType.BOOLEAN:
    case DataType.INTEGER:
    case DataType.BIGINTEGER:
    case DataType.BIGDECIMAL:
    case DataType.LONG:
    case DataType.DATETIME:
    case DataType.CHARARRAY:
    case DataType.TUPLE:
    case DataType.MAP:
        break;
    default:
        throw new ExecException(
                this.getClass().getSimpleName() + " does not know how to handle type: "
                        + DataType.findTypeName(operandType),
                2067,
                PigException.BUG);
    }

    Result accumulated = accumChild(null, operandType);
    if (accumulated != null) {
        return accumulated;
    }

    Result leftOperand = lhs.getNext(operandType);
    Result rightOperand = rhs.getNext(operandType);
    return doComparison(leftOperand, rightOperand);
}
 
Example 11
Source File: TezDagBuilder.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Selects the grouping comparator class that corresponds to a Pig key type.
 *
 * @param keyType Pig type constant from DataType describing the key
 * @return the {@code WritableComparator} subclass used for grouping on that key type
 * @throws JobCreationException with code 1068 for map or bag keys (not supported as
 *         keys), or with code 2036 for any type not handled here
 */
private static Class<? extends WritableComparator> getGroupingComparatorForKeyType(byte keyType)
        throws JobCreationException {

    switch (keyType) {
    case DataType.BOOLEAN:    return PigGroupingBooleanWritableComparator.class;
    case DataType.INTEGER:    return PigGroupingIntWritableComparator.class;
    case DataType.BIGINTEGER: return PigGroupingBigIntegerWritableComparator.class;
    case DataType.BIGDECIMAL: return PigGroupingBigDecimalWritableComparator.class;
    case DataType.LONG:       return PigGroupingLongWritableComparator.class;
    case DataType.FLOAT:      return PigGroupingFloatWritableComparator.class;
    case DataType.DOUBLE:     return PigGroupingDoubleWritableComparator.class;
    case DataType.DATETIME:   return PigGroupingDateTimeWritableComparator.class;
    case DataType.CHARARRAY:  return PigGroupingCharArrayWritableComparator.class;
    case DataType.BYTEARRAY:  return PigGroupingDBAWritableComparator.class;
    case DataType.TUPLE:      return PigGroupingTupleWritableComparator.class;

    // Maps and bags are rejected as user input errors.
    case DataType.MAP:
        throw new JobCreationException(
                "Using Map as key not supported.", 1068, PigException.INPUT);
    case DataType.BAG:
        throw new JobCreationException(
                "Using Bag as key not supported.", 1068, PigException.INPUT);

    // Anything else is reported as an internal bug.
    default:
        throw new JobCreationException(
                "Unhandled key type " + DataType.findTypeName(keyType), 2036, PigException.BUG);
    }
}
 
Example 12
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Declares the output as a single unnamed map field; the input schema is not consulted.
 *
 * @param input schema of the input (unused)
 * @return a schema containing one field of type MAP with a null alias
 */
public Schema outputSchema(Schema input) {
    Schema.FieldSchema unnamedMap = new Schema.FieldSchema(null, DataType.MAP);
    return new Schema(unnamedMap);
}
 
Example 13
Source File: TestEvalPipeline2.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Declares the output as a single map field whose alias is obtained from
 * {@code getSchemaName("parselong", input)}.
 *
 * @param input schema of the input, forwarded to {@code getSchemaName}
 * @return a schema containing one field of type MAP
 */
@Override
public Schema outputSchema(Schema input) {
    String alias = getSchemaName("parselong", input);
    Schema.FieldSchema mapField = new Schema.FieldSchema(alias, DataType.MAP);
    return new Schema(mapField);
}
 
Example 14
Source File: TOMAP.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Reports the output schema: one map-typed field with no alias. The input
 * schema plays no part in the result.
 *
 * @param input schema of the input (unused)
 * @return a schema containing one field of type MAP with a null alias
 */
@Override
public Schema outputSchema(Schema input) {
    final Schema.FieldSchema mapOnly = new Schema.FieldSchema(null, DataType.MAP);
    return new Schema(mapOnly);
}
 
Example 15
Source File: PigStreamingUDF.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Deserializes one field from a byte range according to the field's Pig type.
 * <p>
 * The range is first compared against the null delimiter; on a match null is
 * returned. Bags, tuples and maps are delegated to their own deserializers on a
 * narrowed range ({@code startIndex + 3} to {@code endIndex - 2}). Chararrays and
 * bytearrays are taken from the bytes directly. Every remaining type is parsed from
 * the string extracted from the range.
 *
 * @param fs         schema of the field; its {@code type} selects the decoding
 * @param bytes      buffer holding the serialized field
 * @param startIndex index of the first byte of the field
 * @param endIndex   end index of the field, passed on to the helpers unchanged
 * @return the deserialized value, or null if the bytes match the null delimiter
 * @throws IOException if the type cannot be deserialized or a helper fails
 */
private Object deserialize(FieldSchema fs, byte[] bytes, int startIndex, int endIndex) throws IOException {
    // Null marker check comes first, whatever the type.
    boolean matchesNullMarker = WritableComparator.compareBytes(
            bytes, startIndex, DELIMS.getNull().length,
            DELIMS.getNull(), 0, DELIMS.getNull().length) == 0;
    if (matchesNullMarker) {
        return null;
    }

    // Types decoded straight from the byte range.
    switch (fs.type) {
    case DataType.BAG:
        return deserializeBag(fs, bytes, startIndex + 3, endIndex - 2);
    case DataType.TUPLE:
        return deserializeTuple(fs, bytes, startIndex + 3, endIndex - 2);
    case DataType.MAP:
        return deserializeMap(bytes, startIndex + 3, endIndex - 2);
    case DataType.CHARARRAY:
        return extractString(bytes, startIndex, endIndex, true);
    case DataType.BYTEARRAY:
        return new DataByteArray(bytes, startIndex, endIndex+1);
    default:
        break;
    }

    // Remaining types are parsed from text. The string is extracted before the
    // type is checked, so this also runs for unsupported types.
    // Can we do this faster?
    String text = extractString(bytes, startIndex, endIndex, false);

    switch (fs.type) {
    case DataType.LONG:
        return Long.valueOf(text);
    case DataType.INTEGER:
        return Integer.valueOf(text);
    case DataType.FLOAT:
        return Float.valueOf(text);
    case DataType.DOUBLE:
        return Double.valueOf(text);
    case DataType.BOOLEAN:
        return Boolean.valueOf(text);
    case DataType.DATETIME:
        return ToDate.extractDateTime(text);
    case DataType.BIGINTEGER:
        return new BigInteger(text);
    case DataType.BIGDECIMAL:
        return new BigDecimal(text);
    default:
        throw new ExecException("Can't deserialize type: " + DataType.findTypeName(fs.type));
    }
}
 
Example 16
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the converter matching a Pig field's type.
 * <p>
 * Bag, map and tuple fields get group converters built from {@code type.asGroupType()};
 * the tuple converter hands its current tuple to {@code parent} when it ends. All
 * other supported types get a field converter that writes into {@code parent}.
 *
 * @param pigField               Pig field schema whose {@code type} selects the converter
 * @param type                   the type the converter is built for
 * @param parent                 container that receives converted values
 * @param elephantBirdCompatible passed through to the group converters; when true a
 *                               boolean field gets an integer converter
 * @param columnIndexAccess      passed through to the group converters
 * @return a converter for the field
 * @throws TupleConversionException if the Pig type is unsupported or preparing the
 *         converter fails
 */
static Converter newConverter(FieldSchema pigField, Type type, final ParentValueContainer parent, boolean elephantBirdCompatible, boolean columnIndexAccess) {
  try {
    switch (pigField.type) {
    // Group types.
    case DataType.BAG:
      return new BagConverter(type.asGroupType(), pigField, parent, elephantBirdCompatible, columnIndexAccess);
    case DataType.MAP:
      return new MapConverter(type.asGroupType(), pigField, parent, elephantBirdCompatible, columnIndexAccess);
    case DataType.TUPLE:
      return new TupleConverter(type.asGroupType(), pigField.schema, elephantBirdCompatible, columnIndexAccess) {
        @Override
        public void end() {
          super.end();
          // Hand the finished tuple to the enclosing container.
          parent.add(this.currentTuple);
        }
      };

    // Field types.
    case DataType.CHARARRAY: {
      // If the original type isn't a string, we don't want to use the dictionary because
      // a custom implementation will be needed for each type. Just default to no dictionary.
      boolean annotatedAsString =
          type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation;
      return new FieldStringConverter(parent, annotatedAsString);
    }
    case DataType.BYTEARRAY:
      return new FieldByteArrayConverter(parent);
    case DataType.BOOLEAN:
      if (!elephantBirdCompatible) {
        return new FieldBooleanConverter(parent);
      }
      return new FieldIntegerConverter(parent);
    case DataType.INTEGER:
      return new FieldIntegerConverter(parent);
    case DataType.LONG:
      return new FieldLongConverter(parent);
    case DataType.FLOAT:
      return new FieldFloatConverter(parent);
    case DataType.DOUBLE:
      return new FieldDoubleConverter(parent);
    case DataType.BIGDECIMAL:
      return new FieldBigDecimalConverter(type, parent);

    default:
      throw new TupleConversionException("unsupported pig type: " + pigField);
    }
  } catch (FrontendException | RuntimeException e) {
    throw new TupleConversionException(
        "error while preparing converter for:\n" + pigField + "\n" + type, e);
  }
}
 
Example 17
Source File: Schema.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Appends a textual form of {@code schema} to {@code sb}.
 * <p>
 * A tuple is wrapped in parentheses and a bag in braces; any other {@code type}
 * gets no wrapper. Fields are separated by commas, each preceded by a call to
 * {@code indent} at one level deeper than {@code indentLevel}. A field is written as
 * its optional {@code alias: } followed by its type name, a nested tuple/bag
 * rendering, or {@code typename[...]} for a map.
 *
 * @param sb          builder receiving the output
 * @param schema      schema to render; may be null, in which case only the wrapper
 *                    and closing indent are written
 * @param type        type of the enclosing field, used to choose the wrapper
 * @param indentLevel indentation level of the enclosing field
 * @throws FrontendException propagated from reading the schema's fields
 * @throws AssertionError if a tuple or bag field uses {@code schema} itself as its
 *         inner schema
 */
public static void stringifySchema(StringBuilder sb,
                                   Schema schema,
                                   byte type,
                                   int indentLevel)
                                        throws FrontendException{

    final boolean wrapsTuple = type == DataType.TUPLE;
    final boolean wrapsBag = !wrapsTuple && type == DataType.BAG;

    if (wrapsTuple) {
        sb.append("(");
    } else if (wrapsBag) {
        sb.append("{");
    }

    // Fields are rendered one level deeper than the enclosing wrapper.
    final int fieldLevel = indentLevel + 1;

    if (schema != null) {
        for (int i = 0; i < schema.size(); i++) {
            // The separator and indent are written for every position,
            // including positions whose field turns out to be null.
            if (i > 0) {
                sb.append(",");
            }
            indent(sb, fieldLevel);

            final FieldSchema field = schema.getField(i);
            if (field == null) {
                continue;
            }

            if (field.alias != null) {
                sb.append(field.alias).append(": ");
            }

            if (DataType.isAtomic(field.type)) {
                sb.append(DataType.findTypeName(field.type));
            } else if (field.type == DataType.TUPLE || field.type == DataType.BAG) {
                // Safety net against a schema listing itself as its own inner schema.
                if (schema == field.schema) {
                    throw new AssertionError("Schema refers to itself "
                                             + "as inner schema");
                }
                stringifySchema(sb, field.schema, field.type, fieldLevel);
            } else if (field.type == DataType.MAP) {
                sb.append(DataType.findTypeName(field.type) + "[");
                if (field.schema != null) {
                    stringifySchema(sb, field.schema, field.type, fieldLevel);
                }
                sb.append("]");
            } else {
                sb.append(DataType.findTypeName(field.type));
            }
        }
    }

    // The closing wrapper is indented at the caller's level.
    indent(sb, indentLevel);

    if (wrapsTuple) {
        sb.append(")");
    } else if (wrapsBag) {
        sb.append("}");
    }

}
 
Example 18
Source File: TestPackage.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Dispatches to {@code runTest} with a randomly generated value of the requested
 * type, to show that the code under test has no type-specific behaviour.
 * <p>
 * MAP, INTERNALMAP and BYTE return without running anything. BIGCHARARRAY builds a
 * string of at least 65535 characters and runs it as a CHARARRAY. Any type without
 * a case here fails the test.
 *
 * @param t     Pig type constant from DataType selecting the value to generate
 * @param inner flags forwarded unchanged to {@code runTest}
 */
private void pickTest(byte t, boolean[] inner) throws ExecException, IOException {
    final Random rng = new Random();
    switch (t) {
    // Types that are skipped.
    case DataType.MAP:
    case DataType.INTERNALMAP:
    case DataType.BYTE:
        return; // map not key type

    // Complex types.
    case DataType.BAG:
        runTest(GenRandomData.genRandSmallTupDataBag(rng, 10, 100), inner, DataType.BAG);
        break;
    case DataType.TUPLE:
        runTest(GenRandomData.genRandSmallBagTuple(rng, 10, 100), inner, DataType.TUPLE);
        break;

    // Text and bytes.
    case DataType.BYTEARRAY:
        runTest(GenRandomData.genRandDBA(rng), inner, DataType.BYTEARRAY);
        break;
    case DataType.CHARARRAY:
        runTest(GenRandomData.genRandString(rng), inner, DataType.CHARARRAY);
        break;
    case DataType.BIGCHARARRAY: {
        // Keep appending random strings until the text is at least 65535 characters.
        String bigText = GenRandomData.genRandString(rng);
        while (bigText.length() < 65535) {
            bigText = bigText + GenRandomData.genRandString(rng);
        }
        runTest(bigText, inner, DataType.CHARARRAY);
        break;
    }

    // Scalars.
    case DataType.BOOLEAN:
        runTest(rng.nextBoolean(), inner, DataType.BOOLEAN);
        break;
    case DataType.INTEGER:
        runTest(rng.nextInt(), inner, DataType.INTEGER);
        break;
    case DataType.LONG:
        runTest(rng.nextLong(), inner, DataType.LONG);
        break;
    case DataType.FLOAT:
        runTest(rng.nextFloat(), inner, DataType.FLOAT);
        break;
    case DataType.DOUBLE:
        runTest(rng.nextDouble(), inner, DataType.DOUBLE);
        break;
    case DataType.BIGINTEGER:
        runTest(new BigInteger(256, rng), inner, DataType.BIGINTEGER);
        break;
    case DataType.BIGDECIMAL:
        runTest(new BigDecimal(rng.nextDouble()), inner, DataType.BIGDECIMAL);
        break;
    case DataType.DATETIME:
        runTest(new DateTime(rng.nextLong()), inner, DataType.DATETIME);
        break;

    default:
        fail("No test case for type " + DataType.findTypeName(t));
    }
}
 
Example 19
Source File: PigSchema2Avro.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Check whether Avro type is compatible with Pig type.
 * <p>
 * An Avro UNION accepts every Pig type, and a Pig TUPLE is accepted for every Avro
 * type. Otherwise the Pig type must be one of those listed for the Avro type:
 * <pre>
 *   ARRAY   : BAG
 *   MAP     : MAP
 *   STRING  : CHARARRAY, BIGCHARARRAY
 *   ENUM    : CHARARRAY
 *   BOOLEAN : BOOLEAN, INTEGER
 *   BYTES   : BYTEARRAY
 *   FIXED   : BYTEARRAY
 *   DOUBLE  : DOUBLE, FLOAT, INTEGER, LONG
 *   FLOAT   : FLOAT, INTEGER, LONG
 *   INT     : INTEGER
 *   LONG    : LONG, INTEGER
 * </pre>
 * Any other Avro type is incompatible.
 *
 * @param avroSchema Avro schema whose type is checked
 * @param pigSchema  Pig field schema whose type is checked
 * @return true if the Pig type may be stored as the Avro type
 */
protected static boolean isCompatible(Schema avroSchema, ResourceFieldSchema pigSchema) {

    Schema.Type avroType = avroSchema.getType();
    byte pigType = pigSchema.getType();

    if (avroType.equals(Schema.Type.UNION)) {
        return true;
    }
    if (pigType == DataType.TUPLE) {
        /* Tuple is compatible with any type; for users may want to
           get rid of the tuple wrapper */
        return true;
    }

    // Each Avro type is tied to its own set of Pig types. The earlier single
    // boolean expression mixed && and || without grouping, so alternatives such
    // as "|| pigType == DataType.INTEGER" matched regardless of the Avro type.
    switch (avroType) {
    case ARRAY:
        return pigType == DataType.BAG;
    case MAP:
        return pigType == DataType.MAP;
    case STRING:
        return pigType == DataType.CHARARRAY
                || pigType == DataType.BIGCHARARRAY;
    case ENUM:
        return pigType == DataType.CHARARRAY;
    case BOOLEAN:
        return pigType == DataType.BOOLEAN
                || pigType == DataType.INTEGER;
    case BYTES:
    case FIXED:
        return pigType == DataType.BYTEARRAY;
    case DOUBLE:
        return pigType == DataType.DOUBLE
                || pigType == DataType.FLOAT
                || pigType == DataType.INTEGER
                || pigType == DataType.LONG;
    case FLOAT:
        return pigType == DataType.FLOAT
                || pigType == DataType.INTEGER
                || pigType == DataType.LONG;
    case INT:
        return pigType == DataType.INTEGER;
    case LONG:
        return pigType == DataType.LONG
                || pigType == DataType.INTEGER;
    default:
        return false;
    }

}
 
Example 20
Source File: SchemaTupleClassGenerator.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether this element's type is the Pig map type.
 *
 * @return true if {@code type} equals {@code DataType.MAP}
 */
public boolean isMap() {
    final boolean mapTyped = (DataType.MAP == type);
    return mapTyped;
}