Java Code Examples for org.apache.pig.data.DataType#BAG

The following examples show how to use org.apache.pig.data.DataType#BAG. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code BagToString.outputSchema} reports a CHARARRAY first
 * field when given an input schema made of a bag of (int, chararray) tuples
 * followed by a chararray field.
 */
@Test
public void testOutputSchemaForBagToTupleStringUDF() throws Exception {

	// Inner tuple schema: (int, chararray)
	FieldSchema tupSch = new FieldSchema(null, DataType.TUPLE);
	tupSch.schema = new Schema();
	tupSch.schema.add(new FieldSchema(null, DataType.INTEGER));
	tupSch.schema.add(new FieldSchema(null, DataType.CHARARRAY));

	// Bag wrapping that tuple
	FieldSchema bagSch = new FieldSchema(null, DataType.BAG);
	bagSch.schema = new Schema(tupSch);

	// UDF input: (bag, chararray)
	Schema inputSch = new Schema();
	inputSch.add(bagSch);
	inputSch.add(new FieldSchema(null, DataType.CHARARRAY));

	BagToString udf = new BagToString();
	Schema outputSchema = udf.outputSchema(inputSch);

	// JUnit's contract is (message, expected, actual); passing them in that
	// order keeps the failure message ("expected X but was Y") truthful.
	assertEquals("schema of BagToTuple input", DataType.CHARARRAY,
			outputSchema.getField(0).type);

}
 
Example 2
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a nested Pig schema through {@code ResourceSchema}: a bag
 * field "t1" containing a tuple field "t0" whose inner schema is generated
 * from the aliases f1/f2 and the types chararray/int. The Pig schema
 * regenerated from the ResourceSchema must equal the original.
 */
@Test
public void testResourceFlatSchemaCreation2() 
throws ExecException, SchemaMergeException, FrontendException {
    String[] fieldAliases = {"f1", "f2"};
    byte[] fieldTypes = {DataType.CHARARRAY, DataType.INTEGER};

    // Build the schema inside-out: flat fields -> tuple "t0" -> bag "t1".
    Schema.FieldSchema tupleField = new Schema.FieldSchema(
            "t0",
            TypeCheckingTestUtil.genFlatSchema(fieldAliases, fieldTypes),
            DataType.TUPLE);
    Schema.FieldSchema bagField = new Schema.FieldSchema(
            "t1", new Schema(tupleField), DataType.BAG);
    Schema origSchema = new Schema(bagField);

    // Pig schema -> ResourceSchema -> Pig schema
    ResourceSchema rsSchema = new ResourceSchema(origSchema);
    Schema genSchema = Schema.getPigSchema(rsSchema);

    boolean sameSchema = Schema.equals(genSchema, origSchema, true, false);
    assertTrue("generated schema equals original", sameSchema);
}
 
Example 3
Source File: SchemaUtil.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Maps the type id of {@code type} to a Pig {@code DataType} byte code.
 *
 * Timestamp, date and string ids all map to CHARARRAY; fixed and binary
 * both map to BYTEARRAY; struct, list and map ids map to TUPLE, BAG and MAP.
 *
 * @param type the source type to translate
 * @return the matching Pig DataType constant
 * @throws IOException declared by the signature; a FrontendException is
 *         thrown for any type id not handled below
 */
private static byte convertType(Type type) throws IOException {
  switch (type.typeId()) {
    case BOOLEAN:
      return DataType.BOOLEAN;
    case INTEGER:
      return DataType.INTEGER;
    case LONG:
      return DataType.LONG;
    case FLOAT:
      return DataType.FLOAT;
    case DOUBLE:
      return DataType.DOUBLE;

    // Textual and temporal values are all surfaced as character arrays.
    case TIMESTAMP:
    case DATE:
    case STRING:
      return DataType.CHARARRAY;

    // Fixed- and variable-length binary share the byte array type.
    case FIXED:
    case BINARY:
      return DataType.BYTEARRAY;

    case DECIMAL:
      return DataType.BIGDECIMAL;

    // Nested types.
    case STRUCT:
      return DataType.TUPLE;
    case LIST:
      return DataType.BAG;
    case MAP:
      return DataType.MAP;

    default:
      throw new FrontendException("Unsupported primitive type:" + type);
  }
}
 
Example 4
Source File: LogicalSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Renders this field as {@code alias[#uid]:type}.
 *
 * Bags, tuples and maps print their nested schema inside "bag{...}",
 * "tuple(...)" and "map(...)"; a map with no schema prints as plain "map".
 * Any other type prints the name returned by DataType.findTypeName.
 *
 * @param verbose when true, "#" followed by the uid is appended to the alias
 *                and the flag is passed on to the nested schema's toString
 * @return the textual form of this field
 */
public String toString(boolean verbose) {
    String uidString = verbose ? "#" + uid : "";
    String aliasToPrint = (alias != null) ? alias : "";
    // Common leading part shared by every branch below.
    String prefix = aliasToPrint + uidString;

    if (type == DataType.BAG) {
        return (schema == null)
                ? prefix + ":bag{}"
                : prefix + ":bag{" + schema.toString(verbose) + "}";
    }
    if (type == DataType.TUPLE) {
        return (schema == null)
                ? prefix + ":tuple()"
                : prefix + ":tuple(" + schema.toString(verbose) + ")";
    }
    if (type == DataType.MAP) {
        return (schema == null)
                ? prefix + ":map"
                : prefix + ":map(" + schema.toString(verbose) + ")";
    }
    return prefix + ":" + DataType.findTypeName(type);
}
 
Example 5
Source File: Entropy.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that the first input field is a bag whose first element is a
 * tuple, and declares the result as a single DOUBLE field.
 *
 * The output field name comes from getSchemaName, called with this class's
 * lower-cased name and the input schema.
 *
 * @param input schema of the arguments passed to this function
 * @return a one-field schema of type DOUBLE
 * @throws RuntimeException if the input is not a bag of tuples, or wrapping
 *         a FrontendException raised while inspecting the schema
 */
@Override
public Schema outputSchema(Schema input)
{
    try {
        Schema.FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }

        // Type of the bag's first (element) field.
        byte elementType = bagField.schema.getField(0).type;
        if (elementType != DataType.TUPLE) {
            throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                                     DataType.findTypeName(elementType)));
        }

        String udfName = this.getClass().getName().toLowerCase();
        Schema.FieldSchema resultField =
            new Schema.FieldSchema(getSchemaName(udfName, input), DataType.DOUBLE);
        return new Schema(resultField);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
 
Example 6
Source File: CombinerOptimizerUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Marks bag-typed projections as producing single-tuple bags; projections
 * of any other result type are left untouched.
 *
 * @param proj the projection operator being visited
 */
@Override
public void visitProject(POProject proj) throws VisitorException {
    if (proj.getResultType() != DataType.BAG) {
        return;
    }

    // IMPORTANT ASSUMPTION:
    // This visitor is expected to run only when fixing up the projects in
    // the map's foreach inner plan. On the map side the bags hold a single
    // tuple, so the project is flagged to use single tuple bags. If the
    // input to the map's foreach ever stops being single tuple bags, this
    // flag must NOT be set.
    proj.setResultSingleTupleBag(true);
}
 
Example 7
Source File: ResourceSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Appends a textual form of {@code rs} to {@code sb}: the descriptions of
 * its fields joined by commas, wrapped in "{}" for a bag, "()" for a tuple
 * and "[]" for a map. Other types get no wrapper, and a null {@code rs}
 * contributes no field text.
 *
 * @param sb         buffer to append to
 * @param rs         schema whose fields are described; may be null
 * @param type       DataType code that selects the surrounding delimiters
 * @param printAlias passed through to each field's getDescription
 */
private static void stringifyResourceSchema(StringBuilder sb, 
        ResourceSchema rs, byte type, boolean printAlias) {

    // Opening delimiter.
    switch (type) {
        case DataType.BAG:
            sb.append("{");
            break;
        case DataType.TUPLE:
            sb.append("(");
            break;
        case DataType.MAP:
            sb.append("[");
            break;
        default:
            break;
    }

    if (rs != null) {
        ResourceFieldSchema[] fields = rs.getFields();
        for (int idx = 0; idx < fields.length; idx++) {
            // The separator goes before every field except the first.
            if (idx > 0) {
                sb.append(",");
            }
            sb.append(fields[idx].getDescription(printAlias));
        }
    }

    // Closing delimiter, mirroring the opening one.
    switch (type) {
        case DataType.BAG:
            sb.append("}");
            break;
        case DataType.TUPLE:
            sb.append(")");
            break;
        case DataType.MAP:
            sb.append("]");
            break;
        default:
            break;
    }
}
 
Example 8
Source File: ResourceSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a resource field schema from a Pig
 * {@link org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema}.
 * The type and alias are copied, the description is set to a fixed
 * "autogenerated" text, and the nested schema is converted only for bag,
 * tuple and map fields that actually carry one.
 *
 * @param fieldSchema fieldSchema to copy from
 */
public ResourceFieldSchema(FieldSchema fieldSchema) {
    type = fieldSchema.type;
    name = fieldSchema.alias;
    description = "autogenerated from Pig Field Schema";

    Schema nested = fieldSchema.schema;
    boolean isComplexType = type == DataType.BAG
            || type == DataType.TUPLE
            || type == DataType.MAP;

    // allow partial schema: a complex type without an inner schema is
    // stored with a null schema
    schema = (isComplexType && nested != null) ? new ResourceSchema(nested) : null;
}
 
Example 9
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether {@code fieldSch} is anything other than a bag whose first
 * inner field is a tuple with no schema.
 *
 * @param fieldSch the field schema to inspect
 * @return {@code false} only when the field is a bag with a non-null schema
 *         whose first field is a tuple with a null schema; {@code true} in
 *         every other case
 * @throws FrontendException if the inner field cannot be retrieved
 */
private static boolean isNotBagWithEmptyTuple(FieldSchema fieldSch)
throws FrontendException {
    // Not a bag, or a bag with no inner schema at all.
    if (fieldSch.type != DataType.BAG || fieldSch.schema == null) {
        return true;
    }

    FieldSchema firstInner = fieldSch.schema.getField(0);
    boolean emptyTuple = firstInner != null
            && firstInner.type == DataType.TUPLE
            && firstInner.schema == null;
    return !emptyTuple;
}
 
Example 10
Source File: Schema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a {@code ResourceSchema} into a Pig {@code Schema}, recursing
 * into nested schemas.
 *
 * A bag field that carries an inner schema must contain exactly one field
 * and that field must be a tuple; otherwise
 * ResourceFieldSchema.throwInvalidSchemaException() is invoked. A bag
 * without an inner schema is accepted.
 *
 * @param rSchema the resource schema to convert; may be null
 * @return the equivalent Pig schema, or null when {@code rSchema} is null
 * @throws FrontendException declared by the signature for conversion failures
 */
public static Schema getPigSchema(ResourceSchema rSchema) 
throws FrontendException {
    if (rSchema == null) {
        return null;
    }

    List<FieldSchema> converted = new ArrayList<FieldSchema>();
    for (ResourceFieldSchema rfs : rSchema.getFields()) {
        ResourceSchema nested = rfs.getSchema();
        Schema innerSchema = (nested == null) ? null : getPigSchema(nested);
        FieldSchema fs = new FieldSchema(rfs.getName(), innerSchema, rfs.getType());

        // allow partial schema: only validate bags that declare an inner schema
        if (rfs.getType() == DataType.BAG && fs.schema != null) {
            boolean holdsSingleTuple = fs.schema.size() == 1
                    && fs.schema.getField(0).type == DataType.TUPLE;
            if (!holdsSingleTuple) {
                ResourceFieldSchema.throwInvalidSchemaException();
            }
        }
        converted.add(fs);
    }
    return new Schema(converted);
}
 
Example 11
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Pig field schema into a {@code Type} with the given name.
 *
 * Bags, tuples and maps are delegated to convertBag, convertTuple and
 * convertMap; the remaining handled types become primitives. DATETIME is
 * rejected with an UnsupportedOperationException.
 *
 * @param fieldSchema the Pig field to convert
 * @param name        the name given to the resulting type
 * @return the converted type
 * @throws SchemaConversionException for a type with no case below, or
 *         wrapping a FrontendException raised during conversion
 */
private Type convertWithName(FieldSchema fieldSchema, String name) {
  try {
    switch (fieldSchema.type) {
      // Primitive types.
      case DataType.BOOLEAN:
        return primitive(name, PrimitiveTypeName.BOOLEAN);
      case DataType.INTEGER:
        return primitive(name, PrimitiveTypeName.INT32);
      case DataType.LONG:
        return primitive(name, PrimitiveTypeName.INT64);
      case DataType.FLOAT:
        return primitive(name, PrimitiveTypeName.FLOAT);
      case DataType.DOUBLE:
        return primitive(name, PrimitiveTypeName.DOUBLE);
      case DataType.CHARARRAY:
        return primitive(name, PrimitiveTypeName.BINARY, stringType());
      case DataType.BYTEARRAY:
        return primitive(name, PrimitiveTypeName.BINARY);

      // Nested types.
      case DataType.BAG:
        return convertBag(name, fieldSchema);
      case DataType.TUPLE:
        return convertTuple(name, fieldSchema, Repetition.OPTIONAL);
      case DataType.MAP:
        return convertMap(name, fieldSchema);

      // Recognised but not convertible.
      case DataType.DATETIME:
        throw new UnsupportedOperationException();

      default:
        throw new SchemaConversionException("Unknown type " + fieldSchema.type + " " + DataType.findTypeName(fieldSchema.type));
    }
  } catch (FrontendException e) {
    throw new SchemaConversionException("can't convert "+fieldSchema, e);
  }
}
 
Example 12
Source File: TestResourceSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that a ResourceSchema built with SortInfo round-trips back to the
 * original Pig schema and exposes the sort key indexes and orders that
 * were supplied.
 */
@Test
public void testResourceFlatSchemaCreationWithSortInfo() 
throws ExecException, SchemaMergeException, FrontendException {
    String[] fieldAliases = {"f1", "f2"};
    byte[] fieldTypes = {DataType.CHARARRAY, DataType.INTEGER};

    // Build the schema inside-out: flat fields -> tuple "t0" -> bag "t1".
    Schema.FieldSchema tupleField = new Schema.FieldSchema(
            "t0",
            TypeCheckingTestUtil.genFlatSchema(fieldAliases, fieldTypes),
            DataType.TUPLE);
    Schema origSchema = new Schema(
            new Schema.FieldSchema("t1", new Schema(tupleField), DataType.BAG));

    // Two sort columns: index 0 ascending, index 1 descending.
    // NOTE(review): the second column is also named "f1" although index 1
    // corresponds to alias "f2" above - confirm whether that is intended.
    List<SortColInfo> sortColumns = new ArrayList<SortColInfo>();
    sortColumns.add(new SortColInfo("f1", 0, SortColInfo.Order.ASCENDING));
    sortColumns.add(new SortColInfo("f1", 1, SortColInfo.Order.DESCENDING));
    SortInfo sortInfo = new SortInfo(sortColumns);

    ResourceSchema rsSchema = new ResourceSchema(origSchema, sortInfo);
    Schema genSchema = Schema.getPigSchema(rsSchema);

    boolean sameSchema = Schema.equals(genSchema, origSchema, true, false);
    assertTrue("generated schema equals original", sameSchema);

    assertTrue(rsSchema.getSortKeys()[0] == 0);
    assertTrue(rsSchema.getSortKeys()[1] == 1);
    assertTrue(rsSchema.getSortKeyOrders()[0] == ResourceSchema.Order.ASCENDING);
    assertTrue(rsSchema.getSortKeyOrders()[1] == ResourceSchema.Order.DESCENDING);
}
 
Example 13
Source File: Schema.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Appends a textual rendering of {@code schema} to {@code sb}.
 *
 * The output is wrapped in "()" when {@code type} is TUPLE and "{}" when it
 * is BAG; other types get no wrapper. Fields are separated by commas, each
 * preceded by a call to indent at one level deeper than {@code indentLevel}.
 * Tuple and bag fields recurse; map fields print their type name followed
 * by "[...]" around any inner schema.
 *
 * @param sb          buffer to append to
 * @param schema      schema to render; may be null, in which case only the
 *                    delimiters and the closing indent are emitted
 * @param type        DataType code of the enclosing field
 * @param indentLevel indent level of the enclosing field
 * @throws FrontendException if a field cannot be read from the schema
 */
public static void stringifySchema(StringBuilder sb,
                                   Schema schema,
                                   byte type,
                                   int indentLevel)
                                        throws FrontendException{

    if (type == DataType.TUPLE) {
        sb.append("(");
    } else if (type == DataType.BAG) {
        sb.append("{");
    }

    // Fields are rendered one level deeper than the enclosing delimiters.
    final int fieldIndent = indentLevel + 1;

    if (schema != null) {
        for (int pos = 0; pos < schema.size(); pos++) {

            // Comma before every field except the first.
            if (pos > 0) {
                sb.append(",");
            }

            indent(sb, fieldIndent);

            FieldSchema field = schema.getField(pos);
            if (field == null) {
                // Separator and indent were already written; nothing else to add.
                continue;
            }

            if (field.alias != null) {
                sb.append(field.alias);
                sb.append(": ");
            }

            if (DataType.isAtomic(field.type)) {
                sb.append(DataType.findTypeName(field.type));
            } else if (field.type == DataType.TUPLE || field.type == DataType.BAG) {
                // safety net: a schema that contains itself would recurse forever
                if (schema == field.schema) {
                    throw new AssertionError("Schema refers to itself as inner schema");
                }
                stringifySchema(sb, field.schema, field.type, fieldIndent);
            } else if (field.type == DataType.MAP) {
                sb.append(DataType.findTypeName(field.type));
                sb.append("[");
                if (field.schema != null) {
                    stringifySchema(sb, field.schema, field.type, fieldIndent);
                }
                sb.append("]");
            } else {
                sb.append(DataType.findTypeName(field.type));
            }
        }
    }

    // Back at the caller's level for the closing delimiter.
    indent(sb, indentLevel);

    if (type == DataType.TUPLE) {
        sb.append(")");
    } else if (type == DataType.BAG) {
        sb.append("}");
    }

}
 
Example 14
Source File: AugmentBaseDataVisitor.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Produces a value "one step smaller" than {@code v}, chosen by the Pig
 * type that DataType.findType reports for it.
 *
 * Strings and byte arrays lose their last element (null when already
 * empty); numeric types have one subtracted; a DateTime is reduced by one
 * unit of its smallest non-zero field among millis, seconds, minutes and
 * hours, or by one day when all of those are zero.
 *
 * @param v the value to shrink
 * @return the smaller value, or null for bags, tuples, maps, empty
 *         strings/byte arrays and any type not handled here
 */
Object GetSmallerValue(Object v) {
    byte type = DataType.findType(v);

    switch (type) {
    // Complex types have no smaller value.
    case DataType.BAG:
    case DataType.TUPLE:
    case DataType.MAP:
        return null;

    case DataType.CHARARRAY: {
        String text = (String) v;
        return (text.length() > 0) ? text.substring(0, text.length() - 1) : null;
    }
    case DataType.BYTEARRAY: {
        DataByteArray bytes = (DataByteArray) v;
        return (bytes.size() > 0)
                ? new DataByteArray(bytes.get(), 0, bytes.size() - 1)
                : null;
    }

    case DataType.INTEGER:
        return Integer.valueOf((Integer) v - 1);
    case DataType.LONG:
        return Long.valueOf((Long) v - 1);
    case DataType.FLOAT:
        return Float.valueOf((Float) v - 1);
    case DataType.DOUBLE:
        return Double.valueOf((Double) v - 1);
    case DataType.BIGINTEGER:
        return ((BigInteger)v).subtract(BigInteger.ONE);
    case DataType.BIGDECIMAL:
        return ((BigDecimal)v).subtract(BigDecimal.ONE);

    case DataType.DATETIME: {
        DateTime moment = (DateTime) v;
        // Step back by the finest unit that is currently non-zero.
        if (moment.getMillisOfSecond() != 0) {
            return moment.minusMillis(1);
        }
        if (moment.getSecondOfMinute() != 0) {
            return moment.minusSeconds(1);
        }
        if (moment.getMinuteOfHour() != 0) {
            return moment.minusMinutes(1);
        }
        if (moment.getHourOfDay() != 0) {
            return moment.minusHours(1);
        }
        return moment.minusDays(1);
    }

    default:
        return null;
    }

}
 
Example 15
Source File: TestPackage.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Dispatches to runTest with a randomly generated value of the requested
 * Pig type, to show that the code under test has no type specific logic.
 *
 * MAP, INTERNALMAP and BYTE return without running anything. BIGCHARARRAY
 * builds a string of at least 65535 characters and runs it as CHARARRAY.
 * Any type without a case fails the test.
 *
 * @param t     DataType code selecting the kind of value to generate
 * @param inner passed through unchanged to runTest
 */
private void pickTest(byte t, boolean[] inner) throws ExecException, IOException {
    Random rnd = new Random();
    switch (t) {
    // Types that are skipped entirely.
    case DataType.MAP:
    case DataType.INTERNALMAP:
    case DataType.BYTE:
        return; // map not key type

    // Complex types.
    case DataType.BAG:
        runTest(GenRandomData.genRandSmallTupDataBag(rnd, 10, 100), inner, DataType.BAG);
        break;
    case DataType.TUPLE:
        runTest(GenRandomData.genRandSmallBagTuple(rnd, 10, 100), inner, DataType.TUPLE);
        break;

    // Text and binary.
    case DataType.BYTEARRAY:
        runTest(GenRandomData.genRandDBA(rnd), inner, DataType.BYTEARRAY);
        break;
    case DataType.CHARARRAY:
        runTest(GenRandomData.genRandString(rnd), inner, DataType.CHARARRAY);
        break;
    case DataType.BIGCHARARRAY: {
        // Keep appending random strings until the length threshold is reached.
        StringBuilder big = new StringBuilder(GenRandomData.genRandString(rnd));
        while (big.length() < 65535) {
            big.append(GenRandomData.genRandString(rnd));
        }
        runTest(big.toString(), inner, DataType.CHARARRAY);
        break;
    }

    // Scalars.
    case DataType.BOOLEAN:
        runTest(rnd.nextBoolean(), inner, DataType.BOOLEAN);
        break;
    case DataType.INTEGER:
        runTest(rnd.nextInt(), inner, DataType.INTEGER);
        break;
    case DataType.LONG:
        runTest(rnd.nextLong(), inner, DataType.LONG);
        break;
    case DataType.FLOAT:
        runTest(rnd.nextFloat(), inner, DataType.FLOAT);
        break;
    case DataType.DOUBLE:
        runTest(rnd.nextDouble(), inner, DataType.DOUBLE);
        break;
    case DataType.BIGINTEGER:
        runTest(new BigInteger(256, rnd), inner, DataType.BIGINTEGER);
        break;
    case DataType.BIGDECIMAL:
        runTest(new BigDecimal(rnd.nextDouble()), inner, DataType.BIGDECIMAL);
        break;
    case DataType.DATETIME:
        runTest(new DateTime(rnd.nextLong()), inner, DataType.DATETIME);
        break;

    default:
        fail("No test case for type " + DataType.findTypeName(t));
    }
}
 
Example 16
Source File: COV.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Declares the result as a single BAG field, named by getSchemaName from
 * this class's lower-cased name and the input schema.
 *
 * @param input schema of the arguments passed to this function
 * @return a one-field schema of type BAG
 */
@Override
public Schema outputSchema(Schema input) {
    String udfName = this.getClass().getName().toLowerCase();
    Schema.FieldSchema bagField =
            new Schema.FieldSchema(getSchemaName(udfName, input), DataType.BAG);
    return new Schema(bagField);
}
 
Example 17
Source File: UPPER.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Declares the result as a single BAG field, named by getSchemaName from
 * the fixed base name "swap" and the input schema.
 *
 * @param input schema of the arguments passed to this function
 * @return a one-field schema of type BAG
 */
@Override
public Schema outputSchema(Schema input) {
    Schema.FieldSchema bagField =
            new Schema.FieldSchema(getSchemaName("swap", input), DataType.BAG);
    return new Schema(bagField);
}
 
Example 18
Source File: EmpiricalCountEntropy.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Validates the input schema and declares the result as a single DOUBLE
 * field.
 *
 * The first input field must be a bag whose first element is a tuple; that
 * tuple must have a schema with exactly one field, of type INTEGER or LONG.
 * The output field name comes from getSchemaName, called with this class's
 * lower-cased name and the input schema.
 *
 * @param input schema of the arguments passed to this function
 * @return a one-field schema of type DOUBLE
 * @throws RuntimeException if any of the checks above fails, or wrapping a
 *         FrontendException raised while inspecting the schema
 */
@Override
public Schema outputSchema(Schema input)
{
    try {
        Schema.FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }

        // First (element) field of the bag.
        Schema.FieldSchema elementField = bagField.schema.getField(0);
        if (elementField.type != DataType.TUPLE) {
            throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                                     DataType.findTypeName(elementField.type)));
        }

        Schema tupleSchema = elementField.schema;
        if (tupleSchema == null) {
            throw new RuntimeException("The tuple of input bag has no schema");
        }

        List<Schema.FieldSchema> tupleFields = tupleSchema.getFields();
        if (tupleFields == null || tupleFields.size() != 1) {
            throw new RuntimeException("The field schema of the input tuple is null or its size is not 1");
        }

        byte countType = tupleFields.get(0).type;
        boolean isIntegral = countType == DataType.INTEGER || countType == DataType.LONG;
        if (!isIntegral) {
            String[] expectedTypes = {
                DataType.findTypeName(DataType.INTEGER),
                DataType.findTypeName(DataType.LONG)
            };
            throw new RuntimeException("Expect the type of the input tuple to be of (" +
                    java.util.Arrays.toString(expectedTypes) + "), but instead found " + 
                    DataType.findTypeName(countType));
        }

        String udfName = this.getClass().getName().toLowerCase();
        Schema.FieldSchema resultField =
            new Schema.FieldSchema(getSchemaName(udfName, input), DataType.DOUBLE);
        return new Schema(resultField);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
 
Example 19
Source File: WeightedReservoirSample.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Validates the input schema and declares the result as a bag with the
 * same inner schema as the input bag.
 *
 * The first input field must be a bag whose first element is a tuple with
 * a schema; that tuple must have more fields than the weight field index,
 * and the weight field must be INTEGER, LONG, FLOAT or DOUBLE.
 *
 * @param input schema of the arguments passed to this function
 * @return a one-field schema of type BAG reusing the input bag's schema
 * @throws RuntimeException if any of the checks above fails, or wrapping a
 *         FrontendException raised while inspecting the schema
 */
@Override
public Schema outputSchema(Schema input) {
  try {
    Schema.FieldSchema inputFieldSchema = input.getField(0);

    if (inputFieldSchema.type != DataType.BAG) {
      throw new RuntimeException("Expected a BAG as input");
    }
    
    Schema inputBagSchema = inputFieldSchema.schema;
    
    if (inputBagSchema.getField(0).type != DataType.TUPLE)
    {
        throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                               DataType.findTypeName(inputBagSchema.getField(0).type)));
    }
    
    Schema tupleSchema = inputBagSchema.getField(0).schema;
    
    if(tupleSchema == null) {
        throw new RuntimeException("The tuple of input bag has no schema");
    }
    
    List<Schema.FieldSchema> fieldSchemaList = tupleSchema.getFields();
    
    if(fieldSchemaList == null || fieldSchemaList.size() <= Math.max(0, this.weightIdx)) {
        throw new RuntimeException("The field schema of the input tuple is null " +
        		                   "or the tuple size is no more than the weight field index: "
                                   + this.weightIdx);
    }
    
    // Look the weight field's type up once instead of once per comparison.
    byte weightType = fieldSchemaList.get(this.weightIdx).type;

    if(weightType != DataType.INTEGER &&
       weightType != DataType.LONG &&
       weightType != DataType.FLOAT &&
       weightType != DataType.DOUBLE)
    {
        String[] expectedTypes = new String[] {DataType.findTypeName(DataType.INTEGER),
                                               DataType.findTypeName(DataType.LONG),
                                               DataType.findTypeName(DataType.FLOAT),
                                               DataType.findTypeName(DataType.DOUBLE)};
        throw new RuntimeException("Expect the type of the weight field of the input tuple to be of (" +
                java.util.Arrays.toString(expectedTypes) + "), but instead found (" + 
                DataType.findTypeName(weightType) + "), weight field: " + 
                this.weightIdx);
    } 
    
    return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
                                             inputFieldSchema.schema, DataType.BAG));    
  } catch (FrontendException e) {
    // The cause travels with the rethrown exception, so it is not also
    // printed here; whoever handles the RuntimeException reports it once.
    throw new RuntimeException(e);
  }
}
 
Example 20
Source File: CounterConverter.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Turns each input tuple into an output tuple with three extra leading
 * fields: position 0 holds {@code index}, position 1 the Spark counter,
 * position 2 the local counter read before this tuple updated it. The
 * original fields follow from position 3 onwards.
 *
 * For row-number or dense-rank counters both counters are incremented per
 * tuple; otherwise they are advanced by the size of the bag found in the
 * tuple's last field.
 *
 * @param index value stored in position 0 of every output tuple
 * @param input tuples to convert
 * @return an iterator over the converted tuples
 * @throws RuntimeException wrapping any ExecException raised while reading
 *         or writing tuple fields
 */
@Override
public Iterator<Tuple> call(Integer index, final 
		Iterator<Tuple> input) {
	// Declared outside the loop on purpose: when a tuple's last field is
	// not a bag, the size seen for an earlier tuple is reused.
	long sizeBag = 0L;
	List<Tuple> listOutput = new ArrayList<Tuple>();

	try {
		while (input.hasNext()) {
			Tuple inp = input.next();
			int fieldCount = inp.getAll().size();
			Tuple output = TupleFactory.getInstance().newTuple(fieldCount + 3);

			// Copy the original fields after the three counter slots.
			for (int pos = 0; pos < fieldCount; pos++) {
				output.set(pos + 3, inp.get(pos));
			}

			if (poCounter.isRowNumber() || poCounter.isDenseRank()) {
				output.set(2, getLocalCounter());
				incrementSparkCounter();
				incrementLocalCounter();
			} else if (!poCounter.isDenseRank()) {
				int positionBag = fieldCount - 1;
				if (inp.getType(positionBag) == DataType.BAG) {
					sizeBag = ((org.apache.pig.data.DefaultAbstractBag)
							inp.get(positionBag)).size();
				}

				output.set(2, getLocalCounter());

				addToSparkCounter(sizeBag);
				addToLocalCounter(sizeBag);
			}

			output.set(0, index);
			output.set(1, getSparkCounter());
			listOutput.add(output);
		}
	} catch (ExecException e) {
		throw new RuntimeException(e);
	}

	return listOutput.iterator();
}