org.apache.hadoop.hive.ql.exec.vector.ListColumnVector Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.exec.vector.ListColumnVector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OrcWriter.java    From osm2orc with ISC License 6 votes vote down vote up
/**
 * Fills the current batch row for a way.
 *
 * <p>Shared entity columns are written by {@code addCommonProperties}; the
 * latitude/longitude columns are marked null for this row, and the id of each
 * way node is stored in the first field of the struct that backs the list
 * column at index 5.
 *
 * @param container wrapper whose entity is read as a {@code Way}
 */
@Override
public void process(WayContainer container) {
    DecimalColumnVector lat = (DecimalColumnVector) batch.cols[3];
    DecimalColumnVector lon = (DecimalColumnVector) batch.cols[4];
    ListColumnVector nds = (ListColumnVector) batch.cols[5];

    checkLimit();
    addCommonProperties(container);

    // Neither coordinate column carries a value for this row.
    lat.isNull[row] = true;
    lon.isNull[row] = true;
    lat.set(row, (HiveDecimal) null);
    lon.set(row, (HiveDecimal) null);

    Way way = container.getEntity();

    int nodeCount = way.getWayNodes().size();
    nds.lengths[row] = nodeCount;
    nds.childCount += nodeCount;
    // Existing child data is only kept when this row does not start at slot 0.
    nds.child.ensureSize(nds.childCount, nds.offsets[row] != 0);

    int firstSlot = (int) nds.offsets[row];
    for (int n = 0; n < nodeCount; n++) {
        StructColumnVector nodeStruct = (StructColumnVector) nds.child;
        LongColumnVector nodeIds = (LongColumnVector) nodeStruct.fields[0];

        nodeIds.vector[firstSlot + n] = way.getWayNodes().get(n).getNodeId();
    }
}
 
Example #2
Source File: VectorColumnFiller.java    From secor with Apache License 2.0 6 votes vote down vote up
/**
 * Copies a JSON array into one row of a list column vector.
 *
 * <p>A missing or JSON-null value marks the row as null. Otherwise the row's
 * elements are appended after the children already present, and each element
 * is handed to {@code childrenConverter}.
 *
 * @param value JSON value for this row; may be {@code null}
 * @param vect  target vector; cast to {@code ListColumnVector} for non-null values
 * @param row   index of the row being filled
 */
public void convert(JsonElement value, ColumnVector vect, int row) {
    boolean missing = value == null || value.isJsonNull();
    if (missing) {
        vect.noNulls = false;
        vect.isNull[row] = true;
        return;
    }

    ListColumnVector list = (ListColumnVector) vect;
    JsonArray elements = value.getAsJsonArray();
    int count = elements.size();
    int start = list.childCount;

    list.lengths[row] = count;
    list.offsets[row] = start;
    list.childCount += count;
    list.child.ensureSize(list.childCount, true);

    for (int k = 0; k < count; k++) {
        childrenConverter.convert(elements.get(k), list.child, start + k);
    }
}
 
Example #3
Source File: HiveORCCopierTest.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Populates a list vector and its long child vector with fixture data.
 *
 * <p>Every row gets offset {@code i}, length 2 and child value {@code 10 * i};
 * rows from index 512 up to {@code HIVE_BATCH_SIZE} are flagged as null.
 *
 * @param input list vector to fill
 * @param child child vector that receives the element values
 */
private void getHiveBatch(ListColumnVector input, LongColumnVector child) {
  input.childCount = 800;
  input.noNulls = false;

  int idx = 0;
  while (idx < HIVE_BATCH_SIZE) {
    child.vector[idx] = 10 * idx;
    input.offsets[idx] = idx;
    input.lengths[idx] = 2;
    idx++;
  }

  // Flag the tail of the batch as null.
  for (int nullIdx = 512; nullIdx < HIVE_BATCH_SIZE; nullIdx++) {
    input.isNull[nullIdx] = true;
  }
}
 
Example #4
Source File: HiveORCCopierTest.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Checks {@code ListCopier.countChildren} on the fixture built by
 * {@code getHiveBatch}: the first 512 rows are expected to yield 1024 children
 * and the following 512 rows are expected to yield none.
 */
@Test
public void testListCopier() {
  LongColumnVector elements = new LongColumnVector(HIVE_BATCH_SIZE);
  ListColumnVector list = new ListColumnVector(HIVE_BATCH_SIZE, elements);
  list.init();
  getHiveBatch(list, elements);

  HiveORCCopiers.ListCopier copier =
    new HiveORCCopiers.ListCopier((MultiValuedColumnVector) list);

  final int half = 512;
  long firstHalfChildren = copier.countChildren(list.noNulls, list.lengths, 0, half);
  long secondHalfChildren = copier.countChildren(list.noNulls, list.lengths, half, half);

  assertEquals(1024, firstHalfChildren);
  assertEquals(0, secondHalfChildren);
}
 
Example #5
Source File: OrcWriter.java    From osm2orc with ISC License 5 votes vote down vote up
/**
 * Fills the current batch row for a relation.
 *
 * <p>Shared entity columns are written by {@code addCommonProperties}; the
 * latitude/longitude columns are marked null for this row, and each relation
 * member is stored as a (type, id, role) struct in the list column at index 6.
 *
 * <p>Text is encoded explicitly as UTF-8 and lower-cased with
 * {@code Locale.ROOT} so the written bytes do not depend on the JVM's default
 * charset or locale.
 *
 * @param container wrapper whose entity is read as a {@code Relation}
 */
@Override
public void process(RelationContainer container) {
    DecimalColumnVector lat = (DecimalColumnVector) batch.cols[3];
    DecimalColumnVector lon = (DecimalColumnVector) batch.cols[4];
    ListColumnVector members = (ListColumnVector) batch.cols[6];

    checkLimit();
    addCommonProperties(container);

    // Neither coordinate column carries a value for this row.
    lat.isNull[row] = true;
    lon.isNull[row] = true;
    lat.set(row, (HiveDecimal) null);
    lon.set(row, (HiveDecimal) null);

    Relation relation = container.getEntity();

    int memberCount = relation.getMembers().size();
    members.lengths[row] = memberCount;
    members.childCount += memberCount;
    // Existing child data is only kept when this row does not start at slot 0.
    members.child.ensureSize(members.childCount, members.offsets[row] != 0);

    StructColumnVector membersStruct = (StructColumnVector) members.child;
    BytesColumnVector memberTypes = (BytesColumnVector) membersStruct.fields[0];
    LongColumnVector memberIds = (LongColumnVector) membersStruct.fields[1];
    BytesColumnVector memberRoles = (BytesColumnVector) membersStruct.fields[2];
    int firstSlot = (int) members.offsets[row];

    for (int j = 0; j < memberCount; j++) {
        int slot = firstSlot + j;

        // Fully-qualified names keep this method independent of the file's imports.
        memberTypes.setVal(slot, relation.getMembers().get(j).getMemberType().toString()
                .toLowerCase(java.util.Locale.ROOT)
                .getBytes(java.nio.charset.StandardCharsets.UTF_8));
        memberIds.vector[slot] = relation.getMembers().get(j).getMemberId();
        memberRoles.setVal(slot, relation.getMembers().get(j).getMemberRole()
                .getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }
}
 
Example #6
Source File: JsonFieldFiller.java    From secor with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one row of a list column as a JSON array.
 *
 * <p>The row's elements are read from the child vector, starting at the row's
 * offset, and each one is emitted through {@code setValue} using the first
 * child type of {@code schema}.
 *
 * @param writer JSON sink
 * @param vector list column to read from
 * @param schema type description of the list; its first child describes the elements
 * @param row    index of the row to write
 * @throws JSONException propagated from the writer or from {@code setValue}
 */
private static void setList(JSONWriter writer, ListColumnVector vector,
        TypeDescription schema, int row) throws JSONException {
    writer.array();

    final int first = (int) vector.offsets[row];
    final TypeDescription elementType = schema.getChildren().get(0);
    final long count = vector.lengths[row];

    for (int k = 0; k < count; k++) {
        int childRow = first + k;
        setValue(writer, vector.child, elementType, childRow);
    }

    writer.endArray();
}
 
Example #7
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a list column vector of {@code VectorizedRowBatch.DEFAULT_SIZE} rows
 * whose child vector is created from the list's element inspector.
 *
 * @param loi inspector describing the list type
 * @return a new {@code ListColumnVector} wrapping the element vector
 */
private ColumnVector getListColumnVector(ListObjectInspector loi) {
  return new ListColumnVector(
      VectorizedRowBatch.DEFAULT_SIZE,
      getColumnVector(loi.getListElementObjectInspector()));
}
 
Example #8
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the column vector for a list type: the element vector is obtained
 * via {@code getColumnVector} and wrapped in a {@code ListColumnVector} sized
 * to {@code VectorizedRowBatch.DEFAULT_SIZE}.
 *
 * @param loi inspector describing the list type
 * @return the newly created list column vector
 */
private ColumnVector getListColumnVector(ListObjectInspector loi) {
  final ColumnVector elementVector = getColumnVector(loi.getListElementObjectInspector());
  final int capacity = VectorizedRowBatch.DEFAULT_SIZE;
  return new ListColumnVector(capacity, elementVector);
}
 
Example #9
Source File: OrcWriter.java    From osm2orc with ISC License 4 votes vote down vote up
/**
 * Writes the columns shared by every entity kind into the current batch row:
 * id, type, tags, changeset, timestamp, user, version and visibility.
 *
 * <p>The node-reference and member list columns are initialised to empty lists
 * positioned at the current end of their child data.
 *
 * <p>Text is encoded explicitly as UTF-8 and lower-cased with
 * {@code Locale.ROOT} so the written bytes do not depend on the JVM's default
 * charset or locale.
 *
 * @param container wrapper providing the entity to write
 */
private void addCommonProperties(EntityContainer container) {
    LongColumnVector id = (LongColumnVector) batch.cols[0];
    BytesColumnVector type = (BytesColumnVector) batch.cols[1];
    MapColumnVector tags = (MapColumnVector) batch.cols[2];
    ListColumnVector nds = (ListColumnVector) batch.cols[5];
    ListColumnVector members = (ListColumnVector) batch.cols[6];
    LongColumnVector changeset = (LongColumnVector) batch.cols[7];
    TimestampColumnVector timestamp = (TimestampColumnVector) batch.cols[8];
    LongColumnVector uid = (LongColumnVector) batch.cols[9];
    BytesColumnVector user = (BytesColumnVector) batch.cols[10];
    LongColumnVector version = (LongColumnVector) batch.cols[11];
    LongColumnVector visible = (LongColumnVector) batch.cols[12];

    Entity entity = container.getEntity();

    id.vector[row] = entity.getId();
    changeset.vector[row] = entity.getChangesetId();
    // Fully-qualified names keep this method independent of the file's imports.
    type.setVal(row, entity.getType().toString()
            .toLowerCase(java.util.Locale.ROOT)
            .getBytes(java.nio.charset.StandardCharsets.UTF_8));

    tags.offsets[row] = tags.childCount;
    tags.lengths[row] = entity.getTags().size(); // number of key/value pairings
    tags.childCount += tags.lengths[row];
    // Existing child data is only kept when this row does not start at slot 0.
    boolean preserveTagData = tags.offsets[row] != 0;
    tags.keys.ensureSize(tags.childCount, preserveTagData);
    tags.values.ensureSize(tags.childCount, preserveTagData);

    BytesColumnVector tagKeys = (BytesColumnVector) tags.keys;
    BytesColumnVector tagValues = (BytesColumnVector) tags.values;
    int slot = (int) tags.offsets[row];
    for (Tag tag : entity.getTags()) {
        tagKeys.setVal(slot, tag.getKey().getBytes(java.nio.charset.StandardCharsets.UTF_8));
        tagValues.setVal(slot, tag.getValue().getBytes(java.nio.charset.StandardCharsets.UTF_8));
        slot++;
    }

    timestamp.time[row] = entity.getTimestamp().getTime();
    timestamp.nanos[row] = 0;

    uid.vector[row] = entity.getUser().getId();

    user.setVal(row, entity.getUser().getName().getBytes(java.nio.charset.StandardCharsets.UTF_8));

    version.vector[row] = entity.getVersion();

    // Compare by value: a reference comparison against Boolean.FALSE misses
    // any Boolean instance that is not the canonical constant.
    boolean hidden = Boolean.FALSE.equals(entity.getMetaTags().get("visible"));
    visible.vector[row] = hidden ? 0 : 1;

    nds.offsets[row] = nds.childCount;
    nds.lengths[row] = 0;

    members.offsets[row] = members.childCount;
    members.lengths[row] = 0;
}
 
Example #10
Source File: JsonFieldFiller.java    From secor with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a single cell of a column vector to the JSON writer.
 *
 * <p>When the vector is flagged as repeating, slot 0 is read regardless of
 * {@code row}. A null cell is written as JSON null; otherwise the value is
 * emitted according to the category of {@code schema}, delegating to
 * {@code setList}, {@code setStruct}, {@code setUnion} and {@code setMap} for
 * compound types.
 *
 * @param writer JSON sink
 * @param vector column to read from; cast according to the schema category
 * @param schema type description of the column
 * @param row    index of the row to write
 * @throws JSONException propagated from the writer
 * @throws UnsupportedOperationException for BINARY columns
 * @throws IllegalArgumentException for a category not handled here
 */
static void setValue(JSONWriter writer, ColumnVector vector,
        TypeDescription schema, int row) throws JSONException {
    final int index = vector.isRepeating ? 0 : row;

    if (!vector.noNulls && vector.isNull[index]) {
        writer.value(null);
        return;
    }

    switch (schema.getCategory()) {
    case BOOLEAN: {
        boolean flag = ((LongColumnVector) vector).vector[index] != 0;
        writer.value(flag);
        break;
    }
    case BYTE:
    case SHORT:
    case INT:
    case LONG: {
        long integral = ((LongColumnVector) vector).vector[index];
        writer.value(integral);
        break;
    }
    case FLOAT:
    case DOUBLE: {
        double floating = ((DoubleColumnVector) vector).vector[index];
        writer.value(floating);
        break;
    }
    case STRING:
    case CHAR:
    case VARCHAR: {
        BytesColumnVector text = (BytesColumnVector) vector;
        writer.value(text.toString(index));
        break;
    }
    case DECIMAL: {
        DecimalColumnVector decimals = (DecimalColumnVector) vector;
        writer.value(decimals.vector[index].toString());
        break;
    }
    case DATE: {
        int days = (int) ((LongColumnVector) vector).vector[index];
        writer.value(new DateWritable(days).toString());
        break;
    }
    case TIMESTAMP: {
        TimestampColumnVector timestamps = (TimestampColumnVector) vector;
        writer.value(timestamps.asScratchTimestamp(index).toString());
        break;
    }
    case LIST:
        setList(writer, (ListColumnVector) vector, schema, index);
        break;
    case MAP:
        setMap(writer, (MapColumnVector) vector, schema, index);
        break;
    case STRUCT:
        setStruct(writer, (StructColumnVector) vector, schema, index);
        break;
    case UNION:
        setUnion(writer, (UnionColumnVector) vector, schema, index);
        break;
    case BINARY:
        // Fail loudly instead of dropping the value: silently ignoring it leads to
        // hard-to-trace failures later on (see https://github.com/pinterest/secor/pull/1018).
        throw new UnsupportedOperationException();
    default:
        throw new IllegalArgumentException("Unknown type "
                + schema.toString());
    }
}
 
Example #11
Source File: ORCRecordExtractorTest.java    From incubator-pinot with Apache License 2.0 4 votes vote down vote up
/**
 * Create an ORC input file using the input records.
 *
 * <p>All records are written as a single row batch. The {@code user_id},
 * {@code firstName} and {@code bids} columns accept null values; the other
 * columns are written unconditionally. The child vector of {@code bids} is
 * grown on demand, so the total number of bids is not limited by a fixed
 * capacity.
 *
 * @throws IOException if the ORC writer fails
 */
@Override
protected void createInputFile()
    throws IOException {
  TypeDescription schema = TypeDescription.fromString(
      "struct<user_id:int,firstName:string,lastName:string,bids:array<int>,campaignInfo:string,cost:double,timestamp:bigint>");
  Writer writer = OrcFile.createWriter(new Path(_dataFile.getAbsolutePath()),
      OrcFile.writerOptions(new Configuration()).setSchema(schema));

  int numRecords = _inputRecords.size();
  VectorizedRowBatch rowBatch = schema.createRowBatch(numRecords);
  LongColumnVector userIdVector = (LongColumnVector) rowBatch.cols[0];
  userIdVector.noNulls = false;
  BytesColumnVector firstNameVector = (BytesColumnVector) rowBatch.cols[1];
  firstNameVector.noNulls = false;
  BytesColumnVector lastNameVector = (BytesColumnVector) rowBatch.cols[2];
  ListColumnVector bidsVector = (ListColumnVector) rowBatch.cols[3];
  bidsVector.noNulls = false;
  LongColumnVector bidsElementVector = (LongColumnVector) bidsVector.child;
  BytesColumnVector campaignInfoVector = (BytesColumnVector) rowBatch.cols[4];
  DoubleColumnVector costVector = (DoubleColumnVector) rowBatch.cols[5];
  LongColumnVector timestampVector = (LongColumnVector) rowBatch.cols[6];

  for (int i = 0; i < numRecords; i++) {
    Map<String, Object> record = _inputRecords.get(i);

    Integer userId = (Integer) record.get("user_id");
    if (userId != null) {
      userIdVector.vector[i] = userId;
    } else {
      userIdVector.isNull[i] = true;
    }
    String firstName = (String) record.get("firstName");
    if (firstName != null) {
      firstNameVector.setVal(i, StringUtils.encodeUtf8(firstName));
    } else {
      firstNameVector.isNull[i] = true;
    }
    lastNameVector.setVal(i, StringUtils.encodeUtf8((String) record.get("lastName")));
    List<Integer> bids = (List<Integer>) record.get("bids");
    if (bids != null) {
      // Make room for this record's elements while keeping the ones already written.
      bidsElementVector.ensureSize(bidsVector.childCount + bids.size(), true);
      bidsVector.offsets[i] = bidsVector.childCount;
      bidsVector.lengths[i] = bids.size();
      for (int bid : bids) {
        bidsElementVector.vector[bidsVector.childCount++] = bid;
      }
    } else {
      bidsVector.isNull[i] = true;
    }
    campaignInfoVector.setVal(i, StringUtils.encodeUtf8((String) record.get("campaignInfo")));
    costVector.vector[i] = (double) record.get("cost");
    timestampVector.vector[i] = (long) record.get("timestamp");

    rowBatch.size++;
  }

  writer.addRowBatch(rowBatch);
  rowBatch.reset();
  writer.close();
}