Java Code Examples for org.codehaus.jackson.node.ArrayNode#insert()

The following examples show how to use org.codehaus.jackson.node.ArrayNode#insert(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ShuffleRewriter.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Turns a distinct shuffle into a plain SHUFFLE whose reduce phase starts
 * with a DISTINCT operator.
 *
 * All edits are applied to the node returned by {@code cloneNode(job)}, which
 * is also the value returned.
 *
 * @param job job JSON that contains a "shuffle" object with a "name" field
 * @return the rewritten job node
 */
private JsonNode rewriteDistinct(JsonNode job)
{
    final ObjectNode rewritten = (ObjectNode) cloneNode(job);
    final ObjectNode shuffleNode = (ObjectNode) rewritten.get("shuffle");
    final String relation = getText(shuffleNode, "name");

    // DISTINCT reads and writes the shuffled relation under the same name
    final ObjectNode distinct =
            JsonUtils.createObjectNode("operator", "DISTINCT",
                                       "input", relation,
                                       "output", relation);

    // make sure a reduce operator list exists, then put DISTINCT at its head
    final boolean reducePresent =
            rewritten.has("reduce") && !rewritten.get("reduce").isNull();
    if (!reducePresent)
    {
        rewritten.put("reduce", mapper.createArrayNode());
    }
    ((ArrayNode) rewritten.get("reduce")).insert(0, distinct);

    // mark the shuffle itself as an ordinary SHUFFLE flagged as distinct
    shuffleNode.put("type", "SHUFFLE");
    shuffleNode.put("distinctShuffle", true);

    return rewritten;
}
 
Example 2
Source File: ShuffleRewriter.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Rewrites a BLOCKGEN shuffle into a plain SHUFFLE plus reduce-side operators.
 *
 * When the shuffle's "blockgenType" is BY_INDEX (case-insensitive) the work is
 * delegated to {@link #rewriteBlockgenByIndex(JsonNode)}. Otherwise a
 * CREATE_BLOCK operator is inserted at the head of the reduce operator list
 * and, if the shuffle has "distinct" set to true, a DISTINCT operator is
 * inserted in front of it. All edits are applied to the node returned by
 * {@code cloneNode(job)}.
 *
 * @param job job JSON containing a "shuffle" object with a textual "blockgenType"
 * @return the rewritten job node
 * @throws PlanRewriteException if the shuffle has no "pivotKeys" field
 */
private JsonNode rewriteBlockgen(JsonNode job)
{

    String blockgenType = job.get("shuffle").get("blockgenType").getTextValue();

    // BY_INDEX blockgen has its own dedicated rewrite
    if (blockgenType.equalsIgnoreCase("BY_INDEX"))
    {
        return rewriteBlockgenByIndex(job);
    }
    // else: following is the rewrite of BLOCKGEN

    ObjectNode newJob = (ObjectNode) cloneNode(job);
    ObjectNode shuffle = (ObjectNode) newJob.get("shuffle");
    // capture these now: both fields are removed from the shuffle further down
    JsonNode blockgenTypeNode = shuffle.get("blockgenType");
    JsonNode blockgenValueNode = shuffle.get("blockgenValue");

    if (!shuffle.has("pivotKeys"))
    {
        throw new PlanRewriteException("PivotKeys are not defined in SHUFFLE");
    }

    // add CREATE_BLOCK operator in the reducer
    if (!newJob.has("reduce") || newJob.get("reduce").isNull())
    {
        newJob.put("reduce", mapper.createArrayNode());
    }
    ArrayNode reduce = (ArrayNode) newJob.get("reduce");
    ObjectNode createBlockOperator =
            createObjectNode("operator",
                             "CREATE_BLOCK",
                             "input",
                             shuffle.get("name"),
                             "output",
                             shuffle.get("name"),
                             "blockgenType",
                             blockgenTypeNode,
                             "blockgenValue",
                             blockgenValueNode,
                             "partitionKeys",
                             shuffle.get("partitionKeys"));
    copyLine(shuffle, createBlockOperator, "[REDUCE] ");
    reduce.insert(0, createBlockOperator);

    // add DISTINCT operator, if requested
    boolean isDistinct =
            shuffle.has("distinct") && shuffle.get("distinct").getBooleanValue();

    if (isDistinct)
    {
        ObjectNode distinct =
                createObjectNode("operator",
                                 "DISTINCT",
                                 "input",
                                 shuffle.get("name"),
                                 "output",
                                 shuffle.get("name"));
        // the tag ends with a space like every other copyLine tag in this
        // file ("[REDUCE] ", "[MAP] "); previously the space was missing here
        copyLine(shuffle, distinct, "[REDUCE DISTINCT] ");
        // inserted at index 0 after CREATE_BLOCK, so DISTINCT ends up first
        reduce.insert(0, distinct);
    }

    // the sort keys for the SHUFFLE are set to the actual
    // blockgen PARTITION KEYS. These sort keys are configured into the JsonNode for
    // the CREATE_BLOCK operator

    // clean up shuffle
    shuffle.remove("blockgenType");
    shuffle.remove("blockgenValue");
    shuffle.put("type", "SHUFFLE");
    shuffle.put("distinct", isDistinct);

    // when the isPrefix check fails, the original pivotKeys move onto
    // CREATE_BLOCK and the shuffle's pivotKeys become its partitionKeys
    if (!CommonUtils.isPrefix(asArray(shuffle, "pivotKeys"),
                              asArray(shuffle, "partitionKeys")))
    {
        createBlockOperator.put("pivotKeys", shuffle.get("pivotKeys"));
        shuffle.put("pivotKeys", shuffle.get("partitionKeys"));
    }

    return newJob;
}
 
Example 3
Source File: ShuffleRewriter.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Rewrites a BLOCKGEN BY_INDEX shuffle into a plain SHUFFLE.
 *
 * The rewrite, applied to the node returned by {@code cloneNode(job)}:
 * registers a cache index for the shuffle's "relation" path, appends a
 * BLOCK_INDEX_JOIN operator to every mapper, inserts CREATE_BLOCK (and
 * DISTINCT, when the shuffle has "distinct" set to true) at the head of the
 * reduce operators, and finally reconfigures the shuffle to partition and
 * pivot on BLOCK_ID.
 *
 * @param job job JSON containing a "shuffle" object
 * @return the rewritten job node
 */
private JsonNode rewriteBlockgenByIndex(JsonNode job)
{
    final ObjectNode rewritten = (ObjectNode) cloneNode(job);
    final ObjectNode shuffleNode = (ObjectNode) rewritten.get("shuffle");
    final JsonNode relationName = shuffleNode.get("name");

    final String indexPath = getText(shuffleNode, "relation");

    // register the index in the job's cacheIndex list under a generated name
    final String indexName = generateVariableName(namesUsed);
    final boolean cacheIndexPresent =
            rewritten.has("cacheIndex") && !rewritten.get("cacheIndex").isNull();
    if (!cacheIndexPresent)
    {
        rewritten.put("cacheIndex", mapper.createArrayNode());
    }
    final ArrayNode cacheIndexList = (ArrayNode) rewritten.get("cacheIndex");
    cacheIndexList.add(createObjectNode("name", indexName, "path", indexPath));

    // map side: BLOCK_INDEX_JOIN template, copied into every mapper below
    final ObjectNode joinTemplate =
            createObjectNode("operator", "BLOCK_INDEX_JOIN",
                             "input", relationName,
                             "output", relationName,
                             "partitionKeys", shuffleNode.get("partitionKeys"),
                             "index", indexName);
    copyLine(shuffleNode, joinTemplate, "[MAP] ");
    for (JsonNode mapNode : rewritten.path("map"))
    {
        final boolean operatorsPresent =
                mapNode.has("operators") && !mapNode.get("operators").isNull();
        if (!operatorsPresent)
        {
            ((ObjectNode) mapNode).put("operators", mapper.createArrayNode());
        }
        // each mapper gets its own copy, appended as its last operator
        ((ArrayNode) mapNode.get("operators")).add(JsonUtils.cloneNode(joinTemplate));
    }

    // reduce side: CREATE_BLOCK keyed on BLOCK_ID
    final ObjectNode createBlockOp =
            createObjectNode("operator", "CREATE_BLOCK",
                             "input", relationName,
                             "output", relationName,
                             "blockgenType", "BY_INDEX",
                             "index", indexName,
                             "partitionKeys", createArrayNode("BLOCK_ID"),
                             "indexPath", indexPath);
    copyLine(shuffleNode, createBlockOp, "[REDUCE] ");

    final boolean reducePresent =
            rewritten.has("reduce") && !rewritten.get("reduce").isNull();
    if (!reducePresent)
    {
        rewritten.put("reduce", mapper.createArrayNode());
    }
    final ArrayNode reduceOps = (ArrayNode) rewritten.get("reduce");
    reduceOps.insert(0, createBlockOp);

    // optional DISTINCT, inserted in front of CREATE_BLOCK
    final boolean distinctRequested =
            shuffleNode.has("distinct") && shuffleNode.get("distinct").getBooleanValue();
    if (distinctRequested)
    {
        final ObjectNode distinctOp =
                createObjectNode("operator", "DISTINCT",
                                 "input", relationName,
                                 "output", relationName);
        copyLine(shuffleNode, distinctOp, "[REDUCE DISTINCT] ");
        reduceOps.insert(0, distinctOp);
    }

    // blockgen by index uses a different partitioner
    shuffleNode.put("partitionerClass",
                    "com.linkedin.cubert.plan.physical.ByIndexPartitioner");

    // turn the shuffle into a plain SHUFFLE partitioned on BLOCK_ID
    shuffleNode.put("type", "SHUFFLE");
    shuffleNode.put("partitionKeys", createArrayNode("BLOCK_ID"));
    shuffleNode.put("distinct", distinctRequested);
    shuffleNode.put("index", indexName);
    shuffleNode.remove("blockgenType");
    shuffleNode.remove("relation");

    // new pivot keys: BLOCK_ID first, then whatever pivot keys were there before
    final ArrayNode newPivotKeys = mapper.createArrayNode();
    newPivotKeys.add("BLOCK_ID");
    if (shuffleNode.has("pivotKeys"))
    {
        for (JsonNode existingKey : shuffleNode.path("pivotKeys"))
        {
            newPivotKeys.add(existingKey);
        }
    }
    shuffleNode.put("pivotKeys", newPivotKeys);

    return rewritten;
}
 
Example 4
Source File: ShuffleRewriter.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Rewrites a CUBE shuffle into a map-side CUBE operator, a plain SHUFFLE, and
 * a reduce-side GROUP_BY.
 *
 * All edits are applied to the node returned by {@code cloneNode(job)}. Every
 * mapper receives its own copy of the CUBE operator, so the mappers' operator
 * lists do not share a node instance.
 *
 * @param job job JSON containing a "shuffle" object with "name", "dimensions"
 *            and "aggregates" fields
 * @return the rewritten job node
 */
private JsonNode rewriteCube(JsonNode job)
{
    ObjectNode newJob = (ObjectNode) cloneNode(job);
    ObjectNode shuffle = (ObjectNode) newJob.get("shuffle");
    String name = getText(shuffle, "name");
    JsonNode aggregates = shuffle.get("aggregates");

    // create the map-side CUBE operator; it gets a clone of the aggregates
    // because the shuffle's own aggregates node is handed to
    // rewriteGroupByAggregateForCube further down
    ObjectNode cube =
            createObjectNode("operator",
                             "CUBE",
                             "input",
                             name,
                             "output",
                             name,
                             "dimensions",
                             shuffle.get("dimensions"),
                             "aggregates",
                             cloneNode(aggregates));

    // optional settings are carried over only when present on the shuffle
    if (shuffle.has("groupingSets"))
    {
        cube.put("groupingSets", shuffle.get("groupingSets"));
    }
    if (shuffle.has("innerDimensions"))
    {
        cube.put("innerDimensions", shuffle.get("innerDimensions"));
    }
    if (shuffle.has("hashTableSize"))
    {
        cube.put("hashTableSize", shuffle.get("hashTableSize"));
    }
    copyLine(shuffle, cube, "[MAP] ");

    // add it as the last operator for all mapper
    for (JsonNode map : newJob.path("map"))
    {
        if (!map.has("operators") || map.get("operators").isNull())
        {
            ((ObjectNode) map).put("operators", mapper.createArrayNode());
        }
        ArrayNode operators = (ArrayNode) map.get("operators");
        // we need unique references for each mapper's CUBE operator; adding
        // the same instance everywhere would alias it across mappers
        operators.add(cloneNode(cube));
    }

    // NOTE(review): presumably edits the aggregates in place for the
    // reduce-side GROUP_BY (the return value is ignored) -- confirm
    rewriteGroupByAggregateForCube(aggregates);

    // create the GROUP BY operator at the reducer
    ObjectNode groupBy =
            createObjectNode("operator",
                             "GROUP_BY",
                             "input",
                             name,
                             "output",
                             name,
                             "groupBy",
                             shuffle.get("dimensions"),
                             "aggregates",
                             aggregates);
    copyLine(shuffle, groupBy, "[REDUCE] ");
    // add it as first operator in reduce
    if (!newJob.has("reduce") || newJob.get("reduce").isNull())
    {
        newJob.put("reduce", mapper.createArrayNode());
    }
    ArrayNode reduce = (ArrayNode) newJob.get("reduce");
    reduce.insert(0, groupBy);

    // clean up shuffle: the dimensions become both partition and pivot keys
    shuffle.put("type", "SHUFFLE");
    shuffle.put("aggregates", aggregates);
    shuffle.put("partitionKeys", shuffle.get("dimensions"));
    shuffle.put("pivotKeys", shuffle.get("dimensions"));
    shuffle.remove("dimensions");
    shuffle.remove("groupingSets");
    shuffle.remove("innerDimensions");

    return newJob;
}
 
Example 5
Source File: ShuffleRewriter.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Rewrites a JOIN shuffle into a plain SHUFFLE with a REDUCE_JOIN_MAPPER
 * operator appended to each of the two mappers and a REDUCE_JOIN operator at
 * the head of the reduce operators.
 *
 * All edits are applied to the node returned by {@code cloneNode(job)}.
 *
 * @param job job JSON containing a "shuffle" object with "name" and
 *            "joinKeys" fields, and a "map" list with exactly two entries
 * @return the rewritten job node
 * @throws RuntimeException if the job does not have exactly two mappers
 *         (including when "map" is absent), or if the shuffle defines
 *         "partitionKeys" and the prefix check against "joinKeys" fails
 */
private JsonNode rewriteJoin(JsonNode job)
{
    ObjectNode newJob = (ObjectNode) cloneNode(job);
    ObjectNode shuffle = (ObjectNode) newJob.get("shuffle");
    JsonNode joinKeys = shuffle.get("joinKeys");
    String blockName = getText(shuffle, "name");

    // make sure there are two mappers in the job; path() (unlike get()) never
    // returns null, so a job without "map" reports size 0 and is rejected
    // with the message below instead of a NullPointerException
    JsonNode mapJsons = newJob.path("map");
    if (mapJsons.size() != 2)
    {
        throw new RuntimeException("There must be exactly two multimappers for JOIN shuffle command.");
    }

    // Add the Map side operator in each of the mappers
    // tag = 1, for the first mapper (non dimensional)
    // tag = 0, for the second dimensional mapper
    int tag = 1;
    for (JsonNode mapJson : mapJsons)
    {
        if (!mapJson.has("operators") || mapJson.get("operators").isNull())
        {
            ((ObjectNode) mapJson).put("operators", mapper.createArrayNode());
        }
        ArrayNode operators = (ArrayNode) mapJson.get("operators");

        // a fresh operator node is built per mapper since the tag differs
        operators.add(createObjectNode("operator", "REDUCE_JOIN_MAPPER",
                                       "input", createArrayNode(blockName),
                                       "output", blockName,
                                       "joinKeys", joinKeys,
                                       "tag", tag));
        tag--;
    }

    // create the reduce side operator
    ObjectNode reducerOperator = createObjectNode("operator", "REDUCE_JOIN",
                                                  "input", createArrayNode(blockName),
                                                  "output", blockName,
                                                  "joinKeys", joinKeys);
    if (shuffle.has("joinType"))
    {
        reducerOperator.put("joinType", shuffle.get("joinType"));
    }

    // add the reduce side operator
    if (!newJob.has("reduce") || newJob.get("reduce").isNull())
    {
        newJob.put("reduce", mapper.createArrayNode());
    }
    ArrayNode reduce = (ArrayNode) newJob.get("reduce");
    reduce.insert(0, reducerOperator);

    // Fix the shuffle json
    if (shuffle.has("partitionKeys"))
    {
        String[] partitionKeys = JsonUtils.asArray(shuffle, "partitionKeys");
        String[] joinKeyNames = JsonUtils.asArray(shuffle, "joinKeys");
        // make sure that partitionKeys is prefix of joinKeys
        if (!CommonUtils.isPrefix(joinKeyNames, partitionKeys))
        {
            throw new RuntimeException("Partition key must be a prefix of join keys");
        }
    }
    else
    {
        // no explicit partition keys: partition on the join keys
        shuffle.put("partitionKeys", shuffle.get("joinKeys"));
    }
    // We will sort on (joinKeys + ___tag); the keys are cloned so that adding
    // ___tag does not touch the joinKeys node held by the operators above
    JsonNode pivotKeys = cloneNode(shuffle.get("joinKeys"));
    ((ArrayNode) pivotKeys).add("___tag");

    shuffle.put("type", "SHUFFLE");
    shuffle.put("join", true);
    shuffle.put("pivotKeys", pivotKeys);
    shuffle.remove("joinKeys");

    return newJob;
}
 
Example 6
Source File: CountDistinctRewriter.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Inserts a GROUP_BY operator directly in front of the blockgen operator
 * ({@code bgInfo.getSecond()}) in that operator's phase.
 *
 * The GROUP_BY groups on the blockgen's schema output columns (preceded by
 * BLOCK_ID when the blockgen is by index) and carries a single
 * PresenceBitmapUDAF aggregate constructed with the hyphenated fact start
 * date, reading DATE_COLUMN_NAME and writing BITMAP_COLUMN_NAME.
 *
 * @param programNode not used by this method
 * @param bgInfo pair whose second element is the blockgen operator node
 * @throws AggregateRewriteException when
 *         {@code CommonUtils.isPrefix(sortKeys, groupByColumns)} is false
 * @throws RuntimeException when the blockgen operator is not found in its
 *         phase's operator list
 */
private void insertIncrementalMultipleDayGroupBy(ObjectNode programNode,
                                                 Pair<ObjectNode, ObjectNode> bgInfo) throws AggregateRewriteException
{
    final ObjectNode blockgenOp = bgInfo.getSecond();
    final String[] factColumns = lineage.getSchemaOutputColumns(blockgenOp);

    final String[] sortKeys;
    final String[] groupByColumns;
    if (!lineage.isBlockgenByIndex(blockgenOp))
    {
        // plain blockgen: pivot keys come from the operator itself
        sortKeys = JsonUtils.asArray(blockgenOp.get("pivotKeys"));
        groupByColumns = factColumns;
    }
    else
    {
        // blockgen by index: pivot keys come from the job's shuffle, and
        // BLOCK_ID is put in front of the fact columns
        final ObjectNode jobNode = lineage.getOperatorJobNode(blockgenOp);
        final ObjectNode jobShuffle = (ObjectNode) (jobNode.get("shuffle"));
        sortKeys = JsonUtils.asArray(jobShuffle.get("pivotKeys"));
        groupByColumns =
                (String[]) ArrayUtils.addAll(new String[] { "BLOCK_ID" }, factColumns);
    }

    // check sort key condition
    final boolean sortedByFactColumns = CommonUtils.isPrefix(sortKeys, groupByColumns);
    if (!sortedByFactColumns)
    {
        throw new AggregateRewriteException("Blockgen of union fact not sorted by fact collumns");
    }

    // single aggregate: presence bitmap UDAF constructed with the start date
    final ArrayNode aggregateList = JsonUtils.createArrayNode();
    final ArrayNode constructorArgs = JsonUtils.createArrayNode();
    constructorArgs.add(DateTimeUtilities.getHyphenated(this.factStartDate));
    aggregateList.add(RewriteUtils.createObjectNode("type", "USER_DEFINED_AGGREGATION",
                                                    "udaf", "com.linkedin.cubert.operator.aggregate.PresenceBitmapUDAF",
                                                    "constructorArgs", constructorArgs,
                                                    "input", DATE_COLUMN_NAME,
                                                    "output", BITMAP_COLUMN_NAME));

    // the GROUP_BY reads and writes the relation produced by the blockgen's
    // first source operator
    final String inputRelation =
            lineage.getOperatorSources(blockgenOp).get(0).get("output").getTextValue();
    final ObjectNode groupByOp =
            RewriteUtils.createObjectNode("operator", "GROUP_BY",
                                          "input", inputRelation,
                                          "output", inputRelation,
                                          "groupBy", JsonUtils.createArrayNode(groupByColumns),
                                          "aggregates", aggregateList);

    // find the blockgen operator (by reference) within its phase
    final ArrayNode phaseOperators =
            lineage.getPhaseOperators(lineage.getPhase(blockgenOp));
    final int operatorCount = phaseOperators.size();
    int position = 0;
    while (position < operatorCount && phaseOperators.get(position) != blockgenOp)
    {
        position++;
    }
    if (position == operatorCount)
    {
        throw new RuntimeException("Cannot find CREATE_BLOCK operator in phase operator list");
    }

    // inserting at the blockgen's index places GROUP_BY immediately before it
    phaseOperators.insert(position, groupByOp);
}