Java Code Examples for org.apache.parquet.example.data.simple.SimpleGroup#getFieldRepetitionCount()

The following examples show how to use org.apache.parquet.example.data.simple.SimpleGroup#getFieldRepetitionCount() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetAsTextInputFormat.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
protected List<String> groupToStrings(SimpleGroup grp) {

            ArrayList<String> s = new ArrayList<>();

            for (int n = 0; n < grp.getType().getFieldCount(); n ++) {

                Type field = grp.getType().getType(n);
                    try {
                        if (!field.isPrimitive())
                           s.addAll(groupToStrings((SimpleGroup) grp.getGroup(n, 0))); // array of groups not (yet) supported
                        else if (field.getRepetition() == Type.Repetition.REPEATED) {

                            boolean is_binary =
                                field.asPrimitiveType().getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.BINARY;
                            StringBuilder sb = new StringBuilder("[");
                            ArrayList<String> arr = new ArrayList<>();
                            for (int i = 0; i < grp.getFieldRepetitionCount(n); i ++)
                                arr.add(is_binary ? "\"" + grp.getValueToString(n, i) + "\"" :
                                    grp.getValueToString(n, i));

                            sb.append(Joiner.on(", ").join(arr));
                            sb.append("]");
                            s.add(sb.toString());
                        }
                        else
                            s.add(grp.getValueToString(n, 0));
                    }
                    catch (RuntimeException e) {
                        if(e.getMessage().startsWith("not found") && field.getRepetition() == Type.Repetition.OPTIONAL)
                            s.add("");
                        else
                            throw e;
                    }
            }

            return s;
        }
 
Example 2
Source File: ParquetAsJsonInputFormat.java    From iow-hadoop-streaming with Apache License 2.0 4 votes vote down vote up
private void groupToJson(JsonGenerator currentGenerator, SimpleGroup grp)
      throws IOException {

    GroupType gt = grp.getType();

    currentGenerator.writeStartObject();
    for(int i = 0; i < gt.getFieldCount(); i ++) {

        String field = gt.getFieldName(i);
        try {
            Type t = gt.getType(i);
            int repetition = 1;
            boolean repeated = false;
            if (t.getRepetition() == Type.Repetition.REPEATED) {
                repeated = true;
                repetition = grp.getFieldRepetitionCount(i);
                currentGenerator.writeArrayFieldStart(field);
            }
            else
                currentGenerator.writeFieldName(field);

            for(int j = 0; j < repetition; j ++) {

                if (t.isPrimitive()) {
                    switch (t.asPrimitiveType().getPrimitiveTypeName()) {
                        case BINARY:
                            currentGenerator.writeString(grp.getString(i, j));
                            break;
                        case INT32:
                            currentGenerator.writeNumber(grp.getInteger(i, j));
                            break;
                        case INT96:
                        case INT64:
                            // clumsy way - TODO - Subclass SimpleGroup or something like that
                            currentGenerator.writeNumber(Long.parseLong(grp.getValueToString(i, j)));
                            break;
                        case DOUBLE:
                        case FLOAT:
                            currentGenerator.writeNumber(Double.parseDouble(grp.getValueToString(i, j)));
                            break;
                        case BOOLEAN:
                            currentGenerator.writeBoolean(grp.getBoolean(i, j));
                            break;
                        default:
                            throw new RuntimeException("Can't handle type " + gt.getType(i));
                    }
                } else {
                    groupToJson(currentGenerator, (SimpleGroup) grp.getGroup(i, j));
                }
            }

            if (repeated)
                currentGenerator.writeEndArray();
        }
        catch (Exception e) {
            if (e.getMessage().startsWith("not found") && gt.getType(i).getRepetition() == Type.Repetition.OPTIONAL)
                currentGenerator.writeNull();
            else
                 throw new RuntimeException(e);
        }
    }
    currentGenerator.writeEndObject();
}