org.apache.avro.io.parsing.Symbol Java Examples

The following examples show how to use org.apache.avro.io.parsing.Symbol. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a {@code long} from the current JSON token.
 *
 * <p>A numeric token is read with {@code getLongValue()}. Any other token has its
 * text parsed with {@link Long#parseLong(String)}. In both cases the JSON parser is
 * moved to the next token before returning.
 *
 * <p>Any exception raised on the non-numeric path is replaced by the exception
 * produced by {@code error(...)}, carrying only the original message text.
 */
@Override
public long readLong() throws IOException {
    advance(Symbol.LONG);
    if (!in.getCurrentToken().isNumeric()) {
        // Lenient path: accept a textual representation of the number.
        try {
            String text = in.getText();
            in.nextToken();
            return Long.parseLong(text);
        } catch (Exception e) {
            throw error("long (" + e.getMessage() + ")");
        }
    }
    long value = in.getLongValue();
    in.nextToken();
    return value;
}
 
Example #2
Source File: SchemaValidationUtil.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether the given symbol, or any symbol reachable through its
 * alternatives or productions, is a {@link Symbol.ErrorAction} - i.e. whether the
 * parser may fail for some inputs.
 *
 * @throws RuntimeException if the symbol has a kind this method does not know
 */
private static boolean hasErrors(Symbol symbol) {
  switch (symbol.kind) {
    case EXPLICIT_ACTION:
    case TERMINAL:
      // These kinds are never reported as errors.
      return false;
    case IMPLICIT_ACTION:
      return symbol instanceof Symbol.ErrorAction;
    case ALTERNATIVE: {
      Symbol.Alternative alternative = (Symbol.Alternative) symbol;
      return hasErrors(symbol, alternative.symbols);
    }
    case REPEATER: {
      Symbol.Repeater repeater = (Symbol.Repeater) symbol;
      if (hasErrors(repeater.end)) {
        return true;
      }
      return hasErrors(symbol, repeater.production);
    }
    case ROOT:
    case SEQUENCE:
      return hasErrors(symbol, symbol.production);
    default:
      throw new RuntimeException("unknown symbol kind: " + symbol.kind);
  }
}
 
Example #3
Source File: FastDeserializerGeneratorBase.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Walks {@code symbolIterator} to decide how a single field should be handled and
 * packages the outcome as a {@link FieldAction}.
 *
 * <p>The iterator is advanced as a side effect; how far depends on which symbols
 * are encountered (see the inline comments).
 *
 * @param shouldReadCurrent when {@code false}, the iterator is left untouched and a
 *     non-reading action carrying {@code EMPTY_SYMBOL} is returned
 * @param field the field whose schema type is passed on to the resulting action
 * @param symbolIterator iterator over the symbols to inspect
 * @return the action built from the field type, the read/skip decision and the
 *     symbol (or, for record fields that are read, the iterator itself)
 * @throws FastDeserializerGeneratorException if a {@link Symbol.ErrorAction} is
 *     encountered while scanning a non-record field
 */
protected FieldAction seekFieldAction(boolean shouldReadCurrent, Schema.Field field,
    ListIterator<Symbol> symbolIterator) {

  Schema.Type type = field.schema().getType();

  // Nothing to look up when the caller already decided not to read this field.
  if (!shouldReadCurrent) {
    return FieldAction.fromValues(type, false, EMPTY_SYMBOL);
  }

  boolean shouldRead = true;
  Symbol fieldSymbol = END_SYMBOL;

  // Record fields: only peek at the next symbol to see whether it is a skip marker.
  if (Schema.Type.RECORD.equals(type)) {
    if (symbolIterator.hasNext()) {
      fieldSymbol = symbolIterator.next();
      if (fieldSymbol instanceof Symbol.SkipAction) {
        // The skip symbol stays consumed.
        return FieldAction.fromValues(type, false, fieldSymbol);
      } else {
        // Not a skip: undo the peek so the iterator is back where it started.
        symbolIterator.previous();
      }
    }
    // Note: this passes the iterator itself, not a symbol, as the third argument.
    return FieldAction.fromValues(type, true, symbolIterator);
  }

  // Non-record fields: scan forward until a symbol settles the decision.
  while (symbolIterator.hasNext()) {
    Symbol symbol = symbolIterator.next();

    if (symbol instanceof Symbol.ErrorAction) {
      throw new FastDeserializerGeneratorException(((Symbol.ErrorAction) symbol).msg);
    }

    if (symbol instanceof Symbol.SkipAction) {
      shouldRead = false;
      fieldSymbol = symbol;
      break;
    }

    if (symbol instanceof Symbol.WriterUnionAction) {
      if (symbolIterator.hasNext()) {
        // Consumes the symbol following the union action; if it is not an
        // Alternative, `symbol` now refers to it and falls through to the
        // TERMINAL check below.
        symbol = symbolIterator.next();

        if (symbol instanceof Symbol.Alternative) {
          shouldRead = true;
          fieldSymbol = symbol;
          break;
        }
      }
    }

    if (symbol.kind == Symbol.Kind.TERMINAL) {
      shouldRead = true;
      if (symbolIterator.hasNext()) {
        symbol = symbolIterator.next();

        if (symbol instanceof Symbol.Repeater) {
          fieldSymbol = symbol;
        } else {
          // Step back over the symbol just read and use it as the field symbol.
          fieldSymbol = symbolIterator.previous();
        }
      } else if (!symbolIterator.hasNext() && getSymbolPrintName(symbol) != null) {
        // Last symbol in the iterator: keep the terminal itself, provided it has a
        // print name. (The !hasNext() test is already implied by this else branch.)
        fieldSymbol = symbol;
      }
      break;
    }
  }

  // If the loop ended without a match, fieldSymbol is still END_SYMBOL and
  // shouldRead is still true.
  return FieldAction.fromValues(type, shouldRead, fieldSymbol);
}
 
Example #4
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a string value or, when the parser's top symbol is
 * {@code Symbol.MAP_KEY_MARKER}, a map key.
 *
 * <p>A map key must be a {@code FIELD_NAME} token and a plain string a
 * {@code VALUE_STRING} token; otherwise the exception produced by
 * {@code error("map-key")} or {@code error("string")} is thrown. On success the
 * token text is returned and the JSON parser moves to the next token.
 */
@Override
public String readString() throws IOException {
    advance(Symbol.STRING);

    final JsonToken expectedToken;
    final String expectedLabel;
    if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) {
        parser.advance(Symbol.MAP_KEY_MARKER);
        expectedToken = JsonToken.FIELD_NAME;
        expectedLabel = "map-key";
    } else {
        expectedToken = JsonToken.VALUE_STRING;
        expectedLabel = "string";
    }

    if (in.getCurrentToken() != expectedToken) {
        throw error(expectedLabel);
    }

    String text = in.getText();
    in.nextToken();
    return text;
}
 
Example #5
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Reads an {@code int} from the current JSON token.
 *
 * <p>Numeric tokens are read with {@code getIntValue()}; for any other token the
 * text is parsed with {@link Integer#parseInt(String)}. The JSON parser is moved to
 * the next token in both cases. Failures on the textual path are reported through
 * {@code error(...)} with the original exception message embedded.
 */
@Override
public int readInt() throws IOException {
    advance(Symbol.INT);

    if (in.getCurrentToken().isNumeric()) {
        int value = in.getIntValue();
        in.nextToken();
        return value;
    }

    // Not a numeric token: fall back to parsing its text.
    try {
        String text = in.getText();
        in.nextToken();
        return Integer.parseInt(text);
    } catch (Exception e) {
        throw error("int (" + e.getMessage() + ")");
    }
}
 
Example #6
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a boolean from the current JSON token.
 *
 * <p>{@code VALUE_TRUE} / {@code VALUE_FALSE} tokens are accepted directly.
 * Otherwise the token text is accepted when it is exactly one of
 * {@code "false"}, {@code "FALSE"}, {@code "0"}, {@code "true"}, {@code "TRUE"} or
 * {@code "1"}. On success the JSON parser moves to the next token.
 *
 * <p>Any other text - including no text at all - is reported through
 * {@code error("boolean")}, matching how the numeric readers report unparsable
 * text, instead of failing with a {@link NullPointerException}.
 *
 * @return the decoded value
 * @throws IOException if the underlying JSON parser fails
 */
@Override
public boolean readBoolean() throws IOException {
    advance(Symbol.BOOLEAN);
    JsonToken t = in.getCurrentToken();
    if (t == JsonToken.VALUE_TRUE || t == JsonToken.VALUE_FALSE) {
        in.nextToken();
        return t == JsonToken.VALUE_TRUE;
    }

    String s = in.getText();
    // Constant-first comparisons are null-safe: a missing text falls through to
    // error("boolean") below.
    if ("false".equals(s) || "FALSE".equals(s) || "0".equals(s)) {
        in.nextToken();
        return false;
    }
    if ("true".equals(s) || "TRUE".equals(s) || "1".equals(s)) {
        in.nextToken();
        return true;
    }
    throw error("boolean");
}
 
Example #7
Source File: GuidedJsonDecoder.java    From gcp-ingestion with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Read a string from the current location in parser.
 *
 * <p>This method differs from the original JsonDecoder by serializing all
 * structures captured by the current token into a JSON string. This enables
 * consistent behavior for handling variant types (e.g. field that can be a
 * boolean and a string) and for under-specified schemas.
 *
 * <p>This encoding is lossy because JSON strings are conflated with standard
 * strings. Consider the case where a number is decoded into a string. To
 * convert this Avro file back into the original JSON document, the encoder
 * must parse all strings as JSON and inline them into the tree. Now, if the
 * original JSON represents a JSON object as a string (e.g. `{"payload":
 * "{\"foo\":\"bar\"}"`), then the encoder will generate a new object that is
 * different from the original.
 *
 * <p>There are a few ways to avoid this if it is undesirable. One way is to use
 * a binary encoding for the JSON data such as BSON or base64. A second is to
 * normalize documents to avoid nested JSON encodings and to specify a schema
 * explicitly to guide the proper typing.
 *
 * @return the string value, map key, or JSON text of the current structure
 * @throws IOException if the underlying JSON parser or tree mapper fails
 */
@Override
public String readString() throws IOException {
  parser.advance(Symbol.STRING);
  if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) {
    parser.advance(Symbol.MAP_KEY_MARKER);
    assertCurrentToken(JsonToken.FIELD_NAME, "map-key");
  }

  final String result;
  JsonToken current = in.getCurrentToken();
  if (current == JsonToken.VALUE_STRING || current == JsonToken.FIELD_NAME) {
    result = in.getValueAsString();
  } else {
    // Does this create excessive garbage collection?
    // try-with-resources closes the buffer even when copying or re-reading fails.
    try (TokenBuffer buffer = new TokenBuffer(in)) {
      buffer.copyCurrentStructure(in);
      result = mapper.readTree(buffer.asParser()).toString();
    }
  }
  in.nextToken();
  return result;
}
 
Example #8
Source File: GuidedJsonDecoder.java    From gcp-ingestion with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Find the index in the union of the current variant.
 *
 * <p>This method only supports a single nullable type. Having more than a single
 * type is invalid in this case and will cause the decoder to panic. This
 * behavior is by design, since BigQuery does not support variant types in
 * columns. It is also inefficient to match sub-documents against various
 * types, given the streaming interface and bias towards performance.
 *
 * <p>Variants of non-null types are invalid. We enforce this by ensuring there
 * are no more than 2 elements and that at least one of them is null if there
 * are 2. Unions are required to be non-empty.
 *
 * <ul>
 * <li> Ok: [null], [type], [null, type]
 * <li> Bad: [type, type], [null, type, type]
 * </ul>
 *
 * @return the index of the union branch selected for the current token
 * @throws AvroTypeException if the union is a variant type, or if it has no
 *     branch for the current token (e.g. a null token for a [type] union, or a
 *     non-null token for a [null] union)
 */
@Override
public int readIndex() throws IOException {
  parser.advance(Symbol.UNION);
  Symbol.Alternative top = (Symbol.Alternative) parser.popSymbol();

  int nullIndex = top.findLabel("null");
  int typeIndex = nullIndex == 0 ? 1 : 0;

  if ((nullIndex < 0 && top.size() == 2) || (top.size() > 2)) {
    throw new AvroTypeException("Variant types are not supported.");
  }

  int index = in.getCurrentToken() == JsonToken.VALUE_NULL ? nullIndex : typeIndex;
  // For single-branch unions the chosen index may not exist: -1 when there is no
  // null branch, 1 when null is the only branch. Report that as a type error
  // rather than indexing out of bounds.
  if (index < 0 || index >= top.size()) {
    throw new AvroTypeException(
        "Union has no branch matching token: " + in.getCurrentToken());
  }
  parser.pushSymbol(top.getSymbol(index));
  return index;
}
 
Example #9
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Reads an enum value and returns the index of its label.
 *
 * <p>The current token must be a {@code VALUE_STRING} whose text is one of the
 * labels known to the popped {@link Symbol.EnumLabelsAction}. On success the JSON
 * parser moves to the next token.
 *
 * @return the index reported by {@code findLabel} for the token text
 * @throws AvroTypeException if the text is not a known enum label
 * @throws IOException if the underlying JSON parser fails
 */
@Override
public int readEnum() throws IOException {
    advance(Symbol.ENUM);
    Symbol.EnumLabelsAction top = (Symbol.EnumLabelsAction) parser.popSymbol();
    if (in.getCurrentToken() != JsonToken.VALUE_STRING) {
        // Name the type that was actually expected (previously mislabelled "fixed").
        throw error("enum");
    }
    String label = in.getText();
    int n = top.findLabel(label);
    if (n >= 0) {
        in.nextToken();
        return n;
    }
    throw new AvroTypeException("Unknown symbol in enum " + label);
}
 
Example #10
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a bytes value from a {@code VALUE_STRING} token.
 *
 * <p>The bytes come from {@code readByteArray()} and are returned in a newly
 * wrapped buffer; the {@code old} argument is not used. Any other token type is
 * reported through {@code error("bytes")}.
 */
@Override
public ByteBuffer readBytes(ByteBuffer old) throws IOException {
    advance(Symbol.BYTES);
    if (in.getCurrentToken() != JsonToken.VALUE_STRING) {
        throw error("bytes");
    }
    byte[] decoded = readByteArray();
    in.nextToken();
    return ByteBuffer.wrap(decoded);
}
 
Example #11
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Consumes a JSON {@code null}; any other token is reported through
 * {@code error("null")}.
 */
@Override
public void readNull() throws IOException {
    advance(Symbol.NULL);
    if (in.getCurrentToken() != JsonToken.VALUE_NULL) {
        throw error("null");
    }
    in.nextToken();
}
 
Example #12
Source File: GuidedJsonDecoder.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Consumes the opening of a JSON array.
 *
 * @return 0 when the array is empty (its end token is consumed as well),
 *     otherwise 1
 */
@Override
public long readArrayStart() throws IOException {
  parser.advance(Symbol.ARRAY_START);
  assertCurrentToken(JsonToken.START_ARRAY, "array-start");
  in.nextToken();

  boolean hasItems = in.getCurrentToken() != JsonToken.END_ARRAY;
  if (hasItems) {
    return 1;
  }

  // Empty array: close it straight away.
  parser.advance(Symbol.ARRAY_END);
  in.nextToken();
  return 0;
}
 
Example #13
Source File: FastDeserializerGeneratorBase.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Reads, via reflection, the {@code printName} field declared on the runtime class
 * of the given symbol.
 *
 * @throws FastDeserializerGeneratorException wrapping any reflective failure, such
 *     as the class not declaring a {@code printName} field
 */
protected static String getSymbolPrintName(Symbol symbol) {
    try {
        Field printNameField = symbol.getClass().getDeclaredField("printName");

        // Open the field just long enough to read it, then restore the flag.
        printNameField.setAccessible(true);
        String value = (String) printNameField.get(symbol);
        printNameField.setAccessible(false);

        return value;
    } catch (ReflectiveOperationException e) {
        throw new FastDeserializerGeneratorException(e);
    }
}
 
Example #14
Source File: GuidedJsonDecoder.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Moves past the end of an array item.
 *
 * @return 0 when the array end token was reached (and consumed), otherwise 1
 */
@Override
public long arrayNext() throws IOException {
  parser.advance(Symbol.ITEM_END);

  if (in.getCurrentToken() != JsonToken.END_ARRAY) {
    return 1;
  }

  parser.advance(Symbol.ARRAY_END);
  in.nextToken();
  return 0;
}
 
Example #15
Source File: GuidedJsonDecoder.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Moves past the end of a map entry.
 *
 * @return 0 when the object end token was reached (and consumed), otherwise 1
 */
@Override
public long mapNext() throws IOException {
  parser.advance(Symbol.ITEM_END);

  final boolean mapFinished = in.getCurrentToken() == JsonToken.END_OBJECT;
  if (mapFinished) {
    parser.advance(Symbol.MAP_END);
    in.nextToken();
  }
  return mapFinished ? 0 : 1;
}
 
Example #16
Source File: GuidedJsonDecoder.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Consumes the opening of a JSON object that is being read as a map.
 *
 * @return 0 when the object is empty (its end token is consumed as well),
 *     otherwise 1
 */
@Override
public long readMapStart() throws IOException {
  parser.advance(Symbol.MAP_START);
  assertCurrentToken(JsonToken.START_OBJECT, "map-start");
  in.nextToken();

  if (in.getCurrentToken() != JsonToken.END_OBJECT) {
    return 1;
  }

  // Empty map: close it straight away.
  parser.advance(Symbol.MAP_END);
  in.nextToken();
  return 0;
}
 
Example #17
Source File: FastDeserializerGeneratorBase.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Returns a new array holding the given symbols in reverse order; the input array
 * is left unmodified.
 */
protected static Symbol[] reverseSymbolArray(Symbol[] symbols) {
  final int count = symbols.length;
  Symbol[] result = new Symbol[count];

  // Read from the back of the input while filling the result from the front.
  for (int src = count - 1, dst = 0; src >= 0; src--, dst++) {
    result[dst] = symbols[src];
  }

  return result;
}
 
Example #18
Source File: FastDeserializerGeneratorBase.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the given symbols into a new array in reverse order. The argument itself
 * is not changed.
 */
protected static Symbol[] reverseSymbolArray(Symbol[] symbols) {
    Symbol[] reversedSymbols = new Symbol[symbols.length];

    // Fill the target from its last slot downwards while walking the input forwards.
    int target = symbols.length;
    for (Symbol symbol : symbols) {
        reversedSymbols[--target] = symbol;
    }

    return reversedSymbols;
}
 
Example #19
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a {@code float} from the current JSON token.
 *
 * <p>Numeric tokens are read with {@code getFloatValue()}. For other tokens the
 * text {@code "NaN"}, {@code "-Inf"} and {@code "+Inf"} map to the corresponding
 * {@link Float} constants, and anything else goes through
 * {@link Float#parseFloat(String)}. The JSON parser is moved to the next token in
 * both cases. Failures on the textual path are reported through {@code error(...)}
 * with the original exception message embedded.
 */
@Override
public float readFloat() throws IOException {
    advance(Symbol.FLOAT);

    if (in.getCurrentToken().isNumeric()) {
        float value = in.getFloatValue();
        in.nextToken();
        return value;
    }

    try {
        String s = in.getText();
        in.nextToken();
        if (s.equals("NaN")) {
            return Float.NaN;
        }
        if (s.equals("-Inf")) {
            return Float.NEGATIVE_INFINITY;
        }
        if (s.equals("+Inf")) {
            return Float.POSITIVE_INFINITY;
        }
        return Float.parseFloat(s);
    } catch (Exception e) {
        throw error("float (" + e.getMessage() + ")");
    }
}
 
Example #20
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a {@code double} from the current JSON token.
 *
 * <p>Numeric tokens are read with {@code getDoubleValue()}. For other tokens the
 * text {@code "NaN"}, {@code "-Inf"} and {@code "+Inf"} map to the corresponding
 * {@link Double} constants, and anything else goes through
 * {@link Double#parseDouble(String)}. The JSON parser is moved to the next token
 * in both cases. Failures on the textual path are reported through
 * {@code error(...)} with the original exception message embedded.
 */
@Override
public double readDouble() throws IOException {
    advance(Symbol.DOUBLE);

    if (!in.getCurrentToken().isNumeric()) {
        try {
            String s = in.getText();
            in.nextToken();
            if (s.equals("NaN")) {
                return Double.NaN;
            }
            if (s.equals("-Inf")) {
                return Double.NEGATIVE_INFINITY;
            }
            if (s.equals("+Inf")) {
                return Double.POSITIVE_INFINITY;
            }
            return Double.parseDouble(s);
        } catch (Exception e) {
            throw error("double (" + e.getMessage() + ")");
        }
    }

    double value = in.getDoubleValue();
    in.nextToken();
    return value;
}
 
Example #21
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Skips a string value or, when the parser's top symbol is
 * {@code Symbol.MAP_KEY_MARKER}, a map key.
 *
 * <p>A map key must be a {@code FIELD_NAME} token and a plain string a
 * {@code VALUE_STRING} token; a mismatch is reported through
 * {@code error("map-key")} or {@code error("string")} respectively.
 */
@Override
public void skipString() throws IOException {
    advance(Symbol.STRING);

    final boolean isMapKey = parser.topSymbol() == Symbol.MAP_KEY_MARKER;
    if (isMapKey) {
        parser.advance(Symbol.MAP_KEY_MARKER);
    }

    final JsonToken required = isMapKey ? JsonToken.FIELD_NAME : JsonToken.VALUE_STRING;
    if (in.getCurrentToken() != required) {
        throw error(isMapKey ? "map-key" : "string");
    }

    in.nextToken();
}
 
Example #22
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Skips a bytes value, which must be a {@code VALUE_STRING} token; any other
 * token is reported through {@code error("bytes")}.
 */
@Override
public void skipBytes() throws IOException {
    advance(Symbol.BYTES);
    if (in.getCurrentToken() != JsonToken.VALUE_STRING) {
        throw error("bytes");
    }
    in.nextToken();
}
 
Example #23
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code size} equals the size recorded on the popped
 * {@link Symbol.IntCheckAction}.
 *
 * @throws AvroTypeException if the two sizes differ
 */
private void checkFixed(int size) throws IOException {
    advance(Symbol.FIXED);
    Symbol.IntCheckAction expected = (Symbol.IntCheckAction) parser.popSymbol();
    if (size == expected.size) {
        return;
    }
    throw new AvroTypeException(
        "Incorrect length for fixed binary: expected "
            + expected.size + " but received " + size + " bytes.");
}
 
Example #24
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Consumes a {@code START_ARRAY} token and returns the result of
 * {@code doArrayNext()}; any other token is reported through
 * {@code error("array-start")}.
 */
@Override
public long readArrayStart() throws IOException {
    advance(Symbol.ARRAY_START);
    if (in.getCurrentToken() != JsonToken.START_ARRAY) {
        throw error("array-start");
    }
    in.nextToken();
    return doArrayNext();
}
 
Example #25
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the current token ends the array.
 *
 * @return 0 when the current token is {@code END_ARRAY} (which is then consumed),
 *     otherwise 1
 */
private long doArrayNext() throws IOException {
    if (in.getCurrentToken() != JsonToken.END_ARRAY) {
        return 1;
    }
    parser.advance(Symbol.ARRAY_END);
    in.nextToken();
    return 0;
}
 
Example #26
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Skips an entire JSON array using {@code skipChildren()}.
 *
 * @return always 0
 */
@Override
public long skipArray() throws IOException {
    advance(Symbol.ARRAY_START);
    if (in.getCurrentToken() != JsonToken.START_ARRAY) {
        throw error("array-start");
    }
    in.skipChildren();
    in.nextToken();
    advance(Symbol.ARRAY_END);
    return 0;
}
 
Example #27
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Consumes a {@code START_OBJECT} token and returns the result of
 * {@code doMapNext()}; any other token is reported through
 * {@code error("map-start")}.
 */
@Override
public long readMapStart() throws IOException {
    advance(Symbol.MAP_START);
    if (in.getCurrentToken() != JsonToken.START_OBJECT) {
        throw error("map-start");
    }
    in.nextToken();
    return doMapNext();
}
 
Example #28
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the current token ends the map.
 *
 * @return 0 when the current token is {@code END_OBJECT} (which is then consumed),
 *     otherwise 1
 */
private long doMapNext() throws IOException {
    final boolean atEnd = in.getCurrentToken() == JsonToken.END_OBJECT;
    if (atEnd) {
        in.nextToken();
        advance(Symbol.MAP_END);
    }
    return atEnd ? 0 : 1;
}
 
Example #29
Source File: IOWJsonDecoder.java    From iow-hadoop-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Skips an entire JSON object (read as a map) using {@code skipChildren()}.
 *
 * @return always 0
 */
@Override
public long skipMap() throws IOException {
    advance(Symbol.MAP_START);
    if (in.getCurrentToken() != JsonToken.START_OBJECT) {
        throw error("map-start");
    }
    in.skipChildren();
    in.nextToken();
    advance(Symbol.MAP_END);
    return 0;
}
 
Example #30
Source File: SchemaValidationUtil.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether any of the given symbols has errors. Entries identical to
 * {@code root} are skipped, and a {@code null} array yields {@code false}.
 */
private static boolean hasErrors(Symbol root, Symbol[] symbols) {
  if (symbols == null) {
    return false;
  }
  for (Symbol candidate : symbols) {
    // Skip the root itself so a self-reference is not followed.
    if (candidate != root && hasErrors(candidate)) {
      return true;
    }
  }
  return false;
}