/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.vinci.transport.document; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; import org.apache.vinci.transport.XTalkTransporter; /** * Class for converting XTalk streams into SAX events. */ public class XTalkToSAX { public static final int INITIAL_BUF_SIZE = 256; private static final String cdataType = "CDATA"; private char[] charBuffer; private byte[] byteBuffer; private AttributesImpl workAttributes; // members initialzed by parse() to reduce argument passing. private InputStream is; private ContentHandler handler; public XTalkToSAX() { init(INITIAL_BUF_SIZE); } public XTalkToSAX(int bufSize) { init(bufSize); } private void init(int bufSize) { this.workAttributes = new AttributesImpl(); this.byteBuffer = new byte[bufSize]; this.charBuffer = new char[bufSize]; } /** * Initially, the XTalkToSAX processor creates a byte buffer and char buffer of size * INITIAL_BUF_SIZE. These buffer may grow during parsing to handle very large strings. Users can * determine the size of these arrays with this method. This method in conjunction with * resetBuffers lets application implement their own buffer management. Buffers can be reset * during parsing, but not from another thread. * @return - */ public int bufferSize() { return byteBuffer.length; } /** * Resets buffers to their initial size... this is useful because buffers can grow during parsing * and this allows the space to be reclaimed without having to undo references to the parser * object. * @param toSize - */ public void resizeBuffers(int toSize) { if (this.byteBuffer.length != toSize) { this.byteBuffer = new byte[toSize]; this.charBuffer = new char[toSize]; } } /** * Parse one document off of the incoming XTalk stream into SAX events. A side effect of parsing * is that internal arrays will grow to the size of the largest character string encountered in * the document. Use bufferSize() and resizeBuffers to manage memory in applications where very * large strings may be encountered and the same object is used to parse many incoming documents. * * @param is - * @param handler - * @throws IOException * if underlying IOException from the stream or if XTalk format is invalid. * @throws SAXException * if SAXException thrown by the handler * * @pre handler != null * @pre is != null */ public void parse(InputStream is, ContentHandler handler) throws IOException, SAXException { this.is = is; this.handler = handler; try { int marker = is.read(); if (marker == -1) { throw new EOFException(); } if ((byte) marker != XTalkTransporter.DOCUMENT_MARKER) { throw new IOException("Expected document marker: " + (char) marker); } int version = is.read(); if ((byte) version != XTalkTransporter.VERSION_CODE) { throw new IOException("Xtalk version code doesn't match " + (int) XTalkTransporter.VERSION_CODE + ": " + version); } handler.startDocument(); doTopLevelParse(); handler.endDocument(); } finally { // nullify refs to allow GC is = null; handler = null; } } private void doTopLevelParse() throws IOException, SAXException { int top_field_count = XTalkTransporter.readInt(is); // Skip over intro PI's. int marker; if (top_field_count < 1) { throw new IOException("No top level element."); } while ((marker = is.read()) == XTalkTransporter.PI_MARKER) { String target = consumeString(); String data = consumeString(); handler.processingInstruction(target, data); top_field_count--; if (top_field_count < 1) { throw new IOException("No top level element."); } } if ((byte) marker != XTalkTransporter.ELEMENT_MARKER) { throw new IOException("Expected element marker: " + (char) marker); } doElement(); top_field_count--; // Handle trailing PI's while (top_field_count > 0) { if (is.read() != XTalkTransporter.PI_MARKER) { throw new IOException("Expected PI marker."); } doProcessingInstruction(); top_field_count--; } } private void doProcessingInstruction() throws IOException, SAXException { String target = consumeString(); String data = consumeString(); handler.processingInstruction(target, data); } private void ensureCapacity(int bytesToRead) { if (byteBuffer.length < bytesToRead) { byteBuffer = new byte[byteBuffer.length + bytesToRead]; charBuffer = new char[charBuffer.length + bytesToRead]; } } private String consumeString() throws IOException { int bytesToRead = XTalkTransporter.readInt(is); ensureCapacity(bytesToRead); int charsRead = XTalkTransporter.consumeCharacters(is, byteBuffer, charBuffer, bytesToRead); return new String(charBuffer, 0, charsRead); } private void doElement() throws IOException, SAXException { // Parse an incoming element. String tagName = consumeString(); int attribute_count = XTalkTransporter.readInt(is); workAttributes.clear(); for (int i = 0; i < attribute_count; i++) { String attrName = consumeString(); String attrValue = consumeString(); workAttributes.addAttribute("", attrName, attrName, cdataType, attrValue); } handler.startElement("", tagName, tagName, workAttributes); int field_count = XTalkTransporter.readInt(is); for (int i = 0; i < field_count; i++) { int marker = is.read(); switch ((byte) marker) { case XTalkTransporter.PI_MARKER: doProcessingInstruction(); break; case XTalkTransporter.STRING_MARKER: int bytesToRead = XTalkTransporter.readInt(is); ensureCapacity(bytesToRead); int charsRead = XTalkTransporter.consumeCharacters(is, byteBuffer, charBuffer, bytesToRead); handler.characters(charBuffer, 0, charsRead); break; case XTalkTransporter.ELEMENT_MARKER: doElement(); break; default: throw new IOException("Unexpected marker: " + (char) marker); } } handler.endElement(null, null, tagName); } }