Rice Pastry API

rice.p2p.util
Class XMLParser

java.lang.Object
  extended by rice.p2p.util.XMLParser
All Implemented Interfaces:
org.xmlpull.v1.XmlPullParser

public class XMLParser
extends java.lang.Object
implements org.xmlpull.v1.XmlPullParser

This class is a memory-efficient implementation of most of the XML pull parsing API.


Nested Class Summary
 class XMLParser.CharArrayBuffer
          This class implements a char array buffer
 
Field Summary
protected  java.lang.String[] attributeKeys
          If the tag parsed was a start tag, the list of attribute -> value pairs
protected  java.lang.String[] attributeValues
           
protected  char[] buffer
          The internal buffer used to process data
static int BUFFER_SIZE
          The size of the internal buffer to allocate
protected  int bufferLimit
           
protected  int bufferPosition
          Internal pointers into the buffer
protected  StringCache cache
          The StringCache used to reduce the memory requirements
static java.lang.String[][] ENTITIES
           
protected  boolean inTag
          Whether or not we are currently in a tag...
protected  int mark
          Internal variable which keeps track of the current mark
protected  XMLParser.CharArrayBuffer marked
           
static int MAX_ATTRIBUTES
           
protected  java.lang.String name
          If the tag parsed was a start/end, the name of the tag
protected  int numAttributes
           
static char[] QUOTE
           
protected  java.io.Reader reader
          The internal reader used to read data
static char[] SINGLE
           
static char[] TAG_END
           
protected  java.util.Stack tags
          The internal stack of tags which have been read
protected  java.lang.String text
          If the tag parsed was text, the text
static char[] WHITESPACE
           
static char[] WHITESPACE_OR_EQUALS
           
static char[] WHITESPACE_OR_TAG_END
           
 
Fields inherited from interface org.xmlpull.v1.XmlPullParser
CDSECT, COMMENT, DOCDECL, END_DOCUMENT, END_TAG, ENTITY_REF, FEATURE_PROCESS_DOCDECL, FEATURE_PROCESS_NAMESPACES, FEATURE_REPORT_NAMESPACE_ATTRIBUTES, FEATURE_VALIDATION, IGNORABLE_WHITESPACE, NO_NAMESPACE, PROCESSING_INSTRUCTION, START_DOCUMENT, START_TAG, TEXT, TYPES
 
Constructor Summary
XMLParser()
          Constructor
 
Method Summary
protected  void addAttribute(java.lang.String key, java.lang.String value)
          Internal method which adds an attribute
protected  void clearAttributes()
          Internal method which clears the list of attributes
protected  boolean contains(char[] chars, char c)
          Internal method which checks whether the given char exists in the given array
protected  java.lang.String convert(java.lang.String string)
          Internal method which converts all of the HTML/XML entities like &amp;, &gt;, &lt;, etc. back into their literal characters
protected  char current()
          Method which returns the current char in the buffer
 void defineEntityReplacementText(java.lang.String entityName, java.lang.String replacementText)
           
protected  void expect(char c)
          An assertion method
protected  void fillBuffer()
          Internal method which actually fills the buffer
 int getAttributeCount()
           
 java.lang.String getAttributeName(int index)
           
 java.lang.String getAttributeNamespace(int index)
           
 java.lang.String getAttributePrefix(int index)
           
 java.lang.String getAttributeType(int index)
           
 java.lang.String getAttributeValue(int index)
           
 java.lang.String getAttributeValue(java.lang.String namespace, java.lang.String name)
          Returns the attribute's value identified by namespace URI and namespace localName.
 int getColumnNumber()
           
 int getDepth()
           
 int getEventType()
          Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.)
 boolean getFeature(java.lang.String name)
           
 java.lang.String getInputEncoding()
           
 int getLineNumber()
           
 java.lang.String getName()
          For START_TAG or END_TAG events, the (local) name of the current element is returned when namespaces are enabled.
 java.lang.String getNamespace()
           
 java.lang.String getNamespace(java.lang.String prefix)
           
 int getNamespaceCount(int depth)
           
 java.lang.String getNamespacePrefix(int pos)
           
 java.lang.String getNamespaceUri(int pos)
           
 java.lang.String getPositionDescription()
           
 java.lang.String getPrefix()
           
 java.lang.Object getProperty(java.lang.String name)
           
 java.lang.String getText()
          Returns the text content of the current event as String.
 char[] getTextCharacters(int[] holderForStartAndLength)
           
 boolean isAttributeDefault(int index)
           
 boolean isEmptyElementTag()
           
 boolean isWhitespace()
          Checks whether the current TEXT event contains only whitespace characters.
 boolean isWhitespace(java.lang.String text)
          Method which checks whether the given text is whitespace
protected  void mark()
          Sets the mark
 int next()
          Get next parsing event - element content will be coalesced and only one TEXT event must be returned for whole element content (comments and processing instructions will be ignored and entity references must be expanded or exception must be thrown if entity reference can not be expanded).
 int nextTag()
           
 java.lang.String nextText()
           
 int nextToken()
           
protected  void parseAttributes()
          Method which parses all of the attributes of a start tag
protected  int parseDocumentTag()
          Method which parses a document tag
protected  int parseEndTag()
          Method which parses an end tag
protected  int parseEndTag(java.lang.String tag)
          Method which parses an end tag with the given name
protected  int parseStartTag()
          Method which parses a start tag
protected  int parseTag()
          Internal method which parses a tag
protected  int parseText()
          Method which parses text
protected  java.lang.String parseUntil(char c)
          Method which parses and returns up to the next token
protected  java.lang.String parseUntil(char[] chars)
          Method which parses and returns up to the next token
protected  void parseUntilNot(char[] chars)
          Method which parses up to the next token
 void require(int type, java.lang.String namespace, java.lang.String name)
           
 void setFeature(java.lang.String name, boolean state)
          Unsupported method (first of the methods marked as unsupported in this implementation)
 void setInput(java.io.InputStream inputStream, java.lang.String inputEncoding)
           
 void setInput(java.io.Reader in)
          Set the input source for parser to the given reader and resets the parser.
 void setProperty(java.lang.String name, java.lang.Object value)
           
protected  void step()
          Method which steps forward in the buffer
protected  java.lang.String unmark()
          Unsets the mark
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

BUFFER_SIZE

public static final int BUFFER_SIZE
The size of the internal buffer to allocate

See Also:
Constant Field Values

MAX_ATTRIBUTES

public static final int MAX_ATTRIBUTES
See Also:
Constant Field Values

QUOTE

public static final char[] QUOTE

TAG_END

public static final char[] TAG_END

WHITESPACE

public static final char[] WHITESPACE

WHITESPACE_OR_TAG_END

public static final char[] WHITESPACE_OR_TAG_END

WHITESPACE_OR_EQUALS

public static final char[] WHITESPACE_OR_EQUALS

SINGLE

public static final char[] SINGLE

ENTITIES

public static final java.lang.String[][] ENTITIES

reader

protected java.io.Reader reader
The internal reader used to read data


buffer

protected char[] buffer
The internal buffer used to process data


bufferPosition

protected int bufferPosition
Internal pointers into the buffer


bufferLimit

protected int bufferLimit

cache

protected StringCache cache
The StringCache used to reduce the memory requirements


tags

protected java.util.Stack tags
The internal stack of tags which have been read


name

protected java.lang.String name
If the tag parsed was a start/end, the name of the tag


text

protected java.lang.String text
If the tag parsed was text, the text


attributeKeys

protected java.lang.String[] attributeKeys
If the tag parsed was a start tag, the list of attribute -> value pairs


attributeValues

protected java.lang.String[] attributeValues

numAttributes

protected int numAttributes

inTag

protected boolean inTag
Whether or not we are currently in a tag...


mark

protected int mark
Internal variable which keeps track of the current mark


marked

protected XMLParser.CharArrayBuffer marked
Constructor Detail

XMLParser

public XMLParser()
Constructor

Method Detail

setInput

public void setInput(java.io.Reader in)
              throws org.xmlpull.v1.XmlPullParserException
Set the input source for parser to the given reader and resets the parser. The event type is set to the initial value START_DOCUMENT. Setting the reader to null will just stop parsing and reset parser state, allowing the parser to free internal resources such as parsing buffers.

Specified by:
setInput in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getText

public java.lang.String getText()
Returns the text content of the current event as String. The value returned depends on current event type, for example for TEXT event it is element content (this is typical case when next() is used). See description of nextToken() for detailed description of possible returned values for different types of events.

NOTE: in case of ENTITY_REF, this method returns the entity replacement text (or null if not available). This is the only case where getText() and getTextCharacters() return different values.

Specified by:
getText in interface org.xmlpull.v1.XmlPullParser
See Also:
getEventType(), next(), nextToken()

getName

public java.lang.String getName()
For START_TAG or END_TAG events, the (local) name of the current element is returned when namespaces are enabled. When namespace processing is disabled, the raw name is returned. For ENTITY_REF events, the entity name is returned. If the current event is not START_TAG, END_TAG, or ENTITY_REF, null is returned.

Please note: To reconstruct the raw element name when namespaces are enabled and the prefix is not null, you will need to add the prefix and a colon to localName.

Specified by:
getName in interface org.xmlpull.v1.XmlPullParser

getAttributeValue

public java.lang.String getAttributeValue(java.lang.String namespace,
                                          java.lang.String name)
Returns the attribute's value identified by namespace URI and namespace localName. If namespaces are disabled namespace must be null. If current event type is not START_TAG then IndexOutOfBoundsException will be thrown.

NOTE: attribute value must be normalized (including entity replacement text if PROCESS_DOCDECL is false) as described in XML 1.0 section 3.3.3 Attribute-Value Normalization

Specified by:
getAttributeValue in interface org.xmlpull.v1.XmlPullParser
Parameters:
namespace - Namespace of the attribute if namespaces are enabled otherwise must be null
name - If namespaces enabled local name of attribute otherwise just attribute name
Returns:
value of attribute or null if attribute with given name does not exist
See Also:
defineEntityReplacementText(java.lang.String, java.lang.String)

getEventType

public int getEventType()
                 throws org.xmlpull.v1.XmlPullParserException
Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.)

Specified by:
getEventType in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException
See Also:
next(), nextToken()

next

public int next()
         throws org.xmlpull.v1.XmlPullParserException,
                java.io.IOException
Get next parsing event - element content will be coalesced and only one TEXT event must be returned for whole element content (comments and processing instructions will be ignored and entity references must be expanded or exception must be thrown if entity reference can not be expanded). If element content is empty (content is "") then no TEXT event will be reported.

NOTE: empty element (such as <tag/>) will be reported with two separate events: START_TAG, END_TAG - it must be so to preserve parsing equivalency of empty element to <tag></tag>. (see isEmptyElementTag())

Specified by:
next in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException
See Also:
isEmptyElementTag(), XmlPullParser.START_TAG, XmlPullParser.TEXT, XmlPullParser.END_TAG, XmlPullParser.END_DOCUMENT

isWhitespace

public boolean isWhitespace()
                     throws org.xmlpull.v1.XmlPullParserException
Checks whether the current TEXT event contains only whitespace characters. For IGNORABLE_WHITESPACE, this is always true. For TEXT and CDSECT, false is returned when the current event text contains at least one non-white space character. For any other event type an exception is thrown.

Please note: non-validating parsers are not able to distinguish whitespace and ignorable whitespace, except from whitespace outside the root element. Ignorable whitespace is reported as separate event, which is exposed via nextToken only.

Specified by:
isWhitespace in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

fillBuffer

protected void fillBuffer()
                   throws java.io.IOException
Internal method which actually fills the buffer

Throws:
java.io.IOException

current

protected char current()
                throws java.io.IOException
Method which returns the current char in the buffer

Returns:
The current char
Throws:
java.io.IOException

step

protected void step()
Method which steps forward in the buffer


mark

protected void mark()
Sets the mark


unmark

protected java.lang.String unmark()
Unsets the mark


clearAttributes

protected void clearAttributes()
Internal method which clears the list of attributes


addAttribute

protected void addAttribute(java.lang.String key,
                            java.lang.String value)
Internal method which adds an attribute


expect

protected void expect(char c)
               throws org.xmlpull.v1.XmlPullParserException,
                      java.io.IOException
An assertion method

Parameters:
c - The expected char
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

isWhitespace

public boolean isWhitespace(java.lang.String text)
Method which checks whether the given text is whitespace

Parameters:
text - The text to check

contains

protected boolean contains(char[] chars,
                           char c)
Internal method which checks whether the given char exists in the given array

Parameters:
chars - The chars to check for
c - The char

parseUntil

protected java.lang.String parseUntil(char[] chars)
                               throws java.io.IOException
Method which parses and returns up to the next token

Returns:
The token
Throws:
java.io.IOException

parseUntil

protected java.lang.String parseUntil(char c)
                               throws java.io.IOException
Method which parses and returns up to the next token

Returns:
The token
Throws:
java.io.IOException

parseUntilNot

protected void parseUntilNot(char[] chars)
                      throws java.io.IOException
Method which parses up to the next token

Throws:
java.io.IOException

parseEndTag

protected int parseEndTag(java.lang.String tag)
                   throws org.xmlpull.v1.XmlPullParserException,
                          java.io.IOException
Method which parses an end tag with the given name

Parameters:
tag - The name of the parsed tag
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

parseTag

protected int parseTag()
                throws org.xmlpull.v1.XmlPullParserException,
                       java.io.IOException
Internal method which parses a tag

Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

parseEndTag

protected int parseEndTag()
                   throws org.xmlpull.v1.XmlPullParserException,
                          java.io.IOException
Method which parses an end tag

Parameters:
The - name of the parsed tag
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

parseStartTag

protected int parseStartTag()
                     throws org.xmlpull.v1.XmlPullParserException,
                            java.io.IOException
Method which parses a start tag

Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

parseDocumentTag

protected int parseDocumentTag()
                        throws org.xmlpull.v1.XmlPullParserException,
                               java.io.IOException
Method which parses a document tag

Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

parseAttributes

protected void parseAttributes()
                        throws org.xmlpull.v1.XmlPullParserException,
                               java.io.IOException
Method which parses all of the attributes of a start tag

Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

parseText

protected int parseText()
                 throws org.xmlpull.v1.XmlPullParserException,
                        java.io.IOException
Method which parses text

Parameters:
The - name of the parsed tag
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

convert

protected java.lang.String convert(java.lang.String string)
Internal method which converts all of the HTML/XML entities like &amp;, &gt;, &lt;, etc. back into their literal characters

Parameters:
string - The string to convert
Returns:
The result

setFeature

public void setFeature(java.lang.String name,
                       boolean state)
                throws org.xmlpull.v1.XmlPullParserException
Unsupported method (first of the methods marked as unsupported in this implementation)

Specified by:
setFeature in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getFeature

public boolean getFeature(java.lang.String name)
Specified by:
getFeature in interface org.xmlpull.v1.XmlPullParser

setProperty

public void setProperty(java.lang.String name,
                        java.lang.Object value)
                 throws org.xmlpull.v1.XmlPullParserException
Specified by:
setProperty in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getProperty

public java.lang.Object getProperty(java.lang.String name)
Specified by:
getProperty in interface org.xmlpull.v1.XmlPullParser

setInput

public void setInput(java.io.InputStream inputStream,
                     java.lang.String inputEncoding)
              throws org.xmlpull.v1.XmlPullParserException
Specified by:
setInput in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getInputEncoding

public java.lang.String getInputEncoding()
Specified by:
getInputEncoding in interface org.xmlpull.v1.XmlPullParser

defineEntityReplacementText

public void defineEntityReplacementText(java.lang.String entityName,
                                        java.lang.String replacementText)
                                 throws org.xmlpull.v1.XmlPullParserException
Specified by:
defineEntityReplacementText in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getNamespaceCount

public int getNamespaceCount(int depth)
                      throws org.xmlpull.v1.XmlPullParserException
Specified by:
getNamespaceCount in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getNamespacePrefix

public java.lang.String getNamespacePrefix(int pos)
                                    throws org.xmlpull.v1.XmlPullParserException
Specified by:
getNamespacePrefix in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getNamespaceUri

public java.lang.String getNamespaceUri(int pos)
                                 throws org.xmlpull.v1.XmlPullParserException
Specified by:
getNamespaceUri in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getNamespace

public java.lang.String getNamespace(java.lang.String prefix)
Specified by:
getNamespace in interface org.xmlpull.v1.XmlPullParser

getDepth

public int getDepth()
Specified by:
getDepth in interface org.xmlpull.v1.XmlPullParser

getPositionDescription

public java.lang.String getPositionDescription()
Specified by:
getPositionDescription in interface org.xmlpull.v1.XmlPullParser

getLineNumber

public int getLineNumber()
Specified by:
getLineNumber in interface org.xmlpull.v1.XmlPullParser

getColumnNumber

public int getColumnNumber()
Specified by:
getColumnNumber in interface org.xmlpull.v1.XmlPullParser

getTextCharacters

public char[] getTextCharacters(int[] holderForStartAndLength)
Specified by:
getTextCharacters in interface org.xmlpull.v1.XmlPullParser

getNamespace

public java.lang.String getNamespace()
Specified by:
getNamespace in interface org.xmlpull.v1.XmlPullParser

getPrefix

public java.lang.String getPrefix()
Specified by:
getPrefix in interface org.xmlpull.v1.XmlPullParser

isEmptyElementTag

public boolean isEmptyElementTag()
                          throws org.xmlpull.v1.XmlPullParserException
Specified by:
isEmptyElementTag in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException

getAttributeNamespace

public java.lang.String getAttributeNamespace(int index)
Specified by:
getAttributeNamespace in interface org.xmlpull.v1.XmlPullParser

getAttributePrefix

public java.lang.String getAttributePrefix(int index)
Specified by:
getAttributePrefix in interface org.xmlpull.v1.XmlPullParser

getAttributeType

public java.lang.String getAttributeType(int index)
Specified by:
getAttributeType in interface org.xmlpull.v1.XmlPullParser

isAttributeDefault

public boolean isAttributeDefault(int index)
Specified by:
isAttributeDefault in interface org.xmlpull.v1.XmlPullParser

nextToken

public int nextToken()
              throws org.xmlpull.v1.XmlPullParserException,
                     java.io.IOException
Specified by:
nextToken in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

require

public void require(int type,
                    java.lang.String namespace,
                    java.lang.String name)
             throws org.xmlpull.v1.XmlPullParserException,
                    java.io.IOException
Specified by:
require in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

nextText

public java.lang.String nextText()
                          throws org.xmlpull.v1.XmlPullParserException,
                                 java.io.IOException
Specified by:
nextText in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

nextTag

public int nextTag()
            throws org.xmlpull.v1.XmlPullParserException,
                   java.io.IOException
Specified by:
nextTag in interface org.xmlpull.v1.XmlPullParser
Throws:
org.xmlpull.v1.XmlPullParserException
java.io.IOException

getAttributeCount

public int getAttributeCount()
Specified by:
getAttributeCount in interface org.xmlpull.v1.XmlPullParser

getAttributeName

public java.lang.String getAttributeName(int index)
Specified by:
getAttributeName in interface org.xmlpull.v1.XmlPullParser

getAttributeValue

public java.lang.String getAttributeValue(int index)
Specified by:
getAttributeValue in interface org.xmlpull.v1.XmlPullParser

Rice Pastry API

Copyright © 2001-2005 - Rice Pastry.


Imprint-Dataprotection