ToHTMLStreampublic class ToHTMLStream extends ToStream
Fields Summary |
---|
protected boolean | m_inDTDThis flag is set while receiving events from the DTD | private boolean | m_inBlockElemTrue if the current element is a block element. (seems like
this needs to be a stack. -sb). | protected static final CharInfo | m_htmlcharInfoMap that tells which XML characters should have special treatment, and it
provides character to entity name lookup. | static final Trie | m_elementFlagsA digital search trie for fast, case insensitive lookup of ElemDesc objects. | private static final ElemDesc | m_dummyDummy element for elements not found. | private boolean | m_specialEscapeURLsTrue if URLs should be specially escaped with the %xx form. | private boolean | m_omitMetaTagTrue if the META tag should be omitted. |
Constructors Summary |
---|
public ToHTMLStream()Default constructor.
super();
m_charInfo = m_htmlcharInfo;
// initialize namespaces
m_prefixMap = new NamespaceMappings();
|
Methods Summary |
---|
public void | addUniqueAttribute(java.lang.String name, java.lang.String value, int flags)This method is used to add an attribute to the currently open element.
The caller has guaranted that this attribute is unique, which means that it
not been seen before and will not be seen again.
try
{
final java.io.Writer writer = m_writer;
if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
{
// "flags" has indicated that the characters
// '>' '<' '&' and '"' are not in the value and
// m_htmlcharInfo has recorded that there are no other
// entities in the range 0 to 127 so we write out the
// value directly
writer.write(' ");
writer.write(name);
writer.write("=\"");
writer.write(value);
writer.write('"");
}
else if (
(flags & HTML_ATTREMPTY) > 0
&& (value.length() == 0 || value.equalsIgnoreCase(name)))
{
writer.write(' ");
writer.write(name);
}
else
{
writer.write(' ");
writer.write(name);
writer.write("=\"");
if ((flags & HTML_ATTRURL) > 0)
{
writeAttrURI(writer, value, m_specialEscapeURLs);
}
else
{
writeAttrString(writer, value, this.getEncoding());
}
writer.write('"");
}
} catch (IOException e) {
throw new SAXException(e);
}
| public void | attributeDecl(java.lang.String eName, java.lang.String aName, java.lang.String type, java.lang.String valueDefault, java.lang.String value)This method does nothing.
// The internal DTD subset is not serialized by the ToHTMLStream serializer
| public final void | cdata(char[] ch, int start, int length)Receive notification of cdata.
The Parser will call this method to report each chunk of
character data. SAX parsers may return all contiguous character
data in a single chunk, or they may split it into several
chunks; however, all of the characters in any single event
must come from the same external entity, so that the Locator
provides useful information.
The application must not attempt to read from the array
outside of the specified range.
Note that some parsers will report whitespace using the
ignorableWhitespace() method rather than this one (validating
parsers must do so).
if ((null != m_elemContext.m_elementName)
&& (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
|| m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
{
try
{
if (m_elemContext.m_startTagOpen)
{
closeStartTag();
m_elemContext.m_startTagOpen = false;
}
m_ispreserve = true;
if (shouldIndent())
indent();
// writer.write(ch, start, length);
writeNormalizedChars(ch, start, length, true, m_lineSepUse);
}
catch (IOException ioe)
{
throw new org.xml.sax.SAXException(
XMLMessages.createXMLMessage(
XMLErrorResources.ER_OIERROR,
null),
ioe);
//"IO error", ioe);
}
}
else
{
super.cdata(ch, start, length);
}
| public final void | characters(char[] chars, int start, int length)Receive notification of character data.
The Parser will call this method to report each chunk of
character data. SAX parsers may return all contiguous character
data in a single chunk, or they may split it into several
chunks; however, all of the characters in any single event
must come from the same external entity, so that the Locator
provides useful information.
The application must not attempt to read from the array
outside of the specified range.
Note that some parsers will report whitespace using the
ignorableWhitespace() method rather than this one (validating
parsers must do so).
if (m_elemContext.m_isRaw)
{
try
{
if (m_elemContext.m_startTagOpen)
{
closeStartTag();
m_elemContext.m_startTagOpen = false;
}
m_ispreserve = true;
// With m_ispreserve just set true it looks like shouldIndent()
// will always return false, so drop any possible indentation.
// if (shouldIndent())
// indent();
// writer.write("<![CDATA[");
// writer.write(chars, start, length);
writeNormalizedChars(chars, start, length, false, m_lineSepUse);
// writer.write("]]>");
// time to generate characters event
if (m_tracer != null)
super.fireCharEvent(chars, start, length);
return;
}
catch (IOException ioe)
{
throw new org.xml.sax.SAXException(
XMLMessages.createXMLMessage(
XMLErrorResources.ER_OIERROR,
null),
ioe);
//"IO error", ioe);
}
}
else
{
super.characters(chars, start, length);
}
| protected void | closeStartTag()For the enclosing elements starting tag write out out any attributes
followed by ">"
try
{
// finish processing attributes, time to fire off the start element event
if (m_tracer != null)
super.fireStartElem(m_elemContext.m_elementName);
int nAttrs = m_attributes.getLength();
if (nAttrs>0)
{
processAttributes(m_writer, nAttrs);
// clear attributes object for re-use with next element
m_attributes.clear();
}
m_writer.write('>");
/* whether Xalan or XSLTC, we have the prefix mappings now, so
* lets determine if the current element is specified in the cdata-
* section-elements list.
*/
if (m_cdataSectionElements != null)
m_elemContext.m_isCdataSection = isCdataSection();
if (m_doIndent)
{
m_isprevtext = false;
m_preserves.push(m_ispreserve);
}
}
catch(IOException e)
{
throw new SAXException(e);
}
| public void | comment(char[] ch, int start, int length)
// The internal DTD subset is not serialized by the ToHTMLStream serializer
if (m_inDTD)
return;
super.comment(ch, start, length);
| public void | elementDecl(java.lang.String name, java.lang.String model)This method does nothing.
// The internal DTD subset is not serialized by the ToHTMLStream serializer
| public void | endDTD()Report the end of DTD declarations.
m_inDTD = false;
/* for ToHTMLStream the DOCTYPE is entirely output in the
* startDocumentInternal() method, so don't do anything here
*/
| public final void | endDocument()Receive notification of the end of a document.
flushPending();
if (m_doIndent && !m_isprevtext)
{
try
{
outputLineSep();
}
catch(IOException e)
{
throw new SAXException(e);
}
}
flushWriter();
if (m_tracer != null)
super.fireEndDoc();
| public final void | endElement(java.lang.String namespaceURI, java.lang.String localName, java.lang.String name)Receive notification of the end of an element.
// deal with any pending issues
if (m_cdataTagOpen)
closeCDATA();
// if the element has a namespace, treat it like XML, not HTML
if (null != namespaceURI && namespaceURI.length() > 0)
{
super.endElement(namespaceURI, localName, name);
return;
}
try
{
ElemContext elemContext = m_elemContext;
final ElemDesc elemDesc = elemContext.m_elementDesc;
final int elemFlags = elemDesc.getFlags();
final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
// deal with any indentation issues
if (m_doIndent)
{
final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
boolean shouldIndent = false;
if (m_ispreserve)
{
m_ispreserve = false;
}
else if (m_doIndent && (!m_inBlockElem || isBlockElement))
{
m_startNewLine = true;
shouldIndent = true;
}
if (!elemContext.m_startTagOpen && shouldIndent)
indent(elemContext.m_currentElemDepth - 1);
m_inBlockElem = !isBlockElement;
}
final java.io.Writer writer = m_writer;
if (!elemContext.m_startTagOpen)
{
writer.write("</");
writer.write(name);
writer.write('>");
}
else
{
// the start-tag open when this method was called,
// so we need to process it now.
if (m_tracer != null)
super.fireStartElem(name);
// the starting tag was still open when we received this endElement() call
// so we need to process any gathered attributes NOW, before they go away.
int nAttrs = m_attributes.getLength();
if (nAttrs > 0)
{
processAttributes(m_writer, nAttrs);
// clear attributes object for re-use with next element
m_attributes.clear();
}
if (!elemEmpty)
{
// As per Dave/Paul recommendation 12/06/2000
// if (shouldIndent)
// writer.write('>');
// indent(m_currentIndent);
writer.write("></");
writer.write(name);
writer.write('>");
}
else
{
writer.write('>");
}
}
// clean up because the element has ended
if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
m_ispreserve = true;
m_isprevtext = false;
// fire off the end element event
if (m_tracer != null)
super.fireEndElem(name);
// OPTIMIZE-EMPTY
if (elemEmpty)
{
// a quick exit if the HTML element had no children.
// This block of code can be removed if the corresponding block of code
// in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
m_elemContext = elemContext.m_prev;
return;
}
// some more clean because the element has ended.
if (!elemContext.m_startTagOpen)
{
if (m_doIndent && !m_preserves.isEmpty())
m_preserves.pop();
}
m_elemContext = elemContext.m_prev;
// m_isRawStack.pop();
}
catch (IOException e)
{
throw new SAXException(e);
}
| public final void | endElement(java.lang.String elemName)
endElement(null, null, elemName);
| public final void | entityReference(java.lang.String name)Receive notivication of a entityReference.
try
{
final java.io.Writer writer = m_writer;
writer.write('&");
writer.write(name);
writer.write(';");
} catch(IOException e)
{
throw new SAXException(e);
}
| public void | externalEntityDecl(java.lang.String name, java.lang.String publicId, java.lang.String systemId)This method does nothing.
// The internal DTD subset is not serialized by the ToHTMLStream serializer
| public static final com.sun.org.apache.xml.internal.serializer.ElemDesc | getElemDesc(java.lang.String name)Get a description of the given element.
/* this method used to return m_dummy when name was null
* but now it doesn't check and and requires non-null name.
*/
Object obj = m_elementFlags.get(name);
if (null != obj)
return (ElemDesc)obj;
return m_dummy;
| private final boolean | getOmitMetaTag()Tells if the formatter should omit the META tag.
return m_omitMetaTag;
| private final boolean | getSpecialEscapeURLs()Tells if the formatter should use special URL escaping.
return m_specialEscapeURLs;
| protected synchronized void | init(java.io.OutputStream output, java.util.Properties format)Initialize the serializer with the specified output stream and output
format. Must be called before calling any of the serialize methods.
if (null == format)
{
format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML);
}
super.init(output,format, false);
| private void | initToHTMLStream()
// m_elementDesc = null;
m_inBlockElem = false;
m_inDTD = false;
// m_isRawStack.clear();
m_omitMetaTag = false;
m_specialEscapeURLs = true;
| public void | internalEntityDecl(java.lang.String name, java.lang.String value)This method does nothing.
// The internal DTD subset is not serialized by the ToHTMLStream serializer
| private boolean | isASCIIDigit(char c)Tell if a character is an ASCII digit.
return (c >= '0" && c <= '9");
| private boolean | isHHSign(java.lang.String str)Dmitri Ilyin: Makes sure if the String is HH encoded sign.
boolean sign = true;
try
{
char r = (char) Integer.parseInt(str, 16);
}
catch (NumberFormatException e)
{
sign = false;
}
return sign;
| private static java.lang.String | makeHHString(int i)Make an integer into an HH hex value.
Does no checking on the size of the input, since this
is only meant to be used locally by writeAttrURI.
String s = Integer.toHexString(i).toUpperCase();
if (s.length() == 1)
{
s = "0" + s;
}
return s;
| public void | namespaceAfterStartElement(java.lang.String prefix, java.lang.String uri)This method is used when a prefix/uri namespace mapping
is indicated after the element was started with a
startElement() and before and endElement().
startPrefixMapping(prefix,uri) would be used before the
startElement() call.
// hack for XSLTC with finding URI for default namespace
if (m_elemContext.m_elementURI == null)
{
String prefix1 = getPrefixPart(m_elemContext.m_elementName);
if (prefix1 == null && EMPTYSTRING.equals(prefix))
{
// the elements URI is not known yet, and it
// doesn't have a prefix, and we are currently
// setting the uri for prefix "", so we have
// the uri for the element... lets remember it
m_elemContext.m_elementURI = uri;
}
}
startPrefixMapping(prefix,uri,false);
| protected void | processAttribute(java.io.Writer writer, java.lang.String name, java.lang.String value, com.sun.org.apache.xml.internal.serializer.ElemDesc elemDesc)Process an attribute.
writer.write(' ");
if ( ((value.length() == 0) || value.equalsIgnoreCase(name))
&& elemDesc != null
&& elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
{
writer.write(name);
}
else
{
// %REVIEW% %OPT%
// Two calls to single-char write may NOT
// be more efficient than one to string-write...
writer.write(name);
writer.write("=\"");
if ( elemDesc != null
&& elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
writeAttrURI(writer, value, m_specialEscapeURLs);
else
writeAttrString(writer, value, this.getEncoding());
writer.write('"");
}
| public void | processAttributes(java.io.Writer writer, int nAttrs)Process the attributes, which means to write out the currently
collected attributes to the writer. The attributes are not
cleared by this method
/*
* process the collected attributes
*/
for (int i = 0; i < nAttrs; i++)
{
processAttribute(
writer,
m_attributes.getQName(i),
m_attributes.getValue(i),
m_elemContext.m_elementDesc);
}
| public void | processingInstruction(java.lang.String target, java.lang.String data)Receive notification of a processing instruction.
// Process any pending starDocument and startElement first.
flushPending();
// Use a fairly nasty hack to tell if the next node is supposed to be
// unescaped text.
if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
{
startNonEscaping();
}
else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
{
endNonEscaping();
}
else
{
try
{
if (m_elemContext.m_startTagOpen)
{
closeStartTag();
m_elemContext.m_startTagOpen = false;
}
else if (m_needToCallStartDocument)
startDocumentInternal();
if (shouldIndent())
indent();
final java.io.Writer writer = m_writer;
//writer.write("<?" + target);
writer.write("<?");
writer.write(target);
if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
writer.write(' ");
//writer.write(data + ">"); // different from XML
writer.write(data); // different from XML
writer.write('>"); // different from XML
// Always output a newline char if not inside of an
// element. The whitespace is not significant in that
// case.
if (m_elemContext.m_currentElemDepth <= 0)
outputLineSep();
m_startNewLine = true;
}
catch(IOException e)
{
throw new SAXException(e);
}
}
// now generate the PI event
if (m_tracer != null)
super.fireEscapingEvent(target, data);
| public boolean | reset()
boolean ret = super.reset();
if (!ret)
return false;
initToHTMLStream();
return true;
| public void | setOmitMetaTag(boolean bool)Tells if the formatter should omit the META tag.
m_omitMetaTag = bool;
| public void | setOutputFormat(java.util.Properties format)Specifies an output format for this serializer. It the
serializer has already been associated with an output format,
it will switch to the new format. This method should not be
called while the serializer is in the process of serializing
a document.
m_specialEscapeURLs =
OutputPropertyUtils.getBooleanProperty(
OutputPropertiesFactory.S_USE_URL_ESCAPING,
format);
m_omitMetaTag =
OutputPropertyUtils.getBooleanProperty(
OutputPropertiesFactory.S_OMIT_META_TAG,
format);
super.setOutputFormat(format);
| public void | setOutputStream(java.io.OutputStream output)Specifies an output stream to which the document should be
serialized. This method should not be called while the
serializer is in the process of serializing a document.
The encoding specified in the output properties is used, or
if no encoding was specified, the default for the selected
output method.
try
{
Properties format;
if (null == m_format)
format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML);
else
format = m_format;
init(output, format, true);
}
catch (UnsupportedEncodingException uee)
{
// Should have been warned in init, I guess...
}
| public void | setSpecialEscapeURLs(boolean bool)Tells if the formatter should use special URL escaping.
m_specialEscapeURLs = bool;
| public void | startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
m_inDTD = true;
super.startDTD(name, publicId, systemId);
| protected void | startDocumentInternal()Receive notification of the beginning of a document.
super.startDocumentInternal();
m_needToCallStartDocument = false;
m_needToOutputDocTypeDecl = true;
m_startNewLine = false;
setOmitXMLDeclaration(true);
if (true == m_needToOutputDocTypeDecl)
{
String doctypeSystem = getDoctypeSystem();
String doctypePublic = getDoctypePublic();
if ((null != doctypeSystem) || (null != doctypePublic))
{
final java.io.Writer writer = m_writer;
try
{
writer.write("<!DOCTYPE HTML");
if (null != doctypePublic)
{
writer.write(" PUBLIC \"");
writer.write(doctypePublic);
writer.write('"");
}
if (null != doctypeSystem)
{
if (null == doctypePublic)
writer.write(" SYSTEM \"");
else
writer.write('"");
writer.write(doctypeSystem);
writer.write('"");
}
writer.write('>");
outputLineSep();
}
catch(IOException e)
{
throw new SAXException(e);
}
}
}
m_needToOutputDocTypeDecl = false;
| public void | startElement(java.lang.String namespaceURI, java.lang.String localName, java.lang.String name, org.xml.sax.Attributes atts)Receive notification of the beginning of an element.
ElemContext elemContext = m_elemContext;
// clean up any pending things first
if (elemContext.m_startTagOpen)
{
closeStartTag();
elemContext.m_startTagOpen = false;
}
else if (m_cdataTagOpen)
{
closeCDATA();
m_cdataTagOpen = false;
}
else if (m_needToCallStartDocument)
{
startDocumentInternal();
m_needToCallStartDocument = false;
}
// if this element has a namespace then treat it like XML
if (null != namespaceURI && namespaceURI.length() > 0)
{
super.startElement(namespaceURI, localName, name, atts);
return;
}
try
{
ElemDesc elemDesc = getElemDesc(name);
int elemFlags = elemDesc.getFlags();
// deal with indentation issues first
if (m_doIndent)
{
boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
if (m_ispreserve)
m_ispreserve = false;
else if (
(null != elemContext.m_elementName)
&& (!m_inBlockElem
|| isBlockElement) /* && !isWhiteSpaceSensitive */
)
{
m_startNewLine = true;
indent();
}
m_inBlockElem = !isBlockElement;
}
// save any attributes for later processing
if (atts != null)
addAttributes(atts);
m_isprevtext = false;
final java.io.Writer writer = m_writer;
writer.write('<");
writer.write(name);
if (m_tracer != null)
firePseudoAttributes();
if ((elemFlags & ElemDesc.EMPTY) != 0)
{
// an optimization for elements which are expected
// to be empty.
m_elemContext = elemContext.push();
/* XSLTC sometimes calls namespaceAfterStartElement()
* so we need to remember the name
*/
m_elemContext.m_elementName = name;
m_elemContext.m_elementDesc = elemDesc;
return;
}
else
{
elemContext = elemContext.push(namespaceURI,localName,name);
m_elemContext = elemContext;
elemContext.m_elementDesc = elemDesc;
elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
}
if ((elemFlags & ElemDesc.HEADELEM) != 0)
{
// This is the <HEAD> element, do some special processing
closeStartTag();
elemContext.m_startTagOpen = false;
if (!m_omitMetaTag)
{
if (m_doIndent)
indent();
writer.write(
"<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
String encoding = getEncoding();
String encode = Encodings.getMimeEncoding(encoding);
writer.write(encode);
writer.write("\">");
}
}
}
catch (IOException e)
{
throw new SAXException(e);
}
| public void | writeAttrString(java.io.Writer writer, java.lang.String string, java.lang.String encoding)Writes the specified string after substituting specials,
and UTF-16 surrogates for character references &#xnn .
final int end = string.length();
if (end > m_attrBuff.length)
{
m_attrBuff = new char[end * 2 + 1];
}
string.getChars(0, end, m_attrBuff, 0);
final char[] chars = m_attrBuff;
int cleanStart = 0;
int cleanLength = 0;
char ch = 0;
for (int i = 0; i < end; i++)
{
ch = chars[i];
// System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
// System.out.println("ch: "+(int)ch);
// System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
// System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
{
cleanLength++;
}
else if ('<" == ch || '>" == ch)
{
cleanLength++; // no escaping in this case, as specified in 15.2
}
else if (
('&" == ch) && ((i + 1) < end) && ('{" == chars[i + 1]))
{
cleanLength++; // no escaping in this case, as specified in 15.2
}
else
{
if (cleanLength > 0)
{
writer.write(chars,cleanStart,cleanLength);
cleanLength = 0;
}
int pos = accumDefaultEntity(writer, ch, i, chars, end, false, false);
if (i != pos)
{
i = pos - 1;
}
else
{
if (isUTF16Surrogate(ch))
{
writeUTF16Surrogate(ch, chars, i, end);
i++; // two input characters processed
// this increments by one and the for()
// loop itself increments by another one.
}
// The next is kind of a hack to keep from escaping in the case
// of Shift_JIS and the like.
/*
else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
&& (ch != 160))
{
writer.write(ch); // no escaping in this case
}
else
*/
String entityName = m_charInfo.getEntityNameForChar(ch);
if (null != entityName)
{
writer.write('&");
writer.write(entityName);
writer.write(';");
}
else if (escapingNotNeeded(ch))
{
writer.write(ch); // no escaping in this case
}
else
{
writer.write("");
writer.write(Integer.toString(ch));
writer.write(';");
}
}
cleanStart = i + 1;
}
} // end of for()
// are there any clean characters at the end of the array
// that we haven't processed yet?
if (cleanLength > 1)
{
// if the whole string can be written out as-is do so
// otherwise write out the clean chars at the end of the
// array
if (cleanStart == 0)
writer.write(string);
else
writer.write(chars, cleanStart, cleanLength);
}
else if (cleanLength == 1)
{
// a little optimization for 1 clean character
// (we could have let the previous if(...) handle them all)
writer.write(ch);
}
| public void | writeAttrURI(java.io.Writer writer, java.lang.String string, boolean doURLEscaping)Write the specified string after substituting non ASCII characters,
with %HH , where HH is the hex of the byte value.
// http://www.ietf.org/rfc/rfc2396.txt says:
// A URI is always in an "escaped" form, since escaping or unescaping a
// completed URI might change its semantics. Normally, the only time
// escape encodings can safely be made is when the URI is being created
// from its component parts; each component may have its own set of
// characters that are reserved, so only the mechanism responsible for
// generating or interpreting that component can determine whether or
// not escaping a character will change its semantics. Likewise, a URI
// must be separated into its components before the escaped characters
// within those components can be safely decoded.
//
// ...So we do our best to do limited escaping of the URL, without
// causing damage. If the URL is already properly escaped, in theory, this
// function should not change the string value.
final int end = string.length();
if (end > m_attrBuff.length)
{
m_attrBuff = new char[end*2 + 1];
}
string.getChars(0,end, m_attrBuff, 0);
final char[] chars = m_attrBuff;
int cleanStart = 0;
int cleanLength = 0;
char ch = 0;
for (int i = 0; i < end; i++)
{
ch = chars[i];
if ((ch < 32) || (ch > 126))
{
if (cleanLength > 0)
{
writer.write(chars, cleanStart, cleanLength);
cleanLength = 0;
}
if (doURLEscaping)
{
// Encode UTF16 to UTF8.
// Reference is Unicode, A Primer, by Tony Graham.
// Page 92.
// Note that Kay doesn't escape 0x20...
// if(ch == 0x20) // Not sure about this... -sb
// {
// writer.write(ch);
// }
// else
if (ch <= 0x7F)
{
writer.write('%");
writer.write(makeHHString(ch));
}
else if (ch <= 0x7FF)
{
// Clear low 6 bits before rotate, put high 4 bits in low byte,
// and set two high bits.
int high = (ch >> 6) | 0xC0;
int low = (ch & 0x3F) | 0x80;
// First 6 bits, + high bit
writer.write('%");
writer.write(makeHHString(high));
writer.write('%");
writer.write(makeHHString(low));
}
else if (isUTF16Surrogate(ch)) // high surrogate
{
// I'm sure this can be done in 3 instructions, but I choose
// to try and do it exactly like it is done in the book, at least
// until we are sure this is totally clean. I don't think performance
// is a big issue with this particular function, though I could be
// wrong. Also, the stuff below clearly does more masking than
// it needs to do.
// Clear high 6 bits.
int highSurrogate = ((int) ch) & 0x03FF;
// Middle 4 bits (wwww) + 1
// "Note that the value of wwww from the high surrogate bit pattern
// is incremented to make the uuuuu bit pattern in the scalar value
// so the surrogate pair don't address the BMP."
int wwww = ((highSurrogate & 0x03C0) >> 6);
int uuuuu = wwww + 1;
// next 4 bits
int zzzz = (highSurrogate & 0x003C) >> 2;
// low 2 bits
int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
// Get low surrogate character.
ch = chars[++i];
// Clear high 6 bits.
int lowSurrogate = ((int) ch) & 0x03FF;
// put the middle 4 bits into the bottom of yyyyyy (byte 3)
yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
// bottom 6 bits.
int xxxxxx = (lowSurrogate & 0x003F);
int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
int byte2 =
0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
int byte3 = 0x80 | yyyyyy;
int byte4 = 0x80 | xxxxxx;
writer.write('%");
writer.write(makeHHString(byte1));
writer.write('%");
writer.write(makeHHString(byte2));
writer.write('%");
writer.write(makeHHString(byte3));
writer.write('%");
writer.write(makeHHString(byte4));
}
else
{
int high = (ch >> 12) | 0xE0; // top 4 bits
int middle = ((ch & 0x0FC0) >> 6) | 0x80;
// middle 6 bits
int low = (ch & 0x3F) | 0x80;
// First 6 bits, + high bit
writer.write('%");
writer.write(makeHHString(high));
writer.write('%");
writer.write(makeHHString(middle));
writer.write('%");
writer.write(makeHHString(low));
}
}
else if (escapingNotNeeded(ch))
{
writer.write(ch);
}
else
{
writer.write("");
writer.write(Integer.toString(ch));
writer.write(';");
}
// In this character range we have first written out any previously accumulated
// "clean" characters, then processed the current more complicated character,
// which may have incremented "i".
// We now we reset the next possible clean character.
cleanStart = i + 1;
}
// Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
// not allowing quotes in the URI proper syntax, nor in the fragment
// identifier, we believe that it's OK to double escape quotes.
else if (ch == '"")
{
// If the character is a '%' number number, try to avoid double-escaping.
// There is a question if this is legal behavior.
// Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
// The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
// if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
// We are no longer escaping '%'
if (cleanLength > 0)
{
writer.write(chars, cleanStart, cleanLength);
cleanLength = 0;
}
// Mike Kay encodes this as ", so he may know something I don't?
if (doURLEscaping)
writer.write("%22");
else
writer.write("""); // we have to escape this, I guess.
// We have written out any clean characters, then the escaped '%' and now we
// We now we reset the next possible clean character.
cleanStart = i + 1;
}
else
{
// no processing for this character, just count how
// many characters in a row that we have that need no processing
cleanLength++;
}
}
// are there any clean characters at the end of the array
// that we haven't processed yet?
if (cleanLength > 1)
{
// if the whole string can be written out as-is do so
// otherwise write out the clean chars at the end of the
// array
if (cleanStart == 0)
writer.write(string);
else
writer.write(chars, cleanStart, cleanLength);
}
else if (cleanLength == 1)
{
// a little optimization for 1 clean character
// (we could have let the previous if(...) handle them all)
writer.write(ch);
}
|
|