URI
public class URI extends Object implements Serializable
A class to represent a Uniform Resource Identifier (URI). This class
is designed to handle the parsing of URIs and provide access to
the various components (scheme, host, port, userinfo, path, query
string and fragment) that may constitute a URI.
Parsing of a URI specification is done according to the URI
syntax described in
RFC 2396,
and amended by
RFC 2732.
Every absolute URI consists of a scheme, followed by a colon (':'),
followed by a scheme-specific part. For URIs that follow the
"generic URI" syntax, the scheme-specific part begins with two
slashes ("//") and may be followed by an authority segment (comprised
of user information, host, and port), path segment, query segment
and fragment. Note that RFC 2396 no longer specifies the use of the
parameters segment and excludes the "user:password" syntax as part of
the authority segment. If "user:password" appears in a URI, the entire
user/password string is stored as userinfo.
For URIs that do not follow the "generic URI" syntax (e.g. mailto),
the entire scheme-specific part is treated as the "path" portion
of the URI.
Note that, unlike the java.net.URL class, this class does not provide
any built-in network access functionality nor does it provide any
scheme-specific functionality (for example, it does not know a
default port for a specific scheme). Rather, it only knows the
grammar and basic set of operations that can be applied to a URI. |
Fields Summary |
---|
static final long | serialVersionUIDSerialization version. | private static final byte[] | fgLookupTable | private static final int | RESERVED_CHARACTERSreserved characters ;/?:@&=+$,[] | private static final int | MARK_CHARACTERSURI punctuation mark characters: -_.!~*'() - these, combined with
alphanumerics, constitute the "unreserved" characters | private static final int | SCHEME_CHARACTERSscheme can be composed of alphanumerics and these characters: +-. | private static final int | USERINFO_CHARACTERSuserinfo can be composed of unreserved, escaped and these
characters: ;:&=+$, | private static final int | ASCII_ALPHA_CHARACTERSASCII letter characters | private static final int | ASCII_DIGIT_CHARACTERSASCII digit characters | private static final int | ASCII_HEX_CHARACTERSASCII hex characters | private static final int | PATH_CHARACTERSPath characters | private static final int | MASK_ALPHA_NUMERICMask for alpha-numeric characters | private static final int | MASK_UNRESERVED_MASKMask for unreserved characters | private static final int | MASK_URI_CHARACTERMask for URI allowable characters except for % | private static final int | MASK_SCHEME_CHARACTERMask for scheme characters | private static final int | MASK_USERINFO_CHARACTERMask for userinfo characters | private static final int | MASK_PATH_CHARACTERMask for path characters | private String | m_schemeStores the scheme (usually the protocol) for this URI. | private String | m_userinfoIf specified, stores the userinfo for this URI; otherwise null | private String | m_hostIf specified, stores the host for this URI; otherwise null | private int | m_portIf specified, stores the port for this URI; otherwise -1 | private String | m_regAuthorityIf specified, stores the registry based authority for this URI; otherwise null | private String | m_pathIf specified, stores the path for this URI; otherwise null | private String | m_queryStringIf specified, stores the query string for this URI; otherwise
null. | private String | m_fragmentIf specified, stores the fragment for this URI; otherwise null | private static boolean | DEBUG |
Constructors Summary |
---|
public URI()Construct a new and uninitialized URI.
| public URI(URI p_other)Construct a new URI from another URI. All fields for this URI are
set equal to the fields of the URI passed in.
initialize(p_other);
| public URI(String p_uriSpec)Construct a new URI from a URI specification string. If the
specification follows the "generic URI" syntax, (two slashes
following the first colon), the specification will be parsed
accordingly - setting the scheme, userinfo, host, port, path, query
string and fragment fields as necessary. If the specification does
not follow the "generic URI" syntax, the specification is parsed
into a scheme and scheme-specific part (stored as the path) only.
this((URI)null, p_uriSpec);
| public URI(String p_uriSpec, boolean allowNonAbsoluteURI)Construct a new URI from a URI specification string. If the
specification follows the "generic URI" syntax, (two slashes
following the first colon), the specification will be parsed
accordingly - setting the scheme, userinfo, host, port, path, query
string and fragment fields as necessary. If the specification does
not follow the "generic URI" syntax, the specification is parsed
into a scheme and scheme-specific part (stored as the path) only.
Construct a relative URI if boolean is assigned to "true"
and p_uriSpec is not valid absolute URI, instead of throwing an exception.
this((URI)null, p_uriSpec, allowNonAbsoluteURI);
| public URI(URI p_base, String p_uriSpec)Construct a new URI from a base URI and a URI specification string.
The URI specification string may be a relative URI.
initialize(p_base, p_uriSpec);
| public URI(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI)Construct a new URI from a base URI and a URI specification string.
The URI specification string may be a relative URI.
Construct a relative URI if boolean is assigned to "true"
and p_uriSpec is not valid absolute URI and p_base is null
instead of throwing an exception.
initialize(p_base, p_uriSpec, allowNonAbsoluteURI);
| public URI(String p_scheme, String p_schemeSpecificPart)Construct a new URI that does not follow the generic URI syntax.
Only the scheme and scheme-specific part (stored as the path) are
initialized.
if (p_scheme == null || p_scheme.trim().length() == 0) {
throw new MalformedURIException(
"Cannot construct URI with null/empty scheme!");
}
if (p_schemeSpecificPart == null ||
p_schemeSpecificPart.trim().length() == 0) {
throw new MalformedURIException(
"Cannot construct URI with null/empty scheme-specific part!");
}
setScheme(p_scheme);
setPath(p_schemeSpecificPart);
| public URI(String p_scheme, String p_host, String p_path, String p_queryString, String p_fragment)Construct a new URI that follows the generic URI syntax from its
component parts. Each component is validated for syntax and some
basic semantic checks are performed as well. See the individual
setter methods for specifics.
this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
| public URI(String p_scheme, String p_userinfo, String p_host, int p_port, String p_path, String p_queryString, String p_fragment)Construct a new URI that follows the generic URI syntax from its
component parts. Each component is validated for syntax and some
basic semantic checks are performed as well. See the individual
setter methods for specifics.
if (p_scheme == null || p_scheme.trim().length() == 0) {
throw new MalformedURIException("Scheme is required!");
}
if (p_host == null) {
if (p_userinfo != null) {
throw new MalformedURIException(
"Userinfo may not be specified if host is not specified!");
}
if (p_port != -1) {
throw new MalformedURIException(
"Port may not be specified if host is not specified!");
}
}
if (p_path != null) {
if (p_path.indexOf('?") != -1 && p_queryString != null) {
throw new MalformedURIException(
"Query string cannot be specified in path and query string!");
}
if (p_path.indexOf('#") != -1 && p_fragment != null) {
throw new MalformedURIException(
"Fragment cannot be specified in both the path and fragment!");
}
}
setScheme(p_scheme);
setHost(p_host);
setPort(p_port);
setUserinfo(p_userinfo);
setPath(p_path);
setQueryString(p_queryString);
setFragment(p_fragment);
|
Methods Summary |
---|
| public void | absolutize(org.apache.xerces.util.URI p_base)Absolutize URI with given base URI.
Resolves this (possibly relative) URI against p_base following the steps of
RFC 2396 Section 5.2 and updates the fields of this URI in place. A URI that
already has a scheme is left unchanged. When the relative path has to be
merged with the base path, "." segments and "<segment>/.." pairs are removed,
while ".." segments that have no preceding segment to cancel are kept.
// check to see if this is the current doc - RFC 2396 5.2 #2
// note that this is slightly different from the RFC spec in that
// we don't include the check for query string being null
// - this handles cases where the urispec is just a query
// string or a fragment (e.g. "?y" or "#s") -
// see <http://www.ics.uci.edu/~fielding/url/test1.html> which
// identified this as a bug in the RFC
if (m_path.length() == 0 && m_scheme == null &&
    m_host == null && m_regAuthority == null) {
    m_scheme = p_base.getScheme();
    m_userinfo = p_base.getUserinfo();
    m_host = p_base.getHost();
    m_port = p_base.getPort();
    m_regAuthority = p_base.getRegBasedAuthority();
    m_path = p_base.getPath();
    // the base query (and then the base fragment) is only taken over
    // when this URI did not specify a query string of its own
    if (m_queryString == null) {
        m_queryString = p_base.getQueryString();
        if (m_fragment == null) {
            m_fragment = p_base.getFragment();
        }
    }
    return;
}
// check for scheme - RFC 2396 5.2 #3
// if we found a scheme, it means absolute URI, so we're done
if (m_scheme == null) {
    m_scheme = p_base.getScheme();
}
else {
    return;
}
// check for authority - RFC 2396 5.2 #4
// if we found a host, then we've got a network path, so we're done
if (m_host == null && m_regAuthority == null) {
    m_userinfo = p_base.getUserinfo();
    m_host = p_base.getHost();
    m_port = p_base.getPort();
    m_regAuthority = p_base.getRegBasedAuthority();
}
else {
    return;
}
// check for absolute path - RFC 2396 5.2 #5
if (m_path.length() > 0 &&
    m_path.startsWith("/")) {
    return;
}
// if we get to this point, we need to resolve relative path
// RFC 2396 5.2 #6
String path = "";
String basePath = p_base.getPath();
// 6a - get all but the last segment of the base URI path
if (basePath != null && basePath.length() > 0) {
    int lastSlash = basePath.lastIndexOf('/');
    if (lastSlash != -1) {
        path = basePath.substring(0, lastSlash+1);
    }
}
else if (m_path.length() > 0) {
    path = "/";
}
// 6b - append the relative URI path
path = path.concat(m_path);
// 6c - remove all "./" where "." is a complete path segment
int index = -1;
while ((index = path.indexOf("/./")) != -1) {
    path = path.substring(0, index+1).concat(path.substring(index+3));
}
// 6d - remove "." if path ends with "." as a complete path segment
if (path.endsWith("/.")) {
    path = path.substring(0, path.length()-1);
}
// 6e - remove all "<segment>/../" where "<segment>" is a complete
// path segment not equal to ".."
index = 1;
int segIndex = -1;
String tempString = null;
while ((index = path.indexOf("/../", index)) > 0) {
    // look at the text in front of the occurrence found at 'index',
    // not in front of the first "/../" in the path; an earlier
    // occurrence that could not be removed must not hide this one
    tempString = path.substring(0, index);
    segIndex = tempString.lastIndexOf('/');
    if (segIndex != -1) {
        // segIndex+1 skips the '/' so the bare segment is compared to ".."
        if (!tempString.substring(segIndex+1).equals("..")) {
            path = path.substring(0, segIndex+1).concat(path.substring(index+4));
            // step back so a "/../" that now follows the previous
            // segment is picked up by the next search
            index = segIndex;
        }
        else {
            index += 4;
        }
    }
    else {
        index += 4;
    }
}
// 6f - remove ending "<segment>/.." where "<segment>" is a
// complete path segment not equal to ".."
if (path.endsWith("/..")) {
    tempString = path.substring(0, path.length()-3);
    segIndex = tempString.lastIndexOf('/');
    if (segIndex != -1 && !tempString.substring(segIndex+1).equals("..")) {
        path = path.substring(0, segIndex+1);
    }
}
m_path = path;
| public void | appendPath(java.lang.String p_addToPath)Append to the end of the path of this URI. If the current path does
not end in a slash and the path to be appended does not begin with
a slash, a slash will be appended to the current path before the
new segment is added. Also, if the current path ends in a slash
and the new segment begins with a slash, the extra slash will be
removed before the new segment is appended.
if (p_addToPath == null || p_addToPath.trim().length() == 0) {
return;
}
if (!isURIString(p_addToPath)) {
throw new MalformedURIException(
"Path contains invalid character!");
}
if (m_path == null || m_path.trim().length() == 0) {
if (p_addToPath.startsWith("/")) {
m_path = p_addToPath;
}
else {
m_path = "/" + p_addToPath;
}
}
else if (m_path.endsWith("/")) {
if (p_addToPath.startsWith("/")) {
m_path = m_path.concat(p_addToPath.substring(1));
}
else {
m_path = m_path.concat(p_addToPath);
}
}
else {
if (p_addToPath.startsWith("/")) {
m_path = m_path.concat(p_addToPath);
}
else {
m_path = m_path.concat("/" + p_addToPath);
}
}
| public boolean | equals(java.lang.Object p_test)Determines if the passed-in Object is equivalent to this URI.
Two URIs are equivalent when their scheme, userinfo, host, port, registry
based authority, path, query string and fragment all match. String
components are compared with String.equals (case-sensitive); a null
component only matches another null component. Returns false for any
object that is not a URI.
if (!(p_test instanceof URI)) {
    return false;
}
URI testURI = (URI) p_test;
// For each String field: null matches only null, otherwise String.equals
// decides (String.equals(null) is false, so a one-sided null never matches).
return (m_scheme == null ? testURI.m_scheme == null
                         : m_scheme.equals(testURI.m_scheme)) &&
       (m_userinfo == null ? testURI.m_userinfo == null
                           : m_userinfo.equals(testURI.m_userinfo)) &&
       (m_host == null ? testURI.m_host == null
                       : m_host.equals(testURI.m_host)) &&
       m_port == testURI.m_port &&
       // registry based authority is part of the URI's identity as well;
       // without this check URIs differing only in reg_name compare equal
       (m_regAuthority == null ? testURI.m_regAuthority == null
                               : m_regAuthority.equals(testURI.m_regAuthority)) &&
       (m_path == null ? testURI.m_path == null
                       : m_path.equals(testURI.m_path)) &&
       (m_queryString == null ? testURI.m_queryString == null
                              : m_queryString.equals(testURI.m_queryString)) &&
       (m_fragment == null ? testURI.m_fragment == null
                           : m_fragment.equals(testURI.m_fragment));
| public java.lang.String | getAuthority()Get the authority for this URI.
StringBuffer authority = new StringBuffer();
if (m_host != null || m_regAuthority != null) {
authority.append("//");
// Server based authority.
if (m_host != null) {
if (m_userinfo != null) {
authority.append(m_userinfo);
authority.append('@");
}
authority.append(m_host);
if (m_port != -1) {
authority.append(':");
authority.append(m_port);
}
}
// Registry based authority.
else {
authority.append(m_regAuthority);
}
}
return authority.toString();
| public java.lang.String | getFragment()Get the fragment for this URI.
return m_fragment;
| public java.lang.String | getHost()Get the host for this URI.
return m_host;
| public java.lang.String | getPath(boolean p_includeQueryString, boolean p_includeFragment)Get the path for this URI (optionally with the query string and
fragment). The query string is appended after a '?' and the fragment after
a '#', each only when requested and not null. A URI whose path has not been
set contributes an empty path instead of causing a NullPointerException.
StringBuffer pathString = new StringBuffer();
// new StringBuffer((String) null) throws, so guard the unset path the
// same way getSchemeSpecificPart() does
if (m_path != null) {
    pathString.append(m_path);
}
if (p_includeQueryString && m_queryString != null) {
    pathString.append('?');
    pathString.append(m_queryString);
}
if (p_includeFragment && m_fragment != null) {
    pathString.append('#');
    pathString.append(m_fragment);
}
return pathString.toString();
| public java.lang.String | getPath()Get the path for this URI. Note that the value returned is the path
only and does not include the query string or fragment.
return m_path;
| public int | getPort()Get the port for this URI.
return m_port;
| public java.lang.String | getQueryString()Get the query string for this URI.
return m_queryString;
| public java.lang.String | getRegBasedAuthority()Get the registry based authority for this URI.
return m_regAuthority;
| public java.lang.String | getScheme()Get the scheme for this URI.
return m_scheme;
| public java.lang.String | getSchemeSpecificPart()Get the scheme-specific part for this URI (everything following the
scheme and the first colon). See RFC 2396 Section 5.2 for spec.
StringBuffer schemespec = new StringBuffer();
if (m_host != null || m_regAuthority != null) {
schemespec.append("//");
// Server based authority.
if (m_host != null) {
if (m_userinfo != null) {
schemespec.append(m_userinfo);
schemespec.append('@");
}
schemespec.append(m_host);
if (m_port != -1) {
schemespec.append(':");
schemespec.append(m_port);
}
}
// Registry based authority.
else {
schemespec.append(m_regAuthority);
}
}
if (m_path != null) {
schemespec.append((m_path));
}
if (m_queryString != null) {
schemespec.append('?");
schemespec.append(m_queryString);
}
if (m_fragment != null) {
schemespec.append('#");
schemespec.append(m_fragment);
}
return schemespec.toString();
| public java.lang.String | getUserinfo()Get the userinfo for this URI.
return m_userinfo;
| private void | initialize(org.apache.xerces.util.URI p_other)Initialize all fields of this URI from another URI.
m_scheme = p_other.getScheme();
m_userinfo = p_other.getUserinfo();
m_host = p_other.getHost();
m_port = p_other.getPort();
m_regAuthority = p_other.getRegBasedAuthority();
m_path = p_other.getPath();
m_queryString = p_other.getQueryString();
m_fragment = p_other.getFragment();
| private void | initialize(org.apache.xerces.util.URI p_base, java.lang.String p_uriSpec, boolean allowNonAbsoluteURI)Initializes this URI from a base URI and a URI specification string.
See RFC 2396 Section 4 and Appendix B for specifications on parsing
the URI and Section 5 for specifications on resolving relative URIs
and relative paths.
String uriSpec = p_uriSpec;
int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0;
if (p_base == null && uriSpecLen == 0) {
if (allowNonAbsoluteURI) {
m_path = "";
return;
}
throw new MalformedURIException("Cannot initialize URI with empty parameters.");
}
// just make a copy of the base if spec is empty
if (uriSpecLen == 0) {
initialize(p_base);
return;
}
int index = 0;
// Check for scheme, which must be before '/', '?' or '#'.
int colonIdx = uriSpec.indexOf(':");
if (colonIdx != -1) {
final int searchFrom = colonIdx - 1;
// search backwards starting from character before ':'.
int slashIdx = uriSpec.lastIndexOf('/", searchFrom);
int queryIdx = uriSpec.lastIndexOf('?", searchFrom);
int fragmentIdx = uriSpec.lastIndexOf('#", searchFrom);
if (colonIdx == 0 || slashIdx != -1 ||
queryIdx != -1 || fragmentIdx != -1) {
// A standalone base is a valid URI according to spec
if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 && !allowNonAbsoluteURI)) {
throw new MalformedURIException("No scheme found in URI.");
}
}
else {
initializeScheme(uriSpec);
index = m_scheme.length()+1;
// Neither 'scheme:' or 'scheme:#fragment' are valid URIs.
if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#") {
throw new MalformedURIException("Scheme specific part cannot be empty.");
}
}
}
else if (p_base == null && uriSpec.indexOf('#") != 0 && !allowNonAbsoluteURI) {
throw new MalformedURIException("No scheme found in URI.");
}
// Two slashes means we may have authority, but definitely means we're either
// matching net_path or abs_path. These two productions are ambiguous in that
// every net_path (except those containing an IPv6Reference) is an abs_path.
// RFC 2396 resolves this ambiguity by applying a greedy left most matching rule.
// Try matching net_path first, and if that fails we don't have authority so
// then attempt to match abs_path.
//
// net_path = "//" authority [ abs_path ]
// abs_path = "/" path_segments
if (((index+1) < uriSpecLen) &&
(uriSpec.charAt(index) == '/" && uriSpec.charAt(index+1) == '/")) {
index += 2;
int startPos = index;
// Authority will be everything up to path, query or fragment
char testChar = '\0";
while (index < uriSpecLen) {
testChar = uriSpec.charAt(index);
if (testChar == '/" || testChar == '?" || testChar == '#") {
break;
}
index++;
}
// Attempt to parse authority. If the section is an empty string
// this is a valid server based authority, so set the host to this
// value.
if (index > startPos) {
// If we didn't find authority we need to back up. Attempt to
// match against abs_path next.
if (!initializeAuthority(uriSpec.substring(startPos, index))) {
index = startPos - 2;
}
}
else {
m_host = "";
}
}
initializePath(uriSpec, index);
// Resolve relative URI to base URI - see RFC 2396 Section 5.2
// In some cases, it might make more sense to throw an exception
// (when scheme is specified is the string spec and the base URI
// is also specified, for example), but we're just following the
// RFC specifications
if (p_base != null) {
absolutize(p_base);
}
| private void | initialize(org.apache.xerces.util.URI p_base, java.lang.String p_uriSpec)Initializes this URI from a base URI and a URI specification string.
See RFC 2396 Section 4 and Appendix B for specifications on parsing
the URI and Section 5 for specifications on resolving relative URIs
and relative paths.
A specification without a scheme is only accepted when a base URI is given
or when the specification starts with a fragment ('#'); otherwise a
MalformedURIException is thrown.
// Strict parsing is exactly the three-argument overload with
// allowNonAbsoluteURI switched off, so delegate instead of keeping a
// second copy of the parsing logic in sync.
initialize(p_base, p_uriSpec, false);
| private boolean | initializeAuthority(java.lang.String p_uriSpec)Initialize the authority (either server or registry based)
for this URI from a URI string spec.
int index = 0;
int start = 0;
int end = p_uriSpec.length();
char testChar = '\0";
String userinfo = null;
// userinfo is everything up to @
if (p_uriSpec.indexOf('@", start) != -1) {
while (index < end) {
testChar = p_uriSpec.charAt(index);
if (testChar == '@") {
break;
}
index++;
}
userinfo = p_uriSpec.substring(start, index);
index++;
}
// host is everything up to last ':', or up to
// and including ']' if followed by ':'.
String host = null;
start = index;
boolean hasPort = false;
if (index < end) {
if (p_uriSpec.charAt(start) == '[") {
int bracketIndex = p_uriSpec.indexOf(']", start);
index = (bracketIndex != -1) ? bracketIndex : end;
if (index+1 < end && p_uriSpec.charAt(index+1) == ':") {
++index;
hasPort = true;
}
else {
index = end;
}
}
else {
int colonIndex = p_uriSpec.lastIndexOf(':", end);
index = (colonIndex > start) ? colonIndex : end;
hasPort = (index != end);
}
}
host = p_uriSpec.substring(start, index);
int port = -1;
if (host.length() > 0) {
// port
if (hasPort) {
index++;
start = index;
while (index < end) {
index++;
}
String portStr = p_uriSpec.substring(start, index);
if (portStr.length() > 0) {
// REVISIT: Remove this code.
/** for (int i = 0; i < portStr.length(); i++) {
if (!isDigit(portStr.charAt(i))) {
throw new MalformedURIException(
portStr +
" is invalid. Port should only contain digits!");
}
}**/
// REVISIT: Remove this code.
// Store port value as string instead of integer.
try {
port = Integer.parseInt(portStr);
if (port == -1) --port;
}
catch (NumberFormatException nfe) {
port = -2;
}
}
}
}
if (isValidServerBasedAuthority(host, port, userinfo)) {
m_host = host;
m_port = port;
m_userinfo = userinfo;
return true;
}
// Note: Registry based authority is being removed from a
// new spec for URI which would obsolete RFC 2396. If the
// spec is added to XML errata, processing of reg_name
// needs to be removed. - mrglavas.
else if (isValidRegistryBasedAuthority(p_uriSpec)) {
m_regAuthority = p_uriSpec;
return true;
}
return false;
| private void | initializePath(java.lang.String p_uriSpec, int p_nStartIndex)Initialize the path for this URI from a URI string spec.
if (p_uriSpec == null) {
throw new MalformedURIException(
"Cannot initialize path from null string!");
}
int index = p_nStartIndex;
int start = p_nStartIndex;
int end = p_uriSpec.length();
char testChar = '\0";
// path - everything up to query string or fragment
if (start < end) {
// RFC 2732 only allows '[' and ']' to appear in the opaque part.
if (getScheme() == null || p_uriSpec.charAt(start) == '/") {
// Scan path.
// abs_path = "/" path_segments
// rel_path = rel_segment [ abs_path ]
while (index < end) {
testChar = p_uriSpec.charAt(index);
// check for valid escape sequence
if (testChar == '%") {
if (index+2 >= end ||
!isHex(p_uriSpec.charAt(index+1)) ||
!isHex(p_uriSpec.charAt(index+2))) {
throw new MalformedURIException(
"Path contains invalid escape sequence!");
}
index += 2;
}
// Path segments cannot contain '[' or ']' since pchar
// production was not changed by RFC 2732.
else if (!isPathCharacter(testChar)) {
if (testChar == '?" || testChar == '#") {
break;
}
throw new MalformedURIException(
"Path contains invalid character: " + testChar);
}
++index;
}
}
else {
// Scan opaque part.
// opaque_part = uric_no_slash *uric
while (index < end) {
testChar = p_uriSpec.charAt(index);
if (testChar == '?" || testChar == '#") {
break;
}
// check for valid escape sequence
if (testChar == '%") {
if (index+2 >= end ||
!isHex(p_uriSpec.charAt(index+1)) ||
!isHex(p_uriSpec.charAt(index+2))) {
throw new MalformedURIException(
"Opaque part contains invalid escape sequence!");
}
index += 2;
}
// If the scheme specific part is opaque, it can contain '['
// and ']'. uric_no_slash wasn't modified by RFC 2732, which
// I've interpreted as an error in the spec, since the
// production should be equivalent to (uric - '/'), and uric
// contains '[' and ']'. - mrglavas
else if (!isURICharacter(testChar)) {
throw new MalformedURIException(
"Opaque part contains invalid character: " + testChar);
}
++index;
}
}
}
m_path = p_uriSpec.substring(start, index);
// query - starts with ? and up to fragment or end
if (testChar == '?") {
index++;
start = index;
while (index < end) {
testChar = p_uriSpec.charAt(index);
if (testChar == '#") {
break;
}
if (testChar == '%") {
if (index+2 >= end ||
!isHex(p_uriSpec.charAt(index+1)) ||
!isHex(p_uriSpec.charAt(index+2))) {
throw new MalformedURIException(
"Query string contains invalid escape sequence!");
}
index += 2;
}
else if (!isURICharacter(testChar)) {
throw new MalformedURIException(
"Query string contains invalid character: " + testChar);
}
index++;
}
m_queryString = p_uriSpec.substring(start, index);
}
// fragment - starts with #
if (testChar == '#") {
index++;
start = index;
while (index < end) {
testChar = p_uriSpec.charAt(index);
if (testChar == '%") {
if (index+2 >= end ||
!isHex(p_uriSpec.charAt(index+1)) ||
!isHex(p_uriSpec.charAt(index+2))) {
throw new MalformedURIException(
"Fragment contains invalid escape sequence!");
}
index += 2;
}
else if (!isURICharacter(testChar)) {
throw new MalformedURIException(
"Fragment contains invalid character: "+testChar);
}
index++;
}
m_fragment = p_uriSpec.substring(start, index);
}
| private void | initializeScheme(java.lang.String p_uriSpec)Initialize the scheme for this URI from a URI string spec.
int uriSpecLen = p_uriSpec.length();
int index = 0;
String scheme = null;
char testChar = '\0";
while (index < uriSpecLen) {
testChar = p_uriSpec.charAt(index);
if (testChar == ':" || testChar == '/" ||
testChar == '?" || testChar == '#") {
break;
}
index++;
}
scheme = p_uriSpec.substring(0, index);
if (scheme.length() == 0) {
throw new MalformedURIException("No scheme found in URI.");
}
else {
setScheme(scheme);
}
| public boolean | isAbsoluteURI()Returns whether this URI represents an absolute URI.
// presence of the scheme means absolute uri
return (m_scheme != null);
| private static boolean | isAlpha(char p_char)Determine whether a char is an alphabetic character: a-z or A-Z
return ((p_char >= 'a" && p_char <= 'z") || (p_char >= 'A" && p_char <= 'Z" ));
| private static boolean | isAlphanum(char p_char)Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
return (p_char <= 'z" && (fgLookupTable[p_char] & MASK_ALPHA_NUMERIC) != 0);
| public static boolean | isConformantSchemeName(java.lang.String p_scheme)Determine whether a scheme conforms to the rules for a scheme name.
A scheme is conformant if it starts with an alphabetic character (a-z or
A-Z), and otherwise contains only alphanumerics, '+', '-' and '.'.
if (p_scheme == null || p_scheme.trim().length() == 0) {
return false;
}
if (!isAlpha(p_scheme.charAt(0))) {
return false;
}
char testChar;
int schemeLength = p_scheme.length();
for (int i = 1; i < schemeLength; ++i) {
testChar = p_scheme.charAt(i);
if (!isSchemeCharacter(testChar)) {
return false;
}
}
return true;
| private static boolean | isDigit(char p_char)Determine whether a char is a digit.
return p_char >= '0" && p_char <= '9";
| public boolean | isGenericURI()Get the indicator as to whether this URI uses the "generic URI"
syntax.
// presence of the host (whether valid or empty) means
// double-slashes which means generic uri
return (m_host != null);
| private static boolean | isHex(char p_char)Determine whether a character is a hexadecimal character.
return (p_char <= 'f" && (fgLookupTable[p_char] & ASCII_HEX_CHARACTERS) != 0);
| private static boolean | isPathCharacter(char p_char)Determine whether a char is a path character.
return (p_char <= '~" && (fgLookupTable[p_char] & MASK_PATH_CHARACTER) != 0);
| private static boolean | isReservedCharacter(char p_char)Determine whether a character is a reserved character:
';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '[', or ']'
return (p_char <= ']" && (fgLookupTable[p_char] & RESERVED_CHARACTERS) != 0);
| private static boolean | isSchemeCharacter(char p_char)Determine whether a char is a scheme character.
return (p_char <= 'z" && (fgLookupTable[p_char] & MASK_SCHEME_CHARACTER) != 0);
| private static boolean | isURICharacter(char p_char)Determine whether a char is a URI character (reserved or
unreserved, not including '%' for escaped octets).
return (p_char <= '~" && (fgLookupTable[p_char] & MASK_URI_CHARACTER) != 0);
| private static boolean | isURIString(java.lang.String p_uric)Determine whether a given string contains only URI characters (also
called "uric" in RFC 2396). uric consist of all reserved
characters, unreserved characters and escaped characters.
if (p_uric == null) {
return false;
}
int end = p_uric.length();
char testChar = '\0";
for (int i = 0; i < end; i++) {
testChar = p_uric.charAt(i);
if (testChar == '%") {
if (i+2 >= end ||
!isHex(p_uric.charAt(i+1)) ||
!isHex(p_uric.charAt(i+2))) {
return false;
}
else {
i += 2;
continue;
}
}
if (isURICharacter(testChar)) {
continue;
}
else {
return false;
}
}
return true;
| private static boolean | isUnreservedCharacter(char p_char)Determine whether a char is an unreserved character.
return (p_char <= '~" && (fgLookupTable[p_char] & MASK_UNRESERVED_MASK) != 0);
| private static boolean | isUserinfoCharacter(char p_char)Determine whether a char is a userinfo character.
Userinfo may contain unreserved characters (alphanumerics and the mark
characters -_.!~*'()) plus ;:&=+$, - escaped octets are checked by the caller.
// The range guard must reach '~': it is a mark character and therefore
// valid in userinfo, but it sorts after 'z'. The lookup table is indexed
// up to '~' by the other predicates (e.g. isUnreservedCharacter).
// NOTE(review): assumes fgLookupTable carries no userinfo bit for
// '{', '|' and '}' - confirm against the table initializer.
return (p_char <= '~' && (fgLookupTable[p_char] & MASK_USERINFO_CHARACTER) != 0);
| private boolean | isValidRegistryBasedAuthority(java.lang.String authority)Determines whether the given string is a registry based authority.
int index = 0;
int end = authority.length();
char testChar;
while (index < end) {
testChar = authority.charAt(index);
// check for valid escape sequence
if (testChar == '%") {
if (index+2 >= end ||
!isHex(authority.charAt(index+1)) ||
!isHex(authority.charAt(index+2))) {
return false;
}
index += 2;
}
// can check against path characters because the set
// is the same except for '/' which we've already excluded.
else if (!isPathCharacter(testChar)) {
return false;
}
++index;
}
return true;
| private boolean | isValidServerBasedAuthority(java.lang.String host, int port, java.lang.String userinfo)Determines whether the components host, port, and user info
are valid as a server authority.
// Check if the host is well formed.
if (!isWellFormedAddress(host)) {
return false;
}
// Check that port is well formed if it exists.
// REVISIT: There's no restriction on port value ranges, but
// perform the same check as in setPort to be consistent. Pass
// in a string to this method instead of an integer.
if (port < -1 || port > 65535) {
return false;
}
// Check that userinfo is well formed if it exists.
if (userinfo != null) {
// Userinfo can contain alphanumerics, mark characters, escaped
// and ';',':','&','=','+','$',','
int index = 0;
int end = userinfo.length();
char testChar = '\0";
while (index < end) {
testChar = userinfo.charAt(index);
if (testChar == '%") {
if (index+2 >= end ||
!isHex(userinfo.charAt(index+1)) ||
!isHex(userinfo.charAt(index+2))) {
return false;
}
index += 2;
}
else if (!isUserinfoCharacter(testChar)) {
return false;
}
++index;
}
}
return true;
| public static boolean | isWellFormedAddress(java.lang.String address)Determine whether a string is syntactically capable of representing
a valid IPv4 address, IPv6 reference or the domain name of a network host.
A valid IPv4 address consists of four decimal digit groups separated by a
'.'. Each group must consist of one to three digits. See RFC 2732 Section 3,
and RFC 2373 Section 2.2, for the definition of IPv6 references. A hostname
consists of domain labels (each of which must begin and end with an alphanumeric
but may contain '-') separated & by a '.'. See RFC 2396 Section 3.2.2.
if (address == null) {
return false;
}
int addrLength = address.length();
if (addrLength == 0) {
return false;
}
// Check if the host is a valid IPv6reference.
if (address.startsWith("[")) {
return isWellFormedIPv6Reference(address);
}
// Cannot start with a '.', '-', or end with a '-'.
if (address.startsWith(".") ||
address.startsWith("-") ||
address.endsWith("-")) {
return false;
}
// rightmost domain label starting with digit indicates IP address
// since top level domain label can only start with an alpha
// see RFC 2396 Section 3.2.2
int index = address.lastIndexOf('.");
if (address.endsWith(".")) {
index = address.substring(0, index).lastIndexOf('.");
}
if (index+1 < addrLength && isDigit(address.charAt(index+1))) {
return isWellFormedIPv4Address(address);
}
else {
// hostname = *( domainlabel "." ) toplabel [ "." ]
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
// RFC 2396 states that hostnames take the form described in
// RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According
// to RFC 1034, hostnames are limited to 255 characters.
if (addrLength > 255) {
return false;
}
// domain labels can contain alphanumerics and '-"
// but must start and end with an alphanumeric
char testChar;
int labelCharCount = 0;
for (int i = 0; i < addrLength; i++) {
testChar = address.charAt(i);
if (testChar == '.") {
if (!isAlphanum(address.charAt(i-1))) {
return false;
}
if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) {
return false;
}
labelCharCount = 0;
}
else if (!isAlphanum(testChar) && testChar != '-") {
return false;
}
// RFC 1034: Labels must be 63 characters or less.
else if (++labelCharCount > 63) {
return false;
}
}
}
return true;
| public static boolean | isWellFormedIPv4Address(java.lang.String address)Determines whether a string is an IPv4 address as defined by
RFC 2373, and under the further constraint that it must be a 32-bit
address. Though not expressed in the grammar, in order to satisfy
the 32-bit address constraint, each segment of the address cannot
be greater than 255 (8 bits of information).
IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
int addrLength = address.length();
char testChar;
int numDots = 0;
int numDigits = 0;
// make sure that 1) we see only digits and dot separators, 2) that
// any dot separator is preceded and followed by a digit and
// 3) that we find 3 dots
//
// RFC 2732 amended RFC 2396 by replacing the definition
// of IPv4address with the one defined by RFC 2373. - mrglavas
//
// IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
//
// One to three digits must be in each segment.
for (int i = 0; i < addrLength; i++) {
testChar = address.charAt(i);
if (testChar == '.") {
if ((i > 0 && !isDigit(address.charAt(i-1))) ||
(i+1 < addrLength && !isDigit(address.charAt(i+1)))) {
return false;
}
numDigits = 0;
if (++numDots > 3) {
return false;
}
}
else if (!isDigit(testChar)) {
return false;
}
// Check that that there are no more than three digits
// in this segment.
else if (++numDigits > 3) {
return false;
}
// Check that this segment is not greater than 255.
else if (numDigits == 3) {
char first = address.charAt(i-2);
char second = address.charAt(i-1);
if (!(first < '2" ||
(first == '2" &&
(second < '5" ||
(second == '5" && testChar <= '5"))))) {
return false;
}
}
}
return (numDots == 3);
| public static boolean | isWellFormedIPv6Reference(java.lang.String address)Determines whether a string is an IPv6 reference as defined
by RFC 2732, where IPv6address is defined in RFC 2373. The
IPv6 address is parsed according to Section 2.2 of RFC 2373,
with the additional constraint that the address be composed of
128 bits of information.
IPv6reference = "[" IPv6address "]"
Note: The BNF expressed in RFC 2373 Appendix B does not
accurately describe section 2.2, and was in fact removed from
RFC 3513, the successor of RFC 2373.
int addrLength = address.length();
int index = 1;
int end = addrLength-1;
// Check if string is a potential match for IPv6reference.
if (!(addrLength > 2 && address.charAt(0) == '["
&& address.charAt(end) == ']")) {
return false;
}
// Counter for the number of 16-bit sections read in the address.
int [] counter = new int[1];
// Scan hex sequence before possible '::' or IPv4 address.
index = scanHexSequence(address, index, end, counter);
if (index == -1) {
return false;
}
// Address must contain 128-bits of information.
else if (index == end) {
return (counter[0] == 8);
}
if (index+1 < end && address.charAt(index) == ':") {
if (address.charAt(index+1) == ':") {
// '::' represents at least one 16-bit group of zeros.
if (++counter[0] > 8) {
return false;
}
index += 2;
// Trailing zeros will fill out the rest of the address.
if (index == end) {
return true;
}
}
// If the second character wasn't ':', in order to be valid,
// the remainder of the string must match IPv4Address,
// and we must have read exactly 6 16-bit groups.
else {
return (counter[0] == 6) &&
isWellFormedIPv4Address(address.substring(index+1, end));
}
}
else {
return false;
}
// 3. Scan hex sequence after '::'.
int prevCount = counter[0];
index = scanHexSequence(address, index, end, counter);
// We've either reached the end of the string, the address ends in
// an IPv4 address, or it is invalid. scanHexSequence has already
// made sure that we have the right number of bits.
return (index == end) ||
(index != -1 && isWellFormedIPv4Address(
address.substring((counter[0] > prevCount) ? index+1 : index, end)));
| private static int | scanHexSequence(java.lang.String address, int index, int end, int[] counter)Helper method for isWellFormedIPv6Reference which scans the
hex sequences of an IPv6 address. It returns the index of the
next character to scan in the address, or -1 if the string
cannot match a valid IPv6 address.
char testChar;
int numDigits = 0;
int start = index;
// Trying to match the following productions:
// hexseq = hex4 *( ":" hex4)
// hex4 = 1*4HEXDIG
for (; index < end; ++index) {
testChar = address.charAt(index);
if (testChar == ':") {
// IPv6 addresses are 128-bit, so there can be at most eight sections.
if (numDigits > 0 && ++counter[0] > 8) {
return -1;
}
// This could be '::'.
if (numDigits == 0 || ((index+1 < end) && address.charAt(index+1) == ':")) {
return index;
}
numDigits = 0;
}
// This might be invalid or an IPv4address. If it's potentially an IPv4address,
// backup to just after the last valid character that matches hexseq.
else if (!isHex(testChar)) {
if (testChar == '." && numDigits < 4 && numDigits > 0 && counter[0] <= 6) {
int back = index - numDigits - 1;
return (back >= start) ? back : (back+1);
}
return -1;
}
// There can be at most 4 hex digits per group.
else if (++numDigits > 4) {
return -1;
}
}
return (numDigits > 0 && ++counter[0] <= 8) ? end : -1;
| public void | setFragment(java.lang.String p_fragment)Set the fragment for this URI. A non-null value is valid only
if this is a URI conforming to the generic URI syntax and
the path value is not null.
if (p_fragment == null) {
m_fragment = null;
}
else if (!isGenericURI()) {
throw new MalformedURIException(
"Fragment can only be set for a generic URI!");
}
else if (getPath() == null) {
throw new MalformedURIException(
"Fragment cannot be set when path is null!");
}
else if (!isURIString(p_fragment)) {
throw new MalformedURIException(
"Fragment contains invalid character!");
}
else {
m_fragment = p_fragment;
}
| public void | setHost(java.lang.String p_host)Set the host for this URI. If null is passed in, the userinfo
field is also set to null and the port is set to -1.
Note: This method overwrites registry based authority if it
previously existed in this URI.
if (p_host == null || p_host.length() == 0) {
if (p_host != null) {
m_regAuthority = null;
}
m_host = p_host;
m_userinfo = null;
m_port = -1;
return;
}
else if (!isWellFormedAddress(p_host)) {
throw new MalformedURIException("Host is not a well formed address!");
}
m_host = p_host;
m_regAuthority = null;
| public void | setPath(java.lang.String p_path)Set the path for this URI. If the supplied path is null, then the
query string and fragment are set to null as well. If the supplied
path includes a query string and/or fragment, these fields will be
parsed and set as well. Note that, for URIs following the "generic
URI" syntax, the path specified should start with a slash.
For URIs that do not follow the generic URI syntax, this method
sets the scheme-specific part.
if (p_path == null) {
m_path = null;
m_queryString = null;
m_fragment = null;
}
else {
initializePath(p_path, 0);
}
| public void | setPort(int p_port)Set the port for this URI. -1 is used to indicate that the port is
not specified, otherwise valid port numbers are between 0 and 65535.
If a valid port number is passed in and the host field is null,
an exception is thrown.
if (p_port >= 0 && p_port <= 65535) {
if (m_host == null) {
throw new MalformedURIException(
"Port cannot be set when host is null!");
}
}
else if (p_port != -1) {
throw new MalformedURIException("Invalid port number!");
}
m_port = p_port;
| public void | setQueryString(java.lang.String p_queryString)Set the query string for this URI. A non-null value is valid only
if this is an URI conforming to the generic URI syntax and
the path value is not null.
if (p_queryString == null) {
m_queryString = null;
}
else if (!isGenericURI()) {
throw new MalformedURIException(
"Query string can only be set for a generic URI!");
}
else if (getPath() == null) {
throw new MalformedURIException(
"Query string cannot be set when path is null!");
}
else if (!isURIString(p_queryString)) {
throw new MalformedURIException(
"Query string contains invalid character!");
}
else {
m_queryString = p_queryString;
}
| public void | setRegBasedAuthority(java.lang.String authority)Sets the registry based authority for this URI.
Note: This method overwrites server based authority
if it previously existed in this URI.
if (authority == null) {
m_regAuthority = null;
return;
}
// reg_name = 1*( unreserved | escaped | "$" | "," |
// ";" | ":" | "@" | "&" | "=" | "+" )
else if (authority.length() < 1 ||
!isValidRegistryBasedAuthority(authority) ||
authority.indexOf('/") != -1) {
throw new MalformedURIException("Registry based authority is not well formed.");
}
m_regAuthority = authority;
m_host = null;
m_userinfo = null;
m_port = -1;
| public void | setScheme(java.lang.String p_scheme)Set the scheme for this URI. The scheme is converted to lowercase
before it is set.
if (p_scheme == null) {
throw new MalformedURIException(
"Cannot set scheme from null string!");
}
if (!isConformantSchemeName(p_scheme)) {
throw new MalformedURIException("The scheme is not conformant.");
}
m_scheme = p_scheme.toLowerCase();
| public void | setUserinfo(java.lang.String p_userinfo)Set the userinfo for this URI. If a non-null value is passed in and
the host value is null, then an exception is thrown.
if (p_userinfo == null) {
m_userinfo = null;
return;
}
else {
if (m_host == null) {
throw new MalformedURIException(
"Userinfo cannot be set when host is null!");
}
// userinfo can contain alphanumerics, mark characters, escaped
// and ';',':','&','=','+','$',','
int index = 0;
int end = p_userinfo.length();
char testChar = '\0";
while (index < end) {
testChar = p_userinfo.charAt(index);
if (testChar == '%") {
if (index+2 >= end ||
!isHex(p_userinfo.charAt(index+1)) ||
!isHex(p_userinfo.charAt(index+2))) {
throw new MalformedURIException(
"Userinfo contains invalid escape sequence!");
}
}
else if (!isUserinfoCharacter(testChar)) {
throw new MalformedURIException(
"Userinfo contains invalid character:"+testChar);
}
index++;
}
}
m_userinfo = p_userinfo;
| public java.lang.String | toString()Get the URI as a string specification. See RFC 2396 Section 5.2.
StringBuffer uriSpecString = new StringBuffer();
if (m_scheme != null) {
uriSpecString.append(m_scheme);
uriSpecString.append(':");
}
uriSpecString.append(getSchemeSpecificPart());
return uriSpecString.toString();
|
|