|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
org.basex.util.Token
public final class Token
This class provides convenience operations for handling 'Tokens'. Tokens are UTF-8 encoded strings, stored in a byte array.
Note that, to guarantee a consistent string representation, all string conversions should be done via the methods of this class.
Field Summary | |
---|---|
static byte[] |
COLON
Colon. |
static java.util.Comparator<byte[]> |
COMP
Comparator for byte arrays. |
static byte[] |
EMPTY
Empty token. |
static byte[] |
FALSE
Token 'false'. |
static byte[] |
HEX
Hex codes. |
static byte[] |
INF
Token 'INF'. |
static java.util.Comparator<byte[]> |
LC_COMP
Case-insensitive comparator for byte arrays. |
static byte[] |
NINF
Token '-INF'. |
static byte[] |
NULL
Token 'null'. |
static byte[] |
ONE
Number '1'. |
static byte[] |
SLASH
Slash. |
static byte[] |
SPACE
Space. |
static byte[] |
TRUE
Token 'true'. |
static java.lang.String |
UTF16
UTF16 encoding string. |
static java.lang.String |
UTF162
UTF16 encoding string (variant). |
static java.lang.String |
UTF16BE
UTF16BE (=UTF16) encoding string. |
static java.lang.String |
UTF16LE
UTF16LE encoding string. |
static java.lang.String |
UTF32
UTF32 encoding string. |
static java.lang.String |
UTF322
UTF32 encoding string (variant). |
static java.lang.String |
UTF8
UTF8 encoding string. |
static java.lang.String |
UTF82
UTF8 encoding string (variant). |
static byte[] |
XML
XML token. |
static byte[] |
XMLC
XML token with colon. |
static byte[] |
XMLNS
XMLNS token. |
static byte[] |
XMLNSC
XMLNS token with colon. |
static byte[] |
ZERO
Number '0'. |
Method Summary | |
---|---|
static boolean |
ascii(byte[] token)
Checks if the specified token only consists of ASCII characters. |
static byte[] |
chop(byte[] token,
int max)
Chops a token to the specified length and adds dots. |
static byte[] |
chopNumber(byte[] token)
Finishes the numeric token, removing trailing zeroes. |
static int |
cl(byte cp)
Returns the length of the specified UTF8 byte. |
static int |
cl(byte[] token,
int pos)
Returns the length of a UTF8 character at the specified position. |
static byte[] |
concat(byte[] token1,
byte[] token2)
Concatenates two tokens. |
static byte[] |
concat(byte[] token1,
byte[] token2,
byte[] token3)
Concatenates three tokens. |
static boolean |
contains(byte[] token,
byte[] sub)
Checks if the first token contains the second token. |
static boolean |
contains(byte[] token,
int c)
Checks if the first token contains the specified character. |
static int |
cp(byte[] token,
int pos)
Returns the codepoint (unicode value) of the specified token, starting at the specified position. |
static int[] |
cps(byte[] token)
Converts a token to a sequence of codepoints. |
static byte[] |
delete(byte[] token,
int ch)
Deletes the specified character from the token. |
static int |
diff(byte[] token,
byte[] compare)
Compares two tokens lexicographically. |
static boolean |
digit(int ch)
Checks if the specified character is a digit (0 - 9). |
static boolean |
endsWith(byte[] token,
byte[] sub)
Checks if the first token ends with the second token. |
static boolean |
endsWith(byte[] token,
int ch)
Checks if the first token ends with the specified character. |
static boolean |
eq(byte[] token,
byte[]... tokens)
Compares several tokens for equality. |
static boolean |
eq(byte[] token1,
byte[] token2)
Compares two tokens for equality. |
static boolean |
eq(java.lang.String str,
java.lang.String... strings)
Compares several strings for equality. |
static boolean |
eqic(java.lang.String str,
java.lang.String... strings)
Compares several strings for equality, ignoring the case. |
static byte[] |
escape(byte[] token)
Escapes the specified token. |
static boolean |
ftChar(int ch)
Returns true if the specified character is a full-text letter or digit. |
static int |
hash(byte[] token)
Calculates a hash code for the specified token. |
static byte[] |
hex(byte[] val,
boolean uc)
Returns a hex representation of the specified byte array. |
static int |
indexOf(byte[] token,
byte[] sub)
Returns the position of the specified token or -1. |
static int |
indexOf(byte[] token,
byte[] sub,
int pos)
Returns the position of the specified token or -1. |
static int |
indexOf(byte[] token,
int c)
Returns the position of the specified character or -1. |
static int |
lastIndexOf(byte[] token,
int c)
Returns the last position of the specified character or -1. |
static byte[] |
lc(byte[] token)
Converts the specified token to lower case. |
static int |
lc(int ch)
Converts a character to lower case. |
static int |
len(byte[] token)
Returns the token length. |
static boolean |
letter(int ch)
Checks if the specified character is a computer letter (A - Z, a - z, _). |
static boolean |
letterOrDigit(int ch)
Checks if the specified character is a computer letter or digit. |
static byte[] |
local(byte[] name)
Returns the local name of the specified name. |
static byte[] |
max(byte[] token,
byte[] compare)
Returns the bigger token. |
static java.lang.String |
md5(java.lang.String string)
Returns an MD5 hash in lower case. |
static byte[] |
min(byte[] token,
byte[] compare)
Returns the smaller token. |
static byte[] |
norm(byte[] token)
Normalizes all whitespace occurrences in the specified token. |
static int |
norm(int ch)
Returns a normalized character without diacritics. |
static java.lang.String |
normEncoding(java.lang.String encoding)
Returns a unified representation of the specified encoding. |
static java.lang.String |
normEncoding(java.lang.String encoding,
java.lang.String old)
Returns a unified representation of the specified encoding. |
static int |
numDigits(int integer)
Returns the number of digits of the specified integer. |
static byte[] |
prefix(byte[] name)
Returns the prefix of the specified token. |
static byte[] |
replace(byte[] token,
int search,
int replace)
Replaces the specified character and returns the result token. |
static byte[] |
replaceAll(byte[] token,
java.lang.String pattern,
java.lang.String replace)
Replaces all matches of a regular expression in the specified token. |
static byte[][] |
split(byte[] token,
int sep)
Splits a token around matches of the given separator. |
static boolean |
startsWith(byte[] token,
byte[] sub)
Checks if the first token starts with the second token. |
static boolean |
startsWith(byte[] token,
int ch)
Checks if the first token starts with the specified character. |
static java.lang.String |
string(byte[] token)
Returns the specified token as string. |
static java.lang.String |
string(byte[] token,
int start,
int length)
Returns the specified token as string. |
static byte[] |
substring(byte[] token,
int start)
Returns a substring of the specified token. |
static byte[] |
substring(byte[] token,
int start,
int end)
Returns a substring of the specified token. |
static byte[] |
subtoken(byte[] token,
int start)
Returns a partial token. |
static byte[] |
subtoken(byte[] token,
int start,
int end)
Returns a partial token. |
static boolean |
supported(java.lang.String encoding)
Checks if the specified encoding is supported. |
static double |
toDouble(byte[] token)
Converts the specified token into a double value. |
static int |
toInt(byte[] token)
Converts the specified token into an integer value. |
static int |
toInt(byte[] token,
int start,
int end)
Converts the specified token into an integer value. |
static int |
toInt(java.lang.String string)
Converts the specified string into an integer value. |
static byte[] |
token(boolean bool)
Creates a byte array representation of the specified boolean value. |
static byte[] |
token(double dbl)
Creates a byte array representation from the specified double value; inspired by Xavier Franc's Qizx/open processor. |
static byte[] |
token(float flt)
Creates a byte array representation from the specified float value. |
static byte[] |
token(int integer)
Creates a byte array representation of the specified integer value. |
static byte[] |
token(long integer)
Creates a byte array representation from the specified long value, using Java's standard method. |
static byte[] |
token(java.lang.String string)
Converts a string to a byte array. |
static byte[][] |
tokens(java.lang.String... strings)
Converts the specified strings to tokens. |
static long |
toLong(byte[] token)
Converts the specified token into a long value. |
static long |
toLong(byte[] token,
int start,
int end)
Converts the specified token into a long value. |
static long |
toLong(java.lang.String string)
Converts the specified string into a long value. |
static int |
toSimpleInt(byte[] token)
Converts the specified token into a positive integer value. |
static byte[] |
trim(byte[] token)
Removes leading and trailing whitespaces from the specified token. |
static byte[] |
uc(byte[] token)
Converts the specified token to upper case. |
static int |
uc(int ch)
Converts a character to upper case. |
static byte[] |
uri(byte[] token,
boolean iri)
Returns a URI encoded token. |
static byte[] |
utf8(byte[] token,
java.lang.String encoding)
Converts a token from the input encoding to UTF8. |
static boolean |
ws(byte[] token)
Checks if the specified token has only whitespaces. |
static boolean |
ws(int ch)
Checks if the specified character is a whitespace. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final byte[] EMPTY
public static final byte[] XML
public static final byte[] XMLC
public static final byte[] XMLNS
public static final byte[] XMLNSC
public static final byte[] TRUE
public static final byte[] FALSE
public static final byte[] NULL
public static final byte[] INF
public static final byte[] NINF
public static final byte[] SPACE
public static final byte[] ZERO
public static final byte[] ONE
public static final byte[] SLASH
public static final byte[] COLON
public static final byte[] HEX
public static final java.lang.String UTF8
public static final java.lang.String UTF82
public static final java.lang.String UTF16
public static final java.lang.String UTF162
public static final java.lang.String UTF16BE
public static final java.lang.String UTF16LE
public static final java.lang.String UTF32
public static final java.lang.String UTF322
public static final java.util.Comparator<byte[]> COMP
public static final java.util.Comparator<byte[]> LC_COMP
Method Detail |
---|
public static java.lang.String string(byte[] token)
token
- token
public static java.lang.String string(byte[] token, int start, int length)
token
- token
start
- start position
length
- length
public static boolean ascii(byte[] token)
token
- token
public static byte[] token(java.lang.String string)
string
- string to be converted
public static byte[][] tokens(java.lang.String... strings)
strings
- strings
public static byte[] utf8(byte[] token, java.lang.String encoding)
token
- token to be converted
encoding
- input encoding
public static java.lang.String normEncoding(java.lang.String encoding)
encoding
- input encoding (UTF-8 is returned for a null
reference)
public static java.lang.String normEncoding(java.lang.String encoding, java.lang.String old)
encoding
- input encoding (UTF-8 is returned for a null
reference)
old
- previous encoding (optional)
public static boolean supported(java.lang.String encoding)
encoding
- encoding
public static int cp(byte[] token, int pos)
token
- token
pos
- character position
public static int cl(byte cp)
cp
- codepoint
public static int cl(byte[] token, int pos)
token
- token
pos
- position
public static int[] cps(byte[] token)
token
- token
public static int len(byte[] token)
token
- token
public static byte[] token(boolean bool)
bool
- boolean value to be converted
public static byte[] token(int integer)
integer
- int value to be converted
public static int numDigits(int integer)
integer
- number to be checked
public static byte[] token(long integer)
integer
- value to be converted
public static byte[] token(double dbl)
dbl
- double value to be converted
public static byte[] token(float flt)
flt
- float value to be converted
public static byte[] chopNumber(byte[] token)
token
- token to be modified
public static double toDouble(byte[] token)
Double.NaN
is returned if the input is invalid.
token
- token to be converted
public static long toLong(java.lang.String string)
Long.MIN_VALUE
is returned when the input is invalid.
string
- string to be converted
public static long toLong(byte[] token)
Long.MIN_VALUE
is returned when the input is invalid.
token
- token to be converted
public static long toLong(byte[] token, int start, int end)
Long.MIN_VALUE
is returned when the input is invalid.
token
- token to be converted
start
- first byte to be parsed
end
- last byte to be parsed (exclusive)
public static int toInt(java.lang.String string)
Integer.MIN_VALUE
is returned when the input is invalid.
string
- string to be converted
public static int toInt(byte[] token)
Integer.MIN_VALUE
is returned when the input is invalid.
token
- token to be converted
public static int toInt(byte[] token, int start, int end)
Integer.MIN_VALUE
is returned when the input is invalid.
token
- token to be converted
start
- first byte to be parsed
end
- last byte to be parsed (exclusive)
public static int toSimpleInt(byte[] token)
Integer.MIN_VALUE
is returned if non-digits are found
or if the input is longer than nine characters.
token
- token to be converted
public static int hash(byte[] token)
token
- specified token
public static boolean eq(byte[] token1, byte[] token2)
token1
- first token
token2
- token to be compared
public static boolean eq(byte[] token, byte[]... tokens)
token
- token
tokens
- tokens to be compared
public static boolean eq(java.lang.String str, java.lang.String... strings)
str
- first string
strings
- strings to be compared
public static boolean eqic(java.lang.String str, java.lang.String... strings)
str
- first string
strings
- strings to be compared
public static int diff(byte[] token, byte[] compare)
token
- first token
compare
- token to be compared
public static byte[] min(byte[] token, byte[] compare)
token
- first token
compare
- token to be compared
public static byte[] max(byte[] token, byte[] compare)
token
- first token
compare
- token to be compared
public static boolean contains(byte[] token, byte[] sub)
token
- token
sub
- token to be found
public static boolean contains(byte[] token, int c)
token
- token
c
- character to be found
public static int indexOf(byte[] token, int c)
token
- token
c
- character to be found
-1
public static int lastIndexOf(byte[] token, int c)
token
- token
c
- character to be found
-1
public static int indexOf(byte[] token, byte[] sub)
token
- token
sub
- token to be found
-1
public static int indexOf(byte[] token, byte[] sub, int pos)
token
- token
sub
- token to be found
pos
- start position
public static boolean startsWith(byte[] token, int ch)
token
- token
ch
- character to be found
public static boolean startsWith(byte[] token, byte[] sub)
token
- token
sub
- token to be found
public static boolean endsWith(byte[] token, int ch)
token
- token
ch
- character to be found
public static boolean endsWith(byte[] token, byte[] sub)
token
- token
sub
- token to be found
public static byte[] substring(byte[] token, int start)
subtoken(byte[], int)
instead.
token
- input token
start
- start position
public static byte[] substring(byte[] token, int start, int end)
subtoken(byte[], int)
instead.
token
- input token
start
- start position
end
- end position
public static byte[] subtoken(byte[] token, int start)
token
- input token
start
- start position
public static byte[] subtoken(byte[] token, int start, int end)
token
- input text
start
- start position
end
- end position
public static byte[][] split(byte[] token, int sep)
token
- token to be split
sep
- separation character
public static byte[] replaceAll(byte[] token, java.lang.String pattern, java.lang.String replace)
token
- token to match
pattern
- regular expression
replace
- replacement string
public static boolean ws(byte[] token)
token
- token
public static byte[] replace(byte[] token, int search, int replace)
token
- token to be checked
search
- the character to be replaced
replace
- the new character
public static byte[] trim(byte[] token)
token
- token to be trimmed
public static byte[] chop(byte[] token, int max)
token
- token to be chopped
max
- maximum length
public static byte[] concat(byte[] token1, byte[] token2)
token1
- first token
token2
- second token
public static byte[] concat(byte[] token1, byte[] token2, byte[] token3)
TokenBuilder
instance can be used to
concatenate more than three tokens.
token1
- first token
token2
- second token
token3
- third token
public static byte[] delete(byte[] token, int ch)
token
- token
ch
- character to be removed
public static byte[] norm(byte[] token)
token
- token
public static boolean ws(int ch)
ch
- the letter to be checked
public static boolean letter(int ch)
ch
- the letter to be checked
public static boolean digit(int ch)
ch
- the letter to be checked
public static boolean letterOrDigit(int ch)
ch
- the letter to be checked
public static boolean ftChar(int ch)
ch
- character to be tested
public static byte[] uc(byte[] token)
token
- token to be converted
public static int uc(int ch)
ch
- character to be converted
public static byte[] lc(byte[] token)
token
- token to be converted
public static int lc(int ch)
ch
- character to be converted
public static byte[] prefix(byte[] name)
name
- name
public static byte[] local(byte[] name)
name
- name
public static byte[] uri(byte[] token, boolean iri)
token
- token
iri
- input
public static byte[] escape(byte[] token)
token
- token
public static java.lang.String md5(java.lang.String string)
string
- string to be hashed
public static byte[] hex(byte[] val, boolean uc)
val
- values to be mapped
uc
- upper case
public static int norm(int ch)
ch
- character to be normalized
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |