Package com.actelion.research.util
Class StringFunctions
- java.lang.Object
-
- com.actelion.research.util.StringFunctions
-
public class StringFunctions extends java.lang.Object
-
-
Field Summary
Fields Modifier and Type Field Description static java.lang.StringPAT_NOT_ALPHANUMERICstatic java.lang.StringPAT_WHITESPACEstatic java.lang.String[]REGEX_META_CHARACTERSstatic java.lang.StringSEP
-
Constructor Summary
Constructors Constructor Description StringFunctions()
-
Method Summary
All Methods Static Methods Concrete Methods Modifier and Type Method Description static booleancontainsLowerCase(java.lang.String s)static booleancontainsUpperCase(java.lang.String s)static java.lang.StringconvertToValidFileNameCharacters(java.lang.String str)Not allowed are: \ / : * ? < > |static intcountIntegerInText(java.lang.String txt)static intcountOccurence(java.lang.String str, char c)static intcountWordInText(java.lang.String txt)static java.lang.StringencodeHTML(java.lang.String txt)static booleanequal(byte[] b1, byte[] b2)static java.lang.StringescapeDanglingMetaCharacters(java.lang.String pattern)Escapes the meta characters in a regular expression pattern with \\.static java.lang.Stringextract(java.lang.String str, java.lang.String regex)static java.lang.StringextractInverse(java.lang.String str, java.lang.String regex)static java.lang.Stringformat(java.lang.String str)Keeps the minus.static java.lang.Stringformat(java.lang.String str, char replacement)Keeps the minus.static java.lang.Stringformat2DefinedLengthLeading(java.lang.String s, int length)static java.lang.Stringformat2DefinedLengthTrailing(java.lang.String s, int length)static java.lang.StringformatToCharactersAndDigits(java.lang.String str)static java.lang.StringformatToPrintableCharactersOnly(java.lang.String str)This function was implemented because in AxoSOMSampleView was a new line in SMILES molConvert from ChemAxon that is not detected by replaceAll("\n", "");static java.util.List<java.lang.String>getAllSubStrings(java.lang.String str, int minsize)static java.lang.StringgetAppendedSorted(java.lang.String s1, java.lang.String s2)static java.util.Comparator<java.lang.String>getComparatorLength()static java.text.DecimalFormatgetDecimalFormat(int precision)static int[][]getMatrixFromString(java.lang.String str, java.lang.String seperator)static java.lang.StringgetMaximumOverlap(java.util.List<java.lang.String> li, int minsize)Finds the maximum common String in all Strings.static 
java.lang.StringgetRandom(int min, int max)static java.util.List<java.lang.String>getSplittedOverlappingText(java.util.List<java.lang.String> liWords, int lenSubText, int lenOverlap)Generates a list with overlapstatic java.lang.StringgetString(java.lang.String sLine, java.lang.String sStart, java.lang.String sEnd)static java.lang.StringgetString(java.lang.String sLine, java.lang.String sStart, java.lang.String sEnd, int iFromIndex)static java.lang.StringgetStringFromRegEx(java.lang.String str, java.lang.String regex)static java.util.List<java.lang.String>getTokenized(java.lang.String txt, java.lang.String separator)Returns the tokenized and trimmed values.static java.util.List<java.lang.String>getTokenizedBySeperatorRegex(java.lang.String txt, java.lang.String regex)static java.util.List<java.lang.String>getTokenizedQuoted(java.lang.String txt)Get a list from quoted and comma or otherwise separated phrases.static java.util.List<java.lang.String>getWordsFormatted(java.lang.String txt)Removes all non characters and digits.static java.util.List<java.lang.String>getWordsFormattedUniqueLowerCase(java.lang.String txt)static java.lang.Stringhex2String(java.lang.String hex)static booleanisAllLetter(java.lang.String s)static booleanisAllLowerCase(java.lang.String s)static booleanisAllUpperCase(java.lang.String s)static booleanisCapitalizedWord(java.lang.String s)static booleanisMissingParenthesis(java.lang.String name)static booleanisRegexInString(java.lang.String str, java.lang.String regex)static booleanisUpperAndLowerCase(java.lang.String s)static voidmain(java.lang.String[] args)static java.util.List<java.awt.Point>match(java.lang.String str, java.lang.String regex)static java.awt.PointmatchFirst(java.lang.String str, java.lang.String regex)static java.lang.Stringmax(java.lang.String s1, java.lang.String s2)static java.lang.Stringmin(java.lang.String s1, java.lang.String s2)static intnextClosing(java.lang.String txt, int indexStart, char cOpen, char cClose)finds the 
next corresponding closing bracket char to the first open charstatic intnextClosingBracket(java.lang.String txt, int iIndexStart)finds the next balanced closing bracket "]" to the first open bracket "[" in the string.static java.lang.StringremoveCharacter(java.lang.StringBuilder str, char c)static java.lang.StringremoveCharacter(java.lang.String str, char c)static intsizeOf(java.lang.String s)https://stackoverflow.com/questions/4385623/bytes-of-a-string-in-java sizeof(string) = 8 + // object header used by the VM 8 + // 64-bit reference to char array (value) 8 + string.length() * 2 + // character array itself (object header + 16-bit chars) 4 + // offset integer 4 + // count integer 4 + // cached hash codestatic intsizeOf(java.util.List<java.lang.String> l)static java.lang.StringtoSortedString(java.util.List<java.lang.String> li)static java.lang.StringtoString(boolean[] arr)static java.lang.StringtoString(byte[] arr)static java.lang.StringtoString(double[] arr)static java.lang.StringtoString(double[] arr, java.text.NumberFormat nf)static java.lang.StringtoString(float[] arr, java.text.NumberFormat nf)static java.lang.StringtoString(int[][] arr, java.lang.String seperator)static java.lang.StringtoString(int[] arr, java.lang.String seperator)static java.lang.StringtoString(java.lang.Exception ex)static java.lang.StringtoString(java.lang.String[] arr, java.lang.String seperator)static java.lang.StringtoString(java.util.Collection<java.lang.String> li, java.lang.String sep)static java.lang.StringtoString(java.util.List<java.lang.Double> li, java.text.NumberFormat nf)static java.lang.StringtoString(java.util.List<java.lang.String> li)static java.lang.StringtoStringBinary(int v)static java.lang.StringtoStringBinary(long v)static java.lang.StringtoStringFileNameCompatible(double d)static java.lang.StringtoStringHex(java.lang.String s)static java.lang.StringtoStringInt(java.util.List<java.lang.Integer> li)static java.lang.StringtoStringInt(java.util.List<java.lang.Integer> 
li, java.lang.String sep)static java.lang.StringtoStringInteger(java.util.List<java.lang.Integer> li, java.lang.String sep)static java.lang.StringtoStringLong(java.util.List<java.lang.Long> li, java.lang.String sep)static java.lang.StringtoStringTabNL(java.lang.String[][] arr)Elements are separated by tabs and rows are separated by newline.
-
-
-
Field Detail
-
PAT_WHITESPACE
public static final java.lang.String PAT_WHITESPACE
- See Also:
- Constant Field Values
-
PAT_NOT_ALPHANUMERIC
public static final java.lang.String PAT_NOT_ALPHANUMERIC
- See Also:
- Constant Field Values
-
REGEX_META_CHARACTERS
public static final java.lang.String[] REGEX_META_CHARACTERS
-
SEP
public static final java.lang.String SEP
- See Also:
- Constant Field Values
-
-
Method Detail
-
getAppendedSorted
public static java.lang.String getAppendedSorted(java.lang.String s1, java.lang.String s2)
-
min
public static java.lang.String min(java.lang.String s1, java.lang.String s2)
-
max
public static java.lang.String max(java.lang.String s1, java.lang.String s2)
-
countIntegerInText
public static int countIntegerInText(java.lang.String txt)
-
countWordInText
public static int countWordInText(java.lang.String txt)
-
equal
public static boolean equal(byte[] b1, byte[] b2)
-
encodeHTML
public static java.lang.String encodeHTML(java.lang.String txt)
-
getComparatorLength
public static java.util.Comparator<java.lang.String> getComparatorLength()
-
getDecimalFormat
public static java.text.DecimalFormat getDecimalFormat(int precision)
-
getAllSubStrings
public static java.util.List<java.lang.String> getAllSubStrings(java.lang.String str, int minsize)
-
getRandom
public static java.lang.String getRandom(int min, int max)
- Parameters:
min - minimum length
max - maximum length
- Returns:
-
getMatrixFromString
public static int[][] getMatrixFromString(java.lang.String str, java.lang.String seperator)
- Parameters:
str - has to be of this form: [1,2,3][2,3,4]. The separator has to be given.
seperator -
- Returns:
-
getMaximumOverlap
public static java.lang.String getMaximumOverlap(java.util.List<java.lang.String> li, int minsize)
Finds the maximum common String in all Strings. Position independent.
- Parameters:
li -
- Returns:
-
removeCharacter
public static java.lang.String removeCharacter(java.lang.String str, char c)
-
removeCharacter
public static java.lang.String removeCharacter(java.lang.StringBuilder str, char c)
-
countOccurence
public static int countOccurence(java.lang.String str, char c)
-
convertToValidFileNameCharacters
public static java.lang.String convertToValidFileNameCharacters(java.lang.String str)
Not allowed are: \ / : * ? < > |
- Parameters:
str - input string
- Returns:
- string with -X- in place of the characters that are not allowed. 10.09.2003 MK
-
toStringFileNameCompatible
public static java.lang.String toStringFileNameCompatible(double d)
-
formatToPrintableCharactersOnly
public static java.lang.String formatToPrintableCharactersOnly(java.lang.String str)
This function was implemented because in AxoSOMSampleView there was a new line in a SMILES from ChemAxon's molConvert that was not detected by replaceAll("\n", "");
- Parameters:
str - input String
- Returns:
- a String with printable ASCII characters only. No extended ASCII characters.
-
formatToCharactersAndDigits
public static java.lang.String formatToCharactersAndDigits(java.lang.String str)
-
format2DefinedLengthTrailing
public static java.lang.String format2DefinedLengthTrailing(java.lang.String s, int length)
-
format2DefinedLengthLeading
public static java.lang.String format2DefinedLengthLeading(java.lang.String s, int length)
-
format
public static java.lang.String format(java.lang.String str)
Keeps the minus sign. Every other ASCII character that is neither a letter nor a digit is replaced with '_'.
- Parameters:
str -
- Returns:
-
format
public static java.lang.String format(java.lang.String str, char replacement)
Keeps the minus sign. Every other ASCII character that is neither a letter nor a digit is replaced with replacement.
- Parameters:
str -
- Returns:
-
getString
public static java.lang.String getString(java.lang.String sLine, java.lang.String sStart, java.lang.String sEnd)
- Parameters:
sLine - input string
sStart - start tag
sEnd - end tag
- Returns:
- string between the two tags; if one of the tags is not found, a string of length 0 is returned.
-
getStringFromRegEx
public static java.lang.String getStringFromRegEx(java.lang.String str, java.lang.String regex)
- Parameters:
str -
regex -
- Returns:
- null if substring not found.
-
isRegexInString
public static boolean isRegexInString(java.lang.String str, java.lang.String regex)
-
extract
public static java.lang.String extract(java.lang.String str, java.lang.String regex)
- Parameters:
str -
regex -
- Returns:
- expression which was matched by regex.
-
extractInverse
public static java.lang.String extractInverse(java.lang.String str, java.lang.String regex)
- Parameters:
str -
regex -
- Returns:
- the combined not matching parts of the string.
-
getString
public static java.lang.String getString(java.lang.String sLine, java.lang.String sStart, java.lang.String sEnd, int iFromIndex)
- Parameters:
sLine - input string
sStart - start tag
sEnd - end tag
iFromIndex - start index
- Returns:
- string between the two tags; if one of the tags is not found, a string of length 0 is returned.
-
getWordsFormatted
public static java.util.List<java.lang.String> getWordsFormatted(java.lang.String txt)
Removes all non characters and digits.
- Parameters:
txt -
- Returns:
-
getWordsFormattedUniqueLowerCase
public static java.util.List<java.lang.String> getWordsFormattedUniqueLowerCase(java.lang.String txt)
- Parameters:
txt-- Returns:
- formatted, unique and lower case
-
getTokenizedQuoted
public static java.util.List<java.lang.String> getTokenizedQuoted(java.lang.String txt)
Gets a list from quoted phrases that are separated by commas or other separators.
- Parameters:
txt -
- Returns:
-
getTokenized
public static java.util.List<java.lang.String> getTokenized(java.lang.String txt, java.lang.String separator)
Returns the tokenized and trimmed values.
- Parameters:
txt -
separator -
- Returns:
-
getTokenizedBySeperatorRegex
public static java.util.List<java.lang.String> getTokenizedBySeperatorRegex(java.lang.String txt, java.lang.String regex)
-
getSplittedOverlappingText
public static java.util.List<java.lang.String> getSplittedOverlappingText(java.util.List<java.lang.String> liWords, int lenSubText, int lenOverlap)
Generates a list with overlap.
- Parameters:
liWords -
lenSubText - number of words in each entry.
lenOverlap -
- Returns:
-
sizeOf
public static int sizeOf(java.lang.String s)
https://stackoverflow.com/questions/4385623/bytes-of-a-string-in-java
sizeof(string) =
8 + // object header used by the VM
8 + // 64-bit reference to char array (value)
8 + string.length() * 2 + // character array itself (object header + 16-bit chars)
4 + // offset integer
4 + // count integer
4 + // cached hash code
- Parameters:
s -
- Returns:
-
sizeOf
public static int sizeOf(java.util.List<java.lang.String> l)
-
toString
public static java.lang.String toString(double[] arr, java.text.NumberFormat nf)
-
toString
public static java.lang.String toString(float[] arr, java.text.NumberFormat nf)
-
toString
public static java.lang.String toString(byte[] arr)
-
toString
public static java.lang.String toString(boolean[] arr)
-
toString
public static java.lang.String toString(java.util.List<java.lang.Double> li, java.text.NumberFormat nf)
-
toStringInteger
public static java.lang.String toStringInteger(java.util.List<java.lang.Integer> li, java.lang.String sep)
-
toString
public static java.lang.String toString(double[] arr)
-
toString
public static java.lang.String toString(int[][] arr, java.lang.String seperator)
-
toString
public static java.lang.String toString(java.lang.String[] arr, java.lang.String seperator)
-
toString
public static java.lang.String toString(int[] arr, java.lang.String seperator)
-
toStringTabNL
public static java.lang.String toStringTabNL(java.lang.String[][] arr)
Elements are separated by tabs and rows are separated by newline.
- Parameters:
arr -
- Returns:
-
toString
public static java.lang.String toString(java.lang.Exception ex)
-
toString
public static java.lang.String toString(java.util.List<java.lang.String> li)
-
toString
public static java.lang.String toString(java.util.Collection<java.lang.String> li, java.lang.String sep)
-
toStringLong
public static java.lang.String toStringLong(java.util.List<java.lang.Long> li, java.lang.String sep)
-
toStringInt
public static java.lang.String toStringInt(java.util.List<java.lang.Integer> li, java.lang.String sep)
-
toStringInt
public static java.lang.String toStringInt(java.util.List<java.lang.Integer> li)
-
toSortedString
public static java.lang.String toSortedString(java.util.List<java.lang.String> li)
-
toStringBinary
public static java.lang.String toStringBinary(int v)
-
toStringBinary
public static java.lang.String toStringBinary(long v)
-
toStringHex
public static java.lang.String toStringHex(java.lang.String s)
-
hex2String
public static java.lang.String hex2String(java.lang.String hex)
-
nextClosingBracket
public static int nextClosingBracket(java.lang.String txt, int iIndexStart)
Finds the next balanced closing bracket "]" to the first open bracket "[" in the string.
- Parameters:
txt - String
iIndexStart - start index
- Returns:
- index of the next corresponding bracket
-
escapeDanglingMetaCharacters
public static java.lang.String escapeDanglingMetaCharacters(java.lang.String pattern)
Escapes the meta characters in a regular expression pattern with \\.
- Parameters:
pattern -
- Returns:
-
match
public static final java.util.List<java.awt.Point> match(java.lang.String str, java.lang.String regex)
- Parameters:
str -
regex -
- Returns:
- list of points; x is the start and y is the end of the matching string (offset after the last character matched).
-
matchFirst
public static final java.awt.Point matchFirst(java.lang.String str, java.lang.String regex)
-
nextClosing
public static int nextClosing(java.lang.String txt, int indexStart, char cOpen, char cClose)
Finds the next corresponding closing bracket char to the first open char.
- Parameters:
txt -
indexStart -
cOpen -
cClose -
- Returns:
-
isAllLetter
public static boolean isAllLetter(java.lang.String s)
-
isAllUpperCase
public static boolean isAllUpperCase(java.lang.String s)
-
isAllLowerCase
public static boolean isAllLowerCase(java.lang.String s)
-
containsUpperCase
public static boolean containsUpperCase(java.lang.String s)
-
containsLowerCase
public static boolean containsLowerCase(java.lang.String s)
-
isCapitalizedWord
public static boolean isCapitalizedWord(java.lang.String s)
- Parameters:
s-- Returns:
- true only if the first letter is capitalized and all other characters are lower case letters.
-
isUpperAndLowerCase
public static boolean isUpperAndLowerCase(java.lang.String s)
-
isMissingParenthesis
public static boolean isMissingParenthesis(java.lang.String name)
- Parameters:
name-- Returns:
- false if for each opening parenthesis a closing one is present.
-
main
public static void main(java.lang.String[] args)
-
-