/*
 * This span holds the imports, the opening of class AutoFormat, and two methods.
 *
 * replaceAll( in, match, replace )
 *   Replaces every occurrence of match inside the buffer, in place, scanning left to
 *   right. After each replacement the search resumes at pos + replace.length(), so
 *   text that was just inserted is not scanned again. Returns the same buffer.
 *   NOTE(review): with an empty match, indexOf never returns a negative value, so the
 *   loop does not appear to terminate - confirm callers never pass "".
 *
 * plaintextFormat( in, noescapetags )
 *   1. Deletes leading whitespace; returns immediately if nothing is left; then
 *      deletes trailing whitespace.
 *   2. Unless noescapetags is set, runs four replaceAll calls keyed on & " < and >.
 *   3. Replaces tab characters via replaceAll.
 *   4. Walks the buffer one character at a time: characters <= 8 or in 0x0B..0x1F are
 *      deleted; '\n' and runs of whitespace are rewritten; anything else is remembered
 *      in prevc and skipped.
 *   The buffer is edited in place and returned.
 *
 * NOTE(review): several string literals below look damaged - replaceAll( in, "&", "&" )
 * replaces a string with itself, the literal after "\"" is not valid Java, and two
 * literals are split across lines. They presumably held HTML entities/tags that were
 * lost; restore them from the original source before relying on this code.
 */
import java.util.regex.Pattern; import java.util.regex.Matcher; public class AutoFormat { public static StringBuffer replaceAll( StringBuffer in, String match, String replace ) { int pos; int ml = match.length(); int rl = replace.length(); pos = in.indexOf( match ); while ( pos >= 0 ) { in.replace( pos, pos + ml, replace ); pos = in.indexOf( match, pos + rl ); } return in; } /* this function has been pretty faithfully translated from the perl */ static StringBuffer plaintextFormat( StringBuffer in, boolean noescapetags ) { // Remove excess whitespace from the front and end of the text while ( in.length() > 0 && Character.isWhitespace( in.charAt( 0 ) ) ) { in.deleteCharAt( 0 ); } if ( in.length() <= 0 ) { return in; } int lastci; lastci = in.length() - 1; while ( Character.isWhitespace( in.charAt( lastci ) ) ) { in.deleteCharAt( lastci ); lastci = in.length() - 1; } // Perform standard plain-old-text conversions if ( ! noescapetags ) { in = replaceAll( in, "&", "&" ); in = replaceAll( in, "\"", """ ); in = replaceAll( in, "<", "<" ); in = replaceAll( in, ">", ">" ); } // in = replaceAll( in, "\r", "" ); in = replaceAll( in, "\t", " " ); // in = replaceAll( in, "\u00a0", " " ); // what is this? // filter chars int pos = 0; char prevc = '\0'; while ( pos < in.length() ) { char c; c = in.charAt( pos ); if ( c <= 8 || (c >= 0x0B && c <= 0x1F) ) { //s#[\x00-\x08\x0B-\x1F]##g; # Nuke control characters // strip control chars in.deleteCharAt( pos ); //} else if ( c == '\t' ) { // in.replace( pos, pos + 1, " " ); } else if ( c == '\n' ) { boolean run = false; int np = pos + 1; char nc = in.charAt( np ); int endnl = -1; while ( Character.isWhitespace( nc ) ) { run = true; if ( nc == '\n' ) { endnl = np; } np++; nc = in.charAt( np ); } if ( run && (endnl > 0) ) { //s#\n\s*\n#
\n#gs; in.replace( pos, endnl, "
" );
// NOTE(review): endnl was computed before the replace above changed the buffer
// length; confirm that endnl + 4 is the intended cursor position.
pos = endnl + 4;
prevc = '>';
} else {
//s#(?)\n#
\n#gs;
in.insert( pos, "
" );
pos += 5;
prevc = '>';
}
// Space directly after a newline.
// NOTE(review): prevc is only ever set to '\0', '>', ';' or a character that reached
// the final else branch (never '\n'), so this branch looks unreachable - confirm.
} else if ( prevc == '\n' && c == ' ' ) {
//s#^ # #gm;
in.replace( pos - 1, pos + 1, " " );
pos += 5;
prevc = ';';
// Whitespace following whitespace: lastsp is advanced to the end of the run, then
// everything from the previous character up to lastsp is replaced.
} else if ( Character.isWhitespace( prevc ) && Character.isWhitespace( c ) ) {
int lastsp = pos + 1;
while ( lastsp < in.length() && Character.isWhitespace( in.charAt( lastsp ) ) ) {
lastsp++;
}
//s# # #g;
in.replace( pos - 1, lastsp, " " );
// NOTE(review): lastsp is an index from before the replace; confirm lastsp + 5.
pos = lastsp + 5;
prevc = ';';
//} else if ( ! noescapetags && () ) {
// Change remaining non-ASCII chars to entities
//s!([^\n\t\x20-\x7E])!''.ord($1).';'!ge unless $noescapetags;
} else {
// Ordinary character: remember it and move on.
prevc = c;
pos++;
}
}
return in;
}
/* maybe pull urlSchemes from a db set variable? */
/** Prefixes that mark the start of a URL for {@link #autoLinkifyUrls}. */
static String[] urlSchemes = {
    "http://", "https://", "ftp://",
    //"file://",
    "mailto:",
};

/**
 * Wraps every naked URL found in {@code in} in an HTML anchor, editing the buffer in
 * place.
 *
 * Forms accepted by the original Perl version:
 *   [url text]
 *   [text url]
 *   {url text}
 *   {text url}
 *   url
 * where text is [^\[][^]]+? inside [] and [^{][^}]+? inside {}. For a naked URL the
 * Perl did not include any of ".!?_*=" as the last character, and required the URL to
 * be followed by whitespace, one of "\n<()[]{}" or the end of the input.
 *
 * This method only handles naked URLs: a URL starts at the earliest entry of
 * {@link #urlSchemes} and extends until whitespace, '<', '>' or the end of the input,
 * with no special trimming. Text between a '<' and the next '>' is treated as a tag
 * and skipped, so URLs inside attributes are left alone. A '<' that is never closed
 * is rewritten to the entity "&lt;".
 *
 * Limitation kept from the original: a URL that appears as the text of an existing
 * anchor is not inside a tag and is therefore linkified again.
 *
 * @param in buffer to edit; must not be null
 * @return the same buffer instance
 */
static StringBuffer autoLinkifyUrls( StringBuffer in ) {
    int pos = 0;
    while ( pos < in.length() ) {
        // Find the earliest scheme at or after pos.
        int minSchemePos = -1;
        int minScheme = -1;
        for ( int i = 0; i < urlSchemes.length; i++ ) {
            int at = in.indexOf( urlSchemes[i], pos );
            if ( at >= 0 && ( minScheme < 0 || at < minSchemePos ) ) {
                minSchemePos = at;
                minScheme = i;
            }
        }
        if ( minScheme < 0 ) {
            // no matching schemes left to linkify
            return in;
        }

        int nextLT = in.indexOf( "<", pos );
        if ( nextLT >= 0 && nextLT < minSchemePos ) {
            int nextGT = in.indexOf( ">", nextLT );
            if ( nextGT >= 0 ) {
                // zoom scan to after end of tag
                pos = nextGT + 1;
            } else {
                // bogus < open tag! Escape it so it can no longer swallow text.
                String escaped = "&lt;";
                in.replace( nextLT, nextLT + 1, escaped );
                pos = nextLT + escaped.length();
            }
            continue;
        }

        // no '<' before match, linkify ...
        int linkend = minSchemePos + urlSchemes[minScheme].length();
        while ( linkend < in.length() ) {
            char c = in.charAt( linkend );
            if ( Character.isWhitespace( c ) || c == '<' || c == '>' ) {
                break;
            }
            linkend++;
        }
        String url = in.substring( minSchemePos, linkend );
        // A double quote inside the URL would end the href attribute early.
        String href = url.replace( "\"", "&quot;" );
        String link = "<a href=\"" + href + "\">" + url + "</a>";
        in.replace( minSchemePos, linkend, link );
        // Resume ON the terminator (not after it): if it is '<', the tag it opens
        // must still be seen so URLs in its attributes are skipped.
        pos = minSchemePos + link.length();
    }
    return in;
}
/**
 * One stage in a chain of text filters. Each stage looks for its own pattern in
 * the text it is given; the stretches between matches go to the next stage's
 * {@link #filter}, while each match is handled by {@link #core}.
 */
static abstract class PatternFilter {
    /** The following stage in the chain. */
    PatternFilter next;

    /** @return the pattern this stage searches for */
    protected abstract Pattern pat();

    /**
     * Splits {@code in} around the matches of {@link #pat()}. Unmatched text
     * (including the possibly empty pieces before, between and after matches) is
     * passed to {@code next.filter}; every match is passed to {@link #core}.
     * When nothing matches, {@code in} itself is handed to {@code next.filter}.
     */
    public void filter( CharSequence in ) {
        Matcher matcher = pat().matcher( in );
        if ( !matcher.find() ) {
            next.filter( in );
            return;
        }
        int consumed = 0;
        do {
            CharSequence before = in.subSequence( consumed, matcher.start() );
            next.filter( before );
            core( matcher );
            consumed = matcher.end();
        } while ( matcher.find() );
        CharSequence rest = in.subSequence( consumed, in.length() );
        next.filter( rest );
    }

    /** Handles one match; by default the matched text is sent on via {@code next.pass}. */
    public void core( Matcher lilm ) {
        String matched = lilm.group();
        next.pass( matched );
    }

    /** Forwards already-processed text to the next stage's {@code pass}. */
    public void pass( CharSequence cs ) {
        next.pass( cs );
    }
}
/**
 * Shields URLs that are already part of HTML from the later filters. It matches
 * either an anchor tag with an href followed by link text that contains a URL, or
 * any tag with a double-quoted attribute value containing a URL. The inherited
 * core() sends the match on through pass(), so it is emitted unchanged.
 *
 * The first alternative now starts with "<a[^>]*href=": as found, the pattern began
 * with "]*href=", so the anchor branch did not require an opening "<a" at all.
 */
static class LinkNoBreakFilter extends PatternFilter {
    static Pattern linkNoBreak = Pattern.compile(
        "(<a[^>]*href=[^>]*>[^<]*" + urlRegex + "?|<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)",
        Pattern.CASE_INSENSITIVE );

    protected Pattern pat() { return linkNoBreak; }
}
/**
 * Handles the "[text url]" shorthand: bracketed text that ends in a URL becomes an
 * anchor whose href is the URL and whose body is the text.
 *
 * Two literals are restored here. The separator alternative "(?:\\s| )" was redundant
 * as found (a space is already covered by \\s) and is taken to have been the
 * "&nbsp;" entity. The emitted string was "" + text + "", which dropped the URL.
 */
static class BracketFilterA extends PatternFilter {
    static Pattern linkNoBreak = Pattern.compile(
        "\\[([^\\[][^]]+?)(?:\\s|&nbsp;)*?(" + urlRegex + ")\\]",
        Pattern.CASE_INSENSITIVE );

    protected Pattern pat() { return linkNoBreak; }

    /** Emits the anchor for one "[text url]" match via next.pass. */
    public void core( Matcher lilm ) {
        String text = lilm.group( 1 );
        String url = lilm.group( 2 );
        // A double quote inside the URL would end the href attribute early.
        String href = url.replace( "\"", "&quot;" );
        next.pass( "<a href=\"" + href + "\">" + text + "</a>" );
    }
}
/**
 * Handles the "[url text]" shorthand: a bracket that starts with a URL followed by
 * at least one separator and then text becomes an anchor whose href is the URL and
 * whose body is the text.
 *
 * Two literals are restored here. The separator alternative "(?:\\s| )" was redundant
 * as found (a space is already covered by \\s) and is taken to have been the
 * "&nbsp;" entity. The emitted string was "" + text + "", which dropped the URL.
 */
static class BracketFilterB extends PatternFilter {
    static Pattern linkNoBreak = Pattern.compile(
        "\\[(" + urlRegex + ")(?:\\s|&nbsp;)+([^\\[][^]]+?)\\]",
        Pattern.CASE_INSENSITIVE );

    protected Pattern pat() { return linkNoBreak; }

    /** Emits the anchor for one "[url text]" match via next.pass. */
    public void core( Matcher lilm ) {
        String text = lilm.group( 2 );
        String url = lilm.group( 1 );
        // A double quote inside the URL would end the href attribute early.
        String href = url.replace( "\"", "&quot;" );
        next.pass( "<a href=\"" + href + "\">" + text + "</a>" );
    }
}
/**
 * Linkifies URLs that stand on their own. The URL is matched lazily and must be
 * followed by an optional single character from ".!?_*=" and then whitespace, one
 * of "<()[]{}", or the end of the input; the lookahead keeps that trailing
 * punctuation out of the link.
 *
 * The emitted string was "" + url + "", which left the text unchanged; it is
 * restored to an anchor whose href and body are both the URL.
 */
static class NakedURLFilter extends PatternFilter {
    static Pattern linkNoBreak = Pattern.compile(
        "(" + urlRegex + "?)(?=[.!?_*=]?[\\s\\n<()\\[\\]{}]|$)",
        Pattern.CASE_INSENSITIVE );

    protected Pattern pat() { return linkNoBreak; }

    /** Emits the anchor for one naked URL via next.pass. */
    public void core( Matcher lilm ) {
        String url = lilm.group( 1 );
        // A double quote inside the URL would end the href attribute early.
        String href = url.replace( "\"", "&quot;" );
        next.pass( "<a href=\"" + href + "\">" + url + "</a>" );
    }
}
static class SBAccumFilter extends PatternFilter {
StringBuffer toret = new StringBuffer();
protected Pattern pat() { return null; }
public void filter( CharSequence cs ) {
toret.append( cs );
}
public void pass( CharSequence cs ) {
toret.append( cs );
}
}
/**
 * Builds a fresh filter chain and returns its head. Stages run in this order:
 * LinkNoBreakFilter, BracketFilterB, BracketFilterA, NakedURLFilter, and finally an
 * SBAccumFilter that collects the output.
 */
static PatternFilter newDefaultFilterChain() {
    LinkNoBreakFilter head = new LinkNoBreakFilter();
    BracketFilterB urlThenText = new BracketFilterB();
    BracketFilterA textThenUrl = new BracketFilterA();
    NakedURLFilter naked = new NakedURLFilter();
    SBAccumFilter sink = new SBAccumFilter();

    head.next = urlThenText;
    urlThenText.next = textThenUrl;
    textThenUrl.next = naked;
    naked.next = sink;
    return head;
}
/**
 * Escapes angle brackets so markup can be displayed as text: every '<' becomes
 * "&lt;" and every '>' becomes "&gt;".
 *
 * As found, each bracket was replaced by itself, so the method returned its input
 * unchanged; the entity replacements are restored. Literal replace() is used because
 * no regular expression is needed.
 *
 * @param in text to escape; must not be null
 * @return the escaped copy
 */
static String ltgt( String in ) {
    return in.replace( "<", "&lt;" ).replace( ">", "&gt;" );
}
/**
 * A URL: one of the supported schemes followed by a greedy run of characters that
 * are not whitespace, '<' or '>'. Users append "?" to make that run lazy.
 */
static String urlRegex = "(?:http://|ftp://|https://|mailto:)[^\\s<>]+";

/** A tag with a double-quoted attribute value that contains a URL. */
static Pattern linkInTag = Pattern.compile("(<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)");

/**
 * An anchor tag with an href, followed by link text that contains a URL.
 * The leading "<a[^>" is restored; as found the pattern began with "]*href=".
 */
static Pattern linkInLink = Pattern.compile("(<a[^>]*href=[^>]*>[^<]*" + urlRegex + "?)");

/** Either of the two forms above; the anchor form is tried first. */
static Pattern linkNoBreak = Pattern.compile(
    "(<a[^>]*href=[^>]*>[^<]*" + urlRegex + "?|<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)");
static StringBuffer autoLinkifyUrlsR( StringBuffer in ) {
PatternFilter filts = newDefaultFilterChain();
filts.filter( in );
while ( filts.next != null ) {
filts = filts.next;
}
return ((SBAccumFilter)filts).toret;
/*
Matcher lilm = linkNoBreak.matcher( in );
System.out.println("
| "); System.out.println( ltgt(in.substring( pos, lilm.start() )) ); System.out.println( " | " ); System.out.println( ltgt(lilm.group()) ); System.out.println(" |
| "); System.out.println( ltgt(in.substring( pos )) ); System.out.println( " | " ); //System.out.println( lilm.group() ); System.out.println(" |
| "); System.out.println( i ); System.out.println( " | " ); System.out.println( they[i] ); System.out.println(" |
" ); System.out.println( testInputs[i] ); System.out.println( " | " ); System.out.println( sba ); System.out.println( " | " ); System.out.println( sbb ); System.out.println( " |
| " ); System.out.println( ltgt(sba.toString()) ); System.out.println( " | " ); System.out.println( ltgt(sbb.toString()) ); System.out.println( " |