import java.util.regex.Pattern;
import java.util.regex.Matcher;
/* Improved and cleaned up version of AutoFormat */
public class AutoFormat2 {
/**
 * Replaces every occurrence of {@code match} in {@code in} with {@code replace},
 * modifying the buffer in place.
 *
 * <p>Scanning resumes after the inserted replacement, so text introduced by
 * {@code replace} is never re-matched.
 *
 * @param in      buffer to edit in place
 * @param match   literal text to search for; an empty string matches nothing
 *                and leaves the buffer untouched
 * @param replace literal text substituted for each occurrence
 * @return the same {@code in} instance, for chaining
 */
public static StringBuffer replaceAll( StringBuffer in, String match, String replace ) {
    final int matchLen = match.length();
    if ( matchLen == 0 ) {
        // indexOf("") succeeds at every position, so without this guard the loop
        // below would keep inserting `replace` forever.
        return in;
    }
    final int replaceLen = replace.length();
    int pos = in.indexOf( match );
    while ( pos >= 0 ) {
        in.replace( pos, pos + matchLen, replace );
        // Skip past what was just inserted so it cannot be matched again.
        pos = in.indexOf( match, pos + replaceLen );
    }
    return in;
}
/**
 * Removes ASCII control characters from {@code in}, in place.
 *
 * <p>Characters in the ranges 0x00-0x08 and 0x0B-0x1F are dropped (this
 * includes carriage return, 0x0D). Tab (0x09) and line feed (0x0A) are kept.
 * Mirrors the Perl original: {@code s#[\x00-\x08\x0B-\x1F]##g}.
 *
 * @param in buffer to clean in place
 * @return the same {@code in} instance
 */
static StringBuffer stripUnwantedChars( StringBuffer in ) {
    final int length = in.length();
    int write = 0;
    // Single-pass compaction: kept characters are shifted down over removed
    // ones, avoiding the quadratic cost of repeated deleteCharAt calls.
    for ( int read = 0; read < length; read++ ) {
        final char c = in.charAt( read );
        final boolean control = c <= 0x08 || ( c >= 0x0B && c <= 0x1F );
        if ( control ) {
            continue;
        }
        if ( write != read ) {
            in.setCharAt( write, c );
        }
        write++;
    }
    in.setLength( write );
    return in;
}
/**
 * Rewrites runs of whitespace in {@code in}, in place, and returns the same buffer.
 *
 * Walks the buffer one character at a time while remembering the previous
 * character. Two situations trigger a rewrite: a space directly after a
 * newline, and any whitespace character directly after another whitespace
 * character. In both, the run of whitespace that follows is measured and the
 * range starting at the PREVIOUS character (pos - 1) is replaced by a fixed
 * literal.
 *
 * NOTE(review): the "+ 5" offsets and the ';' stored into prevc suggest the
 * replacement literals were once longer and ended in ';' (presumably an HTML
 * entity) -- the literals visible here are a single character. Confirm the
 * intended replacement text before changing any index arithmetic.
 */
static StringBuffer spacehack( StringBuffer in ) {
int pos = 0;
// Previous character seen; '\0' means "start of buffer".
char prevc = '\0';
while ( pos < in.length() ) {
char c;
c = in.charAt( pos );
// Case 1: a space immediately following a newline.
if ( prevc == '\n' && c == ' ' ) {
// Find the end (exclusive) of the whitespace run that starts at pos.
int lastsp = pos;
while ( lastsp < in.length() && Character.isWhitespace( in.charAt( lastsp ) ) ) {
lastsp++;
}
//s#^ # #gm;
if ( lastsp != pos ) {
// The replaced range starts at pos - 1, so the preceding newline is replaced too.
in.replace( pos - 1, lastsp, " " );
// NOTE(review): lastsp was measured before replace() changed the buffer's
// length, so this may skip over characters that were never examined -- verify.
pos = lastsp + 5;
prevc = ';';
} else {
// Not reachable as written: c == ' ' is whitespace, so the scan above
// always advances lastsp at least once.
in.replace( pos - 1, pos + 1, " " );
pos += 5;
prevc = ';';
}
// Case 2: two whitespace characters in a row.
} else if ( Character.isWhitespace( prevc ) && Character.isWhitespace( c ) ) {
// c is already known to be whitespace, so scanning starts one past it.
int lastsp = pos + 1;
while ( lastsp < in.length() && Character.isWhitespace( in.charAt( lastsp ) ) ) {
lastsp++;
}
//s# # #g;
// Replaces the previous character and the whole run with the literal.
in.replace( pos - 1, lastsp, " " );
// NOTE(review): same pre-replacement-index concern as in case 1.
pos = lastsp + 5;
prevc = ';';
//} else if ( ! noescapetags && () ) {
// Change remaining non-ASCII chars to entities
//s!([^\n\t\x20-\x7E])!''.ord($1).';'!ge unless $noescapetags;
} else {
// Ordinary character: remember it and move on.
prevc = c;
pos++;
}
}
return in;
}
/**
 * One stage in a chain of regex-driven text filters.
 *
 * <p>Each stage splits its input around matches of {@link #pat()}. Unmatched
 * stretches are handed to the next stage's {@link #filter} so later stages can
 * still process them; matched text is handled by {@link #core}, whose output
 * travels down the chain through {@link #pass} without further filtering.
 */
static abstract class PatternFilter {
    /** The stage that follows this one; must be set before this stage is used. */
    PatternFilter next;

    /** @return the pattern whose matches this stage intercepts */
    protected abstract Pattern pat();

    /**
     * Splits {@code in} around this stage's matches and forwards the pieces.
     *
     * @param in text to process
     */
    public void filter( CharSequence in ) {
        final Matcher matcher = pat().matcher( in );
        if ( !matcher.find() ) {
            // Nothing for this stage to do: forward the input untouched.
            next.filter( in );
            return;
        }
        int resumeAt = 0;
        do {
            // Text between the previous match and this one is still unprocessed.
            next.filter( in.subSequence( resumeAt, matcher.start() ) );
            core( matcher );
            resumeAt = matcher.end();
        } while ( matcher.find() );
        // Trailing text after the final match (possibly empty).
        next.filter( in.subSequence( resumeAt, in.length() ) );
    }

    /**
     * Handles one match. The default forwards the matched text unchanged.
     *
     * @param lilm matcher positioned on the current match
     */
    public void core( Matcher lilm ) {
        final String matched = lilm.group();
        next.pass( matched );
    }

    /**
     * Relays finished text to the next stage without filtering it.
     *
     * @param cs text to relay
     */
    public void pass( CharSequence cs ) {
        next.pass( cs );
    }
}
static class LinkNoBreakFilter extends PatternFilter {
static Pattern linkNoBreak = Pattern.compile(
"(]*href=[^>]*>[^<]*" + urlRegex + "?|<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)",
Pattern.CASE_INSENSITIVE );
protected Pattern pat() { return linkNoBreak; }
}
static class BracketFilterA extends PatternFilter {
static Pattern linkNoBreak = Pattern.compile(
"\\[([^\\[][^]]+?)(?:\\s| )*?(" + urlRegex + ")\\]",
Pattern.CASE_INSENSITIVE );
protected Pattern pat() { return linkNoBreak; }
public void core( Matcher lilm ) {
String text = lilm.group( 1 );
String url = lilm.group( 2 );
next.pass( "" + text + "" );
}
}
static class BracketFilterB extends PatternFilter {
// s#\[($url_regexg)(?:\s| )+([^\[][^]]+?)\]#$2#gmsio;
static Pattern linkNoBreak = Pattern.compile(
"\\[(" + urlRegex + ")(?:\\s| )+([^\\[][^]]+?)\\]",
Pattern.CASE_INSENSITIVE );
protected Pattern pat() { return linkNoBreak; }
public void core( Matcher lilm ) {
String text = lilm.group( 2 );
String url = lilm.group( 1 );
next.pass( "" + text + "" );
}
}
static class NakedURLFilter extends PatternFilter {
/* s#([^\x00]|^)($url_regex)(?=[.!?_*=]?[\s\n<()\[\]{}\x01\x02]|$)#$1$2#gmio; */
static Pattern linkNoBreak = Pattern.compile(
"(" + urlRegex + "?)(?=[.!?_*=]?[\\s\\n<()\\[\\]{}]|$)",
Pattern.CASE_INSENSITIVE );
protected Pattern pat() { return linkNoBreak; }
public void core( Matcher lilm ) {
String url = lilm.group( 1 );
next.pass( "" + url + "" );
}
}
static class CtrlStripFilter extends PatternFilter {
//s#[\x00-\x08\x0B-\x1F]##g; # Nuke control characters
static Pattern linkNoBreak = Pattern.compile( "[\\x00-\\x08\\x0B-\\x1F\\r]+", Pattern.CASE_INSENSITIVE );
protected Pattern pat() { return linkNoBreak; }
public void core( Matcher lilm ) {
// do nothing, drops chars matched
}
}
static class ParagraphenateFilter extends PatternFilter {
static Pattern linkNoBreak = Pattern.compile( "(?|
)\\n[\\s\\n]*\\n", Pattern.CASE_INSENSITIVE );
protected Pattern pat() { return linkNoBreak; }
public void core( Matcher lilm ) {
next.pass( "
\n" );
}
}
static class BRifyFilter extends PatternFilter {
static Pattern linkNoBreak = Pattern.compile( "(?|
)\\n", Pattern.CASE_INSENSITIVE );
protected Pattern pat() { return linkNoBreak; }
public void core( Matcher lilm ) {
next.pass( "
\n" );
}
}
static class SBAccumFilter extends PatternFilter {
StringBuffer toret = new StringBuffer();
protected Pattern pat() { return null; }
public void filter( CharSequence cs ) {
toret.append( cs );
}
public void pass( CharSequence cs ) {
toret.append( cs );
}
}
/**
 * Builds the standard filter pipeline and returns its first stage.
 *
 * <p>Stage order: paragraph breaks, line breaks, existing-link protection,
 * bracketed "URL text" links, bracketed "text URL" links, bare URLs, and
 * finally the accumulator that gathers the output.
 *
 * @return the head of a freshly constructed chain
 */
static PatternFilter newDefaultFilterChain() {
    final PatternFilter[] stages = {
        new ParagraphenateFilter(),
        new BRifyFilter(),
        new LinkNoBreakFilter(),
        new BracketFilterB(),
        new BracketFilterA(),
        new NakedURLFilter(),
        new SBAccumFilter(),
    };
    // Link each stage to its successor; the accumulator stays unlinked as the tail.
    for ( int i = 0; i + 1 < stages.length; i++ ) {
        stages[i].next = stages[i + 1];
    }
    return stages[0];
}
/**
 * Escapes the HTML-significant characters {@code &}, {@code <} and {@code >}
 * so the text can be displayed literally inside an HTML page.
 *
 * @param in text to escape
 * @return {@code in} with {@code &}, {@code <}, {@code >} replaced by
 *         {@code &amp;}, {@code &lt;}, {@code &gt;}
 */
static String ltgt( String in ) {
    // '&' must be handled first so the ampersands introduced by the other two
    // replacements are not escaped again. String.replace is literal, so no
    // regex is compiled for these fixed strings.
    return in.replace( "&", "&amp;" ).replace( "<", "&lt;" ).replace( ">", "&gt;" );
}
/** Regex fragment for a URL: a supported scheme followed by characters that are not whitespace or angle brackets. */
static String urlRegex = "(?:http://|ftp://|https://|mailto:|feed://)[^\\s<>]+";

/** Any tag that has a quoted attribute value containing a URL. */
static Pattern linkInTag = Pattern.compile("(<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)");

// The patterns below must begin with "<a[^>" so they only match real anchor
// tags; without that prefix they matched any bare "href=" text.

/** An anchor tag with an href, followed by link text that contains a URL. */
static Pattern linkInLink = Pattern.compile("(<a[^>]*href=[^>]*>[^<]*" + urlRegex + "?)");

/** Either of the two shapes above: markup that already carries a URL. */
static Pattern linkNoBreak = Pattern.compile(
    "(<a[^>]*href=[^>]*>[^<]*" + urlRegex + "?|<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)");
/**
 * Runs {@code in} through a fresh default filter chain and returns what the
 * chain's final accumulator stage collected.
 *
 * @param in text to process; read, not modified, by this method
 * @return the accumulator's buffer holding the processed text
 */
static StringBuffer autoLinkifyUrlsR( StringBuffer in ) {
    final PatternFilter head = newDefaultFilterChain();
    head.filter( in );

    // Follow the links to the last stage, which is the accumulator.
    PatternFilter last = head;
    for ( ; last.next != null; last = last.next ) {
        // just advancing
    }
    final SBAccumFilter sink = (SBAccumFilter) last;
    return sink.toret;
}
/**
 * Applies the full formatting pipeline: strips control characters, runs the
 * linkifying filter chain, then applies the whitespace pass.
 *
 * <p>Most of the original Perl function has deliberately not been
 * reimplemented, because its approach is crazy or simply the wrong way to do
 * it outside of Perl (and possibly inside of Perl).
 *
 * @param in text to format
 * @return the formatted text
 */
static StringBuffer autoFormat( StringBuffer in ) {
    final StringBuffer cleaned = stripUnwantedChars( in );
    final StringBuffer linked = autoLinkifyUrlsR( cleaned );
    return spacehack( linked );
}
/** test autoformat */
public static String[] testInputs = {
"a foo haha aoeu\nthingy\n\n\nyadda",
"http://frond.com/boo",
"wheee .... thud\nmailto:link@this",
"[implement text ftp://link]\n\n[https://also implement link then text]\n\ngot it?",
"http://bolson.org don't break this by overlinking it!",
"FTP://saoeuraoeu\n\n[case INSENSITIVITY test mailto:colonel@cwru.edu]",
"already has p
\nandalready has br
\ngot it?",
"How's\n the\n \n\nthe whitespace?",
};
public static void main( String[] argv ) {
System.out.println("
" ); System.out.println( testInputs[i] ); System.out.println( " | " ); System.out.println( sba ); System.out.println( " |
| " ); System.out.println( ltgt(sba.toString()) ); System.out.println( " |