import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Improved and cleaned up version of AutoFormat.
 *
 * Turns loosely formatted plain text into HTML: control characters are
 * removed, blank lines become paragraph breaks, single newlines become line
 * breaks, URLs (naked or in square-bracket notation) become anchors, and runs
 * of whitespace are replaced by a space plus a non-breaking-space entity.
 *
 * NOTE(review): this file had been flattened onto a few lines and run through
 * an HTML stripper, which removed every tag and decoded every entity inside
 * string literals and regular expressions. The markup below was restored from
 * the surviving Perl comments, the otherwise unused local variables and the
 * test inputs; confirm against the pristine source if it is available.
 */
public class AutoFormat2 {

	/** Matches one URL: a known scheme followed by characters that are not whitespace or angle brackets. */
	static String urlRegex = "(?:http://|ftp://|https://|mailto:|feed://)[^\\s<>]+";

	/** A tag with a quoted attribute value that contains a URL. Not used by the filter chain. */
	static Pattern linkInTag = Pattern.compile("(<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)");

	/** An opening anchor tag followed by link text that contains a URL. Not used by the filter chain. */
	static Pattern linkInLink = Pattern.compile("(<a[^>]*href=[^>]*>[^<]*" + urlRegex + "?)");

	/** Union of the two patterns above. Not used by the filter chain. */
	static Pattern linkNoBreak = Pattern.compile( "(<a[^>]*href=[^>]*>[^<]*" + urlRegex + "?|<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)");

	/**
	 * Replaces every occurrence of match in the buffer, in place.
	 *
	 * The search resumes after the inserted replacement, so a replacement that
	 * contains the match text is not expanded again.
	 *
	 * @param in buffer to modify
	 * @param match literal text to look for; an empty string leaves the buffer untouched
	 * @param replace literal text to insert instead
	 * @return the same buffer instance
	 */
	public static StringBuffer replaceAll( StringBuffer in, String match, String replace ) {
		int ml = match.length();
		if ( ml == 0 ) {
			// indexOf( "" ) succeeds at every offset, so the loop below would
			// keep inserting the replacement forever.
			return in;
		}
		int rl = replace.length();
		int pos = in.indexOf( match );
		while ( pos >= 0 ) {
			in.replace( pos, pos + ml, replace );
			pos = in.indexOf( match, pos + rl );
		}
		return in;
	}

	/** True for the characters U+0000-U+0008 and U+000B-U+001F (carriage return included; tab and newline excluded). */
	private static boolean isUnwantedControl( char c ) {
		return c <= 0x08 || ( c >= 0x0B && c <= 0x1F );
	}

	/**
	 * Removes control characters from the buffer, in place.
	 *
	 * Port of the Perl rule s#[\x00-\x08\x0B-\x1F]##g.
	 *
	 * @param in buffer to modify
	 * @return the same buffer instance
	 */
	static StringBuffer stripUnwantedChars( StringBuffer in ) {
		int len = in.length();
		int kept = 0;
		// Compact the surviving characters towards the front in one pass
		// instead of shifting the tail on every deletion.
		for ( int i = 0; i < len; i++ ) {
			char c = in.charAt( i );
			if ( !isUnwantedControl( c ) ) {
				in.setCharAt( kept, c );
				kept++;
			}
		}
		in.setLength( kept );
		return in;
	}

	/**
	 * Replaces every run of two or more whitespace characters with a space
	 * followed by a non-breaking-space entity, in place. Single whitespace
	 * characters are left alone.
	 *
	 * The previous implementation replaced the same ranges (the whole run,
	 * starting at the character before the one that triggered the rule) but
	 * then moved its cursor to an offset computed before the replacement.
	 * That was only right for runs of exactly two characters; longer runs made
	 * the scan skip ahead and miss later runs.
	 *
	 * NOTE(review): the entity text is a reconstruction. The old cursor
	 * arithmetic (+5) and the "previous character" it recorded (';') imply a
	 * seven character replacement ending in ';', which matches the Perl rule
	 * that turns two spaces into a space plus the entity. The old code also
	 * contained an unreachable branch that would have emitted the bare entity
	 * for a single space at the start of a line; it is not reproduced here.
	 *
	 * TODO: changing remaining non-ASCII characters to numeric entities
	 * (the Perl "unless $noescapetags" rule) was never ported.
	 *
	 * @param in buffer to modify
	 * @return the same buffer instance
	 */
	static StringBuffer spacehack( StringBuffer in ) {
		int len = in.length();
		StringBuffer out = new StringBuffer( len + 16 );
		int pos = 0;
		while ( pos < len ) {
			char c = in.charAt( pos );
			if ( !Character.isWhitespace( c ) ) {
				out.append( c );
				pos++;
				continue;
			}
			// Find the end of this whitespace run.
			int end = pos + 1;
			while ( end < len && Character.isWhitespace( in.charAt( end ) ) ) {
				end++;
			}
			if ( end - pos >= 2 ) {
				out.append( " &nbsp;" );
			} else {
				out.append( c );
			}
			pos = end;
		}
		in.setLength( 0 );
		in.append( out.toString() );
		return in;
	}

	/**
	 * One stage of a chain of regex driven rewriters.
	 *
	 * filter() splits its input around the matches of pat(). Text between
	 * matches is handed to the next stage's filter() so later stages can
	 * still rewrite it. Each match is handed to core(), which emits its
	 * output through pass(); pass() forwards text down the chain without any
	 * further matching, so rewritten text is never processed twice.
	 */
	static abstract class PatternFilter {
		/** Following stage; must be set before filter() or pass() is used. */
		PatternFilter next;

		/** @return the pattern whose matches this stage rewrites */
		protected abstract Pattern pat();

		/**
		 * Rewrites the matches of pat() in the input and forwards everything
		 * else to the next stage.
		 *
		 * @param in text to process
		 */
		public void filter( CharSequence in ) {
			Matcher m = pat().matcher( in );
			int pos = 0;
			boolean matched = false;
			while ( m.find() ) {
				matched = true;
				next.filter( in.subSequence( pos, m.start() ) );
				core( m );
				pos = m.end();
			}
			if ( matched ) {
				next.filter( in.subSequence( pos, in.length() ) );
			} else {
				// Nothing to rewrite: hand the input on without copying it.
				next.filter( in );
			}
		}

		/**
		 * Handles one match. The default emits the matched text unchanged,
		 * which shields it from the later stages.
		 *
		 * @param lilm matcher positioned on the current match
		 */
		public void core( Matcher lilm ) {
			next.pass( lilm.group() );
		}

		/**
		 * Forwards finished text towards the end of the chain.
		 *
		 * @param cs text that must not be matched again
		 */
		public void pass( CharSequence cs ) {
			next.pass( cs );
		}
	}

	/**
	 * Shields URLs that are already inside markup (an anchor with its link
	 * text, or any tag with a quoted attribute) from the linkifying stages by
	 * passing them through unchanged.
	 */
	static class LinkNoBreakFilter extends PatternFilter {
		static Pattern linkNoBreak = Pattern.compile(
			"(<a[^>]*href=[^>]*>[^<]*" + urlRegex + "?|<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
	}

	/** Rewrites "[link text URL]" as an anchor. */
	static class BracketFilterA extends PatternFilter {
		static Pattern linkNoBreak = Pattern.compile(
			"\\[([^\\[][^\\]]+?)(?:\\s|&nbsp;)*?(" + urlRegex + ")\\]",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			String text = lilm.group( 1 );
			String url = lilm.group( 2 );
			next.pass( "<a href=\"" + url + "\">" + text + "</a>" );
		}
	}

	/** Rewrites "[URL link text]" as an anchor. */
	static class BracketFilterB extends PatternFilter {
		// s#\[($url_regexg)(?:\s|&nbsp;)+([^\[][^]]+?)\]#<a href="$1">$2</a>#gmsio;
		static Pattern linkNoBreak = Pattern.compile(
			"\\[(" + urlRegex + ")(?:\\s|&nbsp;)+([^\\[][^\\]]+?)\\]",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			String text = lilm.group( 2 );
			String url = lilm.group( 1 );
			next.pass( "<a href=\"" + url + "\">" + text + "</a>" );
		}
	}

	/**
	 * Turns a bare URL into an anchor whose text is the URL itself. The URL
	 * ends where it is followed by whitespace or a bracket (optionally after
	 * one punctuation character), or at the end of the segment.
	 */
	static class NakedURLFilter extends PatternFilter {
		/*
		 s#([^\x00]|^)($url_regex)(?=[.!?_*=]?[\s\n<()\[\]{}\x01\x02]|$)#$1<a href="$2">$2</a>#gmio;
		 */
		static Pattern linkNoBreak = Pattern.compile(
			"(" + urlRegex + "?)(?=[.!?_*=]?[\\s\\n<()\\[\\]{}]|$)",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			String url = lilm.group( 1 );
			next.pass( "<a href=\"" + url + "\">" + url + "</a>" );
		}
	}

	/** Drops runs of control characters. Not part of the default chain. */
	static class CtrlStripFilter extends PatternFilter {
		//s#[\x00-\x08\x0B-\x1F]##g; # Nuke control characters
		static Pattern linkNoBreak = Pattern.compile(
			"[\\x00-\\x08\\x0B-\\x1F\\r]+",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			// do nothing, drops chars matched
		}
	}

	/**
	 * Replaces a blank-line gap (two newlines with only whitespace between
	 * them) by a paragraph tag, unless the gap directly follows a paragraph
	 * or line-break tag that is already present.
	 */
	static class ParagraphenateFilter extends PatternFilter {
		static Pattern linkNoBreak = Pattern.compile(
			"(?<!<p>|<br>)\\n[\\s\\n]*\\n",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			next.pass( "<p>\n" );
		}
	}

	/**
	 * Puts a line-break tag in front of every newline that does not already
	 * follow a paragraph or line-break tag.
	 */
	static class BRifyFilter extends PatternFilter {
		static Pattern linkNoBreak = Pattern.compile(
			"(?<!<p>|<br>)\\n",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			next.pass( "<br>\n" );
		}
	}

	/** End of a chain: collects everything it receives, in arrival order. */
	static class SBAccumFilter extends PatternFilter {
		StringBuffer toret = new StringBuffer();
		protected Pattern pat() { return null; }
		public void filter( CharSequence cs ) {
			toret.append( cs );
		}
		public void pass( CharSequence cs ) {
			toret.append( cs );
		}
	}

	/**
	 * Builds the default chain: paragraphs, line breaks, existing-markup
	 * shield, "[URL text]", "[text URL]", naked URLs, accumulator.
	 *
	 * @return the first stage of the chain
	 */
	static PatternFilter newDefaultFilterChain() {
		ParagraphenateFilter pf = new ParagraphenateFilter();
		BRifyFilter brf = new BRifyFilter();
		LinkNoBreakFilter lnbf = new LinkNoBreakFilter();
		BracketFilterA bfa = new BracketFilterA();
		BracketFilterB bfb = new BracketFilterB();
		NakedURLFilter nu = new NakedURLFilter();
		SBAccumFilter tail = new SBAccumFilter();
		pf.next = brf;
		brf.next = lnbf;
		lnbf.next = bfb;
		bfb.next = bfa;
		bfa.next = nu;
		nu.next = tail;
		return pf;
	}

	/**
	 * Escapes ampersands and angle brackets so markup can be displayed as
	 * text. The ampersand is handled first so the entities produced for the
	 * brackets are not escaped a second time.
	 *
	 * @param in text to escape
	 * @return the escaped text
	 */
	static String ltgt( String in ) {
		return in.replaceAll( "&", "&amp;" ).replaceAll( "<", "&lt;" ).replaceAll( ">", "&gt;" );
	}

	/**
	 * Runs the default filter chain over the input.
	 *
	 * @param in text to process; it is read but not modified
	 * @return a new buffer holding the chain's output
	 */
	static StringBuffer autoLinkifyUrlsR( StringBuffer in ) {
		PatternFilter head = newDefaultFilterChain();
		head.filter( in );
		// The accumulator that holds the result is the last stage.
		PatternFilter last = head;
		while ( last.next != null ) {
			last = last.next;
		}
		return ((SBAccumFilter)last).toret;
	}

	/*
	 Most of this function has not been reimplemented because it is crazy
	 or totally the wrong way to do it outside of perl
	 (and possibly inside of perl)
	 */
	/**
	 * Full pipeline: strip control characters, apply the filter chain, then
	 * apply the whitespace hack.
	 *
	 * @param in raw text; control characters are removed from it in place
	 * @return a buffer holding the formatted text
	 */
	static StringBuffer autoFormat( StringBuffer in ) {
		in = stripUnwantedChars( in );
		in = autoLinkifyUrlsR( in );
		in = spacehack( in );
		return in;
	}

	/** test autoformat */
	public static String[] testInputs = {
		"a foo haha aoeu\nthingy\n\n\nyadda",
		"http://frond.com/boo",
		"wheee .... thud\nmailto:link@this",
		"[implement text ftp://link]\n\n[https://also implement link then text]\n\ngot it?",
		"http://bolson.org don't break this by overlinking it!",
		"FTP://saoeuraoeu\n\n[case INSENSITIVITY test mailto:colonel@cwru.edu]",
		"already has p<p>\nandalready has br<br>\ngot it?",
		"How's\n the\n \n\nthe whitespace?",
	};

	/**
	 * Formats every test input and prints the input, the rendered result and
	 * the escaped result side by side.
	 *
	 * NOTE(review): the markup printed around the three columns was lost; the
	 * table layout used here is a guess at the original.
	 *
	 * @param argv ignored
	 */
	public static void main( String[] argv ) {
		System.out.println("<html><body><table border=\"1\">" );
		for ( int i = 0; i < testInputs.length; i++ ) {
			StringBuffer sba;
			sba = autoFormat( new StringBuffer( testInputs[i] ) );
			System.out.println( "<tr><td><pre>" );
			System.out.println( testInputs[i] );
			System.out.println( "</pre></td><td>" );
			System.out.println( sba );
			System.out.println( "</td><td><pre>" );
			System.out.println( ltgt(sba.toString()) );
			System.out.println( "</pre></td></tr>" );
		}
		System.out.println( "</table>" );
		// Disabled interactive mode: format standard input line by line.
		if ( false ) {
			try {
				java.io.BufferedReader in = new java.io.BufferedReader( new java.io.InputStreamReader( System.in ) );
				String line;
				while ( (line = in.readLine()) != null ) {
					System.out.println( autoFormat( new StringBuffer( line ) ) );
				}
			} catch ( Exception e ) {
				e.printStackTrace();
			}
		}
		System.out.println("</body></html>");
	}
}