import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Plain-text to HTML auto-formatting helpers, translated from a Perl original.
 *
 * The entry point is {@link #autoFormat(StringBuffer)}, which runs
 * {@link #plaintextFormat(StringBuffer, boolean)} and then the regex-filter based
 * URL linkifier {@link #autoLinkifyUrlsR(StringBuffer)}. The older index-scanning
 * linkifier {@link #autoLinkifyUrls(StringBuffer)} is kept for comparison in
 * {@link #main(String[])}.
 */
public class AutoFormat {
	/**
	 * Replaces every occurrence of {@code match} in {@code in} with {@code replace}, in place.
	 * Replacement text is never rescanned, so a replacement that contains {@code match}
	 * (for example "&amp;" to "&amp;amp;") is safe.
	 *
	 * @param in buffer to modify
	 * @param match literal text to look for; an empty string matches nothing
	 * @param replace literal replacement text
	 * @return {@code in}, for call chaining
	 */
	public static StringBuffer replaceAll( StringBuffer in, String match, String replace ) {
		int ml = match.length();
		if ( ml == 0 ) {
			// indexOf( "" ) succeeds at every index, so an empty pattern would keep
			// inserting the replacement forever; treat it as "nothing to replace".
			return in;
		}
		int rl = replace.length();
		int pos = in.indexOf( match );
		while ( pos >= 0 ) {
			in.replace( pos, pos + ml, replace );
			// resume after the inserted text so it is not matched again
			pos = in.indexOf( match, pos + rl );
		}
		return in;
	}

	/**
	 * Converts plain text to simple HTML, in place. This function has been pretty
	 * faithfully translated from the perl. Steps, in order:
	 * <ol>
	 * <li>leading and trailing whitespace is removed; an all-whitespace input becomes empty</li>
	 * <li>unless {@code noescapetags} is set, the characters &amp; " &lt; &gt; become entities</li>
	 * <li>each tab becomes three non-breaking spaces and a space</li>
	 * <li>control characters 0x00-0x08 and 0x0B-0x1F are deleted
	 *     (perl: s#[\x00-\x08\x0B-\x1F]##g)</li>
	 * <li>a newline followed by whitespace containing another newline becomes "&lt;p&gt;\n"
	 *     (perl: s#\n\s*\n#&lt;p&gt;\n#gs)</li>
	 * <li>any other newline becomes "&lt;br&gt;\n" (perl: s#(?&lt;!&lt;p&gt;)\n#&lt;br&gt;\n#gs)</li>
	 * <li>a space at the start of a line becomes "&amp;nbsp;" (perl: s#^ #&amp;nbsp;#gm)</li>
	 * <li>a run of two or more whitespace characters on one line becomes " &amp;nbsp;"
	 *     (the perl replaced each pair: s#  # &amp;nbsp;#g)</li>
	 * </ol>
	 * Not ported: changing remaining non-ASCII chars to entities
	 * (perl: s!([^\n\t\x20-\x7E])!'&amp;#'.ord($1).';'!ge unless $noescapetags).
	 *
	 * @param in text to convert; modified in place
	 * @param noescapetags when true, existing HTML markup is passed through unescaped
	 * @return {@code in}
	 */
	static StringBuffer plaintextFormat( StringBuffer in, boolean noescapetags ) {
		// Remove excess whitespace from the front and end of the text.
		// One delete and one truncate instead of a shift of the whole buffer per character.
		int start = 0;
		while ( start < in.length() && Character.isWhitespace( in.charAt( start ) ) ) {
			start++;
		}
		in.delete( 0, start );
		int end = in.length();
		while ( end > 0 && Character.isWhitespace( in.charAt( end - 1 ) ) ) {
			end--;
		}
		in.setLength( end );
		if ( in.length() == 0 ) {
			return in;
		}

		// Perform standard plain-old-text conversions.
		// "&" must go first or it would re-escape the entities produced below.
		if ( ! noescapetags ) {
			in = replaceAll( in, "&", "&amp;" );
			in = replaceAll( in, "\"", "&quot;" );
			in = replaceAll( in, "<", "&lt;" );
			in = replaceAll( in, ">", "&gt;" );
		}
		in = replaceAll( in, "\t", "&nbsp;&nbsp;&nbsp; " );

		// Single pass over the buffer. Invariant: everything before pos is final output,
		// and prevc is the last output character ('\n' right after a line or paragraph break).
		int pos = 0;
		char prevc = '\0';
		while ( pos < in.length() ) {
			char c = in.charAt( pos );
			if ( c <= 8 || (c >= 0x0B && c <= 0x1F) ) {
				// Nuke control characters; pos now addresses the following character.
				in.deleteCharAt( pos );
			} else if ( c == '\n' ) {
				// Look through the whitespace that follows for the last newline in it.
				int endnl = -1;
				int np = pos + 1;
				while ( np < in.length() && Character.isWhitespace( in.charAt( np ) ) ) {
					if ( in.charAt( np ) == '\n' ) {
						endnl = np;
					}
					np++;
				}
				if ( endnl >= 0 ) {
					// Blank line(s): collapse everything up to, but not including, the
					// last newline into a paragraph mark.
					in.replace( pos, endnl, "<p>" );
					pos += 4; // just past "<p>\n"
				} else {
					in.insert( pos, "<br>" );
					pos += 5; // just past "<br>\n"
				}
				prevc = '\n';
			} else if ( prevc == '\n' && c == ' ' ) {
				// Leading space of a line: make it non-breaking so indentation survives.
				in.replace( pos, pos + 1, "&nbsp;" );
				pos += 6; // length of "&nbsp;"
				prevc = ';';
			} else if ( prevc != '\n' && Character.isWhitespace( prevc ) && Character.isWhitespace( c ) ) {
				// Second character of a whitespace run: find the end of the run, but
				// stop at a newline so the line break is not swallowed.
				int lastsp = pos + 1;
				while ( lastsp < in.length() && in.charAt( lastsp ) != '\n'
						&& Character.isWhitespace( in.charAt( lastsp ) ) ) {
					lastsp++;
				}
				// The run started at pos - 1; it becomes the 7 characters " &nbsp;".
				in.replace( pos - 1, lastsp, " &nbsp;" );
				pos += 6; // (pos - 1) + 7
				prevc = ';';
			} else {
				prevc = c;
				pos++;
			}
		}
		return in;
	}

	/**
	 * URL scheme prefixes recognised by {@link #autoLinkifyUrls(StringBuffer)}.
	 * Maybe pull urlSchemes from a db set variable?
	 */
	static String[] urlSchemes = {
		"http://", "https://", "ftp://",
		//"file://",
		"mailto:",
	};

	/**
	 * Builds an anchor element for {@code url} with the given link text. Double quotes
	 * in the URL are written as &amp;quot; so they cannot end the href attribute early.
	 * The link text is inserted unchanged.
	 */
	private static String anchor( String url, String text ) {
		String href = replaceAll( new StringBuffer( url ), "\"", "&quot;" ).toString();
		return "<a href=\"" + href + "\">" + text + "</a>";
	}

	/**
	 * Index-scanning URL linkifier; modifies {@code in} in place.
	 *
	 * Allowed forms of the original perl version:
	 * <pre>
	 *  [url text]
	 *  [text url]
	 *  {url text}
	 *  {text url}
	 *  url
	 *  text is [^\[][^]]+? inside []
	 *          [^{][^}]+?  inside {}
	 * </pre>
	 * For a naked url the perl did not include any of ".!?_*=" as part of the url if
	 * they were the last char, and required it to be followed by whitespace, one of
	 * "\n&lt;()[]{}" or the end of the input.
	 *
	 * Currently only naked urls are augmented by this method, and they are considered
	 * as extending until whitespace or '&lt;' or '&gt;' with no special trimmings.
	 * Anything between a '&lt;' and the next '&gt;' is skipped, so URLs inside tag
	 * attributes are left alone; a '&lt;' with no closing '&gt;' is rewritten to &amp;lt;.
	 *
	 * Known limitations: scheme matching is case-sensitive, and a URL that appears as
	 * the text of an existing anchor element is linkified again.
	 * {@link #autoLinkifyUrlsR(StringBuffer)} handles both cases.
	 *
	 * @param in HTML-ish text to linkify; modified in place
	 * @return {@code in}
	 */
	static StringBuffer autoLinkifyUrls( StringBuffer in ) {
		int pos = 0;
		while ( pos < in.length() ) {
			// Earliest scheme prefix at or after pos.
			int schemePos = -1;
			int scheme = -1;
			for ( int i = 0; i < urlSchemes.length; i++ ) {
				int p = in.indexOf( urlSchemes[i], pos );
				if ( p >= 0 && (schemePos < 0 || p < schemePos) ) {
					schemePos = p;
					scheme = i;
				}
			}
			if ( schemePos < 0 ) {
				// no matching schemes to linkify
				return in;
			}
			int nextLT = in.indexOf( "<", pos );
			if ( nextLT >= 0 && nextLT < schemePos ) {
				int nextGT = in.indexOf( ">", nextLT );
				if ( nextGT >= 0 ) {
					// zoom scan to after end of tag
					pos = nextGT + 1;
				} else {
					// bogus < open tag!
					in.replace( nextLT, nextLT + 1, "&lt;" );
					pos = nextLT + 4; // just past "&lt;"
				}
			} else {
				// no '<' before match, linkify ...
				int linkend = schemePos + urlSchemes[scheme].length();
				while ( linkend < in.length() ) {
					char c = in.charAt( linkend );
					if ( Character.isWhitespace( c ) || c == '<' || c == '>' ) {
						break;
					}
					linkend++;
				}
				String url = in.substring( schemePos, linkend );
				String link = anchor( url, url );
				in.replace( schemePos, linkend, link );
				// Continue ON the character that ended the URL: if it is a '<' it
				// must still be seen so that the tag it opens gets skipped.
				pos = schemePos + link.length();
			}
		}
		/* Perl reference for the parts that are not implemented here:

		// Don't match URLs with <> in them
		my $url_regex = '(?:http|ftp|file)://[^\s<>]+?';
		my $url_regexg = '(?:http|ftp|file)://[^\s<>]+'; # greedy

		// Mark URLs that are already in HTML attrs or links so we don't linkify them
		s#(<[^>]+="[^">]*)($url_regex)#$1\x00$2#gso;
		s#(<a\s[^>]*href=[^>]*>[^<]*)($url_regex)#$1\x00$2#gso;

		# Grab expressions in brackets ('[]', '{}', or '<>', not '()')
		# and if they end in a URL, linkify them.
		s#\[([^\[][^]]+?)(?:\s|&nbsp;)*?($url_regex)\]#<a href="\x00$2">$1</a>#gmsio;
		s#{([^{][^}]+?)(?:\s|&nbsp;)*($url_regex)}#<a href="\x00$2">$1</a>#gmsio;

		s#\[($url_regexg)(?:\s|&nbsp;)+([^\[][^]]+?)\]#<a href="\x00$1">$2</a>#gmsio;
		s#{($url_regexg)(?:\s|&nbsp;)+([^{][^}]+?)}#<a href="\x00$1">$2</a>#gmsio;

		# Linkify all the remaining naked URLs
		s#([^\x00]|^)($url_regex)(?=[.!?_*=]?[\s\n<()\[\]{}\x01\x02]|$)#$1<a href="$2">$2</a>#gmio;

		# Remove placeholder chars
		s#\x00##gs;
		*/
		return in;
	}

	/**
	 * One stage of a chain of regex filters. Text that matches this stage's pattern is
	 * handed to {@link #core(Matcher)} and from there travels down the chain through
	 * {@link #pass(CharSequence)}, which later stages do not inspect. The text between
	 * matches goes to the next stage's {@link #filter(CharSequence)} for further matching.
	 * The chain must end in a stage that overrides both methods (see SBAccumFilter).
	 */
	static abstract class PatternFilter {
		/** Next stage in the chain; must be set before filter() or pass() is called. */
		PatternFilter next;

		/** @return the pattern whose matches this stage claims */
		protected abstract Pattern pat();

		/**
		 * Splits {@code in} around this stage's matches: unmatched stretches (possibly
		 * empty) are filtered by the next stage, matches are handled by core().
		 */
		public void filter( CharSequence in ) {
			Matcher lilm = pat().matcher( in );
			int pos = 0;
			while ( lilm.find() ) {
				next.filter( in.subSequence( pos, lilm.start() ) );
				core( lilm );
				pos = lilm.end();
			}
			// tail after the last match, or the whole input when nothing matched
			next.filter( in.subSequence( pos, in.length() ) );
		}

		/** Handles one match; by default the matched text is passed on unchanged. */
		public void core( Matcher lilm ) {
			next.pass( lilm.group() );
		}

		/** Forwards finished text to the end of the chain without further matching. */
		public void pass( CharSequence cs ) {
			next.pass( cs );
		}
	}

	/**
	 * Claims text that must not be linkified: an opening anchor tag together with the
	 * start of a URL in its link text, or any tag with a URL inside a quoted attribute.
	 * The claimed text is passed through unchanged.
	 */
	static class LinkNoBreakFilter extends PatternFilter {
		static Pattern linkNoBreak = Pattern.compile(
			"(<a\\s[^>]*href=[^>]*>[^<]*" + urlRegex + "?|<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
	}

	/** Turns the bracket form "[text url]" into a link to url labelled with text. */
	static class BracketFilterA extends PatternFilter {
		//		s#\[([^\[][^]]+?)(?:\s|&nbsp;)*?($url_regex)\]#<a href="\x00$2">$1</a>#gmsio;
		static Pattern linkNoBreak = Pattern.compile(
			"\\[([^\\[][^]]+?)(?:\\s|&nbsp;)*?(" + urlRegex + ")\\]",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			String text = lilm.group( 1 );
			String url = lilm.group( 2 );
			next.pass( anchor( url, text ) );
		}
	}

	/** Turns the bracket form "[url text]" into a link to url labelled with text. */
	static class BracketFilterB extends PatternFilter {
		//		s#\[($url_regexg)(?:\s|&nbsp;)+([^\[][^]]+?)\]#<a href="\x00$1">$2</a>#gmsio;
		static Pattern linkNoBreak = Pattern.compile(
			"\\[(" + urlRegex + ")(?:\\s|&nbsp;)+([^\\[][^]]+?)\\]",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			String text = lilm.group( 2 );
			String url = lilm.group( 1 );
			next.pass( anchor( url, text ) );
		}
	}

	/**
	 * Linkifies a bare URL. The URL ends where the lookahead in the pattern first
	 * succeeds: before whitespace or one of "&lt;()[]{}" (optionally preceded by one of
	 * ".!?_*="), or at the end of the input.
	 */
	static class NakedURLFilter extends PatternFilter {
		/*	 s#([^\x00]|^)($url_regex)(?=[.!?_*=]?[\s\n<()\[\]{}\x01\x02]|$)#$1<a href="$2">$2</a>#gmio; */
		static Pattern linkNoBreak = Pattern.compile(
			"(" + urlRegex + "?)(?=[.!?_*=]?[\\s\\n<()\\[\\]{}]|$)",
			Pattern.CASE_INSENSITIVE );
		protected Pattern pat() { return linkNoBreak; }
		public void core( Matcher lilm ) {
			String url = lilm.group( 1 );
			next.pass( anchor( url, url ) );
		}
	}

	/** Chain terminator: appends everything it receives, in arrival order, to toret. */
	static class SBAccumFilter extends PatternFilter {
		StringBuffer toret = new StringBuffer();
		/** Unused; this stage does no matching. */
		protected Pattern pat() { return null; }
		public void filter( CharSequence cs ) {
			toret.append( cs );
		}
		public void pass( CharSequence cs ) {
			toret.append( cs );
		}
	}

	/**
	 * Builds a fresh filter chain and returns its head. Order of stages:
	 * LinkNoBreakFilter, BracketFilterB, BracketFilterA, NakedURLFilter, SBAccumFilter.
	 * A new chain is needed per input because the accumulator keeps its output.
	 */
	static PatternFilter newDefaultFilterChain() {
		SBAccumFilter tail = new SBAccumFilter();
		NakedURLFilter nu = new NakedURLFilter();
		nu.next = tail;
		BracketFilterA bfa = new BracketFilterA();
		bfa.next = nu;
		BracketFilterB bfb = new BracketFilterB();
		bfb.next = bfa;
		LinkNoBreakFilter lnbf = new LinkNoBreakFilter();
		lnbf.next = bfb;
		return lnbf;
	}

	/** Escapes '&lt;' and '&gt;' so that markup can be displayed as text. Does not escape '&amp;'. */
	static String ltgt( String in ) {
		StringBuffer sb = new StringBuffer( in );
		replaceAll( sb, "<", "&lt;" );
		replaceAll( sb, ">", "&gt;" );
		return sb.toString();
	}

	/** Greedy URL regex; the filter patterns append "?" to it where a reluctant match is wanted. */
	static String urlRegex = "(?:http://|ftp://|https://|mailto:)[^\\s<>]+";
	/** A tag with a URL inside a quoted attribute value. Not used by the filter chain. */
	static Pattern linkInTag = Pattern.compile("(<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)");
	/** An opening anchor tag followed by link text that contains a URL. Not used by the filter chain. */
	static Pattern linkInLink = Pattern.compile("(<a\\s[^>]*href=[^>]*>[^<]*" + urlRegex + "?)");
	/** Case-sensitive union of linkInLink and linkInTag. Not used by the filter chain. */
	static Pattern linkNoBreak = Pattern.compile(
		"(<a\\s[^>]*href=[^>]*>[^<]*" + urlRegex + "?|<[^>]+=\"[^\">]*" + urlRegex + "?[^>]*>)");

	/**
	 * Regex-filter based URL linkifier. Runs {@code in} through a new default filter
	 * chain and returns the text collected at the end of the chain.
	 *
	 * @param in text to linkify; not modified
	 * @return a new buffer holding the linkified text
	 */
	static StringBuffer autoLinkifyUrlsR( StringBuffer in ) {
		PatternFilter filts = newDefaultFilterChain();
		filts.filter( in );
		// newDefaultFilterChain() only exposes the head, so walk to the accumulator at the tail.
		while ( filts.next != null ) {
			filts = filts.next;
		}
		return ((SBAccumFilter)filts).toret;
	}

	/**
	 * Auto-formats user text: plain-text formatting with existing tags left unescaped,
	 * followed by URL linkification.
	 *
	 * Most of the perl function has not been reimplemented because it is crazy or
	 * totally the wrong way to do it outside of perl (and possibly inside of perl).
	 * The unported steps are kept below as reference.
	 *
	 * @param in text to format; modified in place by the plain-text step
	 * @return the formatted text, which is a different buffer from {@code in}
	 */
	static StringBuffer autoFormat( StringBuffer in ) {
		//local $_ = shift;
		//my $context = shift;

		/*
// Since we'll be using high-bit characters to mark escaped characters,
// turn existing ones into entities here.
		s!([\x80-\xFF])!'&#'.ord($1).';'!ge;
// Escape significant characters preceded by a backslash
		s#\\\\#\xDC#g; // Escape double backslashes first
		s#\\<#\x81#g;
		s#\\>#\x82#g;
		s#\\&#\x83#g;
		s#\\"#\x84#g;
		s#\\(\S)#chr(ord($1)|0x80)#ge; // Mark the high bit
		 */
// Change non-HTML-involved &< into entities
		// s/&(?![A-Za-z0-9#]+;)/&amp;/g;
		// s#<(?![A-Za-z/])#&lt;#g;
// Perform plaintext formatting
		in = plaintextFormat( in, true );
// Run the URL linkifier here so that clean_html's word breaking doesn't mess things up
		// use superior new regex-filter based autolinkify (the old one is autoLinkifyUrls)
		in = autoLinkifyUrlsR( in );
		/*
// Clean up HTML tags
		 // this is probably a pretty big project, but hasn't it already been done somewhere? [bolson]
		my $comment_ref = $S->html_checker->clean_html(\$_, $context);
		$_ = $$comment_ref;

// Make non-HTML-involved <>&" easier to sniff out.
		s#&lt;#\x01#g; # \x01 == < for now
		s#&gt;#\x02#g; # \x02 == > for now
		s#&amp;#\x03#g; # \x03 == & for now
		s#&quot;#\x04#g; # \x04 == " for now

// Escape any potentially special chars within tags and URLs exactly as if
// the user had escaped them with a backslash
		my $url_regexg = '(?:http|ftp|file)://(?:[^\s<]|\Z)+(?=[\s<]|\Z)';
		my $tag_regex = '<[^><]*?>';

		s!($url_regexg|$tag_regex)!
		my $a = $1;
		$a =~ s#([^a-zA-Z0-9])#chr(ord($1)|0x80)#ge;
		$a
		!ge;

		$_ = $S->_auto_bold_italic($_);
		$_ = $S->_auto_create_ul($_);
		$_ = $S->_auto_create_ol($_);

		// Switch back the marked characters
		s#([\x80-\xFF])#chr(ord($1)&0x7F)#ge;
		s#\x01#&lt;#g;
		s#\x02#&gt;#g;
		s#\x03#&amp;#g;
		s#\x04#&quot;#g;*/
		return in;
	}

	/** Sample inputs rendered by {@link #main(String[])} to test autoformat. */
	public static String[] testInputs = {
		"a foo  haha   aoeu\nthingy\n\n\nyadda",
		"http://frond.com/boo",
		"<a href=\"http://don-t.mess.this.up.com:8080/right?yes\">wheee .... thud</a>\nmailto:link@this",
		"[implement text ftp://link]\n\n[https://also implement link then text]\n\ngot it?",
		"<a href=\"http://bolson.org/\">http://bolson.org</a> don't break this by overlinking it!",
		"<A hReF=\"hTTp://case.test/\">FTP://saoeuraoeu</a>\n\n[case INSENSITIVITY test mailto:colonel@cwru.edu]",
	};

	/**
	 * Prints an HTML page with one table row pair per test input: the raw input, the
	 * autoFormat() result and the result of the older autoLinkifyUrls() path, first
	 * rendered and then with their markup escaped for inspection.
	 *
	 * @param argv if the first argument is "--stdin", each line of standard input is
	 *             additionally run through autoFormat() and printed after the table
	 */
	public static void main( String[] argv ) {
		System.out.println("<html><head><title></title></head><body bgcolor=\"#ffffff\" text=\"#000000\"><table border=\"1\">" );

		for ( int i = 0; i < testInputs.length; i++ ) {
			StringBuffer sba, sbb;
			sba = autoFormat( new StringBuffer( testInputs[i] ) );
			sbb = autoLinkifyUrls( plaintextFormat( new StringBuffer( testInputs[i] ), true ) );
			System.out.println( "<tr><td><pre>" );
			System.out.println( testInputs[i] );
			System.out.println( "</pre></td><td>" );
			System.out.println( sba );
			System.out.println( "</td><td>" );
			System.out.println( sbb );
			System.out.println( "</td></tr>" );
			System.out.println( "<tr><td></td><td>" );
			System.out.println( ltgt(sba.toString()) );
			System.out.println( "</td><td>" );
			System.out.println( ltgt(sbb.toString()) );
			System.out.println( "</td></tr>" );
		}
		System.out.println( "</table>" );
		if ( argv.length > 0 && "--stdin".equals( argv[0] ) ) {
			try {
				// System.in is deliberately not closed here.
				java.io.BufferedReader in = new java.io.BufferedReader( new java.io.InputStreamReader( System.in ) );
				String line;
				while ( (line = in.readLine()) != null ) {
					System.out.println( autoFormat( new StringBuffer( line ) ) );
				}
			} catch ( java.io.IOException e ) {
				e.printStackTrace();
			}
		}
		System.out.println("</body></html>");
	}
}