001    /*
002    
003    This is Textile
004    A Humane Web Text Generator
005    
006    Original PHP Version
007    Version 1.0
008    21 Feb, 2003
009    
010    Copyright (c) 2003, Dean Allen, www.textism.com
011    All rights reserved.
012    
013    This java version by Gareth Simpson 
014    1.0 April 2003
015    1.1 mid 2004
016    1.2 March 2006
017    _______
018    LICENSE
019    
020    Redistribution and use in source and binary forms, with or without 
021    modification, are permitted provided that the following conditions are met:
022    
023    * Redistributions of source code must retain the above copyright notice, 
024      this list of conditions and the following disclaimer.
025    
026    * Redistributions in binary form must reproduce the above copyright notice,
027      this list of conditions and the following disclaimer in the documentation
028      and/or other materials provided with the distribution.
029    
030    * Neither the name Textile nor the names of its contributors may be used to
031      endorse or promote products derived from this software without specific
032      prior written permission.
033    
034    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
035    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
036    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
037    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
038    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
039    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
040    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
041    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
042    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
043    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
044    POSSIBILITY OF SUCH DAMAGE.
045    
046    _____________
047    USING TEXTILE
048    
049    Block modifier syntax:
050    
051    Header: hn. 
052    Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags.
053    Example: <h1>Text</h1>
054    
055    Header with CSS class: hn(class).
056    Paragraphs beginning with 'hn(class). ' receive a CSS class attribute. 
057    Example: <h1 class="class">Text</h1>
058    
059    Paragraph: p. (applied by default)
060    Paragraphs beginning with 'p. ' are wrapped in paragraph tags.
061    Example: <p>Text</p>
062    
063    Paragraph with CSS class: p(class).
064    Paragraphs beginning with 'p(class). ' receive a CSS class attribute. 
065    Example: <p class="class">Text</p>
066    
067    Blockquote: bq.
068    Paragraphs beginning with 'bq. ' are wrapped in block quote tags.
069    Example: <blockquote>Text</blockquote>
070    
071    Blockquote with citation: bq(citeurl).
072    Paragraphs beginning with 'bq(citeurl). ' receive a citation attribute. 
073    Example: <blockquote cite="citeurl">Text</blockquote>
074    
075    Numeric list: #
076    Consecutive paragraphs beginning with # are wrapped in ordered list tags.
077    Example: <ol><li>ordered list</li></ol>
078    
079    Bulleted list: *
080    Consecutive paragraphs beginning with * are wrapped in unordered list tags.
081    Example: <ul><li>unordered list</li></ul>
082    
083    
084    Phrase modifier syntax:
085    
086    _emphasis_             <em>emphasis</em>
087    __italic__             <i>italic</i>
088    *strong*               <strong>strong</strong>
089    **bold**               <b>bold</b>
090    ??citation??           <cite>citation</cite>
091    -deleted text-         <del>deleted</del>
092    +inserted text+        <ins>inserted</ins>
093    ^superscript^          <sup>superscript</sup>
094    ~subscript~            <sub>subscript</sub>
095    @code@                 <code>computer code</code>
096    
097    ==notextile==          leave text alone (do not format)
098    
099    "linktext":url         <a href="url">linktext</a>
100    "linktext(title)":url  <a href="url" title="title">linktext</a>
101    
102    !imageurl!             <img src="imageurl" alt="" />
103    !imageurl(alt text)!   <img src="imageurl" alt="alt text" />
104    !imageurl!:linkurl     <a href="linkurl"><img src="imageurl" alt="" /></a>
105    
106    ABC(Always Be Closing) <acronym title="Always Be Closing">ABC</acronym>
107    
108    */
109    
110    package jtextile;
111    
112    import java.util.ArrayList;
113    import java.util.regex.Matcher;
114    import java.util.regex.Pattern;
115    
/**
 * Java port of the Textile 1.0 "humane web text generator".
 *
 * <p>The single entry point is {@link #textile(String)}, which converts Textile
 * markup into an HTML fragment. The conversion runs as a fixed pipeline:
 * input normalisation, phrase modifiers (quick tags), typographic glyphs,
 * block-level formatting and a final clean-up of the temporary markers used
 * along the way.
 *
 * <p>All state is local to a call; the class only holds constants.
 */
public class JTextile {

    /** PHP {@code htmlspecialchars} mode: escape double quotes only. Kept for parity, not used. */
    @SuppressWarnings("unused")
    private static final int ENT_COMPAT = 0;
    /** PHP {@code htmlspecialchars} mode: leave both kinds of quote untouched. */
    private static final int ENT_NOQUOTES = 2;
    /** PHP {@code htmlspecialchars} mode: escape both double and single quotes. */
    private static final int ENT_QUOTES = 3;

    /** Stand-in for a bare ampersand while the text is being processed. */
    private static final String AMP_PLACEHOLDER = "x%x%";
    /** Stand-in for newlines inside notextile spans so block formatting cannot see them. */
    private static final String NEWLINE_PLACEHOLDER = "({)(})";

    /** Matches one HTML-ish tag; used both to detect markup and to split around it. */
    private static final Pattern TAG = Pattern.compile("<.[^<]*>");
    /** Matches a freshly generated list item at the start of a line and captures its kind (o/u). */
    private static final Pattern LIST_ITEM_START = Pattern.compile("^\\t<li(o|u)");

    /** Quick-tag delimiters (already regex-escaped), in the order they must be applied. */
    private static final String[] QTAG_DELIMITERS = {"\\*\\*", "\\*", "\\?\\?", "-", "\\+", "~", "@"};
    /** HTML element produced by the delimiter at the same index in {@link #QTAG_DELIMITERS}. */
    private static final String[] QTAG_ELEMENTS = {"b", "strong", "cite", "del", "ins", "sub", "code"};

    /** Typographic search patterns. NB: all of these wreak havoc inside HTML tags. */
    private static final String[] GLYPH_SEARCH = {
        "([^\\s\\[{<])?\\'([dmst]\\b|ll\\b|ve\\b|\\s|$)",       // single closing
        "\\'",                                                  // single opening
        "([^\\s\\[{])?\"(\\s|$)",                               // double closing
        "\"",                                                   // double opening
        "\\b( )?\\.{3}",                                        // ellipsis
        "\\b([A-Z][A-Z0-9]{2,})\\b(\\(([^\\)]+)\\))",           // 3+ uppercase acronym
        "(^|[^\"][>\\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)",    // 3+ uppercase caps
        "\\s?--\\s?",                                           // em dash
        "\\s-\\s",                                              // en dash
        "(\\d+)-(\\d+)",                                        // en dash between numbers
        "(\\d+) ?x ?(\\d+)",                                    // dimension sign
        "\\b ?(\\((tm|TM)\\))",                                 // trademark
        "\\b ?(\\([rR]\\))",                                    // registered
        "\\b ?(\\([cC]\\))"                                     // copyright
    };

    /** Replacement for the pattern at the same index in {@link #GLYPH_SEARCH}. */
    private static final String[] GLYPH_REPLACE = {
        "$1&#8217;$2",                          // single closing
        "&#8216;",                              // single opening
        "$1&#8221;$2",                          // double closing
        "&#8220;",                              // double opening
        "$1&#8230;",                            // ellipsis
        "<acronym title=\"$2\">$1</acronym>",   // 3+ uppercase acronym
        "$1$2$3",                               // 3+ uppercase caps (span wrapping disabled)
        "&#8212;",                              // em dash
        " &#8211; ",                            // en dash
        "$1&#8211;$2",                          // en dash between numbers
        "$1&#215;$2",                           // dimension sign
        "&#8482;",                              // trademark
        "&#174;",                               // registered
        "&#169;"                                // copyright
    };

    /** Block-level line patterns, tried in order on every line outside a pre block. */
    private static final String[] BLOCK_FIND = {
        "^\\s?\\*\\s(.*)",                      // bulleted list *
        "^\\s?#\\s(.*)",                        // numeric list #
        "^bq\\. (.*)",                          // blockquote bq.
        "^bq\\((\\S+?)\\)\\. (.*)",             // blockquote bq(cite-url).
        "^h(\\d)\\(([\\w]+)\\)\\.\\s(.*)",      // header hn(class).
        "^h(\\d)\\. (.*)",                      // plain header hn.
        "^p\\((\\w+)\\)\\.\\s(.*)",             // paragraph p(class).
        "^p\\. (.*)",                           // plain paragraph
        "^([^\\t ]+.*)"                         // remaining plain paragraph
    };

    /**
     * Replacement for the pattern at the same index in {@link #BLOCK_FIND}.
     * Every replacement starts with a tab so that later patterns (notably the
     * catch-all paragraph) no longer match an already converted line.
     * Only groups that exist in the matching pattern may be referenced:
     * Java throws on a reference to a missing group.
     */
    private static final String[] BLOCK_REPLACE = {
        "\t<liu>$1</liu>",
        "\t<lio>$1</lio>",
        "\t<blockquote>$1</blockquote>",
        "\t<blockquote cite=\"$1\">$2</blockquote>",
        "\t<h$1 class=\"$2\">$3</h$1>",
        "\t<h$1>$2</h$1>",
        "\t<p class=\"$1\">$2</p>",
        "\t<p>$1</p>",
        "\t<p>$1</p>"
    };

    private static final Pattern[] GLYPH_PATTERNS = compileAll(GLYPH_SEARCH);
    private static final Pattern[] BLOCK_PATTERNS = compileAll(BLOCK_FIND);

    /** Creates an instance; all functionality is available through the static {@link #textile(String)}. */
    public JTextile() {
    }

    /**
     * Converts Textile markup to an HTML fragment.
     *
     * @param text the Textile source; must not be {@code null}
     * @return the generated HTML
     * @throws NullPointerException if {@code text} is {@code null}
     * @throws Exception declared for backward compatibility; this implementation throws
     *         no checked exceptions
     */
    public static String textile(String text) throws Exception {
        if (text == null) {
            throw new NullPointerException("text must not be null");
        }
        String result = normalizeInput(text);
        result = applyPhraseModifiers(result);
        result = applyGlyphs(result);
        result = applyBlockFormatting(result);
        return cleanUp(result);
    }

    /** Protects bare ampersands, unescapes basic entities, drops CRs and tabs, and trims each line. */
    private static String normalizeInput(String text) {
        // Turn bare ampersands into a dummy marker for now. The negative lookahead
        // skips anything that already looks like an HTML entity.
        text = regexReplace("&(?![#a-zA-Z0-9]+;)", AMP_PLACEHOLDER, text);

        // unentify angle brackets and ampersands
        text = replace(text, "&gt;", ">");
        text = replace(text, "&lt;", "<");
        text = replace(text, "&amp;", "&");

        text = replace(text, "\r\n", "\n");
        text = replace(text, "\t", "");

        // Trim each line. The separator is a plain newline; PHP-style "/.../"
        // delimiters would be taken literally by Java and never match.
        String[] lines = text.split("\n");
        StringBuilder trimmed = new StringBuilder(text.length() + 1);
        for (int i = 0; i < lines.length; i++) {
            trimmed.append(lines[i].trim()).append('\n');
        }
        return trimmed.toString();
    }

    /** Converts inline quick tags (notextile, images, links, emphasis and friends) to HTML. */
    private static String applyPhraseModifiers(String text) {
        // double equal signs mean <notextile>
        text = regexReplace("(^|\\s)==(.*?)==([^\\w]{0,2})", "$1<notextile>$2</notextile>$3", text);

        // image qtag
        text = regexReplace("!([^!\\s\\(=]+?)\\s?(\\(([^\\)]+?)\\))?!", "<img src=\"$1\" alt=\"$3\" />", text);

        // image with hyperlink
        text = regexReplace("(<img.+ \\/>):(\\S+)", "<a href=\"$2\">$1</a>", text);

        // hyperlink qtag
        text = regexReplace(
                "\"([^\"\\(]+)\\s?(\\(([^\\)]+)\\))?\":(\\S+?)([^\\w\\s\\/;]|[1-9]*?)(\\s|$)",
                "<a href=\"$4\" title=\"$3\">$1</a>$5$6",
                text);

        // delimiter-based qtags; order matters ("**" must be handled before "*")
        for (int i = 0; i < QTAG_DELIMITERS.length; i++) {
            String delim = QTAG_DELIMITERS[i];
            String element = QTAG_ELEMENTS[i];
            text = regexReplace(
                    "(^|\\s|>)" + delim + "([^ ])(.+?)?([^\\w\\s]*?)([^ ])" + delim + "([^\\w\\s]{0,2})(\\s|$)",
                    "$1<" + element + ">$2$3$4$5</" + element + ">$6$7",
                    text);
        }

        // underscores interact badly with \b word boundaries, so they get their own patterns
        text = regexReplace("(^|\\s)__(.*?)__([^\\w\\s]{0,2})", "$1<i>$2</i>$3", text);
        text = regexReplace("(^|\\s)_(.*?)_([^\\w\\s]{0,2})", "$1<em>$2</em>$3", text);
        text = regexReplace("\\^(.*?)\\^", "<sup>$1</sup>", text);
        return text;
    }

    /**
     * Replaces typographic characters (quotes, dashes, symbols) outside of tags.
     * Text between code/pre/kbd tags is HTML-escaped instead, and text between
     * notextile tags is left alone apart from having its newlines protected.
     */
    private static String applyGlyphs(String text) {
        // a double quote at the very end needs trailing whitespace to be seen as closing
        text = regexReplace("\"$", "\" ", text);

        // no markup at all: a simple search and replace is enough
        if (!TAG.matcher(text).find()) {
            return regexReplace(GLYPH_PATTERNS, GLYPH_REPLACE, text);
        }

        boolean codepre = false;
        boolean notextile = false;
        StringBuilder out = new StringBuilder(text.length());
        String[] pieces = splitKeepingDelimiters(TAG, text);

        for (int i = 0; i < pieces.length; i++) {
            String piece = pieces[i];
            String lower = piece.toLowerCase();

            // replacements are switched off between <code>, <pre>, <kbd> and <notextile>
            if (regexFind("<(code|pre|kbd)>", lower)) {
                codepre = true;
            }
            if (regexFind("<notextile>", lower)) {
                codepre = true;
                notextile = true;
            } else if (regexFind("</(code|pre|kbd)>", lower)) {
                codepre = false;
            } else if (regexFind("</notextile>", lower)) {
                codepre = false;
                notextile = false;
            }

            boolean isTag = TAG.matcher(piece).find();
            if (!isTag && !codepre) {
                piece = regexReplace(GLYPH_PATTERNS, GLYPH_REPLACE, piece);
            }

            // escape HTML between code-like tags, then restore the opening tag itself,
            // which was escaped along with the content
            if (codepre && !notextile) {
                piece = htmlspecialchars(piece, ENT_NOQUOTES);
                piece = replace(piece, "&lt;pre&gt;", "<pre>");
                piece = replace(piece, "&lt;code&gt;", "<code>");
                piece = replace(piece, "&lt;kbd&gt;", "<kbd>");
                piece = replace(piece, "&lt;notextile&gt;", "<notextile>");
            }

            if (notextile) {
                piece = replace(piece, "\n", NEWLINE_PLACEHOLDER);
            }

            out.append(piece);
        }
        return out.toString();
    }

    /** Inserts forced line breaks and wraps each line in its block element (lists, headers, paragraphs). */
    private static String applyBlockFormatting(String text) {
        // Forced breaks: a newline directly followed by ordinary text becomes <br />.
        // This also hits <pre> content; those breaks are reverted in the loop below.
        text = regexReplace("(\\S)(_*)(\\p{Punct}*) *\\n([^#*\\s])", "$1$2$3<br />$4", text);

        // a break right after a list tag is never wanted
        text = replace(text, "l><br />", "l>\n");

        // the extra blank line ensures that an open list is closed
        text += " \n";

        boolean pre = false;
        String list = "";
        StringBuilder out = new StringBuilder(text.length() + 16);
        String[] lines = text.split("\n");

        // one extra pass with a blank line, again so that a trailing list gets closed
        for (int i = 0; i <= lines.length; i++) {
            String line = (i < lines.length) ? lines[i] : " ";

            // matches are off while we are between <pre> tags
            if (line.toLowerCase().indexOf("<pre>") > -1) {
                pre = true;
            }

            if (pre) {
                // kill any br tags that slipped in earlier
                line = replace(line, "<br />", "\n");
            } else {
                line = regexReplace(BLOCK_PATTERNS, BLOCK_REPLACE, line);
            }

            // matches back on after </pre>
            if (line.toLowerCase().indexOf("</pre>") > -1) {
                pre = false;
            }

            if (list.length() == 0) {
                // first item of a list: open the matching <ol>/<ul>
                Matcher itemStart = LIST_ITEM_START.matcher(line);
                if (itemStart.find()) {
                    list = itemStart.group(1);
                    line = "\n<" + list + "l>\n" + line;
                }
            } else if (line.indexOf("\t<li" + list) < 0) {
                // first line that is not an item of the open list: close it
                line = "</" + list + "l>\n" + line;
                list = "";
            }

            out.append(line).append('\n');
        }
        return out.toString();
    }

    /** Removes the temporary markers (notextile tags, placeholders, liu/lio) from the output. */
    private static String cleanUp(String text) {
        text = regexReplace("<\\/?notextile>", "", text);
        text = replace(text, NEWLINE_PLACEHOLDER, "\n");
        text = regexReplace("<(\\/?)li(u|o)>", "<$1li>", text);
        // turn the temp marker back into an ampersand entity
        text = replace(text, AMP_PLACEHOLDER, "&#38;");
        // newline after line breaks, just for markup tidiness
        return replace(text, "<br />", "<br />\n");
    }

    /**
     * Replaces every literal occurrence of one string with another.
     *
     * @param source      the string to start with
     * @param searchFor   the literal string to look for
     * @param replaceWith the replacement
     * @return the reformatted string; {@code source} itself when it is {@code null} or
     *         empty, when {@code replaceWith} is {@code null}, or when {@code searchFor}
     *         is empty
     */
    private static String replace(String source, String searchFor, String replaceWith) {
        if (source == null || source.length() == 0) {
            return source;
        }
        if (replaceWith == null || "".equals(searchFor)) {
            return source;
        }
        return source.replace(searchFor, replaceWith);
    }

    /**
     * Minimal port of PHP's {@code htmlspecialchars}.
     *
     * @param text the text to escape
     * @param mode one of the {@code ENT_*} constants, controlling which quotes are escaped
     * @return the text with ampersands, angle brackets and the selected quotes escaped
     */
    private static String htmlspecialchars(String text, int mode) {
        // the ampersand goes first so the entities added below are not escaped again
        text = replace(text, "&", "&amp;");
        if (mode != ENT_NOQUOTES) {
            text = replace(text, "\"", "&quot;");
        }
        if (mode == ENT_QUOTES) {
            text = replace(text, "'", "&#039;");
        }
        text = replace(text, "<", "&lt;");
        text = replace(text, ">", "&gt;");
        return text;
    }

    /** Compiles every regular expression of the given array, preserving order. */
    private static Pattern[] compileAll(String[] regexes) {
        Pattern[] compiled = new Pattern[regexes.length];
        for (int i = 0; i < regexes.length; i++) {
            compiled[i] = Pattern.compile(regexes[i]);
        }
        return compiled;
    }

    /** Replaces all matches of {@code pattern} in {@code text} (the role of PHP's preg_replace). */
    private static String regexReplace(String pattern, String replacement, String text) {
        return Pattern.compile(pattern).matcher(text).replaceAll(replacement);
    }

    /** Applies each pattern with the replacement at the same index, one after the other. */
    private static String regexReplace(Pattern[] patterns, String[] replacements, String text) {
        for (int i = 0; i < patterns.length; i++) {
            text = patterns[i].matcher(text).replaceAll(replacements[i]);
        }
        return text;
    }

    /** Returns whether {@code pattern} matches anywhere in {@code text} (the role of PHP's preg_match). */
    private static boolean regexFind(String pattern, String text) {
        return Pattern.compile(pattern).matcher(text).find();
    }

    /**
     * Splits {@code text} around every match of {@code pattern} and keeps the matches,
     * like PHP's {@code preg_split} with {@code PREG_SPLIT_DELIM_CAPTURE}.
     *
     * @return the pieces in document order, alternating text, match, text, ...; the text
     *         pieces may be empty, and the array always ends with the text after the
     *         last match
     */
    private static String[] splitKeepingDelimiters(Pattern pattern, String text) {
        ArrayList<String> pieces = new ArrayList<String>();
        Matcher m = pattern.matcher(text);
        int startAt = 0;

        // every match counts, including the first one
        while (m.find()) {
            pieces.add(text.substring(startAt, m.start()));
            pieces.add(m.group());
            startAt = m.end();
        }
        pieces.add(text.substring(startAt));

        return pieces.toArray(new String[pieces.size()]);
    }
}