001 /*
002
003 This is Textile
004 A Humane Web Text Generator
005
006 Original PHP Version
007 Version 1.0
008 21 Feb, 2003
009
010 Copyright (c) 2003, Dean Allen, www.textism.com
011 All rights reserved.
012
013 This java version by Gareth Simpson
014 1.0 April 2003
015 1.1 mid 2004
016 1.2 March 2006
017 _______
018 LICENSE
019
020 Redistribution and use in source and binary forms, with or without
021 modification, are permitted provided that the following conditions are met:
022
023 * Redistributions of source code must retain the above copyright notice,
024 this list of conditions and the following disclaimer.
025
026 * Redistributions in binary form must reproduce the above copyright notice,
027 this list of conditions and the following disclaimer in the documentation
028 and/or other materials provided with the distribution.
029
030 * Neither the name Textile nor the names of its contributors may be used to
031 endorse or promote products derived from this software without specific
032 prior written permission.
033
034 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
035 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
036 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
037 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
038 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
039 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
040 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
041 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
042 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
043 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
044 POSSIBILITY OF SUCH DAMAGE.
045
046 _____________
047 USING TEXTILE
048
049 Block modifier syntax:
050
051 Header: hn.
052 Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags.
053 Example: <h1>Text</h1>
054
055 Header with CSS class: hn(class).
056 Paragraphs beginning with 'hn(class). ' receive a CSS class attribute.
057 Example: <h1 class="class">Text</h1>
058
059 Paragraph: p. (applied by default)
060 Paragraphs beginning with 'p. ' are wrapped in paragraph tags.
061 Example: <p>Text</p>
062
063 Paragraph with CSS class: p(class).
064 Paragraphs beginning with 'p(class). ' receive a CSS class attribute.
065 Example: <p class="class">Text</p>
066
067 Blockquote: bq.
068 Paragraphs beginning with 'bq. ' are wrapped in block quote tags.
069 Example: <blockquote>Text</blockquote>
070
071 Blockquote with citation: bq(citeurl).
072 Paragraphs beginning with 'bq(citeurl). ' receive a citation attribute.
073 Example: <blockquote cite="citeurl">Text</blockquote>
074
075 Numeric list: #
076 Consecutive paragraphs beginning with # are wrapped in ordered list tags.
077 Example: <ol><li>ordered list</li></ol>
078
079 Bulleted list: *
080 Consecutive paragraphs beginning with * are wrapped in unordered list tags.
081 Example: <ul><li>unordered list</li></ul>
082
083
084 Phrase modifier syntax:
085
086 _emphasis_ <em>emphasis</em>
087 __italic__ <i>italic</i>
088 *strong* <strong>strong</strong>
089 **bold** <b>bold</b>
090 ??citation?? <cite>citation</cite>
091 -deleted text- <del>deleted</del>
092 +inserted text+ <ins>inserted</ins>
093 ^superscript^ <sup>superscript</sup>
094 ~subscript~ <sub>subscript</sub>
095 @code@ <code>computer code</code>
096
097 ==notextile== leave text alone (do not format)
098
099 "linktext":url <a href="url">linktext</a>
100 "linktext(title)":url <a href="url" title="title">linktext</a>
101
102 !imageurl! <img src="imageurl" alt="" />
103 !imageurl(alt text)! <img src="imageurl" alt="alt text" />
104 !imageurl!:linkurl <a href="linkurl"><img src="imageurl" /></a>
105
106 ABC(Always Be Closing) <acronym title="Always Be Closing">ABC</acronym>
107
108 */
109
110 package jtextile;
111
112 import java.util.ArrayList;
113 import java.util.regex.Matcher;
114 import java.util.regex.Pattern;
115
/**
 * Converts Textile markup into HTML.
 *
 * <p>The conversion is a fixed pipeline of regular-expression rewrites:
 * input normalisation, phrase modifiers (quick tags, links, images),
 * typographic glyphs, block-level formatting (headers, paragraphs, block
 * quotes, lists) and a final clean-up of the temporary markers used along
 * the way. All state is local to a call of {@link #textile(String)}.</p>
 */
public class JTextile
{
    /** Quote-handling modes for {@link #htmlspecialchars(String, int)}. */
    @SuppressWarnings("unused")
    private static final int ENT_COMPAT = 0;
    private static final int ENT_NOQUOTES = 2;
    private static final int ENT_QUOTES = 3;

    /** Stands in for a bare ampersand while the text is being rewritten. */
    private static final String AMP_PLACEHOLDER = "x%x%";

    /** Hides newlines inside notextile spans from the block-level pass. */
    private static final String NEWLINE_PLACEHOLDER = "({)(})";

    /** Quick-tag delimiters (as regex fragments) and the elements they map to. */
    private static final String[] QTAG_DELIMITERS = {"\\*\\*", "\\*", "\\?\\?", "-", "\\+", "~", "@"};
    private static final String[] QTAG_ELEMENTS = {"b", "strong", "cite", "del", "ins", "sub", "code"};

    /** Typographic patterns; entry i is rewritten with GLYPH_REPLACE[i]. */
    private static final Pattern[] GLYPH_SEARCH = compileAll(new String[] {
        "([^\\s\\[{<])?\\'([dmst]\\b|ll\\b|ve\\b|\\s|$)",          // single closing
        "\\'",                                                      // single opening
        "([^\\s\\[{])?\"(\\s|$)",                                   // double closing
        "\"",                                                       // double opening
        "\\b( )?\\.{3}",                                            // ellipsis
        "\\b([A-Z][A-Z0-9]{2,})\\b(\\(([^\\)]+)\\))",               // 3+ uppercase acronym
        "(^|[^\"][>\\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)",        // 3+ uppercase caps
        "\\s?--\\s?",                                               // em dash
        "\\s-\\s",                                                  // en dash
        "(\\d+)-(\\d+)",                                            // en dash
        "(\\d+) ?x ?(\\d+)",                                        // dimension sign
        "\\b ?(\\((tm|TM)\\))",                                     // trademark
        "\\b ?(\\([rR]\\))",                                        // registered
        "\\b ?(\\([cC]\\))"                                         // copyright
    });

    /** Glyph replacements, written as Unicode escapes so the source stays ASCII. */
    private static final String[] GLYPH_REPLACE = {
        "$1\u2019$2",                           // single closing
        "\u2018",                               // single opening
        "$1\u201D$2",                           // double closing
        "\u201C",                               // double opening
        "$1\u2026",                             // ellipsis
        // group 3 is the text inside the parentheses; group 2 would include them
        "<acronym title=\"$3\">$1</acronym>",   // 3+ uppercase acronym
        "$1$2$3",                               // 3+ uppercase caps (left unchanged)
        "\u2014",                               // em dash
        " \u2013 ",                             // en dash
        "$1\u2013$2",                           // en dash
        "$1\u00D7$2",                           // dimension sign
        "\u2122",                               // trademark
        "\u00AE",                               // registered
        "\u00A9"                                // copyright
    };

    /** Block-level patterns, applied to one line at a time in this order. */
    private static final Pattern[] BLOCK_FIND = compileAll(new String[] {
        "^\\s?\\*\\s(.*)",                      // bulleted list *
        "^\\s?#\\s(.*)",                        // numeric list #
        "^bq\\. (.*)",                          // blockquote bq.
        "^bq\\((\\S+?)\\)\\. (.*)",             // blockquote bq(cite-url).
        "^h(\\d)\\(([\\w]+)\\)\\.\\s(.*)",      // header hn(class). w/ css class
        "^h(\\d)\\. (.*)",                      // plain header hn.
        "^p\\((\\w+)\\)\\.\\s(.*)",             // para p(class). w/ css class
        "^p\\. (.*)",                           // plain paragraph
        "^([^\\t ]+.*)"                         // remaining plain paragraph
    });

    /**
     * Block replacements. Every result starts with a tab so that the later
     * patterns (none of which can match a leading tab) leave it alone.
     * liu/lio are temporary names for unordered/ordered list items.
     */
    private static final String[] BLOCK_REPLACE = {
        "\t<liu>$1</liu>",
        "\t<lio>$1</lio>",
        "\t<blockquote>$1</blockquote>",
        "\t<blockquote cite=\"$1\">$2</blockquote>",
        "\t<h$1 class=\"$2\">$3</h$1>",
        "\t<h$1>$2</h$1>",
        "\t<p class=\"$1\">$2</p>",
        "\t<p>$1</p>",
        "\t<p>$1</p>"
    };

    /** Recognises a temporary list item produced by BLOCK_REPLACE. */
    private static final Pattern LIST_ITEM_START = Pattern.compile("^\\t<li(o|u)");


    public JTextile()
    {
    }


    /**
     * Converts Textile markup to HTML.
     *
     * @param text the Textile source; must not be null
     * @return the generated HTML; block elements are prefixed with a tab and
     *         the result ends with whitespace-only lines
     * @throws Exception declared for compatibility with existing callers
     */
    public static String textile(String text) throws Exception
    {
        text = normalizeInput(text);
        text = applyPhraseModifiers(text);
        text = applyGlyphs(text);
        text = applyBlockFormatting(text);
        return cleanUp(text);
    }


    /** Protects bare ampersands, decodes incoming entities and trims every line. */
    private static String normalizeInput(String text)
    {
        // Turn any incoming ampersand into a dummy marker for now. The negative
        // lookahead skips ampersands that already start an HTML entity.
        text = preg_replace("&(?![#a-zA-Z0-9]+;)", AMP_PLACEHOLDER, text);

        // unentify angle brackets and ampersands
        text = replace(text, "&gt;", ">");
        text = replace(text, "&lt;", "<");
        text = replace(text, "&amp;", "&");

        // zap carriage returns and tabs
        text = replace(text, "\r\n", "\n");
        text = replace(text, "\t", "");

        // trim each line; every line is re-terminated with a newline
        StringBuilder trimmed = new StringBuilder();
        String[] lines = text.split("\n");
        for (int i = 0; i < lines.length; i++)
        {
            trimmed.append(lines[i].trim()).append("\n");
        }
        return trimmed.toString();
    }


    /** Rewrites notextile markers, images, links and the inline quick tags. */
    private static String applyPhraseModifiers(String text)
    {
        // double equal signs mean <notextile>
        text = preg_replace("(^|\\s)==(.*?)==([^\\w]{0,2})", "$1<notextile>$2</notextile>$3", text);

        // image qtag
        text = preg_replace("!([^!\\s\\(=]+?)\\s?(\\(([^\\)]+?)\\))?!", "<img src=\"$1\" alt=\"$3\" />", text);

        // image with hyperlink
        text = preg_replace("(<img.+ \\/>):(\\S+)", "<a href=\"$2\">$1</a>", text);

        // hyperlink qtag
        text = preg_replace("\"([^\"\\(]+)\\s?(\\(([^\\)]+)\\))?\":(\\S+?)([^\\w\\s\\/;]|[1-9]*?)(\\s|$)", "<a href=\"$4\" title=\"$3\">$1</a>$5$6", text);

        // loop through the quick tags, replacing each delimiter pair with html
        for (int i = 0; i < QTAG_DELIMITERS.length; i++)
        {
            String delimiter = QTAG_DELIMITERS[i];
            String element = QTAG_ELEMENTS[i];
            text = preg_replace(
                "(^|\\s|>)" + delimiter + "([^ ])(.+?)?([^\\w\\s]*?)([^ ])" + delimiter + "([^\\w\\s]{0,2})(\\s|$)",
                "$1<" + element + ">$2$3$4$5</" + element + ">$6$7",
                text);
        }

        // underscores do not combine well with word boundaries, so they are
        // handled on their own
        text = preg_replace("(^|\\s)__(.*?)__([^\\w\\s]{0,2})", "$1<i>$2</i>$3", text);
        text = preg_replace("(^|\\s)_(.*?)_([^\\w\\s]{0,2})", "$1<em>$2</em>$3", text);
        text = preg_replace("\\^(.*?)\\^", "<sup>$1</sup>", text);

        // a double quote at the very end needs trailing whitespace so that the
        // "double closing" glyph pattern can see it
        return preg_replace("\"$", "\" ", text);
    }


    /**
     * Applies the typographic glyph replacements. When the text contains
     * markup, it is split at the tags and only the text between tags is
     * rewritten; content between code, pre or kbd tags is HTML-escaped instead,
     * and content between notextile tags is passed through with its newlines
     * hidden from the block-level pass.
     */
    private static String applyGlyphs(String text)
    {
        // if there is no html, do a simple search and replace
        if (!preg_match("<.[^<]*>", text))
        {
            return preg_replace(GLYPH_SEARCH, GLYPH_REPLACE, text);
        }

        boolean codepre = false;
        boolean notextile = false;
        StringBuilder out = new StringBuilder();

        String[] pieces = preg_split("<.[^<]*>", text);
        for (int i = 0; i < pieces.length; i++)
        {
            String piece = pieces[i];
            String lowered = lower(piece);

            // matches are off if we're between <code>, <pre> etc.
            if (preg_match("<(code|pre|kbd)>", lowered))
            {
                codepre = true;
            }
            if (preg_match("<notextile>", lowered))
            {
                codepre = true;
                notextile = true;
            }
            else if (preg_match("</(code|pre|kbd)>", lowered))
            {
                codepre = false;
            }
            else if (preg_match("</notextile>", lowered))
            {
                codepre = false;
                notextile = false;
            }

            if (!codepre && !preg_match("<.[^<]*?>", piece))
            {
                piece = preg_replace(GLYPH_SEARCH, GLYPH_REPLACE, piece);
            }

            // convert html special characters if between <code> etc.
            if (codepre && !notextile)
            {
                piece = htmlspecialchars(piece, ENT_NOQUOTES);
                // the opening tag itself was escaped too; put it back
                piece = preg_replace("(?i)&lt;(pre|code|kbd|notextile)&gt;", "<$1>", piece);
            }

            if (notextile)
            {
                piece = replace(piece, "\n", NEWLINE_PLACEHOLDER);
            }

            out.append(piece);
        }
        return out.toString();
    }


    /**
     * Inserts forced line breaks and rewrites each line as a block element,
     * wrapping runs of list items in ul or ol tags.
     */
    private static String applyBlockFormatting(String text)
    {
        // Forced breaks: a newline directly followed by more text becomes
        // <br />. This also hits <pre> content; that is undone per line below.
        text = preg_replace("(\\S)(_*)(\\p{Punct}*) *\\n([^#*\\s])", "$1$2$3<br />$4", text);

        // might be a problem with lists
        text = replace(text, "l><br />", "l>\n");

        // the extra whitespace line ensures that a trailing list gets closed
        text += " \n";

        StringBuilder out = new StringBuilder();
        boolean pre = false;
        String list = "";   // "u" or "o" while inside a list, otherwise empty

        String[] lines = text.split("\n");
        // one iteration past the end, with a blank line, to flush an open list
        for (int i = 0; i <= lines.length; i++)
        {
            String line = (i < lines.length) ? lines[i] : " ";

            // matches are off if we're between <pre> tags
            if (lower(line).indexOf("<pre>") > -1)
            {
                pre = true;
            }

            if (pre)
            {
                // kill any br tags that slipped in earlier
                line = replace(line, "<br />", "\n");
            }
            else
            {
                line = preg_replace(BLOCK_FIND, BLOCK_REPLACE, line);
            }

            // matches back on after </pre>
            if (lower(line).indexOf("</pre>") > -1)
            {
                pre = false;
            }

            // Close the current list when this line is not another item of the
            // same kind. This is done before the open check so that a list of
            // the other kind starting on this very line is opened properly.
            String closing = "";
            if (list.length() > 0 && line.indexOf("\t<li" + list) < 0)
            {
                closing = "</" + list + "l>\n";
                list = "";
            }

            if (list.length() == 0)
            {
                Matcher opener = LIST_ITEM_START.matcher(line);
                if (opener.find())
                {
                    list = opener.group(1);
                    line = "\n<" + list + "l>\n" + line;
                }
            }

            out.append(closing).append(line).append("\n");
        }
        return out.toString();
    }


    /** Removes the temporary markers and tag names introduced by earlier passes. */
    private static String cleanUp(String text)
    {
        // clean up <notextile> tags and the newlines hidden inside them
        text = preg_replace("<\\/?notextile>", "", text);
        text = replace(text, NEWLINE_PLACEHOLDER, "\n");

        // clean up liu and lio
        text = preg_replace("<(\\/?)li(u|o)>", "<$1li>", text);

        // turn the temp marker back into an ampersand entity
        text = replace(text, AMP_PLACEHOLDER, "&amp;");

        // newline after line breaks, just for markup tidiness
        return replace(text, "<br />", "<br />\n");
    }


    /**
     * Replaces every occurrence of a literal string.
     *
     * @param source The string to start with
     * @param searchFor The string we are looking for
     * @param replaceWith The replacement
     *
     * @return The reformatted string; {@code source} itself when it is null or
     *         empty, when {@code replaceWith} is null, or when
     *         {@code searchFor} is empty
     */
    private static String replace(String source, String searchFor, String replaceWith)
    {
        if (source == null || "".equals(source))
        {
            return source;
        }
        if (replaceWith == null || "".equals(searchFor))
        {
            return source;
        }
        return source.replace(searchFor, replaceWith);
    }

    /**
     * Escapes HTML special characters.
     *
     * @param text the text to escape
     * @param mode ENT_NOQUOTES leaves quotes alone, ENT_QUOTES escapes both
     *             quote kinds, any other value escapes double quotes only
     * @return the escaped text
     */
    private static String htmlspecialchars(String text, int mode)
    {
        // ampersands first, so the entities added below are not escaped again
        text = replace(text, "&", "&amp;");
        if (mode != ENT_NOQUOTES)
        {
            text = replace(text, "\"", "&quot;");
        }
        if (mode == ENT_QUOTES)
        {
            text = replace(text, "'", "&#039;");
        }
        text = replace(text, "<", "&lt;");
        text = replace(text, ">", "&gt;");
        return text;
    }

    /** Compiles each regular expression of the given array. */
    private static Pattern[] compileAll(String[] regexes)
    {
        Pattern[] compiled = new Pattern[regexes.length];
        for (int i = 0; i < regexes.length; i++)
        {
            compiled[i] = Pattern.compile(regexes[i]);
        }
        return compiled;
    }

    /** Lower-cases independently of the default locale, for tag-name checks. */
    private static String lower(String text)
    {
        return text.toLowerCase(java.util.Locale.ENGLISH);
    }

    /** Replaces every match of {@code pattern} in {@code text}. */
    private static String preg_replace(String pattern, String replace, String text)
    {
        return Pattern.compile(pattern).matcher(text).replaceAll(replace);
    }

    /** Applies each pattern and its replacement of the same index, in order. */
    private static String preg_replace(Pattern[] patterns, String[] replace, String text)
    {
        for (int i = 0; i < patterns.length; i++)
        {
            text = patterns[i].matcher(text).replaceAll(replace[i]);
        }
        return text;
    }

    /** Tells whether {@code pattern} matches anywhere in {@code text}. */
    private static boolean preg_match(String pattern, String text)
    {
        return Pattern.compile(pattern).matcher(text).find();
    }

    /**
     * Splits {@code text} at every match of {@code pattern}, keeping the
     * matches. The result alternates between the text before a match (possibly
     * empty) and the match itself, and ends with the text after the last match.
     */
    private static String[] preg_split(String pattern, String text)
    {
        ArrayList<String> pieces = new ArrayList<String>();
        Matcher m = Pattern.compile(pattern).matcher(text);
        int startAt = 0;

        while (m.find())
        {
            pieces.add(text.substring(startAt, m.start()));
            pieces.add(m.group());
            startAt = m.end();
        }
        pieces.add(text.substring(startAt));

        return pieces.toArray(new String[0]);
    }

}