diff options
Diffstat (limited to 'tests/examplefiles/markdown.lsp')
-rwxr-xr-x | tests/examplefiles/markdown.lsp | 679 |
1 files changed, 0 insertions, 679 deletions
diff --git a/tests/examplefiles/markdown.lsp b/tests/examplefiles/markdown.lsp deleted file mode 100755 index 8159082b..00000000 --- a/tests/examplefiles/markdown.lsp +++ /dev/null @@ -1,679 +0,0 @@ -#!/usr/bin/env newlisp - -;; @module markdown -;; @author cormullion -;; @description a port of John Gruber's Markdown to newLISP -;; @location http://unbalanced-parentheses.nfshost.com/ -;; @version of date 2011-10-02 22:36:02 -;; version history: at the end -;; a port of John Gruber's Markdown.pl (http://daringfireball.net/markdown) script to newLISP... -;; see his original Perl script for explanations of the fearsome regexen and -;; byzantine logic, etc... -;; TODO: -;; the following Markdown tests fail: -;; Inline HTML (Advanced) ... FAILED -;; Links, reference style ... FAILED -- nested brackets -;; Links, shortcut references ... FAILED -;; Markdown Documentation - Syntax ... FAILED -;; Ordered and unordered lists ... FAILED -- a nested ordered list error -;; parens in url : .jpg) see (Images.text) -;; Add: email address scrambling - -(context 'Hash) -(define HashTable:HashTable) - -(define (build-escape-table) - (set '*escape-chars* [text]\`*_{}[]()>#+-.![/text]) - (dolist (c (explode *escape-chars*)) - (HashTable c (hash c)))) - -(define (init-hash txt) - ; finds a hash identifier that doesn't occur anywhere in the text - (set 'counter 0) - (set 'hash-prefix "HASH") - (set 'hash-id (string hash-prefix counter)) - (do-while (find hash-id txt) - (set 'hash-id (string hash-prefix (inc counter)))) - (Hash:build-escape-table)) - -(define (hash s) - (HashTable s (string hash-id (inc counter)))) - -(context 'markdown) - -(define (markdown:markdown txt) - (initialize) - (Hash:init-hash txt) - (unescape-special-chars - (block-transforms - (strip-link-definitions - (protect - (cleanup txt)))))) - -(define (initialize) - (set '*escape-pairs* '( - ({\\\\} {\}) - ({\\`} {`}) - ({\\\*} {*}) - ({\\_} {_}) - ([text]\\\{[/text] [text]{[/text]) - ([text]\\\}[/text] [text]}[/text]) - ({\\\[} {[}) - ({\\\]} {]}) - ({\\\(} {(}) - ({\\\)} {)}) - ({\\>} {>}) - ({\\\#} {#}) - ({\\\+} {+}) - ({\\\-} {-}) - ({\\\.} {.}) - ({\\!} {!}))) - (set '*hashed-html-blocks* '()) - (set '*list-level* 0)) - -(define (block-transforms txt) - (form-paragraphs - (protect - (block-quotes - (code-blocks - (lists - (horizontal-rules - (headers txt)))))))) - -(define (span-transforms txt) - (line-breaks - (emphasis - (amps-and-angles - (auto-links - (anchors - (images - (escape-special-chars - (escape-special-chars (code-spans txt) 'inside-attributes))))))))) - -(define (tokenize-html xhtml) -; return list of tag/text portions of xhtml text - (letn ( - (tag-match [text]((?s:<!(-- .*? -- \s*)+>)| -(?s:<\?.*?\?>)| -(?:<[a-z/!$](?:[^<>]| -(?:<[a-z/!$](?:[^<>]| -(?:<[a-z/!$](?:[^<>]| -(?:<[a-z/!$](?:[^<>]| -(?:<[a-z/!$](?:[^<>]| -(?:<[a-z/!$](?:[^<>])*>))*>))*>))*>))*>))*>))[/text]) ; yeah, well... - (str xhtml) - (len (length str)) - (pos 0) - (tokens '())) - (while (set 'tag-start (find tag-match str 8)) - (if (< pos tag-start) - (push (list 'text (slice str pos (- tag-start pos))) tokens -1)) - (push (list 'tag $0) tokens -1) - (set 'str (slice str (+ tag-start (length $0)))) - (set 'pos 0)) - ; leftovers - (if (< pos len) - (push (list 'text (slice str pos (- len pos))) tokens -1)) - tokens)) - -(define (escape-special-chars txt (within-tag-attributes nil)) - (let ((temp (tokenize-html txt)) - (new-text {})) - (dolist (pair temp) - (if (= (first pair) 'tag) - ; 'tag - (begin - (set 'new-text (replace {\\} (last pair) (HashTable {\\}) 0)) - (replace [text](?<=.)</?code>(?=.)[/text] new-text (HashTable {`}) 0) - (replace {\*} new-text (HashTable {*}) 0) - (replace {_} new-text (HashTable {_} ) 0)) - ; 'text - (if within-tag-attributes - (set 'new-text (last pair)) - (set 'new-text (encode-backslash-escapes (last pair))))) - (setf (temp $idx) (list (first pair) new-text))) - ; return as text - (join (map last temp)))) - -(define (encode-backslash-escapes t) - (dolist (pair *escape-pairs*) - (replace (first pair) t (HashTable (last pair)) 14))) - -(define (encode-code s) - ; encode/escape certain characters inside Markdown code runs - (replace {&} s "&" 0) - (replace {<} s "<" 0) - (replace {>} s ">" 0) - (replace {\*} s (HashTable {\\}) 0) - (replace {_} s (HashTable {_}) 0) - (replace "{" s (HashTable "{") 0) - (replace {\[} s (HashTable {[}) 0) - (replace {\]} s (HashTable {]}) 0) - (replace {\\} s (HashTable "\\") 0)) - -(define (code-spans s) - (replace - {(?<!\\)(`+)(.+?)(?<!`)\1(?!`)} - s - (string {<code>} (encode-code (trim $2)) {</code>}) - 2)) - -(define (encode-alt s) - (replace {&} s "&" 0) - (replace {"} s """ 0)) - -(define (images txt) - (let ((alt-text {}) - (url {}) - (title {}) - (ref-regex {(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])}) - (inline-regex {(!\[(.*?)\]\([ \t]*<?(\S+?)>?[ \t]*((['"])(.*?)\5[ \t]*)?\))}) - (whole-match {}) - (result {}) - (id-ref {}) - (url {})) - ; reference links ![alt text][id] - (replace - ref-regex - txt - (begin - (set 'whole-match $1 'alt-text $2 'id-ref $3) - (if alt-text - (replace {"} alt-text {"} 0)) - (if (empty? id-ref) - (set 'id-ref (lower-case alt-text))) - (if (lookup id-ref *link-database*) - (set 'url (first (lookup id-ref *link-database*))) - (set 'url nil)) - (if url - (begin - (replace {\*} url (HashTable {*}) 0) - (replace {_} url (HashTable {_}) 0) - )) - (if (last (lookup id-ref *link-database*)) - ; title - (begin - (set 'title (last (lookup id-ref *link-database*))) - (replace {"} title {"} 0) - (replace {\*} title (HashTable {*}) 0) - (replace {_} title (HashTable {_}) 0)) - ; no title - (set 'title {}) - ) - (if url - (set 'result (string - {<img src="} - (trim url) - {" alt="} - alt-text {" } - (if (not (empty? title)) - (string { title="} title {"}) {}) - { />})) - (set 'result whole-match)) - ) - 0 - ) - ; inline image refs:  - (replace - inline-regex - txt - (begin - (set 'whole-match $1) - (set 'alt-text $2) - (set 'url $3) - (set 'title $6) - (if alt-text - (replace {"} alt-text {"} 0) - (set 'alt-text {})) - (if title - (begin - (replace {"} title {"} 0) - (replace {\*} title (HashTable {*}) 0) - (replace {_} title (HashTable {_}) 0)) - (set 'title {})) - (replace {\*} url (HashTable {*}) 0) - (replace {_} url (HashTable {_}) 0) - (string - {<img src="} - (trim url) - {" alt="} - alt-text {" } - (if title (string {title="} title {"}) {}) { />}) - ) - 0 - ) - ; empty ones are possible - (set '$1 {}) - (replace {!\[(.*?)\]\([ \t]*\)} - txt - (string {<img src="" alt="} $1 {" title="" />}) - 0))) - -(define (make-anchor link-text id-ref ) -; Link defs are in the form: ^[id]: url "optional title" -; stored in link db list as (id (url title)) -; params are text to be linked and the id of the link in the db -; eg bar 1 for [bar][1] - - (let ((title {}) - (id id-ref) - (url nil)) - (if link-text - (begin - (replace {"} link-text {"} 0) - (replace {\n} link-text { } 0) - (replace {[ ]?\n} link-text { } 0))) - (if (null? id ) (set 'id (lower-case link-text))) - (if (not (nil? (lookup id *link-database*))) - (begin - (set 'url (first (lookup id *link-database*))) - (replace {\*} url (HashTable {*}) 0) - (replace {_} url (HashTable {_}) 0) - (if (set 'title (last (lookup id *link-database*))) - (begin - (replace {"} title {"} 0) - (replace {\*} title (HashTable {*}) 0) - (replace {_} title (HashTable {_}) 0)) - (set 'title {}))) - (set 'url nil)) - (if url - (string {<a href="} (trim url) - {"} - (if (not (= title {})) (string { title="} (trim title) {"}) {}) - {>} link-text {</a>}) - (string {[} link-text {][} id-ref {]})))) - -(define (anchors txt) - (letn ((nested-brackets {(?>[^\[\]]+)*}) - (ref-link-regex (string {(\[(} nested-brackets {)\][ ]?(?:\n[ ]*)?\[(.*?)\])})) - (inline-regex {(\[(.*?)\]\([ ]*<?(.*?\)?)>?[ ]*((['"])(.*?)\5[ \t]*)?\))}) - (link-text {}) - (url {}) - (title {})) - ; reference-style links: [link text] [id] - (set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {} '$6 {}) ; i still don't think I should have to do this... - - ; what about this regex instead? - (set 'ref-link-regex {(\[(.*?)\][ ]?\[(.*?)\])}) - - (replace ref-link-regex txt (make-anchor $2 $3) 8) ; $2 is link text, $3 is id - ; inline links: [link text](url "optional title") - (set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {} '$6 {}) - (replace - inline-regex - txt - (begin - (set 'link-text $2) - (set 'url $3) - (set 'title $6) - (if link-text (replace {"} link-text {"} 0)) - (if title - (begin - (replace {"} title {"} 0) - (replace {\*} title (HashTable {*}) 0) - (replace {_} title (HashTable {_}) 0)) - (set 'title {})) - (replace {\*} url (HashTable {*}) 0) - (replace {_} url (HashTable {_}) 0) - (replace {^<(.*)>$} url $1 0) - (string - {<a href="} - (trim url) - {"} - (if (not (= title {})) - (string { title="} (trim title) {"}) - {}) - {>} link-text {</a>} - )) - 8 - ) ; replace - ) txt) - -(define (auto-links txt) - (replace - [text]<((https?|ftp):[^'">\s]+)>[/text] - txt - (string {<a href="} $1 {">} $1 {</a>}) - 0 - ) - ; to-do: email ... -) - -(define (amps-and-angles txt) -; Smart processing for ampersands and angle brackets - (replace - [text]&(?!\#?[xX]?(?:[0-9a-fA-F]+|\w+);)[/text] - txt - {&} - 10 - ) - (replace - [text]<(?![a-z/?\$!])[/text] - txt - {<} - 10)) - -(define (emphasis txt) - ; italics/bold: strong first - (replace - [text] (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 [/text] - txt - (string {<strong>} $2 {</strong>}) - 8 - ) - (replace - [text] (\*|_) (?=\S) (.+?) (?<=\S) \1 [/text] - txt - (string {<em>} $2 {</em>}) - 8 - )) - -(define (line-breaks txt) - ; handles line break markers - (replace " {2,}\n" txt " <br/>\n" 0)) - -(define (hex-str-to-unicode-char strng) - ; given a five character string, assume it's "U" + 4 hex chars and convert - ; return the character... - (char (int (string "0x" (1 strng)) 0 16))) - -(define (ustring s) - ; any four digit string preceded by U - (replace "U[0-9a-f]{4,}" s (hex-str-to-unicode-char $0) 0)) - -(define (cleanup txt) - ; cleanup the text by normalizing some possible variations - (replace "\r\n|\r" txt "\n" 0) ; standardize line ends - (push "\n\n" txt -1) ; end with two returns - (set 'txt (detab txt)) ; convert tabs to spaces - - ; convert inline Unicode: - (set 'txt (ustring txt)) - (replace "\n[ \t]+\n" txt "\n\n" 0) ; lines with only spaces and tabs - ) - -(define (protect txt) - ; protect or "hash html blocks" - (letn ((nested-block-regex [text](^<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b(.*\n)*?</\2>[ \t]*(?=\n+|\Z))[/text]) - (liberal-tag-regex [text](^<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b(.*\n)*?.*</\2>[ \t]*(?=\n+|\Z))[/text]) - (hr-regex [text](?:(?<=\n\n)|\A\n?)([ ]{0,3}<(hr)\b([^<>])*?/?>[ \t]*(?=\n{2,}|\Z))[/text]) - (html-comment-regex [text](?:(?<=\n\n)|\A\n?)([ ]{0,3}(?s:<!(--.*?--\s*)+>)[ \t]*(?=\n{2,}|\Z))[/text]) - (results '()) - (chunk-count (length (set 'chunks (parse txt "\n\n")))) - (chunk-size 500)) - - ; due to a limitation in PCRE, long sections have to be divided up otherwise we'll crash - ; so divide up long texts into chunks, then do the regex on each chunk - ; not an ideal solution, but it works ok :( - - (for (i 0 chunk-count chunk-size) - ; do a chunk - (set 'text-chunk (join (i (- (min chunk-count (- (+ i chunk-size) 1)) i) chunks) "\n\n")) - (dolist (rgx (list nested-block-regex liberal-tag-regex hr-regex html-comment-regex)) - (replace - rgx - text-chunk - (begin - (set 'key (Hash:hash $1)) - (push (list key $1 ) *hashed-html-blocks* -1) - (string "\n\n" key "\n\n")) - 2)) - ; save this partial result - (push text-chunk results -1) - ) ; for - ; return string result - (join results "\n\n"))) - -(define (unescape-special-chars t) - ; Swap back in all the special characters we've hidden. - (dolist (pair (HashTable)) - (replace (last pair) t (first pair) 10)) t) - -(define (strip-link-definitions txt) - ; strip link definitions from the text and store them - ; Link defs are in the form: ^[id]: url "optional title" - ; stored in link db list as (id (url title)) - (let ((link-db '()) - (url {}) - (id {}) - (title {})) - (replace - [text]^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?[ \t]*\n?[ \t]*(?:(?<=\s)["(](.+?)[")][ \t]*)?(?:\n+|\Z)[/text] - txt - (begin - (set 'id (lower-case $1) 'url (amps-and-angles $2) 'title $3) - (if title (replace {"} title {"} 0)) - (push (list id (list url title)) link-db) - (set '$3 {}) ; necessary? - (string {}) ; remove from text - ) - 10) - (set '*link-database* link-db) - txt)) - -(define (horizontal-rules txt) - (replace - [text]^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$[/text] - txt - "\n<hr />" - 14) - (replace - [text]^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$[/text] - txt - "\n<hr />" - 14) - (replace - [text]^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$[/text] - txt - "\n<hr />" - 14)) - -(define (headers txt) - ; setext headers - (let ((level 1)) - (replace - [text]^(.+)[ \t]*\n=+[ \t]*\n+[/text] - txt - (string "<h1>" (span-transforms $1) "</h1>\n\n") - 2) - - (replace - [text]^(.+)[ \t]*\n-+[ \t]*\n+[/text] - txt - (string "<h2>" (span-transforms $1) "</h2>\n\n") - 2) - ; atx headers - (replace - [text]^(\#{1,6})\s*(.+?)[ ]*\#*(\n+)[/text] - txt - (begin - (set 'level (length $1)) - (string "<h" level ">" (span-transforms $2) "</h" level ">\n\n") - ) - 2))) - -(define (lists txt) - (letn ((marker-ul {[*+-]}) - (marker-ol {\d+[.]}) - (marker-any (string {(?:} marker-ul {|} marker-ol {)})) - (whole-list-regex (string [text](([ ]{0,3}([/text] marker-any [text])[ \t]+)(?s:.+?)(\z|\n{2,}(?=\S)(?![ \t]*[/text] marker-any [text][ \t]+)))[/text])) - (my-list {}) - (list-type {}) - (my-result {})) - (replace - (if (> *list-level* 0) - (string {^} whole-list-regex) - (string {(?:(?<=\n\n)|\A\n?)} whole-list-regex)) - txt - (begin - (set 'my-list $1) - (if (find $3 marker-ul) - (set 'list-type "ul" 'marker-type marker-ul) - (set 'list-type "ol" 'marker-type marker-ol)) - (replace [text]\n{2,}[/text] my-list "\n\n\n" 0) - (set 'my-result (process-list-items my-list marker-any)) - (replace {\s+$} my-result {} 0) - (string {<} list-type {>} "\n" my-result "\n" {</} list-type {>} "\n")) - 10 ; must be multiline - ))) - -(define (process-list-items list-text marker-any) - (let ((list-regex (string [text](\n)?(^[ \t]*)([/text] marker-any [text])[ \t]+((?s:.+?)(\n{1,2}))(?=\n*(\z|\2([/text] marker-any [text])[ \t]+))[/text])) - (item {}) - (leading-line {}) - (leading-space {}) - (result {})) - (inc *list-level*) - (replace [text]\n{2,}\z[/text] list-text "\n" 0) - (set '$1 {} '$2 {} '$3 {} '$4 {} '$5 {}) - (replace - list-regex - list-text - (begin - (set 'item $4) - (set 'leading-line $1) - (set 'leading-space $2) - (if (or (not (empty? leading-line)) (ends-with item "\n{2,}" 0)) - (set 'item (block-transforms (outdent item))) - ; recurse for sub lists - (begin - (set 'item (lists (outdent item))) - (set 'item (span-transforms (trim item "\n"))) - )) - (string {<li>} item {</li>} "\n")) - 10) - (dec *list-level*) - list-text)) - -(define (code-blocks txt) - (let ((code-block {}) - (token-list '())) - (replace - [text](?:\n\n|\A)((?:(?:[ ]{4}|\t).*\n+)+)((?=^[ ]{0,3}\S)|\Z)[/text] - txt - (begin - (set 'code-block $1) - ; format if Nestor module is loaded and it's not marked as plain - (if (and (not (starts-with code-block " ;plain\n")) (context? Nestor)) - ; format newlisp - (begin - ; remove flag if present - (replace "[ ]{4};newlisp\n" code-block {} 0) - (set 'code-block (protect (Nestor:nlx-to-html (Nestor:my-read (trim (detab (outdent code-block)) "\n"))))) - code-block) - ; don't format - (begin - ; trim leading and trailing newlines - (replace "[ ]{4};plain\n" code-block {} 0) - (set 'code-block (trim (detab (encode-code (outdent code-block))) "\n")) - (set '$1 {}) - (set 'code-block (string "\n\n<pre><code>" code-block "\n</code></pre>\n\n"))))) - 10))) - -(define (block-quotes txt) - (let ((block-quote {})) - (replace - [text]((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)[/text] - txt - (begin - (set 'block-quote $1) - (replace {^[ ]*>[ ]?} block-quote {} 2) - (replace {^[ ]+$} block-quote {} 2) - (set 'block-quote (block-transforms block-quote)) ; recurse - ; remove leading spaces - (replace - {(\s*<pre>.+?</pre>)} - block-quote - (trim $1) - 2) - (string "<blockquote>\n" block-quote "\n</blockquote>\n\n")) - 2))) - -(define (outdent s) - (replace [text]^(\t|[ ]{1,4})[/text] s {} 2)) - -(define (detab s) - (replace [text](.*?)\t[/text] - s - (string $1 (dup { } (- 4 (% (length $1) 4)))) - 2)) - -(define (form-paragraphs txt) - (let ((grafs '()) - (original nil)) - (set 'txt (trim txt "\n")) ; strip blank lines before and after - (set 'grafs (parse txt "\n{2,}" 0)) ; split - (dolist (p grafs) - (if (set 'original (lookup p *hashed-html-blocks*)) - ; html blocks - (setf (grafs $idx) original) - ; wrap <p> tags round everything else - (setf (grafs $idx) (string {<p>} (replace {^[ ]*} (span-transforms p) {} (+ 4 8 16)) {</p>})))) - (join grafs "\n\n"))) - -[text] -; three command line arguments: let's hope last one is a file -(when (= 3 (length (main-args))) - (println (markdown (read-file (main-args 2)))) - (exit)) - -; hack for command-line and module loading -(set 'level (sys-info 3)) - -; if level is 2, then we're probably invoking markdown.lsp directly -; if level is > 3, then we're probably loading it into another script... - -(when (= level 2) - ; running on command line, read STDIN and execute: - (while (read-line) - (push (current-line) *stdin* -1)) - (println (markdown (join *stdin* "\n"))) - (exit)) -[/text] - -;; version 2011-09-16 16:31:29 -;; Changed to different hash routine. Profiling shows that hashing takes 40% of the execution time. -;; Unfortunately this new version is only very slightly faster. -;; Command-line arguments hack in previous version doesn't work. -;; -;; version 2011-08-18 15:04:40 -;; various fixes, and added hack for running this from the command-line: -;; echo "hi there" | newlisp markdown.lsp -;; echo "hello world" | markdown.lsp -;; cat file.text | newlisp markdown.lsp -;; -;; version 2010-11-14 17:34:52 -;; some problems in ustring. Probably remove it one day, as it's non standard... -;; -;; version 2010-10-14 18:41:38 -;; added code to work round PCRE crash in (protect ... -;; -;; version date 2010-07-10 22:20:25 -;; modified call to 'read' since lutz has changed it -;; -;; version date 2009-11-16 22:10:10 -;; fixed bug in tokenize.html -;; -;; version date 2008-10-08 18:44:46 -;; changed nth-set to setf to be version-10 ready. -;; This means that now this script will NOT work with -;; earlier versions of newLISP!!!!!!!!!!! -;; requires Nestor if you want source code colouring... -;; -;; version date 2008-08-08 16:54:56 -;; changed (unless to (if (not ... :( -;; -;; version date 2008-07-20 14:!2:29 -;; added hex-str-to-unicode-char ustring -;; -;; version date 2008-03-07 15:36:09 -;; fixed load error -;; -;; version date 2007-11-17 16:20:57 -;; added syntax colouring module -;; -;; version date 2007-11-14 09:19:42 -;; removed reliance on dostring for compatibility with 9.1 - - -; eof
\ No newline at end of file |