honk/markitzero.go

//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

package main

import (
	"fmt"
	"regexp"
	"strings"

	"golang.org/x/net/html"
	"humungus.tedunangst.com/r/webs/synlight"
)

// Patterns used by markitzero. They are applied to text that has already
// had &, < and > escaped, which is why re_quoter looks for "&gt;".
var (
	// **bold**, delimited by start/end of text or a non-word character.
	re_bolder = regexp.MustCompile(`(^|\W)\*\*((?s:.*?))\*\*($|\W)`)

	// *italic*, same delimiting rule as bold.
	re_italicer = regexp.MustCompile(`(^|\W)\*((?s:.*?))\*($|\W)`)

	// Fenced code block: group 1 is the rest of the opening fence line,
	// group 2 is the body.
	re_bigcoder = regexp.MustCompile("```(.*)\n?((?s:.*?))\n?```\n?")

	// Inline code span between single backticks.
	re_coder = regexp.MustCompile("`([^`]*)`")

	// "> quote" line, optionally followed by a "- citation" line (group 3).
	re_quoter = regexp.MustCompile(`(?m:^&gt; (.*)(\n- ?(.*))?\n?)`)

	// A citation whose link text is a twitter.com URL; group 2 is the
	// first path segment.
	re_reciter = regexp.MustCompile(`(<cite><a href=".*?">)https://twitter.com/([^/]+)/.*?(</a></cite>)`)

	// Bare http(s) URL plus up to two characters of leading context.
	re_link = regexp.MustCompile(`.?.?https?://[^\s"]+[\w/)!]`)

	// [text](url) link; the url may end in one closing parenthesis.
	re_zerolink = regexp.MustCompile(`\[([^]]*)\]\(([^)]*\)?)\)`)

	// Literal <img ...> tag.
	re_imgfix = regexp.MustCompile(`<img ([^>]*)>`)

	// Run of consecutive lines that each start with + or -.
	re_lister = regexp.MustCompile(`((^|\n)(\+|-).*)+\n?`)
)

// lighter is the shared synlight highlighter, configured for HTML output.
// markitzero uses it to render the body of fenced code blocks.
var lighter = synlight.New(synlight.Options{Format: synlight.HTML})

// markitzero converts a small markdown dialect to HTML.
//
// Supported syntax: fenced code blocks (syntax highlighted), inline code
// spans, literal <img ...> tags (passed through untouched), bare http(s)
// URLs, [text](url) links, **bold**, *italic*, "> quote" lines with an
// optional "- citation" line, "---" rules, and lists whose lines start
// with + or -. Everything else has &, < and > escaped, and remaining
// newlines become <br>.
//
// The function works in three phases, and the order of the steps matters:
// code and images are swapped for placeholders, the remaining text is
// escaped and marked up, then the saved fragments are put back.
func markitzero(s string) string {
	// prepare the string
	s = strings.TrimSpace(s)
	s = strings.Replace(s, "\r", "", -1)

	// save away the code blocks so we don't mess them up further
	var bigcodes, lilcodes, images []string
	s = re_bigcoder.ReplaceAllStringFunc(s, func(code string) string {
		bigcodes = append(bigcodes, code)
		return "``````"
	})
	s = re_coder.ReplaceAllStringFunc(s, func(code string) string {
		lilcodes = append(lilcodes, code)
		return "`x`"
	})
	s = re_imgfix.ReplaceAllStringFunc(s, func(img string) string {
		images = append(images, img)
		return "<img x>"
	})

	// fewer side effects than html.EscapeString
	buf := make([]byte, 0, len(s))
	for _, c := range []byte(s) {
		switch c {
		case '&':
			buf = append(buf, "&amp;"...)
		case '<':
			buf = append(buf, "&lt;"...)
		case '>':
			buf = append(buf, "&gt;"...)
		default:
			buf = append(buf, c)
		}
	}
	s = string(buf)

	// mark it zero
	s = re_link.ReplaceAllStringFunc(s, linkreplacer)
	s = re_zerolink.ReplaceAllString(s, `<a href="$2">$1</a>`)
	s = re_bolder.ReplaceAllString(s, "$1<b>$2</b>$3")
	s = re_italicer.ReplaceAllString(s, "$1<i>$2</i>$3")
	s = re_quoter.ReplaceAllString(s, "<blockquote>$1<br><cite>$3</cite></blockquote><p>")
	s = re_reciter.ReplaceAllString(s, "$1$2$3")
	s = strings.Replace(s, "\n---\n", "<hr><p>", -1)

	s = re_lister.ReplaceAllStringFunc(s, func(m string) string {
		var list strings.Builder
		list.WriteString("<ul>")
		for _, item := range strings.Split(strings.Trim(m, "\n"), "\n") {
			// every line of the match starts with its + or - bullet
			list.WriteString("<li>")
			list.WriteString(strings.Trim(item[1:], " "))
		}
		list.WriteString("</ul><p>")
		return list.String()
	})

	// next pops the oldest saved fragment. The markup pass can leave more
	// placeholders in the text than were saved: linkreplacer writes the URL
	// into both the href and the link text, so a URL that contains an
	// inline code span duplicates its placeholder. Report false when the
	// list has run dry so the caller can leave the match alone rather than
	// index past the end.
	next := func(saved *[]string) (string, bool) {
		if len(*saved) == 0 {
			return "", false
		}
		frag := (*saved)[0]
		*saved = (*saved)[1:]
		return frag, true
	}

	// restore images; the placeholder was escaped along with everything else
	s = strings.Replace(s, "&lt;img x&gt;", "<img x>", -1)
	s = re_imgfix.ReplaceAllStringFunc(s, func(m string) string {
		img, ok := next(&images)
		if !ok {
			return m
		}
		return img
	})

	// now restore the code blocks
	s = re_coder.ReplaceAllStringFunc(s, func(m string) string {
		code, ok := next(&lilcodes)
		if !ok {
			return m
		}
		return html.EscapeString(code)
	})
	s = re_bigcoder.ReplaceAllStringFunc(s, func(m string) string {
		code, ok := next(&bigcodes)
		if !ok {
			return m
		}
		// re-match the saved block to split it into language hint and body
		parts := re_bigcoder.FindStringSubmatch(code)
		return "<pre><code>" + lighter.HighlightString(parts[2], parts[1]) + "</code></pre><p>"
	})
	// the restored inline spans still carry their backticks; wrap them now
	s = re_coder.ReplaceAllString(s, "<code>$1</code>")

	// some final fixups: turn newlines into breaks, then drop the breaks
	// and empty citations that sit next to block-level elements
	s = strings.Replace(s, "\n", "<br>", -1)
	s = strings.Replace(s, "<br><blockquote>", "<blockquote>", -1)
	s = strings.Replace(s, "<br><cite></cite>", "", -1)
	s = strings.Replace(s, "<br><pre>", "<pre>", -1)
	s = strings.Replace(s, "<br><ul>", "<ul>", -1)
	s = strings.Replace(s, "<p><br>", "<p>", -1)
	return s
}

// linkreplacer turns one re_link match into an HTML anchor.
//
// The match may begin with up to two characters of context ahead of the
// URL; they are copied through in front of the anchor. A match that starts
// with "](" is the target of a [text](url) link and is returned unchanged
// so the re_zerolink pass can handle it. A trailing ")" is moved outside
// the anchor unless the URL contains a "(" of its own, and a trailing "."
// left after that is moved outside as well.
//
// Input that contains no "http" at all is returned unchanged.
func linkreplacer(url string) string {
	if strings.HasPrefix(url, "](") {
		return url
	}
	start := strings.Index(url, "http")
	if start == -1 {
		// nothing to link; re_link never produces this, but don't panic on it
		return url
	}
	prefix := url[:start]
	url = url[start:]

	addparen := false
	adddot := false
	if strings.HasSuffix(url, ")") && strings.IndexByte(url, '(') == -1 {
		// unbalanced paren: assume it closes a parenthetical in the text
		url = url[:len(url)-1]
		addparen = true
	}
	if strings.HasSuffix(url, ".") {
		// sentence-ending period, not part of the address
		url = url[:len(url)-1]
		adddot = true
	}
	url = fmt.Sprintf(`<a href="%s">%s</a>`, url, url)
	if adddot {
		url += "."
	}
	if addparen {
		url += ")"
	}
	return prefix + url
}
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			`//`
			`// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>`
			`//`
			`// Permission to use, copy, modify, and distribute this software for any`
			`// purpose with or without fee is hereby granted, provided that the above`
			`// copyright notice and this permission notice appear in all copies.`
			`//`
			`// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES`
			`// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF`
			`// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR`
			`// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES`
			`// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN`
			`// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF`
			`// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.`

			`package main`

			`import (`
			`"fmt"`
			`"regexp"`
			`"strings"`

revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`"golang.org/x/net/html"`
syntax highlighting because why not. go code or go home. 2019-10-10 06:40:29 +02:00			`"humungus.tedunangst.com/r/webs/synlight"`
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			`)`

try more aggresive bold matching and hope it doesn't break 2019-10-25 06:27:00 +02:00			var re_bolder = regexp.MustCompile(`(^|\W)\*\*((?s:.*?))\*\*($|\W)`)
			var re_italicer = regexp.MustCompile(`(^|\W)\*((?s:.*?))\*($|\W)`)
syntax highlighting because why not. go code or go home. 2019-10-10 06:40:29 +02:00			var re_bigcoder = regexp.MustCompile("```(.*)\n?((?s:.*?))\n?```\n?")
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			var re_coder = regexp.MustCompile("`([^`]*)`")
auto fixup twitter citations 2019-11-25 04:54:55 +01:00			var re_quoter = regexp.MustCompile(`(?m:^&gt; (.*)(\n- ?(.*))?\n?)`)
			var re_reciter = regexp.MustCompile(`(<cite><a href=".*?">)https://twitter.com/([^/]+)/.*?(</a></cite>)`)
a few more autolink edge cases 2019-10-22 05:43:15 +02:00			var re_link = regexp.MustCompile(`.?.?https?://[^\s"]+[\w/)!]`)
add [link](url) support to markdown 2019-10-09 20:03:16 +02:00			var re_zerolink = regexp.MustCompile(`\[([^]]*)\]\(([^)]*\)?)\)`)
support inline imgs in markdown 2019-10-22 06:16:28 +02:00			var re_imgfix = regexp.MustCompile(`<img ([^>]*)>`)
add lists support to markdown 2019-11-12 20:45:39 +01:00			var re_lister = regexp.MustCompile(`((^|\n)(\+|-).*)+\n?`)
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00
syntax highlighting because why not. go code or go home. 2019-10-10 06:40:29 +02:00			`var lighter = synlight.New(synlight.Options{Format: synlight.HTML})`

revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`func markitzero(s string) string {`
			`// prepare the string`
			`s = strings.TrimSpace(s)`
			`s = strings.Replace(s, "\r", "", -1)`

			`// save away the code blocks so we don't mess them up further`
			`var bigcodes, lilcodes, images []string`
			`s = re_bigcoder.ReplaceAllStringFunc(s, func(code string) string {`
			`bigcodes = append(bigcodes, code)`
			return "``````"
			`})`
			`s = re_coder.ReplaceAllStringFunc(s, func(code string) string {`
			`lilcodes = append(lilcodes, code)`
			return "`x`"
			`})`
			`s = re_imgfix.ReplaceAllStringFunc(s, func(img string) string {`
			`images = append(images, img)`
			`return "<img x>"`
			`})`

			`// fewer side effects than html.EscapeString`
a few more autolink edge cases 2019-10-22 05:43:15 +02:00			`buf := make([]byte, 0, len(s))`
			`for _, c := range []byte(s) {`
			`switch c {`
			`case '&':`
			`buf = append(buf, []byte("&amp;")...)`
			`case '<':`
			`buf = append(buf, []byte("&lt;")...)`
			`case '>':`
			`buf = append(buf, []byte("&gt;")...)`
			`default:`
			`buf = append(buf, c)`
			`}`
			`}`
revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`s = string(buf)`
a faster version of markitzero, though i'm uncertain it's worth it 2019-12-02 08:44:24 +01:00
			`// mark it zero`
revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`s = re_link.ReplaceAllStringFunc(s, linkreplacer)`
			s = re_zerolink.ReplaceAllString(s, `<a href="$2">$1</a>`)
			`s = re_bolder.ReplaceAllString(s, "$1<b>$2</b>$3")`
			`s = re_italicer.ReplaceAllString(s, "$1<i>$2</i>$3")`
			`s = re_quoter.ReplaceAllString(s, "<blockquote>$1<br><cite>$3</cite></blockquote><p>")`
			`s = re_reciter.ReplaceAllString(s, "$1$2$3")`
			`s = strings.Replace(s, "\n---\n", "<hr><p>", -1)`

			`s = re_lister.ReplaceAllStringFunc(s, func(m string) string {`
			`m = strings.Trim(m, "\n")`
			`items := strings.Split(m, "\n")`
			`r := "<ul>"`
			`for _, item := range items {`
			`r += "<li>" + strings.Trim(item[1:], " ")`
			`}`
			`r += "</ul><p>"`
			`return r`
			`})`
a faster version of markitzero, though i'm uncertain it's worth it 2019-12-02 08:44:24 +01:00
support inline imgs in markdown 2019-10-22 06:16:28 +02:00			`// restore images`
revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`s = strings.Replace(s, "&lt;img x&gt;", "<img x>", -1)`
			`s = re_imgfix.ReplaceAllStringFunc(s, func(string) string {`
support inline imgs in markdown 2019-10-22 06:16:28 +02:00			`img := images[0]`
			`images = images[1:]`
			`return img`
			`})`

better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			`// now restore the code blocks`
revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`s = re_coder.ReplaceAllStringFunc(s, func(string) string {`
			`code := lilcodes[0]`
			`lilcodes = lilcodes[1:]`
			`code = html.EscapeString(code)`
			`return code`
			`})`
			`s = re_bigcoder.ReplaceAllStringFunc(s, func(string) string {`
			`code := bigcodes[0]`
			`bigcodes = bigcodes[1:]`
			`m := re_bigcoder.FindStringSubmatch(code)`
			`return "<pre><code>" + lighter.HighlightString(m[2], m[1]) + "</code></pre><p>"`
			`})`
			`s = re_coder.ReplaceAllString(s, "<code>$1</code>")`
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00
			`// some final fixups`
revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`s = strings.Replace(s, "\n", "<br>", -1)`
			`s = strings.Replace(s, "<br><blockquote>", "<blockquote>", -1)`
			`s = strings.Replace(s, "<br><cite></cite>", "", -1)`
			`s = strings.Replace(s, "<br><pre>", "<pre>", -1)`
			`s = strings.Replace(s, "<br><ul>", "<ul>", -1)`
			`s = strings.Replace(s, "<p><br>", "<p>", -1)`
			`return s`
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			`}`

revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`func linkreplacer(url string) string {`
add [link](url) support to markdown 2019-10-09 20:03:16 +02:00			`if url[0:2] == "](" {`
revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`return url`
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			`}`
add [link](url) support to markdown 2019-10-09 20:03:16 +02:00			`prefix := ""`
			`for !strings.HasPrefix(url, "http") {`
			`prefix += url[0:1]`
			`url = url[1:]`
			`}`
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			`addparen := false`
			`adddot := false`
			`if strings.HasSuffix(url, ")") && strings.IndexByte(url, '(') == -1 {`
			`url = url[:len(url)-1]`
			`addparen = true`
			`}`
			`if strings.HasSuffix(url, ".") {`
			`url = url[:len(url)-1]`
			`adddot = true`
			`}`
hackish, but yet more effective, means of getting class=mention on all links 2019-10-28 21:05:18 +01:00			url = fmt.Sprintf(`<a href="%s">%s</a>`, url, url)
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			`if adddot {`
			`url += "."`
			`}`
			`if addparen {`
			`url += ")"`
			`}`
revert for now, this is probably not the slow point for anything 2019-12-02 08:48:12 +01:00			`return prefix + url`
better markdown linebreaks (and add the file, doh) 2019-10-06 21:14:18 +02:00			`}`