a faster version of markitzero, though I'm uncertain it's worth it

2019-12-02 02:44:24 -05:00 · 2019-12-02 02:44:24 -05:00 · 95dfba8d14
commit 95dfba8d14
parent beaceaadb1
1 changed file with 112 additions and 65 deletions
--- a/markitzero.go
+++ b/markitzero.go
@@ -16,11 +16,11 @@
 package main
 import (
 	"bytes"
 	"fmt"
 	"regexp"
 	"strings"
 	"golang.org/x/net/html"
 	"humungus.tedunangst.com/r/webs/synlight"
 )
@@ -37,27 +37,8 @@ var re_lister = regexp.MustCompile(`((^|\n)(\+|-).*)+\n?`)
 var lighter = synlight.New(synlight.Options{Format: synlight.HTML})
-func markitzero(s string) string {
+// fewer side effects than html.EscapeString
-	// prepare the string
+func fasterescaper(s []byte) []byte {
 	s = strings.TrimSpace(s)
 	s = strings.Replace(s, "\r", "", -1)
 	// save away the code blocks so we don't mess them up further
 	var bigcodes, lilcodes, images []string
 	s = re_bigcoder.ReplaceAllStringFunc(s, func(code string) string {
 		bigcodes = append(bigcodes, code)
 		return "``````"
 	})
 	s = re_coder.ReplaceAllStringFunc(s, func(code string) string {
 		lilcodes = append(lilcodes, code)
 		return "`x`"
 	})
 	s = re_imgfix.ReplaceAllStringFunc(s, func(img string) string {
 		images = append(images, img)
 		return "<img x>"
 	})
 	// fewer side effects than html.EscapeString
 	buf := make([]byte, 0, len(s))
 	for _, c := range []byte(s) {
 		switch c {
@@ -71,64 +52,130 @@ func markitzero(s string) string {
 			buf = append(buf, c)
 		}
 	}
-	s = string(buf)
+	return buf
 }
-	// mark it zero
+func replaceifmatch(re *regexp.Regexp, input []byte, repl []byte) []byte {
-	s = re_link.ReplaceAllStringFunc(s, linkreplacer)
+	if !re.Match(input) {
-	s = re_zerolink.ReplaceAllString(s, `<a href="$2">$1</a>`)
+		return input
-	s = re_bolder.ReplaceAllString(s, "$1<b>$2</b>$3")
+	}
-	s = re_italicer.ReplaceAllString(s, "$1<i>$2</i>$3")
+	return re.ReplaceAll(input, repl)
-	s = re_quoter.ReplaceAllString(s, "<blockquote>$1<br><cite>$3</cite></blockquote><p>")
+}
 	s = re_reciter.ReplaceAllString(s, "$1$2$3")
 	s = strings.Replace(s, "\n---\n", "<hr><p>", -1)
-	s = re_lister.ReplaceAllStringFunc(s, func(m string) string {
+func replaceifmatchfn(re *regexp.Regexp, input []byte, repl func([]byte) []byte) []byte {
-		m = strings.Trim(m, "\n")
+	if !re.Match(input) {
-		items := strings.Split(m, "\n")
+		return input
-		r := "<ul>"
+	}
-		for _, item := range items {
+	return re.ReplaceAllFunc(input, repl)
-			r += "<li>" + strings.Trim(item[1:], " ")
+}
-		}
+
-		r += "</ul><p>"
+func replacenocopy(input []byte, pat []byte, repl []byte) []byte {
-		return r
+	if !bytes.Contains(input, pat) {
 		return input
 	}
 	return bytes.Replace(input, pat, repl, -1)
 }
 func markitzero(ss string) string {
 	s := []byte(ss)
 	// prepare the string
 	s = bytes.TrimSpace(s)
 	s = replacenocopy(s, []byte("\r"), []byte(""))
 	hascode := bytes.Contains(s, []byte("`"))
 	// save away the code blocks so we don't mess them up further
 	var bigcodes, lilcodes, images [][]byte
 	if hascode {
 		s = replaceifmatchfn(re_bigcoder, s, func(code []byte) []byte {
 			bigcodes = append(bigcodes, code)
 			return []byte("``````")
 		})
 		s = replaceifmatchfn(re_coder, s, func(code []byte) []byte {
 			lilcodes = append(lilcodes, code)
 			return []byte("`x`")
 		})
 	}
 	s = replaceifmatchfn(re_imgfix, s, func(img []byte) []byte {
 		images = append(images, img)
 		return []byte("<img x>")
 	})
 	s = fasterescaper(s)
 	// mark it zero
 	if bytes.Contains(s, []byte("http")) {
 		s = replaceifmatchfn(re_link, s, linkreplacer)
 	}
 	s = replaceifmatch(re_zerolink, s, []byte(`<a href="$2">$1</a>`))
 	if bytes.Contains(s, []byte("**")) {
 		s = replaceifmatch(re_bolder, s, []byte("$1<b>$2</b>$3"))
 	}
 	if bytes.Contains(s, []byte("*")) {
 		s = replaceifmatch(re_italicer, s, []byte("$1<i>$2</i>$3"))
 	}
 	if bytes.Contains(s, []byte("&gt; ")) {
 		s = replaceifmatch(re_quoter, s, []byte("<blockquote>$1<br><cite>$3</cite></blockquote><p>"))
 		s = replaceifmatch(re_reciter, s, []byte("$1$2$3"))
 	}
 	s = replacenocopy(s, []byte("\n---\n"), []byte("<hr><p>"))
 	if bytes.Contains(s, []byte("\n+")) || bytes.Contains(s, []byte("\n-")) {
 		s = replaceifmatchfn(re_lister, s, func(m []byte) []byte {
 			m = bytes.Trim(m, "\n")
 			items := bytes.Split(m, []byte("\n"))
 			r := []byte("<ul>")
 			for _, item := range items {
 				r = append(r, []byte("<li>")...)
 				r = append(r, bytes.Trim(item[1:], " ")...)
 			}
 			r = append(r, []byte("</ul><p>")...)
 			return r
 		})
 	}
 	// restore images
-	s = strings.Replace(s, "&lt;img x&gt;", "<img x>", -1)
+	s = replacenocopy(s, []byte("&lt;img x&gt;"), []byte("<img x>"))
-	s = re_imgfix.ReplaceAllStringFunc(s, func(string) string {
+	s = replaceifmatchfn(re_imgfix, s, func([]byte) []byte {
 		img := images[0]
 		images = images[1:]
 		return img
 	})
 	// now restore the code blocks
-	s = re_coder.ReplaceAllStringFunc(s, func(string) string {
+	if hascode {
-		code := lilcodes[0]
+		s = replaceifmatchfn(re_coder, s, func([]byte) []byte {
-		lilcodes = lilcodes[1:]
+			code := lilcodes[0]
-		code = html.EscapeString(code)
+			lilcodes = lilcodes[1:]
-		return code
+			return fasterescaper(code)
-	})
+		})
-	s = re_bigcoder.ReplaceAllStringFunc(s, func(string) string {
+		s = replaceifmatchfn(re_bigcoder, s, func([]byte) []byte {
-		code := bigcodes[0]
+			code := bigcodes[0]
-		bigcodes = bigcodes[1:]
+			bigcodes = bigcodes[1:]
-		m := re_bigcoder.FindStringSubmatch(code)
+			m := re_bigcoder.FindSubmatch(code)
-		return "<pre><code>" + lighter.HighlightString(m[2], m[1]) + "</code></pre><p>"
+			var buf bytes.Buffer
-	})
+			buf.WriteString("<pre><code>")
-	s = re_coder.ReplaceAllString(s, "<code>$1</code>")
+			lighter.Highlight(m[2], string(m[1]), &buf)
 			buf.WriteString("</code></pre><p>")
 			return buf.Bytes()
 		})
 		s = replaceifmatch(re_coder, s, []byte("<code>$1</code>"))
 	}
 	// some final fixups
-	s = strings.Replace(s, "\n", "<br>", -1)
+	s = replacenocopy(s, []byte("\n"), []byte("<br>"))
-	s = strings.Replace(s, "<br><blockquote>", "<blockquote>", -1)
+	s = replacenocopy(s, []byte("<br><blockquote>"), []byte("<blockquote>"))
-	s = strings.Replace(s, "<br><cite></cite>", "", -1)
+	s = replacenocopy(s, []byte("<br><cite></cite>"), []byte(""))
-	s = strings.Replace(s, "<br><pre>", "<pre>", -1)
+	s = replacenocopy(s, []byte("<br><pre>"), []byte("<pre>"))
-	s = strings.Replace(s, "<br><ul>", "<ul>", -1)
+	s = replacenocopy(s, []byte("<br><ul>"), []byte("<ul>"))
-	s = strings.Replace(s, "<p><br>", "<p>", -1)
+	s = replacenocopy(s, []byte("<p><br>"), []byte("<p>"))
-	return s
+	return string(s)
 }
-func linkreplacer(url string) string {
+func linkreplacer(burl []byte) []byte {
 	url := string(burl)
 	if url[0:2] == "](" {
-		return url
+		return burl
 	}
 	prefix := ""
 	for !strings.HasPrefix(url, "http") {
@@ -152,5 +199,5 @@ func linkreplacer(url string) string {
 	if addparen {
 		url += ")"
 	}
-	return prefix + url
+	return []byte(prefix + url)
 }