diff --git a/fun.go b/fun.go index fab6981..a639c93 100644 --- a/fun.go +++ b/fun.go @@ -26,9 +26,12 @@ import ( "regexp" "strings" "sync" + + "humungus.tedunangst.com/r/webs/htfilter" ) func reverbolate(honks []*Honk) { + filt := htfilter.New() for _, h := range honks { h.What += "ed" if h.Honker == "" { @@ -56,7 +59,7 @@ func reverbolate(honks []*Honk) { if precis != "" { precis = "

summary: " + precis + "

" } - h.HTML = cleanstring(precis + h.Noise) + h.HTML, _ = filt.String(precis + h.Noise) emuxifier := func(e string) string { for _, d := range h.Donks { if d.Name == e { diff --git a/go.mod b/go.mod index 8287bd5..5c41fe9 100644 --- a/go.mod +++ b/go.mod @@ -6,5 +6,5 @@ require ( golang.org/x/crypto v0.0.0-20190424203555-c05e17bb3b2d golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 humungus.tedunangst.com/r/go-sqlite3 v1.1.2 - humungus.tedunangst.com/r/webs v0.4.3 + humungus.tedunangst.com/r/webs v0.4.4 ) diff --git a/go.sum b/go.sum index 5df4c66..12c776a 100644 --- a/go.sum +++ b/go.sum @@ -14,3 +14,5 @@ humungus.tedunangst.com/r/go-sqlite3 v1.1.2 h1:bRAXNRZ4VNFRFhhG4tdudK4Lv4ktHQAHE humungus.tedunangst.com/r/go-sqlite3 v1.1.2/go.mod h1:FtEEmQM7U2Ey1TuEEOyY1BmphTZnmiEjPsNLEAkpf/M= humungus.tedunangst.com/r/webs v0.4.3 h1:L0id2lZDK+lmuWswd+iOV4T0LXvZe92SqD50AuZDnDM= humungus.tedunangst.com/r/webs v0.4.3/go.mod h1:6yLLDXBaE4pKURa/3/bxoQPod37uAqc/Kq8J0IopWW0= +humungus.tedunangst.com/r/webs v0.4.4 h1:uK1YW+eGQ0JADiSs7Ipt0ljFfQw7e73924wMm4V3gss= +humungus.tedunangst.com/r/webs v0.4.4/go.mod h1:79Ww3HmgE1m+HXU0r0b9hkOD3JuDzXoGiEauHuKcwBI= diff --git a/honk.go b/honk.go index 850161c..3bbaaab 100644 --- a/honk.go +++ b/honk.go @@ -34,6 +34,7 @@ import ( "time" "github.com/gorilla/mux" + "humungus.tedunangst.com/r/webs/htfilter" "humungus.tedunangst.com/r/webs/image" "humungus.tedunangst.com/r/webs/login" "humungus.tedunangst.com/r/webs/rss" @@ -498,10 +499,11 @@ func honkpage(w http.ResponseWriter, r *http.Request, u *login.UserInfo, user *W w.Header().Set("Cache-Control", "max-age=60") } if user != nil { + filt := htfilter.New() templinfo["Name"] = user.Name whatabout := user.About whatabout = obfusbreak(user.About) - templinfo["WhatAbout"] = cleanstring(whatabout) + templinfo["WhatAbout"], _ = filt.String(whatabout) } templinfo["Honks"] = honks templinfo["ServerMessage"] = infomsg diff --git a/html.go b/html.go deleted file mode 100644 index 0a96ca8..0000000 --- a/html.go +++ /dev/null @@ -1,196 +0,0 @@ -// -// Copyright (c) 2019 Ted Unangst -// -// Permission to use, copy, modify, and distribute this software for any -// purpose with or without fee is hereby granted, provided that the above -// copyright notice and this permission notice appear in all copies. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -package main - -import ( - "fmt" - "html/template" - "io" - "log" - "net/url" - "regexp" - "sort" - "strings" - - "golang.org/x/net/html" -) - -var permittedtags = []string{ - "div", "h1", "h2", "h3", "h4", "h5", "h6", - "table", "thead", "tbody", "th", "tr", "td", "colgroup", "col", - "p", "br", "pre", "code", "blockquote", "q", - "samp", "mark", "ins", "dfn", "cite", "abbr", "address", - "strong", "em", "b", "i", "s", "u", "sub", "sup", "del", "tt", "small", - "ol", "ul", "li", "dl", "dt", "dd", -} -var permittedattr = []string{"colspan", "rowspan"} -var bannedtags = []string{"script", "style"} - -func init() { - sort.Strings(permittedtags) - sort.Strings(permittedattr) - sort.Strings(bannedtags) -} - -func contains(array []string, tag string) bool { - idx := sort.SearchStrings(array, tag) - return idx < len(array) && array[idx] == tag -} - -func getattr(node *html.Node, attr string) string { - for _, a := range node.Attr { - if a.Key == attr { - return a.Val - } - } - return "" -} - -func hasclass(node *html.Node, class string) bool { - return strings.Contains(" "+getattr(node, "class")+" ", " "+class+" ") -} - -func writetag(w io.Writer, node *html.Node) { - io.WriteString(w, "<") - io.WriteString(w, node.Data) - for _, attr := range node.Attr { - if contains(permittedattr, attr.Key) { - fmt.Fprintf(w, ` %s="%s"`, attr.Key, html.EscapeString(attr.Val)) - } - } - io.WriteString(w, ">") -} - -func render(w io.Writer, node *html.Node) { - if node.Type == html.ElementNode { - tag := node.Data - switch { - case tag == "a": - href := getattr(node, "href") - hrefurl, err := url.Parse(href) - if err != nil { - href = "#BROKEN-" + href - } else { - href = hrefurl.String() - } - fmt.Fprintf(w, ``, html.EscapeString(href)) - case tag == "img": - div := replaceimg(node) - if div != "skip" { - io.WriteString(w, div) - } - case tag == "span": - case tag == "iframe": - src := html.EscapeString(getattr(node, "src")) - fmt.Fprintf(w, `<iframe src="%s">`, src, src) - case contains(permittedtags, tag): - writetag(w, node) - case contains(bannedtags, tag): - return - } - } else if node.Type == html.TextNode { - io.WriteString(w, html.EscapeString(node.Data)) - } - - for c := node.FirstChild; c != nil; c = c.NextSibling { - render(w, c) - } - - if node.Type == html.ElementNode { - tag := node.Data - if tag == "a" || (contains(permittedtags, tag) && tag != "br") { - fmt.Fprintf(w, "", tag) - } - if tag == "p" || tag == "div" { - io.WriteString(w, "\n") - } - } -} - -func replaceimg(node *html.Node) string { - src := getattr(node, "src") - alt := getattr(node, "alt") - //title := getattr(node, "title") - if hasclass(node, "Emoji") && alt != "" { - return html.EscapeString(alt) - } - return html.EscapeString(fmt.Sprintf(``, src)) -} - -func cleannode(node *html.Node) template.HTML { - var buf strings.Builder - render(&buf, node) - return template.HTML(buf.String()) -} - -func cleanstring(shtml string) template.HTML { - reader := strings.NewReader(shtml) - body, err := html.Parse(reader) - if err != nil { - log.Printf("error parsing html: %s", err) - return "" - } - return cleannode(body) -} - -func textonly(w io.Writer, node *html.Node) { - switch node.Type { - case html.ElementNode: - tag := node.Data - switch { - case tag == "a": - href := getattr(node, "href") - fmt.Fprintf(w, ``, href) - case tag == "img": - io.WriteString(w, "") - case contains(bannedtags, tag): - return - } - case html.TextNode: - io.WriteString(w, node.Data) - } - for c := node.FirstChild; c != nil; c = c.NextSibling { - textonly(w, c) - } - if node.Type == html.ElementNode { - tag := node.Data - if tag == "a" { - fmt.Fprintf(w, "", tag) - } - if tag == "p" || tag == "div" { - io.WriteString(w, "\n") - } - } -} - -var re_whitespaceeater = regexp.MustCompile("[ \t\r]*\n[ \t\r]*") -var re_blanklineeater = regexp.MustCompile("\n\n+") -var re_tabeater = regexp.MustCompile("[ \t]+") - -func htmltotext(shtml template.HTML) string { - reader := strings.NewReader(string(shtml)) - body, _ := html.Parse(reader) - var buf strings.Builder - textonly(&buf, body) - rv := buf.String() - rv = re_whitespaceeater.ReplaceAllLiteralString(rv, "\n") - rv = re_blanklineeater.ReplaceAllLiteralString(rv, "\n\n") - rv = re_tabeater.ReplaceAllLiteralString(rv, " ") - for len(rv) > 0 && rv[0] == '\n' { - rv = rv[1:] - } - return rv -}