use htfilter from webs
This commit is contained in:
parent
1813bb7c39
commit
1f3e7a8479
5
fun.go
5
fun.go
|
@ -26,9 +26,12 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"humungus.tedunangst.com/r/webs/htfilter"
|
||||||
)
|
)
|
||||||
|
|
||||||
func reverbolate(honks []*Honk) {
|
func reverbolate(honks []*Honk) {
|
||||||
|
filt := htfilter.New()
|
||||||
for _, h := range honks {
|
for _, h := range honks {
|
||||||
h.What += "ed"
|
h.What += "ed"
|
||||||
if h.Honker == "" {
|
if h.Honker == "" {
|
||||||
|
@ -56,7 +59,7 @@ func reverbolate(honks []*Honk) {
|
||||||
if precis != "" {
|
if precis != "" {
|
||||||
precis = "<p>summary: " + precis + "<p>"
|
precis = "<p>summary: " + precis + "<p>"
|
||||||
}
|
}
|
||||||
h.HTML = cleanstring(precis + h.Noise)
|
h.HTML, _ = filt.String(precis + h.Noise)
|
||||||
emuxifier := func(e string) string {
|
emuxifier := func(e string) string {
|
||||||
for _, d := range h.Donks {
|
for _, d := range h.Donks {
|
||||||
if d.Name == e {
|
if d.Name == e {
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -6,5 +6,5 @@ require (
|
||||||
golang.org/x/crypto v0.0.0-20190424203555-c05e17bb3b2d
|
golang.org/x/crypto v0.0.0-20190424203555-c05e17bb3b2d
|
||||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3
|
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3
|
||||||
humungus.tedunangst.com/r/go-sqlite3 v1.1.2
|
humungus.tedunangst.com/r/go-sqlite3 v1.1.2
|
||||||
humungus.tedunangst.com/r/webs v0.4.3
|
humungus.tedunangst.com/r/webs v0.4.4
|
||||||
)
|
)
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -14,3 +14,5 @@ humungus.tedunangst.com/r/go-sqlite3 v1.1.2 h1:bRAXNRZ4VNFRFhhG4tdudK4Lv4ktHQAHE
|
||||||
humungus.tedunangst.com/r/go-sqlite3 v1.1.2/go.mod h1:FtEEmQM7U2Ey1TuEEOyY1BmphTZnmiEjPsNLEAkpf/M=
|
humungus.tedunangst.com/r/go-sqlite3 v1.1.2/go.mod h1:FtEEmQM7U2Ey1TuEEOyY1BmphTZnmiEjPsNLEAkpf/M=
|
||||||
humungus.tedunangst.com/r/webs v0.4.3 h1:L0id2lZDK+lmuWswd+iOV4T0LXvZe92SqD50AuZDnDM=
|
humungus.tedunangst.com/r/webs v0.4.3 h1:L0id2lZDK+lmuWswd+iOV4T0LXvZe92SqD50AuZDnDM=
|
||||||
humungus.tedunangst.com/r/webs v0.4.3/go.mod h1:6yLLDXBaE4pKURa/3/bxoQPod37uAqc/Kq8J0IopWW0=
|
humungus.tedunangst.com/r/webs v0.4.3/go.mod h1:6yLLDXBaE4pKURa/3/bxoQPod37uAqc/Kq8J0IopWW0=
|
||||||
|
humungus.tedunangst.com/r/webs v0.4.4 h1:uK1YW+eGQ0JADiSs7Ipt0ljFfQw7e73924wMm4V3gss=
|
||||||
|
humungus.tedunangst.com/r/webs v0.4.4/go.mod h1:79Ww3HmgE1m+HXU0r0b9hkOD3JuDzXoGiEauHuKcwBI=
|
||||||
|
|
4
honk.go
4
honk.go
|
@ -34,6 +34,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
|
"humungus.tedunangst.com/r/webs/htfilter"
|
||||||
"humungus.tedunangst.com/r/webs/image"
|
"humungus.tedunangst.com/r/webs/image"
|
||||||
"humungus.tedunangst.com/r/webs/login"
|
"humungus.tedunangst.com/r/webs/login"
|
||||||
"humungus.tedunangst.com/r/webs/rss"
|
"humungus.tedunangst.com/r/webs/rss"
|
||||||
|
@ -498,10 +499,11 @@ func honkpage(w http.ResponseWriter, r *http.Request, u *login.UserInfo, user *W
|
||||||
w.Header().Set("Cache-Control", "max-age=60")
|
w.Header().Set("Cache-Control", "max-age=60")
|
||||||
}
|
}
|
||||||
if user != nil {
|
if user != nil {
|
||||||
|
filt := htfilter.New()
|
||||||
templinfo["Name"] = user.Name
|
templinfo["Name"] = user.Name
|
||||||
whatabout := user.About
|
whatabout := user.About
|
||||||
whatabout = obfusbreak(user.About)
|
whatabout = obfusbreak(user.About)
|
||||||
templinfo["WhatAbout"] = cleanstring(whatabout)
|
templinfo["WhatAbout"], _ = filt.String(whatabout)
|
||||||
}
|
}
|
||||||
templinfo["Honks"] = honks
|
templinfo["Honks"] = honks
|
||||||
templinfo["ServerMessage"] = infomsg
|
templinfo["ServerMessage"] = infomsg
|
||||||
|
|
196
html.go
196
html.go
|
@ -1,196 +0,0 @@
|
||||||
//
|
|
||||||
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
|
|
||||||
//
|
|
||||||
// Permission to use, copy, modify, and distribute this software for any
|
|
||||||
// purpose with or without fee is hereby granted, provided that the above
|
|
||||||
// copyright notice and this permission notice appear in all copies.
|
|
||||||
//
|
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
||||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
||||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
||||||
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
||||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
||||||
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
||||||
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"html/template"
|
|
||||||
"io"
|
|
||||||
"log"
|
|
||||||
"net/url"
|
|
||||||
"regexp"
|
|
||||||
"sort"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"golang.org/x/net/html"
|
|
||||||
)
|
|
||||||
|
|
||||||
var permittedtags = []string{
|
|
||||||
"div", "h1", "h2", "h3", "h4", "h5", "h6",
|
|
||||||
"table", "thead", "tbody", "th", "tr", "td", "colgroup", "col",
|
|
||||||
"p", "br", "pre", "code", "blockquote", "q",
|
|
||||||
"samp", "mark", "ins", "dfn", "cite", "abbr", "address",
|
|
||||||
"strong", "em", "b", "i", "s", "u", "sub", "sup", "del", "tt", "small",
|
|
||||||
"ol", "ul", "li", "dl", "dt", "dd",
|
|
||||||
}
|
|
||||||
var permittedattr = []string{"colspan", "rowspan"}
|
|
||||||
var bannedtags = []string{"script", "style"}
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
sort.Strings(permittedtags)
|
|
||||||
sort.Strings(permittedattr)
|
|
||||||
sort.Strings(bannedtags)
|
|
||||||
}
|
|
||||||
|
|
||||||
func contains(array []string, tag string) bool {
|
|
||||||
idx := sort.SearchStrings(array, tag)
|
|
||||||
return idx < len(array) && array[idx] == tag
|
|
||||||
}
|
|
||||||
|
|
||||||
func getattr(node *html.Node, attr string) string {
|
|
||||||
for _, a := range node.Attr {
|
|
||||||
if a.Key == attr {
|
|
||||||
return a.Val
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func hasclass(node *html.Node, class string) bool {
|
|
||||||
return strings.Contains(" "+getattr(node, "class")+" ", " "+class+" ")
|
|
||||||
}
|
|
||||||
|
|
||||||
func writetag(w io.Writer, node *html.Node) {
|
|
||||||
io.WriteString(w, "<")
|
|
||||||
io.WriteString(w, node.Data)
|
|
||||||
for _, attr := range node.Attr {
|
|
||||||
if contains(permittedattr, attr.Key) {
|
|
||||||
fmt.Fprintf(w, ` %s="%s"`, attr.Key, html.EscapeString(attr.Val))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
io.WriteString(w, ">")
|
|
||||||
}
|
|
||||||
|
|
||||||
func render(w io.Writer, node *html.Node) {
|
|
||||||
if node.Type == html.ElementNode {
|
|
||||||
tag := node.Data
|
|
||||||
switch {
|
|
||||||
case tag == "a":
|
|
||||||
href := getattr(node, "href")
|
|
||||||
hrefurl, err := url.Parse(href)
|
|
||||||
if err != nil {
|
|
||||||
href = "#BROKEN-" + href
|
|
||||||
} else {
|
|
||||||
href = hrefurl.String()
|
|
||||||
}
|
|
||||||
fmt.Fprintf(w, `<a href="%s" rel=noreferrer>`, html.EscapeString(href))
|
|
||||||
case tag == "img":
|
|
||||||
div := replaceimg(node)
|
|
||||||
if div != "skip" {
|
|
||||||
io.WriteString(w, div)
|
|
||||||
}
|
|
||||||
case tag == "span":
|
|
||||||
case tag == "iframe":
|
|
||||||
src := html.EscapeString(getattr(node, "src"))
|
|
||||||
fmt.Fprintf(w, `<iframe src="<a href="%s">%s</a>">`, src, src)
|
|
||||||
case contains(permittedtags, tag):
|
|
||||||
writetag(w, node)
|
|
||||||
case contains(bannedtags, tag):
|
|
||||||
return
|
|
||||||
}
|
|
||||||
} else if node.Type == html.TextNode {
|
|
||||||
io.WriteString(w, html.EscapeString(node.Data))
|
|
||||||
}
|
|
||||||
|
|
||||||
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
|
||||||
render(w, c)
|
|
||||||
}
|
|
||||||
|
|
||||||
if node.Type == html.ElementNode {
|
|
||||||
tag := node.Data
|
|
||||||
if tag == "a" || (contains(permittedtags, tag) && tag != "br") {
|
|
||||||
fmt.Fprintf(w, "</%s>", tag)
|
|
||||||
}
|
|
||||||
if tag == "p" || tag == "div" {
|
|
||||||
io.WriteString(w, "\n")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func replaceimg(node *html.Node) string {
|
|
||||||
src := getattr(node, "src")
|
|
||||||
alt := getattr(node, "alt")
|
|
||||||
//title := getattr(node, "title")
|
|
||||||
if hasclass(node, "Emoji") && alt != "" {
|
|
||||||
return html.EscapeString(alt)
|
|
||||||
}
|
|
||||||
return html.EscapeString(fmt.Sprintf(`<img src="%s">`, src))
|
|
||||||
}
|
|
||||||
|
|
||||||
func cleannode(node *html.Node) template.HTML {
|
|
||||||
var buf strings.Builder
|
|
||||||
render(&buf, node)
|
|
||||||
return template.HTML(buf.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
func cleanstring(shtml string) template.HTML {
|
|
||||||
reader := strings.NewReader(shtml)
|
|
||||||
body, err := html.Parse(reader)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("error parsing html: %s", err)
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
return cleannode(body)
|
|
||||||
}
|
|
||||||
|
|
||||||
func textonly(w io.Writer, node *html.Node) {
|
|
||||||
switch node.Type {
|
|
||||||
case html.ElementNode:
|
|
||||||
tag := node.Data
|
|
||||||
switch {
|
|
||||||
case tag == "a":
|
|
||||||
href := getattr(node, "href")
|
|
||||||
fmt.Fprintf(w, `<a href="%s">`, href)
|
|
||||||
case tag == "img":
|
|
||||||
io.WriteString(w, "<img>")
|
|
||||||
case contains(bannedtags, tag):
|
|
||||||
return
|
|
||||||
}
|
|
||||||
case html.TextNode:
|
|
||||||
io.WriteString(w, node.Data)
|
|
||||||
}
|
|
||||||
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
|
||||||
textonly(w, c)
|
|
||||||
}
|
|
||||||
if node.Type == html.ElementNode {
|
|
||||||
tag := node.Data
|
|
||||||
if tag == "a" {
|
|
||||||
fmt.Fprintf(w, "</%s>", tag)
|
|
||||||
}
|
|
||||||
if tag == "p" || tag == "div" {
|
|
||||||
io.WriteString(w, "\n")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var re_whitespaceeater = regexp.MustCompile("[ \t\r]*\n[ \t\r]*")
|
|
||||||
var re_blanklineeater = regexp.MustCompile("\n\n+")
|
|
||||||
var re_tabeater = regexp.MustCompile("[ \t]+")
|
|
||||||
|
|
||||||
func htmltotext(shtml template.HTML) string {
|
|
||||||
reader := strings.NewReader(string(shtml))
|
|
||||||
body, _ := html.Parse(reader)
|
|
||||||
var buf strings.Builder
|
|
||||||
textonly(&buf, body)
|
|
||||||
rv := buf.String()
|
|
||||||
rv = re_whitespaceeater.ReplaceAllLiteralString(rv, "\n")
|
|
||||||
rv = re_blanklineeater.ReplaceAllLiteralString(rv, "\n\n")
|
|
||||||
rv = re_tabeater.ReplaceAllLiteralString(rv, " ")
|
|
||||||
for len(rv) > 0 && rv[0] == '\n' {
|
|
||||||
rv = rv[1:]
|
|
||||||
}
|
|
||||||
return rv
|
|
||||||
}
|
|
Loading…
Reference in New Issue