revise hoot match to exclude translations.

from coelacanth
This commit is contained in:
Ted Unangst 2022-08-07 16:02:58 -04:00
parent 19a2ccbf44
commit d0c18338f6
1 changed file with 3 additions and 3 deletions

View File

@@ -28,7 +28,7 @@ import (
"humungus.tedunangst.com/r/webs/htfilter" "humungus.tedunangst.com/r/webs/htfilter"
) )
var tweetsel = cascadia.MustCompile("div[itemProp=articleBody]") var tweetsel = cascadia.MustCompile("div[data-testid=tweetText]")
var linksel = cascadia.MustCompile("a time") var linksel = cascadia.MustCompile("a time")
var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor") var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor")
var imgsel = cascadia.MustCompile("div[data-testid=tweetPhoto] img") var imgsel = cascadia.MustCompile("div[data-testid=tweetPhoto] img")
@@ -66,7 +66,7 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
divs := tweetsel.MatchAll(root) divs := tweetsel.MatchAll(root)
for i, div := range divs { for i, div := range divs {
{ {
twp := div.Parent.Parent.Parent twp := div.Parent.Parent.Parent.Parent.Parent
link := url link := url
alink := linksel.MatchFirst(twp) alink := linksel.MatchFirst(twp)
if alink == nil { if alink == nil {
@@ -100,7 +100,7 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
continue continue
} }
twp := div.Parent.Parent.Parent twp := div.Parent.Parent.Parent.Parent.Parent
link := url link := url
alink := linksel.MatchFirst(twp) alink := linksel.MatchFirst(twp)
if alink == nil { if alink == nil {