revise hoot match to exclude translations.

from coelacanth
This commit is contained in:
Ted Unangst 2022-08-07 16:02:58 -04:00
parent 19a2ccbf44
commit d0c18338f6
1 changed file with 3 additions and 3 deletions

View File

@@ -28,7 +28,7 @@ import (
"humungus.tedunangst.com/r/webs/htfilter"
)
-var tweetsel = cascadia.MustCompile("div[itemProp=articleBody]")
+var tweetsel = cascadia.MustCompile("div[data-testid=tweetText]")
var linksel = cascadia.MustCompile("a time")
var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor")
var imgsel = cascadia.MustCompile("div[data-testid=tweetPhoto] img")
@@ -66,7 +66,7 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
divs := tweetsel.MatchAll(root)
for i, div := range divs {
{
-twp := div.Parent.Parent.Parent
+twp := div.Parent.Parent.Parent.Parent.Parent
link := url
alink := linksel.MatchFirst(twp)
if alink == nil {
@@ -100,7 +100,7 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
continue
}
-twp := div.Parent.Parent.Parent
+twp := div.Parent.Parent.Parent.Parent.Parent
link := url
alink := linksel.MatchFirst(twp)
if alink == nil {