revise hoot match to exclude translations.
from coelacanth
This commit is contained in:
parent
19a2ccbf44
commit
d0c18338f6
6
hoot.go
6
hoot.go
|
@@ -28,7 +28,7 @@ import (
|
||||||
"humungus.tedunangst.com/r/webs/htfilter"
|
"humungus.tedunangst.com/r/webs/htfilter"
|
||||||
)
|
)
|
||||||
|
|
||||||
var tweetsel = cascadia.MustCompile("div[itemProp=articleBody]")
|
var tweetsel = cascadia.MustCompile("div[data-testid=tweetText]")
|
||||||
var linksel = cascadia.MustCompile("a time")
|
var linksel = cascadia.MustCompile("a time")
|
||||||
var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor")
|
var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor")
|
||||||
var imgsel = cascadia.MustCompile("div[data-testid=tweetPhoto] img")
|
var imgsel = cascadia.MustCompile("div[data-testid=tweetPhoto] img")
|
||||||
|
@@ -66,7 +66,7 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
|
||||||
divs := tweetsel.MatchAll(root)
|
divs := tweetsel.MatchAll(root)
|
||||||
for i, div := range divs {
|
for i, div := range divs {
|
||||||
{
|
{
|
||||||
twp := div.Parent.Parent.Parent
|
twp := div.Parent.Parent.Parent.Parent.Parent
|
||||||
link := url
|
link := url
|
||||||
alink := linksel.MatchFirst(twp)
|
alink := linksel.MatchFirst(twp)
|
||||||
if alink == nil {
|
if alink == nil {
|
||||||
|
@@ -100,7 +100,7 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
twp := div.Parent.Parent.Parent
|
twp := div.Parent.Parent.Parent.Parent.Parent
|
||||||
link := url
|
link := url
|
||||||
alink := linksel.MatchFirst(twp)
|
alink := linksel.MatchFirst(twp)
|
||||||
if alink == nil {
|
if alink == nil {
|
||||||
|
|
Loading…
Reference in New Issue