Twitter is simply incapable of leaving well enough alone.

This commit is contained in:
Ted Unangst 2022-07-02 17:57:29 -04:00
parent a967575f05
commit 6ada675630
2 changed files with 34 additions and 2 deletions

View File

@@ -1,5 +1,9 @@
changelog
=== next
+ Try to fix hoot again because Twitter did a Twitter.
=== 0.9.8 Tentative Tentacle
+ Switch database to WAL mode.

32
hoot.go
View File

@@ -28,8 +28,8 @@ import (
"humungus.tedunangst.com/r/webs/htfilter"
)
var tweetsel = cascadia.MustCompile("p.tweet-text")
var linksel = cascadia.MustCompile("a.tweet-timestamp")
var tweetsel = cascadia.MustCompile("div[itemProp=articleBody]")
var linksel = cascadia.MustCompile("a time")
var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor")
var imgsel = cascadia.MustCompile("div.js-adaptive-photo img")
var authorregex = regexp.MustCompile("twitter.com/([^/]+)")
@@ -65,6 +65,34 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
divs := tweetsel.MatchAll(root)
for i, div := range divs {
{
twp := div.Parent.Parent.Parent
link := url
alink := linksel.MatchFirst(twp)
if alink == nil {
if i != 0 {
dlog.Printf("missing link")
continue
}
} else {
alink = alink.Parent
link = "https://twitter.com" + htfilter.GetAttr(alink, "href")
}
authormatch := authorregex.FindStringSubmatch(link)
if len(authormatch) < 2 {
dlog.Printf("no author?: %s", link)
continue
}
author := authormatch[1]
if author != wanted {
continue
}
text := htf.NodeText(div)
text = strings.Replace(text, "\n", " ", -1)
fmt.Fprintf(&buf, "> @%s: %s\n", author, text)
continue
}
twp := div.Parent.Parent.Parent
link := url
alink := linksel.MatchFirst(twp)