Twitter is simply incapable of leaving well enough alone.
This commit is contained in:
parent
a967575f05
commit
6ada675630
|
@ -1,5 +1,9 @@
|
||||||
changelog
|
changelog
|
||||||
|
|
||||||
|
=== next
|
||||||
|
|
||||||
|
+ Try to fix hoot again because Twitter did a Twitter.
|
||||||
|
|
||||||
=== 0.9.8 Tentative Tentacle
|
=== 0.9.8 Tentative Tentacle
|
||||||
|
|
||||||
+ Switch database to WAL mode.
|
+ Switch database to WAL mode.
|
||||||
|
|
32
hoot.go
32
hoot.go
|
@ -28,8 +28,8 @@ import (
|
||||||
"humungus.tedunangst.com/r/webs/htfilter"
|
"humungus.tedunangst.com/r/webs/htfilter"
|
||||||
)
|
)
|
||||||
|
|
||||||
var tweetsel = cascadia.MustCompile("p.tweet-text")
|
var tweetsel = cascadia.MustCompile("div[itemProp=articleBody]")
|
||||||
var linksel = cascadia.MustCompile("a.tweet-timestamp")
|
var linksel = cascadia.MustCompile("a time")
|
||||||
var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor")
|
var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor")
|
||||||
var imgsel = cascadia.MustCompile("div.js-adaptive-photo img")
|
var imgsel = cascadia.MustCompile("div.js-adaptive-photo img")
|
||||||
var authorregex = regexp.MustCompile("twitter.com/([^/]+)")
|
var authorregex = regexp.MustCompile("twitter.com/([^/]+)")
|
||||||
|
@ -65,6 +65,34 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
|
||||||
|
|
||||||
divs := tweetsel.MatchAll(root)
|
divs := tweetsel.MatchAll(root)
|
||||||
for i, div := range divs {
|
for i, div := range divs {
|
||||||
|
{
|
||||||
|
twp := div.Parent.Parent.Parent
|
||||||
|
link := url
|
||||||
|
alink := linksel.MatchFirst(twp)
|
||||||
|
if alink == nil {
|
||||||
|
if i != 0 {
|
||||||
|
dlog.Printf("missing link")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
alink = alink.Parent
|
||||||
|
link = "https://twitter.com" + htfilter.GetAttr(alink, "href")
|
||||||
|
}
|
||||||
|
authormatch := authorregex.FindStringSubmatch(link)
|
||||||
|
if len(authormatch) < 2 {
|
||||||
|
dlog.Printf("no author?: %s", link)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
author := authormatch[1]
|
||||||
|
if author != wanted {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
text := htf.NodeText(div)
|
||||||
|
text = strings.Replace(text, "\n", " ", -1)
|
||||||
|
fmt.Fprintf(&buf, "> @%s: %s\n", author, text)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
twp := div.Parent.Parent.Parent
|
twp := div.Parent.Parent.Parent
|
||||||
link := url
|
link := url
|
||||||
alink := linksel.MatchFirst(twp)
|
alink := linksel.MatchFirst(twp)
|
||||||
|
|
Loading…
Reference in New Issue