match all images in hoots

This commit is contained in:
Ted Unangst 2022-02-05 16:23:40 -05:00
parent e01a9caf4d
commit 28a061a03f
1 changed file with 8 additions and 6 deletions

14
hoot.go
View File

@@ -44,21 +44,23 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
log.Printf("error parsing hoot: %s", err) log.Printf("error parsing hoot: %s", err)
return url return url
} }
divs := tweetsel.MatchAll(root)
url = strings.Replace(url, "mobile.twitter.com", "twitter.com", -1) url = strings.Replace(url, "mobile.twitter.com", "twitter.com", -1)
var wanted string
wantmatch := authorregex.FindStringSubmatch(url) wantmatch := authorregex.FindStringSubmatch(url)
var wanted string
if len(wantmatch) == 2 { if len(wantmatch) == 2 {
wanted = wantmatch[1] wanted = wantmatch[1]
} }
var buf strings.Builder
fmt.Fprintf(&buf, "%s\n", url)
var htf htfilter.Filter var htf htfilter.Filter
htf.Imager = func(node *html.Node) string { htf.Imager = func(node *html.Node) string {
return fmt.Sprintf(" <img src='%s'>", htfilter.GetAttr(node, "src")) return fmt.Sprintf(" <img src='%s'>", htfilter.GetAttr(node, "src"))
} }
var buf strings.Builder
fmt.Fprintf(&buf, "%s\n", url)
divs := tweetsel.MatchAll(root)
for i, div := range divs { for i, div := range divs {
twp := div.Parent.Parent.Parent twp := div.Parent.Parent.Parent
link := url link := url
@@ -87,7 +89,7 @@ func hootextractor(r io.Reader, url string, seen map[string]bool) string {
if author != wanted { if author != wanted {
continue continue
} }
if img := imgsel.MatchFirst(twp); img != nil { for img := imgsel.MatchFirst(twp); img != nil; imgsel.MatchFirst(twp) {
img.Parent.RemoveChild(img) img.Parent.RemoveChild(img)
div.AppendChild(img) div.AppendChild(img)
} }