twitter import fixes by benjojo

This commit is contained in:
Ted Unangst 2022-11-29 12:52:42 -05:00
parent a59e0477b6
commit 580970253e
1 changed file with 136 additions and 35 deletions

171
import.go
View File

@@ -21,6 +21,7 @@ import (
"fmt" "fmt"
"html" "html"
"io/ioutil" "io/ioutil"
"log"
"os" "os"
"regexp" "regexp"
"sort" "sort"
@@ -223,26 +224,118 @@ func importTwitter(username, source string) {
} }
type Tweet struct { type Tweet struct {
ID_str string
Created_at string
Full_text string
In_reply_to_screen_name string
In_reply_to_status_id string
Entities struct {
Hashtags []struct {
Text string
}
Media []struct {
Url string
Media_url string
}
Urls []struct {
Url string
Expanded_url string
}
}
date time.Time date time.Time
convoy string convoy string
Tweet struct {
CreatedAt string `json:"created_at"`
DisplayTextRange []string `json:"display_text_range"`
EditInfo struct {
Initial struct {
EditTweetIds []string `json:"editTweetIds"`
EditableUntil string `json:"editableUntil"`
EditsRemaining string `json:"editsRemaining"`
IsEditEligible bool `json:"isEditEligible"`
} `json:"initial"`
} `json:"edit_info"`
Entities struct {
Hashtags []struct {
Indices []string `json:"indices"`
Text string `json:"text"`
} `json:"hashtags"`
Media []struct {
DisplayURL string `json:"display_url"`
ExpandedURL string `json:"expanded_url"`
ID string `json:"id"`
IdStr string `json:"id_str"`
Indices []string `json:"indices"`
MediaURL string `json:"media_url"`
MediaUrlHttps string `json:"media_url_https"`
Sizes struct {
Large struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"large"`
Medium struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"medium"`
Small struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"small"`
Thumb struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"thumb"`
} `json:"sizes"`
Type string `json:"type"`
URL string `json:"url"`
} `json:"media"`
Symbols []interface{} `json:"symbols"`
Urls []struct {
DisplayURL string `json:"display_url"`
ExpandedURL string `json:"expanded_url"`
Indices []string `json:"indices"`
URL string `json:"url"`
} `json:"urls"`
UserMentions []interface{} `json:"user_mentions"`
} `json:"entities"`
ExtendedEntities struct {
Media []struct {
DisplayURL string `json:"display_url"`
ExpandedURL string `json:"expanded_url"`
ID string `json:"id"`
IdStr string `json:"id_str"`
Indices []string `json:"indices"`
MediaURL string `json:"media_url"`
MediaUrlHttps string `json:"media_url_https"`
Sizes struct {
Large struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"large"`
Medium struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"medium"`
Small struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"small"`
Thumb struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"thumb"`
} `json:"sizes"`
Type string `json:"type"`
URL string `json:"url"`
} `json:"media"`
} `json:"extended_entities"`
FavoriteCount string `json:"favorite_count"`
Favorited bool `json:"favorited"`
FullText string `json:"full_text"`
ID string `json:"id"`
IdStr string `json:"id_str"`
InReplyToScreenName string `json:"in_reply_to_screen_name"`
InReplyToStatusID string `json:"in_reply_to_status_id"`
InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
InReplyToUserID string `json:"in_reply_to_user_id"`
InReplyToUserIdStr string `json:"in_reply_to_user_id_str"`
Lang string `json:"lang"`
PossiblySensitive bool `json:"possibly_sensitive"`
RetweetCount string `json:"retweet_count"`
Retweeted bool `json:"retweeted"`
Source string `json:"source"`
Truncated bool `json:"truncated"`
} `json:"tweet"`
} }
var tweets []*Tweet var tweets []*Tweet
@@ -260,8 +353,8 @@ func importTwitter(username, source string) {
fd.Close() fd.Close()
tweetmap := make(map[string]*Tweet) tweetmap := make(map[string]*Tweet)
for _, t := range tweets { for _, t := range tweets {
t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at) t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
tweetmap[t.ID_str] = t tweetmap[t.Tweet.IdStr] = t
} }
sort.Slice(tweets, func(i, j int) bool { sort.Slice(tweets, func(i, j int) bool {
return tweets[i].date.Before(tweets[j].date) return tweets[i].date.Before(tweets[j].date)
@@ -271,26 +364,33 @@ func importTwitter(username, source string) {
row := stmtFindXonk.QueryRow(user.ID, xid) row := stmtFindXonk.QueryRow(user.ID, xid)
err := row.Scan(&id) err := row.Scan(&id)
if err == nil { if err == nil {
log.Printf("id = %v", id)
return true return true
} }
return false return false
} }
log.Printf("importing %v tweets", len(tweets))
for _, t := range tweets { for _, t := range tweets {
xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str) xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
if havetwid(xid) { if havetwid(xid) {
continue continue
} }
if t.Tweet.FavoriteCount == "0" || t.Tweet.FavoriteCount == "" {
log.Printf("skipping, unworthy tweet")
continue
}
what := "honk" what := "honk"
noise := "" noise := ""
if parent := tweetmap[t.In_reply_to_status_id]; parent != nil { if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
t.convoy = parent.convoy t.convoy = parent.convoy
what = "tonk" what = "tonk"
} else { } else {
t.convoy = "data:,acoustichonkytonk-" + t.ID_str t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
if t.In_reply_to_screen_name != "" { if t.Tweet.InReplyToScreenName != "" {
noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n", noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
t.In_reply_to_screen_name, t.In_reply_to_status_id) t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
what = "tonk" what = "tonk"
} }
} }
@@ -308,17 +408,17 @@ func importTwitter(username, source string) {
Public: true, Public: true,
Whofore: 2, Whofore: 2,
} }
noise += t.Full_text noise += t.Tweet.FullText
// unbelievable // unbelievable
noise = html.UnescapeString(noise) noise = html.UnescapeString(noise)
for _, r := range t.Entities.Urls { for _, r := range t.Tweet.Entities.Urls {
noise = strings.Replace(noise, r.Url, r.Expanded_url, -1) noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
} }
for _, m := range t.Entities.Media { for _, m := range t.Tweet.Entities.Media {
u := m.Media_url u := m.MediaURL
idx := strings.LastIndexByte(u, '/') idx := strings.LastIndexByte(u, '/')
u = u[idx+1:] u = u[idx+1:]
fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u) fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
data, err := ioutil.ReadFile(fname) data, err := ioutil.ReadFile(fname)
if err != nil { if err != nil {
elog.Printf("error reading media: %s", fname) elog.Printf("error reading media: %s", fname)
@@ -335,12 +435,13 @@ func importTwitter(username, source string) {
FileID: fileid, FileID: fileid,
} }
honk.Donks = append(honk.Donks, donk) honk.Donks = append(honk.Donks, donk)
noise = strings.Replace(noise, m.Url, "", -1) noise = strings.Replace(noise, m.URL, "", -1)
} }
for _, ht := range t.Entities.Hashtags { for _, ht := range t.Tweet.Entities.Hashtags {
honk.Onts = append(honk.Onts, "#"+ht.Text) honk.Onts = append(honk.Onts, "#"+ht.Text)
} }
honk.Noise = noise honk.Noise = noise
savehonk(&honk) err := savehonk(&honk)
log.Printf("honk saved %v -> %v", xid, err)
} }
} }