honk/import.go

526 lines
14 KiB
Go
Raw Normal View History

2019-11-13 00:00:00 +00:00
//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
package main
import (
"encoding/csv"
2019-11-13 00:00:00 +00:00
"encoding/json"
"fmt"
"html"
2019-11-13 00:00:00 +00:00
"io/ioutil"
2022-11-29 17:52:42 +00:00
"log"
2019-11-13 00:00:00 +00:00
"os"
"regexp"
2019-11-13 00:00:00 +00:00
"sort"
"strings"
"time"
)
func importMain(username, flavor, source string) {
switch flavor {
case "mastodon":
importMastodon(username, source)
2019-11-13 00:00:00 +00:00
case "twitter":
importTwitter(username, source)
2023-06-12 21:31:13 +00:00
case "instagram":
importInstagram(username, source)
2019-11-13 00:00:00 +00:00
default:
2022-02-06 05:42:13 +00:00
elog.Fatal("unknown source flavor")
2019-11-13 00:00:00 +00:00
}
}
// TootObject holds the fields that are decoded from the Object member of an
// outbox item (see the Toot type in importMastotoots).
type TootObject struct {
	Summary      string
	Content      string
	InReplyTo    string
	Conversation string
	Published    time.Time
	Tag          []struct {
		Type string
		Name string
	}
	Attachment []struct {
		Type      string
		MediaType string
		Url       string
		Name      string
	}
}

// PlainTootObject shares TootObject's fields but, being a separate defined
// type, carries none of its methods. Decoding into it therefore uses the
// default encoding/json struct handling rather than calling
// TootObject.UnmarshalJSON again.
type PlainTootObject TootObject

// UnmarshalJSON fills obj from b on a best-effort basis and always returns
// nil: whatever json.Unmarshal managed to decode is kept, and its error is
// dropped. As a result a value that does not have the expected shape (for
// example a bare JSON string) leaves obj as it was instead of failing the
// decode of the enclosing document.
func (obj *TootObject) UnmarshalJSON(b []byte) error {
	plain := (*PlainTootObject)(obj)
	// The error is discarded on purpose as far as this file shows; see above.
	_ = json.Unmarshal(b, plain)
	return nil
}
func importMastodon(username, source string) {
user, err := butwhatabout(username)
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Fatal(err)
}
if _, err := os.Stat(source + "/outbox.json"); err == nil {
importMastotoots(user, source)
} else {
ilog.Printf("skipping outbox.json!")
}
if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
importMastotooters(user, source)
} else {
ilog.Printf("skipping following_accounts.csv!")
}
}
func importMastotoots(user *WhatAbout, source string) {
type Toot struct {
Id string
Type string
To []string
Cc []string
Object TootObject
}
var outbox struct {
OrderedItems []Toot
}
ilog.Println("Importing honks...")
fd, err := os.Open(source + "/outbox.json")
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Fatal(err)
}
dec := json.NewDecoder(fd)
err = dec.Decode(&outbox)
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Fatalf("error parsing json: %s", err)
}
fd.Close()
havetoot := func(xid string) bool {
var id int64
row := stmtFindXonk.QueryRow(user.ID, xid)
err := row.Scan(&id)
if err == nil {
return true
}
return false
}
re_tootid := regexp.MustCompile("[^/]+$")
for _, item := range outbox.OrderedItems {
toot := item
if toot.Type != "Create" {
continue
}
2022-04-27 00:22:59 +00:00
if strings.HasSuffix(toot.Id, "/activity") {
toot.Id = strings.TrimSuffix(toot.Id, "/activity")
}
tootid := re_tootid.FindString(toot.Id)
xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
if havetoot(xid) {
continue
}
honk := Honk{
UserID: user.ID,
What: "honk",
Honker: user.URL,
XID: xid,
RID: toot.Object.InReplyTo,
Date: toot.Object.Published,
URL: xid,
Audience: append(toot.To, toot.Cc...),
Noise: toot.Object.Content,
Convoy: toot.Object.Conversation,
Whofore: 2,
Format: "html",
Precis: toot.Object.Summary,
}
if honk.RID != "" {
honk.What = "tonk"
}
if !loudandproud(honk.Audience) {
honk.Whofore = 3
}
for _, att := range toot.Object.Attachment {
switch att.Type {
case "Document":
fname := fmt.Sprintf("%s/%s", source, att.Url)
data, err := ioutil.ReadFile(fname)
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Printf("error reading media: %s", fname)
continue
}
u := xfiltrate()
name := att.Name
desc := name
newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Printf("error saving media: %s", fname)
continue
}
donk := &Donk{
FileID: fileid,
}
honk.Donks = append(honk.Donks, donk)
}
}
for _, t := range toot.Object.Tag {
switch t.Type {
case "Hashtag":
honk.Onts = append(honk.Onts, t.Name)
}
}
savehonk(&honk)
}
}
func importMastotooters(user *WhatAbout, source string) {
ilog.Println("Importing honkers...")
fd, err := os.Open(source + "/following_accounts.csv")
if err != nil {
elog.Fatal(err)
}
r := csv.NewReader(fd)
data, err := r.ReadAll()
if err != nil {
elog.Fatal(err)
}
fd.Close()
var meta HonkerMeta
mj, _ := jsonify(&meta)
for i, d := range data {
if i == 0 {
continue
}
url := "@" + d[0]
name := ""
flavor := "peep"
combos := ""
2023-02-19 22:46:59 +00:00
_, err := savehonker(user, url, name, flavor, combos, mj)
if err != nil {
elog.Printf("trouble with a honker: %s", err)
}
}
}
2019-11-13 00:00:00 +00:00
func importTwitter(username, source string) {
user, err := butwhatabout(username)
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Fatal(err)
2019-11-13 00:00:00 +00:00
}
type Tweet struct {
2019-11-13 06:42:44 +00:00
date time.Time
convoy string
2022-11-29 17:52:42 +00:00
Tweet struct {
CreatedAt string `json:"created_at"`
DisplayTextRange []string `json:"display_text_range"`
EditInfo struct {
Initial struct {
EditTweetIds []string `json:"editTweetIds"`
EditableUntil string `json:"editableUntil"`
EditsRemaining string `json:"editsRemaining"`
IsEditEligible bool `json:"isEditEligible"`
} `json:"initial"`
} `json:"edit_info"`
Entities struct {
Hashtags []struct {
Indices []string `json:"indices"`
Text string `json:"text"`
} `json:"hashtags"`
Media []struct {
DisplayURL string `json:"display_url"`
ExpandedURL string `json:"expanded_url"`
ID string `json:"id"`
IdStr string `json:"id_str"`
Indices []string `json:"indices"`
MediaURL string `json:"media_url"`
MediaUrlHttps string `json:"media_url_https"`
Sizes struct {
Large struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"large"`
Medium struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"medium"`
Small struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"small"`
Thumb struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"thumb"`
} `json:"sizes"`
Type string `json:"type"`
URL string `json:"url"`
} `json:"media"`
Symbols []interface{} `json:"symbols"`
Urls []struct {
DisplayURL string `json:"display_url"`
ExpandedURL string `json:"expanded_url"`
Indices []string `json:"indices"`
URL string `json:"url"`
} `json:"urls"`
UserMentions []interface{} `json:"user_mentions"`
} `json:"entities"`
ExtendedEntities struct {
Media []struct {
DisplayURL string `json:"display_url"`
ExpandedURL string `json:"expanded_url"`
ID string `json:"id"`
IdStr string `json:"id_str"`
Indices []string `json:"indices"`
MediaURL string `json:"media_url"`
MediaUrlHttps string `json:"media_url_https"`
Sizes struct {
Large struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"large"`
Medium struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"medium"`
Small struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"small"`
Thumb struct {
H string `json:"h"`
Resize string `json:"resize"`
W string `json:"w"`
} `json:"thumb"`
} `json:"sizes"`
Type string `json:"type"`
URL string `json:"url"`
} `json:"media"`
} `json:"extended_entities"`
FavoriteCount string `json:"favorite_count"`
Favorited bool `json:"favorited"`
FullText string `json:"full_text"`
ID string `json:"id"`
IdStr string `json:"id_str"`
InReplyToScreenName string `json:"in_reply_to_screen_name"`
InReplyToStatusID string `json:"in_reply_to_status_id"`
InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
InReplyToUserID string `json:"in_reply_to_user_id"`
InReplyToUserIdStr string `json:"in_reply_to_user_id_str"`
Lang string `json:"lang"`
PossiblySensitive bool `json:"possibly_sensitive"`
RetweetCount string `json:"retweet_count"`
Retweeted bool `json:"retweeted"`
Source string `json:"source"`
Truncated bool `json:"truncated"`
} `json:"tweet"`
2019-11-13 00:00:00 +00:00
}
var tweets []*Tweet
fd, err := os.Open(source + "/tweet.js")
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Fatal(err)
2019-11-13 00:00:00 +00:00
}
2019-11-13 06:42:44 +00:00
// skip past window.YTD.tweet.part0 =
2019-11-13 00:00:00 +00:00
fd.Seek(25, 0)
dec := json.NewDecoder(fd)
err = dec.Decode(&tweets)
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Fatalf("error parsing json: %s", err)
2019-11-13 00:00:00 +00:00
}
fd.Close()
2019-11-13 06:42:44 +00:00
tweetmap := make(map[string]*Tweet)
2019-11-13 00:00:00 +00:00
for _, t := range tweets {
2022-11-29 17:52:42 +00:00
t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
tweetmap[t.Tweet.IdStr] = t
2019-11-13 00:00:00 +00:00
}
sort.Slice(tweets, func(i, j int) bool {
return tweets[i].date.Before(tweets[j].date)
})
havetwid := func(xid string) bool {
var id int64
row := stmtFindXonk.QueryRow(user.ID, xid)
err := row.Scan(&id)
if err == nil {
2022-11-29 17:52:42 +00:00
log.Printf("id = %v", id)
return true
}
return false
}
2022-11-29 17:52:42 +00:00
log.Printf("importing %v tweets", len(tweets))
2019-11-13 00:00:00 +00:00
for _, t := range tweets {
2022-11-29 17:52:42 +00:00
xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
if havetwid(xid) {
continue
}
2022-11-29 17:52:42 +00:00
if t.Tweet.FavoriteCount == "0" || t.Tweet.FavoriteCount == "" {
log.Printf("skipping, unworthy tweet")
continue
}
2019-11-13 00:00:00 +00:00
what := "honk"
2019-11-13 06:42:44 +00:00
noise := ""
2022-11-29 17:52:42 +00:00
if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
2019-11-13 06:42:44 +00:00
t.convoy = parent.convoy
2019-11-13 00:00:00 +00:00
what = "tonk"
2019-11-13 06:42:44 +00:00
} else {
2022-11-29 17:52:42 +00:00
t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
if t.Tweet.InReplyToScreenName != "" {
2019-11-13 06:42:44 +00:00
noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
2022-11-29 17:52:42 +00:00
t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
2019-11-13 06:42:44 +00:00
what = "tonk"
}
2019-11-13 00:00:00 +00:00
}
audience := []string{thewholeworld}
honk := Honk{
UserID: user.ID,
Username: user.Name,
What: what,
Honker: user.URL,
XID: xid,
Date: t.date,
Format: "markdown",
Audience: audience,
2019-11-13 06:42:44 +00:00
Convoy: t.convoy,
2019-11-13 00:00:00 +00:00
Public: true,
Whofore: 2,
}
2022-11-29 17:52:42 +00:00
noise += t.Tweet.FullText
// unbelievable
noise = html.UnescapeString(noise)
2022-11-29 17:52:42 +00:00
for _, r := range t.Tweet.Entities.Urls {
noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
2019-11-13 00:00:00 +00:00
}
2022-11-29 17:52:42 +00:00
for _, m := range t.Tweet.Entities.Media {
u := m.MediaURL
2019-11-13 00:00:00 +00:00
idx := strings.LastIndexByte(u, '/')
u = u[idx+1:]
2022-11-29 17:52:42 +00:00
fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
2019-11-13 00:00:00 +00:00
data, err := ioutil.ReadFile(fname)
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Printf("error reading media: %s", fname)
2019-11-13 00:00:00 +00:00
continue
}
newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
2019-11-13 00:00:00 +00:00
if err != nil {
2022-02-06 05:42:13 +00:00
elog.Printf("error saving media: %s", fname)
2019-11-13 00:00:00 +00:00
continue
}
donk := &Donk{
FileID: fileid,
}
honk.Donks = append(honk.Donks, donk)
2022-11-29 17:52:42 +00:00
noise = strings.Replace(noise, m.URL, "", -1)
2019-11-13 00:00:00 +00:00
}
2022-11-29 17:52:42 +00:00
for _, ht := range t.Tweet.Entities.Hashtags {
2019-11-13 00:00:00 +00:00
honk.Onts = append(honk.Onts, "#"+ht.Text)
}
honk.Noise = noise
2022-11-29 17:52:42 +00:00
err := savehonk(&honk)
log.Printf("honk saved %v -> %v", xid, err)
2019-11-13 00:00:00 +00:00
}
}
2023-06-12 21:31:13 +00:00
// importInstagram imports the posts listed in source/content/posts_1.json as
// public honks owned by username.
//
// Only the first media entry of a post is used: its title becomes the honk
// text, its creation timestamp the honk date, and the file named by its URI
// (relative to source) is attached. Posts without any media entry are
// skipped with a log line. Every honk gets a fresh XID and convoy from
// xfiltrate.
//
// Failing to look up the user or to open and parse the JSON file is fatal.
// A post whose media file cannot be read or saved is logged and skipped.
func importInstagram(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Gram struct {
		Media []struct {
			URI      string
			Creation int64 `json:"creation_timestamp"`
			Title    string
		}
	}
	var grams []*Gram
	fd, err := os.Open(source + "/content/posts_1.json")
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&grams)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()

	// Both the sort and the loop below index Media[0], so drop entries that
	// have no media (or are null) up front instead of panicking on them.
	usable := grams[:0]
	for _, g := range grams {
		if g == nil || len(g.Media) == 0 {
			elog.Printf("skipping gram with no media")
			continue
		}
		usable = append(usable, g)
	}
	grams = usable

	log.Printf("importing %d grams", len(grams))
	sort.Slice(grams, func(i, j int) bool {
		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
	})

	for _, post := range grams {
		g := post.Media[0]
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
		convoy := "data:,acoustichonkytonk-" + xfiltrate()
		honk := Honk{
			UserID:   user.ID,
			Username: user.Name,
			What:     "honk",
			Honker:   user.URL,
			XID:      xid,
			Date:     time.Unix(g.Creation, 0),
			Format:   "markdown",
			Audience: []string{thewholeworld},
			Convoy:   convoy,
			Public:   true,
			Whofore:  2,
		}

		u := xfiltrate()
		fname := fmt.Sprintf("%s/%s", source, g.URI)
		data, err := ioutil.ReadFile(fname)
		if err != nil {
			elog.Printf("error reading media: %s: %s", fname, err)
			continue
		}
		newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
		// NOTE(review): the media type is hard-coded for every file.
		fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
		if err != nil {
			elog.Printf("error saving media: %s: %s", fname, err)
			continue
		}
		honk.Donks = append(honk.Donks, &Donk{FileID: fileid})

		honk.Noise = g.Title
		err = savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}