honk/import.go

//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
package main
import (
"encoding/json"
"fmt"
"html"
"io/ioutil"
"os"
"regexp"
"sort"
"strings"
"time"
)
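// importMain dispatches an import of an exported archive for the named
// local user. flavor selects the importer ("mastodon" or "twitter") and
// source is the directory containing the unpacked export.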
func importMain(username, flavor, source string) {
switch flavor {
case "mastodon":
importMastodon(username, source)
case "twitter":
importTwitter(username, source)
default:
elog.Fatal("unknown source flavor")
}
}
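// TootObject is the subset of an ActivityPub object from a Mastodon
// outbox.json that the importer cares about.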
type TootObject struct {
Summary string
Content string
InReplyTo string
Conversation string
Published time.Time
Tag []struct {
Type string
Name string
}
Attachment []struct {
Type string
MediaType string
Url string
Name string
}
}
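// PlainTootObject exists so the custom unmarshaler below doesn't recurse
// into itself. UnmarshalJSON deliberately swallows decode errors: entries
// whose Object is not a full object (boosts carry just a URL string) would
// otherwise abort decoding the whole outbox, and only Create items are
// used further down anyway.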
type PlainTootObject TootObject
func (obj *TootObject) UnmarshalJSON(b []byte) error {
p := (*PlainTootObject)(obj)
json.Unmarshal(b, p)
return nil
}
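// importMastodon reads outbox.json from an unpacked Mastodon export and
// saves each Create activity as a honk belonging to the local user.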
func importMastodon(username, source string) {
user, err := butwhatabout(username)
if err != nil {
elog.Fatal(err)
}
type Toot struct {
Id string
Type string
To []string
Cc []string
Object TootObject
}
var outbox struct {
OrderedItems []Toot
}
fd, err := os.Open(source + "/outbox.json")
if err != nil {
elog.Fatal(err)
}
dec := json.NewDecoder(fd)
err = dec.Decode(&outbox)
if err != nil {
elog.Fatalf("error parsing json: %s", err)
}
fd.Close()
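// skip toots that were already imported on a previous run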
havetoot := func(xid string) bool {
var id int64
row := stmtFindXonk.QueryRow(user.ID, xid)
err := row.Scan(&id)
return err == nil
}
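// the toot id is the last path segment of the ActivityPub id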
re_tootid := regexp.MustCompile("[^/]+$")
for _, item := range outbox.OrderedItems {
toot := item
if toot.Type != "Create" {
continue
}
if strings.HasSuffix(toot.Id, "/activity") {
toot.Id = strings.TrimSuffix(toot.Id, "/activity")
}
tootid := re_tootid.FindString(toot.Id)
xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
if havetoot(xid) {
continue
}
honk := Honk{
UserID: user.ID,
What: "honk",
Honker: user.URL,
XID: xid,
RID: toot.Object.InReplyTo,
Date: toot.Object.Published,
URL: xid,
Audience: append(toot.To, toot.Cc...),
Noise: toot.Object.Content,
Convoy: toot.Object.Conversation,
Whofore: 2,
Format: "html",
Precis: toot.Object.Summary,
}
if honk.RID != "" {
honk.What = "tonk"
}
if !loudandproud(honk.Audience) {
honk.Whofore = 3
}
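// copy media attachments out of the export and register them as donks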
for _, att := range toot.Object.Attachment {
switch att.Type {
case "Document":
fname := fmt.Sprintf("%s/%s", source, att.Url)
data, err := ioutil.ReadFile(fname)
if err != nil {
elog.Printf("error reading media: %s", fname)
continue
}
u := xfiltrate()
name := att.Name
desc := name
newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
if err != nil {
elog.Printf("error saving media: %s", fname)
continue
}
donk := &Donk{
FileID: fileid,
}
honk.Donks = append(honk.Donks, donk)
}
}
for _, t := range toot.Object.Tag {
switch t.Type {
case "Hashtag":
honk.Onts = append(honk.Onts, t.Name)
}
}
savehonk(&honk)
}
}
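// importTwitter reads tweet.js from an unpacked Twitter archive and saves
// each tweet as a honk belonging to the local user.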
func importTwitter(username, source string) {
user, err := butwhatabout(username)
if err != nil {
elog.Fatal(err)
}
type Tweet struct {
ID_str string
Created_at string
Full_text string
In_reply_to_screen_name string
In_reply_to_status_id string
Entities struct {
Hashtags []struct {
Text string
}
Media []struct {
Url string
Media_url string
}
Urls []struct {
Url string
Expanded_url string
}
}
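// filled in locally after parsing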
date time.Time
convoy string
}
var tweets []*Tweet
fd, err := os.Open(source + "/tweet.js")
if err != nil {
elog.Fatal(err)
}
// skip past window.YTD.tweet.part0 =
fd.Seek(25, 0)
dec := json.NewDecoder(fd)
err = dec.Decode(&tweets)
if err != nil {
elog.Fatalf("error parsing json: %s", err)
}
fd.Close()
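// index tweets by id so replies can find their parent's convoy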
tweetmap := make(map[string]*Tweet)
for _, t := range tweets {
t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
tweetmap[t.ID_str] = t
}
sort.Slice(tweets, func(i, j int) bool {
return tweets[i].date.Before(tweets[j].date)
})
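// skip tweets that were already imported on a previous run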
havetwid := func(xid string) bool {
var id int64
row := stmtFindXonk.QueryRow(user.ID, xid)
err := row.Scan(&id)
return err == nil
}
for _, t := range tweets {
xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
if havetwid(xid) {
continue
}
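// a reply to a tweet in the archive joins its parent's convoy; otherwise
// start a fresh convoy and, if it was a reply to a tweet not in the
// archive, link back to the original on twitter.com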
what := "honk"
noise := ""
if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
t.convoy = parent.convoy
what = "tonk"
} else {
t.convoy = "data:,acoustichonkytonk-" + t.ID_str
if t.In_reply_to_screen_name != "" {
noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
t.In_reply_to_screen_name, t.In_reply_to_status_id)
what = "tonk"
}
}
audience := []string{thewholeworld}
honk := Honk{
UserID: user.ID,
Username: user.Name,
What: what,
Honker: user.URL,
XID: xid,
Date: t.date,
Format: "markdown",
Audience: audience,
Convoy: t.convoy,
Public: true,
Whofore: 2,
}
noise += t.Full_text
// unbelievable: the archive stores the tweet text with HTML entities escaped
noise = html.UnescapeString(noise)
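// expand t.co short links back to their original targets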
for _, r := range t.Entities.Urls {
noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
}
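// copy media files from the archive's tweet_media directory and attach them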
for _, m := range t.Entities.Media {
u := m.Media_url
idx := strings.LastIndexByte(u, '/')
u = u[idx+1:]
fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
data, err := ioutil.ReadFile(fname)
if err != nil {
elog.Printf("error reading media: %s", fname)
continue
}
newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
fileid, err := savefile(u, u, newurl, "image/jpeg", true, data)
if err != nil {
elog.Printf("error saving media: %s", fname)
continue
}
donk := &Donk{
FileID: fileid,
}
honk.Donks = append(honk.Donks, donk)
noise = strings.Replace(noise, m.Url, "", -1)
}
for _, ht := range t.Entities.Hashtags {
honk.Onts = append(honk.Onts, "#"+ht.Text)
}
honk.Noise = noise
savehonk(&honk)
}
}