dedupe blob file data

This commit is contained in:
Ted Unangst 2020-09-30 15:20:40 -04:00
parent 023073de03
commit 65712620c5
4 changed files with 80 additions and 22 deletions

View File

@ -17,6 +17,7 @@ package main
import ( import (
"bytes" "bytes"
"crypto/sha512"
"database/sql" "database/sql"
"encoding/json" "encoding/json"
"fmt" "fmt"
@ -509,20 +510,41 @@ func savefile(name string, desc string, url string, media string, local bool, da
return fileid, err return fileid, err
} }
// hashfiledata returns the SHA-512/256 digest of data, rendered as a
// lowercase hexadecimal string.
func hashfiledata(data []byte) string {
	digest := sha512.Sum512_256(data)
	// Slice the fixed-size array so %x formats it as one contiguous hex string.
	return fmt.Sprintf("%x", digest[:])
}
func savefileandxid(name string, desc string, url string, media string, local bool, data []byte) (int64, string, error) { func savefileandxid(name string, desc string, url string, media string, local bool, data []byte) (int64, string, error) {
xid := xfiltrate() var xid string
switch media { if local {
case "image/png": hash := hashfiledata(data)
xid += ".png" row := stmtCheckFileData.QueryRow(hash)
case "image/jpeg": err := row.Scan(&xid)
xid += ".jpg" if err == sql.ErrNoRows {
case "application/pdf": xid = xfiltrate()
xid += ".pdf" switch media {
case "text/plain": case "image/png":
xid += ".txt" xid += ".png"
} case "image/jpeg":
if url == "" { xid += ".jpg"
url = fmt.Sprintf("https://%s/d/%s", serverName, xid) case "application/pdf":
xid += ".pdf"
case "text/plain":
xid += ".txt"
}
_, err = stmtSaveFileData.Exec(xid, media, hash, data)
if err != nil {
return 0, "", err
}
} else if err != nil {
log.Printf("error checking file hash: %s", err)
return 0, "", err
}
if url == "" {
url = fmt.Sprintf("https://%s/d/%s", serverName, xid)
}
} }
res, err := stmtSaveFile.Exec(xid, name, desc, url, media, local) res, err := stmtSaveFile.Exec(xid, name, desc, url, media, local)
@ -530,12 +552,6 @@ func savefileandxid(name string, desc string, url string, media string, local bo
return 0, "", err return 0, "", err
} }
fileid, _ := res.LastInsertId() fileid, _ := res.LastInsertId()
if local {
_, err = stmtSaveFileData.Exec(xid, media, data)
if err != nil {
return 0, "", err
}
}
return fileid, xid, nil return fileid, xid, nil
} }
@ -890,6 +906,7 @@ var stmtHonksFromLongAgo *sql.Stmt
var stmtHonksByHonker, stmtSaveHonk, stmtUserByName, stmtUserByNumber *sql.Stmt var stmtHonksByHonker, stmtSaveHonk, stmtUserByName, stmtUserByNumber *sql.Stmt
var stmtEventHonks, stmtOneBonk, stmtFindZonk, stmtFindXonk, stmtSaveDonk *sql.Stmt var stmtEventHonks, stmtOneBonk, stmtFindZonk, stmtFindXonk, stmtSaveDonk *sql.Stmt
var stmtFindFile, stmtGetFileData, stmtSaveFileData, stmtSaveFile *sql.Stmt var stmtFindFile, stmtGetFileData, stmtSaveFileData, stmtSaveFile *sql.Stmt
var stmtCheckFileData *sql.Stmt
var stmtAddDoover, stmtGetDoovers, stmtLoadDoover, stmtZapDoover, stmtOneHonker *sql.Stmt var stmtAddDoover, stmtGetDoovers, stmtLoadDoover, stmtZapDoover, stmtOneHonker *sql.Stmt
var stmtUntagged, stmtDeleteHonk, stmtDeleteDonks, stmtDeleteOnts, stmtSaveZonker *sql.Stmt var stmtUntagged, stmtDeleteHonk, stmtDeleteDonks, stmtDeleteOnts, stmtSaveZonker *sql.Stmt
var stmtGetZonkers, stmtRecentHonkers, stmtGetXonker, stmtSaveXonker, stmtDeleteXonker *sql.Stmt var stmtGetZonkers, stmtRecentHonkers, stmtGetXonker, stmtSaveXonker, stmtDeleteXonker *sql.Stmt
@ -951,7 +968,8 @@ func prepareStatements(db *sql.DB) {
stmtDeleteDonks = preparetodie(db, "delete from donks where honkid = ?") stmtDeleteDonks = preparetodie(db, "delete from donks where honkid = ?")
stmtSaveFile = preparetodie(db, "insert into filemeta (xid, name, description, url, media, local) values (?, ?, ?, ?, ?, ?)") stmtSaveFile = preparetodie(db, "insert into filemeta (xid, name, description, url, media, local) values (?, ?, ?, ?, ?, ?)")
blobdb := openblobdb() blobdb := openblobdb()
stmtSaveFileData = preparetodie(blobdb, "insert into filedata (xid, media, content) values (?, ?, ?)") stmtSaveFileData = preparetodie(blobdb, "insert into filedata (xid, media, hash, content) values (?, ?, ?, ?)")
stmtCheckFileData = preparetodie(blobdb, "select xid from filedata where hash = ?")
stmtGetFileData = preparetodie(blobdb, "select media, content from filedata where xid = ?") stmtGetFileData = preparetodie(blobdb, "select media, content from filedata where xid = ?")
stmtFindXonk = preparetodie(db, "select honkid from honks where userid = ? and xid = ?") stmtFindXonk = preparetodie(db, "select honkid from honks where userid = ? and xid = ?")
stmtFindFile = preparetodie(db, "select fileid, xid from filemeta where url = ? and local = 1") stmtFindFile = preparetodie(db, "select fileid, xid from filemeta where url = ? and local = 1")

View File

@ -2,6 +2,8 @@ changelog
=== next === next
+ Dedupe blob file data.
- Custom lingo for those who don't like honking. - Custom lingo for those who don't like honking.
+ Better support for rich text bios. + Better support for rich text bios.

View File

@ -23,7 +23,7 @@ import (
"time" "time"
) )
var myVersion = 39 var myVersion = 40
type dbexecer interface { type dbexecer interface {
Exec(query string, args ...interface{}) (sql.Result, error) Exec(query string, args ...interface{}) (sql.Result, error)
@ -168,6 +168,39 @@ func upgradedb() {
doordie(db, "update config set value = 39 where key = 'dbversion'") doordie(db, "update config set value = 39 where key = 'dbversion'")
fallthrough fallthrough
case 39: case 39:
blobdb := openblobdb()
doordie(blobdb, "alter table filedata add column hash text")
doordie(blobdb, "create index idx_filehash on filedata(hash)")
rows, err := blobdb.Query("select xid, content from filedata")
if err != nil {
log.Fatal(err)
}
m := make(map[string]string)
for rows.Next() {
var xid string
var data sql.RawBytes
err := rows.Scan(&xid, &data)
if err != nil {
log.Fatal(err)
}
hash := hashfiledata(data)
m[xid] = hash
}
rows.Close()
tx, err := blobdb.Begin()
if err != nil {
log.Fatal(err)
}
for xid, hash := range m {
doordie(tx, "update filedata set hash = ? where xid = ?", hash, xid)
}
err = tx.Commit()
if err != nil {
log.Fatal(err)
}
doordie(db, "update config set value = 40 where key = 'dbversion'")
fallthrough
case 40:
default: default:
log.Fatalf("can't upgrade unknown version %d", dbversion) log.Fatalf("can't upgrade unknown version %d", dbversion)

View File

@ -172,7 +172,7 @@ func initblobdb() {
log.Print(err) log.Print(err)
return return
} }
_, err = blobdb.Exec("create table filedata (xid text, media text, content blob)") _, err = blobdb.Exec("create table filedata (xid text, media text, hash text, content blob)")
if err != nil { if err != nil {
log.Print(err) log.Print(err)
return return
@ -182,6 +182,11 @@ func initblobdb() {
log.Print(err) log.Print(err)
return return
} }
_, err = blobdb.Exec("create index idx_filehash on filedata(hash)")
if err != nil {
log.Print(err)
return
}
blobdb.Close() blobdb.Close()
} }