generate statistics for the year of 2017

This commit is contained in:
2018-02-22 17:46:16 -06:00
parent cd72fb5cd7
commit 50c31e38b5
11 changed files with 1330 additions and 36 deletions

2
src/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
top-of-reddit
vendor

45
src/Gopkg.lock generated Normal file
View File

@@ -0,0 +1,45 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
name = "github.com/boltdb/bolt"
packages = ["."]
revision = "2f1ce7a837dcb8da3ec595b1dac9d0632f0f99e8"
version = "v1.3.1"
[[projects]]
name = "github.com/mattn/go-runewidth"
packages = ["."]
revision = "9e777a8366cce605130a531d2cd6363d07ad7317"
version = "v0.0.2"
[[projects]]
branch = "master"
name = "github.com/olekukonko/tablewriter"
packages = ["."]
revision = "b8a9be070da40449e501c3c4730a889e42d87a9e"
[[projects]]
name = "github.com/tidwall/gjson"
packages = ["."]
revision = "01f00f129617a6fe98941fb920d6c760241b54d2"
version = "v1.1.0"
[[projects]]
branch = "master"
name = "github.com/tidwall/match"
packages = ["."]
revision = "1731857f09b1f38450e2c12409748407822dc6be"
[[projects]]
branch = "master"
name = "golang.org/x/sys"
packages = ["unix"]
revision = "f6cff0780e542efa0c8e864dc8fa522808f6a598"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "34c72af529ac66d9c56bbe5e1f8601ae30e5ae71eb725d6592ddf2c6b07ec107"
solver-name = "gps-cdcl"
solver-version = 1

42
src/Gopkg.toml Normal file
View File

@@ -0,0 +1,42 @@
# Gopkg.toml example
#
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
# for detailed Gopkg.toml documentation.
#
# required = ["github.com/user/thing/cmd/thing"]
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
#
# [[constraint]]
# name = "github.com/user/project"
# version = "1.0.0"
#
# [[constraint]]
# name = "github.com/user/project2"
# branch = "dev"
# source = "github.com/myfork/project2"
#
# [[override]]
# name = "github.com/x/y"
# version = "2.4.0"
#
# [prune]
# non-go = false
# go-tests = true
# unused-packages = true
[[constraint]]
name = "github.com/boltdb/bolt"
version = "1.3.1"
[[constraint]]
name = "github.com/tidwall/gjson"
version = "1.1.0"
[prune]
go-tests = true
unused-packages = true
[[constraint]]
branch = "master"
name = "github.com/olekukonko/tablewriter"

376
src/main.go Normal file
View File

@@ -0,0 +1,376 @@
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"os/exec"
"sort"
"strconv"
"time"
"github.com/boltdb/bolt"
"github.com/mgerb/top-of-reddit/src/model"
"github.com/tidwall/gjson"
)
// Request settings and the time layouts used to build bucket keys and
// output paths.
const (
// base URL of a subreddit; getPosts appends "<subreddit>.json" to it
REDDIT_URL string = "https://www.reddit.com/r/"
// value sent in the User-Agent header of every request
USER_AGENT string = "top-of-reddit:bot"
// month-day-year layout used to name the per-day buckets
DATE_FORMAT string = "01-02-2006"
// four-digit year layout, first component of the output folder
YEAR_FORMAT string = "2006"
// two-digit month layout, second component of the output folder
MONTH_FORMAT string = "01"
)
// Names of the bolt buckets and keys used by this program.
var (
// buckets
// DAILY_BUCKET contains one nested bucket per day, named with DATE_FORMAT
DAILY_BUCKET []byte = []byte("daily_bucket")
// MAIN_BUCKET holds the TODAY_KEY entry
MAIN_BUCKET []byte = []byte("main")
// store the current day to keep track when day turns over
TODAY_KEY []byte = []byte("today_date")
)
func main() {
// start database connection
db := openDbSession()
defer db.Close()
for {
fmt.Println("Updating...")
// get reddit posts from r/all
response, err := getPosts("all")
if err != nil {
log.Println(err.Error())
} else {
// store posts in RedditPost slice
posts, err := convertPosts(response)
if err != nil {
log.Println(err.Error())
} else {
// update the daily bucket with posts
updateDailyPosts(db, DAILY_BUCKET, getTodayBucket(), posts)
checkDateChange(db)
}
}
time.Sleep(time.Second * 30)
}
}
// openDbSession opens (creating it if necessary) the bolt database file
// "reddit.db" in the working directory, waiting at most one second for the
// file lock. The process is terminated if the database cannot be opened.
func openDbSession() *bolt.DB {
	opts := &bolt.Options{Timeout: time.Second}

	db, err := bolt.Open("reddit.db", 0600, opts)
	if err != nil {
		log.Fatal(err)
	}

	return db
}
// getTodayBucket returns the bucket name for the current local date,
// formatted with DATE_FORMAT.
func getTodayBucket() []byte {
	today := time.Now().Format(DATE_FORMAT)
	return []byte(today)
}
// getYesterdayTime returns the current time moved back by one calendar day.
func getYesterdayTime() time.Time {
	const dayOffset = -1
	now := time.Now()
	return now.AddDate(0, 0, dayOffset)
}
// getYesterdayBucket returns the bucket name for the previous calendar day,
// formatted with DATE_FORMAT.
func getYesterdayBucket() []byte {
	name := getYesterdayTime().Format(DATE_FORMAT)
	return []byte(name)
}
// getFolderPath returns the output folder for yesterday's posts, relative to
// the working directory, in the form "../<year>/<month>".
func getFolderPath() string {
	y := getYesterdayTime()
	return fmt.Sprintf("../%s/%s", y.Format(YEAR_FORMAT), y.Format(MONTH_FORMAT))
}
// checkDateChange publishes a day's posts when the current date differs from
// the date stored under TODAY_KEY, or when no date has been stored yet.
//
// Publishing means: load the posts of the stored day, write them to a
// markdown file, and push the repository. The stored date is only advanced
// after all of that succeeded, so a failed attempt is retried on the next
// call. Errors are logged, not returned.
//
// The work is deliberately done outside of any bolt write transaction:
// getStoredPosts opens its own read transaction, and the file and git steps
// are slow, so keeping a read-write transaction open around them would both
// nest transactions on one goroutine and hold the write lock for the whole
// push.
func checkDateChange(db *bolt.DB) {
	// read the clock once so every step below agrees on what "today" is
	today := getTodayBucket()

	// fetch the stored date (creating the main bucket on first run)
	var storedDay []byte
	err := db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists(MAIN_BUCKET)
		if err != nil {
			return err
		}
		if v := b.Get(TODAY_KEY); v != nil {
			// copy: slices returned by Get are only valid inside the transaction
			storedDay = append([]byte{}, v...)
		}
		return nil
	})
	if err != nil {
		log.Println(err)
		return
	}

	// nothing to do until the day turns over
	if storedDay != nil && string(storedDay) == string(today) {
		return
	}

	// if no date has been stored yet, publish what exists for today
	if storedDay == nil {
		storedDay = today
	}

	fmt.Println("Creating markdown!")

	storedPosts, err := getStoredPosts(db, DAILY_BUCKET, storedDay)
	if err != nil {
		log.Println(err)
		return
	}

	if err = writePostsToFile(string(storedDay), storedPosts); err != nil {
		log.Println(err)
		return
	}

	// push to github
	if err = pushToGithub(); err != nil {
		log.Println(err)
		return
	}

	// set today's date in database
	err = db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists(MAIN_BUCKET)
		if err != nil {
			return err
		}
		return b.Put(TODAY_KEY, today)
	})
	if err != nil {
		log.Println(err)
	}
}
// writePostsToFile renders posts as markdown into "<folder>/<fileName>.md",
// where <folder> is the value of getFolderPath(). The folder is created if it
// does not exist and an existing file of the same name is truncated.
//
// It returns the first error encountered while creating the folder, creating
// the file, writing a post, or flushing the file to disk.
func writePostsToFile(fileName string, posts []model.RedditPost) error {
	folderPath := getFolderPath()

	// create directory if not exists (MkdirAll succeeds when it already does)
	if err := os.MkdirAll(folderPath, 0700); err != nil {
		return err
	}

	// create new markdown file
	file, err := os.Create(folderPath + "/" + fileName + ".md")
	if err != nil {
		return err
	}
	// registered only after the error check, so file is known to be valid
	defer file.Close()

	for index, p := range posts {
		if _, err := file.WriteString(postMarkdown(index+1, p)); err != nil {
			return err
		}
	}

	return file.Sync()
}

// postMarkdown renders a single post, shown at the given 1-based rank, as a
// markdown fragment: a heading line, a details line, and a thumbnail link
// when the post has one.
func postMarkdown(rank int, p model.RedditPost) string {
	permalink := "http://reddit.com" + p.Permalink

	md := "## " + strconv.Itoa(rank) + ". [" + p.Title + "](" + permalink + ") - " + strconv.Itoa(p.Score) + "\n"
	md += "#### [r/" + p.Subreddit + "](http://reddit.com/r/" + p.Subreddit + ")"
	md += " - [u/" + p.Author + "](http://reddit.com/u/" + p.Author + ") - "
	md += strconv.Itoa(p.Num_comments) + " Comments - "
	md += "Top position achieved: " + strconv.Itoa(p.TopPosition) + "\n\n"

	// don't post image link if thumbnail doesn't exist
	if p.Thumbnail == "default" || p.Thumbnail == "self" {
		return md
	}

	// don't show thumbnail if NSFW
	if p.Over_18 {
		md += "<a href=\"" + p.Url + "\"><img src=\"https://github.com/mgerb/top-of-reddit/raw/master/nsfw.jpg\"></img></a>\n\n"
	} else {
		md += "<a href=\"" + p.Url + "\"><img src=\"" + p.Thumbnail + "\"></img></a>\n\n"
	}
	return md
}
// getStoredPosts loads every post stored in the nested bucket `day` inside
// `bucket` and returns them sorted by score, highest first.
//
// An error is returned (with an empty slice) when either bucket does not
// exist or when a stored value cannot be decoded.
func getStoredPosts(db *bolt.DB, bucket []byte, day []byte) ([]model.RedditPost, error) {
	posts := []model.RedditPost{}

	err := db.View(func(tx *bolt.Tx) error {
		// Bucket returns nil for a missing bucket; guard before chaining
		parent := tx.Bucket(bucket)
		if parent == nil {
			return fmt.Errorf("bucket %q does not exist", bucket)
		}
		dayBucket := parent.Bucket(day)
		if dayBucket == nil {
			return fmt.Errorf("no posts stored for %q", day)
		}

		// propagate decode failures instead of dropping them
		return dayBucket.ForEach(func(_, v []byte) error {
			var post model.RedditPost
			if err := json.Unmarshal(v, &post); err != nil {
				return err
			}
			posts = append(posts, post)
			return nil
		})
	})
	if err != nil {
		return []model.RedditPost{}, err
	}

	// sort posts by score
	sort.Sort(ByScore(posts))
	return posts, nil
}
// updateDailyPosts merges a snapshot of posts into the nested bucket `day`
// inside `bucket`, creating either bucket if it does not exist yet.
//
// The position of a post in redditPosts (1-based) is taken as its current
// rank. Posts whose ID is already present in yesterday's bucket are skipped.
// For a post already stored under `day`, the highest score and the best
// (lowest) position seen so far are kept; all other fields are overwritten
// with the new snapshot. Everything happens in one transaction, so an error
// leaves the database unchanged.
func updateDailyPosts(db *bolt.DB, bucket []byte, day []byte, redditPosts []model.RedditPost) error {
	return db.Update(func(tx *bolt.Tx) error {
		dailyBucket, err := tx.CreateBucketIfNotExists(bucket)
		if err != nil {
			return err
		}

		today, err := dailyBucket.CreateBucketIfNotExists(day)
		if err != nil {
			return err
		}

		// looked up once: the same bucket applies to every post in this snapshot
		yesterday := dailyBucket.Bucket(getYesterdayBucket())

		for index, post := range redditPosts {
			key := []byte(post.ID)

			// check if post was in yesterdays top posts
			if yesterday != nil && yesterday.Get(key) != nil {
				continue
			}

			position := index + 1
			post.TopPosition = position

			// if post is already stored in database - check to update highest score
			if stored := today.Get(key); stored != nil {
				storedPost := model.RedditPost{}
				if err := json.Unmarshal(stored, &storedPost); err != nil {
					return err
				}

				// only store the highest score a post achieves
				if storedPost.Score > post.Score {
					post.Score = storedPost.Score
				}

				// only store the highest position a post achieves
				if storedPost.TopPosition < position {
					post.TopPosition = storedPost.TopPosition
				}
			} else {
				fmt.Println("Updating new post: " + post.Title)
			}

			// encode the post as JSON
			encoded, err := json.Marshal(post)
			if err != nil {
				return err
			}

			// store in database
			if err := today.Put(key, encoded); err != nil {
				return err
			}
		}

		return nil
	})
}
// convertPosts decodes the "data" object of every entry under
// "data.children" in the given reddit response into a RedditPost.
//
// Decoding stops at the first entry that fails; the posts decoded up to that
// point are returned together with the error.
func convertPosts(postString string) ([]model.RedditPost, error) {
	children := gjson.Get(postString, "data.children").Array()

	posts := []model.RedditPost{}
	for i := range children {
		var post model.RedditPost

		raw := children[i].Get("data").String()
		if err := json.Unmarshal([]byte(raw), &post); err != nil {
			return posts, err
		}

		posts = append(posts, post)
	}

	return posts, nil
}
// getPosts fetches the JSON listing of the given subreddit
// (REDDIT_URL + subreddit + ".json") and returns the response body.
//
// An error is returned when the request cannot be built or sent, when no
// response arrives within the client timeout, when the server answers with
// anything other than 200 OK, or when the body cannot be read.
func getPosts(subreddit string) (string, error) {
	// bounded so that a stalled connection cannot block the polling loop forever
	client := &http.Client{Timeout: 30 * time.Second}

	req, err := http.NewRequest("GET", REDDIT_URL+subreddit+".json", nil)
	if err != nil {
		// req is nil here and must not be touched
		return "", err
	}
	req.Header.Add("User-Agent", USER_AGENT)

	response, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	// error responses (rate limiting, outages) must not be parsed as a listing
	if response.StatusCode != http.StatusOK {
		return "", fmt.Errorf("fetching r/%s: unexpected status %s", subreddit, response.Status)
	}

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return "", err
	}

	return string(body), nil
}
// pushToGithub stages every change in the working directory, commits it with
// a message naming yesterday's date, and pushes to origin/master. The
// standard output of each git command is printed. The first command that
// fails aborts the sequence and its error is returned.
func pushToGithub() error {
	fmt.Println("Pushing to Github...")

	message := "Adding posts for " + string(getYesterdayBucket())

	// git sub-commands, run in order
	steps := [][]string{
		{"add", "."},
		{"commit", "-m", message},
		{"push", "origin", "master"},
	}

	for _, args := range steps {
		stdout, err := exec.Command("git", args...).Output()
		if err != nil {
			return err
		}
		fmt.Println(string(stdout))
	}

	return nil
}
// ByScore implements sort.Interface and orders posts from the highest score
// to the lowest.
type ByScore []model.RedditPost

// Len reports the number of posts.
func (s ByScore) Len() int { return len(s) }

// Swap exchanges the posts at positions i and j.
func (s ByScore) Swap(i, j int) { s[j], s[i] = s[i], s[j] }

// Less reports whether post i must sort before post j, which is the case
// when i has the strictly higher score.
func (s ByScore) Less(i, j int) bool { return s[j].Score < s[i].Score }

23
src/model/reddit-post.go Normal file
View File

@@ -0,0 +1,23 @@
package model
// RedditPost is a single post as stored by this project. The tagged fields
// are decoded from the JSON keys named in their tags; TopPosition is added
// by the project itself.
type RedditPost struct {
Subreddit string `json:"subreddit"`
// ID is used as the key under which the post is stored in the database
ID string `json:"id"`
Gilded int `json:"gilded"`
Score int `json:"score"`
Author string `json:"author"`
Domain string `json:"domain"`
// Over_18 marks the post as NSFW; its thumbnail is replaced by a placeholder in the generated markdown
Over_18 bool `json:"over_18"`
// Thumbnail is an image URL; the values "default" and "self" are treated as "no thumbnail"
Thumbnail string `json:"thumbnail"`
// Permalink is a path that is appended to the reddit host to build the post link
Permalink string `json:"permalink"`
Url string `json:"url"`
Title string `json:"title"`
Created float64 `json:"created"`
Created_utc float64 `json:"created_utc"`
Num_comments int `json:"num_comments"`
Ups int `json:"ups"`
// extra fields
// TopPosition has no json tag, so it is encoded under the key "TopPosition"
TopPosition int // highest achieved position on front page
}

BIN
src/reddit.db Normal file

Binary file not shown.

BIN
src/scripts/circle.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

View File

@@ -0,0 +1,155 @@
package main
import (
"encoding/json"
"log"
"os"
"sort"
"strconv"
"time"
"github.com/mgerb/top-of-reddit/src/model"
"github.com/olekukonko/tablewriter"
"github.com/boltdb/bolt"
)
// conn is the database handle shared by the functions of this script.
var conn *bolt.DB

// init opens "../reddit.db", waiting at most one second for the file lock.
// The process is terminated if the database cannot be opened, because every
// later step would otherwise dereference a nil handle.
func init() {
	var err error
	conn, err = bolt.Open("../reddit.db", 0600, &bolt.Options{Timeout: 1 * time.Second})
	if err != nil {
		log.Fatal(err)
	}
}
// main loads every stored post and then produces the two output files: the
// subreddit list for the word cloud and the markdown statistics table. The
// process exits on the first error.
func main() {
	posts, err := getAllPosts()
	if err != nil {
		log.Fatal(err)
	}

	// output steps, run in order
	writers := []func([]model.RedditPost) error{
		writeSubredditListToFile,
		writeStatsToFile,
	}
	for _, write := range writers {
		if err := write(posts); err != nil {
			log.Fatal(err)
		}
	}
}
// getAllPosts returns every post stored in any of the nested buckets of
// "daily_bucket". When that bucket does not exist the result is empty.
// Entries of "daily_bucket" that are plain values rather than nested buckets
// are skipped. A value that cannot be decoded aborts the scan and is
// returned as the error, together with the posts read so far.
func getAllPosts() ([]model.RedditPost, error) {
	posts := []model.RedditPost{}

	err := conn.View(func(tx *bolt.Tx) error {
		dailyBucket := tx.Bucket([]byte("daily_bucket"))
		if dailyBucket == nil {
			// nothing has been recorded yet
			return nil
		}

		return dailyBucket.ForEach(func(key, _ []byte) error {
			day := dailyBucket.Bucket(key)
			if day == nil {
				// key holds a plain value, not a nested bucket
				return nil
			}

			return day.ForEach(func(_, v []byte) error {
				var post model.RedditPost
				// v is already the stored value; no second lookup needed
				if err := json.Unmarshal(v, &post); err != nil {
					return err
				}
				posts = append(posts, post)
				return nil
			})
		})
	})

	return posts, err
}
// writeSubredditListToFile appends the subreddit name of every post, one per
// line, to "subreddits.txt" (created if missing). The file is the input of
// the word cloud generator. With no posts the file is not touched.
//
// The file is opened once for the whole batch instead of once per post.
func writeSubredditListToFile(posts []model.RedditPost) error {
	if len(posts) == 0 {
		return nil
	}

	f, err := os.OpenFile("subreddits.txt", os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0666)
	if err != nil {
		return err
	}

	for _, post := range posts {
		if _, err := f.WriteString(post.Subreddit + "\n"); err != nil {
			f.Close()
			return err
		}
	}

	// a failed close can mean lost data, so report it
	return f.Close()
}
// writeStatsToFile writes a markdown table to "counts.md" with one row per
// subreddit: its name, the number of posts it has, and the title (as a link)
// and score of its highest scoring post. Rows are ordered by post count,
// highest first.
//
// It returns an error when the file cannot be created or closed.
func writeStatsToFile(posts []model.RedditPost) error {
	groupedPosts := groupBySubreddit(posts)

	// convert to list
	countList := make([][]model.RedditPost, 0, len(groupedPosts))
	for _, v := range groupedPosts {
		countList = append(countList, v)
	}

	// sort by post count
	sort.Slice(countList, func(i, j int) bool {
		return len(countList[i]) > len(countList[j])
	})

	data := make([][]string, 0, len(countList))
	for _, v := range countList {
		// each group is sorted by score, so v[0] is the subreddit's top post
		title := "[" + v[0].Title + "]" + "(https://www.reddit.com" + v[0].Permalink + ")"
		data = append(data, []string{v[0].Subreddit, strconv.Itoa(len(v)), title, strconv.Itoa(v[0].Score)})
	}

	file, err := os.Create("counts.md")
	if err != nil {
		return err
	}

	table := tablewriter.NewWriter(file)
	table.SetAutoWrapText(false)
	table.SetHeader([]string{"Subreddit", "Total", "Top Post", "Score"})
	table.SetBorders(tablewriter.Border{Left: true, Top: false, Right: true, Bottom: false})
	table.SetCenterSeparator("|")
	table.AppendBulk(data) // Add Bulk Data
	table.Render()

	return file.Close()
}
// groupBySubreddit buckets posts by their Subreddit field and orders the
// posts of every bucket by score, highest first.
func groupBySubreddit(posts []model.RedditPost) map[string][]model.RedditPost {
	bySubreddit := map[string][]model.RedditPost{}

	// appending to a missing (nil) entry starts a new group
	for _, post := range posts {
		bySubreddit[post.Subreddit] = append(bySubreddit[post.Subreddit], post)
	}

	// highest score first within each subreddit
	for _, group := range bySubreddit {
		sort.Slice(group, func(a, b int) bool {
			return group[a].Score > group[b].Score
		})
	}

	return bySubreddit
}
// appendFile appends text followed by a newline to the file at path,
// creating the file when it does not exist. It returns the error from
// opening the file or from writing to it.
func appendFile(path, text string) error {
	const flags = os.O_APPEND | os.O_WRONLY | os.O_CREATE

	out, openErr := os.OpenFile(path, flags, 0666)
	if openErr != nil {
		return openErr
	}
	defer out.Close()

	if _, writeErr := out.WriteString(text + "\n"); writeErr != nil {
		return writeErr
	}
	return nil
}

9
src/scripts/wordcloud.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/sh
# Render a word cloud from the subreddit list.
#
# Reads subreddits.txt and ./circle.png from the current directory and writes
# output.png: a 2000x2000 image with red words on a white background, shaped
# by the circle mask.
wordcloud_cli.py \
--text subreddits.txt \
--imagefile output.png \
--color red \
--background white \
--height 2000 \
--width 2000 \
--margin 10 \
--mask ./circle.png