generate statistics for the year of 2017
This commit is contained in:
BIN
2017wordcloud.png
Normal file
BIN
2017wordcloud.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 254 KiB |
2
.gitignore → src/.gitignore
vendored
2
.gitignore → src/.gitignore
vendored
@@ -1,2 +1,2 @@
|
|||||||
top-of-reddit
|
top-of-reddit
|
||||||
|
vendor
|
||||||
45
src/Gopkg.lock
generated
Normal file
45
src/Gopkg.lock
generated
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||||
|
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
name = "github.com/boltdb/bolt"
|
||||||
|
packages = ["."]
|
||||||
|
revision = "2f1ce7a837dcb8da3ec595b1dac9d0632f0f99e8"
|
||||||
|
version = "v1.3.1"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
name = "github.com/mattn/go-runewidth"
|
||||||
|
packages = ["."]
|
||||||
|
revision = "9e777a8366cce605130a531d2cd6363d07ad7317"
|
||||||
|
version = "v0.0.2"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
branch = "master"
|
||||||
|
name = "github.com/olekukonko/tablewriter"
|
||||||
|
packages = ["."]
|
||||||
|
revision = "b8a9be070da40449e501c3c4730a889e42d87a9e"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
name = "github.com/tidwall/gjson"
|
||||||
|
packages = ["."]
|
||||||
|
revision = "01f00f129617a6fe98941fb920d6c760241b54d2"
|
||||||
|
version = "v1.1.0"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
branch = "master"
|
||||||
|
name = "github.com/tidwall/match"
|
||||||
|
packages = ["."]
|
||||||
|
revision = "1731857f09b1f38450e2c12409748407822dc6be"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
branch = "master"
|
||||||
|
name = "golang.org/x/sys"
|
||||||
|
packages = ["unix"]
|
||||||
|
revision = "f6cff0780e542efa0c8e864dc8fa522808f6a598"
|
||||||
|
|
||||||
|
[solve-meta]
|
||||||
|
analyzer-name = "dep"
|
||||||
|
analyzer-version = 1
|
||||||
|
inputs-digest = "34c72af529ac66d9c56bbe5e1f8601ae30e5ae71eb725d6592ddf2c6b07ec107"
|
||||||
|
solver-name = "gps-cdcl"
|
||||||
|
solver-version = 1
|
||||||
42
src/Gopkg.toml
Normal file
42
src/Gopkg.toml
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# Gopkg.toml example
|
||||||
|
#
|
||||||
|
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
|
||||||
|
# for detailed Gopkg.toml documentation.
|
||||||
|
#
|
||||||
|
# required = ["github.com/user/thing/cmd/thing"]
|
||||||
|
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
||||||
|
#
|
||||||
|
# [[constraint]]
|
||||||
|
# name = "github.com/user/project"
|
||||||
|
# version = "1.0.0"
|
||||||
|
#
|
||||||
|
# [[constraint]]
|
||||||
|
# name = "github.com/user/project2"
|
||||||
|
# branch = "dev"
|
||||||
|
# source = "github.com/myfork/project2"
|
||||||
|
#
|
||||||
|
# [[override]]
|
||||||
|
# name = "github.com/x/y"
|
||||||
|
# version = "2.4.0"
|
||||||
|
#
|
||||||
|
# [prune]
|
||||||
|
# non-go = false
|
||||||
|
# go-tests = true
|
||||||
|
# unused-packages = true
|
||||||
|
|
||||||
|
|
||||||
|
[[constraint]]
|
||||||
|
name = "github.com/boltdb/bolt"
|
||||||
|
version = "1.3.1"
|
||||||
|
|
||||||
|
[[constraint]]
|
||||||
|
name = "github.com/tidwall/gjson"
|
||||||
|
version = "1.1.0"
|
||||||
|
|
||||||
|
[prune]
|
||||||
|
go-tests = true
|
||||||
|
unused-packages = true
|
||||||
|
|
||||||
|
[[constraint]]
|
||||||
|
branch = "master"
|
||||||
|
name = "github.com/olekukonko/tablewriter"
|
||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/boltdb/bolt"
|
"github.com/boltdb/bolt"
|
||||||
|
"github.com/mgerb/top-of-reddit/src/model"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -33,27 +34,6 @@ var (
|
|||||||
TODAY_KEY []byte = []byte("today_date")
|
TODAY_KEY []byte = []byte("today_date")
|
||||||
)
|
)
|
||||||
|
|
||||||
type RedditPost struct {
|
|
||||||
Subreddit string `json:"subreddit"`
|
|
||||||
ID string `json:"id"`
|
|
||||||
Gilded int `json:"gilded"`
|
|
||||||
Score int `json:"score"`
|
|
||||||
Author string `json:"author"`
|
|
||||||
Domain string `json:"domain"`
|
|
||||||
Over_18 bool `json:"over_18"`
|
|
||||||
Thumbnail string `json:"thumbnail"`
|
|
||||||
Permalink string `json:"permalink"`
|
|
||||||
Url string `json:"url"`
|
|
||||||
Title string `json:"title"`
|
|
||||||
Created float64 `json:"created"`
|
|
||||||
Created_utc float64 `json:"created_utc"`
|
|
||||||
Num_comments int `json:"num_comments"`
|
|
||||||
Ups int `json:"ups"`
|
|
||||||
|
|
||||||
// extra fields
|
|
||||||
TopPosition int // highest achieved position on front page
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// start database connection
|
// start database connection
|
||||||
db := openDbSession()
|
db := openDbSession()
|
||||||
@@ -112,7 +92,7 @@ func getFolderPath() string {
|
|||||||
yesterday := getYesterdayTime()
|
yesterday := getYesterdayTime()
|
||||||
year := yesterday.Format(YEAR_FORMAT)
|
year := yesterday.Format(YEAR_FORMAT)
|
||||||
month := yesterday.Format(MONTH_FORMAT)
|
month := yesterday.Format(MONTH_FORMAT)
|
||||||
return year + "/" + month
|
return "../" + year + "/" + month
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkDateChange(db *bolt.DB) {
|
func checkDateChange(db *bolt.DB) {
|
||||||
@@ -171,7 +151,7 @@ func checkDateChange(db *bolt.DB) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func writePostsToFile(fileName string, posts []RedditPost) error {
|
func writePostsToFile(fileName string, posts []model.RedditPost) error {
|
||||||
folderPath := getFolderPath()
|
folderPath := getFolderPath()
|
||||||
|
|
||||||
// create directory if not exists
|
// create directory if not exists
|
||||||
@@ -214,13 +194,13 @@ func writePostsToFile(fileName string, posts []RedditPost) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// get a RedditPost slice
|
// get a RedditPost slice
|
||||||
func getStoredPosts(db *bolt.DB, bucket []byte, day []byte) ([]RedditPost, error) {
|
func getStoredPosts(db *bolt.DB, bucket []byte, day []byte) ([]model.RedditPost, error) {
|
||||||
|
|
||||||
posts := []RedditPost{}
|
posts := []model.RedditPost{}
|
||||||
|
|
||||||
err := db.View(func(tx *bolt.Tx) error {
|
err := db.View(func(tx *bolt.Tx) error {
|
||||||
tx.Bucket(bucket).Bucket(day).ForEach(func(_, v []byte) error {
|
tx.Bucket(bucket).Bucket(day).ForEach(func(_, v []byte) error {
|
||||||
tempPost := RedditPost{}
|
tempPost := model.RedditPost{}
|
||||||
err := json.Unmarshal(v, &tempPost)
|
err := json.Unmarshal(v, &tempPost)
|
||||||
posts = append(posts, tempPost)
|
posts = append(posts, tempPost)
|
||||||
|
|
||||||
@@ -238,14 +218,14 @@ func getStoredPosts(db *bolt.DB, bucket []byte, day []byte) ([]RedditPost, error
|
|||||||
sort.Sort(ByScore(posts))
|
sort.Sort(ByScore(posts))
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return []RedditPost{}, err
|
return []model.RedditPost{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return posts, nil
|
return posts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// stores new posts in the bucket only if they do not exist
|
// stores new posts in the bucket only if they do not exist
|
||||||
func updateDailyPosts(db *bolt.DB, bucket []byte, day []byte, redditPosts []RedditPost) error {
|
func updateDailyPosts(db *bolt.DB, bucket []byte, day []byte, redditPosts []model.RedditPost) error {
|
||||||
err := db.Update(func(tx *bolt.Tx) error {
|
err := db.Update(func(tx *bolt.Tx) error {
|
||||||
|
|
||||||
daily_bucket, err := tx.CreateBucketIfNotExists(bucket)
|
daily_bucket, err := tx.CreateBucketIfNotExists(bucket)
|
||||||
@@ -272,7 +252,7 @@ func updateDailyPosts(db *bolt.DB, bucket []byte, day []byte, redditPosts []Redd
|
|||||||
|
|
||||||
// if post is already stored in database - check to update highest score
|
// if post is already stored in database - check to update highest score
|
||||||
if storedPostString != nil {
|
if storedPostString != nil {
|
||||||
storedPost := RedditPost{}
|
storedPost := model.RedditPost{}
|
||||||
err := json.Unmarshal(storedPostString, &storedPost)
|
err := json.Unmarshal(storedPostString, &storedPost)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -315,11 +295,11 @@ func updateDailyPosts(db *bolt.DB, bucket []byte, day []byte, redditPosts []Redd
|
|||||||
}
|
}
|
||||||
|
|
||||||
// convert reddit response string to RedditPost slice
|
// convert reddit response string to RedditPost slice
|
||||||
func convertPosts(postString string) ([]RedditPost, error) {
|
func convertPosts(postString string) ([]model.RedditPost, error) {
|
||||||
posts := []RedditPost{}
|
posts := []model.RedditPost{}
|
||||||
|
|
||||||
for _, p := range gjson.Get(postString, "data.children").Array() {
|
for _, p := range gjson.Get(postString, "data.children").Array() {
|
||||||
tempPost := RedditPost{}
|
tempPost := model.RedditPost{}
|
||||||
|
|
||||||
err := json.Unmarshal([]byte(p.Get("data").String()), &tempPost)
|
err := json.Unmarshal([]byte(p.Get("data").String()), &tempPost)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -381,7 +361,7 @@ func pushToGithub() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// sorting
|
// sorting
|
||||||
type ByScore []RedditPost
|
type ByScore []model.RedditPost
|
||||||
|
|
||||||
func (s ByScore) Len() int {
|
func (s ByScore) Len() int {
|
||||||
return len(s)
|
return len(s)
|
||||||
23
src/model/reddit-post.go
Normal file
23
src/model/reddit-post.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package model
|
||||||
|
|
||||||
|
// RedditPost is the record this project decodes from JSON and stores for a
// single Reddit post. The tagged fields map one-to-one onto the JSON keys
// named in their struct tags.
type RedditPost struct {
	Subreddit    string  `json:"subreddit"`
	ID           string  `json:"id"`
	Gilded       int     `json:"gilded"`
	Score        int     `json:"score"`
	Author       string  `json:"author"`
	Domain       string  `json:"domain"`
	Over_18      bool    `json:"over_18"`
	Thumbnail    string  `json:"thumbnail"`
	Permalink    string  `json:"permalink"`
	Url          string  `json:"url"`
	Title        string  `json:"title"`
	Created      float64 `json:"created"`
	Created_utc  float64 `json:"created_utc"`
	Num_comments int     `json:"num_comments"`
	Ups          int     `json:"ups"`

	// Extra field that carries no JSON tag, so encoding/json uses the Go
	// field name "TopPosition" as its key.

	// TopPosition is the highest achieved position on the front page.
	TopPosition int
}
|
||||||
BIN
src/scripts/circle.png
Normal file
BIN
src/scripts/circle.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 39 KiB |
155
src/scripts/generate-stats.go
Normal file
155
src/scripts/generate-stats.go
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"sort"
	"strconv"
	"time"

	"github.com/boltdb/bolt"
	"github.com/mgerb/top-of-reddit/src/model"
	"github.com/olekukonko/tablewriter"
)
|
||||||
|
|
||||||
|
var conn *bolt.DB
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
conn, _ = bolt.Open("../reddit.db", 0600, &bolt.Options{Timeout: 1 * time.Second})
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
|
||||||
|
posts, err := getAllPosts()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = writeSubredditListToFile(posts)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = writeStatsToFile(posts)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// get posts from database file
|
||||||
|
func getAllPosts() ([]model.RedditPost, error) {
|
||||||
|
posts := []model.RedditPost{}
|
||||||
|
|
||||||
|
err := conn.View(func(tx *bolt.Tx) error {
|
||||||
|
dailyBucket := tx.Bucket([]byte("daily_bucket"))
|
||||||
|
|
||||||
|
return dailyBucket.ForEach(func(key, val []byte) error {
|
||||||
|
|
||||||
|
b := dailyBucket.Bucket(key)
|
||||||
|
|
||||||
|
return b.ForEach(func(k, v []byte) error {
|
||||||
|
var post model.RedditPost
|
||||||
|
err := json.Unmarshal(b.Get(k), &post)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
posts = append(posts, post)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
return posts, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// write subreddits to file for the word cloud generator
|
||||||
|
func writeSubredditListToFile(posts []model.RedditPost) error {
|
||||||
|
for _, post := range posts {
|
||||||
|
err := appendFile("subreddits.txt", post.Subreddit)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// create markdown table with subreddit stats
|
||||||
|
func writeStatsToFile(posts []model.RedditPost) error {
|
||||||
|
|
||||||
|
groupedPosts := groupBySubreddit(posts)
|
||||||
|
|
||||||
|
countList := [][]model.RedditPost{}
|
||||||
|
|
||||||
|
// convert to list
|
||||||
|
for _, v := range groupedPosts {
|
||||||
|
countList = append(countList, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
// sort by post count
|
||||||
|
sort.Slice(countList, func(i, j int) bool {
|
||||||
|
return len(countList[i]) > len(countList[j])
|
||||||
|
})
|
||||||
|
|
||||||
|
data := [][]string{}
|
||||||
|
|
||||||
|
for _, v := range countList {
|
||||||
|
title := "[" + v[0].Title + "]" + "(https://www.reddit.com" + v[0].Permalink + ")"
|
||||||
|
data = append(data, []string{v[0].Subreddit, strconv.Itoa(len(v)), title, strconv.Itoa(v[0].Score)})
|
||||||
|
}
|
||||||
|
|
||||||
|
file, _ := os.Create("counts.md")
|
||||||
|
|
||||||
|
table := tablewriter.NewWriter(file)
|
||||||
|
table.SetAutoWrapText(false)
|
||||||
|
table.SetHeader([]string{"Subreddit", "Total", "Top Post", "Score"})
|
||||||
|
table.SetBorders(tablewriter.Border{Left: true, Top: false, Right: true, Bottom: false})
|
||||||
|
table.SetCenterSeparator("|")
|
||||||
|
table.AppendBulk(data) // Add Bulk Data
|
||||||
|
table.Render()
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func groupBySubreddit(posts []model.RedditPost) map[string][]model.RedditPost {
|
||||||
|
|
||||||
|
groupedPosts := map[string][]model.RedditPost{}
|
||||||
|
|
||||||
|
// group posts by subreddit
|
||||||
|
for _, v := range posts {
|
||||||
|
if _, ok := groupedPosts[v.Subreddit]; ok {
|
||||||
|
groupedPosts[v.Subreddit] = append(groupedPosts[v.Subreddit], v)
|
||||||
|
} else {
|
||||||
|
groupedPosts[v.Subreddit] = []model.RedditPost{v}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// order posts by view count
|
||||||
|
for _, v := range groupedPosts {
|
||||||
|
sort.Slice(v, func(i, j int) bool {
|
||||||
|
return v[i].Score > v[j].Score
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return groupedPosts
|
||||||
|
}
|
||||||
|
|
||||||
|
// appendFile appends text followed by a newline to the file at path,
// creating the file with mode 0666 (before umask) if it does not exist.
//
// It returns the first error encountered while opening, writing or closing
// the file.
func appendFile(path, text string) (err error) {
	f, err := os.OpenFile(path, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0666)
	if err != nil {
		return err
	}
	// For a file that was written to, a Close error can mean the data was
	// never flushed, so report it unless an earlier error already exists.
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = f.WriteString(text + "\n")
	return err
}
|
||||||
9
src/scripts/wordcloud.sh
Executable file
9
src/scripts/wordcloud.sh
Executable file
@@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/sh
# Render the subreddit word cloud.
#
# Reads subreddits.txt and the circle.png mask from the current directory and
# writes the image to output.png, so run this from the directory that holds
# those files.

# Stop with a non-zero status if the generator fails.
set -e

wordcloud_cli.py \
    --text subreddits.txt \
    --imagefile output.png \
    --color red \
    --background white \
    --height 2000 \
    --width 2000 \
    --margin 10 \
    --mask ./circle.png
|
||||||
Reference in New Issue
Block a user