commit d4fd93546c7cff8a29bc987851edaeed2c5cc7b2
Author: mgerb42
Date:   Thu Feb 23 06:03:29 2017 +0000

    init

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..1003d3bd
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+* !text !filter !merge !diff
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..4c67f33a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+reddit.db
+top-of-reddit
+
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..33407425
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+# Top of Reddit
+
+- Every post that makes it to the front page of [r/all](http://reddit.com/r/all).
+- Updated daily.
+- Sorted by highest score achieved.
+- GoLang
+- [BoltDB](https://github.com/boltdb/bolt) for persistence.
+
+Inspired by [github-trending](https://github.com/josephyzhou/github-trending).
diff --git a/main.go b/main.go
new file mode 100644
index 00000000..e3409d12
--- /dev/null
+++ b/main.go
@@ -0,0 +1,450 @@
+// Command top-of-reddit polls the front page of r/all, stores the best score
+// and position every post reaches during a day in a BoltDB file and, when the
+// date changes, writes that day out as a markdown file and pushes it to git.
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"os"
+	"os/exec"
+	"sort"
+	"strconv"
+	"time"
+
+	"github.com/boltdb/bolt"
+	"github.com/tidwall/gjson"
+)
+
+const (
+	REDDIT_URL  string = "https://www.reddit.com/r/"
+	USER_AGENT  string = "top-of-reddit:bot"
+	DATE_FORMAT string = "01-02-2006"
+
+	// pause between two polls of the front page
+	pollInterval = 30 * time.Second
+
+	// upper bound for a single request to reddit, so that a stalled
+	// connection cannot block the polling loop forever
+	requestTimeout = 15 * time.Second
+)
+
+var (
+	// buckets
+	DAILY_BUCKET []byte = []byte("daily_bucket")
+	MAIN_BUCKET  []byte = []byte("main") // main bucket for keeping track of the current day
+
+	// keys
+	TODAY_KEY []byte = []byte("today_date")
+)
+
+// RedditPost holds the fields of a reddit listing entry that are decoded from
+// the API response and stored as JSON in the database.
+type RedditPost struct {
+	Subreddit    string  `json:"subreddit"`
+	ID           string  `json:"id"`
+	Gilded       int     `json:"gilded"`
+	Score        int     `json:"score"`
+	Author       string  `json:"author"`
+	Domain       string  `json:"domain"`
+	Over_18      bool    `json:"over_18"`
+	Thumbnail    string  `json:"thumbnail"`
+	Permalink    string  `json:"permalink"`
+	Url          string  `json:"url"`
+	Title        string  `json:"title"`
+	Created      float64 `json:"created"`
+	Created_utc  float64 `json:"created_utc"`
+	Num_comments int     `json:"num_comments"`
+	Ups          int     `json:"ups"`
+
+	// extra fields
+	TopPosition int // highest achieved position on front page
+}
+
+func main() {
+	// start database connection
+	db := openDbSession()
+	defer db.Close()
+
+	// start main program loop
+	for {
+		fmt.Println("Updating...")
+		update(db)
+		time.Sleep(pollInterval)
+	}
+}
+
+// update runs one polling cycle: it fetches the front page, stores the posts
+// in today's bucket and then checks for a date change. Errors are logged so
+// that the polling loop keeps running.
+func update(db *bolt.DB) {
+	// send http request for json data
+	response, err := getPosts("all")
+	if err != nil {
+		log.Println(err.Error())
+		return
+	}
+
+	// create RedditPost slice
+	posts, err := convertPosts(response)
+	if err != nil {
+		log.Println(err.Error())
+		return
+	}
+
+	// update the daily bucket with posts
+	if err := updateDailyPosts(db, DAILY_BUCKET, getTodayBucket(), posts); err != nil {
+		log.Println(err.Error())
+	}
+
+	checkDateChange(db)
+}
+
+// start the main database session
+func openDbSession() *bolt.DB {
+	database, err := bolt.Open("reddit.db", 0600, &bolt.Options{Timeout: 1 * time.Second})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	return database
+}
+
+// returns the post bucket for today
+func getTodayBucket() []byte {
+	return []byte(time.Now().Format(DATE_FORMAT))
+}
+
+// returns the post bucket for yesterday
+func getYesterdayBucket() []byte {
+	yesterday := time.Now().AddDate(0, 0, -1)
+	return []byte(yesterday.Format(DATE_FORMAT))
+}
+
+// checkDateChange compares the date stored under TODAY_KEY with the current
+// date. If they differ, or no date is stored yet, it stores the current date,
+// writes the posts of the stored day (the current day if none was stored) to
+// a markdown file and pushes the repository. All steps run inside one write
+// transaction: an error in any of them rolls the stored date back, so the
+// export is attempted again on the next call. Errors are logged, not returned.
+func checkDateChange(db *bolt.DB) {
+	err := db.Update(func(tx *bolt.Tx) error {
+		b, err := tx.CreateBucketIfNotExists(MAIN_BUCKET)
+		if err != nil {
+			return err
+		}
+
+		// read the clock once so that every step below sees the same date
+		today := getTodayBucket()
+		storedDay := b.Get(TODAY_KEY)
+
+		// nothing to do while the day has not changed
+		if storedDay != nil && string(storedDay) == string(today) {
+			return nil
+		}
+
+		// day to export; the stored value is copied because the key it was
+		// read from is overwritten below
+		exportDay := today
+		if storedDay != nil {
+			exportDay = append([]byte(nil), storedDay...)
+		}
+
+		// set today's date in database
+		if err := b.Put(TODAY_KEY, today); err != nil {
+			return err
+		}
+
+		fmt.Println("Creating markdown!")
+
+		// read through the open transaction instead of getStoredPosts: that
+		// would open a read transaction inside this write transaction
+		storedPosts, err := readPosts(tx, DAILY_BUCKET, exportDay)
+		if err != nil {
+			return err
+		}
+
+		if err := writePostsToFile(string(exportDay), storedPosts); err != nil {
+			return err
+		}
+
+		// push to github
+		return pushToGithub()
+	})
+
+	if err != nil {
+		log.Println(err)
+	}
+}
+
+// writePostsToFile renders posts as markdown into the file "<fileName>.md",
+// replacing an existing file of that name. It returns the first error hit
+// while creating, writing, syncing or closing the file.
+func writePostsToFile(fileName string, posts []RedditPost) error {
+	// render everything in memory first, so the file is written in one go
+	var content bytes.Buffer
+
+	for index, p := range posts {
+		permalink := "http://reddit.com" + p.Permalink
+		content.WriteString("## " + strconv.Itoa(index+1) + ". [" + p.Title + "](" + permalink + ") - " + strconv.Itoa(p.Score) + "\n")
+		content.WriteString("#### [r/" + p.Subreddit + "](http://reddit.com/r/" + p.Subreddit + ")")
+		content.WriteString(" - [u/" + p.Author + "](http://reddit.com/u/" + p.Author + ") - ")
+		content.WriteString(strconv.Itoa(p.Num_comments) + " Comments - ")
+		content.WriteString("Top position achieved: " + strconv.Itoa(p.TopPosition) + "\n\n")
+
+		// don't show thumbnail if NSFW
+		// NOTE(review): both branches write the same text, so no thumbnail is
+		// written for any post; the image markup seems to be missing from
+		// these two strings and should be restored.
+		if p.Over_18 {
+			content.WriteString("\n\n")
+		} else {
+			content.WriteString("\n\n")
+		}
+	}
+
+	// create new markdown file
+	file, err := os.Create(fileName + ".md")
+	if err != nil {
+		return err
+	}
+
+	if _, err := file.Write(content.Bytes()); err != nil {
+		file.Close()
+		return err
+	}
+
+	if err := file.Sync(); err != nil {
+		file.Close()
+		return err
+	}
+
+	return file.Close()
+}
+
+// get a RedditPost slice: the posts stored for day inside bucket, sorted by
+// score with the highest score first. Opens its own read transaction, so it
+// is meant for callers that are not already inside a transaction.
+func getStoredPosts(db *bolt.DB, bucket []byte, day []byte) ([]RedditPost, error) {
+	var posts []RedditPost
+
+	err := db.View(func(tx *bolt.Tx) error {
+		var err error
+		posts, err = readPosts(tx, bucket, day)
+		return err
+	})
+
+	if err != nil {
+		return []RedditPost{}, err
+	}
+
+	return posts, nil
+}
+
+// readPosts decodes every post stored for day inside bucket, using the given
+// transaction, and returns them sorted by score with the highest score first.
+// A bucket or day that does not exist yields an empty slice.
+func readPosts(tx *bolt.Tx, bucket []byte, day []byte) ([]RedditPost, error) {
+	posts := []RedditPost{}
+
+	parent := tx.Bucket(bucket)
+	if parent == nil {
+		return posts, nil
+	}
+
+	dayBucket := parent.Bucket(day)
+	if dayBucket == nil {
+		return posts, nil
+	}
+
+	err := dayBucket.ForEach(func(_, v []byte) error {
+		tempPost := RedditPost{}
+		if err := json.Unmarshal(v, &tempPost); err != nil {
+			return err
+		}
+
+		posts = append(posts, tempPost)
+		return nil
+	})
+
+	if err != nil {
+		return []RedditPost{}, err
+	}
+
+	// sort once, after all posts are loaded
+	sort.Sort(ByScore(posts))
+
+	return posts, nil
+}
+
+// updateDailyPosts stores redditPosts in the bucket of the given day inside
+// bucket; the index of a post in redditPosts plus one is its position. Posts
+// found in yesterday's bucket are skipped. For a post already stored today the
+// highest score and the best position seen so far are kept. An error returned
+// inside the transaction is passed back to the caller.
+func updateDailyPosts(db *bolt.DB, bucket []byte, day []byte, redditPosts []RedditPost) error {
+	return db.Update(func(tx *bolt.Tx) error {
+		dailyBucket, err := tx.CreateBucketIfNotExists(bucket)
+		if err != nil {
+			return err
+		}
+
+		today, err := dailyBucket.CreateBucketIfNotExists(day)
+		if err != nil {
+			return err
+		}
+
+		// looked up once for all posts; nil if yesterday has no bucket
+		yesterday := dailyBucket.Bucket(getYesterdayBucket())
+
+		for index, post := range redditPosts {
+			id := []byte(post.ID)
+
+			// check if post was in yesterdays top posts
+			if yesterday != nil && yesterday.Get(id) != nil {
+				continue
+			}
+
+			post.TopPosition = index + 1
+
+			// if post is already stored in database - keep its best values
+			if stored := today.Get(id); stored != nil {
+				storedPost := RedditPost{}
+				if err := json.Unmarshal(stored, &storedPost); err != nil {
+					return err
+				}
+
+				// only store the highest score a post receives
+				if storedPost.Score > post.Score {
+					post.Score = storedPost.Score
+				}
+
+				// only store the highest position a post receives: position
+				// 1 is the top of the page, so the smaller number wins
+				// (0 means that no position was stored)
+				if storedPost.TopPosition > 0 && storedPost.TopPosition < post.TopPosition {
+					post.TopPosition = storedPost.TopPosition
+				}
+			} else {
+				fmt.Println("Updating new post: " + post.Title)
+			}
+
+			// serialize json
+			postString, err := json.Marshal(post)
+			if err != nil {
+				return err
+			}
+
+			// store in database
+			if err := today.Put(id, postString); err != nil {
+				return err
+			}
+		}
+
+		return nil
+	})
+}
+
+// convert reddit response string to RedditPost slice; on a decoding error no
+// posts are returned
+func convertPosts(postString string) ([]RedditPost, error) {
+	children := gjson.Get(postString, "data.children").Array()
+	posts := make([]RedditPost, 0, len(children))
+
+	for _, child := range children {
+		tempPost := RedditPost{}
+		if err := json.Unmarshal([]byte(child.Get("data").String()), &tempPost); err != nil {
+			return nil, err
+		}
+
+		posts = append(posts, tempPost)
+	}
+
+	return posts, nil
+}
+
+// send http request to reddit and obtain the response string; a response
+// with a status other than 200 OK is returned as an error
+func getPosts(subreddit string) (string, error) {
+	client := &http.Client{Timeout: requestTimeout}
+
+	req, err := http.NewRequest("GET", REDDIT_URL+subreddit+".json", nil)
+	if err != nil {
+		return "", err
+	}
+
+	req.Header.Add("User-Agent", USER_AGENT)
+
+	response, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+
+	defer response.Body.Close()
+
+	if response.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("reddit request for r/%s: unexpected status %s", subreddit, response.Status)
+	}
+
+	body, err := ioutil.ReadAll(response.Body)
+	if err != nil {
+		return "", err
+	}
+
+	return string(body), nil
+}
+
+// pushToGithub stages all changes below the current directory, commits them
+// with a message containing today's date and pushes to master on origin.
+// It stops at the first git command that fails.
+func pushToGithub() error {
+	fmt.Println("Pushing to Github...")
+	commitMessage := "Adding posts for " + string(getTodayBucket())
+
+	commands := [][]string{
+		{"add", "."},
+		{"commit", "-m", commitMessage},
+		{"push", "origin", "master"},
+	}
+
+	for _, args := range commands {
+		if err := runGit(args...); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// runGit runs git with the given arguments and prints everything the command
+// wrote to stdout and stderr. On failure the returned error names the command.
+func runGit(args ...string) error {
+	out, err := exec.Command("git", args...).CombinedOutput()
+	fmt.Println(string(out))
+
+	if err != nil {
+		return fmt.Errorf("git %v: %v", args, err)
+	}
+
+	return nil
+}
+
+// sorting
+// ByScore implements sort.Interface and orders posts by score, highest first.
+type ByScore []RedditPost
+
+func (s ByScore) Len() int {
+	return len(s)
+}
+
+func (s ByScore) Swap(i, j int) {
+	s[i], s[j] = s[j], s[i]
+}
+
+func (s ByScore) Less(i, j int) bool {
+	return s[i].Score > s[j].Score
+}
diff --git a/nsfw.jpg b/nsfw.jpg
new file mode 100644
index 00000000..5030c4db
Binary files /dev/null and b/nsfw.jpg differ