mirror of https://github.com/matrix-org/go-neb.git
Browse Source
Merge pull request #78 from matrix-org/kegan/feedreader
Merge pull request #78 from matrix-org/kegan/feedreader
Implement feedreader servicepull/68/merge
Kegsay
8 years ago
committed by
GitHub
5 changed files with 256 additions and 117 deletions
-
10src/github.com/matrix-org/go-neb/goneb.go
-
14src/github.com/matrix-org/go-neb/polling/polling.go
-
236src/github.com/matrix-org/go-neb/services/feedreader/feedreader.go
-
111src/github.com/matrix-org/go-neb/services/rss/rss.go
-
2src/github.com/matrix-org/go-neb/types/types.go
@ -0,0 +1,236 @@ |
|||||
|
package services |
||||
|
|
||||
|
import ( |
||||
|
"errors" |
||||
|
"fmt" |
||||
|
log "github.com/Sirupsen/logrus" |
||||
|
"github.com/matrix-org/go-neb/database" |
||||
|
"github.com/matrix-org/go-neb/matrix" |
||||
|
"github.com/matrix-org/go-neb/polling" |
||||
|
"github.com/matrix-org/go-neb/types" |
||||
|
"github.com/mmcdole/gofeed" |
||||
|
"html" |
||||
|
"time" |
||||
|
) |
||||
|
|
||||
|
const minPollingIntervalSeconds = (10 * 60) // 10min
|
||||
|
|
||||
|
type feedPoller struct{} |
||||
|
|
||||
|
func (p *feedPoller) IntervalSecs() int64 { return 10 } |
||||
|
func (p *feedPoller) OnPoll(s types.Service, cli *matrix.Client) { |
||||
|
logger := log.WithFields(log.Fields{ |
||||
|
"service_id": s.ServiceID(), |
||||
|
"service_type": s.ServiceType(), |
||||
|
}) |
||||
|
|
||||
|
frService, ok := s.(*feedReaderService) |
||||
|
if !ok { |
||||
|
logger.Error("FeedReader: OnPoll called without a Feed Service instance") |
||||
|
return |
||||
|
} |
||||
|
now := time.Now().Unix() // Second resolution
|
||||
|
|
||||
|
// Work out which feeds should be polled
|
||||
|
var pollFeeds []string |
||||
|
for u, feedInfo := range frService.Feeds { |
||||
|
if feedInfo.NextPollTimestampSecs == 0 || now >= feedInfo.NextPollTimestampSecs { |
||||
|
// re-query this feed
|
||||
|
pollFeeds = append(pollFeeds, u) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if len(pollFeeds) == 0 { |
||||
|
return |
||||
|
} |
||||
|
|
||||
|
// Query each feed and send new items to subscribed rooms
|
||||
|
for _, u := range pollFeeds { |
||||
|
feed, items, err := p.queryFeed(frService, u) |
||||
|
if err != nil { |
||||
|
logger.WithField("feed_url", u).WithError(err).Error("Failed to query feed") |
||||
|
continue |
||||
|
} |
||||
|
// Loop backwards since [0] is the most recent and we want to send in chronological order
|
||||
|
for i := len(items) - 1; i >= 0; i-- { |
||||
|
item := items[i] |
||||
|
if err := p.sendToRooms(frService, cli, u, feed, item); err != nil { |
||||
|
logger.WithFields(log.Fields{ |
||||
|
"feed_url": u, |
||||
|
log.ErrorKey: err, |
||||
|
"item": item, |
||||
|
}).Error("Failed to send item to room") |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Persist the service to save the next poll times
|
||||
|
if _, err := database.GetServiceDB().StoreService(frService); err != nil { |
||||
|
logger.WithError(err).Error("Failed to persist next poll times for service") |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Query the given feed, update relevant timestamps and return NEW items
|
||||
|
func (p *feedPoller) queryFeed(s *feedReaderService, feedURL string) (*gofeed.Feed, []gofeed.Item, error) { |
||||
|
log.WithField("feed_url", feedURL).Info("Querying feed") |
||||
|
var items []gofeed.Item |
||||
|
fp := gofeed.NewParser() |
||||
|
feed, err := fp.ParseURL(feedURL) |
||||
|
if err != nil { |
||||
|
return nil, items, err |
||||
|
} |
||||
|
|
||||
|
// Work out which items are new, if any (based on the last updated TS we have)
|
||||
|
// If the TS is 0 then this is the first ever poll, so let's not send 10s of events
|
||||
|
// into the room and just do new ones from this point onwards.
|
||||
|
if s.Feeds[feedURL].FeedUpdatedTimestampSecs != 0 { |
||||
|
for _, i := range feed.Items { |
||||
|
if i == nil || i.PublishedParsed == nil { |
||||
|
continue |
||||
|
} |
||||
|
if i.PublishedParsed.Unix() > s.Feeds[feedURL].FeedUpdatedTimestampSecs { |
||||
|
items = append(items, *i) |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
now := time.Now().Unix() // Second resolution
|
||||
|
|
||||
|
// Work out when this feed was last updated
|
||||
|
var feedLastUpdatedTs int64 |
||||
|
if feed.UpdatedParsed != nil { |
||||
|
feedLastUpdatedTs = feed.UpdatedParsed.Unix() |
||||
|
} else if len(feed.Items) > 0 { |
||||
|
i := feed.Items[0] |
||||
|
if i != nil && i.PublishedParsed != nil { |
||||
|
feedLastUpdatedTs = i.PublishedParsed.Unix() |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Work out when to next poll this feed
|
||||
|
nextPollTsSec := now + minPollingIntervalSeconds |
||||
|
if s.Feeds[feedURL].PollIntervalMins > int(minPollingIntervalSeconds/60) { |
||||
|
nextPollTsSec = now + int64(s.Feeds[feedURL].PollIntervalMins*60) |
||||
|
} |
||||
|
// TODO: Handle the 'sy' Syndication extension to control update interval.
|
||||
|
// See http://www.feedforall.com/syndication.htm and http://web.resource.org/rss/1.0/modules/syndication/
|
||||
|
|
||||
|
p.updateFeedInfo(s, feedURL, nextPollTsSec, feedLastUpdatedTs) |
||||
|
return feed, items, nil |
||||
|
} |
||||
|
|
||||
|
func (p *feedPoller) updateFeedInfo(s *feedReaderService, feedURL string, nextPollTs, feedUpdatedTs int64) { |
||||
|
for u := range s.Feeds { |
||||
|
if u != feedURL { |
||||
|
continue |
||||
|
} |
||||
|
f := s.Feeds[u] |
||||
|
f.NextPollTimestampSecs = nextPollTs |
||||
|
f.FeedUpdatedTimestampSecs = feedUpdatedTs |
||||
|
s.Feeds[u] = f |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func (p *feedPoller) sendToRooms(s *feedReaderService, cli *matrix.Client, feedURL string, feed *gofeed.Feed, item gofeed.Item) error { |
||||
|
logger := log.WithField("feed_url", feedURL).WithField("title", item.Title) |
||||
|
logger.Info("New feed item") |
||||
|
var rooms []string |
||||
|
for roomID, urls := range s.Rooms { |
||||
|
for _, u := range urls { |
||||
|
if u == feedURL { |
||||
|
rooms = append(rooms, roomID) |
||||
|
break |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
for _, roomID := range rooms { |
||||
|
if _, err := cli.SendMessageEvent(roomID, "m.room.message", itemToHTML(feed, item)); err != nil { |
||||
|
logger.WithError(err).WithField("room_id", roomID).Error("Failed to send to room") |
||||
|
} |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
// SomeOne posted a new article: Title Of The Entry ( https://someurl.com/blag )
|
||||
|
func itemToHTML(feed *gofeed.Feed, item gofeed.Item) matrix.HTMLMessage { |
||||
|
return matrix.GetHTMLMessage("m.notice", fmt.Sprintf( |
||||
|
"<i>%s</i> posted a new article: %s ( %s )", |
||||
|
html.EscapeString(feed.Title), html.EscapeString(item.Title), html.EscapeString(item.Link), |
||||
|
)) |
||||
|
} |
||||
|
|
||||
|
type feedReaderService struct { |
||||
|
types.DefaultService |
||||
|
id string |
||||
|
serviceUserID string |
||||
|
Feeds map[string]struct { // feed_url => { }
|
||||
|
PollIntervalMins int `json:"poll_interval_mins"` |
||||
|
NextPollTimestampSecs int64 // Internal: When we should poll again
|
||||
|
FeedUpdatedTimestampSecs int64 // Internal: The last time the feed was updated
|
||||
|
} `json:"feeds"` |
||||
|
Rooms map[string][]string `json:"rooms"` // room_id => [ feed_url ]
|
||||
|
} |
||||
|
|
||||
|
func (s *feedReaderService) ServiceUserID() string { return s.serviceUserID } |
||||
|
func (s *feedReaderService) ServiceID() string { return s.id } |
||||
|
func (s *feedReaderService) ServiceType() string { return "feedreader" } |
||||
|
func (s *feedReaderService) Poller() types.Poller { return &feedPoller{} } |
||||
|
|
||||
|
// Register will check the liveness of each RSS feed given. If all feeds check out okay, no error is returned.
|
||||
|
func (s *feedReaderService) Register(oldService types.Service, client *matrix.Client) error { |
||||
|
if len(s.Feeds) == 0 { |
||||
|
// this is an error UNLESS the old service had some feeds in which case they are deleting us :(
|
||||
|
var numOldFeeds int |
||||
|
oldFeedService, ok := oldService.(*feedReaderService) |
||||
|
if !ok { |
||||
|
log.WithField("service_id", oldService.ServiceID()).Error("Old service isn't a FeedReaderService") |
||||
|
} else { |
||||
|
numOldFeeds = len(oldFeedService.Feeds) |
||||
|
} |
||||
|
if numOldFeeds == 0 { |
||||
|
return errors.New("An RSS feed must be specified.") |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
// Make sure we can parse the feed
|
||||
|
for feedURL := range s.Feeds { |
||||
|
fp := gofeed.NewParser() |
||||
|
if _, err := fp.ParseURL(feedURL); err != nil { |
||||
|
return fmt.Errorf("Failed to read URL %s: %s", feedURL, err.Error()) |
||||
|
} |
||||
|
} |
||||
|
// Make sure all feeds are accounted for (appear at least once) in the room map, AND make sure there
|
||||
|
// are no weird new feeds in those rooms
|
||||
|
for roomID, roomFeeds := range s.Rooms { |
||||
|
for _, f := range roomFeeds { |
||||
|
if _, exists := s.Feeds[f]; !exists { |
||||
|
return fmt.Errorf("Feed URL %s in room %s does not exist in the Feeds section", f, roomID) |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
return nil |
||||
|
} |
||||
|
|
||||
|
func (s *feedReaderService) PostRegister(oldService types.Service) { |
||||
|
if len(s.Feeds) == 0 { // bye-bye :(
|
||||
|
logger := log.WithFields(log.Fields{ |
||||
|
"service_id": s.ServiceID(), |
||||
|
"service_type": s.ServiceType(), |
||||
|
}) |
||||
|
logger.Info("Deleting service: No feeds remaining.") |
||||
|
polling.StopPolling(s) |
||||
|
if err := database.GetServiceDB().DeleteService(s.ServiceID()); err != nil { |
||||
|
logger.WithError(err).Error("Failed to delete service") |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
func init() { |
||||
|
types.RegisterService(func(serviceID, serviceUserID, webhookEndpointURL string) types.Service { |
||||
|
r := &feedReaderService{ |
||||
|
id: serviceID, |
||||
|
serviceUserID: serviceUserID, |
||||
|
} |
||||
|
return r |
||||
|
}) |
||||
|
} |
@ -1,111 +0,0 @@ |
|||||
package services |
|
||||
|
|
||||
import ( |
|
||||
"errors" |
|
||||
log "github.com/Sirupsen/logrus" |
|
||||
"github.com/matrix-org/go-neb/database" |
|
||||
"github.com/matrix-org/go-neb/matrix" |
|
||||
"github.com/matrix-org/go-neb/polling" |
|
||||
"github.com/matrix-org/go-neb/types" |
|
||||
"time" |
|
||||
) |
|
||||
|
|
||||
type rssPoller struct{} |
|
||||
|
|
||||
func (p *rssPoller) IntervalSecs() int64 { return 10 } |
|
||||
func (p *rssPoller) OnPoll(s types.Service) { |
|
||||
rsss, ok := s.(*rssService) |
|
||||
if !ok { |
|
||||
log.WithField("service_id", s.ServiceID()).Error("RSS: OnPoll called without an RSS Service") |
|
||||
return |
|
||||
} |
|
||||
now := time.Now().Unix() // Second resolution
|
|
||||
// URL => [ RoomID ]
|
|
||||
urlsToRooms := make(map[string][]string) |
|
||||
|
|
||||
for roomID, roomInfo := range rsss.Rooms { |
|
||||
for u, feedInfo := range roomInfo.Feeds { |
|
||||
if feedInfo.LastPollTimestampSecs == 0 || (feedInfo.LastPollTimestampSecs+(int64(feedInfo.PollIntervalMins)*60)) > now { |
|
||||
// re-query this feed
|
|
||||
urlsToRooms[u] = append(urlsToRooms[u], roomID) |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// TODO: Some polling
|
|
||||
} |
|
||||
|
|
||||
type rssService struct { |
|
||||
types.DefaultService |
|
||||
id string |
|
||||
serviceUserID string |
|
||||
Rooms map[string]struct { // room_id => {}
|
|
||||
Feeds map[string]struct { // URL => { }
|
|
||||
PollIntervalMins int `json:"poll_interval_mins"` |
|
||||
LastPollTimestampSecs int64 |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
func (s *rssService) ServiceUserID() string { return s.serviceUserID } |
|
||||
func (s *rssService) ServiceID() string { return s.id } |
|
||||
func (s *rssService) ServiceType() string { return "rss" } |
|
||||
func (s *rssService) Poller() types.Poller { return &rssPoller{} } |
|
||||
|
|
||||
// Register will check the liveness of each RSS feed given. If all feeds check out okay, no error is returned.
|
|
||||
func (s *rssService) Register(oldService types.Service, client *matrix.Client) error { |
|
||||
feeds := feedUrls(s) |
|
||||
if len(feeds) == 0 { |
|
||||
// this is an error UNLESS the old service had some feeds in which case they are deleting us :(
|
|
||||
oldFeeds := feedUrls(oldService) |
|
||||
if len(oldFeeds) == 0 { |
|
||||
return errors.New("An RSS feed must be specified.") |
|
||||
} |
|
||||
} |
|
||||
return nil |
|
||||
} |
|
||||
|
|
||||
func (s *rssService) PostRegister(oldService types.Service) { |
|
||||
if len(feedUrls(s)) == 0 { // bye-bye :(
|
|
||||
logger := log.WithFields(log.Fields{ |
|
||||
"service_id": s.ServiceID(), |
|
||||
"service_type": s.ServiceType(), |
|
||||
}) |
|
||||
logger.Info("Deleting service (0 feeds)") |
|
||||
polling.StopPolling(s) |
|
||||
if err := database.GetServiceDB().DeleteService(s.ServiceID()); err != nil { |
|
||||
logger.WithError(err).Error("Failed to delete service") |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// feedUrls returns a list of feed urls for this service
|
|
||||
func feedUrls(srv types.Service) []string { |
|
||||
var feeds []string |
|
||||
s, ok := srv.(*rssService) |
|
||||
if !ok { |
|
||||
return feeds |
|
||||
} |
|
||||
|
|
||||
urlSet := make(map[string]bool) |
|
||||
for _, roomInfo := range s.Rooms { |
|
||||
for u := range roomInfo.Feeds { |
|
||||
urlSet[u] = true |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
for u := range urlSet { |
|
||||
feeds = append(feeds, u) |
|
||||
} |
|
||||
return feeds |
|
||||
} |
|
||||
|
|
||||
func init() { |
|
||||
types.RegisterService(func(serviceID, serviceUserID, webhookEndpointURL string) types.Service { |
|
||||
r := &rssService{ |
|
||||
id: serviceID, |
|
||||
serviceUserID: serviceUserID, |
|
||||
} |
|
||||
return r |
|
||||
}) |
|
||||
} |
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue