|
|
@ -256,17 +256,7 @@ func (s *Service) queryFeed(feedURL string) (*gofeed.Feed, []gofeed.Item, error) |
|
|
|
} |
|
|
|
|
|
|
|
// Patch up the item list: make sure each item has a GUID.
|
|
|
|
for idx := 0; idx < len(feed.Items); idx++ { |
|
|
|
itm := feed.Items[idx] |
|
|
|
if itm.GUID == "" { |
|
|
|
if itm.Link != "" { |
|
|
|
itm.GUID = itm.Link |
|
|
|
} else if itm.Title != "" { |
|
|
|
itm.GUID = itm.Title |
|
|
|
} |
|
|
|
feed.Items[idx] = itm |
|
|
|
} |
|
|
|
} |
|
|
|
ensureItemsHaveGUIDs(feed) |
|
|
|
|
|
|
|
// Work out which items are new, if any (based on the last updated TS we have)
|
|
|
|
// If the TS is 0 then this is the first ever poll, so let's not send 10s of events
|
|
|
@ -285,20 +275,28 @@ func (s *Service) queryFeed(feedURL string) (*gofeed.Feed, []gofeed.Item, error) |
|
|
|
// TODO: Handle the 'sy' Syndication extension to control update interval.
|
|
|
|
// See http://www.feedforall.com/syndication.htm and http://web.resource.org/rss/1.0/modules/syndication/
|
|
|
|
|
|
|
|
// map items to guid strings
|
|
|
|
var guids []string |
|
|
|
for _, itm := range feed.Items { |
|
|
|
guids = append(guids, itm.GUID) |
|
|
|
} |
|
|
|
|
|
|
|
// Work out which GUIDs to remember. We don't want to remember every GUID ever as that leads to completely
|
|
|
|
// unbounded growth of data.
|
|
|
|
f := s.Feeds[feedURL] |
|
|
|
// Some RSS feeds can return a very small number of items then bounce
|
|
|
|
// back to their "normal" size, so we cannot just clobber the recent GUID list per request or else we'll
|
|
|
|
// forget what we sent and resend it. Instead, we'll keep 2x the max number of items that we've ever
|
|
|
|
// seen from this feed, up to a max of 1000.
|
|
|
|
maxGuids := 2 * len(feed.Items) |
|
|
|
if len(f.RecentGUIDs) > maxGuids { |
|
|
|
maxGuids = len(f.RecentGUIDs) // already 2x'd.
|
|
|
|
} |
|
|
|
if maxGuids > 1000 { |
|
|
|
maxGuids = 1000 |
|
|
|
} |
|
|
|
|
|
|
|
if len(guids) != len(f.RecentGUIDs) { |
|
|
|
log.WithFields(log.Fields{ |
|
|
|
"new_guids": guids, |
|
|
|
"old_guids": f.RecentGUIDs, |
|
|
|
"feed_url": feedURL, |
|
|
|
}).Warn("GUID length mismatch") |
|
|
|
lastSet := uniqueStrings(f.RecentGUIDs) // e.g. [4,5,6]
|
|
|
|
thisSet := uniqueGuids(feed.Items) // e.g. [1,2,3]
|
|
|
|
guids := append(thisSet, lastSet...) // e.g. [1,2,3,4,5,6]
|
|
|
|
guids = uniqueStrings(guids) |
|
|
|
if len(guids) > maxGuids { |
|
|
|
// Critically this favours the NEWEST elements, which are the ones we're most likely to see again.
|
|
|
|
guids = guids[0:maxGuids] |
|
|
|
} |
|
|
|
|
|
|
|
// Update the service config to persist the new times
|
|
|
@ -343,8 +341,12 @@ func (s *Service) newItems(feedURL string, allItems []*gofeed.Item) (items []gof |
|
|
|
} |
|
|
|
|
|
|
|
func (s *Service) sendToRooms(cli *matrix.Client, feedURL string, feed *gofeed.Feed, item gofeed.Item) error { |
|
|
|
logger := log.WithField("feed_url", feedURL).WithField("title", item.Title) |
|
|
|
logger.Info("New feed item") |
|
|
|
logger := log.WithFields(log.Fields{ |
|
|
|
"feed_url": feedURL, |
|
|
|
"title": item.Title, |
|
|
|
"guid": item.GUID, |
|
|
|
}) |
|
|
|
logger.Info("Sending new feed item") |
|
|
|
for _, roomID := range s.Feeds[feedURL].Rooms { |
|
|
|
if _, err := cli.SendMessageEvent(roomID, "m.room.message", itemToHTML(feed, item)); err != nil { |
|
|
|
logger.WithError(err).WithField("room_id", roomID).Error("Failed to send to room") |
|
|
@ -361,6 +363,50 @@ func itemToHTML(feed *gofeed.Feed, item gofeed.Item) matrix.HTMLMessage { |
|
|
|
)) |
|
|
|
} |
|
|
|
|
|
|
|
func ensureItemsHaveGUIDs(feed *gofeed.Feed) { |
|
|
|
for idx := 0; idx < len(feed.Items); idx++ { |
|
|
|
itm := feed.Items[idx] |
|
|
|
if itm.GUID == "" { |
|
|
|
if itm.Link != "" { |
|
|
|
itm.GUID = itm.Link |
|
|
|
} else if itm.Title != "" { |
|
|
|
itm.GUID = itm.Title |
|
|
|
} |
|
|
|
feed.Items[idx] = itm |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// uniqueStrings returns a new slice of strings with duplicate elements removed.
|
|
|
|
// Order is otherwise preserved.
|
|
|
|
func uniqueStrings(a []string) []string { |
|
|
|
ret := []string{} |
|
|
|
seen := make(map[string]bool) |
|
|
|
for _, str := range a { |
|
|
|
if seen[str] { |
|
|
|
continue |
|
|
|
} |
|
|
|
seen[str] = true |
|
|
|
ret = append(ret, str) |
|
|
|
} |
|
|
|
return ret |
|
|
|
} |
|
|
|
|
|
|
|
// uniqueGuids returns a new slice of GUID strings with duplicate elements removed.
|
|
|
|
// Order is otherwise preserved.
|
|
|
|
func uniqueGuids(a []*gofeed.Item) []string { |
|
|
|
ret := []string{} |
|
|
|
seen := make(map[string]bool) |
|
|
|
for _, item := range a { |
|
|
|
if seen[item.GUID] { |
|
|
|
continue |
|
|
|
} |
|
|
|
seen[item.GUID] = true |
|
|
|
ret = append(ret, item.GUID) |
|
|
|
} |
|
|
|
return ret |
|
|
|
} |
|
|
|
|
|
|
|
type userAgentRoundTripper struct { |
|
|
|
Transport http.RoundTripper |
|
|
|
} |
|
|
|