@ -260,17 +260,7 @@ func (s *Service) queryFeed(feedURL string) (*gofeed.Feed, []gofeed.Item, error)
}
// Patch up the item list: make sure each item has a GUID.
for idx := 0 ; idx < len ( feed . Items ) ; idx ++ {
itm := feed . Items [ idx ]
if itm . GUID == "" {
if itm . Link != "" {
itm . GUID = itm . Link
} else if itm . Title != "" {
itm . GUID = itm . Title
}
feed . Items [ idx ] = itm
}
}
ensureItemsHaveGUIDs ( feed )
// Work out which items are new, if any (based on the last updated TS we have)
// If the TS is 0 then this is the first ever poll, so let's not send 10s of events
@ -289,26 +279,33 @@ func (s *Service) queryFeed(feedURL string) (*gofeed.Feed, []gofeed.Item, error)
// TODO: Handle the 'sy' Syndication extension to control update interval.
// See http://www.feedforall.com/syndication.htm and http://web.resource.org/rss/1.0/modules/syndication/
// map items to guid strings
var guids [ ] string
for _ , itm := range feed . Items {
guids = append ( guids , itm . GUID )
}
// Work out which GUIDs to remember. We don't want to remember every GUID ever as that leads to completely
// unbounded growth of data.
f := s . Feeds [ feedURL ]
// Some RSS feeds can return a very small number of items then bounce
// back to their "normal" size, so we cannot just clobber the recent GUID list per request or else we'll
// forget what we sent and resend it. Instead, we'll keep 2x the max number of items that we've ever
// seen from this feed, up to a max of 1000.
maxGuids := 2 * len ( feed . Items )
if len ( f . RecentGUIDs ) > maxGuids {
maxGuids = len ( f . RecentGUIDs ) // already 2x'd.
}
if maxGuids > 1000 {
maxGuids = 1000
}
if len ( guids ) != len ( f . RecentGUIDs ) {
log . WithFields ( log . Fields {
"new_guids" : guids ,
"old_guids" : f . RecentGUIDs ,
"feed_url" : feedURL ,
} ) . Warn ( "GUID length mismatch" )
lastSet := uniqueStrings ( f . RecentGUIDs ) // e.g. [4,5,6]
thisSet := uniqueGuids ( feed . Items ) // e.g. [1,2,3]
guids := append ( thisSet , lastSet ... ) // e.g. [1,2,3,4,5,6]
if len ( guids ) > maxGuids {
// Critically this favours the NEWEST elements, which are the ones we're most likely to see again.
guids = guids [ 0 : maxGuids ]
}
// Update the service config to persist the new times
f . NextPollTimestampSecs = nextPollTsSec
f . FeedUpdatedTimestampSecs = now
f . RecentGUIDs = guids
f . RecentGUIDs = uniqueStrings ( guids )
f . IsFailing = false
s . Feeds [ feedURL ] = f
@ -347,8 +344,12 @@ func (s *Service) newItems(feedURL string, allItems []*gofeed.Item) (items []gof
}
func ( s * Service ) sendToRooms ( cli * matrix . Client , feedURL string , feed * gofeed . Feed , item gofeed . Item ) error {
logger := log . WithField ( "feed_url" , feedURL ) . WithField ( "title" , item . Title )
logger . Info ( "New feed item" )
logger := log . WithFields ( log . Fields {
"feed_url" : feedURL ,
"title" : item . Title ,
"guid" : item . GUID ,
} )
logger . Info ( "Sending new feed item" )
for _ , roomID := range s . Feeds [ feedURL ] . Rooms {
if _ , err := cli . SendMessageEvent ( roomID , "m.room.message" , itemToHTML ( feed , item ) ) ; err != nil {
logger . WithError ( err ) . WithField ( "room_id" , roomID ) . Error ( "Failed to send to room" )
@ -365,6 +366,50 @@ func itemToHTML(feed *gofeed.Feed, item gofeed.Item) matrix.HTMLMessage {
) )
}
// ensureItemsHaveGUIDs fills in a GUID for every feed item that lacks one.
// The item's Link is preferred when it is non-empty, otherwise its Title is
// used; an item with neither keeps its empty GUID.
func ensureItemsHaveGUIDs(feed *gofeed.Feed) {
	for i, item := range feed.Items {
		if item.GUID != "" {
			// Already identified; leave untouched.
			continue
		}
		switch {
		case item.Link != "":
			item.GUID = item.Link
		case item.Title != "":
			item.GUID = item.Title
		}
		// Store the entry back into the slice so the patched GUID is what the
		// feed holds from here on.
		feed.Items[i] = item
	}
}
// uniqueStrings returns a freshly allocated slice holding the elements of a
// with every repeated value dropped. The first occurrence of each string is
// kept, so relative order is preserved. The result is never nil, even when a
// is nil or empty.
func uniqueStrings(a []string) []string {
	deduped := []string{}
	visited := make(map[string]struct{})
	for _, s := range a {
		if _, dup := visited[s]; !dup {
			visited[s] = struct{}{}
			deduped = append(deduped, s)
		}
	}
	return deduped
}
// uniqueGuids collects the GUID of each item in a, skipping any GUID that has
// already been collected. The first occurrence wins, so the order of the
// returned GUIDs follows the order of the items. The result is never nil.
func uniqueGuids(a []*gofeed.Item) []string {
	guids := []string{}
	visited := make(map[string]struct{})
	for _, entry := range a {
		id := entry.GUID
		if _, dup := visited[id]; !dup {
			visited[id] = struct{}{}
			guids = append(guids, id)
		}
	}
	return guids
}
// userAgentRoundTripper holds an underlying http.RoundTripper in its Transport field.
// NOTE(review): judging by the name, this presumably sets a User-Agent header on
// outgoing requests before delegating to Transport — its methods are not visible
// in this chunk, so confirm against the RoundTrip implementation.
type userAgentRoundTripper struct {
// Transport is the wrapped round tripper.
Transport http . RoundTripper
}