Browse Source

Merge pull request #136 from matrix-org/kegan/feed-leak

Don't use gofeed.ParseURL
kegan/rss-no-cache
Kegsay 8 years ago
committed by GitHub
parent
commit
9f77111702
  1. 28
      src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go

28
src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go

@ -88,9 +88,7 @@ func (s *Service) Register(oldService types.Service, client *matrix.Client) erro
}
// Make sure we can parse the feed
for feedURL, feedInfo := range s.Feeds {
fp := gofeed.NewParser()
fp.Client = cachingClient
if _, err := fp.ParseURL(feedURL); err != nil {
if _, err := readFeed(feedURL); err != nil {
return fmt.Errorf("Failed to read URL %s: %s", feedURL, err.Error())
}
if len(feedInfo.Rooms) == 0 {
@ -243,9 +241,7 @@ func (s *Service) nextTimestamp() time.Time {
func (s *Service) queryFeed(feedURL string) (*gofeed.Feed, []gofeed.Item, error) {
log.WithField("feed_url", feedURL).Info("Querying feed")
var items []gofeed.Item
fp := gofeed.NewParser()
fp.Client = cachingClient
feed, err := fp.ParseURL(feedURL)
feed, err := readFeed(feedURL)
// check for no items in addition to any returned errors as it appears some RSS feeds
// do not consistently return items.
if err == nil && len(feed.Items) == 0 {
@ -374,6 +370,26 @@ func (rt userAgentRoundTripper) RoundTrip(req *http.Request) (*http.Response, er
return rt.Transport.RoundTrip(req)
}
func readFeed(feedURL string) (*gofeed.Feed, error) {
// Don't use fp.ParseURL because it leaks on non-2xx responses as of 2016/11/29 (cac19c6c27)
fp := gofeed.NewParser()
resp, err := cachingClient.Get(feedURL)
if resp != nil {
defer resp.Body.Close()
}
if err != nil {
return nil, err
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return nil, gofeed.HTTPError{
StatusCode: resp.StatusCode,
Status: resp.Status,
}
}
return fp.Parse(resp.Body)
}
func init() {
lruCache := lrucache.New(1024*1024*20, 0) // 20 MB cache, no max-age
cachingClient = &http.Client{

Loading…
Cancel
Save