From 17228713450829f80204268ffa3b62036f3c0a6c Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 29 Nov 2016 14:17:19 +0000 Subject: [PATCH 1/2] Don't use gofeed.ParseURL It leaks the response because it doesn't close the `resp.Body` on non-2xx. --- .../go-neb/services/rssbot/rssbot.go | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go index b9c32d4..531c687 100644 --- a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go +++ b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go @@ -88,9 +88,7 @@ func (s *Service) Register(oldService types.Service, client *matrix.Client) erro } // Make sure we can parse the feed for feedURL, feedInfo := range s.Feeds { - fp := gofeed.NewParser() - fp.Client = cachingClient - if _, err := fp.ParseURL(feedURL); err != nil { + if _, err := readFeed(feedURL); err != nil { return fmt.Errorf("Failed to read URL %s: %s", feedURL, err.Error()) } if len(feedInfo.Rooms) == 0 { @@ -243,9 +241,7 @@ func (s *Service) nextTimestamp() time.Time { func (s *Service) queryFeed(feedURL string) (*gofeed.Feed, []gofeed.Item, error) { log.WithField("feed_url", feedURL).Info("Querying feed") var items []gofeed.Item - fp := gofeed.NewParser() - fp.Client = cachingClient - feed, err := fp.ParseURL(feedURL) + feed, err := readFeed(feedURL) // check for no items in addition to any returned errors as it appears some RSS feeds // do not consistently return items. if err == nil && len(feed.Items) == 0 { @@ -374,6 +370,25 @@ func (rt userAgentRoundTripper) RoundTrip(req *http.Request) (*http.Response, er return rt.Transport.RoundTrip(req) } +func readFeed(feedURL string) (*gofeed.Feed, error) { + fp := gofeed.NewParser() + resp, err := cachingClient.Get(feedURL) + if resp != nil { + defer resp.Body.Close() + } + if err != nil { + return nil, err + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return nil, gofeed.HTTPError{ + StatusCode: resp.StatusCode, + Status: resp.Status, + } + } + return fp.Parse(resp.Body) +} + func init() { lruCache := lrucache.New(1024*1024*20, 0) // 20 MB cache, no max-age cachingClient = &http.Client{ From 7522ddf1a4e93b58b01db19258a6e88d35163c72 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 29 Nov 2016 14:44:30 +0000 Subject: [PATCH 2/2] Explain why no ParseURL --- src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go index 531c687..7dc3312 100644 --- a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go +++ b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go @@ -371,6 +371,7 @@ func (rt userAgentRoundTripper) RoundTrip(req *http.Request) (*http.Response, er } func readFeed(feedURL string) (*gofeed.Feed, error) { + // Don't use fp.ParseURL because it leaks on non-2xx responses as of 2016/11/29 (cac19c6c27) fp := gofeed.NewParser() resp, err := cachingClient.Get(feedURL) if resp != nil {