From fcd3befb09b023422e9b993ca7161b4f840c2426 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 27 Oct 2016 15:09:19 +0100 Subject: [PATCH] HTML decode the RSS title/description fields --- .../matrix-org/go-neb/services/rssbot/rssbot.go | 9 +++++++++ .../matrix-org/go-neb/services/rssbot/rssbot_test.go | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go index 1533e86..613e65e 100644 --- a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go +++ b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go @@ -279,6 +279,15 @@ func (s *rssBotService) newItems(feedURL string, allItems []*gofeed.Item) (items continue } + // Decode HTML for <title> and <description>: + // The RSS 2.0 Spec http://cyber.harvard.edu/rss/rss.html#hrelementsOfLtitemgt supports a bunch + // of weird ways to put HTML into <title> and <description> tags. Not all RSS feed producers run + // these fields through entity encoders (some have ' unencoded, others have it as &#8217;). We'll + // assume that all RSS feeds are sending HTML for these fields and run them through a standard decoder. + // This will inevitably break for some people, but that group of people are probably smaller, so *shrug*. 
+ i.Title = html.UnescapeString(i.Title) + i.Description = html.UnescapeString(i.Description) + items = append(items, *i) } return diff --git a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go index 19bbac5..d75d096 100644 --- a/src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go +++ b/src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go @@ -94,7 +94,7 @@ func TestHTMLEntities(t *testing.T) { t.Fatal("Failed to decode request JSON: ", err) return nil, errors.New("Error handling matrix client test request") } - want := "New Item: Majora's Mask" + want := "New Item: Majora’s Mask" if !strings.Contains(msg.Body, want) { t.Errorf("TestHTMLEntities: want '%s' in body, got '%s'", want, msg.Body) }