Browse Source

HTML decode the RSS title/description fields

kegan/rss-escape-entities
Kegan Dougal 8 years ago
parent
commit
fcd3befb09
  1. 9
      src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go
  2. 2
      src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go

9
src/github.com/matrix-org/go-neb/services/rssbot/rssbot.go

@ -279,6 +279,15 @@ func (s *rssBotService) newItems(feedURL string, allItems []*gofeed.Item) (items
continue
}
// Decode HTML for <title> and <description>:
// The RSS 2.0 Spec http://cyber.harvard.edu/rss/rss.html#hrelementsOfLtitemgt supports a bunch
// of weird ways to put HTML into <title> and <description> tags. Not all RSS feed producers run
// these fields through entity encoders (some have ' unencoded, others have it as &#8217;). We'll
// assume that all RSS feeds are sending HTML for these fields and run them through a standard decoder.
// This will inevitably break for some people, but that group of people is probably smaller, so *shrug*.
i.Title = html.UnescapeString(i.Title)
i.Description = html.UnescapeString(i.Description)
items = append(items, *i)
}
return

2
src/github.com/matrix-org/go-neb/services/rssbot/rssbot_test.go

@ -94,7 +94,7 @@ func TestHTMLEntities(t *testing.T) {
t.Fatal("Failed to decode request JSON: ", err)
return nil, errors.New("Error handling matrix client test request")
}
want := "New Item: Majora's Mask"
want := "New Item: Majoras Mask"
if !strings.Contains(msg.Body, want) {
t.Errorf("TestHTMLEntities: want '%s' in body, got '%s'", want, msg.Body)
}

Loading…
Cancel
Save