You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

559 lines
10 KiB

  1. package cascadia
  2. import (
  3. "strings"
  4. "testing"
  5. "golang.org/x/net/html"
  6. )
  7. type selectorTest struct {
  8. HTML, selector string
  9. results []string
  10. }
  11. func nodeString(n *html.Node) string {
  12. switch n.Type {
  13. case html.TextNode:
  14. return n.Data
  15. case html.ElementNode:
  16. return html.Token{
  17. Type: html.StartTagToken,
  18. Data: n.Data,
  19. Attr: n.Attr,
  20. }.String()
  21. }
  22. return ""
  23. }
  24. var selectorTests = []selectorTest{
  25. {
  26. `<body><address>This address...</address></body>`,
  27. "address",
  28. []string{
  29. "<address>",
  30. },
  31. },
  32. {
  33. `<html><head></head><body></body></html>`,
  34. "*",
  35. []string{
  36. "",
  37. "<html>",
  38. "<head>",
  39. "<body>",
  40. },
  41. },
  42. {
  43. `<p id="foo"><p id="bar">`,
  44. "#foo",
  45. []string{
  46. `<p id="foo">`,
  47. },
  48. },
  49. {
  50. `<ul><li id="t1"><p id="t1">`,
  51. "li#t1",
  52. []string{
  53. `<li id="t1">`,
  54. },
  55. },
  56. {
  57. `<ol><li id="t4"><li id="t44">`,
  58. "*#t4",
  59. []string{
  60. `<li id="t4">`,
  61. },
  62. },
  63. {
  64. `<ul><li class="t1"><li class="t2">`,
  65. ".t1",
  66. []string{
  67. `<li class="t1">`,
  68. },
  69. },
  70. {
  71. `<p class="t1 t2">`,
  72. "p.t1",
  73. []string{
  74. `<p class="t1 t2">`,
  75. },
  76. },
  77. {
  78. `<div class="test">`,
  79. "div.teST",
  80. []string{},
  81. },
  82. {
  83. `<p class="t1 t2">`,
  84. ".t1.fail",
  85. []string{},
  86. },
  87. {
  88. `<p class="t1 t2">`,
  89. "p.t1.t2",
  90. []string{
  91. `<p class="t1 t2">`,
  92. },
  93. },
  94. {
  95. `<p><p title="title">`,
  96. "p[title]",
  97. []string{
  98. `<p title="title">`,
  99. },
  100. },
  101. {
  102. `<address><address title="foo"><address title="bar">`,
  103. `address[title="foo"]`,
  104. []string{
  105. `<address title="foo">`,
  106. },
  107. },
  108. {
  109. `<p title="tot foo bar">`,
  110. `[ title ~= foo ]`,
  111. []string{
  112. `<p title="tot foo bar">`,
  113. },
  114. },
  115. {
  116. `<p title="hello world">`,
  117. `[title~="hello world"]`,
  118. []string{},
  119. },
  120. {
  121. `<p lang="en"><p lang="en-gb"><p lang="enough"><p lang="fr-en">`,
  122. `[lang|="en"]`,
  123. []string{
  124. `<p lang="en">`,
  125. `<p lang="en-gb">`,
  126. },
  127. },
  128. {
  129. `<p title="foobar"><p title="barfoo">`,
  130. `[title^="foo"]`,
  131. []string{
  132. `<p title="foobar">`,
  133. },
  134. },
  135. {
  136. `<p title="foobar"><p title="barfoo">`,
  137. `[title$="bar"]`,
  138. []string{
  139. `<p title="foobar">`,
  140. },
  141. },
  142. {
  143. `<p title="foobarufoo">`,
  144. `[title*="bar"]`,
  145. []string{
  146. `<p title="foobarufoo">`,
  147. },
  148. },
  149. {
  150. `<p class="t1 t2">`,
  151. ".t1:not(.t2)",
  152. []string{},
  153. },
  154. {
  155. `<div class="t3">`,
  156. `div:not(.t1)`,
  157. []string{
  158. `<div class="t3">`,
  159. },
  160. },
  161. {
  162. `<ol><li id=1><li id=2><li id=3></ol>`,
  163. `li:nth-child(odd)`,
  164. []string{
  165. `<li id="1">`,
  166. `<li id="3">`,
  167. },
  168. },
  169. {
  170. `<ol><li id=1><li id=2><li id=3></ol>`,
  171. `li:nth-child(even)`,
  172. []string{
  173. `<li id="2">`,
  174. },
  175. },
  176. {
  177. `<ol><li id=1><li id=2><li id=3></ol>`,
  178. `li:nth-child(-n+2)`,
  179. []string{
  180. `<li id="1">`,
  181. `<li id="2">`,
  182. },
  183. },
  184. {
  185. `<ol><li id=1><li id=2><li id=3></ol>`,
  186. `li:nth-child(3n+1)`,
  187. []string{
  188. `<li id="1">`,
  189. },
  190. },
  191. {
  192. `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
  193. `li:nth-last-child(odd)`,
  194. []string{
  195. `<li id="2">`,
  196. `<li id="4">`,
  197. },
  198. },
  199. {
  200. `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
  201. `li:nth-last-child(even)`,
  202. []string{
  203. `<li id="1">`,
  204. `<li id="3">`,
  205. },
  206. },
  207. {
  208. `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
  209. `li:nth-last-child(-n+2)`,
  210. []string{
  211. `<li id="3">`,
  212. `<li id="4">`,
  213. },
  214. },
  215. {
  216. `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
  217. `li:nth-last-child(3n+1)`,
  218. []string{
  219. `<li id="1">`,
  220. `<li id="4">`,
  221. },
  222. },
  223. {
  224. `<p>some text <span id="1">and a span</span><span id="2"> and another</span></p>`,
  225. `span:first-child`,
  226. []string{
  227. `<span id="1">`,
  228. },
  229. },
  230. {
  231. `<span>a span</span> and some text`,
  232. `span:last-child`,
  233. []string{
  234. `<span>`,
  235. },
  236. },
  237. {
  238. `<address></address><p id=1><p id=2>`,
  239. `p:nth-of-type(2)`,
  240. []string{
  241. `<p id="2">`,
  242. },
  243. },
  244. {
  245. `<address></address><p id=1><p id=2></p><a>`,
  246. `p:nth-last-of-type(2)`,
  247. []string{
  248. `<p id="1">`,
  249. },
  250. },
  251. {
  252. `<address></address><p id=1><p id=2></p><a>`,
  253. `p:last-of-type`,
  254. []string{
  255. `<p id="2">`,
  256. },
  257. },
  258. {
  259. `<address></address><p id=1><p id=2></p><a>`,
  260. `p:first-of-type`,
  261. []string{
  262. `<p id="1">`,
  263. },
  264. },
  265. {
  266. `<div><p id="1"></p><a></a></div><div><p id="2"></p></div>`,
  267. `p:only-child`,
  268. []string{
  269. `<p id="2">`,
  270. },
  271. },
  272. {
  273. `<div><p id="1"></p><a></a></div><div><p id="2"></p><p id="3"></p></div>`,
  274. `p:only-of-type`,
  275. []string{
  276. `<p id="1">`,
  277. },
  278. },
  279. {
  280. `<p id="1"><!-- --><p id="2">Hello<p id="3"><span>`,
  281. `:empty`,
  282. []string{
  283. `<head>`,
  284. `<p id="1">`,
  285. `<span>`,
  286. },
  287. },
  288. {
  289. `<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`,
  290. `div p`,
  291. []string{
  292. `<p id="1">`,
  293. `<p id="2">`,
  294. },
  295. },
  296. {
  297. `<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`,
  298. `div table p`,
  299. []string{
  300. `<p id="2">`,
  301. },
  302. },
  303. {
  304. `<div><p id="1"><div><p id="2"></div><table><tr><td><p id="3"></table></div>`,
  305. `div > p`,
  306. []string{
  307. `<p id="1">`,
  308. `<p id="2">`,
  309. },
  310. },
  311. {
  312. `<p id="1"><p id="2"></p><address></address><p id="3">`,
  313. `p ~ p`,
  314. []string{
  315. `<p id="2">`,
  316. `<p id="3">`,
  317. },
  318. },
  319. {
  320. `<p id="1"></p>
  321. <!--comment-->
  322. <p id="2"></p><address></address><p id="3">`,
  323. `p + p`,
  324. []string{
  325. `<p id="2">`,
  326. },
  327. },
  328. {
  329. `<ul><li></li><li></li></ul><p>`,
  330. `li, p`,
  331. []string{
  332. "<li>",
  333. "<li>",
  334. "<p>",
  335. },
  336. },
  337. {
  338. `<p id="1"><p id="2"></p><address></address><p id="3">`,
  339. `p +/*This is a comment*/ p`,
  340. []string{
  341. `<p id="2">`,
  342. },
  343. },
  344. {
  345. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  346. `p:contains("that wraps")`,
  347. []string{
  348. `<p>`,
  349. },
  350. },
  351. {
  352. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  353. `p:containsOwn("that wraps")`,
  354. []string{},
  355. },
  356. {
  357. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  358. `:containsOwn("inner")`,
  359. []string{
  360. `<span>`,
  361. },
  362. },
  363. {
  364. `<p>Text block that <span>wraps inner text</span> and continues</p>`,
  365. `p:containsOwn("block")`,
  366. []string{
  367. `<p>`,
  368. },
  369. },
  370. {
  371. `<div id="d1"><p id="p1"><span>text content</span></p></div><div id="d2"/>`,
  372. `div:has(#p1)`,
  373. []string{
  374. `<div id="d1">`,
  375. },
  376. },
  377. {
  378. `<div id="d1"><p id="p1"><span>contents 1</span></p></div>
  379. <div id="d2"><p>contents <em>2</em></p></div>`,
  380. `div:has(:containsOwn("2"))`,
  381. []string{
  382. `<div id="d2">`,
  383. },
  384. },
  385. {
  386. `<body><div id="d1"><p id="p1"><span>contents 1</span></p></div>
  387. <div id="d2"><p id="p2">contents <em>2</em></p></div></body>`,
  388. `body :has(:containsOwn("2"))`,
  389. []string{
  390. `<div id="d2">`,
  391. `<p id="p2">`,
  392. },
  393. },
  394. {
  395. `<body><div id="d1"><p id="p1"><span>contents 1</span></p></div>
  396. <div id="d2"><p id="p2">contents <em>2</em></p></div></body>`,
  397. `body :haschild(:containsOwn("2"))`,
  398. []string{
  399. `<p id="p2">`,
  400. },
  401. },
  402. {
  403. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  404. `p:matches([\d])`,
  405. []string{
  406. `<p id="p1">`,
  407. `<p id="p3">`,
  408. },
  409. },
  410. {
  411. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  412. `p:matches([a-z])`,
  413. []string{
  414. `<p id="p2">`,
  415. },
  416. },
  417. {
  418. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  419. `p:matches([a-zA-Z])`,
  420. []string{
  421. `<p id="p2">`,
  422. `<p id="p3">`,
  423. },
  424. },
  425. {
  426. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  427. `p:matches([^\d])`,
  428. []string{
  429. `<p id="p2">`,
  430. `<p id="p3">`,
  431. },
  432. },
  433. {
  434. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  435. `p:matches(^(0|a))`,
  436. []string{
  437. `<p id="p1">`,
  438. `<p id="p2">`,
  439. `<p id="p3">`,
  440. },
  441. },
  442. {
  443. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  444. `p:matches(^\d+$)`,
  445. []string{
  446. `<p id="p1">`,
  447. },
  448. },
  449. {
  450. `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
  451. `p:not(:matches(^\d+$))`,
  452. []string{
  453. `<p id="p2">`,
  454. `<p id="p3">`,
  455. },
  456. },
  457. {
  458. `<div><p id="p1">01234<em>567</em>89</p><div>`,
  459. `div :matchesOwn(^\d+$)`,
  460. []string{
  461. `<p id="p1">`,
  462. `<em>`,
  463. },
  464. },
  465. {
  466. `<ul>
  467. <li><a id="a1" href="http://www.google.com/finance"/>
  468. <li><a id="a2" href="http://finance.yahoo.com/"/>
  469. <li><a id="a2" href="http://finance.untrusted.com/"/>
  470. <li><a id="a3" href="https://www.google.com/news"/>
  471. <li><a id="a4" href="http://news.yahoo.com"/>
  472. </ul>`,
  473. `[href#=(fina)]:not([href#=(\/\/[^\/]+untrusted)])`,
  474. []string{
  475. `<a id="a1" href="http://www.google.com/finance">`,
  476. `<a id="a2" href="http://finance.yahoo.com/">`,
  477. },
  478. },
  479. {
  480. `<ul>
  481. <li><a id="a1" href="http://www.google.com/finance"/>
  482. <li><a id="a2" href="http://finance.yahoo.com/"/>
  483. <li><a id="a3" href="https://www.google.com/news"/>
  484. <li><a id="a4" href="http://news.yahoo.com"/>
  485. </ul>`,
  486. `[href#=(^https:\/\/[^\/]*\/?news)]`,
  487. []string{
  488. `<a id="a3" href="https://www.google.com/news">`,
  489. },
  490. },
  491. {
  492. `<form>
  493. <label>Username <input type="text" name="username" /></label>
  494. <label>Password <input type="password" name="password" /></label>
  495. <label>Country
  496. <select name="country">
  497. <option value="ca">Canada</option>
  498. <option value="us">United States</option>
  499. </select>
  500. </label>
  501. <label>Bio <textarea name="bio"></textarea></label>
  502. <button>Sign up</button>
  503. </form>`,
  504. `:input`,
  505. []string{
  506. `<input type="text" name="username">`,
  507. `<input type="password" name="password">`,
  508. `<select name="country">`,
  509. `<textarea name="bio">`,
  510. `<button>`,
  511. },
  512. },
  513. }
  514. func TestSelectors(t *testing.T) {
  515. for _, test := range selectorTests {
  516. s, err := Compile(test.selector)
  517. if err != nil {
  518. t.Errorf("error compiling %q: %s", test.selector, err)
  519. continue
  520. }
  521. doc, err := html.Parse(strings.NewReader(test.HTML))
  522. if err != nil {
  523. t.Errorf("error parsing %q: %s", test.HTML, err)
  524. continue
  525. }
  526. matches := s.MatchAll(doc)
  527. if len(matches) != len(test.results) {
  528. t.Errorf("wanted %d elements, got %d instead", len(test.results), len(matches))
  529. continue
  530. }
  531. for i, m := range matches {
  532. got := nodeString(m)
  533. if got != test.results[i] {
  534. t.Errorf("wanted %s, got %s instead", test.results[i], got)
  535. }
  536. }
  537. firstMatch := s.MatchFirst(doc)
  538. if len(test.results) == 0 {
  539. if firstMatch != nil {
  540. t.Errorf("MatchFirst: want nil, got %s", nodeString(firstMatch))
  541. }
  542. } else {
  543. got := nodeString(firstMatch)
  544. if got != test.results[0] {
  545. t.Errorf("MatchFirst: want %s, got %s", test.results[0], got)
  546. }
  547. }
  548. }
  549. }