You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

661 lines
22 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. /*******************************************************************************
  2. µMatrix - a Chromium browser extension to black/white list requests.
  3. Copyright (C) 2014 Raymond Hill
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see {http://www.gnu.org/licenses/}.
  14. Home: https://github.com/gorhill/uMatrix
  15. */
  16. /* global chrome, µMatrix */
  17. /******************************************************************************/
  18. /******************************************************************************/
  19. (function() {
  20. 'use strict';
  21. /******************************************************************************/
  22. var µm = µMatrix;
  23. // https://github.com/gorhill/httpswitchboard/issues/303
  24. // Some kind of trick going on here:
  25. // Any scheme other than 'http' and 'https' is remapped into a fake
  26. // URL which tricks the rest of µMatrix into being able to process an
  27. // otherwise unmanageable scheme. µMatrix needs a web page to have a proper
  28. // hostname to work properly, so just like the 'behind-the-scene'
  29. // fake domain name, we map unknown schemes into a fake '{scheme}-scheme'
  30. // hostname. This way, for a specific scheme you can create scope with
  31. // rules which will apply only to that scheme.
  32. /******************************************************************************/
  33. /******************************************************************************/
  /**
   * Convert the raw URL of a tab into the URL used to scope that tab.
   *
   * - Behind-the-scene tab ids always map to the behind-the-scene scope.
   * - The extension's own "blocked page" document maps to the URL of the page
   *   which was blocked, as carried (base64-encoded JSON) by its `details`
   *   query parameter.
   * - 'http' and 'https' URLs are returned as normalized by `this.URI`.
   * - Any other scheme is remapped to a fake `http://...{scheme}-scheme/` URL.
   *
   * @param {*} tabId - Tab id, as understood by vAPI.isBehindTheSceneTabId().
   * @param {string} pageURL - Raw URL of the tab. Not read for
   *     behind-the-scene tab ids.
   * @returns {string} The URL to use as scope for the tab.
   */
  µm.normalizePageURL = function(tabId, pageURL) {
      if ( vAPI.isBehindTheSceneTabId(tabId) ) {
          return 'http://' + this.behindTheSceneScope + '/';
      }

      // If the URL is that of our "blocked page" document, use the URL of
      // the page which was blocked.
      if ( pageURL.lastIndexOf(vAPI.getURL('main-blocked.html'), 0) === 0 ) {
          var matches = /main-blocked\.html\?details=([^&]+)/.exec(pageURL);
          if ( matches !== null ) {
              try {
                  var details = JSON.parse(atob(matches[1]));
                  // Only a string can be parsed as a URL further down: a
                  // payload with a missing or non-string `url` must not
                  // replace the URL we already have.
                  if ( typeof details.url === 'string' ) {
                      pageURL = details.url;
                  }
              } catch (e) {
                  // Deliberate best-effort: an undecodable `details` payload
                  // just means the blocked page's own URL is normalized.
              }
          }
      }

      var uri = this.URI.set(pageURL);
      var scheme = uri.scheme;
      if ( scheme === 'https' || scheme === 'http' ) {
          return uri.normalizedURI();
      }

      // Unknown scheme: build a fake hostname ending with '{scheme}-scheme'.
      var fakeHostname = scheme + '-scheme';
      if ( uri.hostname !== '' ) {
          fakeHostname = uri.hostname + '.' + fakeHostname;
      } else if ( scheme === 'about' ) {
          fakeHostname = uri.path + '.' + fakeHostname;
      }
      return 'http://' + fakeHostname + '/';
  };
  63. /******************************************************************************/
  64. /******************************************************************************
  65. To keep track from which context *exactly* network requests are made. This is
  66. often tricky for various reasons, and the challenge is not specific to one
  67. browser.
  68. The time at which a URL is assigned to a tab and the time when a network
  69. request for a root document is made must be assumed to be unrelated: it's all
  70. asynchronous. There is no guaranteed order in which the two events are fired.
  71. Also, other "anomalies" can occur:
  72. - a network request for a root document is fired without the corresponding
  73. tab being really assigned a new URL
  74. <https://github.com/chrisaljoudi/uBlock/issues/516>
  75. - a network request for a secondary resource is labeled with a tab id for
  76. which no root document was pulled for that tab.
  77. <https://github.com/chrisaljoudi/uBlock/issues/1001>
  78. - a network request for a secondary resource is made without the root
  79. document to which it belongs being formally bound yet to the proper tab id,
  80. causing a bad scope to be used for filtering purpose.
  81. <https://github.com/chrisaljoudi/uBlock/issues/1205>
  82. <https://github.com/chrisaljoudi/uBlock/issues/1140>
  83. So the solution here is to keep a lightweight data structure whose only
  84. purpose is to keep track as accurately as possible of which root document
  85. belongs to which tab. That's the only purpose, and because of this, there are
  86. no restrictions for when the URL of a root document can be associated to a tab.
  87. Before, the PageStore object was trying to deal with this, but it had to
  88. enforce some restrictions so as to not descend into one of the above issues, or
  89. other issues. The PageStore object can only be associated with a tab for which
  90. a definitive navigation event occurred, because it collects information about
  91. what occurred in the tab (for example, the number of requests blocked for a
  92. page).
  93. The TabContext objects do not suffer this restriction, and as a result they
  94. offer the most reliable picture of which root document URL is really associated
  95. to which tab. Moreover, the TabContext can undo an association from a root
  96. document, and automatically re-associate with the next most recent. This takes
  97. care of <https://github.com/chrisaljoudi/uBlock/issues/516>.
  98. The PageStore object no longer caches the various information about which
  99. root document it is currently bound to. When it needs to find out, it will always
  100. defer to the TabContext object, which will provide the real answer. This takes
  101. care of <https://github.com/chrisaljoudi/uBlock/issues/1205>. In effect, the
  102. master switch and dynamic filtering rules can be evaluated now properly even
  103. in the absence of a PageStore object, this was not the case before.
  104. Also, the TabContext object will try its best to find a good candidate root
  105. document URL for when none exists. This takes care of
  106. <https://github.com/chrisaljoudi/uBlock/issues/1001>.
  107. The TabContext manager is self-contained, and it takes care to properly
  108. housekeep itself.
  109. */
  µm.tabContextManager = (function() {
      // Live tab contexts, keyed by tab id.
      var tabContexts = Object.create(null);

      // https://github.com/chrisaljoudi/uBlock/issues/1001
      // This is to be used as last-resort fallback in case a tab is found to
      // not be bound while network requests are fired for the tab: the most
      // recently pushed root document URL, and when it was pushed.
      var mostRecentRootDocURL = '';
      var mostRecentRootDocURLTimestamp = 0;

      // Delay between two checks that the tab of a context still exists.
      var gcPeriod = 10 * 60 * 1000;

      // A TabContext holds the stack of candidate root document URLs spotted
      // for one tab, along with properties derived from the topmost
      // candidate. Creating an instance registers it in `tabContexts`.
      var TabContext = function(tabId) {
          this.tabId = tabId;
          this.stack = [];
          this.rawURL = '';
          this.normalURL = '';
          this.scheme = '';
          this.rootHostname = '';
          this.rootDomain = '';
          this.secure = false;
          this.timer = null;
          this.onTabCallback = null;
          this.onTimerCallback = null;
          tabContexts[tabId] = this;
      };

      // Cancel any pending housekeeping timer and unregister the context.
      // The behind-the-scene context is permanent and is never destroyed.
      TabContext.prototype.destroy = function() {
          if ( vAPI.isBehindTheSceneTabId(this.tabId) ) {
              return;
          }
          if ( this.timer !== null ) {
              clearTimeout(this.timer);
              this.timer = null;
          }
          delete tabContexts[this.tabId];
      };

      // Callback for vAPI.tabs.get(): re-arm the housekeeping timer when a
      // tab was reported, otherwise discard the now orphaned context.
      TabContext.prototype.onTab = function(tab) {
          if ( tab ) {
              this.timer = vAPI.setTimeout(this.onTimerCallback, gcPeriod);
          } else {
              this.destroy();
          }
      };

      // Housekeeping timer fired: ask for the tab, onTab() gets the answer.
      TabContext.prototype.onTimer = function() {
          this.timer = null;
          if ( vAPI.isBehindTheSceneTabId(this.tabId) ) {
              return;
          }
          vAPI.tabs.get(this.tabId, this.onTabCallback);
      };

      // This takes care of orphanized tab contexts. Can't be started for all
      // contexts, as the behind-the-scene context is permanent -- so we do
      // not want to flush it.
      TabContext.prototype.autodestroy = function() {
          if ( vAPI.isBehindTheSceneTabId(this.tabId) ) {
              return;
          }
          this.onTabCallback = this.onTab.bind(this);
          this.onTimerCallback = this.onTimer.bind(this);
          this.timer = vAPI.setTimeout(this.onTimerCallback, gcPeriod);
      };

      // Force all derived properties to match the most current root URL,
      // i.e. the top of the stack. All are reset when the stack is empty.
      TabContext.prototype.update = function() {
          var top = this.stack.length - 1;
          if ( top < 0 ) {
              this.rawURL = '';
              this.normalURL = '';
              this.scheme = '';
              this.rootHostname = '';
              this.rootDomain = '';
          } else {
              this.rawURL = this.stack[top];
              this.normalURL = µm.normalizePageURL(this.tabId, this.rawURL);
              this.scheme = µm.URI.schemeFromURI(this.rawURL);
              this.rootHostname = µm.URI.hostnameFromURI(this.normalURL);
              this.rootDomain = µm.URI.domainFromHostname(this.rootHostname) || this.rootHostname;
          }
          this.secure = µm.URI.isSecureScheme(this.scheme);
      };

      // Called whenever a candidate root URL is spotted for the tab. Pushing
      // the URL already at the top of the stack is a no-op.
      TabContext.prototype.push = function(url) {
          if ( vAPI.isBehindTheSceneTabId(this.tabId) ) {
              return;
          }
          var count = this.stack.length;
          if ( count !== 0 && this.stack[count - 1] === url ) {
              return;
          }
          this.stack.push(url);
          this.update();
      };

      // Called when a former push is a false positive:
      // https://github.com/chrisaljoudi/uBlock/issues/516
      TabContext.prototype.unpush = function(url) {
          if ( vAPI.isBehindTheSceneTabId(this.tabId) ) {
              return;
          }
          // We are not going to unpush if there is no other candidate, the
          // point of unpush is to make space for a better candidate.
          if ( this.stack.length === 1 ) {
              return;
          }
          var pos = this.stack.indexOf(url);
          if ( pos === -1 ) {
              return;
          }
          // At this point the stack holds at least two entries, hence it
          // cannot become empty as a result of the removal.
          this.stack.splice(pos, 1);
          // The derived properties are stale only if the top was removed.
          if ( pos === this.stack.length ) {
              this.update();
          }
      };

      // This tells that the url is definitely the one to be associated with
      // the tab, there is no longer any ambiguity about which root URL is
      // really sitting in which tab: all other candidates are dropped.
      TabContext.prototype.commit = function(url) {
          if ( vAPI.isBehindTheSceneTabId(this.tabId) ) {
              return;
          }
          this.stack = [url];
          this.update();
      };

      // What follows is the API of the tab context manager.

      // Record `url` as a candidate root URL for `tabId`, creating the tab
      // context (with housekeeping enabled) if needed. The URL also becomes
      // the most recent root document URL. Returns the tab context.
      var push = function(tabId, url) {
          var entry = tabContexts[tabId];
          if ( entry === undefined ) {
              entry = new TabContext(tabId);
              entry.autodestroy();
          }
          entry.push(url);
          mostRecentRootDocURL = url;
          mostRecentRootDocURLTimestamp = Date.now();
          return entry;
      };

      // Find a tab context for a specific tab. If none is found, attempt to
      // fix this. When all fail, the behind-the-scene context is returned.
      var mustLookup = function(tabId, url) {
          var entry;
          if ( url !== undefined ) {
              entry = push(tabId, url);
          } else {
              entry = tabContexts[tabId];
          }
          if ( entry !== undefined ) {
              return entry;
          }
          // https://github.com/chrisaljoudi/uBlock/issues/1025
          // Google Hangout popup opens without a root frame. So for now we
          // will just discard the best-guess root frame once it is more than
          // 500 ms old, at which point it ceases to be a "best guess".
          if ( mostRecentRootDocURL !== '' && mostRecentRootDocURLTimestamp + 500 < Date.now() ) {
              mostRecentRootDocURL = '';
          }
          // https://github.com/chrisaljoudi/uBlock/issues/1001
          // Not a behind-the-scene request, yet no tab context found for the
          // tab id: we will thus bind the last-seen root document to the
          // unbound tab. It's a guess, but better than ending up filtering
          // nothing at all.
          if ( mostRecentRootDocURL !== '' ) {
              return push(tabId, mostRecentRootDocURL);
          }
          // If all else fail, re-categorize the request as behind-the-scene.
          // At least this ensures that ultimately the user can still
          // inspect/filter those net requests which were about to fall
          // through the cracks.
          // Example: Chromium + case #12 at
          // http://raymondhill.net/ublock/popup.html
          return tabContexts[vAPI.noTabId];
      };

      // Definitely associate `url` with `tabId`. Returns the tab context.
      var commit = function(tabId, url) {
          var entry = tabContexts[tabId];
          if ( entry === undefined ) {
              entry = push(tabId, url);
          } else {
              entry.commit(url);
          }
          return entry;
      };

      // Withdraw a candidate root URL from a tab, if the tab is known.
      var unpush = function(tabId, url) {
          var entry = tabContexts[tabId];
          if ( entry !== undefined ) {
              entry.unpush(url);
          }
      };

      // Return the tab context of `tabId`, or null if there is none.
      var lookup = function(tabId) {
          return tabContexts[tabId] || null;
      };

      // Behind-the-scene tab context: created once, never destroyed.
      (function() {
          var entry = new TabContext(vAPI.noTabId);
          entry.stack.push('');
          entry.rawURL = '';
          entry.normalURL = µm.normalizePageURL(entry.tabId);
          entry.rootHostname = µm.URI.hostnameFromURI(entry.normalURL);
          entry.rootDomain = µm.URI.domainFromHostname(entry.rootHostname) || entry.rootHostname;
      })();

      // Context object, typically to be used to feed filtering engines.
      // A tab without a tab context is given the root hostname/domain of the
      // behind-the-scene context (the same last resort mustLookup() uses)
      // rather than causing a TypeError on a null tab context.
      var Context = function(tabId) {
          var tabContext = lookup(tabId) || tabContexts[vAPI.noTabId];
          this.rootHostname = tabContext.rootHostname;
          this.rootDomain = tabContext.rootDomain;
          this.pageHostname = '';
          this.pageDomain = '';
          this.requestURL = '';
          this.requestHostname = '';
          this.requestDomain = '';
      };

      var createContext = function(tabId) {
          return new Context(tabId);
      };

      return {
          push: push,
          unpush: unpush,
          commit: commit,
          lookup: lookup,
          mustLookup: mustLookup,
          createContext: createContext
      };
  })();
  329. /******************************************************************************/
  330. /******************************************************************************/
  // Navigation event for a tab: the tab content has changed. The navigated
  // URL is committed as the root document of the tab, then the tab is bound
  // to a page store for that URL.
  vAPI.tabs.onNavigation = function(details) {
      var tabId = details.tabId;
      // A behind-the-scene tab id actually can be seen here: nothing to do
      // in such case.
      if ( !vAPI.isBehindTheSceneTabId(tabId) ) {
          µm.tabContextManager.commit(tabId, details.url);
          µm.bindTabToPageStats(tabId, 'commit');
      }
  };
  343. /******************************************************************************/
  // The URL in a tab may change while the page's document stays the same
  // (for instance, Google Maps). Without this listener, the extension icon
  // won't be properly refreshed.
  vAPI.tabs.onUpdated = function(tabId, changeInfo, tab) {
      // Tab without a URL: nothing to bind to.
      if ( !tab.url ) {
          return;
      }
      // A behind-the-scene tab id actually can be seen here.
      if ( vAPI.isBehindTheSceneTabId(tabId) ) {
          return;
      }
      // Only a change of URL is of interest.
      var newURL = changeInfo.url;
      if ( !newURL ) {
          return;
      }
      µm.tabContextManager.commit(tabId, newURL);
      µm.bindTabToPageStats(tabId, 'updated');
  };
  360. /******************************************************************************/
  // A closed tab no longer needs its page store: unbind it.
  vAPI.tabs.onClosed = function(closedTabId) {
      µm.unbindTabFromPageStats(closedTabId);
  };

  /******************************************************************************/

  // All three tab event handlers are assigned above: let vAPI register them.
  vAPI.tabs.registerListeners();
  366. /******************************************************************************/
  367. /******************************************************************************/
  /**
   * Bind a tab to a page store matching the tab's current root document,
   * creating (or resurrecting) a page store if needed.
   *
   * @param {*} tabId - Id of the tab to bind.
   * @param {string} [context] - Why the binding is requested:
   *     - 'weak': never replace an existing page store;
   *     - 'updated': the URL changed, reuse the existing page store if the
   *       root hostname is unchanged;
   *     - anything else (e.g. 'commit'): rebind if the URL changed.
   * @returns {Object} The page store now bound to the tab. For a
   *     behind-the-scene tab id, whatever `this.pageStores` holds for it.
   * @throws {Error} If the tab context manager knows nothing about the tab.
   */
  µm.bindTabToPageStats = function(tabId, context) {
      this.updateBadgeAsync(tabId);

      var tabContext = this.tabContextManager.lookup(tabId);
      if ( tabContext === null ) {
          throw new Error('Unmanaged tab id: ' + tabId);
      }

      // rhill 2013-11-24: Never ever rebind behind-the-scene
      // virtual tab.
      // https://github.com/gorhill/httpswitchboard/issues/67
      if ( vAPI.isBehindTheSceneTabId(tabId) ) {
          return this.pageStores[tabId];
      }

      var normalURL = tabContext.normalURL;
      var pageStore = this.pageStores[tabId] || null;

      // The previous page store, if any, associated with the tab
      if ( pageStore !== null ) {
          // No change, do not rebind
          if ( pageStore.pageUrl === normalURL ) {
              return pageStore;
          }

          // Do not change anything if it's weak binding -- typically when
          // binding from network request handler.
          if ( context === 'weak' ) {
              return pageStore;
          }

          // https://github.com/gorhill/uMatrix/issues/37
          // Just rebind whenever possible: the URL changed, but the document
          // maybe is the same.
          // Example: Google Maps, Github
          // https://github.com/gorhill/uMatrix/issues/72
          // Need to double-check that the new scope is same as old scope
          if ( context === 'updated' && pageStore.pageHostname === tabContext.rootHostname ) {
              pageStore.rawURL = tabContext.rawURL;
              pageStore.normalURL = normalURL;
              // `pageUrl` is the property compared against the normalized
              // URL above, and used as key when the page store is unbound:
              // it must follow the URL change, or else the page store is
              // seen as stale the next time the tab is bound.
              pageStore.pageUrl = normalURL;
              return pageStore;
          }

          // We won't be reusing this page store.
          this.unbindTabFromPageStats(tabId);
      }

      // Try to resurrect first.
      pageStore = this.resurrectPageStore(tabId, normalURL);
      if ( pageStore === null ) {
          pageStore = this.PageStore.factory(tabContext);
      }
      this.pageStores[tabId] = pageStore;

      return pageStore;
  };
  419. /******************************************************************************/
  // Detach the page store currently bound to a tab, if any. The page store is
  // not disposed right away: it is parked in the cemetery, keyed by tab id
  // then by page URL, and its incineration is scheduled 4 minutes later.
  µm.unbindTabFromPageStats = function(tabId) {
      // Never unbind behind-the-scene page store.
      if ( vAPI.isBehindTheSceneTabId(tabId) ) {
          return;
      }

      var departed = this.pageStores[tabId];
      if ( !departed ) {
          return;
      }
      delete this.pageStores[tabId];

      // Cancel whatever incineration was already pending for this store.
      if ( departed.incinerationTimer ) {
          clearTimeout(departed.incinerationTimer);
          departed.incinerationTimer = null;
      }

      var cemetery = this.pageStoreCemetery;
      if ( !cemetery.hasOwnProperty(tabId) ) {
          cemetery[tabId] = {};
      }

      var url = departed.pageUrl;
      cemetery[tabId][url] = departed;

      var incinerate = this.incineratePageStore.bind(this, tabId, url);
      departed.incinerationTimer = vAPI.setTimeout(incinerate, 4 * 60 * 1000);
  };
  445. /******************************************************************************/
  // Take back from the cemetery the page store parked for a given tab id and
  // page URL. The pending incineration is cancelled, and the tab's crypt is
  // removed once empty. Returns the page store, or null if there is none.
  µm.resurrectPageStore = function(tabId, pageURL) {
      var cemetery = this.pageStoreCemetery;
      if ( !cemetery.hasOwnProperty(tabId) ) {
          return null;
      }

      var crypt = cemetery[tabId];
      if ( !crypt.hasOwnProperty(pageURL) ) {
          return null;
      }

      var revived = crypt[pageURL];
      if ( revived.incinerationTimer !== null ) {
          clearTimeout(revived.incinerationTimer);
          revived.incinerationTimer = null;
      }

      delete crypt[pageURL];
      if ( Object.keys(crypt).length === 0 ) {
          delete cemetery[tabId];
      }

      return revived;
  };
  465. /******************************************************************************/
  // Definitely get rid of the page store parked in the cemetery for a given
  // tab id and page URL: it is removed from its crypt (the crypt itself is
  // removed once empty), then disposed. No-op if there is no such page store.
  µm.incineratePageStore = function(tabId, pageURL) {
      var cemetery = this.pageStoreCemetery;
      if ( !cemetery.hasOwnProperty(tabId) ) {
          return;
      }

      var crypt = cemetery[tabId];
      if ( !crypt.hasOwnProperty(pageURL) ) {
          return;
      }

      var condemned = crypt[pageURL];
      if ( condemned.incinerationTimer !== null ) {
          clearTimeout(condemned.incinerationTimer);
          condemned.incinerationTimer = null;
      }

      delete crypt[pageURL];
      if ( Object.keys(crypt).length === 0 ) {
          delete cemetery[tabId];
      }

      condemned.dispose();
  };
  485. /******************************************************************************/
  // Return the page store bound to a tab, or null if there is none.
  µm.pageStoreFromTabId = function(tabId) {
      var found = this.pageStores[tabId];
      return found ? found : null;
  };
  // Return the page store bound to a tab; when the tab has none, the page
  // store registered under the behind-the-scene tab id is returned instead.
  µm.mustPageStoreFromTabId = function(tabId) {
      var found = this.pageStores[tabId];
      if ( found ) {
          return found;
      }
      return this.pageStores[vAPI.noTabId];
  };
  493. /******************************************************************************/
  // Record a network request against the page store bound to a tab, and
  // write it to the logger. Nothing is recorded nor logged when the tab has
  // no page store.
  µm.recordFromTabId = function(tabId, type, url, blocked) {
      var store = this.pageStoreFromTabId(tabId);
      if ( store !== null ) {
          store.recordRequest(type, url, blocked);
          this.logger.writeOne(tabId, 'net', store.pageHostname, url, type, blocked);
      }
  };
  503. /******************************************************************************/
  // Ask vAPI to reload a tab with the `bypassCache` option set.
  µm.forceReload = function(tabId) {
      var reloadOptions = { bypassCache: true };
      vAPI.tabs.reload(tabId, reloadOptions);
  };
  507. /******************************************************************************/
  // Stale page store entries janitor
  // https://github.com/chrisaljoudi/uBlock/issues/455
  //
  // Every 7 minutes, a sample of at most 11 page stores is checked: those
  // for which no tab is reported are unbound. Each run resumes where the
  // previous one stopped, wrapping around once past the end of the (sorted)
  // list of tab ids.

  (function() {
      var cleanupPeriod = 7 * 60 * 1000;
      var cleanupSampleSize = 11;
      var cleanupSampleAt = 0;

      // Unbind the page store of `tabId` if no tab is reported for that id.
      var unbindIfTabIsGone = function(tabId) {
          vAPI.tabs.get(tabId, function(tab) {
              if ( !tab ) {
                  µm.unbindTabFromPageStats(tabId);
              }
          });
      };

      var cleanup = function() {
          var tabIds = Object.keys(µm.pageStores).sort();

          // Wrap around when the previous run reached the end of the list.
          var begin = cleanupSampleAt < tabIds.length ? cleanupSampleAt : 0;
          var end = Math.min(begin + cleanupSampleSize, tabIds.length);

          tabIds.slice(begin, end).forEach(function(tabId) {
              // The behind-the-scene page store is never a candidate.
              if ( !vAPI.isBehindTheSceneTabId(tabId) ) {
                  unbindIfTabIsGone(tabId);
              }
          });

          cleanupSampleAt = end;
          vAPI.setTimeout(cleanup, cleanupPeriod);
      };

      vAPI.setTimeout(cleanup, cleanupPeriod);
  })();
  541. /******************************************************************************/
  542. })();