/*******************************************************************************
µMatrix - a Chromium browser extension to black/white list requests.
Copyright (C) 2014 Raymond Hill
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see {http://www.gnu.org/licenses/}.
Home: https://github.com/gorhill/uMatrix
*/
/* global chrome, µMatrix */
/******************************************************************************/
// Start isolation from global scope
µMatrix.webRequest = (function() {
/******************************************************************************/
// The `id='uMatrix'` is important, it allows µMatrix to detect whether a
// specific data URI originates from itself.
// HTML template for the page shown in place of a blocked root (main_frame)
// document. onBeforeRootFrameRequestHandler() substitutes the `{{cssURL}}`,
// `{{hostname}}`, `{{originalURL}}` and `{{now}}` placeholders and serves the
// result as a base64-encoded `data:text/html` URI.
// NOTE(review): only `{{hostname}}` is visible in the pieces below, and several
// literals span raw line breaks -- the markup appears to have been stripped
// from this copy of the file. Confirm against the upstream source before
// relying on the exact content of this template.
var rootFrameReplacement = [
'',
'
',
'',
'',
'',
'Blocked by μMatrix',
'',
'',
'
',
'
{{hostname}} blocked by μMatrix
',
'
',
'',
''
].join('');
// HTML template for the placeholder shown in place of a blocked sub-frame.
// onBeforeRequestHandler() substitutes the `{{hostname}}`, `{{frameSrc}}`,
// `{{subframeColor}}` and `{{subframeOpacity}}` placeholders and serves the
// result as a URI-encoded `data:text/html` URI.
// NOTE(review): none of these placeholders is visible in the pieces below, and
// one literal spans a raw line break -- the markup appears to have been
// stripped from this copy of the file. Confirm against the upstream source.
var subFrameReplacement = [
'',
'',
'',
'',
'',
'Blocked by μMatrix',
'',
'',
'
',
'',
''
].join('');
/******************************************************************************/
// If it is HTTP Switchboard's root frame replacement URL, verify that
// the page that was blacklisted is still blacklisted, and if not,
// redirect to the previously blacklisted page.
// Handles a request for the noop CSS resource referenced by the blocked root
// frame replacement page. The request URL is expected to carry the originally
// blocked page as `url=...&hostname=...`. When the matrix no longer blocks
// that page, an async job is queued to navigate the tab back to it.
//
// details: webRequest details object; `url` and `tabId` are read.
// Returns: always undefined (the request itself is never altered).
var onBeforeChromeExtensionRequestHandler = function(details) {
    // Extract the blocked page URL and hostname from the query string.
    var parsed = details.url.match(/url=([^&]+)&hostname=([^&]+)/);
    if ( !parsed ) {
        return;
    }

    var µm = µMatrix;
    var blockedURL = decodeURIComponent(parsed[1]);
    var blockedHostname = decodeURIComponent(parsed[2]);

    // Still blacklisted as per matrix? Then leave the replacement page alone.
    var stillBlocked = µm.mustBlock(µm.scopeFromURL(blockedURL), blockedHostname, 'doc');
    if ( stillBlocked ) {
        return;
    }

    // No longer blocked: schedule a navigation back to the original page.
    var tabId = details.tabId;
    µm.asyncJobs.add(
        'gotoURL-' + tabId,
        { tabId: tabId, url: blockedURL },
        µm.utils.gotoURL,
        200,
        false
    );
};
/******************************************************************************/
// Intercept and filter web requests according to white and black lists.
// Filters a root (main_frame) document request against the matrix.
//
// details: webRequest details object; `tabId` and `url` are read.
// Returns: undefined when the request is not http(s) or is allowed, otherwise
// `{ redirectUrl }` pointing to a base64 `data:` URI built from
// `rootFrameReplacement`.
var onBeforeRootFrameRequestHandler = function(details) {
    var µm = µMatrix;

    // Traffic outside tabs is attributed to the behind-the-scene tab id;
    // a real tab gets bound to a page store for this URL.
    var tabId;
    if ( details.tabId < 0 ) {
        tabId = µm.behindTheSceneTabId;
    } else {
        tabId = details.tabId;
        µm.bindTabToPageStats(tabId, details.url);
    }

    var uri = µm.URI.set(details.url);
    if ( uri.scheme.indexOf('http') === -1 ) {
        return;
    }
    var requestURL = uri.normalizedURI();
    var requestHostname = uri.hostname;
    var pageStore = µm.pageStatsFromTabId(tabId);

    // Disallow request as per matrix?
    var block = µm.mustBlock(pageStore.pageHostname, requestHostname, 'doc');

    if ( block ) {
        // Blacklisted: log it now. Allowed root documents are not logged here;
        // see https://github.com/gorhill/httpswitchboard/issues/112
        pageStore.recordRequest('doc', requestURL, block);

        // The replacement page links to the noop CSS resource, which allows
        // detecting later that the page has been unblocked by the user.
        var html = rootFrameReplacement
            .replace('{{cssURL}}', µm.noopCSSURL)
            .replace(/{{hostname}}/g, encodeURIComponent(requestHostname))
            .replace('{{originalURL}}', encodeURIComponent(requestURL))
            .replace('{{now}}', String(Date.now()));
        return { 'redirectUrl': 'data:text/html;base64,' + btoa(html) };
    }

    // Whitelisted: recording page cookies is senseless for behind-the-scene
    // requests, so do it only for real tabs.
    if ( tabId !== µm.behindTheSceneTabId ) {
        µm.cookieHunter.recordPageCookies(pageStore);
    }
};
/******************************************************************************/
// Intercept and filter web requests according to white and black lists.
// `onBeforeRequest` listener: intercepts and filters web requests according
// to the matrix.
//
// details: webRequest details object; `url`, `type`, `parentFrameId` and
// `tabId` are read.
// Returns:
// - whatever onBeforeRootFrameRequestHandler() returns for root documents;
// - whatever onBeforeChromeExtensionRequestHandler() returns for the noop CSS;
// - undefined when the request is ignored or allowed;
// - `{ redirectUrl }` (placeholder page) for a blocked sub-frame;
// - `{ cancel: true }` for any other blocked request.
var onBeforeRequestHandler = function(details) {
    var µm = µMatrix;
    var µmuri = µm.URI.set(details.url);
    var requestScheme = µmuri.scheme;

    // rhill 2014-02-17: Ignore 'filesystem:': this can happen when listening
    // to 'chrome-extension://'.
    if ( requestScheme === 'filesystem' ) {
        return;
    }

    var requestType = requestTypeNormalizer[details.type];

    // https://github.com/gorhill/httpswitchboard/issues/303
    // Root documents are handled separately, wherever they come from.
    if ( requestType === 'doc' && details.parentFrameId < 0 ) {
        return onBeforeRootFrameRequestHandler(details);
    }

    var requestURL = details.url;

    // Is it µMatrix's noop css file?
    if ( requestType === 'css' && requestURL.slice(0, µm.noopCSSURL.length) === µm.noopCSSURL ) {
        return onBeforeChromeExtensionRequestHandler(details);
    }

    // Ignore non-http schemes
    if ( requestScheme.indexOf('http') !== 0 ) {
        return;
    }

    // Do not block myself from updating assets
    // https://github.com/gorhill/httpswitchboard/issues/202
    if ( requestType === 'xhr' && requestURL.slice(0, µm.projectServerRoot.length) === µm.projectServerRoot ) {
        return;
    }

    var requestHostname = µmuri.hostname;

    // rhill 2013-12-15:
    // Try to transpose generic `other` category into something more
    // meaningful.
    if ( requestType === 'other' ) {
        requestType = µm.transposeType(requestType, µmuri.path);
    }

    // Re-classify orphan HTTP requests as behind-the-scene requests.
    // https://github.com/gorhill/httpswitchboard/issues/191
    // https://github.com/gorhill/httpswitchboard/issues/91#issuecomment-37180275
    var pageStore = µm.pageStatsFromTabId(details.tabId);
    if ( !pageStore ) {
        pageStore = µm.pageStatsFromTabId(µm.behindTheSceneTabId);
    }

    // Disallow request as per temporary matrix?
    var block = µm.mustBlock(pageStore.pageHostname, requestHostname, requestType);

    // Record request, whether blocked or not.
    pageStore.recordRequest(requestType, requestURL, block);

    // whitelisted?
    if ( !block ) {
        return;
    }

    // Blacklisted sub-frame: redirect to a placeholder page.
    if ( requestType === 'frame' ) {
        // Values are inserted through replacer functions rather than
        // replacement strings: a replacement *string* is scanned for special
        // patterns (`$&`, `$1`, `$$`, ...), which would corrupt the page when
        // the blocked URL happens to contain such a sequence.
        var literal = function(text) {
            return function() {
                return text;
            };
        };
        var html = subFrameReplacement
            .replace(/{{hostname}}/g, literal(requestHostname))
            .replace('{{frameSrc}}', literal(requestURL))
            .replace(/{{subframeColor}}/g, literal(µm.userSettings.subframeColor))
            .replace('{{subframeOpacity}}', literal((µm.userSettings.subframeOpacity / 100).toFixed(1)));
        return { 'redirectUrl': 'data:text/html,' + encodeURIComponent(html) };
    }

    return { 'cancel': true };
};
/******************************************************************************/
// This is where tabless requests are processed, as here there may be a chance
// we can bind a request to a specific tab, as headers may contain useful
// information to accomplish this.
//
// Also we sanitize outgoing headers as per user settings.
// `onBeforeSendHeaders` listener: cancels hyperlink-auditing pings when the
// user setting asks for it, otherwise sanitizes outgoing headers (cookies,
// referrer, user agent) as per matrix rules and switches.
//
// details: webRequest details object; `tabId`, `url`, `type` and
// `requestHeaders` are read, and `requestHeaders` may be modified in place.
// Returns: `{ cancel: true }` for a foiled ping, `{ requestHeaders }` when at
// least one header was changed, undefined otherwise.
var onBeforeSendHeadersHandler = function(details) {
    var µm = µMatrix;

    // Requests which cannot be bound to a known tab are treated as
    // behind-the-scene requests.
    // https://github.com/gorhill/httpswitchboard/issues/191
    var pageStore = µm.pageStatsFromTabId(details.tabId);
    if ( !pageStore ) {
        pageStore = µm.pageStatsFromTabId(µm.behindTheSceneTabId);
    }

    var requestURL = details.url;

    // https://github.com/gorhill/httpswitchboard/issues/342
    // Hyperlink auditing is reported in the request log using a synthetic
    // URL, so that the user is better informed of what went on.
    if ( requestTypeNormalizer[details.type] === 'other' ) {
        var pingTarget = hyperlinkAuditorFromHeaders(details.requestHeaders);
        if ( pingTarget ) {
            var foilPing = µm.userSettings.processHyperlinkAuditing;
            pageStore.recordRequest('other', requestURL + '{Ping-To:' + pingTarget + '}', foilPing);
            if ( foilPing ) {
                µm.hyperlinkAuditingFoiledCounter += 1;
                return { 'cancel': true };
            }
        }
    }

    // Request is not blocked at this point: what is left is header cleanup.
    var requestHostname = µm.hostnameFromURL(requestURL);
    var modified = false;

    if ( µm.mustBlock(pageStore.pageHostname, requestHostname, 'cookie') ) {
        if ( foilCookieHeaders(µm, details) ) {
            modified = true;
        }
    }
    if ( µm.tMatrix.evaluateSwitchZ('referrer-spoof', pageStore.pageHostname) ) {
        if ( foilRefererHeaders(µm, requestHostname, details) ) {
            modified = true;
        }
    }
    // https://github.com/gorhill/httpswitchboard/issues/252
    // The effective user agent is not stored anywhere: a mismatch with
    // `window.navigator` is deemed unlikely and benign.
    if ( µm.tMatrix.evaluateSwitchZ('ua-spoof', pageStore.pageHostname) ) {
        if ( foilUserAgent(µm, details) ) {
            modified = true;
        }
    }

    if ( modified ) {
        return { requestHeaders: details.requestHeaders };
    }
};
/******************************************************************************/
// http://www.whatwg.org/specs/web-apps/current-work/multipage/links.html#hyperlink-auditing
//
// Target URL = the href of the link
// Doc URL = URL of the document containing the target URL
// Ping URLs = servers which will be told that user clicked target URL
//
// `Content-Type` = `text/ping` (always present)
// `Ping-To` = target URL (always present)
// `Ping-From` = doc URL
// `Referer` = doc URL
// request URL = URL which will receive the information
//
// With hyperlink-auditing, removing header(s) is pointless, the whole
// request must be cancelled.
// Looks for a `Ping-To` header (case-insensitive), scanning from the last
// header to the first.
//
// headers: array of `{ name, value }` header entries.
// Returns: the value of the matching header, or undefined when there is none.
var hyperlinkAuditorFromHeaders = function(headers) {
    for ( var idx = headers.length - 1; idx >= 0; idx-- ) {
        var entry = headers[idx];
        if ( entry.name.toLowerCase() === 'ping-to' ) {
            return entry.value;
        }
    }
    return undefined;
};
/******************************************************************************/
// Resolves a tab id from the page URL found in a `Referer` or `Ping-From`
// header, whichever is met first when scanning from the last header to the
// first.
//
// µm: object providing `tabIdFromPageUrl(url)`.
// headers: array of `{ name, value }` header entries.
// Returns: the result of `µm.tabIdFromPageUrl()`, or -1 when neither header
// is present.
var tabIdFromHeaders = function(µm, headers) {
    for ( var idx = headers.length - 1; idx >= 0; idx -= 1 ) {
        var entry = headers[idx];
        var lowered = entry.name.toLowerCase();
        if ( lowered === 'referer' || lowered === 'ping-from' ) {
            return µm.tabIdFromPageUrl(entry.value);
        }
    }
    return -1;
};
/******************************************************************************/
// Strips every `Cookie` header (case-insensitive) from the outgoing request.
//
// µm: object whose `cookieHeaderFoiledCounter` is incremented once per
//     removed header.
// details: webRequest details object; `requestHeaders` is modified in place.
// Returns: true when at least one header was removed, false otherwise.
var foilCookieHeaders = function(µm, details) {
    var headers = details.requestHeaders;
    var removedAny = false;
    // Walk backward so that splicing does not disturb pending indices.
    for ( var idx = headers.length - 1; idx >= 0; idx-- ) {
        if ( headers[idx].name.toLowerCase() === 'cookie' ) {
            headers.splice(idx, 1);
            µm.cookieHeaderFoiledCounter++;
            removedAny = true;
        }
    }
    return removedAny;
};
/******************************************************************************/
// Replaces a third-party `Referer` header with the root URL of the
// destination. The last `Referer` header in the list is the one considered.
//
// µm: object providing `URI.domainFromURI()`, `URI.domainFromHostname()`,
//     `URI.schemeFromURI()` and a `refererHeaderFoiledCounter` property.
// toHostname: hostname of the request destination.
// details: webRequest details object; `requestHeaders` and `url` are read.
// Returns: true when the header value was replaced, false when there is no
// `Referer` header or when its domain matches the destination's domain.
var foilRefererHeaders = function(µm, toHostname, details) {
    var headers = details.requestHeaders;
    var referer = null;
    for ( var idx = headers.length - 1; idx >= 0 && referer === null; idx-- ) {
        if ( headers[idx].name.toLowerCase() === 'referer' ) {
            referer = headers[idx];
        }
    }
    if ( referer === null ) {
        return false;
    }

    var uriTools = µm.URI;
    var sourceDomain = uriTools.domainFromURI(referer.value);
    var targetDomain = uriTools.domainFromHostname(toHostname);
    if ( targetDomain === sourceDomain ) {
        return false;
    }

    // https://github.com/gorhill/httpswitchboard/issues/222#issuecomment-44828402
    referer.value = uriTools.schemeFromURI(details.url) + '://' + toHostname + '/';
    µm.refererHeaderFoiledCounter++;
    return true;
};
/******************************************************************************/
// Overwrites the first `User-Agent` header (case-insensitive) with
// `µm.userAgentReplaceStr`.
//
// µm: object providing `userAgentReplaceStr`.
// details: webRequest details object; `requestHeaders` is read and the
//          matching entry is modified in place.
// Returns: true when a header was overwritten, false otherwise.
var foilUserAgent = function(µm, details) {
    var headers = details.requestHeaders;
    // The scan stops at the first missing (falsy) entry.
    for ( var idx = 0; headers[idx]; idx += 1 ) {
        var entry = headers[idx];
        if ( entry.name.toLowerCase() === 'user-agent' ) {
            // Assuming only one `user-agent` entry
            entry.value = µm.userAgentReplaceStr;
            return true;
        }
    }
    return false;
};
/******************************************************************************/
// To prevent inline javascript from being executed.
// Prevent inline scripting using `Content-Security-Policy`:
// https://dvcs.w3.org/hg/content-security-policy/raw-file/tip/csp-specification.dev.html
// This fixes:
// https://github.com/gorhill/httpswitchboard/issues/35
// `onHeadersReceived` listener: dispatches main documents and sub-frames to
// their dedicated handlers; every other request is left untouched.
//
// details: webRequest details object; `url` and `type` are read.
// Returns: the result of the dedicated handler, or undefined when the URL
// does not start with 'http' or the type is neither 'doc' nor 'frame'.
var onHeadersReceived = function(details) {
    // Ignore schemes other than 'http...'
    if ( details.url.slice(0, 4) !== 'http' ) {
        return;
    }
    switch ( requestTypeNormalizer[details.type] ) {
    case 'frame':
        return onSubDocHeadersReceived(details);
    case 'doc':
        return onMainDocHeadersReceived(details);
    default:
        return;
    }
};
/******************************************************************************/
// Handles response headers of a main (root) document:
// - binds the tab to a page store for the request URL;
// - on a 301/302/303/307/308 status line, remembers the redirect in
//   `µm.redirectRequests` (keyed by destination URL);
// - on a 200 status line, unwinds any remembered redirect chain and logs each
//   URL of the chain, oldest first, as a non-blocked 'doc' request;
// - optionally appends a `Content-Security-Policy` response header.
//
// details: webRequest details object; `tabId`, `url`, `statusLine` and
// `responseHeaders` are read, and `responseHeaders` may be appended to.
// Returns: `{ responseHeaders }` when a CSP header was added, undefined
// otherwise.
var onMainDocHeadersReceived = function(details) {
// console.debug('onMainDocHeadersReceived()> "%s": %o', details.url, details);
var µm = µMatrix;
// Do not ignore traffic outside tabs.
// https://github.com/gorhill/httpswitchboard/issues/91#issuecomment-37180275
var tabId = details.tabId;
if ( tabId < 0 ) {
tabId = µm.behindTheSceneTabId;
}
// Scheme, hostname and normalized URL are captured right away: `set()` is
// called again on the same object further down when parsing a `Location`
// header (presumably overwriting its fields -- confirm against µm.URI).
var µmuri = µm.URI.set(details.url);
var requestURL = µmuri.normalizedURI();
var requestScheme = µmuri.scheme;
var requestHostname = µmuri.hostname;
// rhill 2013-12-07:
// Apparently in Opera, onBeforeRequest() is triggered while the
// URL is not yet bound to a tab (-1), which caused the code here
// to not be able to lookup the pageStats. So let the code here bind
// the page to a tab if not done yet.
// https://github.com/gorhill/httpswitchboard/issues/75
µm.bindTabToPageStats(tabId, requestURL);
// Re-classify orphan HTTP requests as behind-the-scene requests. There is
// not much else which can be done, because there are URLs
// which cannot be handled by HTTP Switchboard, i.e. `opera://startpage`,
// as this would lead to complications with no obvious solution, like how
// to scope on unknown scheme? Etc.
// https://github.com/gorhill/httpswitchboard/issues/191
// https://github.com/gorhill/httpswitchboard/issues/91#issuecomment-37180275
var pageStats = µm.pageStatsFromTabId(tabId);
if ( !pageStats ) {
tabId = µm.behindTheSceneTabId;
pageStats = µm.pageStatsFromTabId(tabId);
}
var headers = details.responseHeaders;
// Simplify code paths by splitting func in two different handlers, one
// for main docs, one for sub docs.
// rhill 2014-01-15: Report redirects.
// https://github.com/gorhill/httpswitchboard/issues/112
// rhill 2014-02-10: Handle all redirects.
// https://github.com/gorhill/httpswitchboard/issues/188
if ( /\s+30[12378]\s+/.test(details.statusLine) ) {
var i = headerIndexFromName('location', headers);
if ( i >= 0 ) {
// rhill 2014-01-20: Be ready to handle relative URLs.
// https://github.com/gorhill/httpswitchboard/issues/162
// When the `Location` value has no authority part, the destination is
// rebuilt from the scheme and hostname of the current request plus the
// parsed path.
var locationURL = µmuri.set(headers[i].value.trim()).normalizedURI();
if ( µmuri.authority === '' ) {
locationURL = requestScheme + '://' + requestHostname + µmuri.path;
}
// Map destination -> source, so the chain can be walked backward later.
µm.redirectRequests[locationURL] = requestURL;
}
// console.debug('onMainDocHeadersReceived()> redirect "%s" to "%s"', requestURL, headers[i].value);
}
// rhill 2014-01-15: Report redirects if any.
// https://github.com/gorhill/httpswitchboard/issues/112
if ( details.statusLine.indexOf(' 200') > 0 ) {
// Walk the redirect map backward from the final URL, consuming entries as
// they are visited, then log from the oldest URL to the final one.
var mainFrameStack = [requestURL];
var destinationURL = requestURL;
var sourceURL;
while ( sourceURL = µm.redirectRequests[destinationURL] ) {
mainFrameStack.push(sourceURL);
delete µm.redirectRequests[destinationURL];
destinationURL = sourceURL;
}
while ( destinationURL = mainFrameStack.pop() ) {
pageStats.recordRequest('doc', destinationURL, false);
}
}
// Maybe modify inbound headers
var csp = '';
// Enforce strict HTTPS?
if ( requestScheme === 'https' && µm.tMatrix.evaluateSwitchZ('https-strict', pageStats.pageHostname) ) {
csp += "default-src chrome-search: data: https: wss: 'unsafe-eval' 'unsafe-inline';";
}
// https://github.com/gorhill/httpswitchboard/issues/181
// The script-blocking verdict is also stored on the page store.
pageStats.pageScriptBlocked = µm.mustBlock(pageStats.pageHostname, requestHostname, 'script');
if ( pageStats.pageScriptBlocked ) {
// If javascript not allowed, say so through a `Content-Security-Policy` directive.
// console.debug('onMainDocHeadersReceived()> PAGE CSP "%s": %o', details.url, details);
csp += " script-src 'none'";
}
// https://github.com/gorhill/httpswitchboard/issues/181
// Headers are returned only when a policy was actually added.
if ( csp !== '' ) {
headers.push({
'name': 'Content-Security-Policy',
'value': csp.trim()
});
return { responseHeaders: headers };
}
};
/******************************************************************************/
// Handles response headers of a sub-frame document: unless scripts are
// allowed for the frame's hostname, a sandboxing `Content-Security-Policy`
// header is appended to the response.
//
// details: webRequest details object; `tabId`, `url` and `responseHeaders`
// are read, and `responseHeaders` may be appended to.
// Returns: undefined when scripts are allowed, `{ responseHeaders }`
// otherwise.
var onSubDocHeadersReceived = function(details) {
    var µm = µMatrix;

    // Do not ignore traffic outside tabs.
    // https://github.com/gorhill/httpswitchboard/issues/91#issuecomment-37180275
    var tabId = details.tabId < 0 ? µm.behindTheSceneTabId : details.tabId;

    // Requests which cannot be bound to a known tab are treated as
    // behind-the-scene requests.
    // https://github.com/gorhill/httpswitchboard/issues/191
    var pageStats = µm.pageStatsFromTabId(tabId) ||
                    µm.pageStatsFromTabId(µm.behindTheSceneTabId);

    if ( µm.mustAllow(pageStats.pageHostname, µm.hostnameFromURL(details.url), 'script') ) {
        return;
    }

    // Javascript is not allowed. Inline javascript within iframes requires
    // sandboxing:
    // https://github.com/gorhill/httpswitchboard/issues/73
    // Because `sandbox` cancels all permissions, some are explicitly granted
    // back to limit negative side effects. `allow-popups` is needed to avoid
    // completely breaking links on some old-style sites:
    // https://github.com/gorhill/uMatrix/issues/27
    // TODO: Reuse CSP `sandbox` directive if it's already in the
    // headers (strip out `allow-scripts` if present),
    // and find out if the `sandbox` in the header interferes with a
    // `sandbox` attribute which might be present on the iframe.
    var responseHeaders = details.responseHeaders;
    responseHeaders.push({
        'name': 'Content-Security-Policy',
        'value': 'sandbox allow-forms allow-same-origin allow-popups allow-top-navigation'
    });
    return { responseHeaders: responseHeaders };
};
/******************************************************************************/
// As per Chrome API doc, webRequest.onErrorOccurred event is the last
// one called in the sequence of webRequest events.
// http://developer.chrome.com/extensions/webRequest.html
// `onErrorOccurred` listener: when a root document fails, unwinds any
// redirect chain remembered in `µm.redirectRequests` and logs each URL of the
// chain, oldest first, as a non-blocked 'doc' request.
// https://github.com/gorhill/httpswitchboard/issues/171
//
// details: webRequest details object; `type`, `parentFrameId` and `url` are
// read.
// Returns: always undefined.
var onErrorOccurredHandler = function(details) {
    // Ignore all that is not a main document
    var isRootDoc = requestTypeNormalizer[details.type] === 'doc' &&
                    !(details.parentFrameId >= 0);
    if ( !isRootDoc ) {
        return;
    }

    var µm = µMatrix;
    var pageStats = µm.pageStatsFromPageUrl(details.url);
    if ( !pageStats ) {
        return;
    }

    // Chromium emits an error when a page redirects apparently endlessly, so
    // all pending redirects are unravelled and reported here. Entries are
    // consumed from the redirect map as they are visited.
    var cursor = µm.URI.set(details.url).normalizedURI();
    var chain = [cursor];
    for ( var origin = µm.redirectRequests[cursor]; origin; origin = µm.redirectRequests[cursor] ) {
        chain.push(origin);
        delete µm.redirectRequests[cursor];
        cursor = origin;
    }

    // Report from the oldest URL to the one which failed.
    for ( var url = chain.pop(); url; url = chain.pop() ) {
        pageStats.recordRequest('doc', url, false);
    }
};
/******************************************************************************/
// Caller must ensure headerName is normalized to lower case.
// Finds the position of a header by name, scanning from the last header to
// the first. Header names are lower-cased before comparison; the caller must
// pass `headerName` already in lower case.
//
// headerName: lower-case header name to look for.
// headers: array of `{ name, value }` header entries.
// Returns: index of the last matching entry, or -1 when there is none.
var headerIndexFromName = function(headerName, headers) {
    for ( var idx = headers.length - 1; idx >= 0; idx-- ) {
        if ( headers[idx].name.toLowerCase() === headerName ) {
            return idx;
        }
    }
    return -1;
};
/******************************************************************************/
// Maps the request `type` reported in webRequest `details` to the request
// type names used when calling into the matrix and the request log in this
// file ('doc', 'frame', 'css', ...). A type which is not listed here maps to
// undefined.
var requestTypeNormalizer = {
'main_frame' : 'doc',
'sub_frame' : 'frame',
'stylesheet' : 'css',
'script' : 'script',
'image' : 'image',
'object' : 'plugin',
'xmlhttprequest': 'xhr',
'other' : 'other'
};
/******************************************************************************/
// Registers all webRequest listeners of this module:
// - onBeforeRequest (blocking): http, https and chrome-extension URLs,
//   restricted to the request types this module knows about;
// - onBeforeSendHeaders (blocking, with request headers): http and https;
// - onHeadersReceived (blocking, with response headers): http and https;
// - onErrorOccurred (observer only): http and https.
var start = function() {
    var webRequest = chrome.webRequest;

    // Builds a fresh filter matching all http/https URLs.
    var httpFilter = function() {
        return {
            'urls': [
                'http://*/*',
                'https://*/*'
            ]
        };
    };

    var beforeRequestFilter = {
        'urls': [
            'http://*/*',
            'https://*/*',
            'chrome-extension://*/*'
        ],
        'types': [
            'main_frame',
            'sub_frame',
            'stylesheet',
            'script',
            'image',
            'object',
            'xmlhttprequest',
            'other'
        ]
    };

    webRequest.onBeforeRequest.addListener(
        onBeforeRequestHandler,
        beforeRequestFilter,
        [ 'blocking' ]
    );
    webRequest.onBeforeSendHeaders.addListener(
        onBeforeSendHeadersHandler,
        httpFilter(),
        [ 'blocking', 'requestHeaders' ]
    );
    webRequest.onHeadersReceived.addListener(
        onHeadersReceived,
        httpFilter(),
        [ 'blocking', 'responseHeaders' ]
    );
    webRequest.onErrorOccurred.addListener(
        onErrorOccurredHandler,
        httpFilter()
    );
};
/******************************************************************************/
// Public interface of this module.
// - blockedRootFramePrefix: the `data:text/html;base64,` prefix followed by
//   the first 80 base64 characters of the (unsubstituted) root frame
//   template. 80 base64 characters encode the first 60 bytes of the template.
//   NOTE(review): presumably used elsewhere to recognise the data URIs
//   produced for blocked root frames; that only works if no placeholder is
//   substituted within those first bytes -- confirm against the full template.
// - start: registers the webRequest listeners.
return {
blockedRootFramePrefix: 'data:text/html;base64,' + btoa(rootFrameReplacement).slice(0, 80),
start: start
};
/******************************************************************************/
})();
/******************************************************************************/