#!/usr/bin/env python # Copyright 2013 Google Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Inject javascript into html page source code.""" import logging import os import re import util DOCTYPE_RE = re.compile(r'^.{,256}?()?.{,256}?', re.IGNORECASE | re.DOTALL) HTML_RE = re.compile(r'^.{,256}?()?.{,256}?', re.IGNORECASE | re.DOTALL) HEAD_RE = re.compile(r'^.{,256}?()?.{,256}?', re.IGNORECASE | re.DOTALL) def GetInjectScript(scripts): """Loads |scripts| from disk and returns a string of their content.""" lines = [] if scripts: if not isinstance(scripts, list): scripts = scripts.split(',') for script in scripts: if os.path.exists(script): with open(script) as f: lines.extend(f.read()) elif util.resource_exists(script): lines.extend(util.resource_string(script)) else: raise Exception('Script does not exist: %s', script) def MinifyScript(script): """Remove C-style comments and line breaks from script. Note: statements must be ';' terminated, and not depending on newline""" # Regex adapted from http://ostermiller.org/findcomment.html. MULTILINE_COMMENT_RE = re.compile(r'/\*.*?\*/', re.DOTALL | re.MULTILINE) SINGLELINE_COMMENT_RE = re.compile(r'//.*', re.MULTILINE) # Remove C-style comments from JS. script = re.sub(MULTILINE_COMMENT_RE, '', script) script = re.sub(SINGLELINE_COMMENT_RE, '', script) # Remove line breaks. script = script.translate(None, '\r\n') return script return MinifyScript(''.join(lines)) def InjectScript(content, content_type, script_to_inject): """Inject |script_to_inject| into |content| if |content_type| is 'text/html'. Inject |script_to_inject| into |content| immediately after , or , if one of them is found. Otherwise, inject at the beginning. Returns: content, already_injected |content| is the new content if script is injected, otherwise the original. |already_injected| indicates if |script_to_inject| is already in |content|. """ already_injected = False if content_type and content_type == 'text/html': already_injected = not content or script_to_inject in content if not already_injected: def InsertScriptAfter(matchobj): return '%s' % (matchobj.group(0), script_to_inject) content, is_injected = HEAD_RE.subn(InsertScriptAfter, content, 1) if not is_injected: content, is_injected = HTML_RE.subn(InsertScriptAfter, content, 1) if not is_injected: content, is_injected = DOCTYPE_RE.subn(InsertScriptAfter, content, 1) if not is_injected: content = '%s' % (script_to_inject, content) logging.warning('Inject at the very beginning, because no tag of ' ', or is found.') return content, already_injected