From 42b80c0224ff04a5d5123a54b6cccfc56bb88dc8 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Sun, 23 Aug 2015 23:15:43 +0530 Subject: [PATCH 1/6] Allow setting defaults through a config file. --- splash/browser_tab.py | 8 +++---- splash/cache.py | 4 ++-- splash/config.py | 32 +++++++++++++++++++++++++++ splash/network_manager.py | 6 ++--- splash/qtrender.py | 6 ++--- splash/qtrender_image.py | 14 ++++++------ splash/render_options.py | 44 ++++++++++++++++++------------------- splash/server.py | 46 +++++++++++++++++++-------------------- splash/xvfb.py | 4 ++-- 9 files changed, 98 insertions(+), 66 deletions(-) create mode 100644 splash/config.py diff --git a/splash/browser_tab.py b/splash/browser_tab.py index 2144a6044..c41c4de56 100644 --- a/splash/browser_tab.py +++ b/splash/browser_tab.py @@ -14,7 +14,7 @@ from twisted.internet import defer from twisted.python import log -from splash import defaults +from splash import config from splash.har.qt import cookies2har from splash.har.utils import without_private from splash.qtrender_image import QtImageRenderer @@ -96,10 +96,10 @@ def _init_webpage(self, verbosity, network_manager, splash_proxy_factory, render self.web_view.move(0, 0) self.web_view.show() - self.set_viewport(defaults.VIEWPORT_SIZE) + self.set_viewport(config.VIEWPORT_SIZE) # XXX: hack to ensure that default window size is not 640x480. self.web_view.resize( - QSize(*map(int, defaults.VIEWPORT_SIZE.split('x')))) + QSize(*map(int, config.VIEWPORT_SIZE.split('x')))) def set_js_enabled(self, val): settings = self.web_page.settings() @@ -194,7 +194,7 @@ def set_viewport(self, size, raise_if_empty=False): if raise_if_empty: raise RuntimeError("Cannot detect viewport size") else: - size = defaults.VIEWPORT_SIZE + size = config.VIEWPORT_SIZE self.logger.log("Viewport is empty, falling back to: %s" % size) diff --git a/splash/cache.py b/splash/cache.py index d64d94aa9..e3c03808c 100644 --- a/splash/cache.py +++ b/splash/cache.py @@ -2,10 +2,10 @@ from __future__ import absolute_import from PyQt4.QtNetwork import QNetworkDiskCache from twisted.python import log -from splash import defaults +from splash import config -def construct(path=defaults.CACHE_PATH, size=defaults.CACHE_SIZE): +def construct(path=config.CACHE_PATH, size=config.CACHE_SIZE): log.msg("Initializing cache on %s (maxsize: %d Mb)" % (path, size)) cache = QNetworkDiskCache() cache.setCacheDirectory(path) diff --git a/splash/config.py b/splash/config.py new file mode 100644 index 000000000..dd8b31c4a --- /dev/null +++ b/splash/config.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +import os +import sys +import yaml +from . import defaults + + +class Settings(object): + + def __init__(self): + root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + config_file_path = os.path.join(root_dir, 'config.yml') + self.defaults = {} + for name in dir(defaults): + if name.isupper(): + self.defaults[name] = getattr(defaults, name) + try: + with open(config_file_path, 'rb') as config_file: + self.cfg = yaml.load(config_file) + except IOError: + self.cfg = {} + + def __getattr__(self, item): + val = self.cfg.get(item, None) + if val is None: + val = self.defaults.get(item, None) + if val is None: + raise AttributeError("There is no settings named %s" % item) + return val + +sys.modules[__name__] = Settings() diff --git a/splash/network_manager.py b/splash/network_manager.py index 54dac5b73..a7c82216c 100644 --- a/splash/network_manager.py +++ b/splash/network_manager.py @@ -27,13 +27,13 @@ AdblockRulesRegistry, ResourceTimeoutMiddleware) from splash.response_middleware import ContentTypeMiddleware -from splash import defaults +from splash import config def create_default(filters_path=None, verbosity=None, allowed_schemes=None): - verbosity = defaults.VERBOSITY if verbosity is None else verbosity + verbosity = config.VERBOSITY if verbosity is None else verbosity if allowed_schemes is None: - allowed_schemes = defaults.ALLOWED_SCHEMES + allowed_schemes = config.ALLOWED_SCHEMES else: allowed_schemes = allowed_schemes.split(',') manager = SplashQNetworkAccessManager( diff --git a/splash/qtrender.py b/splash/qtrender.py index 4fd1bc7d5..b57046761 100644 --- a/splash/qtrender.py +++ b/splash/qtrender.py @@ -3,7 +3,7 @@ import json import functools import pprint -from splash import defaults +from splash import config from splash.browser_tab import BrowserTab @@ -83,11 +83,11 @@ def start(self, url, baseurl=None, wait=None, viewport=None, render_all=False, resource_timeout=None): self.url = url - self.wait_time = defaults.WAIT_TIME if wait is None else wait + self.wait_time = config.WAIT_TIME if wait is None else wait self.js_source = js_source self.js_profile = js_profile self.console = console - self.viewport = defaults.VIEWPORT_SIZE if viewport is None else viewport + self.viewport = config.VIEWPORT_SIZE if viewport is None else viewport self.render_all = render_all or viewport == 'full' if resource_timeout: diff --git a/splash/qtrender_image.py b/splash/qtrender_image.py index 43cb4e527..069905571 100644 --- a/splash/qtrender_image.py +++ b/splash/qtrender_image.py @@ -9,7 +9,7 @@ from PyQt4.QtCore import QBuffer, QPoint, QRect, QSize, Qt from PyQt4.QtGui import QImage, QPainter, QRegion -from splash import defaults +from splash import config class QtImageRenderer(object): @@ -36,7 +36,7 @@ def __init__(self, web_page, logger=None, image_format=None, self.width = width self.height = height if scale_method is None: - scale_method = defaults.IMAGE_SCALE_METHOD + scale_method = config.IMAGE_SCALE_METHOD self.scale_method = scale_method self.image_format = image_format.upper() if not (self.is_png() or self.is_jpeg()): @@ -319,7 +319,7 @@ def _calculate_image_parameters(self, web_viewport, img_width, img_height): return image_viewport, image_size def _calculate_tiling(self, to_paint): - tile_maxsize = defaults.TILE_MAXSIZE + tile_maxsize = config.TILE_MAXSIZE tile_hsize = min(tile_maxsize, to_paint.width()) tile_vsize = min(tile_maxsize, to_paint.height()) htiles = 1 + (to_paint.width() - 1) // tile_hsize @@ -420,7 +420,7 @@ def crop(self, rect): assert isinstance(rect, QRect) self.img = self.img.copy(rect) - def to_png(self, complevel=defaults.PNG_COMPRESSION_LEVEL): + def to_png(self, complevel=config.PNG_COMPRESSION_LEVEL): quality = 90 - (complevel * 10) buf = QBuffer() self.img.save(buf, 'png', quality) @@ -428,7 +428,7 @@ def to_png(self, complevel=defaults.PNG_COMPRESSION_LEVEL): def to_jpeg(self, quality=None): if quality is None: - quality = defaults.JPEG_QUALITY + quality = config.JPEG_QUALITY buf = QBuffer() self.img.save(buf, 'jpeg', quality) return bytes(buf.data()) @@ -454,14 +454,14 @@ def crop(self, rect): top, bottom = rect.top(), rect.top() + rect.height() self.img = self.img.crop((left, top, right, bottom)) - def to_png(self, complevel=defaults.PNG_COMPRESSION_LEVEL): + def to_png(self, complevel=config.PNG_COMPRESSION_LEVEL): buf = StringIO() self.img.save(buf, 'png', compress_level=complevel) return buf.getvalue() def to_jpeg(self, quality=None): if quality is None: - quality = defaults.JPEG_QUALITY + quality = config.JPEG_QUALITY buf = StringIO() self.img.save(buf, 'jpeg', quality=quality) return buf.getvalue() diff --git a/splash/render_options.py b/splash/render_options.py index 504670a6a..49befca04 100644 --- a/splash/render_options.py +++ b/splash/render_options.py @@ -2,7 +2,7 @@ from __future__ import absolute_import import os import json -from splash import defaults +from splash import config class BadOption(Exception): @@ -100,19 +100,19 @@ def get_baseurl(self): return self._get_url("baseurl", default=None) def get_wait(self): - return self.get("wait", defaults.WAIT_TIME, - type=float, range=(0, defaults.MAX_WAIT_TIME)) + return self.get("wait", config.WAIT_TIME, + type=float, range=(0, config.MAX_WAIT_TIME)) def get_timeout(self): - default = min(self.max_timeout, defaults.TIMEOUT) + default = min(self.max_timeout, config.TIMEOUT) return self.get("timeout", default, type=float, range=(0, self.max_timeout)) def get_resource_timeout(self): - return self.get("resource_timeout", defaults.RESOURCE_TIMEOUT, + return self.get("resource_timeout", config.RESOURCE_TIMEOUT, type=float, range=(0, 1e6)) def get_images(self): - return self._get_bool("images", defaults.AUTOLOAD_IMAGES) + return self._get_bool("images", config.AUTOLOAD_IMAGES) def get_proxy(self): return self.get("proxy", default=None) @@ -121,13 +121,13 @@ def get_js_source(self): return self.get("js_source", default=None) def get_width(self): - return self.get("width", None, type=int, range=(1, defaults.MAX_WIDTH)) + return self.get("width", None, type=int, range=(1, config.MAX_WIDTH)) def get_height(self): - return self.get("height", None, type=int, range=(1, defaults.MAX_HEIGTH)) + return self.get("height", None, type=int, range=(1, config.MAX_HEIGTH)) def get_scale_method(self): - scale_method = self.get("scale_method", defaults.IMAGE_SCALE_METHOD) + scale_method = self.get("scale_method", config.IMAGE_SCALE_METHOD) if scale_method not in ('raster', 'vector'): raise BadOption( "Invalid 'scale_method' (must be 'raster' or 'vector'): %s" % @@ -135,7 +135,7 @@ def get_scale_method(self): return scale_method def get_quality(self): - return self.get("quality", defaults.JPEG_QUALITY, type=int, range=(0, 100)) + return self.get("quality", config.JPEG_QUALITY, type=int, range=(0, 100)) def get_http_method(self): return self.get("http_method", "GET") @@ -184,7 +184,7 @@ def get_headers(self): return headers def get_viewport(self, wait=None): - viewport = self.get("viewport", defaults.VIEWPORT_SIZE) + viewport = self.get("viewport", config.VIEWPORT_SIZE) if viewport == 'full': if wait == 0: @@ -274,14 +274,14 @@ def get_jpeg_params(self): def get_include_params(self): return dict( - html=self._get_bool("html", defaults.DO_HTML), - iframes=self._get_bool("iframes", defaults.DO_IFRAMES), - png=self._get_bool("png", defaults.DO_PNG), - jpeg=self._get_bool("jpeg", defaults.DO_JPEG), - script=self._get_bool("script", defaults.SHOW_SCRIPT), - console=self._get_bool("console", defaults.SHOW_CONSOLE), - history=self._get_bool("history", defaults.SHOW_HISTORY), - har=self._get_bool("har", defaults.SHOW_HAR), + html=self._get_bool("html", config.DO_HTML), + iframes=self._get_bool("iframes", config.DO_IFRAMES), + png=self._get_bool("png", config.DO_PNG), + jpeg=self._get_bool("jpeg", config.DO_JPEG), + script=self._get_bool("script", config.SHOW_SCRIPT), + console=self._get_bool("console", config.SHOW_CONSOLE), + history=self._get_bool("history", config.SHOW_HISTORY), + har=self._get_bool("har", config.SHOW_HAR), ) @@ -296,9 +296,9 @@ def validate_size_str(size_str): :param size_str: string to validate """ - max_width = defaults.VIEWPORT_MAX_WIDTH - max_heigth = defaults.VIEWPORT_MAX_HEIGTH - max_area = defaults.VIEWPORT_MAX_AREA + max_width = config.VIEWPORT_MAX_WIDTH + max_heigth = config.VIEWPORT_MAX_HEIGTH + max_area = config.VIEWPORT_MAX_AREA try: w, h = map(int, size_str.split('x')) except ValueError: diff --git a/splash/server.py b/splash/server.py index dc455891c..a764ef62b 100644 --- a/splash/server.py +++ b/splash/server.py @@ -7,7 +7,7 @@ import signal import functools -from splash import defaults, __version__ +from splash import config, __version__ from splash import xvfb from splash.qtutils import init_qt_app @@ -24,11 +24,11 @@ def parse_opts(): op.add_option("-f", "--logfile", help="log file") op.add_option("-m", "--maxrss", type=float, default=0, help="exit if max RSS reaches this value (in MB or ratio of physical mem) (default: %default)") - op.add_option("-p", "--port", type="int", default=defaults.SPLASH_PORT, + op.add_option("-p", "--port", type="int", default=config.SPLASH_PORT, help="port to listen to (default: %default)") - op.add_option("-s", "--slots", type="int", default=defaults.SLOTS, + op.add_option("-s", "--slots", type="int", default=config.SLOTS, help="number of render slots (default: %default)") - op.add_option("--max-timeout", type="float", default=defaults.MAX_TIMEOUT, + op.add_option("--max-timeout", type="float", default=config.MAX_TIMEOUT, help="maximum allowed value for timeout (default: %default)") op.add_option("--proxy-profiles-path", help="path to a folder with proxy profiles") @@ -37,20 +37,20 @@ def parse_opts(): op.add_option("--no-js-cross-domain-access", action="store_false", dest="js_cross_domain_enabled", - default=not defaults.JS_CROSS_DOMAIN_ENABLED, - help="disable support for cross domain access when executing custom javascript" + _bool_default[not defaults.JS_CROSS_DOMAIN_ENABLED]) + default=not config.JS_CROSS_DOMAIN_ENABLED, + help="disable support for cross domain access when executing custom javascript" + _bool_default[not config.JS_CROSS_DOMAIN_ENABLED]) op.add_option("--js-cross-domain-access", action="store_true", dest="js_cross_domain_enabled", - default=defaults.JS_CROSS_DOMAIN_ENABLED, + default=config.JS_CROSS_DOMAIN_ENABLED, help="enable support for cross domain access when executing custom javascript " - "(WARNING: it could break rendering for some of the websites)" + _bool_default[defaults.JS_CROSS_DOMAIN_ENABLED]) + "(WARNING: it could break rendering for some of the websites)" + _bool_default[config.JS_CROSS_DOMAIN_ENABLED]) op.add_option("--no-cache", action="store_false", dest="cache_enabled", - help="disable local cache" + _bool_default[not defaults.CACHE_ENABLED]) + help="disable local cache" + _bool_default[not config.CACHE_ENABLED]) op.add_option("--cache", action="store_true", dest="cache_enabled", - help="enable local cache (WARNING: don't enable it unless you know what are you doing)" + _bool_default[defaults.CACHE_ENABLED]) + help="enable local cache (WARNING: don't enable it unless you know what are you doing)" + _bool_default[config.CACHE_ENABLED]) op.add_option("-c", "--cache-path", help="local cache folder") - op.add_option("--cache-size", type=int, default=defaults.CACHE_SIZE, + op.add_option("--cache-size", type=int, default=config.CACHE_SIZE, help="maximum cache size in MB (default: %default)") op.add_option("--manhole", action="store_true", help="enable manhole server") @@ -58,9 +58,9 @@ def parse_opts(): help="disable proxy server") op.add_option("--disable-ui", action="store_true", default=False, help="disable web UI") - op.add_option("--proxy-portnum", type="int", default=defaults.PROXY_PORT, + op.add_option("--proxy-portnum", type="int", default=config.PROXY_PORT, help="proxy port to listen to (default: %default)") - op.add_option('--allowed-schemes', default=",".join(defaults.ALLOWED_SCHEMES), + op.add_option('--allowed-schemes', default=",".join(config.ALLOWED_SCHEMES), help="comma-separated list of allowed URI schemes (defaut: %default)") op.add_option("--filters-path", help="path to a folder with network request filters") @@ -75,7 +75,7 @@ def parse_opts(): "Each place can have a ? in it that's replaced with the module name.") op.add_option("--lua-sandbox-allowed-modules", default="", help="semicolon-separated list of Lua module names allowed to be required from a sandbox.") - op.add_option("-v", "--verbosity", type=int, default=defaults.VERBOSITY, + op.add_option("-v", "--verbosity", type=int, default=config.VERBOSITY, help="verbosity level; valid values are integers from 0 to 5") op.add_option("--version", action="store_true", help="print Splash version number and exit") @@ -148,9 +148,9 @@ def manhole_server(portnum=None, username=None, password=None): from twisted.manhole import telnet f = telnet.ShellFactory() - f.username = defaults.MANHOLE_USERNAME if username is None else username - f.password = defaults.MANHOLE_PASSWORD if password is None else password - portnum = defaults.MANHOLE_PORT if portnum is None else portnum + f.username = config.MANHOLE_USERNAME if username is None else username + f.password = config.MANHOLE_PASSWORD if password is None else password + portnum = config.MANHOLE_PORT if portnum is None else portnum reactor.listenTCP(portnum, f) @@ -170,10 +170,10 @@ def splash_server(portnum, slots, network_manager, max_timeout, from twisted.python import log from splash import lua - verbosity = defaults.VERBOSITY if verbosity is None else verbosity + verbosity = config.VERBOSITY if verbosity is None else verbosity log.msg("verbosity=%d" % verbosity) - slots = defaults.SLOTS if slots is None else slots + slots = config.SLOTS if slots is None else slots log.msg("slots=%s" % slots) pool = RenderPool( @@ -215,7 +215,7 @@ def splash_server(portnum, slots, network_manager, max_timeout, if not disable_proxy: from splash.proxy_server import SplashProxyServerFactory proxy_server_factory = SplashProxyServerFactory(pool, max_timeout=max_timeout) - proxy_portnum = defaults.PROXY_PORT if proxy_portnum is None else proxy_portnum + proxy_portnum = config.PROXY_PORT if proxy_portnum is None else proxy_portnum reactor.listenTCP(proxy_portnum, proxy_server_factory) @@ -283,9 +283,9 @@ def _default_cache(cache_enabled, cache_path, cache_size): from twisted.python import log from splash import cache - cache_enabled = defaults.CACHE_ENABLED if cache_enabled is None else cache_enabled - cache_path = defaults.CACHE_PATH if cache_path is None else cache_path - cache_size = defaults.CACHE_SIZE if cache_size is None else cache_size + cache_enabled = config.CACHE_ENABLED if cache_enabled is None else cache_enabled + cache_path = config.CACHE_PATH if cache_path is None else cache_path + cache_size = config.CACHE_SIZE if cache_size is None else cache_size if cache_enabled: log.msg("cache_enabled=%s, cache_path=%r, cache_size=%sMB" % (cache_enabled, cache_path, cache_size)) diff --git a/splash/xvfb.py b/splash/xvfb.py index 07369cf70..7b1c81b85 100644 --- a/splash/xvfb.py +++ b/splash/xvfb.py @@ -6,7 +6,7 @@ from __future__ import absolute_import import sys from contextlib import contextmanager -from splash import defaults +from splash import config from twisted.python import log @@ -34,7 +34,7 @@ def _get_xvfb(): try: from xvfbwrapper import Xvfb - width, height = map(int, defaults.VIEWPORT_SIZE.split("x")) + width, height = map(int, config.VIEWPORT_SIZE.split("x")) return Xvfb(width, height) except ImportError: return None From 98d64c837330e42fd77665a0b8626f9af1cb00c2 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Mon, 24 Aug 2015 19:54:12 +0530 Subject: [PATCH 2/6] Update dependencies to include pyyaml. --- .travis.yml | 2 +- dockerfiles/splash/provision.sh | 3 ++- requirements.txt | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f49727336..b805d6eb2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,7 @@ virtualenv: before_install: - sudo apt-get install python-twisted python-qt4 lua5.2-dev zlib1g-dev python-imaging - - pip install -U qt4reactor psutil requests jsonschema strict-rfc3339 xvfbwrapper pytest funcparserlib pytest-cov codecov + - pip install -U qt4reactor psutil requests jsonschema strict-rfc3339 xvfbwrapper pytest funcparserlib pytest-cov codecov pyyaml - "if [[ $SYSTEM_PACKAGES == 'false' ]]; then pip install -UI Pillow twisted service_identity lupa; fi" install: diff --git a/dockerfiles/splash/provision.sh b/dockerfiles/splash/provision.sh index 824a1d5b9..17ed231b8 100755 --- a/dockerfiles/splash/provision.sh +++ b/dockerfiles/splash/provision.sh @@ -75,7 +75,8 @@ install_python_deps () { xvfbwrapper==0.2.4 \ lupa==1.1 \ funcparserlib==0.3.6 \ - Pillow==2.9.0 + Pillow==2.9.0 \ + pyyaml==3.11 } install_msfonts() { diff --git a/requirements.txt b/requirements.txt index deee11258..d4ffec318 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ adblockparser >= 0.4 re2 >= 0.2.21 xvfbwrapper Pillow +pyyaml # for scripting support lupa >= 1.1 From 7415aa7c1efbecc5efb29b693c8d4e1331af9b73 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Mon, 24 Aug 2015 22:24:17 +0530 Subject: [PATCH 3/6] A bit of refactoring. --- splash/browser_tab.py | 8 +++---- splash/cache.py | 4 ++-- splash/config.py | 3 +-- splash/network_manager.py | 6 ++--- splash/qtrender.py | 6 ++--- splash/qtrender_image.py | 14 ++++++------ splash/render_options.py | 44 +++++++++++++++++------------------ splash/server.py | 48 +++++++++++++++++++-------------------- splash/xvfb.py | 4 ++-- 9 files changed, 68 insertions(+), 69 deletions(-) diff --git a/splash/browser_tab.py b/splash/browser_tab.py index c41c4de56..d901c36a5 100644 --- a/splash/browser_tab.py +++ b/splash/browser_tab.py @@ -14,7 +14,7 @@ from twisted.internet import defer from twisted.python import log -from splash import config +from splash.config import settings from splash.har.qt import cookies2har from splash.har.utils import without_private from splash.qtrender_image import QtImageRenderer @@ -96,10 +96,10 @@ def _init_webpage(self, verbosity, network_manager, splash_proxy_factory, render self.web_view.move(0, 0) self.web_view.show() - self.set_viewport(config.VIEWPORT_SIZE) + self.set_viewport(settings.VIEWPORT_SIZE) # XXX: hack to ensure that default window size is not 640x480. self.web_view.resize( - QSize(*map(int, config.VIEWPORT_SIZE.split('x')))) + QSize(*map(int, settings.VIEWPORT_SIZE.split('x')))) def set_js_enabled(self, val): settings = self.web_page.settings() @@ -194,7 +194,7 @@ def set_viewport(self, size, raise_if_empty=False): if raise_if_empty: raise RuntimeError("Cannot detect viewport size") else: - size = config.VIEWPORT_SIZE + size = settings.VIEWPORT_SIZE self.logger.log("Viewport is empty, falling back to: %s" % size) diff --git a/splash/cache.py b/splash/cache.py index e3c03808c..46c306f70 100644 --- a/splash/cache.py +++ b/splash/cache.py @@ -2,10 +2,10 @@ from __future__ import absolute_import from PyQt4.QtNetwork import QNetworkDiskCache from twisted.python import log -from splash import config +from splash.config import settings -def construct(path=config.CACHE_PATH, size=config.CACHE_SIZE): +def construct(path=settings.CACHE_PATH, size=settings.CACHE_SIZE): log.msg("Initializing cache on %s (maxsize: %d Mb)" % (path, size)) cache = QNetworkDiskCache() cache.setCacheDirectory(path) diff --git a/splash/config.py b/splash/config.py index dd8b31c4a..8954937fd 100644 --- a/splash/config.py +++ b/splash/config.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import os -import sys import yaml from . import defaults @@ -29,4 +28,4 @@ def __getattr__(self, item): raise AttributeError("There is no settings named %s" % item) return val -sys.modules[__name__] = Settings() +settings = Settings() diff --git a/splash/network_manager.py b/splash/network_manager.py index a7c82216c..2e80ee41f 100644 --- a/splash/network_manager.py +++ b/splash/network_manager.py @@ -27,13 +27,13 @@ AdblockRulesRegistry, ResourceTimeoutMiddleware) from splash.response_middleware import ContentTypeMiddleware -from splash import config +from splash.config import settings def create_default(filters_path=None, verbosity=None, allowed_schemes=None): - verbosity = config.VERBOSITY if verbosity is None else verbosity + verbosity = settings.VERBOSITY if verbosity is None else verbosity if allowed_schemes is None: - allowed_schemes = config.ALLOWED_SCHEMES + allowed_schemes = settings.ALLOWED_SCHEMES else: allowed_schemes = allowed_schemes.split(',') manager = SplashQNetworkAccessManager( diff --git a/splash/qtrender.py b/splash/qtrender.py index b57046761..ebfd021c9 100644 --- a/splash/qtrender.py +++ b/splash/qtrender.py @@ -3,7 +3,7 @@ import json import functools import pprint -from splash import config +from splash.config import settings from splash.browser_tab import BrowserTab @@ -83,11 +83,11 @@ def start(self, url, baseurl=None, wait=None, viewport=None, render_all=False, resource_timeout=None): self.url = url - self.wait_time = config.WAIT_TIME if wait is None else wait + self.wait_time = settings.WAIT_TIME if wait is None else wait self.js_source = js_source self.js_profile = js_profile self.console = console - self.viewport = config.VIEWPORT_SIZE if viewport is None else viewport + self.viewport = settings.VIEWPORT_SIZE if viewport is None else viewport self.render_all = render_all or viewport == 'full' if resource_timeout: diff --git a/splash/qtrender_image.py b/splash/qtrender_image.py index 069905571..46253852a 100644 --- a/splash/qtrender_image.py +++ b/splash/qtrender_image.py @@ -9,7 +9,7 @@ from PyQt4.QtCore import QBuffer, QPoint, QRect, QSize, Qt from PyQt4.QtGui import QImage, QPainter, QRegion -from splash import config +from splash.config import settings class QtImageRenderer(object): @@ -36,7 +36,7 @@ def __init__(self, web_page, logger=None, image_format=None, self.width = width self.height = height if scale_method is None: - scale_method = config.IMAGE_SCALE_METHOD + scale_method = settings.IMAGE_SCALE_METHOD self.scale_method = scale_method self.image_format = image_format.upper() if not (self.is_png() or self.is_jpeg()): @@ -319,7 +319,7 @@ def _calculate_image_parameters(self, web_viewport, img_width, img_height): return image_viewport, image_size def _calculate_tiling(self, to_paint): - tile_maxsize = config.TILE_MAXSIZE + tile_maxsize = settings.TILE_MAXSIZE tile_hsize = min(tile_maxsize, to_paint.width()) tile_vsize = min(tile_maxsize, to_paint.height()) htiles = 1 + (to_paint.width() - 1) // tile_hsize @@ -420,7 +420,7 @@ def crop(self, rect): assert isinstance(rect, QRect) self.img = self.img.copy(rect) - def to_png(self, complevel=config.PNG_COMPRESSION_LEVEL): + def to_png(self, complevel=settings.PNG_COMPRESSION_LEVEL): quality = 90 - (complevel * 10) buf = QBuffer() self.img.save(buf, 'png', quality) @@ -428,7 +428,7 @@ def to_png(self, complevel=config.PNG_COMPRESSION_LEVEL): def to_jpeg(self, quality=None): if quality is None: - quality = config.JPEG_QUALITY + quality = settings.JPEG_QUALITY buf = QBuffer() self.img.save(buf, 'jpeg', quality) return bytes(buf.data()) @@ -454,14 +454,14 @@ def crop(self, rect): top, bottom = rect.top(), rect.top() + rect.height() self.img = self.img.crop((left, top, right, bottom)) - def to_png(self, complevel=config.PNG_COMPRESSION_LEVEL): + def to_png(self, complevel=settings.PNG_COMPRESSION_LEVEL): buf = StringIO() self.img.save(buf, 'png', compress_level=complevel) return buf.getvalue() def to_jpeg(self, quality=None): if quality is None: - quality = config.JPEG_QUALITY + quality = settings.JPEG_QUALITY buf = StringIO() self.img.save(buf, 'jpeg', quality=quality) return buf.getvalue() diff --git a/splash/render_options.py b/splash/render_options.py index 49befca04..05944c859 100644 --- a/splash/render_options.py +++ b/splash/render_options.py @@ -2,7 +2,7 @@ from __future__ import absolute_import import os import json -from splash import config +from splash.config import settings class BadOption(Exception): @@ -100,19 +100,19 @@ def get_baseurl(self): return self._get_url("baseurl", default=None) def get_wait(self): - return self.get("wait", config.WAIT_TIME, - type=float, range=(0, config.MAX_WAIT_TIME)) + return self.get("wait", settings.WAIT_TIME, + type=float, range=(0, settings.MAX_WAIT_TIME)) def get_timeout(self): - default = min(self.max_timeout, config.TIMEOUT) + default = min(self.max_timeout, settings.TIMEOUT) return self.get("timeout", default, type=float, range=(0, self.max_timeout)) def get_resource_timeout(self): - return self.get("resource_timeout", config.RESOURCE_TIMEOUT, + return self.get("resource_timeout", settings.RESOURCE_TIMEOUT, type=float, range=(0, 1e6)) def get_images(self): - return self._get_bool("images", config.AUTOLOAD_IMAGES) + return self._get_bool("images", settings.AUTOLOAD_IMAGES) def get_proxy(self): return self.get("proxy", default=None) @@ -121,13 +121,13 @@ def get_js_source(self): return self.get("js_source", default=None) def get_width(self): - return self.get("width", None, type=int, range=(1, config.MAX_WIDTH)) + return self.get("width", None, type=int, range=(1, settings.MAX_WIDTH)) def get_height(self): - return self.get("height", None, type=int, range=(1, config.MAX_HEIGTH)) + return self.get("height", None, type=int, range=(1, settings.MAX_HEIGTH)) def get_scale_method(self): - scale_method = self.get("scale_method", config.IMAGE_SCALE_METHOD) + scale_method = self.get("scale_method", settings.IMAGE_SCALE_METHOD) if scale_method not in ('raster', 'vector'): raise BadOption( "Invalid 'scale_method' (must be 'raster' or 'vector'): %s" % @@ -135,7 +135,7 @@ def get_scale_method(self): return scale_method def get_quality(self): - return self.get("quality", config.JPEG_QUALITY, type=int, range=(0, 100)) + return self.get("quality", settings.JPEG_QUALITY, type=int, range=(0, 100)) def get_http_method(self): return self.get("http_method", "GET") @@ -184,7 +184,7 @@ def get_headers(self): return headers def get_viewport(self, wait=None): - viewport = self.get("viewport", config.VIEWPORT_SIZE) + viewport = self.get("viewport", settings.VIEWPORT_SIZE) if viewport == 'full': if wait == 0: @@ -274,14 +274,14 @@ def get_jpeg_params(self): def get_include_params(self): return dict( - html=self._get_bool("html", config.DO_HTML), - iframes=self._get_bool("iframes", config.DO_IFRAMES), - png=self._get_bool("png", config.DO_PNG), - jpeg=self._get_bool("jpeg", config.DO_JPEG), - script=self._get_bool("script", config.SHOW_SCRIPT), - console=self._get_bool("console", config.SHOW_CONSOLE), - history=self._get_bool("history", config.SHOW_HISTORY), - har=self._get_bool("har", config.SHOW_HAR), + html=self._get_bool("html", settings.DO_HTML), + iframes=self._get_bool("iframes", settings.DO_IFRAMES), + png=self._get_bool("png", settings.DO_PNG), + jpeg=self._get_bool("jpeg", settings.DO_JPEG), + script=self._get_bool("script", settings.SHOW_SCRIPT), + console=self._get_bool("console", settings.SHOW_CONSOLE), + history=self._get_bool("history", settings.SHOW_HISTORY), + har=self._get_bool("har", settings.SHOW_HAR), ) @@ -296,9 +296,9 @@ def validate_size_str(size_str): :param size_str: string to validate """ - max_width = config.VIEWPORT_MAX_WIDTH - max_heigth = config.VIEWPORT_MAX_HEIGTH - max_area = config.VIEWPORT_MAX_AREA + max_width = settings.VIEWPORT_MAX_WIDTH + max_heigth = settings.VIEWPORT_MAX_HEIGTH + max_area = settings.VIEWPORT_MAX_AREA try: w, h = map(int, size_str.split('x')) except ValueError: diff --git a/splash/server.py b/splash/server.py index a764ef62b..8e26595c7 100644 --- a/splash/server.py +++ b/splash/server.py @@ -7,8 +7,8 @@ import signal import functools -from splash import config, __version__ -from splash import xvfb +from splash.config import settings +from splash import xvfb, __version__ from splash.qtutils import init_qt_app def install_qtreactor(verbose): @@ -24,11 +24,11 @@ def parse_opts(): op.add_option("-f", "--logfile", help="log file") op.add_option("-m", "--maxrss", type=float, default=0, help="exit if max RSS reaches this value (in MB or ratio of physical mem) (default: %default)") - op.add_option("-p", "--port", type="int", default=config.SPLASH_PORT, + op.add_option("-p", "--port", type="int", default=settings.SPLASH_PORT, help="port to listen to (default: %default)") - op.add_option("-s", "--slots", type="int", default=config.SLOTS, + op.add_option("-s", "--slots", type="int", default=settings.SLOTS, help="number of render slots (default: %default)") - op.add_option("--max-timeout", type="float", default=config.MAX_TIMEOUT, + op.add_option("--max-timeout", type="float", default=settings.MAX_TIMEOUT, help="maximum allowed value for timeout (default: %default)") op.add_option("--proxy-profiles-path", help="path to a folder with proxy profiles") @@ -37,20 +37,20 @@ def parse_opts(): op.add_option("--no-js-cross-domain-access", action="store_false", dest="js_cross_domain_enabled", - default=not config.JS_CROSS_DOMAIN_ENABLED, - help="disable support for cross domain access when executing custom javascript" + _bool_default[not config.JS_CROSS_DOMAIN_ENABLED]) + default=not settings.JS_CROSS_DOMAIN_ENABLED, + help="disable support for cross domain access when executing custom javascript" + _bool_default[not settings.JS_CROSS_DOMAIN_ENABLED]) op.add_option("--js-cross-domain-access", action="store_true", dest="js_cross_domain_enabled", - default=config.JS_CROSS_DOMAIN_ENABLED, + default=settings.JS_CROSS_DOMAIN_ENABLED, help="enable support for cross domain access when executing custom javascript " - "(WARNING: it could break rendering for some of the websites)" + _bool_default[config.JS_CROSS_DOMAIN_ENABLED]) + "(WARNING: it could break rendering for some of the websites)" + _bool_default[settings.JS_CROSS_DOMAIN_ENABLED]) op.add_option("--no-cache", action="store_false", dest="cache_enabled", - help="disable local cache" + _bool_default[not config.CACHE_ENABLED]) + help="disable local cache" + _bool_default[not settings.CACHE_ENABLED]) op.add_option("--cache", action="store_true", dest="cache_enabled", - help="enable local cache (WARNING: don't enable it unless you know what are you doing)" + _bool_default[config.CACHE_ENABLED]) + help="enable local cache (WARNING: don't enable it unless you know what are you doing)" + _bool_default[settings.CACHE_ENABLED]) op.add_option("-c", "--cache-path", help="local cache folder") - op.add_option("--cache-size", type=int, default=config.CACHE_SIZE, + op.add_option("--cache-size", type=int, default=settings.CACHE_SIZE, help="maximum cache size in MB (default: %default)") op.add_option("--manhole", action="store_true", help="enable manhole server") @@ -58,9 +58,9 @@ def parse_opts(): help="disable proxy server") op.add_option("--disable-ui", action="store_true", default=False, help="disable web UI") - op.add_option("--proxy-portnum", type="int", default=config.PROXY_PORT, + op.add_option("--proxy-portnum", type="int", default=settings.PROXY_PORT, help="proxy port to listen to (default: %default)") - op.add_option('--allowed-schemes', default=",".join(config.ALLOWED_SCHEMES), + op.add_option('--allowed-schemes', default=",".join(settings.ALLOWED_SCHEMES), help="comma-separated list of allowed URI schemes (defaut: %default)") op.add_option("--filters-path", help="path to a folder with network request filters") @@ -75,7 +75,7 @@ def parse_opts(): "Each place can have a ? in it that's replaced with the module name.") op.add_option("--lua-sandbox-allowed-modules", default="", help="semicolon-separated list of Lua module names allowed to be required from a sandbox.") - op.add_option("-v", "--verbosity", type=int, default=config.VERBOSITY, + op.add_option("-v", "--verbosity", type=int, default=settings.VERBOSITY, help="verbosity level; valid values are integers from 0 to 5") op.add_option("--version", action="store_true", help="print Splash version number and exit") @@ -148,9 +148,9 @@ def manhole_server(portnum=None, username=None, password=None): from twisted.manhole import telnet f = telnet.ShellFactory() - f.username = config.MANHOLE_USERNAME if username is None else username - f.password = config.MANHOLE_PASSWORD if password is None else password - portnum = config.MANHOLE_PORT if portnum is None else portnum + f.username = settings.MANHOLE_USERNAME if username is None else username + f.password = settings.MANHOLE_PASSWORD if password is None else password + portnum = settings.MANHOLE_PORT if portnum is None else portnum reactor.listenTCP(portnum, f) @@ -170,10 +170,10 @@ def splash_server(portnum, slots, network_manager, max_timeout, from twisted.python import log from splash import lua - verbosity = config.VERBOSITY if verbosity is None else verbosity + verbosity = settings.VERBOSITY if verbosity is None else verbosity log.msg("verbosity=%d" % verbosity) - slots = config.SLOTS if slots is None else slots + slots = settings.SLOTS if slots is None else slots log.msg("slots=%s" % slots) pool = RenderPool( @@ -215,7 +215,7 @@ def splash_server(portnum, slots, network_manager, max_timeout, if not disable_proxy: from splash.proxy_server import SplashProxyServerFactory proxy_server_factory = SplashProxyServerFactory(pool, max_timeout=max_timeout) - proxy_portnum = config.PROXY_PORT if proxy_portnum is None else proxy_portnum + proxy_portnum = settings.PROXY_PORT if proxy_portnum is None else proxy_portnum reactor.listenTCP(proxy_portnum, proxy_server_factory) @@ -283,9 +283,9 @@ def _default_cache(cache_enabled, cache_path, cache_size): from twisted.python import log from splash import cache - cache_enabled = config.CACHE_ENABLED if cache_enabled is None else cache_enabled - cache_path = config.CACHE_PATH if cache_path is None else cache_path - cache_size = config.CACHE_SIZE if cache_size is None else cache_size + cache_enabled = settings.CACHE_ENABLED if cache_enabled is None else cache_enabled + cache_path = settings.CACHE_PATH if cache_path is None else cache_path + cache_size = settings.CACHE_SIZE if cache_size is None else cache_size if cache_enabled: log.msg("cache_enabled=%s, cache_path=%r, cache_size=%sMB" % (cache_enabled, cache_path, cache_size)) diff --git a/splash/xvfb.py b/splash/xvfb.py index 7b1c81b85..3b96f8521 100644 --- a/splash/xvfb.py +++ b/splash/xvfb.py @@ -6,7 +6,7 @@ from __future__ import absolute_import import sys from contextlib import contextmanager -from splash import config +from splash.config import settings from twisted.python import log @@ -34,7 +34,7 @@ def _get_xvfb(): try: from xvfbwrapper import Xvfb - width, height = map(int, config.VIEWPORT_SIZE.split("x")) + width, height = map(int, settings.VIEWPORT_SIZE.split("x")) return Xvfb(width, height) except ImportError: return None From fa5a9c19dea77e3f234277e5c15b9be5f4854d03 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Thu, 8 Oct 2015 23:28:48 +0530 Subject: [PATCH 4/6] Use ConfigParser instead of yaml. Also * Look for config files in some standard locations. * Add option to specify config file location at startup. --- splash/config.py | 51 +++++++++++++++++++++++++++++++++++++++++------- splash/server.py | 11 ++++++++++- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/splash/config.py b/splash/config.py index 8954937fd..82e08c606 100644 --- a/splash/config.py +++ b/splash/config.py @@ -1,25 +1,62 @@ # -*- coding: utf-8 -*- +import __builtin__ +import ast +import ConfigParser import os -import yaml + from . import defaults +class ConfigError(Exception): + pass + +# CONFIG_PATH is the user supplied config file path. +try: + # hack to make CONFIG_PATH available from splash.server.main + CONFIG_PATH = __builtin__.CONFIG_PATH +except AttributeError: + CONFIG_PATH = None + + class Settings(object): + """Handles config files and default values of config settings.""" + + NO_CONFIG_FILE_MSG = "Config file doesn't exist at %s" def __init__(self): - root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - config_file_path = os.path.join(root_dir, 'config.yml') + self.config_path = CONFIG_PATH self.defaults = {} for name in dir(defaults): if name.isupper(): self.defaults[name] = getattr(defaults, name) - try: - with open(config_file_path, 'rb') as config_file: - self.cfg = yaml.load(config_file) - except IOError: + parser = ConfigParser.SafeConfigParser() + # don't convert keys to lowercase. + parser.optionxform = str + if parser.read(self._get_configfile_paths()): + # Safely evaluate configuration values. + self.cfg = {key: ast.literal_eval(val) for (key, val) in parser.items('settings')} + else: self.cfg = {} + def _get_configfile_paths(self): + """Returns a list of config file paths.""" + if self.config_path: + config_dir_path = os.path.abspath(os.path.expanduser(self.config_path)) + configfile_path = os.path.abspath(os.path.join(config_dir_path, 'splash.cfg')) + if not os.path.isfile(configfile_path): + # file doesn't exist + raise ConfigError(self.NO_CONFIG_FILE_MSG % configfile_path) + else: + return configfile_path + else: + xdg_config_home = os.environ.get('XDG_CONFIG_HOME') or \ + os.path.expanduser('~/.config') + return ['/etc/splash.cfg', + 'C:\\splash\splash.cfg', + os.path.join(xdg_config_home, 'splash.cfg'), + os.path.expanduser('~/.splash.cfg')] + def __getattr__(self, item): val = self.cfg.get(item, None) if val is None: diff --git a/splash/server.py b/splash/server.py index 8e26595c7..e89f3edde 100644 --- a/splash/server.py +++ b/splash/server.py @@ -7,7 +7,7 @@ import signal import functools -from splash.config import settings +from splash import config from splash import xvfb, __version__ from splash.qtutils import init_qt_app @@ -18,9 +18,12 @@ def install_qtreactor(verbose): def parse_opts(): + settings = config.Settings() _bool_default = {True:' (default)', False: ''} op = optparse.OptionParser() + op.add_option("--config-path", + help="path to a folder with a config file named splash.cfg") op.add_option("-f", "--logfile", help="log file") op.add_option("-m", "--maxrss", type=float, default=0, help="exit if max RSS reaches this value (in MB or ratio of physical mem) (default: %default)") @@ -337,6 +340,12 @@ def main(): print(__version__) sys.exit(0) + # hack to make CONFIG_PATH available in splash.config + if opts.config_path: + import __builtin__ + __builtin__.CONFIG_PATH = opts.config_path + reload(config) + start_logging(opts) log_splash_version() bump_nofile_limit() From 080813b214997158d7b52291cb6e77b95f7efe40 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Thu, 8 Oct 2015 23:35:50 +0530 Subject: [PATCH 5/6] Remove yaml dependencies. --- .travis.yml | 2 +- dockerfiles/splash/provision.sh | 3 +-- requirements.txt | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index b805d6eb2..f49727336 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,7 @@ virtualenv: before_install: - sudo apt-get install python-twisted python-qt4 lua5.2-dev zlib1g-dev python-imaging - - pip install -U qt4reactor psutil requests jsonschema strict-rfc3339 xvfbwrapper pytest funcparserlib pytest-cov codecov pyyaml + - pip install -U qt4reactor psutil requests jsonschema strict-rfc3339 xvfbwrapper pytest funcparserlib pytest-cov codecov - "if [[ $SYSTEM_PACKAGES == 'false' ]]; then pip install -UI Pillow twisted service_identity lupa; fi" install: diff --git a/dockerfiles/splash/provision.sh b/dockerfiles/splash/provision.sh index 17ed231b8..824a1d5b9 100755 --- a/dockerfiles/splash/provision.sh +++ b/dockerfiles/splash/provision.sh @@ -75,8 +75,7 @@ install_python_deps () { xvfbwrapper==0.2.4 \ lupa==1.1 \ funcparserlib==0.3.6 \ - Pillow==2.9.0 \ - pyyaml==3.11 + Pillow==2.9.0 } install_msfonts() { diff --git a/requirements.txt b/requirements.txt index d4ffec318..deee11258 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,6 @@ adblockparser >= 0.4 re2 >= 0.2.21 xvfbwrapper Pillow -pyyaml # for scripting support lupa >= 1.1 From 8479e7a30a82c3cd88fbc03fe1b5514ca6afebd4 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Mon, 12 Oct 2015 18:23:43 +0530 Subject: [PATCH 6/6] Use a global variable instead of __builtin__ --- splash/config.py | 13 ++++++------- splash/server.py | 4 +--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/splash/config.py b/splash/config.py index 82e08c606..73a22d117 100644 --- a/splash/config.py +++ b/splash/config.py @@ -11,12 +11,7 @@ class ConfigError(Exception): pass -# CONFIG_PATH is the user supplied config file path. -try: - # hack to make CONFIG_PATH available from splash.server.main - CONFIG_PATH = __builtin__.CONFIG_PATH -except AttributeError: - CONFIG_PATH = None +global CONFIG_PATH class Settings(object): @@ -25,7 +20,11 @@ class Settings(object): NO_CONFIG_FILE_MSG = "Config file doesn't exist at %s" def __init__(self): - self.config_path = CONFIG_PATH + try: + self.config_path = CONFIG_PATH + except NameError: + # CONFIG_PATH is not defined. User hasn't passed in a config file. + self.config_path = None self.defaults = {} for name in dir(defaults): if name.isupper(): diff --git a/splash/server.py b/splash/server.py index 8832710b7..c6f8c4cf6 100644 --- a/splash/server.py +++ b/splash/server.py @@ -350,10 +350,8 @@ def main(): print(__version__) sys.exit(0) - # hack to make CONFIG_PATH available in splash.config if opts.config_path: - import __builtin__ - __builtin__.CONFIG_PATH = opts.config_path + config.CONFIG_PATH = opts.config_path reload(config) start_logging(opts)