Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Many new unit tests for configuration settings file (pelicanconf.py) (#3357) #3368

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
310 changes: 289 additions & 21 deletions pelican/settings.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import copy
import errno
import importlib.util
import inspect
import locale
import logging
import os
import pathlib
import re
import sys
from os.path import isabs
Expand All @@ -13,14 +15,7 @@

from pelican.log import LimitFilter


def load_source(name: str, path: str) -> ModuleType:
spec = importlib.util.spec_from_file_location(name, path)
mod = importlib.util.module_from_spec(spec)
sys.modules[name] = mod
spec.loader.exec_module(mod)
return mod

DEFAULT_MODULE_NAME: str = "pelicanconf"

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -182,13 +177,259 @@ def load_source(name: str, path: str) -> ModuleType:
PYGMENTS_RST_OPTIONS = None


def load_source(name: str, path: str | pathlib.Path) -> ModuleType | None:
"""
Loads the Python-syntax file as a module for application access

Only `path` shall be consulted for its actual file location.

If module_name is not supplied as an argument, then its module name
shall be extracted from given `path` argument but without any directory
nor its file extension (just the basic pathLib.Path(path).stem part).

This function substitutes Python built-in `importlib` but with one distinctive
but different feature: No attempts are made to leverage the `PYTHONPATH` as an
alternative multi-directory search of a specified module name;
only singular-directory lookup/search is supported here.

WARNING to DEVELOPER: If you programmatically used the "from" reserved
Python keyword as in this "from pelicanconf import ..." statement, then you
will not be able to free up the pelicanconf module, much
less use 'reload module' features. (Not a likely scenario, but)

:param path: filespec of the Python script file to load as Python module;
`path` parameter may be a filename which is looked for in the
current working directory, or a relative filename that looks
only in that particular directory, or an absolute filename
that also looks only in that absolute directory.
:type path: str | pathlib.Path
:param name: Optional argument to the Python module name to be loaded.
`module_name` shall never use dotted notation nor any
directory separator, just the plain filename (without
any extension/suffix part).
:type name: str
:return: the ModuleType of the loaded Python module file. Will be
accessible in a form of "pelican.<module_name>".
:rtype: ModuleType | None
"""
if isinstance(path, str):
conf_filespec = pathlib.Path(path)
elif isinstance(path, pathlib.Path):
conf_filespec = path
else:
logger.fatal(
f"argument {path.__str__()} is not a pathLib.Path type nor a str type."
)
raise TypeError

absolute_filespec = conf_filespec.absolute()
filename_ext = conf_filespec.name
if not absolute_filespec.exists():
logger.error(f"File '{filename_ext!s}' not found.")
return None
if not absolute_filespec.is_file():
logger.error(f"Absolute '{conf_filespec!s}' path is not a file.")
return None
if not os.access(str(absolute_filespec), os.R_OK):
logger.error(f"'{absolute_filespec}' file is not readable.")
return None
resolved_absolute_filespec = absolute_filespec.resolve(strict=True)

# We used to strip '.py' extension to make a module name out of it
# But we cannot take in anymore the user-supplied path for anything
# (such as Pelican configuration settings file) as THE Python module
# name for Pelican due to potential conflict with 300+ names of Python
# built-in system module, such as `site.conf`, `calendar.conf`.
# A complete list is in https://docs.python.org/3/py-modindex.html.
# Instead, force the caller of load_source to supply a Pelican-specific but
# statically fixed module_name to be associated with the end-user's choice
# of configuration filename.

module_name = ""
# if load_source(path=...) is used
if "name" not in locals():
# old load_source(path) prototype
module_name = pathlib.Path(path).stem

# if load_source(None, =...) is used
if name is None:
logger.warning(
f"Module name is missing; using Python built-in to check "
f"PYTHONPATH for {absolute_filespec}"
)
# if load_source(value: not str, =...) is used
elif not isinstance(name, str):
raise TypeError
# if load_source(name="", =...) is used
elif name == "":
module_name = pathlib.Path(path).stem
# if load_source(value: not str, =...) is used
elif isinstance(name, str):
module_name = name
else:
raise TypeError("load_source(name=...) argument is not a str type")

# One last thing to do before sys.modules check, is to deny any dotted module name
# This is a Pelican design issue (to support pelicanconf reloadability)
#
# Alternatively, do we want to support the approach of Python 'pelican.conf'
# module? Yes, but we couldn't, while it is the correct design to nest
# the configuration settings as a submodule behind Pelican (we should be
# able to), but Pelican design is restricted by Python sys.modules design.
#
# Also, we lose reload() capability once the alternative desired design of
# 'conf' submodule gets loaded by Python 'from' keyword.
#
# Hence, we do not support 'pelican.conf' due to Pelican reloadability requirement
#
# Judge have ruled the 'period' symbol in module name as disallowable.
if "." in module_name:
# load_connect() should return sys.exit by design, but no.
# Below fatal log is used AS-IS by test_settings_module.py unit test
logger.fatal(f"Cannot use dotted module name such as `{module_name}`.")
# Nothing fancy, return None
return None

# Nonetheless, we check that this module_name is not taken as well.
# Check that the module name is not in sys.module (like pathlib!!!)
if module_name in sys.modules:
# following logger.fatal is used as-is by test_settings_module.py unit test
logger.fatal(
f"Cannot reserved the module name already used"
f" by Python system module `{module_name}`."
)
sys.exit(errno.EPERM)

# if module_name == "":
# module_name = "pelicanconf"

try:
# Using Python importlib, find the module using a full file path
# specification and its filename and return this Module instance
module_spec = importlib.util.spec_from_file_location(
module_name, resolved_absolute_filespec
)
logger.debug(f"ModuleSpec '{module_name}' obtained from {absolute_filespec}.")
except ImportError:
logger.fatal(
f"Location {resolved_absolute_filespec} may be missing a "
f"module `get_filename` attribute value."
)
raise ModuleNotFoundError from ImportError
except OSError:
logger.error(
f"Python module loader for configuration settings file "
f"cannot determine absolute directory path from {absolute_filespec}."
)
raise FileNotFoundError from OSError
# pass all the other excepts out

try:
# With the ModuleSpec object, we can get the
module_type = importlib.util.module_from_spec(module_spec)
except ImportError:
logger.fatal(
"Loader that defines exec_module() must also define create_module()"
)
raise ImportError from ImportError

# module_type also has all the loaded Python values/objects from
# the Pelican configuration settings file, but it is not
# yet readable for all...

# if you have use "from pelicanconf import ...", then you will not
# be able to free up the module

# store the module into the sys.modules
sys.modules[module_name] = module_type

try:
# finally, execute any codes in the Pelican configuration settings file.
module_spec.loader.exec_module(module_type)
# Below logger.debug is used as-is by test_settings_module.py unit-test
logger.debug(
f"Loaded module '{module_name}' from {resolved_absolute_filespec} file"
)
return module_type
except SyntaxError as e:
full_filespec = conf_filespec.resolve(strict=True)
# Show where in the pelicanconf.py the offending syntax error is at via {e}.
logger.error(
f"{e}.\nHINT: "
f"Try executing `python {full_filespec}` "
f"for better syntax troubleshooting."
)
# Trying something new, reraise the exception up
raise SyntaxError(
f"Invalid syntax error at line number {e.end_lineno}"
f" column offset {e.end_offset}",
{
"filename": full_filespec,
"lineno": int(e.lineno),
"offset": int(e.offset),
"text": e.text,
"end_lineno": int(e.end_lineno),
"end_offset": int(e.end_offset),
},
) from e
except Any as e:
logger.critical(
f"'Python system module loader for {resolved_absolute_filespec}'"
f" module failed: {e}."
)
sys.exit(errno.ENOEXEC)


def reload_source(name: str, path: str | pathlib.Path) -> ModuleType | None:
"""Reload the configuration settings file"""
# first line of defense against errant built-in module name
if name != DEFAULT_MODULE_NAME:
logger.error(
f"Module name of {name} cannot be anything other "
f"than {DEFAULT_MODULE_NAME}"
)
return None
# second line of defense, this function call only works if one firstly
# called the load_source()
if name not in sys.modules:
logger.error(f"Module name of {name} is not loaded, call load_source() firstly")
return None
# Now we can do the dangerous step
del sys.modules[name]
module_type = load_source(name, path)
if module_type is None:
logger.error(f"Module name of {name} is not loaded, call load_source() firstly")
return None
return module_type


def read_settings(
path: Optional[str] = None, override: Optional[Settings] = None
path: Optional[str] = None,
override: Optional[Settings] = None,
reload: bool = False,
) -> Settings:
"""reads the setting files into a Python configuration settings module

Returns the final Settings list of keys/values after reading the file
and applying an override of settings on top of it.

:param path: The full filespec path to the Pelican configuration settings file.
:type path: str | None
:param path: The override settings to be used to overwrite the ones read in
from the Pelican configuration settings file.
:type override: Settings | None
:param reload: A boolean value to safely reload the Pelican configuration settings
file into a Python module
:type reload: bool
:return: The Settings list of configurations after extracting the key/value from
the path of Pelican configuration settings file and after the override
settings has been applied over its read settings.
:rtype: Settings
"""
settings = override or {}

if path:
settings = dict(get_settings_from_file(path), **settings)
settings = dict(get_settings_from_file(path, reload), **settings)

if settings:
settings = handle_deprecated_settings(settings)
Expand Down Expand Up @@ -217,7 +458,7 @@ def getabs(maybe_relative, base_path=path):
]

settings = dict(copy.deepcopy(DEFAULT_CONFIG), **settings)
settings = configure_settings(settings)
settings = configure_settings(settings, reload)

# This is because there doesn't seem to be a way to pass extra
# parameters to docutils directive handlers, so we have to have a
Expand All @@ -229,19 +470,40 @@ def getabs(maybe_relative, base_path=path):


def get_settings_from_module(module: Optional[ModuleType] = None) -> Settings:
"""Loads settings from a module, returns a dictionary."""

"""Clones a dictionary of settings from a module.

:param module: Attempts to load a module using singular current working directory
(`$CWD`) search method then returns a clone-duplicate of its
settings found in the module.
If no module (`None`) is given, then default module name is used.
:type module: ModuleType | None
:return: Returns a dictionary of Settings found in that Python module.
:rtype: Settings"""
context = {}
if module is not None:
context.update((k, v) for k, v in inspect.getmembers(module) if k.isupper())
return context


def get_settings_from_file(path: str) -> Settings:
"""Loads settings from a file path, returning a dict."""

name, ext = os.path.splitext(os.path.basename(path))
module = load_source(name, path)
def get_settings_from_file(path: str, reload: bool) -> Settings:
"""Loads module from a file then clones dictionary of settings from that module.

:param path: Attempts to load a module using a file specification (absolute or
relative) then returns a clone-duplicate of its settings found in
the module. If no module (`None`) is given, then default module
name is used.
:param path: A file specification (absolute or relative) that points to the
Python script file containing the keyword/value assignment settings.
:param reload: A bool value to check if module is already preloaded
before doing a reload.
:return: Returns a dictionary of Settings found in that Python module.
:rtype: Settings"""
name = DEFAULT_MODULE_NAME
# Keep the module name constant for Pelican configuration settings file
if reload:
module = reload_source(name, path)
else:
module = load_source(name, path)
return get_settings_from_module(module)


Expand Down Expand Up @@ -568,11 +830,17 @@ def handle_deprecated_settings(settings: Settings) -> Settings:
return settings


def configure_settings(settings: Settings) -> Settings:
def configure_settings(settings: Settings, reload: None | bool = False) -> Settings:
"""Provide optimizations, error checking, and warnings for the given
settings.
Also, specify the log messages to be ignored.
"""

:param settings: Contains a dictionary of Pelican keyword/keyvalue
:type settings: Settings
:param reload: A flag to reload the same module but maybe with a different file
:type reload: bool
:return: An updated dictionary of Pelican's keywords and its keyvalue.
:rtype: Settings"""
if "PATH" not in settings or not os.path.isdir(settings["PATH"]):
raise Exception(
"You need to specify a path containing the content"
Expand Down Expand Up @@ -613,7 +881,7 @@ def configure_settings(settings: Settings) -> Settings:
]:
if key in settings and not isinstance(settings[key], types):
value = settings.pop(key)
logger.warn(
logger.warning(
"Detected misconfigured %s (%s), falling back to the default (%s)",
key,
value,
Expand Down
15 changes: 15 additions & 0 deletions pelican/tests/settings/pelicanconf-missing-path-var.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
DEBUG = True
#####PATH = "not-defined" # our newly created location for our articles/pages to reside in/under
OUTPUT_PATH = "output"
ARTICLES_PATH = ["articles"]
PLUGIN_PATHS = [
"~/admin/websites/egbert.net/pelican/plugins",
"~/admin/websites/egbert.net/pelican-plugins",
"~/admin/websites/egbert.net/development/tableize/pelican/plugins",
]
PLUGINS = ["tableize"]
AUTHOR = "John"
SITENAME = "My 1st Site"
# for
PORT = 8000
BIND = "127.0.0.1"
Loading