diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 51b13b90..447c1eb7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -27,7 +27,6 @@ jobs: - name: Install sec-certs run: | pip install -e . - python -m spacy download en_core_web_sm - name: Run tests run: pytest --cov=sec_certs tests - name: Code coverage upload diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4c31049..c5c6f238 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,12 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.5 + rev: v0.2.2 hooks: - id: ruff - id: ruff-format args: ["--check"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.6.1" + rev: "v1.8.0" hooks: - id: mypy additional_dependencies: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 148f54b5..588940e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -34,9 +34,12 @@ Note on single-sourcing the package version: More can be read [here](https://pac ### Currently, the release process is as follows -1. Update dependencies with `pre-commit autoupdate`, pin new versions of linters into `pyproject.toml` and run `cd requirements && ./compile.sh`. -2. Create a release from GitHub UI. Include release notes, add proper version tag and publish the release (or create it from scratch with new tag). -3. This will automatically update PyPi and DockerHub packages. +1. Update dependencies with `pre-commit autoupdate`, pin new versions of linters into `pyproject.toml`. +2. Run `cd requirements && ./compile.sh` to update dependencies. +3. Run `python -m spacy download en_core_web_sm` to find out the current version of the `en_core_web_sm` model, then update the `en_core_web_sm` link in `pyproject.toml` to the matching wheel URL from the [spacy-models releases on GitHub](https://github.com/explosion/spacy-models/releases). +4. Run `cd requirements && ./compile.sh` **again** to update dependencies. +5. Create a release from GitHub UI. 
Include release notes, add proper version tag and publish the release (or create it from scratch with new tag). +6. Publishing the release automatically updates the PyPI and Docker Hub packages. ## Quality assurance diff --git a/Dockerfile b/Dockerfile index fdac3c05..53a5a75f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -58,8 +58,7 @@ RUN \ pip3 install -U pip wheel pip-tools && \ pip-sync requirements/requirements.txt && \ pip3 install --no-cache notebook jupyterlab && \ - pip3 install -e . && \ - python3 -m spacy download en_core_web_sm + pip3 install -e . # just to be sure that pdftotext is in $PATH ENV PATH /usr/bin/pdftotext:${PATH} diff --git a/docs/installation.md b/docs/installation.md index e71c9b97..e62149e6 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -7,7 +7,6 @@ The tool can be installed from PyPi with ```bash pip install -U sec-certs -python -m spacy download en_core_web_sm ``` Note, that `Python>=3.10` is required. @@ -32,7 +31,6 @@ git clone https://github.com/crocs-muni/sec-certs.git python3 -m venv venv source venv/bin/activate pip install -e . -python -m spacy download en_core_web_sm ``` Alternatively, our Our [Dockerfile](https://github.com/crocs-muni/sec-certs/blob/main/Dockerfile) represents a reproducible way of setting up the environment. diff --git a/docs/quickstart.md b/docs/quickstart.md index f2e3690f..49731e88 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -3,7 +3,7 @@ ::::{tab-set} :::{tab-item} Common Criteria -1. Install the latest version with `pip install -U sec-certs && python -m spacy download en_core_web_sm` (see [installation](installation.md)). +1. Install the latest version with `pip install -U sec-certs` (see [installation](installation.md)). 2. In your Python interpreter, type ```python from sec_certs.dataset.cc import CCDataset @@ -16,7 +16,7 @@ to obtain to obtain freshly processed dataset from [seccerts.org](https://seccer ::: :::{tab-item} FIPS 140 -1. 
Install the latest version with `pip install -U sec-certs && python -m spacy download en_core_web_sm` (see [installation](installation.md)). +1. Install the latest version with `pip install -U sec-certs` (see [installation](installation.md)). 2. In your Python interpreter, type ```python from sec_certs.dataset.fips import FIPSDataset diff --git a/pyproject.toml b/pyproject.toml index ad63b424..401a0ae9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ "ipykernel", "ipywidgets", "spacy", + "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl", "pkgconfig", "seaborn", "pySankeyBeta", @@ -63,8 +64,8 @@ [project.optional-dependencies] dev = [ - "ruff==0.1.5", - "mypy==1.6.1", + "ruff==0.2.2", + "mypy==1.8.0", "types-PyYAML", "types-python-dateutil", "types-requests", diff --git a/requirements/all_requirements.txt b/requirements/all_requirements.txt index 72cffbc0..7a836c31 100644 --- a/requirements/all_requirements.txt +++ b/requirements/all_requirements.txt @@ -18,8 +18,6 @@ appnope==0.1.3 # ipython asttokens==2.4.1 # via stack-data -async-timeout==4.0.3 - # via aiohttp attrs==23.1.0 # via # aiohttp @@ -132,12 +130,10 @@ docutils==0.19 # myst-parser # pydata-sphinx-theme # sphinx +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl + # via sec-certs (./../pyproject.toml) evaluate==0.4.1 # via setfit -exceptiongroup==1.2.0 - # via - # ipython - # pytest executing==2.0.1 # via stack-data fastjsonschema==2.19.0 @@ -309,7 +305,7 @@ murmurhash==1.0.10 # preshed # spacy # thinc -mypy==1.6.1 +mypy==1.8.0 # via sec-certs (./../pyproject.toml) mypy-extensions==1.0.0 # via mypy @@ -596,7 +592,7 @@ rpds-py==0.13.1 # via # jsonschema # referencing -ruff==0.1.5 +ruff==0.2.2 # via sec-certs (./../pyproject.toml) safetensors==0.4.0 # via transformers @@ -648,7 +644,9 @@ snowballstemmer==2.2.0 
soupsieve==2.5 # via beautifulsoup4 spacy==3.7.2 - # via sec-certs (./../pyproject.toml) + # via + # en-core-web-sm + # sec-certs (./../pyproject.toml) spacy-legacy==3.0.12 # via spacy spacy-loggers==1.0.5 @@ -714,15 +712,6 @@ tifffile==2023.9.26 # via scikit-image tokenizers==0.15.0 # via transformers -tomli==2.0.1 - # via - # build - # coverage - # mypy - # pip-tools - # pyproject-hooks - # pytest - # setuptools-scm toolz==0.12.0 # via # dask @@ -778,7 +767,6 @@ types-requests==2.31.0.10 typing-extensions==4.8.0 # via # alembic - # cloudpathlib # huggingface-hub # mypy # myst-nb diff --git a/requirements/dev_requirements.txt b/requirements/dev_requirements.txt index deadf93b..1b028276 100644 --- a/requirements/dev_requirements.txt +++ b/requirements/dev_requirements.txt @@ -16,8 +16,6 @@ appnope==0.1.3 # ipython asttokens==2.4.1 # via stack-data -async-timeout==4.0.3 - # via aiohttp attrs==23.1.0 # via # aiohttp @@ -101,10 +99,8 @@ docutils==0.19 # myst-parser # pydata-sphinx-theme # sphinx -exceptiongroup==1.2.0 - # via - # ipython - # pytest +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl + # via sec-certs (./../pyproject.toml) executing==2.0.1 # via stack-data fastjsonschema==2.19.0 @@ -227,7 +223,7 @@ murmurhash==1.0.10 # preshed # spacy # thinc -mypy==1.6.1 +mypy==1.8.0 # via sec-certs (./../pyproject.toml) mypy-extensions==1.0.0 # via mypy @@ -424,7 +420,7 @@ rpds-py==0.13.1 # via # jsonschema # referencing -ruff==0.1.5 +ruff==0.2.2 # via sec-certs (./../pyproject.toml) scikit-learn==1.3.2 # via sec-certs (./../pyproject.toml) @@ -453,7 +449,9 @@ snowballstemmer==2.2.0 soupsieve==2.5 # via beautifulsoup4 spacy==3.7.2 - # via sec-certs (./../pyproject.toml) + # via + # en-core-web-sm + # sec-certs (./../pyproject.toml) spacy-legacy==3.0.12 # via spacy spacy-loggers==1.0.5 @@ -508,15 +506,6 @@ thinc==8.2.1 # via spacy threadpoolctl==3.2.0 # via scikit-learn -tomli==2.0.1 
- # via - # build - # coverage - # mypy - # pip-tools - # pyproject-hooks - # pytest - # setuptools-scm tornado==6.3.3 # via # ipykernel @@ -550,7 +539,6 @@ types-requests==2.31.0.10 # via sec-certs (./../pyproject.toml) typing-extensions==4.8.0 # via - # cloudpathlib # huggingface-hub # mypy # myst-nb diff --git a/requirements/nlp_requirements.txt b/requirements/nlp_requirements.txt index 012a263e..1445a3e2 100644 --- a/requirements/nlp_requirements.txt +++ b/requirements/nlp_requirements.txt @@ -14,8 +14,6 @@ appnope==0.1.3 # ipython asttokens==2.4.1 # via stack-data -async-timeout==4.0.3 - # via aiohttp attrs==23.1.0 # via # aiohttp @@ -103,10 +101,10 @@ dill==0.3.7 # multiprocess distro==1.8.0 # via tabula-py +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl + # via sec-certs (./../pyproject.toml) evaluate==0.4.1 # via setfit -exceptiongroup==1.2.0 - # via ipython executing==2.0.1 # via stack-data filelock==3.13.1 @@ -520,7 +518,9 @@ smart-open==6.4.0 soupsieve==2.5 # via beautifulsoup4 spacy==3.7.2 - # via sec-certs (./../pyproject.toml) + # via + # en-core-web-sm + # sec-certs (./../pyproject.toml) spacy-legacy==3.0.12 # via spacy spacy-loggers==1.0.5 @@ -551,8 +551,6 @@ tifffile==2023.9.26 # via scikit-image tokenizers==0.15.0 # via transformers -tomli==2.0.1 - # via setuptools-scm toolz==0.12.0 # via # dask @@ -600,7 +598,6 @@ typer==0.9.0 typing-extensions==4.8.0 # via # alembic - # cloudpathlib # huggingface-hub # panel # pydantic diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 1b17a94c..7f3b639c 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -11,7 +11,7 @@ attrs==23.1.0 # jsonschema # referencing beautifulsoup4==4.12.2 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) blis==0.7.11 # via thinc catalogue==2.0.10 @@ -27,7 +27,7 @@ charset-normalizer==3.3.2 # via requests 
click==8.1.7 # via - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) # typer cloudpathlib==0.16.0 # via weasel @@ -58,24 +58,24 @@ deprecated==1.2.14 # via pikepdf distro==1.8.0 # via tabula-py -exceptiongroup==1.2.0 - # via ipython +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl + # via sec-certs (../pyproject.toml) executing==2.0.1 # via stack-data fonttools==4.45.0 # via matplotlib html5lib==1.1 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) idna==3.4 # via requests ipykernel==6.27.0 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) ipython==8.17.2 # via # ipykernel # ipywidgets ipywidgets==8.1.1 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) jedi==0.19.1 # via ipython jinja2==3.1.3 @@ -83,7 +83,7 @@ jinja2==3.1.3 joblib==1.3.2 # via scikit-learn jsonschema==4.20.0 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) jsonschema-specifications==2023.11.1 # via jsonschema jupyter-client==8.6.0 @@ -101,14 +101,14 @@ langcodes==3.3.0 lxml==4.9.3 # via # pikepdf - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) markupsafe==2.1.3 # via jinja2 matplotlib==3.8.2 # via # pysankeybeta # seaborn - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) matplotlib-inline==0.1.6 # via # ipykernel @@ -121,7 +121,7 @@ murmurhash==1.0.10 nest-asyncio==1.5.8 # via ipykernel networkx==3.2.1 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) numpy==1.26.2 # via # blis @@ -132,7 +132,7 @@ numpy==1.26.2 # scikit-learn # scipy # seaborn - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) # spacy # tabula-py # thinc @@ -150,24 +150,24 @@ pandas==2.1.3 # via # pysankeybeta # seaborn - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) # tabula-py parso==0.8.3 # via jedi 
pdftotext==2.2.2 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) pexpect==4.8.0 # via ipython pikepdf==8.7.1 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) pillow==10.2.0 # via # matplotlib # pikepdf # pytesseract - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) pkgconfig==1.5.5 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) platformdirs==4.0.0 # via jupyter-core preshed==3.0.9 @@ -179,7 +179,7 @@ prompt-toolkit==3.0.41 psutil==5.9.6 # via # ipykernel - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) ptyprocess==0.7.0 # via pexpect pure-eval==0.2.2 @@ -190,51 +190,49 @@ pydantic==2.5.2 # via # confection # pydantic-settings - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) # spacy # thinc # weasel pydantic-core==2.14.5 # via pydantic pydantic-settings==2.1.0 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) pygments==2.17.2 # via ipython pyparsing==3.1.1 # via matplotlib pypdf[crypto]==3.17.1 - # via - # pypdf - # sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) pysankeybeta==1.4.1 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) pytesseract==0.3.10 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) python-dateutil==2.8.2 # via # jupyter-client # matplotlib # pandas - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) python-dotenv==1.0.0 # via pydantic-settings pytz==2023.3.post1 # via pandas pyyaml==6.0.1 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) pyzmq==25.1.1 # via # ipykernel # jupyter-client rapidfuzz==3.5.2 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) referencing==0.31.0 # via # jsonschema # jsonschema-specifications requests==2.31.0 # via - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) # spacy # 
weasel rpds-py==0.13.1 @@ -242,17 +240,17 @@ rpds-py==0.13.1 # jsonschema # referencing scikit-learn==1.3.2 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) scipy==1.11.4 # via # scikit-learn - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) seaborn==0.13.0 # via # pysankeybeta - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) setuptools-scm==8.0.4 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) six==1.16.0 # via # asttokens @@ -265,7 +263,9 @@ smart-open==6.4.0 soupsieve==2.5 # via beautifulsoup4 spacy==3.7.2 - # via sec-certs (./../pyproject.toml) + # via + # en-core-web-sm + # sec-certs (../pyproject.toml) spacy-legacy==3.0.12 # via spacy spacy-loggers==1.0.5 @@ -279,20 +279,18 @@ srsly==2.4.8 stack-data==0.6.3 # via ipython tabula-py==2.9.0 - # via sec-certs (./../pyproject.toml) + # via sec-certs (../pyproject.toml) thinc==8.2.1 # via spacy threadpoolctl==3.2.0 # via scikit-learn -tomli==2.0.1 - # via setuptools-scm tornado==6.3.3 # via # ipykernel # jupyter-client tqdm==4.66.1 # via - # sec-certs (./../pyproject.toml) + # sec-certs (../pyproject.toml) # spacy traitlets==5.13.0 # via @@ -309,7 +307,6 @@ typer==0.9.0 # weasel typing-extensions==4.8.0 # via - # cloudpathlib # pydantic # pydantic-core # setuptools-scm diff --git a/requirements/test_requirements.txt b/requirements/test_requirements.txt index f94e9328..96f02e88 100644 --- a/requirements/test_requirements.txt +++ b/requirements/test_requirements.txt @@ -62,10 +62,8 @@ deprecated==1.2.14 # via pikepdf distro==1.8.0 # via tabula-py -exceptiongroup==1.2.0 - # via - # ipython - # pytest +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl + # via sec-certs (./../pyproject.toml) executing==2.0.1 # via stack-data fonttools==4.45.0 @@ -282,7 +280,9 @@ smart-open==6.4.0 soupsieve==2.5 # via beautifulsoup4 spacy==3.7.2 - # via 
sec-certs (./../pyproject.toml) + # via + # en-core-web-sm + # sec-certs (./../pyproject.toml) spacy-legacy==3.0.12 # via spacy spacy-loggers==1.0.5 @@ -301,11 +301,6 @@ thinc==8.2.1 # via spacy threadpoolctl==3.2.0 # via scikit-learn -tomli==2.0.1 - # via - # coverage - # pytest - # setuptools-scm tornado==6.3.3 # via # ipykernel @@ -329,7 +324,6 @@ typer==0.9.0 # weasel typing-extensions==4.8.0 # via - # cloudpathlib # pydantic # pydantic-core # setuptools-scm