diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d69c772..229f6ac 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -55,8 +55,8 @@ jobs: # package setup - uses: actions/checkout@v4 - - name: Install dependencies - run: pip install -r requirements-dev.txt + - name: Install package and dependencies + run: pip install .[dev] # tests - name: Lint with flake8 @@ -77,7 +77,7 @@ jobs: # coverage with default version - name: Upload coverage to Codecov - if: matrix.python-version == '3.10' + if: matrix.python-version == '3.11' uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/MANIFEST.in b/MANIFEST.in index c3f2e07..0a3d2a4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,4 @@ -include CITATION.cff -include HISTORY.rst -include LICENSE -include README.md +include CITATION.cff HISTORY.rst LICENSE README.md include licenses/ include simplemma/py.typed graft simplemma/strategies/ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d2a117d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,123 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "simplemma" +description = "A lightweight toolkit for multilingual lemmatization and language detection." +readme = "README.md" +license = { text = "MIT License" } +dynamic = ["version"] +requires-python = ">=3.8" +authors = [ + {name = "Adrien Barbaresi", email = "barbaresi@bbaw.de"} +] +keywords=[ + "language detection", + "language identification", + "langid", + "lemmatization", + "lemmatizer", + "lemmatiser", + "nlp", + "tokenization", + "tokenizer", +] +classifiers = [ + # https://pypi.org/classifiers/ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: Bulgarian", + "Natural Language :: Catalan", + "Natural Language :: Croatian", + "Natural Language :: Czech", + "Natural Language :: Danish", + "Natural Language :: Dutch", + "Natural Language :: English", + "Natural Language :: Finnish", + "Natural Language :: French", + "Natural Language :: Galician", + "Natural Language :: German", + "Natural Language :: Greek", + "Natural Language :: Hindi", + "Natural Language :: Hungarian", + "Natural Language :: Icelandic", + "Natural Language :: Indonesian", + "Natural Language :: Irish", + "Natural Language :: Italian", + "Natural Language :: Latin", + "Natural Language :: Latvian", + "Natural Language :: Lithuanian", + "Natural Language :: Macedonian", + "Natural Language :: Malay", + "Natural Language :: Norwegian", + "Natural Language :: Polish", + "Natural Language :: Portuguese", + "Natural Language :: Romanian", + "Natural Language :: Russian", + "Natural Language :: Slovak", + "Natural Language :: Slovenian", + "Natural Language :: Spanish", + "Natural Language :: Swedish", + "Natural Language :: Thai", + "Natural Language :: Turkish", + "Natural Language :: Ukrainian", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Software Development :: Internationalization", + "Topic :: Software Development :: Localization", + "Topic :: Text Processing :: Linguistic", + "Typing :: Typed", +] +dependencies = [] + +# https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html +[tool.setuptools] +packages = ["simplemma"] + +[tool.setuptools.package-data] +simplemma = ["strategies/dictionaries/data/*.plzma"] + +# https://packaging.python.org/en/latest/guides/single-sourcing-package-version/ +[tool.setuptools.dynamic] +version = {attr = "simplemma.__version__"} + +[project.urls] +"Homepage" = "https://github.com/adbar/simplemma" +"Docs" = "https://adbar.github.io/simplemma/" +"Blog" = "https://adrien.barbaresi.eu/blog/" + +[project.optional-dependencies] +marisa-trie = [ + "marisa_trie == 1.2.0", + "platformdirs == 4.2.2", +] +test = [ + "simplemma[marisa-trie]", + "pytest == 8.3.2", + "pytest-cov == 5.0.0", +] +dev = [ + "simplemma[test]", + "black == 24.8.0", + "flake8 == 7.1.1", + "mypy == 1.11.0", + "types-requests == 2.32.0.20240712", +] +docs = [ + "mkdocs", + "mkdocs-material", + "mkdocstrings", + "mkdocstrings-python", +] diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 7472f14..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,8 +0,0 @@ -black==24.4.2 -flake8==7.0.0 -marisa_trie==1.2.0 -mypy==1.10.0 -platformdirs==4.2.2 -pytest==8.2.1 -pytest-cov==5.0.0 -types-requests==2.32.0.20240523 diff --git a/setup.py b/setup.py deleted file mode 100644 index 23673a4..0000000 --- a/setup.py +++ /dev/null @@ -1,115 +0,0 @@ -"""The setup script.""" - -import re - -from pathlib import Path -from setuptools import setup, find_packages - - -def get_version(package): - "Return package version as listed in `__version__` in `init.py`" - initfile = Path(package, "__init__.py").read_text() - return re.search("__version__ = ['\"]([^'\"]+)['\"]", initfile)[1] - - -readme = Path("README.md").read_text() -# with open('HISTORY.rst') as history_file: -# history = history_file.read() - -requirements = [] - -setup_requirements = [] - -test_requirements = ["pytest>=3", "pytest-cov"] - - -setup( - author="Adrien Barbaresi", - author_email="barbaresi@bbaw.de", - python_requires=">=3.8", - classifiers=[ # https://pypi.org/classifiers/ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Education", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Natural Language :: Bulgarian", - "Natural Language :: Catalan", - "Natural Language :: Croatian", - "Natural Language :: Czech", - "Natural Language :: Danish", - "Natural Language :: Dutch", - "Natural Language :: English", - "Natural Language :: Finnish", - "Natural Language :: French", - "Natural Language :: Galician", - "Natural Language :: German", - "Natural Language :: Greek", - "Natural Language :: Hindi", - "Natural Language :: Hungarian", - "Natural Language :: Icelandic", - "Natural Language :: Indonesian", - "Natural Language :: Irish", - "Natural Language :: Italian", - "Natural Language :: Latin", - "Natural Language :: Latvian", - "Natural Language :: Lithuanian", - "Natural Language :: Macedonian", - "Natural Language :: Malay", - "Natural Language :: Norwegian", - "Natural Language :: Polish", - "Natural Language :: Portuguese", - "Natural Language :: Romanian", - "Natural Language :: Russian", - "Natural Language :: Slovak", - "Natural Language :: Slovenian", - "Natural Language :: Spanish", - "Natural Language :: Swedish", - "Natural Language :: Thai", - "Natural Language :: Turkish", - "Natural Language :: Ukrainian", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Software Development :: Internationalization", - "Topic :: Software Development :: Localization", - "Topic :: Text Processing :: Linguistic", - "Typing :: Typed", - ], - description="A simple multilingual lemmatizer for Python.", - install_requires=requirements, - extras_require={"marisa-trie": ["marisa-trie", "platformdirs"]}, - license="MIT license", - long_description=readme, # + '\n\n' + history, - long_description_content_type="text/markdown", - include_package_data=True, - keywords=[ - "nlp", - "lemmatization", - "lemmatisation", - "lemmatiser", - "tokenization", - "tokenizer", - ], - name="simplemma", - package_data={"simplemma": ["strategies/dictionaries/data/*.plzma"]}, - packages=find_packages(include=["simplemma", "simplemma.*"]), - project_urls={ - "Source": "https://github.com/adbar/simplemma", - "Docs": "https://adbar.github.io/simplemma/", - # "Blog": "https://adrien.barbaresi.eu/blog/", # tag/simplemma - }, - setup_requires=setup_requirements, - test_suite="tests", - tests_require=test_requirements, - url="https://github.com/adbar/simplemma", - version=get_version("simplemma"), - zip_safe=False, -)