diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 8f78943a..6670ad6f 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.0.1
+current_version = 2.1.2
 commit = True
 tag = True
 tag_name = v{new_version}
diff --git a/.flake8 b/.flake8
index 268fd3a8..96c8f44d 100644
--- a/.flake8
+++ b/.flake8
@@ -10,4 +10,5 @@ ignore =
     W504,
 
     # black disagrees with flake8, and inserts whitespace
-    E203,  # whitespace before ':'
+    # E203: whitespace before ':'
+    E203,
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ee3f8e52..a7d8f921 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -15,27 +15,27 @@ jobs:
     strategy:
       matrix:
         include:
-        - python-version: "3.7"
+        - python-version: "3.8"
           env:
             TOXENV: docs
-        - python-version: "3.10"
+        - python-version: "3.12"
           env:
             TOXENV: flake8
-        - python-version: "3.10"
+        - python-version: "3.12"
           env:
             TOXENV: pylint
-        - python-version: "3.10"
+        - python-version: "3.12"
           env:
             TOXENV: security
-        - python-version: "3.10"
+        - python-version: "3.12"
           env:
             TOXENV: black
-        - python-version: "3.10"
+        - python-version: "3.12"
           env:
             TOXENV: typing
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
 
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index f694f422..83fc5206 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -10,12 +10,12 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
 
-    - name: Set up Python 3.9
+    - name: Set up Python 3.12
       uses: actions/setup-python@v4
       with:
-        python-version: 3.9
+        python-version: 3.12
 
     - name: Check Tag
       id: check-release-tag
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 031ee7b2..12acd0cc 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -14,10 +14,10 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy3.7"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.10"]
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
 
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 00000000..5ba0d2a3
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,15 @@
+version: 2
+formats: all
+sphinx:
+  configuration: docs/conf.py
+  fail_on_warning: true
+build:
+  os: ubuntu-22.04
+  tools:
+    # For available versions, see:
+    # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
+    python: "3.12"  # Keep in sync with .github/workflows/build.yml
+python:
+  install:
+    - requirements: docs/requirements.txt
+    - path: .
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000..8044a257
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,132 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the overall
+  community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or advances of
+  any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+[opensource@zyte.com](mailto:opensource@zyte.com).
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
+[translations]: https://www.contributor-covenant.org/translations
diff --git a/NEWS b/NEWS
index 6b2e426f..3846a427 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,78 @@
 w3lib release notes
 ===================
 
+2.1.2 (2023-08-03)
+------------------
+
+- Fix test failures on Python 3.11.4+ (#212, #213).
+- Fix an incorrect type hint (#211).
+- Add project URLs to setup.py (#215).
+
+2.1.1 (2022-12-09)
+------------------
+
+- :func:`~w3lib.url.safe_url_string`, :func:`~w3lib.url.safe_download_url`
+  and :func:`~w3lib.url.canonicalize_url` now strip whitespace and control
+  characters from URLs according to the URL living standard.
+
+
+2.1.0 (2022-11-28)
+------------------
+
+-   Dropped Python 3.6 support, and made Python 3.11 support official. (#195,
+    #200)
+
+-   :func:`~w3lib.url.safe_url_string` now generates safer URLs.
+
+    To make URLs safer for the `URL living standard`_:
+
+    .. _URL living standard: https://url.spec.whatwg.org/
+
+    -   ``;=`` are percent-encoded in the URL username.
+
+    -   ``;:=`` are percent-encoded in the URL password.
+
+    -   ``'`` is percent-encoded in the URL query if the URL scheme is `special
+        <https://url.spec.whatwg.org/#special-scheme>`__.
+
+    To make URLs safer for `RFC 2396`_ and `RFC 3986`_, ``|[]`` are
+    percent-encoded in URL paths, queries, and fragments.
+
+    .. _RFC 2396: https://www.ietf.org/rfc/rfc2396.txt
+    .. _RFC 3986: https://www.ietf.org/rfc/rfc3986.txt
+
+    (#80, #203)
+
+-   :func:`~w3lib.encoding.html_to_unicode` now checks for the `byte order
+    mark`_ before inspecting the ``Content-Type`` header when determining the
+    content encoding, in line with the `URL living standard`_. (#189, #191)
+
+    .. _byte order mark: https://en.wikipedia.org/wiki/Byte_order_mark
+
+-   :func:`~w3lib.url.canonicalize_url` now strips spaces from the input URL,
+    to be more in line with the `URL living standard`_. (#132, #136)
+
+-   :func:`~w3lib.html.get_base_url` now ignores HTML comments. (#70, #77)
+
+-   Fixed :func:`~w3lib.url.safe_url_string` re-encoding percent signs on
+    the URL username and password even when they were being used as part of an
+    escape sequence. (#187, #196)
+
+-   Fixed :func:`~w3lib.http.basic_auth_header` using the wrong flavor of
+    base64 encoding, which could prevent authentication in rare cases. (#181,
+    #192)
+
+-   Fixed :func:`~w3lib.html.replace_entities` raising :exc:`OverflowError` in
+    some cases due to `a bug in CPython
+    <https://github.com/python/cpython/issues/76763>`__. (#199, #202)
+
+-   Improved typing and fixed typing issues. (#190, #206)
+
+-   Made CI and test improvements. (#197, #198)
+
+-   Adopted a Code of Conduct. (#194)
+
+
 2.0.1 (2022-08-11)
 ------------------
 Minor documentation fix (release date is set in the changelog).
@@ -130,8 +202,6 @@ Other improvements and bug fixes:
 - ``url_query_cleaner()``: support new ``keep_fragments`` argument
   (defaulting to ``False``)
 
-.. _RFC 3986: https://tools.ietf.org/html/rfc3986#section-3.2
-
 1.15.0 (2016-07-29)
 -------------------
 
diff --git a/README.rst b/README.rst
index 56641830..00b447d4 100644
--- a/README.rst
+++ b/README.rst
@@ -27,7 +27,7 @@ This is a Python library of web-related functions, such as:
 Requirements
 ============
 
-Python 3.7+
+Python 3.8+
 
 Install
 =======
diff --git a/docs/conf.py b/docs/conf.py
index 27c1af70..cb57d425 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -53,7 +53,7 @@
 # built documents.
 #
 # The full version, including alpha/beta/rc tags.
-release = '2.0.1'
+release = '2.1.2'
 # The short X.Y version.
 version = '.'.join(release.split('.')[:2])
 
@@ -96,7 +96,7 @@
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'default'
+html_theme = "sphinx_rtd_theme"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
diff --git a/docs/index.rst b/docs/index.rst
index aa1c851f..c30d6a59 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -28,7 +28,7 @@ Modules
 Requirements
 ============
 
-Python 3.7+
+Python 3.8+
 
 Install
 =======
diff --git a/setup.py b/setup.py
index 4e3be0f4..0cfae8a1 100644
--- a/setup.py
+++ b/setup.py
@@ -41,12 +41,17 @@ def no_cythonize(extensions, **_ignore):
 
 setup(
     name="w3lib",
-    version="2.0.1",
+    version="2.1.2",
     license="BSD",
     description="Library of web-related functions",
     author="Scrapy project",
     author_email="info@scrapy.org",
     url="https://github.com/scrapy/w3lib",
+    project_urls={
+        "Documentation": "https://w3lib.readthedocs.io/en/latest/",
+        "Source Code": "https://github.com/scrapy/w3lib",
+        "Issue Tracker": "https://github.com/scrapy/w3lib/issues",
+    },
     packages=find_packages(exclude=("tests", "tests.*")),
     package_data={
         "w3lib": ["py.typed"],
@@ -54,7 +59,7 @@ def no_cythonize(extensions, **_ignore):
     include_package_data=True,
     zip_safe=False,
     platforms=["Any"],
-    python_requires=">=3.7",
+    python_requires=">=3.8",
     install_requires=[
         "idna",
     ],
@@ -65,11 +70,11 @@ def no_cythonize(extensions, **_ignore):
         "Operating System :: OS Independent",
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Programming Language :: Python :: Implementation :: CPython",
         "Programming Language :: Python :: Implementation :: PyPy",
         "Topic :: Internet :: WWW/HTTP",
diff --git a/tests/test_encoding.py b/tests/test_encoding.py
index 6605cce0..58c98c3d 100644
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@@ -1,5 +1,6 @@
 import codecs
 import unittest
+from typing import Optional, Union, List, Any
 
 import pytest
 
@@ -144,11 +145,11 @@ def test_invalid_utf8(self):
         self.assertEqual(to_unicode(b"\xc2\xc2\xa3", "utf-8"), "\ufffd\xa3")
 
 
-def ct(charset):
+def ct(charset: Optional[str]) -> Optional[str]:
     return "Content-Type: text/html; charset=" + charset if charset else None
 
 
-def norm_encoding(enc):
+def norm_encoding(enc: str) -> str:
     return codecs.lookup(enc).name
 
 
@@ -161,7 +162,13 @@ def test_unicode_body(self):
         self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(body_unicode, unicode_string)
 
-    def _assert_encoding(self, content_type, body, expected_encoding, expected_unicode):
+    def _assert_encoding(
+        self,
+        content_type: Optional[str],
+        body: bytes,
+        expected_encoding: str,
+        expected_unicode: Union[str, List[str]],
+    ) -> None:
         assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body)
         self.assertTrue(isinstance(body_unicode, str))
@@ -233,8 +240,12 @@ def test_replace_wrong_encoding(self):
         assert "<span>value</span>" in body_unicode, repr(body_unicode)
 
     def _assert_encoding_detected(
-        self, content_type, expected_encoding, body, **kwargs
-    ):
+        self,
+        content_type: Optional[str],
+        expected_encoding: str,
+        body: bytes,
+        **kwargs: Any,
+    ) -> None:
         assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs)
         self.assertTrue(isinstance(body_unicode, str))
diff --git a/tests/test_html.py b/tests/test_html.py
index 1e637b0f..68abb2ee 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -1,5 +1,6 @@
 import unittest
 
+from w3lib._infra import _C0_CONTROL_OR_SPACE
 from w3lib.html import (
     get_base_url,
     get_meta_refresh,
@@ -65,6 +66,10 @@ def test_illegal_entities(self):
         self.assertEqual(replace_entities("x&#x2264;y"), "x\u2264y")
         self.assertEqual(replace_entities("x&#157;y"), "xy")
         self.assertEqual(replace_entities("x&#157;y", remove_illegal=False), "x&#157;y")
+        self.assertEqual(replace_entities("&#82179209091;"), "")
+        self.assertEqual(
+            replace_entities("&#82179209091;", remove_illegal=False), "&#82179209091;"
+        )
 
     def test_browser_hack(self):
         # check browser hack for numeric character references in the 80-9F range
@@ -156,12 +161,12 @@ def test_returns_unicode(self):
         assert isinstance(remove_tags(b"no tags"), str)
         assert isinstance(remove_tags(b"no tags", which_ones=("p",)), str)
         assert isinstance(remove_tags(b"<p>one tag</p>"), str)
-        assert isinstance(remove_tags(b"<p>one tag</p>", which_ones=("p")), str)
+        assert isinstance(remove_tags(b"<p>one tag</p>", which_ones=("p",)), str)
         assert isinstance(remove_tags(b"<a>link</a>", which_ones=("b",)), str)
         assert isinstance(remove_tags("no tags"), str)
         assert isinstance(remove_tags("no tags", which_ones=("p",)), str)
         assert isinstance(remove_tags("<p>one tag</p>"), str)
-        assert isinstance(remove_tags("<p>one tag</p>", which_ones=("p")), str)
+        assert isinstance(remove_tags("<p>one tag</p>", which_ones=("p",)), str)
         assert isinstance(remove_tags("<a>link</a>", which_ones=("b",)), str)
 
     def test_remove_tags_without_tags(self):
diff --git a/tests/test_url.py b/tests/test_url.py
index 94554542..2c0769b1 100644
--- a/tests/test_url.py
+++ b/tests/test_url.py
@@ -1,8 +1,11 @@
 import json
 import os
+import sys
 import unittest
+from inspect import isclass
 from pathlib import Path
 from timeit import timeit
+from typing import Optional, Union, Type, Callable, Tuple, List
 from urllib.parse import urlparse
 
 import pytest
@@ -13,6 +16,7 @@
     _ASCII_TAB_OR_NEWLINE,
     _C0_CONTROL_OR_SPACE,
 )
+from w3lib._types import StrOrBytes
 from w3lib._url import (
     _C0_CONTROL_PERCENT_ENCODE_SET,
     _domain_to_ascii,
@@ -41,6 +45,411 @@
     url_query_cleaner,
 )
 
+# Test cases for URL-to-safe-URL conversions with a URL and an encoding as
+# input parameters.
+#
+# (encoding, input URL, output URL or exception)
+SAFE_URL_ENCODING_CASES: List[
+    Tuple[Optional[str], StrOrBytes, Union[str, Type[Exception]]]
+] = [
+    (None, "", ValueError),
+    (None, "https://example.com", "https://example.com"),
+    (None, "https://example.com/©", "https://example.com/%C2%A9"),
+    # Paths are always UTF-8-encoded.
+    ("iso-8859-1", "https://example.com/©", "https://example.com/%C2%A9"),
+    # Queries are UTF-8-encoded if the scheme is not special, or is ws or wss.
+    ("iso-8859-1", "a://example.com?©", "a://example.com?%C2%A9"),
+    *(
+        ("iso-8859-1", f"{scheme}://example.com?©", f"{scheme}://example.com?%C2%A9")
+        for scheme in ("ws", "wss")
+    ),
+    *(
+        ("iso-8859-1", f"{scheme}://example.com?©", f"{scheme}://example.com?%A9")
+        for scheme in _SPECIAL_SCHEMES
+        if scheme not in {"ws", "wss"}
+    ),
+    # Fragments are always UTF-8-encoded.
+    ("iso-8859-1", "https://example.com#©", "https://example.com#%C2%A9"),
+]
+
+INVALID_SCHEME_FOLLOW_UPS = "".join(
+    chr(value)
+    for value in range(0x81)
+    if (
+        chr(value) not in _ASCII_ALPHANUMERIC
+        and chr(value) not in "+-."
+        and chr(value) not in _C0_CONTROL_OR_SPACE  # stripped
+        and chr(value) != ":"  # separator
+    )
+)
+
+SAFE_URL_URL_INVALID_SCHEME_CASES = tuple(
+    (f"{scheme}://example.com", ValueError)
+    for scheme in (
+        # A scheme is required.
+        "",
+        # The first scheme letter must be an ASCII alpha.
+        # Note: 0x80 is included below to also test a non-ASCII example.
+        *(
+            chr(value)
+            for value in range(0x81)
+            if (
+                chr(value) not in _ASCII_ALPHA
+                and chr(value) not in _C0_CONTROL_OR_SPACE  # stripped
+                and chr(value) != ":"  # separator
+            )
+        ),
+        # The follow-up scheme letters can also be ASCII numbers, plus, hyphen,
+        # or period.
+        f"a{INVALID_SCHEME_FOLLOW_UPS}",
+    )
+)
+
+SCHEME_NON_FIRST = _ASCII_ALPHANUMERIC + "+-."
+
+# Username and password characters that do not need escaping.
+# Removed for RFC 2396 and RFC 3986: %
+# Removed for the URL living standard: :;=
+USERINFO_SAFE = _ASCII_ALPHANUMERIC + "-_.!~*'()" + "&+$,"
+USERNAME_TO_ENCODE = "".join(
+    chr(value)
+    for value in range(0x80)
+    if (
+        chr(value) not in _C0_CONTROL_OR_SPACE
+        and chr(value) not in USERINFO_SAFE
+        and chr(value) not in ":/?#\\[]"
+    )
+)
+USERNAME_ENCODED = "".join(f"%{ord(char):02X}" for char in USERNAME_TO_ENCODE)
+PASSWORD_TO_ENCODE = USERNAME_TO_ENCODE + ":"
+PASSWORD_ENCODED = "".join(f"%{ord(char):02X}" for char in PASSWORD_TO_ENCODE)
+
+# Path characters that do not need escaping.
+# Removed for RFC 2396 and RFC 3986: %[\]^|
+PATH_SAFE = _ASCII_ALPHANUMERIC + "-_.!~*'()" + ":@&=+$," + "/" + ";"
+PATH_TO_ENCODE = "".join(
+    chr(value)
+    for value in range(0x80)
+    if (
+        chr(value) not in _C0_CONTROL_OR_SPACE
+        and chr(value) not in PATH_SAFE
+        and chr(value) not in "?#\\"
+    )
+)
+PATH_ENCODED = "".join(f"%{ord(char):02X}" for char in PATH_TO_ENCODE)
+
+# Query characters that do not need escaping.
+# Removed for RFC 2396 and RFC 3986: %[\]^`{|}
+# Removed for the URL living standard: ' (special)
+QUERY_SAFE = _ASCII_ALPHANUMERIC + "-_.!~*'()" + ":@&=+$," + "/" + ";" + "?"
+QUERY_TO_ENCODE = "".join(
+    chr(value)
+    for value in range(0x80)
+    if (
+        chr(value) not in _C0_CONTROL_OR_SPACE
+        and chr(value) not in QUERY_SAFE
+        and chr(value) not in "#"
+    )
+)
+QUERY_ENCODED = "".join(f"%{ord(char):02X}" for char in QUERY_TO_ENCODE)
+SPECIAL_QUERY_SAFE = QUERY_SAFE.replace("'", "")
+SPECIAL_QUERY_TO_ENCODE = "".join(
+    chr(value)
+    for value in range(0x80)
+    if (
+        chr(value) not in _C0_CONTROL_OR_SPACE
+        and chr(value) not in SPECIAL_QUERY_SAFE
+        and chr(value) not in "#"
+    )
+)
+SPECIAL_QUERY_ENCODED = "".join(f"%{ord(char):02X}" for char in SPECIAL_QUERY_TO_ENCODE)
+
+# Fragment characters that do not need escaping.
+# Removed for RFC 2396 and RFC 3986: #%[\]^{|}
+FRAGMENT_SAFE = _ASCII_ALPHANUMERIC + "-_.!~*'()" + ":@&=+$," + "/" + ";" + "?"
+FRAGMENT_TO_ENCODE = "".join(
+    chr(value)
+    for value in range(0x80)
+    if (chr(value) not in _C0_CONTROL_OR_SPACE and chr(value) not in FRAGMENT_SAFE)
+)
+FRAGMENT_ENCODED = "".join(f"%{ord(char):02X}" for char in FRAGMENT_TO_ENCODE)
+
+
+# Test cases for URL-to-safe-URL conversions with only a URL as input parameter
+# (i.e. no encoding or base URL).
+#
+# (input URL, output URL or exception)
+SAFE_URL_URL_CASES = (
+    # Invalid input type
+    (1, Exception),
+    (object(), Exception),
+    # Empty string
+    ("", ValueError),
+    # Remove any leading and trailing C0 control or space from input.
+    *(
+        (f"{char}https://example.com{char}", "https://example.com")
+        for char in _C0_CONTROL_OR_SPACE
+        if char not in _ASCII_TAB_OR_NEWLINE
+    ),
+    # Remove all ASCII tab or newline from input.
+    (
+        (
+            f"{_ASCII_TAB_OR_NEWLINE}h{_ASCII_TAB_OR_NEWLINE}ttps"
+            f"{_ASCII_TAB_OR_NEWLINE}:{_ASCII_TAB_OR_NEWLINE}/"
+            f"{_ASCII_TAB_OR_NEWLINE}/{_ASCII_TAB_OR_NEWLINE}a"
+            f"{_ASCII_TAB_OR_NEWLINE}b{_ASCII_TAB_OR_NEWLINE}:"
+            f"{_ASCII_TAB_OR_NEWLINE}a{_ASCII_TAB_OR_NEWLINE}b"
+            f"{_ASCII_TAB_OR_NEWLINE}@{_ASCII_TAB_OR_NEWLINE}exam"
+            f"{_ASCII_TAB_OR_NEWLINE}ple.com{_ASCII_TAB_OR_NEWLINE}:"
+            f"{_ASCII_TAB_OR_NEWLINE}1{_ASCII_TAB_OR_NEWLINE}2"
+            f"{_ASCII_TAB_OR_NEWLINE}/{_ASCII_TAB_OR_NEWLINE}a"
+            f"{_ASCII_TAB_OR_NEWLINE}b{_ASCII_TAB_OR_NEWLINE}?"
+            f"{_ASCII_TAB_OR_NEWLINE}a{_ASCII_TAB_OR_NEWLINE}b"
+            f"{_ASCII_TAB_OR_NEWLINE}#{_ASCII_TAB_OR_NEWLINE}a"
+            f"{_ASCII_TAB_OR_NEWLINE}b{_ASCII_TAB_OR_NEWLINE}"
+        ),
+        "https://ab:ab@example.com:12/ab?ab#ab",
+    ),
+    # Scheme
+    (f"{_ASCII_ALPHA}://example.com", f"{_ASCII_ALPHA.lower()}://example.com"),
+    (
+        f"a{SCHEME_NON_FIRST}://example.com",
+        f"a{SCHEME_NON_FIRST.lower()}://example.com",
+    ),
+    *SAFE_URL_URL_INVALID_SCHEME_CASES,
+    # Authority
+    ("https://a@example.com", "https://a@example.com"),
+    ("https://a:@example.com", "https://a:@example.com"),
+    ("https://a:a@example.com", "https://a:a@example.com"),
+    ("https://a%3A@example.com", "https://a%3A@example.com"),
+    (
+        f"https://{USERINFO_SAFE}:{USERINFO_SAFE}@example.com",
+        f"https://{USERINFO_SAFE}:{USERINFO_SAFE}@example.com",
+    ),
+    (
+        f"https://{USERNAME_TO_ENCODE}:{PASSWORD_TO_ENCODE}@example.com",
+        f"https://{USERNAME_ENCODED}:{PASSWORD_ENCODED}@example.com",
+    ),
+    ("https://@\\example.com", ValueError),
+    ("https://\x80:\x80@example.com", "https://%C2%80:%C2%80@example.com"),
+    # Host
+    ("https://example.com", "https://example.com"),
+    ("https://.example", "https://.example"),
+    ("https://\x80.example", ValueError),
+    ("https://%80.example", ValueError),
+    # The 4 cases below test before and after crossing DNS length limits on
+    # domain name labels (63 characters) and the domain name as a whole (253
+    # characters). However, all cases are expected to pass because the URL
+    # living standard does not require domain names to be within these limits.
+    (f"https://{'a' * 63}.example", f"https://{'a' * 63}.example"),
+    (f"https://{'a' * 64}.example", f"https://{'a' * 64}.example"),
+    (
+        f"https://{'a' * 63}.{'a' * 63}.{'a' * 63}.{'a' * 53}.example",
+        f"https://{'a' * 63}.{'a' * 63}.{'a' * 63}.{'a' * 53}.example",
+    ),
+    (
+        f"https://{'a' * 63}.{'a' * 63}.{'a' * 63}.{'a' * 54}.example",
+        f"https://{'a' * 63}.{'a' * 63}.{'a' * 63}.{'a' * 54}.example",
+    ),
+    ("https://ñ.example", "https://xn--ida.example"),
+    ("http://192.168.0.0", "http://192.168.0.0"),
+    ("http://192.168.0.256", ValueError),
+    ("http://192.168.0.0.0", ValueError),
+    ("http://[2a01:5cc0:1:2::4]", "http://[2a01:5cc0:1:2::4]"),
+    ("http://[2a01:5cc0:1:2:3:4]", ValueError),
+    # Port
+    ("https://example.com:", "https://example.com:"),
+    ("https://example.com:1", "https://example.com:1"),
+    ("https://example.com:443", "https://example.com:443"),
+    # Path
+    ("https://example.com/", "https://example.com/"),
+    ("https://example.com/a", "https://example.com/a"),
+    ("https://example.com\\a", "https://example.com/a"),
+    ("https://example.com/a\\b", "https://example.com/a/b"),
+    (
+        f"https://example.com/{PATH_SAFE}",
+        f"https://example.com/{PATH_SAFE}",
+    ),
+    (
+        f"https://example.com/{PATH_TO_ENCODE}",
+        f"https://example.com/{PATH_ENCODED}",
+    ),
+    ("https://example.com/ñ", "https://example.com/%C3%B1"),
+    ("https://example.com/ñ%C3%B1", "https://example.com/%C3%B1%C3%B1"),
+    # Query
+    ("https://example.com?", "https://example.com?"),
+    ("https://example.com/?", "https://example.com/?"),
+    ("https://example.com?a", "https://example.com?a"),
+    ("https://example.com?a=", "https://example.com?a="),
+    ("https://example.com?a=b", "https://example.com?a=b"),
+    (
+        f"a://example.com?{QUERY_SAFE}",
+        f"a://example.com?{QUERY_SAFE}",
+    ),
+    (
+        f"a://example.com?{QUERY_TO_ENCODE}",
+        f"a://example.com?{QUERY_ENCODED}",
+    ),
+    *(
+        (
+            f"{scheme}://example.com?{SPECIAL_QUERY_SAFE}",
+            f"{scheme}://example.com?{SPECIAL_QUERY_SAFE}",
+        )
+        for scheme in _SPECIAL_SCHEMES
+    ),
+    *(
+        (
+            f"{scheme}://example.com?{SPECIAL_QUERY_TO_ENCODE}",
+            f"{scheme}://example.com?{SPECIAL_QUERY_ENCODED}",
+        )
+        for scheme in _SPECIAL_SCHEMES
+    ),
+    ("https://example.com?ñ", "https://example.com?%C3%B1"),
+    ("https://example.com?ñ%C3%B1", "https://example.com?%C3%B1%C3%B1"),
+    # Fragment
+    ("https://example.com#", "https://example.com#"),
+    ("https://example.com/#", "https://example.com/#"),
+    ("https://example.com?#", "https://example.com?#"),
+    ("https://example.com/?#", "https://example.com/?#"),
+    ("https://example.com#a", "https://example.com#a"),
+    (
+        f"a://example.com#{FRAGMENT_SAFE}",
+        f"a://example.com#{FRAGMENT_SAFE}",
+    ),
+    (
+        f"a://example.com#{FRAGMENT_TO_ENCODE}",
+        f"a://example.com#{FRAGMENT_ENCODED}",
+    ),
+    ("https://example.com#ñ", "https://example.com#%C3%B1"),
+    ("https://example.com#ñ%C3%B1", "https://example.com#%C3%B1%C3%B1"),
+    # All fields, UTF-8 wherever possible.
+    (
+        "https://ñ:ñ@ñ.example:1/ñ?ñ#ñ",
+        "https://%C3%B1:%C3%B1@xn--ida.example:1/%C3%B1?%C3%B1#%C3%B1",
+    ),
+)
+
+
+def _test_safe_url_func(
+    url: StrOrBytes,
+    *,
+    encoding: Optional[str] = None,
+    output: Union[str, Type[Exception]],
+    func: Callable[..., str],
+) -> None:
+    kwargs = {}
+    if encoding is not None:
+        kwargs["encoding"] = encoding
+    if isclass(output) and issubclass(output, Exception):
+        with pytest.raises(output):
+            func(url, **kwargs)
+        return
+    actual = func(url, **kwargs)
+    assert actual == output
+    assert func(actual, **kwargs) == output  # Idempotency
+
+
+def _test_safe_url_string(
+    url: StrOrBytes,
+    *,
+    encoding: Optional[str] = None,
+    output: Union[str, Type[Exception]],
+) -> None:
+    return _test_safe_url_func(
+        url,
+        encoding=encoding,
+        output=output,
+        func=safe_url_string,
+    )
+
+
+KNOWN_SAFE_URL_STRING_ENCODING_ISSUES = {
+    (None, ""),  # Invalid URL
+    # UTF-8 encoding is not enforced in non-special URLs, or in URLs with the
+    # ws or wss schemes.
+    ("iso-8859-1", "a://example.com?\xa9"),
+    ("iso-8859-1", "ws://example.com?\xa9"),
+    ("iso-8859-1", "wss://example.com?\xa9"),
+    # UTF-8 encoding is not enforced on the fragment.
+    ("iso-8859-1", "https://example.com#\xa9"),
+}
+
+
+@pytest.mark.parametrize(
+    "encoding,url,output",
+    tuple(
+        case
+        if case[:2] not in KNOWN_SAFE_URL_STRING_ENCODING_ISSUES
+        else pytest.param(*case, marks=pytest.mark.xfail(strict=True))
+        for case in SAFE_URL_ENCODING_CASES
+    ),
+)
+def test_safe_url_string_encoding(
+    encoding: Optional[str], url: StrOrBytes, output: Union[str, Type[Exception]]
+) -> None:
+    _test_safe_url_string(url, encoding=encoding, output=output)
+
+
+KNOWN_SAFE_URL_STRING_URL_ISSUES = {
+    "",  # Invalid URL
+    *(case[0] for case in SAFE_URL_URL_INVALID_SCHEME_CASES),
+    # Userinfo characters that the URL living standard requires escaping (:;=)
+    # are not escaped.
+    "https://@\\example.com",  # Invalid URL
+    "https://\x80.example",  # Invalid domain name (non-visible character)
+    "https://%80.example",  # Invalid domain name (non-visible character)
+    "http://192.168.0.256",  # Invalid IP address
+    "http://192.168.0.0.0",  # Invalid IP address / domain name
+    "http://[2a01:5cc0:1:2::4]",  # https://github.com/scrapy/w3lib/issues/193
+    "https://example.com:",  # Removes the :
+    # Does not convert \ to /
+    "https://example.com\\a",
+    "https://example.com\\a\\b",
+    # Encodes \ and / after the first one in the path
+    "https://example.com/a/b",
+    "https://example.com/a\\b",
+    # Some path characters that RFC 2396 and RFC 3986 require escaping (%)
+    # are not escaped.
+    f"https://example.com/{PATH_TO_ENCODE}",
+    # ? is removed
+    "https://example.com?",
+    "https://example.com/?",
+    # Some query characters that RFC 2396 and RFC 3986 require escaping (%)
+    # are not escaped.
+    f"a://example.com?{QUERY_TO_ENCODE}",
+    # Some special query characters that RFC 2396 and RFC 3986 require escaping
+    # (%) are not escaped.
+    *(
+        f"{scheme}://example.com?{SPECIAL_QUERY_TO_ENCODE}"
+        for scheme in _SPECIAL_SCHEMES
+    ),
+    # ? and # are removed
+    "https://example.com#",
+    "https://example.com/#",
+    "https://example.com?#",
+    "https://example.com/?#",
+    # Some fragment characters that RFC 2396 and RFC 3986 require escaping
+    # (%) are not escaped.
+    f"a://example.com#{FRAGMENT_TO_ENCODE}",
+}
+if sys.version_info < (3, 11, 4):
+    KNOWN_SAFE_URL_STRING_URL_ISSUES.add("http://[2a01:5cc0:1:2:3:4]")  # Invalid IPv6
+
+
+@pytest.mark.parametrize(
+    "url,output",
+    tuple(
+        case
+        if case[0] not in KNOWN_SAFE_URL_STRING_URL_ISSUES
+        else pytest.param(*case, marks=pytest.mark.xfail(strict=True))
+        for case in SAFE_URL_URL_CASES
+    ),
+)
+def test_safe_url_string_url(
+    url: StrOrBytes, output: Union[str, Type[Exception]]
+) -> None:
+    _test_safe_url_string(url, output=output)
+
 
 TO_ASCII_TEST_DATA_FILE_PATH = Path(__file__).parent / "to-ascii-test-data.json"
 TO_ASCII_TEST_DATA_KNOWN_ISSUES = (
@@ -562,11 +971,7 @@ def test_safe_url_string_encoding(encoding, url, output):
 
 KNOWN_SAFE_URL_STRING_URL_ISSUES = {
     "",  # Invalid URL
-    *(case[0] for case in SAFE_URL_URL_STRIP_CASES),
     *(case[0] for case in SAFE_URL_URL_INVALID_SCHEME_CASES),
-    # %3A gets decoded, going from a "a:" username to a "a" username with an
-    # empty password.
-    "https://a%3A@example.com",
     # Userinfo characters that the URL living standard requires escaping (:;=)
     # are not escaped.
     f"https://{USERNAME_TO_ENCODE}:{PASSWORD_TO_ENCODE}@example.com",
@@ -657,6 +1062,56 @@ def test_safe_url_string_path_encoding(self):
         safeurl = safe_url_string("http://www.example.com/£", path_encoding="latin-1")
         self.assertTrue(isinstance(safeurl, str))
         self.assertEqual(safeurl, "http://www.example.com/%A3")
+        self.assertTrue(isinstance(safe_url_string(b"http://example.com/"), str))
+
+    def test_safe_url_string_remove_ascii_tab_and_newlines(self):
+        self.assertEqual(
+            safe_url_string("http://example.com/test\n.html"),
+            "http://example.com/test.html",
+        )
+        self.assertEqual(
+            safe_url_string("http://example.com/test\t.html"),
+            "http://example.com/test.html",
+        )
+        self.assertEqual(
+            safe_url_string("http://example.com/test\r.html"),
+            "http://example.com/test.html",
+        )
+        self.assertEqual(
+            safe_url_string("http://example.com/test\r.html\n"),
+            "http://example.com/test.html",
+        )
+        self.assertEqual(
+            safe_url_string("http://example.com/test\r\n.html\t"),
+            "http://example.com/test.html",
+        )
+        self.assertEqual(
+            safe_url_string("http://example.com/test\a\n.html"),
+            "http://example.com/test%07.html",
+        )
+
+    def test_safe_url_string_quote_path(self):
+        safeurl = safe_url_string('http://google.com/"hello"', quote_path=True)
+        self.assertEqual(safeurl, "http://google.com/%22hello%22")
+
+        safeurl = safe_url_string('http://google.com/"hello"', quote_path=False)
+        self.assertEqual(safeurl, 'http://google.com/"hello"')
+
+        safeurl = safe_url_string('http://google.com/"hello"')
+        self.assertEqual(safeurl, "http://google.com/%22hello%22")
+
+    def test_safe_url_string_with_query(self):
+        safeurl = safe_url_string("http://www.example.com/£?unit=µ")
+        self.assertTrue(isinstance(safeurl, str))
+        self.assertEqual(safeurl, "http://www.example.com/%C2%A3?unit=%C2%B5")
+
+        safeurl = safe_url_string("http://www.example.com/£?unit=µ", encoding="utf-8")
+        self.assertTrue(isinstance(safeurl, str))
+        self.assertEqual(safeurl, "http://www.example.com/%C2%A3?unit=%C2%B5")
+
+        safeurl = safe_url_string("http://www.example.com/£?unit=µ", encoding="latin-1")
+        self.assertTrue(isinstance(safeurl, str))
+        self.assertEqual(safeurl, "http://www.example.com/%C2%A3?unit=%B5")
 
         safeurl = safe_url_string(
             "http://www.example.com/£?unit=µ", path_encoding="latin-1"
diff --git a/tox.ini b/tox.ini
index 60b12f38..56822019 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,14 +4,14 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py37, py38, py39, py310, pypy3, docs, security, flake8, pylint, black, typing
+envlist = py38, py39, py310, py311, py312, pypy3, docs, security, flake8, pylint, black, typing
 
 [testenv]
 deps =
     pytest !=3.1.1, !=3.1.2
     pytest-cov
 commands =
-    pytest \
+    python -m pytest \
         --doctest-modules \
         --cov=w3lib --cov-report=term --cov-report=xml \
         {posargs:w3lib tests}
@@ -27,14 +27,14 @@ basepython = python3
 deps =
     # mypy would error if pytest (or its sub) not found
     pytest
-    mypy==0.971
+    mypy==1.0.0
 commands =
-    mypy --show-error-codes {posargs: w3lib tests}
+    mypy --strict {posargs: w3lib tests}
 
 [testenv:flake8]
 basepython = python3
 deps =
-    flake8
+    flake8==6.1.0
 commands =
     flake8 \
     {posargs:w3lib tests setup.py}
@@ -42,7 +42,7 @@ commands =
 [testenv:pylint]
 deps =
     {[testenv]deps}
-    pylint==2.14.2
+    pylint==3.0.0
 commands =
     pylint conftest.py docs setup.py tests w3lib
 
@@ -52,12 +52,8 @@ deps =
 commands =
     black {posargs:--check conftest.py setup.py tests w3lib}
 
-[docs]
+[testenv:docs]
 changedir = docs
 deps = -rdocs/requirements.txt
-
-[testenv:docs]
-changedir = {[docs]changedir}
-deps = {[docs]deps}
 commands =
     sphinx-build -W -b html . {envtmpdir}/html
diff --git a/w3lib/__init__.py b/w3lib/__init__.py
index fb5f52cb..dd7a1fc7 100644
--- a/w3lib/__init__.py
+++ b/w3lib/__init__.py
@@ -1,2 +1,2 @@
-__version__ = "2.0.1"
+__version__ = "2.1.2"
 version_info = tuple(int(v) if v.isdigit() else v for v in __version__.split("."))
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index 0879ead7..7d46d785 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -136,7 +136,7 @@ def _c18n_encoding(encoding: str) -> str:
     encoding aliases
     """
     normed = encodings.normalize_encoding(encoding).lower()
-    return encodings.aliases.aliases.get(normed, normed)
+    return cast(str, encodings.aliases.aliases.get(normed, normed))
 
 
 def resolve_encoding(encoding_alias: str) -> Optional[str]:
diff --git a/w3lib/html.py b/w3lib/html.py
index a31d42bd..f0f0184e 100644
--- a/w3lib/html.py
+++ b/w3lib/html.py
@@ -66,7 +66,7 @@ def replace_entities(
 
     """
 
-    def convert_entity(m: Match) -> str:
+    def convert_entity(m: Match[str]) -> str:
         groups = m.groupdict()
         number = None
         if groups.get("dec"):
@@ -91,7 +91,7 @@ def convert_entity(m: Match) -> str:
                     return bytes((number,)).decode("cp1252")
                 else:
                     return chr(number)
-            except ValueError:
+            except (ValueError, OverflowError):
                 pass
 
         return "" if remove_illegal and groups.get("semicolon") else m.group(0)
@@ -205,7 +205,7 @@ def will_remove(tag: str) -> bool:
         else:
             return tag not in keep
 
-    def remove_tag(m: Match) -> str:
+    def remove_tag(m: Match[str]) -> str:
         tag = m.group(1)
         return "" if will_remove(tag) else m.group(0)
 
@@ -278,7 +278,9 @@ def unquote_markup(
 
     """
 
-    def _get_fragments(txt: str, pattern: Pattern) -> Iterable[Union[str, Match]]:
+    def _get_fragments(
+        txt: str, pattern: Pattern[str]
+    ) -> Iterable[Union[str, Match[str]]]:
         offset = 0
         for match in pattern.finditer(txt):
             match_s, match_e = match.span(1)
@@ -326,8 +328,8 @@ def get_meta_refresh(
     baseurl: str = "",
     encoding: str = "utf-8",
     ignore_tags: Iterable[str] = ("script", "noscript"),
-) -> Tuple[Optional[float], Optional[str]]:
-    """Return  the http-equiv parameter of the HTML meta element from the given
+) -> Union[Tuple[None, None], Tuple[float, str]]:
+    """Return the http-equiv parameter of the HTML meta element from the given
     HTML text and return a tuple ``(interval, url)`` where interval is an integer
     containing the delay in seconds (or zero if not present) and url is a
     string with the absolute url to redirect.
diff --git a/w3lib/http.py b/w3lib/http.py
index 10d16695..a3e4e174 100644
--- a/w3lib/http.py
+++ b/w3lib/http.py
@@ -2,7 +2,7 @@
 from typing import Any, List, MutableMapping, Optional, AnyStr, Sequence, Union, Mapping
 from w3lib.util import to_bytes, to_unicode
 
-HeadersDictInput = Mapping[bytes, Union[Any, Sequence]]
+HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]]
 HeadersDictOutput = MutableMapping[bytes, List[bytes]]
 
 
diff --git a/w3lib/url.py b/w3lib/url.py
index 0c5faae7..c5c2fea3 100644
--- a/w3lib/url.py
+++ b/w3lib/url.py
@@ -36,6 +36,10 @@
 from urllib.parse import _coerce_args  # type: ignore
 from urllib.request import pathname2url, url2pathname
 
+from ._infra import (
+    _ASCII_TAB_OR_NEWLINE,
+    _C0_CONTROL_OR_SPACE,
+)
 from ._rfc2396 import (
     _RFC2396_ABS_PATH_PERCENT_ENCODE_SET,
     _RFC2396_FRAGMENT_PERCENT_ENCODE_SET,
@@ -56,6 +60,7 @@
     _QUERY_PERCENT_ENCODE_SET,
     _serialize_url,
     _SPECIAL_QUERY_PERCENT_ENCODE_SET,
+    _SPECIAL_SCHEMES,
     _USERINFO_PERCENT_ENCODE_SET,
 )
 from .util import to_unicode
@@ -76,13 +81,25 @@ def _quote_byte(error: UnicodeError) -> Tuple[str, int]:
 RFC3986_UNRESERVED = (string.ascii_letters + string.digits + "-._~").encode("ascii")
 EXTRA_SAFE_CHARS = b"|"  # see https://github.com/scrapy/w3lib/pull/25
 
+RFC3986_USERINFO_SAFE_CHARS = RFC3986_UNRESERVED + RFC3986_SUB_DELIMS + b":"
 _safe_chars = RFC3986_RESERVED + RFC3986_UNRESERVED + EXTRA_SAFE_CHARS + b"%"
 _path_safe_chars = _safe_chars.replace(b"#", b"")
-RFC3986_USERINFO_SAFE_CHARS = RFC3986_UNRESERVED + RFC3986_SUB_DELIMS + b":"
 
-_ascii_tab_newline_re = re.compile(
-    r"[\t\n\r]"
-)  # see https://infra.spec.whatwg.org/#ascii-tab-or-newline
+# Characters that are safe in all of:
+#
+# -   RFC 2396 + RFC 2732, as interpreted by Java 8’s java.net.URI class
+# -   RFC 3986
+# -   The URL living standard
+#
+# NOTE: % is currently kept as a safe character in the path, query and
+# fragment lists below, due to limitations of the current safe_url_string
+# implementation, but it should be escaped as %25 when it is not already
+# being used as part of a percent-encoded sequence.
+_USERINFO_SAFEST_CHARS = RFC3986_USERINFO_SAFE_CHARS.translate(None, delete=b":;=")
+_PATH_SAFEST_CHARS = _safe_chars.translate(None, delete=b"#[]|")
+_QUERY_SAFEST_CHARS = _PATH_SAFEST_CHARS
+_SPECIAL_QUERY_SAFEST_CHARS = _PATH_SAFEST_CHARS.translate(None, delete=b"'")
+_FRAGMENT_SAFEST_CHARS = _PATH_SAFEST_CHARS
 
 _SAFE_USERINFO_PERCENT_ENCODE_SET = (
     _USERINFO_PERCENT_ENCODE_SET
@@ -158,15 +175,46 @@ def safe_url(
     return _serialize_url(url, canonicalize=False)
 
 
-def safe_url_string(
+_ASCII_TAB_OR_NEWLINE_TRANSLATION_TABLE = {
+    ord(char): None for char in _ASCII_TAB_OR_NEWLINE
+}
+
+
+def _strip(url: str) -> str:
+    return url.strip(_C0_CONTROL_OR_SPACE).translate(
+        _ASCII_TAB_OR_NEWLINE_TRANSLATION_TABLE
+    )
+
+
+def safe_url_string(  # pylint: disable=too-many-locals
     url: StrOrBytes,
     encoding: str = "utf8",
     path_encoding: str = "utf8",
     quote_path: bool = True,
 ) -> str:
-    """Convert the given URL into a legal URL by escaping unsafe characters
-    according to RFC-3986. Also, ASCII tabs and newlines are removed
-    as per https://url.spec.whatwg.org/#url-parsing.
+    """Return a URL equivalent to *url* that a wide range of web browsers and
+    web servers consider valid.
+
+    *url* is parsed according to the rules of the `URL living standard`_,
+    and during serialization additional characters are percent-encoded to make
+    the URL valid by additional URL standards.
+
+    .. _URL living standard: https://url.spec.whatwg.org/
+
+    The returned URL should be valid by *all* of the following URL standards
+    known to be enforced by modern-day web browsers and web servers:
+
+    -   `URL living standard`_
+
+    -   `RFC 3986`_
+
+    -   `RFC 2396`_ and `RFC 2732`_, as interpreted by `Java 8’s java.net.URI
+        class`_.
+
+    .. _Java 8’s java.net.URI class: https://docs.oracle.com/javase/8/docs/api/java/net/URI.html
+    .. _RFC 2396: https://www.ietf.org/rfc/rfc2396.txt
+    .. _RFC 2732: https://www.ietf.org/rfc/rfc2732.txt
+    .. _RFC 3986: https://www.ietf.org/rfc/rfc3986.txt
 
     If a bytes URL is given, it is first converted to `str` using the given
     encoding (which defaults to 'utf-8'). If quote_path is True (default),
@@ -180,17 +228,15 @@ def safe_url_string(
 
     Calling this function on an already "safe" URL will return the URL
     unmodified.
-
-    Always returns a native `str` (bytes in Python2, unicode in Python3).
     """
-    # Python3's urlsplit() chokes on bytes input with non-ASCII chars,
+    # urlsplit() chokes on bytes input with non-ASCII chars,
     # so let's decode (to Unicode) using page encoding:
     #   - it is assumed that a raw bytes input comes from a document
     #     encoded with the supplied encoding (or UTF8 by default)
     #   - if the supplied (or default) encoding chokes,
     #     percent-encode offending bytes
     decoded = to_unicode(url, encoding=encoding, errors="percentencode")
-    parts = urlsplit(_ascii_tab_newline_re.sub("", decoded))
+    parts = urlsplit(_strip(decoded))
 
     username, password, hostname, port = (
         parts.username,
@@ -201,11 +247,11 @@ def safe_url_string(
     netloc_bytes = b""
     if username is not None or password is not None:
         if username is not None:
-            safe_username = quote(unquote(username), RFC3986_USERINFO_SAFE_CHARS)
+            safe_username = quote(unquote(username), _USERINFO_SAFEST_CHARS)
             netloc_bytes += safe_username.encode(encoding)
         if password is not None:
             netloc_bytes += b":"
-            safe_password = quote(unquote(password), RFC3986_USERINFO_SAFE_CHARS)
+            safe_password = quote(unquote(password), _USERINFO_SAFEST_CHARS)
             netloc_bytes += safe_password.encode(encoding)
         netloc_bytes += b"@"
     if hostname is not None:
@@ -223,17 +269,22 @@ def safe_url_string(
 
     # default encoding for path component SHOULD be UTF-8
     if quote_path:
-        path = quote(parts.path.encode(path_encoding), _path_safe_chars)
+        path = quote(parts.path.encode(path_encoding), _PATH_SAFEST_CHARS)
     else:
         path = parts.path
 
+    if parts.scheme in _SPECIAL_SCHEMES:
+        query = quote(parts.query.encode(encoding), _SPECIAL_QUERY_SAFEST_CHARS)
+    else:
+        query = quote(parts.query.encode(encoding), _QUERY_SAFEST_CHARS)
+
     return urlunsplit(
         (
             parts.scheme,
             netloc,
             path,
-            quote(parts.query.encode(encoding), _safe_chars),
-            quote(parts.fragment.encode(encoding), _safe_chars),
+            query,
+            quote(parts.fragment.encode(encoding), _FRAGMENT_SAFEST_CHARS),
         )
     )
 
@@ -373,7 +424,7 @@ def url_query_cleaner(
     url = "?".join([base, sep.join(querylist)]) if querylist else base
     if keep_fragments and fragment:
         url += "#" + fragment
-    return cast(str, url)
+    return url
 
 
 def _add_or_replace_parameters(url: str, params: Dict[str, str]) -> str:
@@ -602,11 +653,8 @@ def canonicalize_url(
 ) -> str:
     r"""Canonicalize the given url by applying the following procedures:
 
+    - make the URL safe
     - sort query arguments, first by key, then by value
-    - percent encode paths ; non-ASCII characters are percent-encoded
-      using UTF-8 (RFC-3986)
-    - percent encode query arguments ; non-ASCII characters are percent-encoded
-      using passed `encoding` (UTF-8 by default)
     - normalize all spaces (in query arguments) '+' (plus symbol)
     - normalize percent encodings case (%2f -> %2F)
     - remove query arguments with blank values (unless `keep_blank_values` is True)
@@ -634,7 +682,7 @@ def canonicalize_url(
     # so we should be covered regarding URL normalization,
     # if not for proper URL expected by remote website.
     if isinstance(url, str):
-        url = url.strip()
+        url = _strip(url)
     try:
         scheme, netloc, path, params, query, fragment = _safe_ParseResult(
             parse_url(url), encoding=encoding or "utf8"
@@ -734,7 +782,7 @@ def parse_qsl_to_bytes(
     # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
     # except for the unquote(s, encoding, errors) calls replaced
     # with unquote_to_bytes(s)
-    coerce_args = cast(Callable[..., Tuple[str, Callable]], _coerce_args)
+    coerce_args = cast(Callable[..., Tuple[str, Callable[..., bytes]]], _coerce_args)
     qs, _coerce_result = coerce_args(qs)
     pairs = [s2 for s1 in qs.split("&") for s2 in s1.split(";")]
     r = []
@@ -755,5 +803,5 @@ def parse_qsl_to_bytes(
             value: StrOrBytes = nv[1].replace("+", " ")
             value = unquote_to_bytes(value)
             value = _coerce_result(value)
-            r.append((cast(bytes, name), cast(bytes, value)))
+            r.append((name, value))
     return r