diff mbox series

[dunfell] python3: fix CVE-2023-24329 urllib.parse url blocklisting bypass

Message ID: 20230713061425.122500-1-vkumbhar@mvista.com
State: Accepted, archived
Headers show
Series [dunfell] python3: fix CVE-2023-24329 urllib.parse url blocklisting bypass | expand

Commit Message

Vivek Kumbhar July 13, 2023, 6:14 a.m. UTC
Signed-off-by: Vivek Kumbhar <vkumbhar@mvista.com>
---
 .../python/python3/CVE-2023-24329.patch       | 80 +++++++++++++++++++
 .../recipes-devtools/python/python3_3.8.17.bb |  1 +
 2 files changed, 81 insertions(+)
 create mode 100644 meta/recipes-devtools/python/python3/CVE-2023-24329.patch
diff mbox series

Patch

diff --git a/meta/recipes-devtools/python/python3/CVE-2023-24329.patch b/meta/recipes-devtools/python/python3/CVE-2023-24329.patch
new file mode 100644
index 0000000000..23dec65602
--- /dev/null
+++ b/meta/recipes-devtools/python/python3/CVE-2023-24329.patch
@@ -0,0 +1,80 @@ 
+From 72d356e3584ebfb8e813a8e9f2cd3dccf233c0d9 Mon Sep 17 00:00:00 2001
+From: "Miss Islington (bot)"
+ <31488909+miss-islington@users.noreply.github.com>
+Date: Sun, 13 Nov 2022 11:00:25 -0800
+Subject: [PATCH] gh-99418: Make urllib.parse.urlparse enforce that a scheme
+ must begin with an alphabetical ASCII character. (GH-99421)
+
+Prevent urllib.parse.urlparse from accepting schemes that don't begin with an alphabetical ASCII character.
+
+RFC 3986 defines a scheme like this: `scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )`
+RFC 2234 defines an ALPHA like this: `ALPHA = %x41-5A / %x61-7A`
+
+The WHATWG URL spec defines a scheme like this:
+`"A URL-scheme string must be one ASCII alpha, followed by zero or more of ASCII alphanumeric, U+002B (+), U+002D (-), and U+002E (.)."`
+(cherry picked from commit 439b9cfaf43080e91c4ad69f312f21fa098befc7)
+
+Co-authored-by: Ben Kallus <49924171+kenballus@users.noreply.github.com>
+
+Upstream-Status: Backport [https://github.com/python/cpython/commit/72d356e3584ebfb8e813a8e9f2cd3dccf233c0d9]
+CVE: CVE-2023-24329
+Signed-off-by: Vivek Kumbhar <vkumbhar@mvista.com>
+---
+ Lib/test/test_urlparse.py                      | 18 ++++++++++++++++++
+ Lib/urllib/parse.py                            |  2 +-
+ ...22-11-12-15-45-51.gh-issue-99418.FxfAXS.rst |  2 ++
+ 3 files changed, 21 insertions(+), 1 deletion(-)
+ create mode 100644 Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst
+
+diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
+index 0ad3bf1..e1aa913 100644
+--- a/Lib/test/test_urlparse.py
++++ b/Lib/test/test_urlparse.py
+@@ -735,6 +735,24 @@ class UrlParseTestCase(unittest.TestCase):
+                         with self.assertRaises(ValueError):
+                             p.port
+
++    def test_attributes_bad_scheme(self):
++        """Check handling of invalid schemes."""
++        for bytes in (False, True):
++            for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
++                for scheme in (".", "+", "-", "0", "http&", "६http"):
++                    with self.subTest(bytes=bytes, parse=parse, scheme=scheme):
++                        url = scheme + "://www.example.net"
++                        if bytes:
++                            if url.isascii():
++                                url = url.encode("ascii")
++                            else:
++                                continue
++                        p = parse(url)
++                        if bytes:
++                            self.assertEqual(p.scheme, b"")
++                        else:
++                            self.assertEqual(p.scheme, "")
++
+     def test_attributes_without_netloc(self):
+         # This example is straight from RFC 3261.  It looks like it
+         # should allow the username, hostname, and port to be filled
+diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
+index 979e6d2..2e7a3e2 100644
+--- a/Lib/urllib/parse.py
++++ b/Lib/urllib/parse.py
+@@ -452,7 +452,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
+         clear_cache()
+     netloc = query = fragment = ''
+     i = url.find(':')
+-    if i > 0:
++    if i > 0 and url[0].isascii() and url[0].isalpha():
+         if url[:i] == 'http': # optimize the common case
+             url = url[i+1:]
+             if url[:2] == '//':
+diff --git a/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst b/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst
+new file mode 100644
+index 0000000..0a06e7c
+--- /dev/null
++++ b/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst
+@@ -0,0 +1,2 @@
++Fix bug in :func:`urllib.parse.urlparse` that causes URL schemes that begin
++with a digit, a plus sign, or a minus sign to be parsed incorrectly.
+--
+2.25.1
diff --git a/meta/recipes-devtools/python/python3_3.8.17.bb b/meta/recipes-devtools/python/python3_3.8.17.bb
index ba5f564d8e..8c00d65794 100644
--- a/meta/recipes-devtools/python/python3_3.8.17.bb
+++ b/meta/recipes-devtools/python/python3_3.8.17.bb
@@ -34,6 +34,7 @@  SRC_URI = "http://www.python.org/ftp/python/${PV}/Python-${PV}.tar.xz \
            file://0001-python3-Do-not-hardcode-lib-for-distutils.patch \
            file://0020-configure.ac-setup.py-do-not-add-a-curses-include-pa.patch \
            file://makerace.patch \
+           file://CVE-2023-24329.patch \
            "
 
 SRC_URI_append_class-native = " \