[hardknott,1/2] expat: fix CVE-2022-25235

Message ID 20220311134659.2413-1-kai.kang@windriver.com
State Rejected, archived
Headers show
Series [hardknott,1/2] expat: fix CVE-2022-25235 | expand

Commit Message

Kai March 11, 2022, 1:46 p.m. UTC
From: Kai Kang <kai.kang@windriver.com>

Backport patch to fix CVE-2022-25235 for expat.

CVE: CVE-2022-25235

Signed-off-by: Kai Kang <kai.kang@windriver.com>
---
 .../expat/expat/CVE-2022-25235.patch          | 261 ++++++++++++++++++
 meta/recipes-core/expat/expat_2.2.10.bb       |   1 +
 2 files changed, 262 insertions(+)
 create mode 100644 meta/recipes-core/expat/expat/CVE-2022-25235.patch

Comments

Kai March 11, 2022, 1:55 p.m. UTC | #1
On 3/11/22 9:46 PM, kai wrote:
> From: Kai Kang <kai.kang@windriver.com>
>
> Backport patches to fix CVE-2022-25236 for expat.
>
> CVE: CVE-2022-25236
>
> Signed-off-by: Kai Kang <kai.kang@windriver.com>

Oops. Wrong mailing list.

Sorry for the inconvenience.

Kai

> ---
>   .../expat/expat/CVE-2022-25236-1.patch        | 116 +++++++++
>   .../expat/expat/CVE-2022-25236-2.patch        | 232 ++++++++++++++++++
>   meta/recipes-core/expat/expat_2.2.10.bb       |   2 +
>   3 files changed, 350 insertions(+)
>   create mode 100644 meta/recipes-core/expat/expat/CVE-2022-25236-1.patch
>   create mode 100644 meta/recipes-core/expat/expat/CVE-2022-25236-2.patch
>
> diff --git a/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch b/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch
> new file mode 100644
> index 0000000000..ab53d99c8f
> --- /dev/null
> +++ b/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch
> @@ -0,0 +1,116 @@
> +Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/2cc97e87]
> +CVE: CVE-2022-25236
> +
> +The commit is a merge commit, and this patch is created by:
> +
> +$ git diff -p --stat 2cc97e87~ 2cc97e87
> +
> +Remove modification for expat/Changes which fails to be applied.
> +
> +Signed-off-by: Kai Kang <kai.kang@windriver.com>
> +
> +commit 2cc97e875ef84da4bcf55156c83599116f7523b4 (from d477fdd284468f2ab822024e75702f2c1b254f42)
> +Merge: d477fdd2 e4d7e497
> +Author: Sebastian Pipping <sebastian@pipping.org>
> +Date:   Fri Feb 18 18:01:27 2022 +0100
> +
> +    Merge pull request #561 from libexpat/namesep-security
> +
> +    [CVE-2022-25236] lib: Protect against insertion of namesep characters into namespace URIs
> +
> +---
> + expat/Changes          | 16 ++++++++++++++++
> + expat/lib/xmlparse.c   | 17 +++++++++++++----
> + expat/tests/runtests.c | 30 ++++++++++++++++++++++++++++++
> + 3 files changed, 59 insertions(+), 4 deletions(-)
> +
> +diff --git a/lib/xmlparse.c b/lib/xmlparse.c
> +index 7376aab1..c98e2e9f 100644
> +--- a/lib/xmlparse.c
> ++++ b/lib/xmlparse.c
> +@@ -718,8 +718,7 @@ XML_ParserCreate(const XML_Char *encodingName) {
> +
> + XML_Parser XMLCALL
> + XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
> +-  XML_Char tmp[2];
> +-  *tmp = nsSep;
> ++  XML_Char tmp[2] = {nsSep, 0};
> +   return XML_ParserCreate_MM(encodingName, NULL, tmp);
> + }
> +
> +@@ -1344,8 +1343,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
> +      would be otherwise.
> +   */
> +   if (parser->m_ns) {
> +-    XML_Char tmp[2];
> +-    *tmp = parser->m_namespaceSeparator;
> ++    XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
> +     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
> +   } else {
> +     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
> +@@ -3761,6 +3759,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
> +     if (! mustBeXML && isXMLNS
> +         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
> +       isXMLNS = XML_FALSE;
> ++
> ++    // NOTE: While Expat does not validate namespace URIs against RFC 3986,
> ++    //       we have to at least make sure that the XML processor on top of
> ++    //       Expat (that is splitting tag names by namespace separator into
> ++    //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
> ++    //       by an attacker putting additional namespace separator characters
> ++    //       into namespace declarations.  That would be ambiguous and not to
> ++    //       be expected.
> ++    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
> ++      return XML_ERROR_SYNTAX;
> ++    }
> +   }
> +   isXML = isXML && len == xmlLen;
> +   isXMLNS = isXMLNS && len == xmlnsLen;
> +diff --git a/tests/runtests.c b/tests/runtests.c
> +index d07203f2..bc5344b1 100644
> +--- a/tests/runtests.c
> ++++ b/tests/runtests.c
> +@@ -7220,6 +7220,35 @@ START_TEST(test_ns_double_colon_doctype) {
> + }
> + END_TEST
> +
> ++START_TEST(test_ns_separator_in_uri) {
> ++  struct test_case {
> ++    enum XML_Status expectedStatus;
> ++    const char *doc;
> ++  };
> ++  struct test_case cases[] = {
> ++      {XML_STATUS_OK, "<doc xmlns='one_two' />"},
> ++      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
> ++  };
> ++
> ++  size_t i = 0;
> ++  size_t failCount = 0;
> ++  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
> ++    XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
> ++    XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
> ++    if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
> ++                  /*isFinal*/ XML_TRUE)
> ++        != cases[i].expectedStatus) {
> ++      failCount++;
> ++    }
> ++    XML_ParserFree(parser);
> ++  }
> ++
> ++  if (failCount) {
> ++    fail("Namespace separator handling is broken");
> ++  }
> ++}
> ++END_TEST
> ++
> + /* Control variable; the number of times duff_allocator() will successfully
> +  * allocate */
> + #define ALLOC_ALWAYS_SUCCEED (-1)
> +@@ -11905,6 +11934,7 @@ make_suite(void) {
> +   tcase_add_test(tc_namespace, test_ns_utf16_doctype);
> +   tcase_add_test(tc_namespace, test_ns_invalid_doctype);
> +   tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
> ++  tcase_add_test(tc_namespace, test_ns_separator_in_uri);
> +
> +   suite_add_tcase(s, tc_misc);
> +   tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
> diff --git a/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch b/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch
> new file mode 100644
> index 0000000000..0f14c9631b
> --- /dev/null
> +++ b/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch
> @@ -0,0 +1,232 @@
> +Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/f178826b]
> +CVE: CVE-2022-25236
> +
> +The commit is a merge commit, and this patch is created by:
> +
> +$ git show -m -p --stat f178826b
> +
> +Remove changes for expat/Changes and reference.html which fail to be applied.
> +
> +Signed-off-by: Kai Kang <kai.kang@windriver.com>
> +
> +commit f178826bb1e9c8ee23202f1be55ad4ac7b649e84 (from c99e0e7f2b15b48848038992ecbb4480f957cfe9)
> +Merge: c99e0e7f 9579f7ea
> +Author: Sebastian Pipping <sebastian@pipping.org>
> +Date:   Fri Mar 4 18:43:39 2022 +0100
> +
> +    Merge pull request #577 from libexpat/namesep
> +
> +    lib: Relax fix to CVE-2022-25236 with regard to RFC 3986 URI characters (fixes #572)
> +---
> + expat/Changes            |  16 ++++++
> + expat/doc/reference.html |   8 +++
> + expat/lib/expat.h        |  11 ++++
> + expat/lib/xmlparse.c     | 139 ++++++++++++++++++++++++++++++++++++++++++++---
> + expat/tests/runtests.c   |   8 ++-
> + 5 files changed, 171 insertions(+), 11 deletions(-)
> +
> +diff --git a/lib/expat.h b/lib/expat.h
> +index 5ab493f7..181fc960 100644
> +--- a/lib/expat.h
> ++++ b/lib/expat.h
> +@@ -239,6 +239,17 @@ XML_ParserCreate(const XML_Char *encoding);
> +    and the local part will be concatenated without any separator.
> +    It is a programming error to use the separator '\0' with namespace
> +    triplets (see XML_SetReturnNSTriplet).
> ++   If a namespace separator is chosen that can be part of a URI or
> ++   part of an XML name, splitting an expanded name back into its
> ++   1, 2 or 3 original parts on application level in the element handler
> ++   may end up vulnerable, so these are advised against;  sane choices for
> ++   a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
> ++
> ++   Note that Expat does not validate namespace URIs (beyond encoding)
> ++   against RFC 3986 today (and is not required to do so with regard to
> ++   the XML 1.0 namespaces specification) but it may start doing that
> ++   in future releases.  Before that, an application using Expat must
> ++   be ready to receive namespace URIs containing non-URI characters.
> + */
> + XMLPARSEAPI(XML_Parser)
> + XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
> +diff --git a/lib/xmlparse.c b/lib/xmlparse.c
> +index 59da19c8..6fe2cf1e 100644
> +--- a/lib/xmlparse.c
> ++++ b/lib/xmlparse.c
> +@@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
> +   return XML_ERROR_NONE;
> + }
> +
> ++static XML_Bool
> ++is_rfc3986_uri_char(XML_Char candidate) {
> ++  // For the RFC 3986 ABNF grammar see
> ++  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
> ++
> ++  switch (candidate) {
> ++  // From rule "ALPHA" (uppercase half)
> ++  case 'A':
> ++  case 'B':
> ++  case 'C':
> ++  case 'D':
> ++  case 'E':
> ++  case 'F':
> ++  case 'G':
> ++  case 'H':
> ++  case 'I':
> ++  case 'J':
> ++  case 'K':
> ++  case 'L':
> ++  case 'M':
> ++  case 'N':
> ++  case 'O':
> ++  case 'P':
> ++  case 'Q':
> ++  case 'R':
> ++  case 'S':
> ++  case 'T':
> ++  case 'U':
> ++  case 'V':
> ++  case 'W':
> ++  case 'X':
> ++  case 'Y':
> ++  case 'Z':
> ++
> ++  // From rule "ALPHA" (lowercase half)
> ++  case 'a':
> ++  case 'b':
> ++  case 'c':
> ++  case 'd':
> ++  case 'e':
> ++  case 'f':
> ++  case 'g':
> ++  case 'h':
> ++  case 'i':
> ++  case 'j':
> ++  case 'k':
> ++  case 'l':
> ++  case 'm':
> ++  case 'n':
> ++  case 'o':
> ++  case 'p':
> ++  case 'q':
> ++  case 'r':
> ++  case 's':
> ++  case 't':
> ++  case 'u':
> ++  case 'v':
> ++  case 'w':
> ++  case 'x':
> ++  case 'y':
> ++  case 'z':
> ++
> ++  // From rule "DIGIT"
> ++  case '0':
> ++  case '1':
> ++  case '2':
> ++  case '3':
> ++  case '4':
> ++  case '5':
> ++  case '6':
> ++  case '7':
> ++  case '8':
> ++  case '9':
> ++
> ++  // From rule "pct-encoded"
> ++  case '%':
> ++
> ++  // From rule "unreserved"
> ++  case '-':
> ++  case '.':
> ++  case '_':
> ++  case '~':
> ++
> ++  // From rule "gen-delims"
> ++  case ':':
> ++  case '/':
> ++  case '?':
> ++  case '#':
> ++  case '[':
> ++  case ']':
> ++  case '@':
> ++
> ++  // From rule "sub-delims"
> ++  case '!':
> ++  case '$':
> ++  case '&':
> ++  case '\'':
> ++  case '(':
> ++  case ')':
> ++  case '*':
> ++  case '+':
> ++  case ',':
> ++  case ';':
> ++  case '=':
> ++    return XML_TRUE;
> ++
> ++  default:
> ++    return XML_FALSE;
> ++  }
> ++}
> ++
> + /* addBinding() overwrites the value of prefix->binding without checking.
> +    Therefore one must keep track of the old value outside of addBinding().
> + */
> +@@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
> +         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
> +       isXMLNS = XML_FALSE;
> +
> +-    // NOTE: While Expat does not validate namespace URIs against RFC 3986,
> +-    //       we have to at least make sure that the XML processor on top of
> +-    //       Expat (that is splitting tag names by namespace separator into
> +-    //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
> +-    //       by an attacker putting additional namespace separator characters
> +-    //       into namespace declarations.  That would be ambiguous and not to
> +-    //       be expected.
> +-    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
> ++    // NOTE: While Expat does not validate namespace URIs against RFC 3986
> ++    //       today (and is not REQUIRED to do so with regard to the XML 1.0
> ++    //       namespaces specification) we have to at least make sure, that
> ++    //       the application on top of Expat (that is likely splitting expanded
> ++    //       element names ("qualified names") of form
> ++    //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
> ++    //       in its element handler code) cannot be confused by an attacker
> ++    //       putting additional namespace separator characters into namespace
> ++    //       declarations.  That would be ambiguous and not to be expected.
> ++    //
> ++    //       While the HTML API docs of function XML_ParserCreateNS have been
> ++    //       advising against use of a namespace separator character that can
> ++    //       appear in a URI for >20 years now, some widespread applications
> ++    //       are using URI characters (':' (colon) in particular) for a
> ++    //       namespace separator, in practice.  To keep these applications
> ++    //       functional, we only reject namespace URIs containing the
> ++    //       application-chosen namespace separator if the chosen separator
> ++    //       is a non-URI character with regard to RFC 3986.
> ++    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
> ++        && ! is_rfc3986_uri_char(uri[len])) {
> +       return XML_ERROR_SYNTAX;
> +     }
> +   }
> +diff --git a/tests/runtests.c b/tests/runtests.c
> +index 60da868e..712706c4 100644
> +--- a/tests/runtests.c
> ++++ b/tests/runtests.c
> +@@ -7406,16 +7406,18 @@ START_TEST(test_ns_separator_in_uri) {
> +   struct test_case {
> +     enum XML_Status expectedStatus;
> +     const char *doc;
> ++    XML_Char namesep;
> +   };
> +   struct test_case cases[] = {
> +-      {XML_STATUS_OK, "<doc xmlns='one_two' />"},
> +-      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
> ++      {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
> ++      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
> ++      {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
> +   };
> +
> +   size_t i = 0;
> +   size_t failCount = 0;
> +   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
> +-    XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
> ++    XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
> +     XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
> +     if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
> +                   /*isFinal*/ XML_TRUE)
> diff --git a/meta/recipes-core/expat/expat_2.2.10.bb b/meta/recipes-core/expat/expat_2.2.10.bb
> index 0b3331981c..f99fa7edb6 100644
> --- a/meta/recipes-core/expat/expat_2.2.10.bb
> +++ b/meta/recipes-core/expat/expat_2.2.10.bb
> @@ -18,6 +18,8 @@ SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TA
>              file://CVE-2022-23852.patch \
>              file://CVE-2022-23990.patch \
>              file://CVE-2022-25235.patch \
> +           file://CVE-2022-25236-1.patch \
> +           file://CVE-2022-25236-2.patch \
>              "
>   
>   UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/"
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#95950): https://lists.openembedded.org/g/openembedded-devel/message/95950
> Mute This Topic: https://lists.openembedded.org/mt/89710285/3616933
> Group Owner: openembedded-devel+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/openembedded-devel/unsub [kai.kang@windriver.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>

Patch

diff --git a/meta/recipes-core/expat/expat/CVE-2022-25235.patch b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
new file mode 100644
index 0000000000..9febeae609
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
@@ -0,0 +1,261 @@ 
+Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/306b721]
+CVE: CVE-2022-25235
+
+The commit is a merge commit, and this patch is created by:
+
+$ git show -m -p --stat 306b72134f157bbfd1637b20a22cabf4acfa136a
+
+Remove modification for expat/Changes which fails to be applied.
+
+Signed-off-by: Kai Kang <kai.kang@windriver.com>
+
+commit 306b72134f157bbfd1637b20a22cabf4acfa136a (from 2cc97e875ef84da4bcf55156c83599116f7523b4)
+Merge: 2cc97e87 c16300f0
+Author: Sebastian Pipping <sebastian@pipping.org>
+Date:   Fri Feb 18 20:12:32 2022 +0100
+
+    Merge pull request #562 from libexpat/utf8-security
+    
+    [CVE-2022-25235] lib: Protect against malformed encoding (e.g. malformed UTF-8)
+---
+ expat/Changes           |   7 ++++
+ expat/lib/xmltok.c      |   5 ---
+ expat/lib/xmltok_impl.c |  18 ++++----
+ expat/tests/runtests.c  | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 127 insertions(+), 12 deletions(-)
+
+diff --git a/lib/xmltok.c b/lib/xmltok.c
+index a72200e8..3bddf125 100644
+--- a/lib/xmltok.c
++++ b/lib/xmltok.c
+@@ -98,11 +98,6 @@
+         + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)]                 \
+    & (1u << (((byte)[2]) & 0x1F)))
+ 
+-#define UTF8_GET_NAMING(pages, p, n)                                           \
+-  ((n) == 2                                                                    \
+-       ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p))                   \
+-       : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
+-
+ /* Detection of invalid UTF-8 sequences is based on Table 3.1B
+    of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
+    with the additional restriction of not allowing the Unicode
+diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
+index 0430591b..84ff35f9 100644
+--- a/lib/xmltok_impl.c
++++ b/lib/xmltok_impl.c
+@@ -69,7 +69,7 @@
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
+-    if (! IS_NAME_CHAR(enc, ptr, n)) {                                         \
++    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
+       *nextTokPtr = ptr;                                                       \
+       return XML_TOK_INVALID;                                                  \
+     }                                                                          \
+@@ -98,7 +98,7 @@
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
+-    if (! IS_NMSTRT_CHAR(enc, ptr, n)) {                                       \
++    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
+       *nextTokPtr = ptr;                                                       \
+       return XML_TOK_INVALID;                                                  \
+     }                                                                          \
+@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
++    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
++      *nextTokPtr = ptr;                                                       \
++      return XML_TOK_INVALID;                                                  \
++    }                                                                          \
+     if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
+       ptr += n;                                                                \
+       tok = XML_TOK_NAME;                                                      \
+@@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
+     switch (BYTE_TYPE(enc, ptr)) {
+ #  define LEAD_CASE(n)                                                         \
+   case BT_LEAD##n:                                                             \
+-    ptr += n;                                                                  \
++    ptr += n; /* NOTE: The encoding has already been validated. */             \
+     break;
+       LEAD_CASE(2)
+       LEAD_CASE(3)
+@@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
+     switch (BYTE_TYPE(enc, ptr)) {
+ #  define LEAD_CASE(n)                                                         \
+   case BT_LEAD##n:                                                             \
+-    ptr += n;                                                                  \
++    ptr += n; /* NOTE: The encoding has already been validated. */             \
+     break;
+       LEAD_CASE(2)
+       LEAD_CASE(3)
+@@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
+       state = inName;                                                          \
+     }
+ #  define LEAD_CASE(n)                                                         \
+-  case BT_LEAD##n:                                                             \
++  case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
+     START_NAME ptr += (n - MINBPC(enc));                                       \
+     break;
+       LEAD_CASE(2)
+@@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
+     switch (BYTE_TYPE(enc, ptr)) {
+ #  define LEAD_CASE(n)                                                         \
+   case BT_LEAD##n:                                                             \
+-    ptr += n;                                                                  \
++    ptr += n; /* NOTE: The encoding has already been validated. */             \
+     break;
+       LEAD_CASE(2)
+       LEAD_CASE(3)
+@@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
+     switch (BYTE_TYPE(enc, ptr)) {
+ #  define LEAD_CASE(n)                                                         \
+   case BT_LEAD##n:                                                             \
+-    ptr += n;                                                                  \
++    ptr += n; /* NOTE: The encoding has already been validated. */             \
+     pos->columnNumber++;                                                       \
+     break;
+       LEAD_CASE(2)
+diff --git a/tests/runtests.c b/tests/runtests.c
+index bc5344b1..9b155b82 100644
+--- a/tests/runtests.c
++++ b/tests/runtests.c
+@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
+ }
+ END_TEST
+ 
++START_TEST(test_utf8_in_start_tags) {
++  struct test_case {
++    bool goodName;
++    bool goodNameStart;
++    const char *tagName;
++  };
++
++  // The idea with the tests below is this:
++  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
++  // go to isNever and are hence not a concern.
++  //
++  // We start with a character that is a valid name character
++  // (or even name-start character, see XML 1.0r4 spec) and then we flip
++  // single bits at places where (1) the result leaves the UTF-8 encoding space
++  // and (2) we stay in the same n-byte sequence family.
++  //
++  // The flipped bits are highlighted in angle brackets in comments,
++  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
++  // the most significant bit to 1 to leave UTF-8 encoding space.
++  struct test_case cases[] = {
++      // 1-byte UTF-8: [0xxx xxxx]
++      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
++      {false, false, "\xBA"}, // [<1>011 1010]
++      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
++      {false, false, "\xB9"}, // [<1>011 1001]
++
++      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
++      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
++                                  // Arabic small waw U+06E5
++      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
++      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
++      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
++      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
++                                  // combining char U+0301
++      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
++      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
++      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
++
++      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
++      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
++                                      // Devanagari Letter A U+0905
++      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
++      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
++      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
++      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
++      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
++      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
++                                      // combining char U+0901
++      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
++      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
++      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
++      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
++      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
++  };
++  const bool atNameStart[] = {true, false};
++
++  size_t i = 0;
++  char doc[1024];
++  size_t failCount = 0;
++
++  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
++    size_t j = 0;
++    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
++      const bool expectedSuccess
++          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
++      sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
++      XML_Parser parser = XML_ParserCreate(NULL);
++
++      const enum XML_Status status
++          = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
++
++      bool success = true;
++      if ((status == XML_STATUS_OK) != expectedSuccess) {
++        success = false;
++      }
++      if ((status == XML_STATUS_ERROR)
++          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
++        success = false;
++      }
++
++      if (! success) {
++        fprintf(
++            stderr,
++            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
++            (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
++            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
++        failCount++;
++      }
++
++      XML_ParserFree(parser);
++    }
++  }
++
++  if (failCount > 0) {
++    fail("UTF-8 regression detected");
++  }
++}
++END_TEST
++
+ /* Test trailing spaces in elements are accepted */
+ static void XMLCALL
+ record_element_end_handler(void *userData, const XML_Char *name) {
+@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
+ }
+ END_TEST
+ 
++START_TEST(test_bad_doctype_utf8) {
++  const char *text = "<!DOCTYPE \xDB\x25"
++                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
++  expect_failure(text, XML_ERROR_INVALID_TOKEN,
++                 "Invalid UTF-8 in DOCTYPE not faulted");
++}
++END_TEST
++
+ START_TEST(test_bad_doctype_utf16) {
+   const char text[] =
+       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
+@@ -11870,6 +11977,7 @@ make_suite(void) {
+   tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
+   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
+   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
++  tcase_add_test(tc_basic, test_utf8_in_start_tags);
+   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
+   tcase_add_test(tc_basic, test_utf16_attribute);
+   tcase_add_test(tc_basic, test_utf16_second_attr);
+@@ -11878,6 +11986,7 @@ make_suite(void) {
+   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
+   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
+   tcase_add_test(tc_basic, test_bad_doctype);
++  tcase_add_test(tc_basic, test_bad_doctype_utf8);
+   tcase_add_test(tc_basic, test_bad_doctype_utf16);
+   tcase_add_test(tc_basic, test_bad_doctype_plus);
+   tcase_add_test(tc_basic, test_bad_doctype_star);
diff --git a/meta/recipes-core/expat/expat_2.2.10.bb b/meta/recipes-core/expat/expat_2.2.10.bb
index a851e54b2a..0b3331981c 100644
--- a/meta/recipes-core/expat/expat_2.2.10.bb
+++ b/meta/recipes-core/expat/expat_2.2.10.bb
@@ -17,6 +17,7 @@  SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TA
            file://CVE-2021-46143.patch \
            file://CVE-2022-23852.patch \
            file://CVE-2022-23990.patch \
+           file://CVE-2022-25235.patch \
            "
 
 UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/"