From 09f2464913be8d036406e64712236e020e2412c3 Mon Sep 17 00:00:00 2001 From: Lauris Kaplinski Date: Wed, 17 Dec 2025 22:33:22 +0200 Subject: [PATCH 1/4] Check XML text for unsupported chars and validate utf8 Signed-off-by: Lauris Kaplinski --- src/XMLDocument.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/XMLDocument.h b/src/XMLDocument.h index f9e7b94f9..bda130d4b 100644 --- a/src/XMLDocument.h +++ b/src/XMLDocument.h @@ -229,10 +229,21 @@ struct XMLNode: public XMLElem return from_base64(operator sv()); } - XMLNode& operator=(sv text) noexcept + XMLNode& operator=(sv text) { if(!d) return *this; + const char *utf = text.cbegin(); + int len = int(text.size()); + while (utf < text.cend()) { + int uc = xmlGetUTF8Char((const unsigned char *) utf, &len); + if (len < 1) { + THROW("Invalid utf8 string"); + } else if ((uc < 0x20) && (uc != 0x9) && (uc != 0xa) && (uc != 0xd)) { + THROW("Invalid character"); + } + utf += len; + } xmlNodeSetContentLen(d, nullptr, 0); if(!text.empty()) xmlNodeAddContentLen(d, pcxmlChar(text.data()), int(text.length())); From 3413a56efc8f527ad6ed2fa41f87ed63f7ec49c2 Mon Sep 17 00:00:00 2001 From: Lauris Kaplinski Date: Wed, 17 Dec 2025 22:59:16 +0200 Subject: [PATCH 2/4] Added code comments about utf8/xml requirement Signed-off-by: Lauris Kaplinski --- src/crypto/Signer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/crypto/Signer.cpp b/src/crypto/Signer.cpp index b36a0e4c5..0381e6bba 100644 --- a/src/crypto/Signer.cpp +++ b/src/crypto/Signer.cpp @@ -73,6 +73,7 @@ Signer::~Signer() = default; * @param stateOrProvince * @param postalCode * @param countryName + * The strings must be UTF-8 encoded and must not contain control characters (any character < 0x20 except 0x9, 0xa and 0xd) */ void Signer::setSignatureProductionPlace(const string &city, const string &stateOrProvince, const string &postalCode, const string &countryName) @@ -90,6 +91,7 @@ void Signer::setSignatureProductionPlace(const string 
&city, * @param stateOrProvince * @param postalCode * @param countryName + * The strings must be UTF-8 encoded and must not contain control characters (any character < 0x20 except 0x9, 0xa and 0xd) */ void Signer::setSignatureProductionPlaceV2(const string &city, const string &streetAddress, const string &stateOrProvince, const string &postalCode, const string &countryName) @@ -194,6 +196,7 @@ void Signer::setProfile(const string &profile) /** * Sets signature roles according XAdES standard. The parameter may contain the signer’s role and optionally the signer’s resolution. Note that only one signer role value (i.e. one <ClaimedRole> XML element) should be used. * If the signer role contains both role and resolution then they must be separated with a slash mark, e.g. “role / resolution”. + * The strings must be UTF-8 encoded and must not contain control characters (any character < 0x20 except 0x9, 0xa and 0xd) */ void Signer::setSignerRoles(const vector &signerRoles) { From 22fcbc40cd36508b6de17961eeec88cd7db32969 Mon Sep 17 00:00:00 2001 From: Lauris Kaplinski Date: Mon, 5 Jan 2026 11:07:02 +0200 Subject: [PATCH 3/4] Clearer XML charset error message, removed noexcept from setSignerRoles Signed-off-by: Lauris Kaplinski --- src/SignatureXAdES_B.cpp | 2 +- src/SignatureXAdES_B.h | 2 +- src/XMLDocument.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/SignatureXAdES_B.cpp b/src/SignatureXAdES_B.cpp index 4fcdb9694..47208720d 100644 --- a/src/SignatureXAdES_B.cpp +++ b/src/SignatureXAdES_B.cpp @@ -742,7 +742,7 @@ void SignatureXAdES_B::setSignatureProductionPlace(string_view name, * * @param roles signer roles. 
*/ -void SignatureXAdES_B::setSignerRoles(string_view name, const vector &roles) noexcept +void SignatureXAdES_B::setSignerRoles(string_view name, const vector &roles) { if(roles.empty()) return; diff --git a/src/SignatureXAdES_B.h b/src/SignatureXAdES_B.h index 56f12e485..aa212fdf7 100644 --- a/src/SignatureXAdES_B.h +++ b/src/SignatureXAdES_B.h @@ -112,7 +112,7 @@ namespace digidoc void setSignatureProductionPlace(std::string_view name, const std::string &city, const std::string &streetAddress, const std::string &stateOrProvince, const std::string &postalCode, const std::string &countryName) noexcept; - void setSignerRoles(std::string_view name, const std::vector &signerRoles) noexcept; + void setSignerRoles(std::string_view name, const std::vector &signerRoles); constexpr XMLNode V1orV2(std::string_view v1, std::string_view v2) const noexcept; // offline checks diff --git a/src/XMLDocument.h b/src/XMLDocument.h index bda130d4b..d97d3ca6c 100644 --- a/src/XMLDocument.h +++ b/src/XMLDocument.h @@ -238,9 +238,9 @@ struct XMLNode: public XMLElem while (utf < text.cend()) { int uc = xmlGetUTF8Char((const unsigned char *) utf, &len); if (len < 1) { - THROW("Invalid utf8 string"); - } else if ((uc < 0x20) && (uc != 0x9) && (uc != 0xa) && (uc != 0xd)) { - THROW("Invalid character"); + THROW("Invalid utf8 string in XML content"); + } else if (((uc < 0x20) && (uc != 0x9) && (uc != 0xa) && (uc != 0xd)) || (uc == 0xfffe) || (uc == 0xffff)) { + THROW("Invalid character '0x%2x' in XML content", uc); } utf += len; } From 2aa2c04d26449c8e723c1b0e0d9777f2e58effd8 Mon Sep 17 00:00:00 2001 From: Raul Metsma Date: Wed, 1 Apr 2026 09:30:10 +0300 Subject: [PATCH 4/4] Fix build on windows Signed-off-by: Raul Metsma --- .github/workflows/build.yml | 2 +- src/XMLDocument.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 40795fcf0..68cae5cb3 100644 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -137,7 +137,7 @@ jobs: strategy: matrix: dist: [ubuntu] - ver: ['22.04', '24.04', '25.04', '25.10'] + ver: ['22.04', '24.04', '25.04', '25.10', '26.04'] arch: [amd64, arm64] java: [17] include: [ diff --git a/src/XMLDocument.h b/src/XMLDocument.h index d97d3ca6c..d332d253f 100644 --- a/src/XMLDocument.h +++ b/src/XMLDocument.h @@ -233,9 +233,10 @@ struct XMLNode: public XMLElem { if(!d) return *this; - const char *utf = text.cbegin(); - int len = int(text.size()); - while (utf < text.cend()) { + const char *utf = std::to_address(text.cbegin()); + const char *end = utf + text.size(); + while (utf < end) { + int len = int(end - utf); // remaining bytes int uc = xmlGetUTF8Char((const unsigned char *) utf, &len); if (len < 1) { THROW("Invalid utf8 string in XML content");