diff --git a/src/common/xml/ebml_converter.cpp b/src/common/xml/ebml_converter.cpp index b9e959af3..029d14ffa 100644 --- a/src/common/xml/ebml_converter.cpp +++ b/src/common/xml/ebml_converter.cpp @@ -264,8 +264,8 @@ ebml_converter_c::parse_binary(parser_context_t &ctx) { format = "base64"; if (format == "hex") { - auto hex_content = std::regex_replace(content, std::regex{"(0x|\\s|\\r|\\n)+", std::regex_constants::icase}, ""); - if (std::regex_search(hex_content, std::regex{"[^0-9a-f]", std::regex_constants::icase})) + auto hex_content = mtx::regex::replace(content, mtx::regex::jp::Regex{"(0x|\\s|\\r|\\n)+", "i"}, "g", ""); + if (mtx::regex::match(hex_content, mtx::regex::jp::Regex{"[^0-9a-f]", "i"})) throw malformed_data_x{ ctx.name, ctx.node.offset_debug(), Y("Non-hex digits encountered.") }; if ((hex_content.size() % 2) == 1) diff --git a/src/common/xml/xml.cpp b/src/common/xml/xml.cpp index 8905ef5e1..6d9cd46ba 100644 --- a/src/common/xml/xml.cpp +++ b/src/common/xml/xml.cpp @@ -19,6 +19,7 @@ #include "common/mm_file_io.h" #include "common/mm_proxy_io.h" #include "common/mm_text_io.h" +#include "common/regex.h" #include "common/xml/xml.h" namespace mtx { @@ -71,21 +72,24 @@ load_file(std::string const &file_name, throw mtx::mm_io::end_of_file_x{}; if (byte_order_mark_e::none == in.get_byte_order_mark()) { - std::regex encoding_re("^\\s*" // ignore leading whitespace - "<\\?xml" // XML declaration start - "[^\\?]+?" // skip to encoding, but don't go beyond XML declaration - "encoding\\s*=\\s*" // encoding attribute - "\"([^\"]+)\"", // attribute value - std::regex_constants::icase); + mtx::regex::jp::Regex encoding_re{ + "^\\s*" // ignore leading whitespace + "<\\?xml" // XML declaration start + "[^\\?]+?" // skip to encoding, but don't go beyond XML declaration + "encoding\\s*=\\s*" // encoding attribute + "\"([^\"]+)\"", // attribute value + "i"}; - std::smatch matches; - if (std::regex_search(content, matches, encoding_re)) { - // Extract the old encoding, replace the string with "UTF-8" so - // that pugixml doesn't recode, and recode to UTF-8. - auto encoding = matches[1].str(); - content.replace(matches.position(1), matches.length(1), "UTF-8"); + // Extract the old encoding, replace the string with "UTF-8" so + // that pugixml doesn't recode, and recode to UTF-8. + std::string encoding; + content = mtx::regex::replace(content, encoding_re, "", [&encoding](auto const &match) { + encoding = match[1]; + return "UTF-8"s; + }); + + if (!encoding.empty()) content = charset_converter_c::init(encoding)->utf8(content); - } } std::stringstream scontent(content);