regexes: conversion from std::regex to jpcre2: common/xml/{ebml_converter,xml}.cpp

This commit is contained in:
Moritz Bunkus 2020-08-20 23:10:03 +02:00
parent b53cd5d322
commit 1c7c2b521f
No known key found for this signature in database
GPG Key ID: 74AF00ADF2E32C85
2 changed files with 19 additions and 15 deletions

View File

@ -264,8 +264,8 @@ ebml_converter_c::parse_binary(parser_context_t &ctx) {
format = "base64";
if (format == "hex") {
auto hex_content = std::regex_replace(content, std::regex{"(0x|\\s|\\r|\\n)+", std::regex_constants::icase}, "");
if (std::regex_search(hex_content, std::regex{"[^0-9a-f]", std::regex_constants::icase}))
auto hex_content = mtx::regex::replace(content, mtx::regex::jp::Regex{"(0x|\\s|\\r|\\n)+", "i"}, "g", "");
if (mtx::regex::match(hex_content, mtx::regex::jp::Regex{"[^0-9a-f]", "i"}))
throw malformed_data_x{ ctx.name, ctx.node.offset_debug(), Y("Non-hex digits encountered.") };
if ((hex_content.size() % 2) == 1)

View File

@ -19,6 +19,7 @@
#include "common/mm_file_io.h"
#include "common/mm_proxy_io.h"
#include "common/mm_text_io.h"
#include "common/regex.h"
#include "common/xml/xml.h"
namespace mtx {
@ -71,21 +72,24 @@ load_file(std::string const &file_name,
throw mtx::mm_io::end_of_file_x{};
if (byte_order_mark_e::none == in.get_byte_order_mark()) {
std::regex encoding_re("^\\s*" // ignore leading whitespace
"<\\?xml" // XML declaration start
"[^\\?]+?" // skip to encoding, but don't go beyond XML declaration
"encoding\\s*=\\s*" // encoding attribute
"\"([^\"]+)\"", // attribute value
std::regex_constants::icase);
mtx::regex::jp::Regex encoding_re{
"^\\s*" // ignore leading whitespace
"<\\?xml" // XML declaration start
"[^\\?]+?" // skip to encoding, but don't go beyond XML declaration
"encoding\\s*=\\s*" // encoding attribute
"\"([^\"]+)\"", // attribute value
"i"};
std::smatch matches;
if (std::regex_search(content, matches, encoding_re)) {
// Extract the old encoding, replace the string with "UTF-8" so
// that pugixml doesn't recode, and recode to UTF-8.
auto encoding = matches[1].str();
content.replace(matches.position(1), matches.length(1), "UTF-8");
// Extract the old encoding, replace the string with "UTF-8" so
// that pugixml doesn't recode, and recode to UTF-8.
std::string encoding;
content = mtx::regex::replace(content, encoding_re, "", [&encoding](auto const &match) {
encoding = match[1];
return "UTF-8"s;
});
if (!encoding.empty())
content = charset_converter_c::init(encoding)->utf8(content);
}
}
std::stringstream scontent(content);