regexes jpcre2 conversion: convert regex replace functions & various files

This commit is contained in:
Moritz Bunkus 2020-08-20 19:00:35 +02:00
parent e60783e889
commit 1755e2e1df
No known key found for this signature in database
GPG Key ID: 74AF00ADF2E32C85
5 changed files with 87 additions and 94 deletions

View File

@ -76,9 +76,8 @@ chapter_error(const std::string &error) {
*/
bool
probe_simple(mm_text_io_c *in) {
std::regex timestamp_line_re{SIMCHAP_RE_TIMESTAMP_LINE};
std::regex name_line_re{ SIMCHAP_RE_NAME_LINE};
std::smatch matches;
mtx::regex::jp::Regex timestamp_line_re{SIMCHAP_RE_TIMESTAMP_LINE};
mtx::regex::jp::Regex name_line_re{ SIMCHAP_RE_NAME_LINE};
std::string line;
@ -90,7 +89,7 @@ probe_simple(mm_text_io_c *in) {
if (line.empty())
continue;
if (!std::regex_search(line, timestamp_line_re))
if (!mtx::regex::match(line, timestamp_line_re))
return false;
while (in->getline2(line)) {
@ -98,7 +97,7 @@ probe_simple(mm_text_io_c *in) {
if (line.empty())
continue;
return std::regex_search(line, name_line_re);
return mtx::regex::match(line, name_line_re);
}
return false;
@ -172,10 +171,10 @@ parse_simple(mm_text_io_c *in,
: !g_default_language.empty() ? g_default_language
: "eng";
std::regex timestamp_line_re{SIMCHAP_RE_TIMESTAMP_LINE};
std::regex timestamp_re{ SIMCHAP_RE_TIMESTAMP};
std::regex name_line_re{ SIMCHAP_RE_NAME_LINE};
std::smatch matches;
mtx::regex::jp::Regex timestamp_line_re{SIMCHAP_RE_TIMESTAMP_LINE, "S"};
mtx::regex::jp::Regex timestamp_re{ SIMCHAP_RE_TIMESTAMP, "S"};
mtx::regex::jp::Regex name_line_re{ SIMCHAP_RE_NAME_LINE, "S"};
mtx::regex::jp::VecNum matches;
std::string line;
@ -185,14 +184,14 @@ parse_simple(mm_text_io_c *in,
continue;
if (0 == mode) {
if (!std::regex_match(line, matches, timestamp_line_re))
if (!mtx::regex::match(line, matches, timestamp_line_re))
chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxx=... line."), line));
int64_t hour = 0, minute = 0, second = 0, msecs = 0;
mtx::string::parse_number(matches[1].str(), hour);
mtx::string::parse_number(matches[2].str(), minute);
mtx::string::parse_number(matches[3].str(), second);
mtx::string::parse_number(matches[4].str(), msecs);
mtx::string::parse_number(matches[0][1], hour);
mtx::string::parse_number(matches[0][2], minute);
mtx::string::parse_number(matches[0][3], second);
mtx::string::parse_number(matches[0][4], msecs);
if (59 < minute)
chapter_error(fmt::format(Y("Invalid minute: {0}"), minute));
@ -202,14 +201,14 @@ parse_simple(mm_text_io_c *in,
start = msecs + second * 1000 + minute * 1000 * 60 + hour * 1000 * 60 * 60;
mode = 1;
if (!std::regex_match(line, matches, timestamp_re))
if (!mtx::regex::match(line, matches, timestamp_re))
chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxx=... line."), line));
} else {
if (!std::regex_match(line, matches, name_line_re))
if (!mtx::regex::match(line, matches, name_line_re))
chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxxNAME=... line."), line));
std::string name = matches[1].str();
auto name = matches[0][1];
if (name.empty())
name = format_name_template(g_chapter_generation_name_template.get_translated(), num + 1, timestamp_c::ms(start));
@ -1114,17 +1113,17 @@ format_name_template(std::string const &name_template,
timestamp_c const &start_timestamp,
std::string const &appended_file_name) {
auto name = name_template;
auto number_re = std::regex{"<NUM(?::(\\d+))?>"};
auto timestamp_re = std::regex{"<START(?::([^>]+))?>"};
auto file_name_re = std::regex{"<FILE_NAME>"};
auto file_name_ext_re = std::regex{"<FILE_NAME_WITH_EXT>"};
auto number_re = mtx::regex::jp::Regex{"<NUM(?::(\\d+))?>"};
auto timestamp_re = mtx::regex::jp::Regex{"<START(?::([^>]+))?>"};
auto file_name_re = mtx::regex::jp::Regex{"<FILE_NAME>"};
auto file_name_ext_re = mtx::regex::jp::Regex{"<FILE_NAME_WITH_EXT>"};
auto appended_file_name_p = bfs::path{appended_file_name};
name = mtx::regex::replace(name, number_re, [=](std::smatch const &match) {
name = mtx::regex::replace(name, number_re, "g", [=](auto const &match) {
auto number_str = fmt::format("{0}", chapter_number);
auto wanted_length = 1u;
if (match[1].length() && !mtx::string::parse_number(match[1].str(), wanted_length))
if (!match[1].empty() && !mtx::string::parse_number(match[1], wanted_length))
wanted_length = 1;
if (number_str.length() < wanted_length)
@ -1133,13 +1132,13 @@ format_name_template(std::string const &name_template,
return number_str;
});
name = mtx::regex::replace(name, timestamp_re, [=](std::smatch const &match) {
auto format = match[1].length() ? match[1] : "%H:%M:%S"s;
name = mtx::regex::replace(name, timestamp_re, "g", [=](auto const &match) {
auto format = !match[1].empty() ? match[1] : "%H:%M:%S"s;
return mtx::string::format_timestamp(start_timestamp.to_ns(), format);
});
name = std::regex_replace(name, file_name_re, appended_file_name_p.stem().string());
name = std::regex_replace(name, file_name_ext_re, appended_file_name_p.filename().string());
name = mtx::regex::replace(name, file_name_re, "g", appended_file_name_p.stem().string());
name = mtx::regex::replace(name, file_name_ext_re, "g", appended_file_name_p.filename().string());
return name;
}

View File

@ -22,6 +22,7 @@
#include "common/at_scope_exit.h"
#include "common/chapters/chapters.h"
#include "common/chapters/dvd.h"
#include "common/regex.h"
#include "common/strings/parsing.h"
#include "common/timestamp.h"
@ -108,18 +109,18 @@ maybe_parse_dvd(std::string const &file_name,
std::string const &language) {
auto title = 1u;
auto cleaned_file_name = file_name;
std::smatch matches;
mtx::regex::jp::VecNum matches;
if (std::regex_search(cleaned_file_name, matches, std::regex{"(.+):([0-9]+)$"})) {
cleaned_file_name = matches[1].str();
if (mtx::regex::match(cleaned_file_name, matches, mtx::regex::jp::Regex{"(.+):([0-9]+)$"})) {
cleaned_file_name = matches[0][1];
if (!mtx::string::parse_number(matches[2].str(), title) || (title < 1))
throw parser_x{fmt::format(Y("'{0}' is not a valid DVD title number."), matches[2].str())};
if (!mtx::string::parse_number(matches[0][2], title) || (title < 1))
throw parser_x{fmt::format(Y("'{0}' is not a valid DVD title number."), matches[0][2])};
}
auto dvd_dir = bfs::path{cleaned_file_name};
if (std::regex_search(boost::to_lower_copy(cleaned_file_name), std::regex{"\\.(bup|ifo|vob)$"}))
if (mtx::regex::match(cleaned_file_name, mtx::regex::jp::Regex{"\\.(bup|ifo|vob)$", "i"}))
dvd_dir = dvd_dir.parent_path();
else if ( !bfs::exists(dvd_dir)

View File

@ -41,48 +41,41 @@ match(std::string const &subject,
.match();
}
template<typename Tunary_function>
std::string
replace(std::string::const_iterator first,
std::string::const_iterator last,
std::regex const &re,
Tunary_function formatter) {
std::string s;
std::smatch::difference_type last_match_pos = 0;
auto last_match_end = first;
auto callback = [&](std::smatch const &match) {
auto this_match_start = last_match_end;
auto this_match_pos = match.position(0);
auto diff = this_match_pos - last_match_pos;
std::advance(this_match_start, diff);
s.append(last_match_end, this_match_start);
s.append(formatter(match));
auto match_length = match.length(0);
last_match_pos = this_match_pos + match_length;
last_match_end = this_match_start;
std::advance(last_match_end, match_length);
};
std::sregex_iterator re_begin(first, last, re), re_end;
std::for_each(re_begin, re_end, callback);
s.append(last_match_end, last);
return s;
inline auto
replace(std::string const &subject,
jp::Regex const &regex,
std::string const &modifier,
std::string const &replacement) {
return jp::RegexReplace{}
.setRegexObject(&regex)
.setSubject(subject)
.setReplaceWith(replacement)
.setModifier(modifier)
.replace();
}
template<typename Tunary_function>
std::string
replace(std::string const &s,
std::regex const &re,
Tunary_function formatter) {
return replace(s.cbegin(), s.cend(), re, formatter);
inline auto
replace(std::string const &subject,
jp::Regex const &regex,
std::string const &modifier,
std::function<std::string(jp::NumSub const &)> const &formatter) {
return jp::RegexReplace{}
.setRegexObject(&regex)
.setSubject(subject)
.setModifier(modifier)
.replace(jp::MatchEvaluator{[&formatter](jp::NumSub const &numbered, void *, void *) { return formatter(numbered); }});
}
inline auto
replace(std::string const &subject,
jp::Regex const &regex,
std::string const &modifier,
std::function<std::string(jp::NumSub const &, jp::MapNas const &)> const &formatter) {
return jp::RegexReplace{}
.setRegexObject(&regex)
.setSubject(subject)
.setModifier(modifier)
.replace(jp::MatchEvaluator{[&formatter](jp::NumSub const &numbered, jp::MapNas const &named, void *) { return formatter(numbered, named); }});
}
}

View File

@ -28,7 +28,7 @@ public:
unsigned int current_cue_number{}, total_number_of_cues{}, total_number_of_bytes{};
debugging_option_c debug{"webvtt_parser"};
std::regex timestamp_line_re{"^" RE_TIMESTAMP " --> " RE_TIMESTAMP "(?: ([^\\n]+))?$"};
mtx::regex::jp::Regex timestamp_line_re{"^" RE_TIMESTAMP " --> " RE_TIMESTAMP "(?: ([^\\n]+))?$", "S"};
};
webvtt_parser_c::webvtt_parser_c()
@ -77,14 +77,14 @@ webvtt_parser_c::add_block() {
if (m->current_block.empty())
return;
std::smatch matches;
mtx::regex::jp::VecNum matches;
std::string label, additional;
auto timestamp_line = -1;
if (std::regex_search(m->current_block[0], matches, m->timestamp_line_re))
if (mtx::regex::match(m->current_block[0], matches, m->timestamp_line_re))
timestamp_line = 0;
else if ((m->current_block.size() > 1) && std::regex_search(m->current_block[1], matches, m->timestamp_line_re)) {
else if ((m->current_block.size() > 1) && mtx::regex::match(m->current_block[1], matches, m->timestamp_line_re)) {
timestamp_line = 1;
label = std::move(m->current_block[0]);
@ -100,8 +100,8 @@ webvtt_parser_c::add_block() {
m->parsing_global_data = false;
timestamp_c start, end;
mtx::string::parse_timestamp(matches[1].str(), start);
mtx::string::parse_timestamp(matches[2].str(), end);
mtx::string::parse_timestamp(matches[0][1], start);
mtx::string::parse_timestamp(matches[0][2], end);
auto content = mtx::string::join(m->current_block.begin() + timestamp_line + 1, m->current_block.end(), "\n");
content = adjust_embedded_timestamps(content, start.negate());
@ -109,7 +109,7 @@ webvtt_parser_c::add_block() {
cue->m_start = start;
cue->m_duration = end - start;
cue->m_content = memory_c::clone(content);
auto settings_list = matches[3].str();
auto settings_list = matches[0][3];
if (! (label.empty() && settings_list.empty() && m->local_blocks.empty())) {
additional = settings_list + "\n" + label + "\n" + mtx::string::join(m->local_blocks, "\n");
@ -118,9 +118,9 @@ webvtt_parser_c::add_block() {
mxdebug_if(m->debug,
fmt::format("label «{0}» start «{1}» end «{2}» settings list «{3}» additional «{4}» content «{5}»\n",
label, matches[1].str(), matches[2].str(), matches[3].str(),
std::regex_replace(additional, std::regex{"\n+"}, ""),
std::regex_replace(content, std::regex{"\n+"}, "")));
label, matches[0][1], matches[0][2], matches[0][3],
mtx::regex::replace(additional, mtx::regex::jp::Regex{"\n+"}, "g", ""),
mtx::regex::replace(content, mtx::regex::jp::Regex{"\n+"}, "g", "")));
m->local_blocks.clear();
m->current_block.clear();
@ -176,14 +176,14 @@ webvtt_parser_c::get_total_number_of_bytes()
std::string
webvtt_parser_c::adjust_embedded_timestamps(std::string const &text,
timestamp_c const &offset) {
static std::optional<std::regex> s_embedded_timestamp_re;
static std::optional<mtx::regex::jp::Regex> s_embedded_timestamp_re;
if (!s_embedded_timestamp_re)
s_embedded_timestamp_re = std::regex{"<" RE_TIMESTAMP ">"};
s_embedded_timestamp_re = mtx::regex::jp::Regex{"<" RE_TIMESTAMP ">", "S"};
return mtx::regex::replace(text, *s_embedded_timestamp_re, [&offset](std::smatch const &match) -> std::string {
return mtx::regex::replace(text, *s_embedded_timestamp_re, "g", [&offset](auto const &match) -> std::string {
timestamp_c timestamp;
mtx::string::parse_timestamp(match[1].str(), timestamp);
mtx::string::parse_timestamp(match[1], timestamp);
return fmt::format("<{0}>", mtx::string::format_timestamp(timestamp + offset, 3));
});
}

View File

@ -143,11 +143,11 @@ AvailableUpdateInfoDialog::updateReleasesInfoDisplay() {
auto html = QStringList{};
auto numReleasesOutput = 0;
auto releases = m_releasesInfo->select_nodes("/mkvtoolnix-releases/release[not(@version='HEAD')]");
auto reReleased = std::regex{"^released\\s+v?[\\d\\.]+", std::regex_constants::icase};
auto reBug = std::regex{"(#\\d+)", std::regex_constants::icase};
auto reNewlines = std::regex{"\r?\n", std::regex_constants::icase};
auto bugFormatter = [](std::smatch const &matches) -> std::string {
auto number_str = matches[1].str().substr(1);
auto reReleased = mtx::regex::jp::Regex{"^released\\s+v?[\\d\\.]+", "iS"};
auto reBug = mtx::regex::jp::Regex{"(#\\d+)", "iS"};
auto reNewlines = mtx::regex::jp::Regex{"\r?\n", "iS"};
auto bugFormatter = [](mtx::regex::jp::NumSub const &matches) -> std::string {
auto number_str = matches[1].substr(1);
return fmt::format("<a href=\"https://gitlab.com/mbunkus/mkvtoolnix/issues/{0}\">#{0}</a>", number_str);
};
@ -163,7 +163,7 @@ AvailableUpdateInfoDialog::updateReleasesInfoDisplay() {
for (auto change = release.node().child("changes").first_child() ; change ; change = change.next_sibling()) {
if ( (std::string{change.name()} != "change")
|| std::regex_search(change.child_value(), reReleased))
|| mtx::regex::match(change.child_value(), reReleased))
continue;
auto typeQ = Q(change.attribute("type").value()).toHtmlEscaped();
@ -183,7 +183,7 @@ AvailableUpdateInfoDialog::updateReleasesInfoDisplay() {
html << Q("<p><ul>");
}
auto text = std::regex_replace(mtx::regex::replace(mtx::markdown::to_html(change.child_value()), reBug, bugFormatter), reNewlines, " ");
auto text = mtx::regex::replace(mtx::regex::replace(mtx::markdown::to_html(change.child_value()), reBug, "g", bugFormatter), reNewlines, "g", " ");
html << Q("<li>%1</li>").arg(Q(text));
}