GUI: mux: detect full BCP 47/RFC 5646 language tags in file names

Implements the rest of #3173.
This commit is contained in:
Moritz Bunkus 2021-07-31 23:04:29 +02:00
parent 5dc52310c2
commit 9352a69bab
No known key found for this signature in database
GPG Key ID: 74AF00ADF2E32C85
2 changed files with 39 additions and 0 deletions

View File

@ -5,6 +5,9 @@
* MKVToolNix GUI: multiplexer: deriving track languages from file names: the
default list of boundary characters now includes `-`. Part of the
implementation of #3173.
* MKVToolNix GUI: multiplexer: deriving track languages from file names: the
GUI can now detect full BCP 47/RFC 5646 language tags in file names. Part of
the implementation of #3173.
## Build system changes

View File

@ -369,6 +369,42 @@ SourceFile::deriveLanguageFromFileName() {
QStringList escapedChars;
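// First try to detect full BCP 47 language tags, but only those that
// contain at least two components and don't start with "x-" (the
// BCP 47 private-use prefix). As BCP 47 tags contain '-' characters
// themselves, '-' is not used as a boundary character for this pass.
// NOTE(review): the pattern below rejects every candidate whose first
// character is 'x', so e.g. "xh-ZA" is not treated as a full tag
// here either — confirm that this is intended.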
for (auto c : cfg.m_boundaryCharsForDerivingTrackLanguagesFromFileNames)
if (c != L'-')
escapedChars << QRegularExpression::escape(c);
QRegularExpression bcp47Re{Q("^[^x][a-z]+-"), QRegularExpression::CaseInsensitiveOption};
if (!escapedChars.isEmpty()) {
auto splitRE = QRegularExpression{Q("(?:%1)+").arg(escapedChars.join(Q("|")))};
auto allCaptures = fileName.split(splitRE);
for (auto captureItr = allCaptures.rbegin(), captureEnd = allCaptures.rend(); captureItr != captureEnd; ++captureItr) {
auto &capture = *captureItr;
if (capture.isEmpty())
continue;
qDebug() << "language derivation match (BCP 47):" << capture;
if (!capture.contains(bcp47Re))
continue;
auto tag = mtx::bcp47::language_c::parse(to_utf8(capture));
if (tag.is_valid()) {
qDebug() << "derived BCP 47 language tag";
return tag;
}
}
}
// No full BCP 47 language tag was found. Now look for plain languages
// only, this time splitting by the full set of boundary characters.
escapedChars.clear();
for (auto c : cfg.m_boundaryCharsForDerivingTrackLanguagesFromFileNames)
escapedChars << QRegularExpression::escape(c);