mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-29 06:15:24 +00:00
GUI: mux: detect full BCP 47/RFC 5646 language tags in file names
Implements the rest of #3173.
This commit is contained in:
parent
5dc52310c2
commit
9352a69bab
3
NEWS.md
3
NEWS.md
@ -5,6 +5,9 @@
|
||||
* MKVToolNix GUI: multiplexer: deriving track languages from file names: the
|
||||
default list of boundary characters now includes `-`. Part of the
|
||||
implementation of #3173.
|
||||
* MKVToolNix GUI: multiplexer: deriving track languages from file names: the
|
||||
GUI can now detect full BCP 47/RFC 5646 language tags in file names. Part of
|
||||
the implementation of #3173.
|
||||
|
||||
## Build system changes
|
||||
|
||||
|
@ -369,6 +369,42 @@ SourceFile::deriveLanguageFromFileName() {
|
||||
|
||||
QStringList escapedChars;
|
||||
|
||||
// First try to detect full BCP 47 language tags if they contain at
|
||||
// least two components & don't start with x-. As BCP 47 tags
|
||||
// contain '-' characters, don't split by them.
|
||||
for (auto c : cfg.m_boundaryCharsForDerivingTrackLanguagesFromFileNames)
|
||||
if (c != L'-')
|
||||
escapedChars << QRegularExpression::escape(c);
|
||||
|
||||
QRegularExpression bcp47Re{Q("^[^x][a-z]+-"), QRegularExpression::CaseInsensitiveOption};
|
||||
|
||||
if (!escapedChars.isEmpty()) {
|
||||
auto splitRE = QRegularExpression{Q("(?:%1)+").arg(escapedChars.join(Q("|")))};
|
||||
auto allCaptures = fileName.split(splitRE);
|
||||
|
||||
for (auto captureItr = allCaptures.rbegin(), captureEnd = allCaptures.rend(); captureItr != captureEnd; ++captureItr) {
|
||||
auto &capture = *captureItr;
|
||||
|
||||
if (capture.isEmpty())
|
||||
continue;
|
||||
|
||||
qDebug() << "language derivation match (BCP 47):" << capture;
|
||||
|
||||
if (!capture.contains(bcp47Re))
|
||||
continue;
|
||||
|
||||
auto tag = mtx::bcp47::language_c::parse(to_utf8(capture));
|
||||
if (tag.is_valid()) {
|
||||
qDebug() << "derived BCP 47 language tag";
|
||||
return tag;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No full BCP 47 language tag found. Now look for languages only
|
||||
// with the full set of boundary characters.
|
||||
escapedChars.clear();
|
||||
|
||||
for (auto c : cfg.m_boundaryCharsForDerivingTrackLanguagesFromFileNames)
|
||||
escapedChars << QRegularExpression::escape(c);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user