diff --git a/NEWS.md b/NEWS.md index 57346e03a..a04ebb827 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,9 @@ * MKVToolNix GUI: multiplexer: deriving track languages from file names: the default list of boundary characters now includes `-`. Part of the implementation of #3173. +* MKVToolNix GUI: multiplexer: deriving track languages from file names: the + GUI can now detect full BCP 47/RFC 5646 language tags in file names. Part of + the implementation of #3173. ## Build system changes diff --git a/src/mkvtoolnix-gui/merge/source_file.cpp b/src/mkvtoolnix-gui/merge/source_file.cpp index bc2574b5d..b698fbf00 100644 --- a/src/mkvtoolnix-gui/merge/source_file.cpp +++ b/src/mkvtoolnix-gui/merge/source_file.cpp @@ -369,6 +369,42 @@ SourceFile::deriveLanguageFromFileName() { QStringList escapedChars; + // First try to detect full BCP 47 language tags if they contain at + // least two components & don't start with x-. As BCP 47 tags + // contain '-' characters, don't split by them. + for (auto c : cfg.m_boundaryCharsForDerivingTrackLanguagesFromFileNames) + if (c != L'-') + escapedChars << QRegularExpression::escape(c); + + QRegularExpression bcp47Re{Q("^[^x][a-z]+-"), QRegularExpression::CaseInsensitiveOption}; + + if (!escapedChars.isEmpty()) { + auto splitRE = QRegularExpression{Q("(?:%1)+").arg(escapedChars.join(Q("|")))}; + auto allCaptures = fileName.split(splitRE); + + for (auto captureItr = allCaptures.rbegin(), captureEnd = allCaptures.rend(); captureItr != captureEnd; ++captureItr) { + auto &capture = *captureItr; + + if (capture.isEmpty()) + continue; + + qDebug() << "language derivation match (BCP 47):" << capture; + + if (!capture.contains(bcp47Re)) + continue; + + auto tag = mtx::bcp47::language_c::parse(to_utf8(capture)); + if (tag.is_valid()) { + qDebug() << "derived BCP 47 language tag"; + return tag; + } + } + } + + // No full BCP 47 language tag found. Now look for languages only + // with the full set of boundary characters. + escapedChars.clear(); + for (auto c : cfg.m_boundaryCharsForDerivingTrackLanguagesFromFileNames) escapedChars << QRegularExpression::escape(c);