support for reading chapters from ffmpeg metadata files

implements #3676
This commit is contained in:
Moritz Bunkus 2024-03-10 14:50:49 +01:00
parent d0618d0544
commit 690694b815
No known key found for this signature in database
GPG Key ID: 74AF00ADF2E32C85
7 changed files with 169 additions and 8 deletions

View File

@ -4,6 +4,8 @@
* translations: added a Belarusian translation of the programs & the man pages * translations: added a Belarusian translation of the programs & the man pages
by prydespar (see `AUTHORS`). by prydespar (see `AUTHORS`).
* mkvmerge, MKVToolNix GUI's chapter editor: added support for reading
chapters from ffmpeg metadata files. Implements #3676.
# Version 82.0 "I'm The President" 2024-01-02 # Version 82.0 "I'm The President" 2024-01-02

View File

@ -16,6 +16,7 @@
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <optional>
#include <QRegularExpression> #include <QRegularExpression>
@ -29,6 +30,7 @@
#include "common/error.h" #include "common/error.h"
#include "common/iso3166.h" #include "common/iso3166.h"
#include "common/locale.h" #include "common/locale.h"
#include "common/math_fwd.h"
#include "common/mm_io_x.h" #include "common/mm_io_x.h"
#include "common/mm_file_io.h" #include "common/mm_file_io.h"
#include "common/mm_proxy_io.h" #include "common/mm_proxy_io.h"
@ -350,6 +352,146 @@ parse_simple(mm_text_io_c *in,
return 0 == num ? kax_cptr{} : chaps; return 0 == num ? kax_cptr{} : chaps;
} }
/** \brief Check whether a text file is an ffmpeg metadata file

   Rewinds \a in and inspects only its first line: after surrounding
   whitespace has been stripped it must be exactly the signature
   <tt>;FFMETADATA1</tt>.

   \param in The text file to probe.

   \return \c true if the first line equals the signature; \c false if it
     differs or if no line could be read.
*/
bool
probe_ffmpeg_meta(mm_text_io_c *in) {
  in->setFilePointer(0);

  std::string first_line;

  if (in->getline2(first_line)) {
    mtx::string::strip(first_line);
    return first_line == ";FFMETADATA1";
  }

  return false;
}
/** \brief Parse the chapters contained in an ffmpeg metadata file

   The file is read line by line from its beginning. Empty lines and lines
   starting with <tt>;</tt> or <tt>#</tt> are skipped. A <tt>[CHAPTER]</tt>
   section header (matched case-insensitively) starts a new chapter; any
   other section header ends the current one. Inside a chapter section the
   keys <tt>TITLE</tt>, <tt>START</tt>, <tt>END</tt> and <tt>TIMEBASE</tt>
   are evaluated (case-insensitively); all other lines are ignored.

   One chapter atom is created per chapter section that has a usable
   <tt>START</tt> value. All atoms are placed into a single edition.

   \param in The text file to read from.
   \param min_ts Chapters starting before this timestamp are dropped.
   \param max_ts Chapters starting after this timestamp are dropped unless
     the value is -1.
   \param offset Subtracted from both the start and the end timestamp of
     each chapter that is kept.
   \param language Language for the chapter names. If invalid,
     \c g_default_language is used; if that is invalid as well,
     <tt>eng</tt> is used.
   \param charset Source character set; only used if the file does not
     start with a byte order mark.

   \return The chapters, or an empty pointer if no chapter was accepted.
*/
kax_cptr
parse_ffmpeg_meta(mm_text_io_c *in,
                  int64_t min_ts,
                  int64_t max_ts,
                  int64_t offset,
                  mtx::bcp47::language_c const &language,
                  std::string const &charset) {
  in->setFilePointer(0);

  std::string line, title;
  std::optional<int64_t> start_scaled, end_scaled;
  mtx_mp_rational_t time_base;
  kax_cptr chapters;
  libmatroska::KaxEditionEntry *edition{};
  charset_converter_cptr cc_utf8;

  bool in_chapter = false;
  bool do_convert = in->get_byte_order_mark() == byte_order_mark_e::none;

  if (do_convert)
    cc_utf8 = charset_converter_c::init(charset);

  auto use_language = language.is_valid()           ? language
                    : g_default_language.is_valid() ? g_default_language
                    :                                 mtx::bcp47::language_c::parse("eng"s);

  QRegularExpression
    start_line_re{    Q("^start *=([0-9]+)"),             QRegularExpression::CaseInsensitiveOption},
    end_line_re{      Q("^end *=([0-9]+)"),               QRegularExpression::CaseInsensitiveOption},
    title_line_re{    Q("^title *=(.*)"),                 QRegularExpression::CaseInsensitiveOption},
    time_base_line_re{Q("^timebase *=([0-9]+)/([0-9]+)"), QRegularExpression::CaseInsensitiveOption};

  QRegularExpressionMatch matches;

  // ffmpeg requires '=', ';', '#' and '\' in values to be escaped with a
  // backslash. Drop each escaping backslash and keep the character that
  // follows it. A lone backslash at the very end is kept as it is; values
  // continued on the next line are not joined.
  auto unescape = [](std::string const &escaped) {
    std::string plain;
    plain.reserve(escaped.size());

    auto const size = escaped.size();

    for (std::string::size_type idx = 0; idx < size; ++idx) {
      if ((escaped[idx] == '\\') && ((idx + 1) < size))
        ++idx;

      plain += escaped[idx];
    }

    return plain;
  };

  // Converts the first capture group of the current match to a 64-bit
  // integer. Returns an empty optional if the conversion fails (e.g. the
  // number does not fit) so that bogus values are not mistaken for 0.
  auto captured_number = [&matches]() -> std::optional<int64_t> {
    bool ok    = false;
    auto value = matches.captured(1).toLongLong(&ok);

    if (ok)
      return static_cast<int64_t>(value);

    return std::nullopt;
  };

  auto reset_values = [&]() {
    start_scaled.reset();
    end_scaled.reset();
    title.clear();

    // Without a TIMEBASE line ffmpeg defines START/END to be in
    // nanoseconds, hence a scaling factor of 1.
    time_base = mtx::rational(1, 1);
  };

  // Turns the values collected for the current section into a chapter atom
  // if they are usable. The collected values are reset in any case.
  auto add_chapter_atom = [&]() {
    if (!start_scaled || !in_chapter) {
      reset_values();
      return;
    }

    auto start = mtx::to_int_rounded(*start_scaled * time_base);

    if ((start < min_ts) || ((max_ts != -1) && (start > max_ts))) {
      reset_values();
      return;
    }

    // The container and its single edition are created lazily so that an
    // empty pointer is returned if no chapter is accepted at all.
    if (!chapters) {
      chapters = std::make_shared<libmatroska::KaxChapters>();
      edition  = &get_child<libmatroska::KaxEditionEntry>(*chapters);
    }

    auto &atom = add_empty_child<libmatroska::KaxChapterAtom>(*edition);
    get_child<libmatroska::KaxChapterUID>(atom).SetValue(create_unique_number(UNIQUE_CHAPTER_IDS));
    get_child<libmatroska::KaxChapterTimeStart>(atom).SetValue(start - offset);

    if (end_scaled) {
      auto end = mtx::to_int_rounded(*end_scaled * time_base);
      get_child<libmatroska::KaxChapterTimeEnd>(atom).SetValue(end - offset);
    }

    auto &display = get_child<libmatroska::KaxChapterDisplay>(atom);
    get_child<libmatroska::KaxChapterString>(display).SetValueUTF8(title);

    if (use_language.is_valid()) {
      get_child<libmatroska::KaxChapterLanguage>(display).SetValue(use_language.get_closest_iso639_2_alpha_3_code());

      if (!mtx::bcp47::language_c::is_disabled())
        get_child<libmatroska::KaxChapLanguageIETF>(display).SetValue(use_language.format());
      else
        delete_children<libmatroska::KaxChapLanguageIETF>(display);
    }

    if (!g_default_country.empty())
      get_child<libmatroska::KaxChapterCountry>(display).SetValue(g_default_country);

    reset_values();
  };

  while (in->getline2(line)) {
    if (do_convert)
      line = cc_utf8->utf8(line);

    mtx::string::strip(line);

    if (line.empty() || (line[0] == ';') || (line[0] == '#'))
      continue;

    if (mtx::string::to_lower_ascii(line) == "[chapter]") {
      // A new chapter section begins; flush the previous one first.
      add_chapter_atom();
      in_chapter = true;

    } else if (line[0] == '[') {
      // Any other section ends the current chapter section.
      add_chapter_atom();
      in_chapter = false;

    } else if (!in_chapter)
      continue;

    else if ((matches = title_line_re.match(Q(line))).hasMatch())
      title = unescape(to_utf8(matches.captured(1)));

    else if ((matches = start_line_re.match(Q(line))).hasMatch()) {
      if (auto value = captured_number(); value)
        start_scaled = *value;

    } else if ((matches = end_line_re.match(Q(line))).hasMatch()) {
      if (auto value = captured_number(); value)
        end_scaled = *value;

    } else if ((matches = time_base_line_re.match(Q(line))).hasMatch()) {
      auto numerator   = matches.captured(1).toLongLong();
      auto denominator = matches.captured(2).toLongLong();

      // The time base is given in seconds; scale it so that multiplying
      // START/END with it yields nanoseconds. Zero values (which is also
      // what a failed conversion yields) are ignored.
      if ((numerator != 0) && (denominator != 0))
        time_base = mtx::rational(numerator, denominator) * 1'000'000'000;
    }
  }

  // The last chapter section is not followed by another section header.
  add_chapter_atom();

  return chapters;
}
/** \brief Probe a file for different chapter formats and parse the file. /** \brief Probe a file for different chapter formats and parse the file.
The file \a file_name is opened and checked for supported chapter formats. The file \a file_name is opened and checked for supported chapter formats.
@ -483,6 +625,11 @@ parse(mm_text_io_c *in,
*format = format_e::cue; *format = format_e::cue;
return parse_cue(in, min_ts, max_ts, offset, language, charset, tags); return parse_cue(in, min_ts, max_ts, offset, language, charset, tags);
} else if (probe_ffmpeg_meta(in)) {
if (format)
*format = format_e::ffmpeg_meta;
return parse_ffmpeg_meta(in, min_ts, max_ts, offset, language, charset);
} else if (format) } else if (format)
*format = format_e::xml; *format = format_e::xml;

View File

@ -54,6 +54,7 @@ enum class format_e {
xml, xml,
ogg, ogg,
cue, cue,
ffmpeg_meta,
}; };
mtx::chapters::kax_cptr mtx::chapters::kax_cptr

View File

@ -193,7 +193,7 @@ Tool::selectFileToOpen(bool append) {
#endif // HAVE_DVDREAD #endif // HAVE_DVDREAD
auto fileNames = Util::getOpenFileNames(this, append ? QY("Append files in chapter editor") : QY("Open files in chapter editor"), Util::Settings::get().lastOpenDirPath(), auto fileNames = Util::getOpenFileNames(this, append ? QY("Append files in chapter editor") : QY("Open files in chapter editor"), Util::Settings::get().lastOpenDirPath(),
QY("Supported file types") + Q(" (*.cue%1 *.mpls *.mkv *.mka *.mks *.mk3d *.txt *.webm *.xml);;").arg(ifo) + QY("Supported file types") + Q(" (*.cue%1 *.meta *.mpls *.mkv *.mka *.mks *.mk3d *.txt *.webm *.xml);;").arg(ifo) +
QY("Matroska files") + Q(" (*.mkv *.mka *.mks *.mk3d);;") + QY("Matroska files") + Q(" (*.mkv *.mka *.mks *.mk3d);;") +
QY("WebM files") + Q(" (*.webm);;") + QY("WebM files") + Q(" (*.webm);;") +
QY("Blu-ray playlist files") + Q(" (*.mpls);;") + QY("Blu-ray playlist files") + Q(" (*.mpls);;") +
@ -201,6 +201,7 @@ Tool::selectFileToOpen(bool append) {
QY("XML chapter files") + Q(" (*.xml);;") + QY("XML chapter files") + Q(" (*.xml);;") +
QY("Simple OGM-style chapter files") + Q(" (*.txt);;") + QY("Simple OGM-style chapter files") + Q(" (*.txt);;") +
QY("Cue sheet files") + Q(" (*.cue);;") + QY("Cue sheet files") + Q(" (*.cue);;") +
QY("ffmpeg metadata files") + Q(" (*.meta);;") +
QY("All files") + Q(" (*)")); QY("All files") + Q(" (*)"));
if (fileNames.isEmpty()) if (fileNames.isEmpty())
return; return;

View File

@ -26,7 +26,7 @@ class FileIdentificationWorkerPrivate {
QVector<IdentificationPack> m_toIdentify; QVector<IdentificationPack> m_toIdentify;
QMutex m_mutex; QMutex m_mutex;
QAtomicInteger<bool> m_abortPlaylistScan; QAtomicInteger<bool> m_abortPlaylistScan;
QRegularExpression m_simpleChaptersRE, m_xmlChaptersRE, m_xmlSegmentInfoRE, m_xmlTagsRE; QRegularExpression m_simpleChaptersRE, m_ffmpegMetaChaptersRE, m_xmlChaptersRE, m_xmlSegmentInfoRE, m_xmlTagsRE;
explicit FileIdentificationWorkerPrivate() explicit FileIdentificationWorkerPrivate()
{ {
@ -39,11 +39,12 @@ FileIdentificationWorker::FileIdentificationWorker(QObject *parent)
: QObject{parent} : QObject{parent}
, p_ptr{new FileIdentificationWorkerPrivate{}} , p_ptr{new FileIdentificationWorkerPrivate{}}
{ {
auto p = p_func(); auto p = p_func();
p->m_simpleChaptersRE = QRegularExpression{R"(^CHAPTER\d{2}=[\s\S]*CHAPTER\d{2}NAME=)"}; p->m_simpleChaptersRE = QRegularExpression{R"(^CHAPTER\d{2}=[\s\S]*CHAPTER\d{2}NAME=)"};
p->m_xmlChaptersRE = QRegularExpression{R"(^(<!--.*?-->\s*)*<\?xml[^>]+version[\s\S]*?\?>[\s\S]*?<Chapters>)"}; p->m_xmlChaptersRE = QRegularExpression{R"(^(<!--.*?-->\s*)*<\?xml[^>]+version[\s\S]*?\?>[\s\S]*?<Chapters>)"};
p->m_xmlSegmentInfoRE = QRegularExpression{R"(^(<!--.*?-->\s*)*<\?xml[^>]+version[\s\S]*?\?>[\s\S]*?<Info>)"}; p->m_xmlSegmentInfoRE = QRegularExpression{R"(^(<!--.*?-->\s*)*<\?xml[^>]+version[\s\S]*?\?>[\s\S]*?<Info>)"};
p->m_xmlTagsRE = QRegularExpression{R"(^(<!--.*?-->\s*)*<\?xml[^>]+version[\s\S]*?\?>[\s\S]*?<Tags>)"}; p->m_xmlTagsRE = QRegularExpression{R"(^(<!--.*?-->\s*)*<\?xml[^>]+version[\s\S]*?\?>[\s\S]*?<Tags>)"};
p->m_ffmpegMetaChaptersRE = QRegularExpression{R"(;FFMETADATA1)"};
} }
FileIdentificationWorker::~FileIdentificationWorker() { FileIdentificationWorker::~FileIdentificationWorker() {
@ -179,7 +180,7 @@ FileIdentificationWorker::determineIfFileThatShouldBeSelectedElsewhere(QString c
auto content = QString::fromUtf8(bytes); auto content = QString::fromUtf8(bytes);
if (content.contains(p->m_simpleChaptersRE) || content.contains(p->m_xmlChaptersRE)) if (content.contains(p->m_simpleChaptersRE) || content.contains(p->m_ffmpegMetaChaptersRE) || content.contains(p->m_xmlChaptersRE))
return IdentificationPack::FileType::Chapters; return IdentificationPack::FileType::Chapters;
else if (content.contains(p->m_xmlSegmentInfoRE)) else if (content.contains(p->m_xmlSegmentInfoRE))

View File

@ -609,3 +609,4 @@ T_0761hevc_dolby_vision_dual_layer_single_track_annex_b:4a40b17c0d60a1c82c109975
T_0762dovi_combining_bl_and_el:880014a766cf6b5929cf180bea71d046-2eeed24f6b3e3f2d6da0e8d25896089b-88193925316d397c23c6a3a6cb2ab740-ef854e7bae7c85ad46b1a0d71d09d3fc:passed:20231129-202129:0.387596229 T_0762dovi_combining_bl_and_el:880014a766cf6b5929cf180bea71d046-2eeed24f6b3e3f2d6da0e8d25896089b-88193925316d397c23c6a3a6cb2ab740-ef854e7bae7c85ad46b1a0d71d09d3fc:passed:20231129-202129:0.387596229
T_0763vp9_alpha_channel_data:ac329c9d810d9b19fb3ffcde6f12af2a-OK:passed:20231203-190856:0.059109676 T_0763vp9_alpha_channel_data:ac329c9d810d9b19fb3ffcde6f12af2a-OK:passed:20231203-190856:0.059109676
T_0764ui_locale_be_BY:a44c54eadfb4c8fbdc104b75aa1de1c1-72b98d331b58a0f95e10159fca191b52:passed:20240120-191944:0.043782405 T_0764ui_locale_be_BY:a44c54eadfb4c8fbdc104b75aa1de1c1-72b98d331b58a0f95e10159fca191b52:passed:20240120-191944:0.043782405
T_0765ffmpeg_metadata_chapters:f16630c4019413c98b75b959a5697391-6b2b843310e80367b5fe5aaa8a5d51c4:passed:20240310-145016:0.047790171

View File

@ -0,0 +1,8 @@
#!/usr/bin/ruby -w
# T_765ffmpeg_metadata_chapters
describe "mkvmerge / reading chapters from ffmpeg metadata files"

# Run one merge test per ffmpeg metadata file, in sorted (stable) order,
# passing the file to mkvmerge via --chapters.
meta_files = Dir.glob("data/chapters/ffmpeg-metadata/*.meta").sort

meta_files.each do |meta_file|
  test_merge "data/subtitles/srt/ven.srt", :args => "--chapters #{meta_file}"
end