Added a quick Matroska file parser that uses the seek head entries for locating specific level 1 elements. Changed the mkvextract modules to use this parser.

This commit is contained in:
Moritz Bunkus 2004-09-07 21:32:37 +00:00
parent 6cb028af63
commit 6b7a173a64
9 changed files with 458 additions and 553 deletions

View File

@ -1,3 +1,9 @@
2004-09-07 Moritz Bunkus <moritz@bunkus.org>
* mkvextract: Sped up the extraction of attachments, chapters,
cuesheets and tags by using the seek head information and not
parsing the full file each time.
2004-09-02 Moritz Bunkus <moritz@bunkus.org>
* mkvmerge: bug fix: The Matroska reader was not handling very big

226
src/common/quickparser.cpp Normal file
View File

@ -0,0 +1,226 @@
/*
* mkvmerge -- utility for splicing together matroska files
* from component media subtypes
*
* Distributed under the GPL
* see the file COPYING for details
* or visit http://www.gnu.org/copyleft/gpl.html
*
* $Id$
*
* quick Matroska file parsing
*
* Written by Moritz Bunkus <moritz@bunkus.org>.
*/
#include <ebml/EbmlHead.h>
#include <ebml/EbmlStream.h>
#include <matroska/KaxCluster.h>
#include <matroska/KaxSegment.h>
#include <matroska/KaxSeekHead.h>
#include "commonebml.h"
#include "error.h"
#include "quickparser.h"
using namespace std;
using namespace libebml;
using namespace libmatroska;
// Builds the list of level 1 elements ("children") of the first segment
// found in '_in'.
//
// The file is scanned from its beginning: the EBML head is skipped, the
// first segment is located, and its level 1 elements are recorded one after
// another. Unless 'parse_fully' is true the scan stops at the first cluster.
// Afterwards every recorded seek head is read and each of its entries that
// points to a position not yet known is appended to the list (with a size of
// -1). Seek heads added that way are processed as well because the list is
// walked by index while it grows.
//
// Throws error_c if no EBML head or no segment could be found.
kax_quickparser_c::kax_quickparser_c(mm_io_c &_in,
                                     bool parse_fully):
  in(_in) {
  int upper_lvl_el;
  size_t k;
  // Elements for different levels
  EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL;
  segment_child_t new_child, *child;
  vector<segment_child_t>::iterator it;

  in.setFilePointer(0, seek_beginning);
  EbmlStream es(in);

  // Find the EbmlHead element. Must be the first one.
  l0 = es.FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL);
  if (l0 == NULL)
    throw error_c(_("Error: No EBML head found."));

  // Don't verify its data for now.
  l0->SkipData(es, l0->Generic().Context);
  delete l0;

  while (1) {
    // Next element must be a segment
    l0 = es.FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL);
    if (l0 == NULL)
      throw error_c(_("No segment/level 0 element found."));
    if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId)
      break;

    l0->SkipData(es, l0->Generic().Context);
    delete l0;
  }

  // First pass: record the level 1 elements in the order they are stored.
  upper_lvl_el = 0;
  l1 = es.FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL,
                          true, 1);
  while ((l1 != NULL) && (upper_lvl_el <= 0)) {
    if (is_id(l1, KaxCluster) && !parse_fully)
      break;

    new_child.pos = l1->GetElementPosition();
    new_child.size = l1->ElementSize();
    new_child.id = l1->Generic().GlobalId;
    children.push_back(new_child);

    // Stop as soon as an element starts behind the end of the segment.
    if ((l0->GetElementPosition() + l0->ElementSize()) <
        l1->GetElementPosition())
      break;

    if (upper_lvl_el < 0) {
      upper_lvl_el++;
      if (upper_lvl_el < 0)
        break;
    }

    l1->SkipData(es, l1->Generic().Context);
    delete l1;
    l1 = es.FindNextElement(l0->Generic().Context, upper_lvl_el,
                            0xFFFFFFFFL, true);
  } // while (l1 != NULL)

  // Whichever way the loop was left, at most one element is still owned
  // here. Release it at this single place so that none of the exits leaks
  // it ('delete' on NULL is a no-op).
  delete l1;
  l1 = NULL;

  // Second pass: read the seek heads. children.size() is re-evaluated on
  // every iteration on purpose -- entries appended below are visited, too.
  for (k = 0; k < children.size(); k++) {
    // Only valid until the next push_back(); it is not used after one.
    child = &children[k];
    if (child->id == KaxSeekHead::ClassInfos.GlobalId) {
      EbmlMaster *m;
      KaxSeek *seek;
      KaxSeekID *seek_id;
      int64_t pos;
      int i;
      bool found;

      in.setFilePointer(child->pos, seek_beginning);
      upper_lvl_el = 0;
      l1 = es.FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL,
                              true, 1);
      if (l1 == NULL)
        continue;
      if (!is_id(l1, KaxSeekHead)) {
        delete l1;
        continue;
      }

      m = static_cast<EbmlMaster *>(l1);
      m->Read(es, l1->Generic().Context, upper_lvl_el, l2, true);
      for (i = 0; i < m->ListSize(); i++) {
        if (!is_id((*m)[i], KaxSeek))
          continue;

        seek = static_cast<KaxSeek *>((*m)[i]);
        seek_id = FINDFIRST(seek, KaxSeekID);
        // The stored location is turned into a file position by adding the
        // segment's own position and head size.
        // NOTE(review): with that offset added the 'pos == 0' test below
        // looks like it can hardly ever be true -- confirm the intent.
        pos = seek->Location() + l0->GetElementPosition() + l0->HeadSize();
        if ((pos == 0) || (seek_id == NULL))
          continue;

        // Do not add an element twice if its position is already known.
        found = false;
        foreach(it, children)
          if ((*it).pos == pos) {
            found = true;
            break;
          }
        if (!found) {
          new_child.pos = pos;
          new_child.size = -1;
          new_child.id = EbmlId(seek_id->GetBuffer(), seek_id->GetSize());
          children.push_back(new_child);
        }
      }
      delete l1;
    }
  }

  delete l0;

  current_child = children.begin();
}
int
kax_quickparser_c::num_elements(const EbmlId &id)
const {
vector<segment_child_t>::const_iterator it;
int num;
num = 0;
foreach(it, children)
if ((*it).id == id)
num++;
return num;
}
// Advances the internal cursor to the next recorded element whose ID equals
// 'id' and returns a pointer to its entry; the cursor is left on the entry
// following the match. Returns NULL once the end of the list is reached.
segment_child_t *
kax_quickparser_c::get_next(const EbmlId &id) {
  for (; current_child != children.end(); current_child++) {
    if (!(current_child->id == id))
      continue;

    segment_child_t *match = &(*current_child);
    // Step past the match so that the next call continues behind it.
    current_child++;
    return match;
  }

  return NULL;
}
void
kax_quickparser_c::reset() {
current_child = children.begin();
}
// Reads every recorded element whose ID matches 'callbacks.GlobalId' and
// merges them into a single master element.
//
// The first matching element that could be read becomes the result; the
// children of every further one are moved into it and the emptied element
// is deleted. Entries at whose position no element with the expected ID is
// found are skipped. The cursor used by get_next() is reset before and
// after the run.
//
// Returns the merged master (to be deleted by the caller) or NULL if
// nothing was read or the result has no children.
EbmlMaster *
kax_quickparser_c::read_all(const EbmlCallbacks &callbacks) {
  EbmlMaster *merged = NULL;
  EbmlStream es(in);

  reset();
  while (true) {
    segment_child_t *entry = get_next(callbacks.GlobalId);
    if (entry == NULL)
      break;

    in.setFilePointer(entry->pos);
    int upper_lvl_el = 0;
    EbmlElement *found =
      es.FindNextElement(KaxSegment::ClassInfos.Context, upper_lvl_el,
                         0xFFFFFFFFL, true);
    if (found == NULL)
      continue;
    if (!(found->Generic().GlobalId == callbacks.GlobalId)) {
      // Something else is stored at the recorded position.
      delete found;
      continue;
    }

    EbmlElement *upper = NULL;
    found->Read(es, callbacks.Context, upper_lvl_el, upper, true);

    if (merged == NULL) {
      merged = static_cast<EbmlMaster *>(found);
      continue;
    }

    // Move the children over one by one, then drop the empty shell.
    EbmlMaster *current = static_cast<EbmlMaster *>(found);
    while (current->ListSize() > 0) {
      merged->PushElement(*(*current)[0]);
      current->Remove(0);
    }
    delete found;
  }
  reset();

  if (merged == NULL)
    return NULL;
  if (merged->ListSize() == 0) {
    delete merged;
    return NULL;
  }

  return merged;
}

52
src/common/quickparser.h Normal file
View File

@ -0,0 +1,52 @@
/*
* mkvmerge -- utility for splicing together matroska files
* from component media subtypes
*
* Distributed under the GPL
* see the file COPYING for details
* or visit http://www.gnu.org/copyleft/gpl.html
*
* $Id$
*
* quick Matroska file parsing
*
* Written by Moritz Bunkus <moritz@bunkus.org>.
*/
#ifndef __QUICKPARSER_H
#define __QUICKPARSER_H
#include <vector>
#include <ebml/EbmlElement.h>
#include "mm_io.h"
using namespace std;
using namespace libebml;
// Describes one level 1 element of a segment as recorded by
// kax_quickparser_c.
typedef struct segment_child_t {
  int64_t pos;                  // position of the element in the file
  int64_t size;                 // element size; -1 if it is not known
  EbmlId id;                    // EBML ID of the element

  // Start with defined values instead of leaving pos and size
  // uninitialized; -1 is the "unknown size" marker the parser uses.
  segment_child_t(): pos(0), size(-1), id((uint32_t)0, 0) {}
} segment_child_t;
// Quick Matroska parser: collects the positions of a segment's level 1
// elements on construction and hands them out by EBML ID afterwards.
class kax_quickparser_c {
private:
  // All level 1 elements that were recorded by the constructor.
  vector<segment_child_t> children;
  // Cursor into 'children' used by get_next(); rewound by reset().
  vector<segment_child_t>::iterator current_child;
  // The file being parsed. Only a reference is kept.
  mm_io_c &in;

public:
  // Scans '_in'; see the definition for what 'parse_fully' changes.
  kax_quickparser_c(mm_io_c &_in, bool parse_fully = false);
  virtual ~kax_quickparser_c() {}

  // Number of recorded elements with the given ID.
  virtual int num_elements(const EbmlId &id) const;
  // Next recorded element with the given ID, or NULL at the end.
  virtual segment_child_t *get_next(const EbmlId &id);
  // Rewinds the cursor used by get_next().
  virtual void reset();
  // Reads and merges all elements of the given type.
  virtual EbmlMaster *read_all(const EbmlCallbacks &callbacks);
};
#endif

View File

@ -65,262 +65,118 @@ extern "C" {
#include "matroska.h"
#include "mkvextract.h"
#include "mm_io.h"
#include "quickparser.h"
using namespace libmatroska;
using namespace std;
static void
handle_attachments(mm_io_c *in,
EbmlStream *es,
EbmlElement *l0,
int64_t pos) {
KaxAttachments *atts;
handle_attachments(KaxAttachments *atts) {
KaxAttached *att;
KaxFileData *fdata;
EbmlElement *l1, *l2;
int upper_lvl_el, i, k;
EbmlElement *e;
int i, k;
string name, type;
int64_t size, id;
char *str;
bool found;
mm_io_c *out;
out = NULL;
in->save_pos(pos);
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL,
true);
for (i = 0; i < atts->ListSize(); i++) {
att = dynamic_cast<KaxAttached *>((*atts)[i]);
assert(att != NULL);
if ((l1 != NULL) && (EbmlId(*l1) == KaxAttachments::ClassInfos.GlobalId)) {
atts = (KaxAttachments *)l1;
l2 = NULL;
upper_lvl_el = 0;
atts->Read(*es, KaxAttachments::ClassInfos.Context, upper_lvl_el, l2,
true);
for (i = 0; i < atts->ListSize(); i++) {
att = (KaxAttached *)(*atts)[i];
if (EbmlId(*att) == KaxAttached::ClassInfos.GlobalId) {
name = "";
type = "";
size = -1;
id = -1;
fdata = NULL;
name = "";
type = "";
size = -1;
id = -1;
fdata = NULL;
for (k = 0; k < att->ListSize(); k++) {
l2 = (*att)[k];
for (k = 0; k < att->ListSize(); k++) {
e = (*att)[k];
if (EbmlId(*l2) == KaxFileName::ClassInfos.GlobalId) {
KaxFileName &fname = *static_cast<KaxFileName *>(l2);
str = UTFstring_to_cstr(UTFstring(fname));
name = str;
safefree(str);
if (EbmlId(*e) == KaxFileName::ClassInfos.GlobalId) {
KaxFileName &fname = *static_cast<KaxFileName *>(e);
str = UTFstring_to_cstr(UTFstring(fname));
name = str;
safefree(str);
} else if (EbmlId(*l2) == KaxMimeType::ClassInfos.GlobalId) {
KaxMimeType &mtype = *static_cast<KaxMimeType *>(l2);
type = string(mtype);
} else if (EbmlId(*e) == KaxMimeType::ClassInfos.GlobalId) {
KaxMimeType &mtype = *static_cast<KaxMimeType *>(e);
type = string(mtype);
} else if (EbmlId(*l2) == KaxFileUID::ClassInfos.GlobalId) {
KaxFileUID &fuid = *static_cast<KaxFileUID *>(l2);
id = uint32(fuid);
} else if (EbmlId(*e) == KaxFileUID::ClassInfos.GlobalId) {
KaxFileUID &fuid = *static_cast<KaxFileUID *>(e);
id = uint32(fuid);
} else if (EbmlId(*l2) == KaxFileData::ClassInfos.GlobalId) {
fdata = (KaxFileData *)l2;
size = fdata->GetSize();
} else if (EbmlId(*e) == KaxFileData::ClassInfos.GlobalId) {
fdata = (KaxFileData *)e;
size = fdata->GetSize();
}
}
if ((id != -1) && (size != -1) && (type.length() != 0)) {
found = false;
for (k = 0; k < tracks.size(); k++)
if (tracks[k].tid == id) {
found = true;
break;
}
if (found && !tracks[k].done) {
mxinfo(_("The attachment #%lld, MIME type %s, size %lld, "
"is written to '%s'.\n"), id, type.c_str(), size,
tracks[k].out_name);
try {
out = new mm_io_c(tracks[k].out_name, MODE_WRITE);
} catch (...) {
mxerror(_("The file '%s' could not be opened for writing "
"(%d, %s).\n"),
tracks[k].out_name, errno, strerror(errno));
}
out->write(fdata->GetBuffer(), fdata->GetSize());
delete out;
tracks[k].done = true;
}
}
}
}
delete l1;
}
if ((id != -1) && (size != -1) && (type.length() != 0)) {
found = false;
in->restore_pos();
for (k = 0; k < tracks.size(); k++)
if (tracks[k].tid == id) {
found = true;
break;
}
if (found && !tracks[k].done) {
mxinfo(_("The attachment #%lld, MIME type %s, size %lld, "
"is written to '%s'.\n"), id, type.c_str(), size,
tracks[k].out_name);
try {
out = new mm_io_c(tracks[k].out_name, MODE_WRITE);
} catch (...) {
mxerror(_("The file '%s' could not be opened for writing "
"(%d, %s).\n"),
tracks[k].out_name, errno, strerror(errno));
}
out->write(fdata->GetBuffer(), fdata->GetSize());
delete out;
tracks[k].done = true;
}
}
}
}
void
extract_attachments(const char *file_name) {
int upper_lvl_el, i;
// Elements for different levels
EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL;
EbmlStream *es;
extract_attachments(const char *file_name,
bool parse_fully) {
mm_io_c *in;
bool done;
mm_stdio_c out;
kax_quickparser_c *qp;
KaxAttachments *attachments;
int i;
// open input file
try {
in = new mm_io_c(file_name, MODE_READ);
qp = new kax_quickparser_c(*in, parse_fully);
} catch (std::exception &ex) {
show_error(_("The file '%s' could not be opened for reading (%s)."),
file_name, strerror(errno));
return;
}
try {
es = new EbmlStream(*in);
attachments =
dynamic_cast<KaxAttachments *>(qp->read_all(KaxAttachments::ClassInfos));
if (attachments != NULL) {
if (verbose > 0)
debug_dump_elements(attachments, 0);
// Find the EbmlHead element. Must be the first one.
l0 = es->FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL);
if (l0 == NULL) {
show_error(_("Error: No EBML head found."));
delete es;
handle_attachments(attachments);
return;
}
// Don't verify its data for now.
l0->SkipData(*es, l0->Generic().Context);
delete l0;
while (1) {
// Next element must be a segment
l0 = es->FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL);
if (l0 == NULL) {
show_error(_("No segment/level 0 element found."));
return;
}
if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) {
show_element(l0, 0, _("Segment"));
break;
}
show_element(l0, 0, _("Next level 0 element is not a segment but %s"),
l0->Generic().DebugName);
l0->SkipData(*es, l0->Generic().Context);
delete l0;
}
upper_lvl_el = 0;
done = false;
// We've got our segment, so let's find the attachments
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL,
true, 1);
while ((l1 != NULL) && (upper_lvl_el <= 0)) {
if (EbmlId(*l1) == KaxAttachments::ClassInfos.GlobalId) {
handle_attachments(in, es, l0, l1->GetElementPosition());
done = true;
for (i = 0; i < tracks.size(); i++)
if (!tracks[i].done) {
done = false;
break;
}
if (done)
break;
} else if (EbmlId(*l1) == KaxSeekHead::ClassInfos.GlobalId) {
int k;
EbmlElement *el;
KaxSeekHead &seek_head = *static_cast<KaxSeekHead *>(l1);
int64_t pos;
bool is_attachments;
i = 0;
seek_head.Read(*es, KaxSeekHead::ClassInfos.Context, i, el, true);
for (i = 0; i < seek_head.ListSize(); i++)
if (EbmlId(*seek_head[i]) == KaxSeek::ClassInfos.GlobalId) {
KaxSeek &seek = *static_cast<KaxSeek *>(seek_head[i]);
pos = -1;
is_attachments = false;
for (k = 0; k < seek.ListSize(); k++)
if (EbmlId(*seek[k]) == KaxSeekID::ClassInfos.GlobalId) {
KaxSeekID &sid = *static_cast<KaxSeekID *>(seek[k]);
EbmlId id(sid.GetBuffer(), sid.GetSize());
if (id == KaxAttachments::ClassInfos.GlobalId)
is_attachments = true;
} else if (EbmlId(*seek[k]) ==
KaxSeekPosition::ClassInfos.GlobalId)
pos = uint64(*static_cast<KaxSeekPosition *>(seek[k]));
if ((pos != -1) && is_attachments) {
handle_attachments(in, es, l0,
((KaxSegment *)l0)->GetGlobalPosition(pos));
done = true;
for (k = 0; k < tracks.size(); k++)
if (!tracks[k].done) {
done = false;
break;
}
if (done)
break;
}
}
} else
l1->SkipData(*es, l1->Generic().Context);
if (done)
break;
if (!in_parent(l0)) {
delete l1;
break;
}
if (upper_lvl_el > 0) {
upper_lvl_el--;
if (upper_lvl_el > 0)
break;
delete l1;
l1 = l2;
continue;
} else if (upper_lvl_el < 0) {
upper_lvl_el++;
if (upper_lvl_el < 0)
break;
}
l1->SkipData(*es, l1->Generic().Context);
delete l1;
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el,
0xFFFFFFFFL, true);
} // while (l1 != NULL)
delete l0;
delete es;
delete in;
} catch (exception &ex) {
show_error(_("Caught exception: %s"), ex.what());
delete in;
delete attachments;
}
delete in;
delete qp;
for (i = 0; i < tracks.size(); i++)
if (!tracks[i].done)
mxinfo(_("An attachment with the ID %lld was not found.\n"),

View File

@ -39,10 +39,7 @@ extern "C" {
#include <ebml/EbmlVoid.h>
#include <matroska/FileKax.h>
#include <matroska/KaxAttached.h>
#include <matroska/KaxAttachments.h>
#include <matroska/KaxBlock.h>
#include <matroska/KaxBlockData.h>
#include <matroska/KaxChapters.h>
#include <matroska/KaxCluster.h>
#include <matroska/KaxClusterData.h>
@ -54,9 +51,6 @@ extern "C" {
#include <matroska/KaxSegment.h>
#include <matroska/KaxTags.h>
#include <matroska/KaxTracks.h>
#include <matroska/KaxTrackEntryData.h>
#include <matroska/KaxTrackAudio.h>
#include <matroska/KaxTrackVideo.h>
#include "chapters.h"
#include "common.h"
@ -64,135 +58,56 @@ extern "C" {
#include "matroska.h"
#include "mkvextract.h"
#include "mm_io.h"
#include "quickparser.h"
using namespace libmatroska;
using namespace std;
void
extract_chapters(const char *file_name,
bool chapter_format_simple) {
int upper_lvl_el;
// Elements for different levels
EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL;
EbmlStream *es;
bool chapter_format_simple,
bool parse_fully) {
EbmlMaster *m;
mm_io_c *in;
mm_stdio_c out;
bool chapters_extracted = false;
int next_chapter = 1;
kax_quickparser_c *qp;
KaxChapters *chapters;
// open input file
try {
in = new mm_io_c(file_name, MODE_READ);
qp = new kax_quickparser_c(*in, parse_fully);
} catch (std::exception &ex) {
show_error(_("The file '%s' could not be opened for reading (%s)."),
file_name, strerror(errno));
return;
}
try {
es = new EbmlStream(*in);
m = qp->read_all(KaxChapters::ClassInfos);
if (m != NULL) {
chapters = dynamic_cast<KaxChapters *>(m);
assert(chapters != NULL);
// Find the EbmlHead element. Must be the first one.
l0 = es->FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL);
if (l0 == NULL) {
show_error(_("Error: No EBML head found."));
delete es;
if (verbose > 0)
debug_dump_elements(chapters, 0);
return;
if (!chapter_format_simple) {
out.write_bom("UTF-8");
out.printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"\n"
"<!-- <!DOCTYPE Tags SYSTEM \"matroskatags.dtd\"> -->\n"
"\n"
"<Chapters>\n");
write_chapters_xml(chapters, &out);
out.printf("</Chapters>\n");
} else {
int dummy = 1;
write_chapters_simple(dummy, chapters, &out);
}
// Don't verify its data for now.
l0->SkipData(*es, l0->Generic().Context);
delete l0;
while (1) {
// Next element must be a segment
l0 = es->FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL);
if (l0 == NULL) {
show_error(_("No segment/level 0 element found."));
return;
}
if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) {
show_element(l0, 0, _("Segment"));
break;
}
show_element(l0, 0, _("Next level 0 element is not a segment but %s"),
l0->Generic().DebugName);
l0->SkipData(*es, l0->Generic().Context);
delete l0;
}
upper_lvl_el = 0;
// We've got our segment, so let's find the chapters
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL,
true, 1);
while ((l1 != NULL) && (upper_lvl_el <= 0)) {
if (EbmlId(*l1) == KaxChapters::ClassInfos.GlobalId) {
KaxChapters &chapters = *static_cast<KaxChapters *>(l1);
chapters.Read(*es, KaxChapters::ClassInfos.Context, upper_lvl_el, l2,
true);
if (verbose > 0)
debug_dump_elements(&chapters, 0);
if (!chapters_extracted && !chapter_format_simple) {
out.write_bom("UTF-8");
out.printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"\n"
"<!-- <!DOCTYPE Tags SYSTEM \"matroskatags.dtd\"> -->\n"
"\n"
"<Chapters>\n");
chapters_extracted = true;
}
if (chapter_format_simple)
write_chapters_simple(next_chapter, &chapters, &out);
else
write_chapters_xml(&chapters, &out);
} else
l1->SkipData(*es, l1->Generic().Context);
if (!in_parent(l0)) {
delete l1;
break;
}
if (upper_lvl_el > 0) {
upper_lvl_el--;
if (upper_lvl_el > 0)
break;
delete l1;
l1 = l2;
continue;
} else if (upper_lvl_el < 0) {
upper_lvl_el++;
if (upper_lvl_el < 0)
break;
}
l1->SkipData(*es, l1->Generic().Context);
delete l1;
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el,
0xFFFFFFFFL, true);
} // while (l1 != NULL)
delete l0;
delete es;
delete in;
} catch (exception &ex) {
show_error(_("Caught exception: %s"), ex.what());
delete in;
return;
delete chapters;
}
if (chapters_extracted && !chapter_format_simple)
out.printf("</Chapters>\n");
delete in;
delete qp;
}

View File

@ -54,6 +54,7 @@ extern "C" {
#include "matroska.h"
#include "mkvextract.h"
#include "mm_io.h"
#include "quickparser.h"
using namespace libmatroska;
using namespace std;
@ -345,137 +346,52 @@ write_cuesheet(const char *file_name,
}
void
extract_cuesheet(const char *file_name) {
int upper_lvl_el;
// Elements for different levels
EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL;
EbmlStream *es;
extract_cuesheet(const char *file_name,
bool parse_fully) {
mm_io_c *in;
mm_stdio_c out;
KaxChapters all_chapters;
KaxTags all_tags;
kax_quickparser_c *qp;
KaxChapters all_chapters, *chapters;
KaxEditionEntry *eentry;
KaxTags *all_tags;
int i, k;
// open input file
try {
in = new mm_io_c(file_name, MODE_READ);
qp = new kax_quickparser_c(*in, parse_fully);
} catch (std::exception &ex) {
show_error(_("The file '%s' could not be opened for reading (%s)."),
file_name, strerror(errno));
return;
}
try {
es = new EbmlStream(*in);
// Find the EbmlHead element. Must be the first one.
l0 = es->FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL);
if (l0 == NULL) {
show_error(_("Error: No EBML head found."));
delete es;
return;
}
// Don't verify its data for now.
l0->SkipData(*es, l0->Generic().Context);
delete l0;
while (1) {
// Next element must be a segment
l0 = es->FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL);
if (l0 == NULL) {
show_error(_("No segment/level 0 element found."));
return;
}
if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) {
show_element(l0, 0, _("Segment"));
break;
}
show_element(l0, 0, _("Next level 0 element is not a segment but %s"),
l0->Generic().DebugName);
l0->SkipData(*es, l0->Generic().Context);
delete l0;
}
upper_lvl_el = 0;
// We've got our segment, so let's find the chapters
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL,
true, 1);
while ((l1 != NULL) && (upper_lvl_el <= 0)) {
if (EbmlId(*l1) == KaxChapters::ClassInfos.GlobalId) {
KaxChapters &chapters = *static_cast<KaxChapters *>(l1);
chapters.Read(*es, KaxChapters::ClassInfos.Context, upper_lvl_el, l2,
true);
if (verbose > 0)
debug_dump_elements(&chapters, 0);
while (chapters.ListSize() > 0) {
if (EbmlId(*chapters[0]) == KaxEditionEntry::ClassInfos.GlobalId) {
KaxEditionEntry &entry =
*static_cast<KaxEditionEntry *>(chapters[0]);
while (entry.ListSize() > 0) {
if (EbmlId(*entry[0]) == KaxChapterAtom::ClassInfos.GlobalId)
all_chapters.PushElement(*entry[0]);
entry.Remove(0);
}
}
chapters.Remove(0);
}
} else if (EbmlId(*l1) == KaxTags::ClassInfos.GlobalId) {
KaxTags &tags = *static_cast<KaxTags *>(l1);
tags.Read(*es, KaxTags::ClassInfos.Context, upper_lvl_el, l2, true);
if (verbose > 0)
debug_dump_elements(&tags, 0);
while (tags.ListSize() > 0) {
all_tags.PushElement(*tags[0]);
tags.Remove(0);
}
} else
l1->SkipData(*es, l1->Generic().Context);
if (!in_parent(l0)) {
delete l1;
break;
}
if (upper_lvl_el > 0) {
upper_lvl_el--;
if (upper_lvl_el > 0)
break;
delete l1;
l1 = l2;
chapters =
dynamic_cast<KaxChapters *>(qp->read_all(KaxChapters::ClassInfos));
all_tags = dynamic_cast<KaxTags *>(qp->read_all(KaxTags::ClassInfos));
if ((chapters != NULL) && (all_tags != NULL)) {
for (i = 0; i < chapters->ListSize(); i++) {
if (dynamic_cast<KaxEditionEntry *>((*chapters)[i]) == NULL)
continue;
eentry = dynamic_cast<KaxEditionEntry *>((*chapters)[i]);
for (k = 0; k < eentry->ListSize(); k++)
if (dynamic_cast<KaxChapterAtom *>((*eentry)[k]) != NULL)
all_chapters.PushElement(*(*eentry)[k]);
}
if (verbose > 0) {
debug_dump_elements(&all_chapters, 0);
debug_dump_elements(all_tags, 0);
}
} else if (upper_lvl_el < 0) {
upper_lvl_el++;
if (upper_lvl_el < 0)
break;
write_cuesheet(file_name, all_chapters, *all_tags, -1, out);
}
l1->SkipData(*es, l1->Generic().Context);
delete l1;
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el,
0xFFFFFFFFL, true);
} // while (l1 != NULL)
write_cuesheet(file_name, all_chapters, all_tags, -1, out);
delete l0;
delete es;
delete in;
} catch (exception &ex) {
show_error(_("Caught exception: %s"), ex.what());
delete in;
return;
while (all_chapters.ListSize() > 0)
all_chapters.Remove(0);
}
delete all_tags;
delete chapters;
delete in;
delete qp;
}

View File

@ -175,6 +175,7 @@ usage() {
// {{{ FUNCTION parse_args
static bool chapter_format_simple = false;
static bool parse_fully = false;
void
parse_args(int argc,
@ -235,6 +236,10 @@ parse_args(int argc,
for (i = 3; i < argc; i++)
if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose"))
verbose++;
else if (!strcmp(argv[i], "-f") || !strcmp(argv[i], "--parse-fully"))
parse_fully = true;
else if (!strcmp(argv[i], "-c")) {
if (mode != MODE_TRACKS)
mxerror(_("'-c' is only allowed when extracting tracks.\n"));
@ -269,7 +274,7 @@ parse_args(int argc,
chapter_format_simple = true;
} else {
} else if ((mode == MODE_TRACKS) || (mode == MODE_ATTACHMENTS)) {
copy = safestrdup(argv[i]);
colon = strchr(copy, ':');
if (colon == NULL)
@ -303,7 +308,11 @@ parse_args(int argc,
sub_charset = "UTF-8";
embed_in_ogg = true;
extract_cuesheet = false;
}
} else
mxerror(_("Unrecognized command line option '%s'. Maybe you put a "
"mode specific option before the input file name?\n"),
argv[i]);
if ((mode == MODE_TAGS) || (mode == MODE_CHAPTERS) ||
(mode == MODE_CUESHEET))
@ -401,16 +410,16 @@ main(int argc,
mxinfo(_("progress: 100%%\n"));
} else if (mode == MODE_TAGS)
extract_tags(input_file);
extract_tags(input_file, parse_fully);
else if (mode == MODE_ATTACHMENTS)
extract_attachments(input_file);
extract_attachments(input_file, parse_fully);
else if (mode == MODE_CHAPTERS)
extract_chapters(input_file, chapter_format_simple);
extract_chapters(input_file, chapter_format_simple, parse_fully);
else if (mode == MODE_CUESHEET)
extract_cuesheet(input_file);
extract_cuesheet(input_file, parse_fully);
else
die("mkvextract: Unknown mode!?");

View File

@ -106,10 +106,11 @@ kax_track_t *find_track(int tid);
bool extract_tracks(const char *file_name);
extern int conv_utf8;
void extract_tags(const char *file_name);
void extract_chapters(const char *file_name, bool chapter_format_simple);
void extract_attachments(const char *file_name);
void extract_cuesheet(const char *file_name);
void extract_tags(const char *file_name, bool parse_fully);
void extract_chapters(const char *file_name, bool chapter_format_simple,
bool parse_fully);
void extract_attachments(const char *file_name, bool parse_fully);
void extract_cuesheet(const char *file_name, bool parse_fully);
void write_cuesheet(const char *file_name, KaxChapters &chapters,
KaxTags &tags, int64_t tuid, mm_io_c &out);

View File

@ -61,129 +61,53 @@ extern "C" {
#include "chapters.h"
#include "common.h"
#include "commonebml.h"
#include "matroska.h"
#include "mkvextract.h"
#include "mm_io.h"
#include "quickparser.h"
#include "tagwriter.h"
using namespace libmatroska;
using namespace std;
void
extract_tags(const char *file_name) {
int upper_lvl_el;
// Elements for different levels
EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL;
EbmlStream *es;
extract_tags(const char *file_name,
bool parse_fully) {
EbmlMaster *m;
mm_io_c *in;
mm_stdio_c out;
bool tags_extracted = false;
kax_quickparser_c *qp;
KaxTags *tags;
// open input file
try {
in = new mm_io_c(file_name, MODE_READ);
qp = new kax_quickparser_c(*in, parse_fully);
} catch (std::exception &ex) {
show_error(_("The file '%s' could not be opened for reading (%s)."),
file_name, strerror(errno));
return;
}
try {
es = new EbmlStream(*in);
m = qp->read_all(KaxTags::ClassInfos);
if (m != NULL) {
tags = dynamic_cast<KaxTags *>(m);
assert(tags != NULL);
// Find the EbmlHead element. Must be the first one.
l0 = es->FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL);
if (l0 == NULL) {
show_error(_("Error: No EBML head found."));
delete es;
if (verbose > 0)
debug_dump_elements(tags, 0);
return;
}
out.write_bom("UTF-8");
out.printf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n"
"<!DOCTYPE Tags SYSTEM \"matroskatags.dtd\">\n\n"
"<Tags>\n");
write_tags_xml(*tags, &out);
out.printf("</Tags>\n");
// Don't verify its data for now.
l0->SkipData(*es, l0->Generic().Context);
delete l0;
while (1) {
// Next element must be a segment
l0 = es->FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL);
if (l0 == NULL) {
show_error(_("No segment/level 0 element found."));
return;
}
if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) {
show_element(l0, 0, _("Segment"));
break;
}
show_element(l0, 0, _("Next level 0 element is not a segment but %s"),
l0->Generic().DebugName);
l0->SkipData(*es, l0->Generic().Context);
delete l0;
}
upper_lvl_el = 0;
// We've got our segment, so let's find the tags
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL,
true, 1);
while ((l1 != NULL) && (upper_lvl_el <= 0)) {
if (EbmlId(*l1) == KaxTags::ClassInfos.GlobalId) {
KaxTags &tags = *static_cast<KaxTags *>(l1);
tags.Read(*es, KaxTags::ClassInfos.Context, upper_lvl_el, l2, true);
if (!tags_extracted) {
mxinfo("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n"
"<!DOCTYPE Tags SYSTEM \"matroskatags.dtd\">\n\n"
"<Tags>\n");
tags_extracted = true;
}
write_tags_xml(tags, &out);
} else
l1->SkipData(*es, l1->Generic().Context);
if (!in_parent(l0)) {
delete l1;
break;
}
if (upper_lvl_el > 0) {
upper_lvl_el--;
if (upper_lvl_el > 0)
break;
delete l1;
l1 = l2;
continue;
} else if (upper_lvl_el < 0) {
upper_lvl_el++;
if (upper_lvl_el < 0)
break;
}
l1->SkipData(*es, l1->Generic().Context);
delete l1;
l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el,
0xFFFFFFFFL, true);
} // while (l1 != NULL)
delete l0;
delete es;
delete in;
} catch (exception &ex) {
show_error(_("Caught exception: %s"), ex.what());
delete in;
return;
delete tags;
}
if (tags_extracted)
mxprint(stdout, "</Tags>\n");
delete in;
delete qp;
}