diff --git a/ChangeLog b/ChangeLog index 134edaf88..f854531e3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2004-09-07 Moritz Bunkus + + * mkvextract: Sped up the extraction of attachments, chapters, + cuesheets and tags by using the seek head information and not + parsing the full file each time. + 2004-09-02 Moritz Bunkus * mkvmerge: bug fix: The Matroska reader was not handling very big diff --git a/src/common/quickparser.cpp b/src/common/quickparser.cpp new file mode 100644 index 000000000..28e7033b2 --- /dev/null +++ b/src/common/quickparser.cpp @@ -0,0 +1,226 @@ +/* + * mkvmerge -- utility for splicing together matroska files + * from component media subtypes + * + * Distributed under the GPL + * see the file COPYING for details + * or visit http://www.gnu.org/copyleft/gpl.html + * + * $Id$ + * + * quick Matroska file parsing + * + * Written by Moritz Bunkus . + */ + +#include +#include + +#include +#include +#include + +#include "commonebml.h" +#include "error.h" +#include "quickparser.h" + +using namespace std; +using namespace libebml; +using namespace libmatroska; + +kax_quickparser_c::kax_quickparser_c(mm_io_c &_in, + bool parse_fully): + in(_in) { + int upper_lvl_el, k; + // Elements for different levels + EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL; + segment_child_t new_child, *child; + vector::iterator it; + + in.setFilePointer(0, seek_beginning); + EbmlStream es(in); + + // Find the EbmlHead element. Must be the first one. + l0 = es.FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL); + if (l0 == NULL) + throw error_c(_("Error: No EBML head found.")); + + // Don't verify its data for now. + l0->SkipData(es, l0->Generic().Context); + delete l0; + + while (1) { + // Next element must be a segment + l0 = es.FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL); + if (l0 == NULL) + throw error_c(_("No segment/level 0 element found.")); + if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) + break; + + l0->SkipData(es, l0->Generic().Context); + delete l0; + } + + upper_lvl_el = 0; + l1 = es.FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL, + true, 1); + while ((l1 != NULL) && (upper_lvl_el <= 0)) { + if (is_id(l1, KaxCluster) && !parse_fully) + break; + + new_child.pos = l1->GetElementPosition(); + new_child.size = l1->ElementSize(); + new_child.id = l1->Generic().GlobalId; + children.push_back(new_child); + + if ((l0->GetElementPosition() + l0->ElementSize()) < + l1->GetElementPosition()) { + delete l1; + break; + } + + if (upper_lvl_el < 0) { + upper_lvl_el++; + if (upper_lvl_el < 0) + break; + + } + + l1->SkipData(es, l1->Generic().Context); + delete l1; + l1 = es.FindNextElement(l0->Generic().Context, upper_lvl_el, + 0xFFFFFFFFL, true); + + } // while (l1 != NULL) + + for (k = 0; k < children.size(); k++) { + child = &children[k]; + if (child->id == KaxSeekHead::ClassInfos.GlobalId) { + EbmlMaster *m; + KaxSeek *seek; + KaxSeekID *seek_id; + int64_t pos; + int i; + bool found; + + in.setFilePointer(child->pos, seek_beginning); + upper_lvl_el = 0; + l1 = es.FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL, + true, 1); + if (l1 == NULL) + continue; + if (!is_id(l1, KaxSeekHead)) { + delete l1; + continue; + } + + m = static_cast(l1); + m->Read(es, l1->Generic().Context, upper_lvl_el, l2, true); + for (i = 0; i < m->ListSize(); i++) { + if (!is_id((*m)[i], KaxSeek)) + continue; + seek = static_cast((*m)[i]); + seek_id = FINDFIRST(seek, KaxSeekID); + pos = seek->Location() + l0->GetElementPosition() + l0->HeadSize(); + if ((pos == 0) || (seek_id == NULL)) + continue; + + found = false; + foreach(it, children) + if ((*it).pos == pos) { + found = true; + break; + } + if (!found) { + new_child.pos = pos; + new_child.size = -1; + new_child.id = EbmlId(seek_id->GetBuffer(), seek_id->GetSize()); + children.push_back(new_child); + } + } + delete l1; + } + } + + delete l0; + + current_child = children.begin(); +} + +int +kax_quickparser_c::num_elements(const EbmlId &id) + const { + vector::const_iterator it; + int num; + + num = 0; + foreach(it, children) + if ((*it).id == id) + num++; + + return num; +} + +segment_child_t * +kax_quickparser_c::get_next(const EbmlId &id) { + while (current_child != children.end()) + if ((*current_child).id == id) { + segment_child_t *ptr; + ptr = &(*current_child); + current_child++; + return ptr; + } else + current_child++; + return NULL; +} + +void +kax_quickparser_c::reset() { + current_child = children.begin(); +} + +EbmlMaster * +kax_quickparser_c::read_all(const EbmlCallbacks &callbacks) { + EbmlElement *e, *l2; + EbmlMaster *m, *src; + segment_child_t *child; + int upper_lvl_el; + + m = NULL; + EbmlStream es(in); + reset(); + for (child = get_next(callbacks.GlobalId); child != NULL; + child = get_next(callbacks.GlobalId)) { + in.setFilePointer(child->pos); + upper_lvl_el = 0; + e = es.FindNextElement(KaxSegment::ClassInfos.Context, upper_lvl_el, + 0xFFFFFFFFL, true); + if (e == NULL) + continue; + if (!(e->Generic().GlobalId == callbacks.GlobalId)) { + delete e; + continue; + } + + l2 = NULL; + e->Read(es, callbacks.Context, upper_lvl_el, l2, true); + if (m == NULL) + m = static_cast(e); + else { + src = static_cast(e); + while (src->ListSize() > 0) { + m->PushElement(*(*src)[0]); + src->Remove(0); + } + delete e; + } + } + reset(); + + if ((m != NULL) && (m->ListSize() == 0)) { + delete m; + return NULL; + } + + return m; +} diff --git a/src/common/quickparser.h b/src/common/quickparser.h new file mode 100644 index 000000000..207d1b6d6 --- /dev/null +++ b/src/common/quickparser.h @@ -0,0 +1,52 @@ +/* + * mkvmerge -- utility for splicing together matroska files + * from component media subtypes + * + * Distributed under the GPL + * see the file COPYING for details + * or visit http://www.gnu.org/copyleft/gpl.html + * + * $Id$ + * + * quick Matroska file parsing + * + * Written by Moritz Bunkus . + */ + +#ifndef __QUICKPARSER_H +#define __QUICKPARSER_H + +#include + +#include + +#include "mm_io.h" + +using namespace std; +using namespace libebml; + +typedef struct segment_child_t { + int64_t pos; + int64_t size; + EbmlId id; + + segment_child_t(): id((uint32_t)0, 0) {}; +} segment_child_t; + +class kax_quickparser_c { +private: + vector children; + vector::iterator current_child; + mm_io_c ∈ + +public: + kax_quickparser_c(mm_io_c &_in, bool parse_fully = false); + virtual ~kax_quickparser_c() {}; + + virtual int num_elements(const EbmlId &id) const; + virtual segment_child_t *get_next(const EbmlId &id); + virtual void reset(); + virtual EbmlMaster *read_all(const EbmlCallbacks &callbacks); +}; + +#endif diff --git a/src/extract/attachments.cpp b/src/extract/attachments.cpp index 61e4370e1..42d0ae28a 100644 --- a/src/extract/attachments.cpp +++ b/src/extract/attachments.cpp @@ -65,262 +65,118 @@ extern "C" { #include "matroska.h" #include "mkvextract.h" #include "mm_io.h" +#include "quickparser.h" using namespace libmatroska; using namespace std; static void -handle_attachments(mm_io_c *in, - EbmlStream *es, - EbmlElement *l0, - int64_t pos) { - KaxAttachments *atts; +handle_attachments(KaxAttachments *atts) { KaxAttached *att; KaxFileData *fdata; - EbmlElement *l1, *l2; - int upper_lvl_el, i, k; + EbmlElement *e; + int i, k; string name, type; int64_t size, id; char *str; bool found; mm_io_c *out; - out = NULL; - in->save_pos(pos); - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL, - true); + for (i = 0; i < atts->ListSize(); i++) { + att = dynamic_cast((*atts)[i]); + assert(att != NULL); - if ((l1 != NULL) && (EbmlId(*l1) == KaxAttachments::ClassInfos.GlobalId)) { - atts = (KaxAttachments *)l1; - l2 = NULL; - upper_lvl_el = 0; - atts->Read(*es, KaxAttachments::ClassInfos.Context, upper_lvl_el, l2, - true); - for (i = 0; i < atts->ListSize(); i++) { - att = (KaxAttached *)(*atts)[i]; - if (EbmlId(*att) == KaxAttached::ClassInfos.GlobalId) { - name = ""; - type = ""; - size = -1; - id = -1; - fdata = NULL; + name = ""; + type = ""; + size = -1; + id = -1; + fdata = NULL; - for (k = 0; k < att->ListSize(); k++) { - l2 = (*att)[k]; + for (k = 0; k < att->ListSize(); k++) { + e = (*att)[k]; - if (EbmlId(*l2) == KaxFileName::ClassInfos.GlobalId) { - KaxFileName &fname = *static_cast(l2); - str = UTFstring_to_cstr(UTFstring(fname)); - name = str; - safefree(str); + if (EbmlId(*e) == KaxFileName::ClassInfos.GlobalId) { + KaxFileName &fname = *static_cast(e); + str = UTFstring_to_cstr(UTFstring(fname)); + name = str; + safefree(str); - } else if (EbmlId(*l2) == KaxMimeType::ClassInfos.GlobalId) { - KaxMimeType &mtype = *static_cast(l2); - type = string(mtype); + } else if (EbmlId(*e) == KaxMimeType::ClassInfos.GlobalId) { + KaxMimeType &mtype = *static_cast(e); + type = string(mtype); - } else if (EbmlId(*l2) == KaxFileUID::ClassInfos.GlobalId) { - KaxFileUID &fuid = *static_cast(l2); - id = uint32(fuid); + } else if (EbmlId(*e) == KaxFileUID::ClassInfos.GlobalId) { + KaxFileUID &fuid = *static_cast(e); + id = uint32(fuid); - } else if (EbmlId(*l2) == KaxFileData::ClassInfos.GlobalId) { - fdata = (KaxFileData *)l2; - size = fdata->GetSize(); + } else if (EbmlId(*e) == KaxFileData::ClassInfos.GlobalId) { + fdata = (KaxFileData *)e; + size = fdata->GetSize(); - } - } - - if ((id != -1) && (size != -1) && (type.length() != 0)) { - found = false; - - for (k = 0; k < tracks.size(); k++) - if (tracks[k].tid == id) { - found = true; - break; - } - - if (found && !tracks[k].done) { - mxinfo(_("The attachment #%lld, MIME type %s, size %lld, " - "is written to '%s'.\n"), id, type.c_str(), size, - tracks[k].out_name); - try { - out = new mm_io_c(tracks[k].out_name, MODE_WRITE); - } catch (...) { - mxerror(_("The file '%s' could not be opened for writing " - "(%d, %s).\n"), - tracks[k].out_name, errno, strerror(errno)); - } - out->write(fdata->GetBuffer(), fdata->GetSize()); - delete out; - tracks[k].done = true; - } - } } } - delete l1; - } + if ((id != -1) && (size != -1) && (type.length() != 0)) { + found = false; - in->restore_pos(); + for (k = 0; k < tracks.size(); k++) + if (tracks[k].tid == id) { + found = true; + break; + } + + if (found && !tracks[k].done) { + mxinfo(_("The attachment #%lld, MIME type %s, size %lld, " + "is written to '%s'.\n"), id, type.c_str(), size, + tracks[k].out_name); + try { + out = new mm_io_c(tracks[k].out_name, MODE_WRITE); + } catch (...) { + mxerror(_("The file '%s' could not be opened for writing " + "(%d, %s).\n"), + tracks[k].out_name, errno, strerror(errno)); + } + out->write(fdata->GetBuffer(), fdata->GetSize()); + delete out; + tracks[k].done = true; + } + } + } } void -extract_attachments(const char *file_name) { - int upper_lvl_el, i; - // Elements for different levels - EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL; - EbmlStream *es; +extract_attachments(const char *file_name, + bool parse_fully) { mm_io_c *in; - bool done; + mm_stdio_c out; + kax_quickparser_c *qp; + KaxAttachments *attachments; + int i; // open input file try { in = new mm_io_c(file_name, MODE_READ); + qp = new kax_quickparser_c(*in, parse_fully); } catch (std::exception &ex) { show_error(_("The file '%s' could not be opened for reading (%s)."), file_name, strerror(errno)); return; } - try { - es = new EbmlStream(*in); + attachments = + dynamic_cast(qp->read_all(KaxAttachments::ClassInfos)); + if (attachments != NULL) { + if (verbose > 0) + debug_dump_elements(attachments, 0); - // Find the EbmlHead element. Must be the first one. - l0 = es->FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL); - if (l0 == NULL) { - show_error(_("Error: No EBML head found.")); - delete es; + handle_attachments(attachments); - return; - } - - // Don't verify its data for now. - l0->SkipData(*es, l0->Generic().Context); - delete l0; - - while (1) { - // Next element must be a segment - l0 = es->FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL); - if (l0 == NULL) { - show_error(_("No segment/level 0 element found.")); - return; - } - if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) { - show_element(l0, 0, _("Segment")); - break; - } - - show_element(l0, 0, _("Next level 0 element is not a segment but %s"), - l0->Generic().DebugName); - - l0->SkipData(*es, l0->Generic().Context); - delete l0; - } - - upper_lvl_el = 0; - done = false; - - // We've got our segment, so let's find the attachments - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL, - true, 1); - while ((l1 != NULL) && (upper_lvl_el <= 0)) { - - if (EbmlId(*l1) == KaxAttachments::ClassInfos.GlobalId) { - handle_attachments(in, es, l0, l1->GetElementPosition()); - - done = true; - for (i = 0; i < tracks.size(); i++) - if (!tracks[i].done) { - done = false; - break; - } - - if (done) - break; - - } else if (EbmlId(*l1) == KaxSeekHead::ClassInfos.GlobalId) { - int k; - EbmlElement *el; - KaxSeekHead &seek_head = *static_cast(l1); - int64_t pos; - bool is_attachments; - - i = 0; - seek_head.Read(*es, KaxSeekHead::ClassInfos.Context, i, el, true); - for (i = 0; i < seek_head.ListSize(); i++) - if (EbmlId(*seek_head[i]) == KaxSeek::ClassInfos.GlobalId) { - KaxSeek &seek = *static_cast(seek_head[i]); - pos = -1; - is_attachments = false; - - for (k = 0; k < seek.ListSize(); k++) - if (EbmlId(*seek[k]) == KaxSeekID::ClassInfos.GlobalId) { - KaxSeekID &sid = *static_cast(seek[k]); - EbmlId id(sid.GetBuffer(), sid.GetSize()); - if (id == KaxAttachments::ClassInfos.GlobalId) - is_attachments = true; - - } else if (EbmlId(*seek[k]) == - KaxSeekPosition::ClassInfos.GlobalId) - pos = uint64(*static_cast(seek[k])); - - if ((pos != -1) && is_attachments) { - handle_attachments(in, es, l0, - ((KaxSegment *)l0)->GetGlobalPosition(pos)); - done = true; - for (k = 0; k < tracks.size(); k++) - if (!tracks[k].done) { - done = false; - break; - } - - if (done) - break; - } - } - - - } else - l1->SkipData(*es, l1->Generic().Context); - - if (done) - break; - - if (!in_parent(l0)) { - delete l1; - break; - } - - if (upper_lvl_el > 0) { - upper_lvl_el--; - if (upper_lvl_el > 0) - break; - delete l1; - l1 = l2; - continue; - - } else if (upper_lvl_el < 0) { - upper_lvl_el++; - if (upper_lvl_el < 0) - break; - - } - - l1->SkipData(*es, l1->Generic().Context); - delete l1; - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, - 0xFFFFFFFFL, true); - - } // while (l1 != NULL) - - delete l0; - delete es; - delete in; - - } catch (exception &ex) { - show_error(_("Caught exception: %s"), ex.what()); - delete in; + delete attachments; } + delete in; + delete qp; + for (i = 0; i < tracks.size(); i++) if (!tracks[i].done) mxinfo(_("An attachment with the ID %lld was not found.\n"), diff --git a/src/extract/chapters.cpp b/src/extract/chapters.cpp index abb3bc181..0fe650e03 100644 --- a/src/extract/chapters.cpp +++ b/src/extract/chapters.cpp @@ -39,10 +39,7 @@ extern "C" { #include #include -#include #include -#include -#include #include #include #include @@ -54,9 +51,6 @@ extern "C" { #include #include #include -#include -#include -#include #include "chapters.h" #include "common.h" @@ -64,135 +58,56 @@ extern "C" { #include "matroska.h" #include "mkvextract.h" #include "mm_io.h" +#include "quickparser.h" using namespace libmatroska; using namespace std; void extract_chapters(const char *file_name, - bool chapter_format_simple) { - int upper_lvl_el; - // Elements for different levels - EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL; - EbmlStream *es; + bool chapter_format_simple, + bool parse_fully) { + EbmlMaster *m; mm_io_c *in; mm_stdio_c out; - bool chapters_extracted = false; - int next_chapter = 1; + kax_quickparser_c *qp; + KaxChapters *chapters; // open input file try { in = new mm_io_c(file_name, MODE_READ); + qp = new kax_quickparser_c(*in, parse_fully); } catch (std::exception &ex) { show_error(_("The file '%s' could not be opened for reading (%s)."), file_name, strerror(errno)); return; } - try { - es = new EbmlStream(*in); + m = qp->read_all(KaxChapters::ClassInfos); + if (m != NULL) { + chapters = dynamic_cast(m); + assert(chapters != NULL); - // Find the EbmlHead element. Must be the first one. - l0 = es->FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL); - if (l0 == NULL) { - show_error(_("Error: No EBML head found.")); - delete es; + if (verbose > 0) + debug_dump_elements(chapters, 0); - return; - } - - // Don't verify its data for now. - l0->SkipData(*es, l0->Generic().Context); - delete l0; - - while (1) { - // Next element must be a segment - l0 = es->FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL); - if (l0 == NULL) { - show_error(_("No segment/level 0 element found.")); - return; - } - if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) { - show_element(l0, 0, _("Segment")); - break; - } - - show_element(l0, 0, _("Next level 0 element is not a segment but %s"), - l0->Generic().DebugName); - - l0->SkipData(*es, l0->Generic().Context); - delete l0; + if (!chapter_format_simple) { + out.write_bom("UTF-8"); + out.printf("\n" + "\n" + "\n" + "\n" + "\n"); + write_chapters_xml(chapters, &out); + out.printf("\n"); + } else { + int dummy = 1; + write_chapters_simple(dummy, chapters, &out); } - upper_lvl_el = 0; - // We've got our segment, so let's find the chapters - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL, - true, 1); - while ((l1 != NULL) && (upper_lvl_el <= 0)) { - - if (EbmlId(*l1) == KaxChapters::ClassInfos.GlobalId) { - KaxChapters &chapters = *static_cast(l1); - chapters.Read(*es, KaxChapters::ClassInfos.Context, upper_lvl_el, l2, - true); - if (verbose > 0) - debug_dump_elements(&chapters, 0); - - if (!chapters_extracted && !chapter_format_simple) { - out.write_bom("UTF-8"); - out.printf("\n" - "\n" - "\n" - "\n" - "\n"); - chapters_extracted = true; - } - - if (chapter_format_simple) - write_chapters_simple(next_chapter, &chapters, &out); - else - write_chapters_xml(&chapters, &out); - - } else - l1->SkipData(*es, l1->Generic().Context); - - if (!in_parent(l0)) { - delete l1; - break; - } - - if (upper_lvl_el > 0) { - upper_lvl_el--; - if (upper_lvl_el > 0) - break; - delete l1; - l1 = l2; - continue; - - } else if (upper_lvl_el < 0) { - upper_lvl_el++; - if (upper_lvl_el < 0) - break; - - } - - l1->SkipData(*es, l1->Generic().Context); - delete l1; - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, - 0xFFFFFFFFL, true); - - } // while (l1 != NULL) - - delete l0; - delete es; - delete in; - - } catch (exception &ex) { - show_error(_("Caught exception: %s"), ex.what()); - delete in; - - return; + delete chapters; } - if (chapters_extracted && !chapter_format_simple) - out.printf("\n"); + delete in; + delete qp; } diff --git a/src/extract/cuesheets.cpp b/src/extract/cuesheets.cpp index 9b0806164..7bd413bf8 100644 --- a/src/extract/cuesheets.cpp +++ b/src/extract/cuesheets.cpp @@ -54,6 +54,7 @@ extern "C" { #include "matroska.h" #include "mkvextract.h" #include "mm_io.h" +#include "quickparser.h" using namespace libmatroska; using namespace std; @@ -345,137 +346,52 @@ write_cuesheet(const char *file_name, } void -extract_cuesheet(const char *file_name) { - int upper_lvl_el; - // Elements for different levels - EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL; - EbmlStream *es; +extract_cuesheet(const char *file_name, + bool parse_fully) { mm_io_c *in; mm_stdio_c out; - KaxChapters all_chapters; - KaxTags all_tags; + kax_quickparser_c *qp; + KaxChapters all_chapters, *chapters; + KaxEditionEntry *eentry; + KaxTags *all_tags; + int i, k; // open input file try { in = new mm_io_c(file_name, MODE_READ); + qp = new kax_quickparser_c(*in, parse_fully); } catch (std::exception &ex) { show_error(_("The file '%s' could not be opened for reading (%s)."), file_name, strerror(errno)); return; } - try { - es = new EbmlStream(*in); - - // Find the EbmlHead element. Must be the first one. - l0 = es->FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL); - if (l0 == NULL) { - show_error(_("Error: No EBML head found.")); - delete es; - - return; - } - - // Don't verify its data for now. - l0->SkipData(*es, l0->Generic().Context); - delete l0; - - while (1) { - // Next element must be a segment - l0 = es->FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL); - if (l0 == NULL) { - show_error(_("No segment/level 0 element found.")); - return; - } - if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) { - show_element(l0, 0, _("Segment")); - break; - } - - show_element(l0, 0, _("Next level 0 element is not a segment but %s"), - l0->Generic().DebugName); - - l0->SkipData(*es, l0->Generic().Context); - delete l0; - } - - upper_lvl_el = 0; - // We've got our segment, so let's find the chapters - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL, - true, 1); - while ((l1 != NULL) && (upper_lvl_el <= 0)) { - - if (EbmlId(*l1) == KaxChapters::ClassInfos.GlobalId) { - KaxChapters &chapters = *static_cast(l1); - chapters.Read(*es, KaxChapters::ClassInfos.Context, upper_lvl_el, l2, - true); - if (verbose > 0) - debug_dump_elements(&chapters, 0); - - while (chapters.ListSize() > 0) { - if (EbmlId(*chapters[0]) == KaxEditionEntry::ClassInfos.GlobalId) { - KaxEditionEntry &entry = - *static_cast(chapters[0]); - while (entry.ListSize() > 0) { - if (EbmlId(*entry[0]) == KaxChapterAtom::ClassInfos.GlobalId) - all_chapters.PushElement(*entry[0]); - entry.Remove(0); - } - } - chapters.Remove(0); - } - - } else if (EbmlId(*l1) == KaxTags::ClassInfos.GlobalId) { - KaxTags &tags = *static_cast(l1); - tags.Read(*es, KaxTags::ClassInfos.Context, upper_lvl_el, l2, true); - if (verbose > 0) - debug_dump_elements(&tags, 0); - - while (tags.ListSize() > 0) { - all_tags.PushElement(*tags[0]); - tags.Remove(0); - } - - } else - l1->SkipData(*es, l1->Generic().Context); - - if (!in_parent(l0)) { - delete l1; - break; - } - - if (upper_lvl_el > 0) { - upper_lvl_el--; - if (upper_lvl_el > 0) - break; - delete l1; - l1 = l2; + chapters = + dynamic_cast(qp->read_all(KaxChapters::ClassInfos)); + all_tags = dynamic_cast(qp->read_all(KaxTags::ClassInfos)); + if ((chapters != NULL) && (all_tags != NULL)) { + for (i = 0; i < chapters->ListSize(); i++) { + if (dynamic_cast((*chapters)[i]) == NULL) continue; + eentry = dynamic_cast((*chapters)[i]); + for (k = 0; k < eentry->ListSize(); k++) + if (dynamic_cast((*eentry)[k]) != NULL) + all_chapters.PushElement(*(*eentry)[k]); + } + if (verbose > 0) { + debug_dump_elements(&all_chapters, 0); + debug_dump_elements(all_tags, 0); + } - } else if (upper_lvl_el < 0) { - upper_lvl_el++; - if (upper_lvl_el < 0) - break; + write_cuesheet(file_name, all_chapters, *all_tags, -1, out); - } - - l1->SkipData(*es, l1->Generic().Context); - delete l1; - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, - 0xFFFFFFFFL, true); - - } // while (l1 != NULL) - - write_cuesheet(file_name, all_chapters, all_tags, -1, out); - - delete l0; - delete es; - delete in; - - } catch (exception &ex) { - show_error(_("Caught exception: %s"), ex.what()); - delete in; - - return; + while (all_chapters.ListSize() > 0) + all_chapters.Remove(0); } + + delete all_tags; + delete chapters; + + delete in; + delete qp; } diff --git a/src/extract/mkvextract.cpp b/src/extract/mkvextract.cpp index 31571563b..ccfc9df22 100644 --- a/src/extract/mkvextract.cpp +++ b/src/extract/mkvextract.cpp @@ -175,6 +175,7 @@ usage() { // {{{ FUNCTION parse_args static bool chapter_format_simple = false; +static bool parse_fully = false; void parse_args(int argc, @@ -235,6 +236,10 @@ parse_args(int argc, for (i = 3; i < argc; i++) if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) verbose++; + + else if (!strcmp(argv[i], "-f") || !strcmp(argv[i], "--parse-fully")) + parse_fully = true; + else if (!strcmp(argv[i], "-c")) { if (mode != MODE_TRACKS) mxerror(_("'-c' is only allowed when extracting tracks.\n")); @@ -269,7 +274,7 @@ parse_args(int argc, chapter_format_simple = true; - } else { + } else if ((mode == MODE_TRACKS) || (mode == MODE_ATTACHMENTS)) { copy = safestrdup(argv[i]); colon = strchr(copy, ':'); if (colon == NULL) @@ -303,7 +308,11 @@ parse_args(int argc, sub_charset = "UTF-8"; embed_in_ogg = true; extract_cuesheet = false; - } + + } else + mxerror(_("Unrecognized command line option '%s'. Maybe you put a " + "mode specific option before the input file name?\n"), + argv[i]); if ((mode == MODE_TAGS) || (mode == MODE_CHAPTERS) || (mode == MODE_CUESHEET)) @@ -401,16 +410,16 @@ main(int argc, mxinfo(_("progress: 100%%\n")); } else if (mode == MODE_TAGS) - extract_tags(input_file); + extract_tags(input_file, parse_fully); else if (mode == MODE_ATTACHMENTS) - extract_attachments(input_file); + extract_attachments(input_file, parse_fully); else if (mode == MODE_CHAPTERS) - extract_chapters(input_file, chapter_format_simple); + extract_chapters(input_file, chapter_format_simple, parse_fully); else if (mode == MODE_CUESHEET) - extract_cuesheet(input_file); + extract_cuesheet(input_file, parse_fully); else die("mkvextract: Unknown mode!?"); diff --git a/src/extract/mkvextract.h b/src/extract/mkvextract.h index 371185164..ac3035c59 100644 --- a/src/extract/mkvextract.h +++ b/src/extract/mkvextract.h @@ -106,10 +106,11 @@ kax_track_t *find_track(int tid); bool extract_tracks(const char *file_name); extern int conv_utf8; -void extract_tags(const char *file_name); -void extract_chapters(const char *file_name, bool chapter_format_simple); -void extract_attachments(const char *file_name); -void extract_cuesheet(const char *file_name); +void extract_tags(const char *file_name, bool parse_fully); +void extract_chapters(const char *file_name, bool chapter_format_simple, + bool parse_fully); +void extract_attachments(const char *file_name, bool parse_fully); +void extract_cuesheet(const char *file_name, bool parse_fully); void write_cuesheet(const char *file_name, KaxChapters &chapters, KaxTags &tags, int64_t tuid, mm_io_c &out); diff --git a/src/extract/tags.cpp b/src/extract/tags.cpp index 232bb463e..d82144dcd 100644 --- a/src/extract/tags.cpp +++ b/src/extract/tags.cpp @@ -61,129 +61,53 @@ extern "C" { #include "chapters.h" #include "common.h" +#include "commonebml.h" #include "matroska.h" #include "mkvextract.h" #include "mm_io.h" +#include "quickparser.h" #include "tagwriter.h" using namespace libmatroska; using namespace std; void -extract_tags(const char *file_name) { - int upper_lvl_el; - // Elements for different levels - EbmlElement *l0 = NULL, *l1 = NULL, *l2 = NULL; - EbmlStream *es; +extract_tags(const char *file_name, + bool parse_fully) { + EbmlMaster *m; mm_io_c *in; mm_stdio_c out; - bool tags_extracted = false; + kax_quickparser_c *qp; + KaxTags *tags; // open input file try { in = new mm_io_c(file_name, MODE_READ); + qp = new kax_quickparser_c(*in, parse_fully); } catch (std::exception &ex) { show_error(_("The file '%s' could not be opened for reading (%s)."), file_name, strerror(errno)); return; } - try { - es = new EbmlStream(*in); + m = qp->read_all(KaxTags::ClassInfos); + if (m != NULL) { + tags = dynamic_cast(m); + assert(tags != NULL); - // Find the EbmlHead element. Must be the first one. - l0 = es->FindNextID(EbmlHead::ClassInfos, 0xFFFFFFFFL); - if (l0 == NULL) { - show_error(_("Error: No EBML head found.")); - delete es; + if (verbose > 0) + debug_dump_elements(tags, 0); - return; - } - - // Don't verify its data for now. - l0->SkipData(*es, l0->Generic().Context); - delete l0; + out.write_bom("UTF-8"); + out.printf("\n\n" + "\n\n" + "\n"); + write_tags_xml(*tags, &out); + out.printf("\n"); - while (1) { - // Next element must be a segment - l0 = es->FindNextID(KaxSegment::ClassInfos, 0xFFFFFFFFFFFFFFFFLL); - if (l0 == NULL) { - show_error(_("No segment/level 0 element found.")); - return; - } - if (EbmlId(*l0) == KaxSegment::ClassInfos.GlobalId) { - show_element(l0, 0, _("Segment")); - break; - } - - show_element(l0, 0, _("Next level 0 element is not a segment but %s"), - l0->Generic().DebugName); - - l0->SkipData(*es, l0->Generic().Context); - delete l0; - } - - upper_lvl_el = 0; - // We've got our segment, so let's find the tags - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, 0xFFFFFFFFL, - true, 1); - while ((l1 != NULL) && (upper_lvl_el <= 0)) { - - if (EbmlId(*l1) == KaxTags::ClassInfos.GlobalId) { - KaxTags &tags = *static_cast(l1); - tags.Read(*es, KaxTags::ClassInfos.Context, upper_lvl_el, l2, true); - - if (!tags_extracted) { - mxinfo("\n\n" - "\n\n" - "\n"); - tags_extracted = true; - } - - write_tags_xml(tags, &out); - - } else - l1->SkipData(*es, l1->Generic().Context); - - if (!in_parent(l0)) { - delete l1; - break; - } - - if (upper_lvl_el > 0) { - upper_lvl_el--; - if (upper_lvl_el > 0) - break; - delete l1; - l1 = l2; - continue; - - } else if (upper_lvl_el < 0) { - upper_lvl_el++; - if (upper_lvl_el < 0) - break; - - } - - l1->SkipData(*es, l1->Generic().Context); - delete l1; - l1 = es->FindNextElement(l0->Generic().Context, upper_lvl_el, - 0xFFFFFFFFL, true); - - } // while (l1 != NULL) - - delete l0; - delete es; - delete in; - - } catch (exception &ex) { - show_error(_("Caught exception: %s"), ex.what()); - delete in; - - return; + delete tags; } - if (tags_extracted) - mxprint(stdout, "\n"); + delete in; + delete qp; } -