From 8c0bf65d835de2fcf3f6599ee45f908bc5ad5dd8 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sun, 21 Sep 2003 17:43:15 +0000 Subject: [PATCH] Modified the VobSub reader/packetizer to work with the new specs and with multiple-languages-per-idx files. --- src/mkvmerge.cpp | 14 +-- src/p_vobsub.cpp | 37 +----- src/p_vobsub.h | 5 +- src/r_matroska.cpp | 40 +----- src/r_vobsub.cpp | 303 ++++++++++++++++++++++++--------------------- src/r_vobsub.h | 28 ++++- 6 files changed, 196 insertions(+), 231 deletions(-) diff --git a/src/mkvmerge.cpp b/src/mkvmerge.cpp index e841abc6b..5e9b6954c 100644 --- a/src/mkvmerge.cpp +++ b/src/mkvmerge.cpp @@ -83,7 +83,7 @@ #include "r_real.h" #include "r_srt.h" #include "r_ssa.h" -// #include "r_vobsub.h" +#include "r_vobsub.h" #include "r_wav.h" #include "tagparser.h" @@ -203,7 +203,7 @@ file_type_t file_types[] = {"rm ", TYPEREAL, "RealMedia audio and video"}, {"srt", TYPESRT, "SRT text subtitles"}, {"ssa", TYPESSA, "SSA/ASS text subtitles"}, -// {"idx", TYPEVOBSUB, "VobSub subtitles"}, + {"idx", TYPEVOBSUB, "VobSub subtitles"}, {"wav", TYPEWAV, "WAVE (uncompressed PCM)"}, {"output modules:", -1, ""}, {" ", -1, "AAC audio"}, @@ -373,8 +373,8 @@ static int get_type(char *filename) { type = TYPESRT; else if (ssa_reader_c::probe_file(mm_text_io, size)) type = TYPESSA; -// else if (vobsub_reader_c::probe_file(mm_text_io, size)) -// type = TYPEVOBSUB; + else if (vobsub_reader_c::probe_file(mm_text_io, size)) + type = TYPEVOBSUB; else type = TYPEUNKNOWN; @@ -972,9 +972,9 @@ static void create_readers() { case TYPESSA: file->reader = new ssa_reader_c(file->ti); break; -// case TYPEVOBSUB: -// file->reader = new vobsub_reader_c(file->ti); -// break; + case TYPEVOBSUB: + file->reader = new vobsub_reader_c(file->ti); + break; case TYPEQTMP4: file->reader = new qtmp4_reader_c(file->ti); break; diff --git a/src/p_vobsub.cpp b/src/p_vobsub.cpp index 0a3667565..d0ddc827b 100644 --- a/src/p_vobsub.cpp +++ b/src/p_vobsub.cpp @@ -36,8 +36,6 @@ using namespace libmatroska; vobsub_packetizer_c::vobsub_packetizer_c(generic_reader_c *nreader, const void *nidx_data, int nidx_data_size, - const void *nifo_data, - int nifo_data_size, int ncompression_type, int ncompressed_type, track_info_t *nti) throw (error_c): @@ -48,14 +46,6 @@ vobsub_packetizer_c::vobsub_packetizer_c(generic_reader_c *nreader, idx_data = (unsigned char *)safememdup(nidx_data, nidx_data_size); idx_data_size = nidx_data_size; - if ((nifo_data != NULL) && (nifo_data_size != 0)) { - ifo_data = (unsigned char *)safememdup(nifo_data, nifo_data_size); - ifo_data_size = nifo_data_size; - } else { - ifo_data = NULL; - ifo_data_size = 0; - } - if (nti->compression == COMPRESSION_UNSPECIFIED) compression_type = ncompression_type; else @@ -124,7 +114,6 @@ vobsub_packetizer_c::vobsub_packetizer_c(generic_reader_c *nreader, vobsub_packetizer_c::~vobsub_packetizer_c() { safefree(idx_data); - safefree(ifo_data); if (compression_type != compressed_type) { if (compression_type == COMPRESSION_LZO) { @@ -147,8 +136,6 @@ vobsub_packetizer_c::~vobsub_packetizer_c() { void vobsub_packetizer_c::set_headers() { string codec_id; - unsigned char *priv; - int priv_size, i, size_tmp; codec_id = MKV_S_VOBSUB; if (compression_type == COMPRESSION_LZO) @@ -159,29 +146,7 @@ void vobsub_packetizer_c::set_headers() { codec_id += "/BZ2"; set_codec_id(codec_id.c_str()); - priv_size = idx_data_size + 1; - if (ifo_data_size > 0) - priv_size += ifo_data_size + idx_data_size / 255 + 1; - - priv = (unsigned char *)safemalloc(priv_size); - i = 1; - if (ifo_data_size > 0) { - priv[0] = 1; - size_tmp = idx_data_size; - while (size_tmp >= 255) { - priv[i] = 0xff; - i++; - size_tmp -= 255; - } - priv[i] = size_tmp; - i++; - memcpy(&priv[i + idx_data_size], ifo_data, ifo_data_size); - } else { - priv[0] = 0; - } - memcpy(&priv[i], idx_data, idx_data_size); - set_codec_private(priv, priv_size); - safefree(priv); + set_codec_private(idx_data, idx_data_size); generic_packetizer_c::set_headers(); diff --git a/src/p_vobsub.h b/src/p_vobsub.h index 9d8cdf9fe..8413833eb 100644 --- a/src/p_vobsub.h +++ b/src/p_vobsub.h @@ -32,8 +32,8 @@ class vobsub_packetizer_c: public generic_packetizer_c { private: - unsigned char *idx_data, *ifo_data; - int idx_data_size, ifo_data_size; + unsigned char *idx_data; + int idx_data_size; bool compressed; int compression_type, compressed_type; int64_t raw_size, compressed_size, items; @@ -45,7 +45,6 @@ private: public: vobsub_packetizer_c(generic_reader_c *nreader, const void *nidx_data, int nidx_data_size, - const void *nifo_data, int nifo_data_size, int ncompression_type, int ncompressed_type, track_info_t *nti) throw (error_c); virtual ~vobsub_packetizer_c(); diff --git a/src/r_matroska.cpp b/src/r_matroska.cpp index 2595bbcf0..bee03592c 100644 --- a/src/r_matroska.cpp +++ b/src/r_matroska.cpp @@ -439,43 +439,6 @@ void kax_reader_c::verify_tracks() { "private data found.\n", t->tnum, t->codec_id); continue; } - - c = (unsigned char *)t->private_data; - if (c[0] > 1) { - if (verbose) - mxwarn(PFX "VobSub track does not contain valid headers.\n"); - continue; - } - - offset = 1; - t->header_sizes[c[0]] = t->private_size; - for (i = 0; i < c[0]; i++) { - length = 0; - while ((c[offset] == (unsigned char)255) && - (length < t->private_size)) { - length += 255; - offset++; - } - if (offset >= (t->private_size - 1)) { - if (verbose) - mxwarn(PFX "VobSub track does not " - "contain valid headers.\n"); - continue; - } - length += c[offset]; - offset++; - t->header_sizes[i] = length; - t->header_sizes[c[0]] -= length; - } - t->header_sizes[c[0]] -= offset; - - t->headers[0] = &c[offset]; - if (c[0] == 1) - t->headers[1] = & c[offset + t->header_sizes[0]]; - else { - t->headers[1] = NULL; - t->header_sizes[1] = 0; - } } t->ok = 1; break; @@ -1236,8 +1199,7 @@ void kax_reader_c::create_packetizers() { } t->packetizer = - new vobsub_packetizer_c(this, t->headers[0], t->header_sizes[0], - t->headers[1], t->header_sizes[1], + new vobsub_packetizer_c(this, t->private_data, t->private_size, compression, compressed, &nti); if (verbose) mxinfo("Matroska demultiplexer (%s): using the VobSub " diff --git a/src/r_vobsub.cpp b/src/r_vobsub.cpp index b21016ab5..235315b1c 100644 --- a/src/r_vobsub.cpp +++ b/src/r_vobsub.cpp @@ -59,15 +59,10 @@ using namespace std; ishexdigit(*(s + 6)) && \ ishexdigit(*(s + 7)) && \ ishexdigit(*(s + 8))) -// timestamp: 00:01:43:603, filepos: 000000000 -#define isvobsubline_v3(s) ((strlen(s) >= 43) && \ +#define isvobsubline_v7(s) ((strlen(s) >= 42) && \ istimestampstr(s) && istimecode(s + 11) && \ iscommafileposstr(s + 23) && \ isfilepos(s + 34)) -#define isvobsubline_v7(s) ((strlen(s) >= 42) && \ - istimecodestr(s) && istimecode(s + 10) && \ - iscommafileposstr(s + 22) && \ - isfilepos(s + 33)) #define PFX "vobsub_reader: " @@ -90,10 +85,9 @@ int vobsub_reader_c::probe_file(mm_io_c *mm_io, int64_t size) { vobsub_reader_c::vobsub_reader_c(track_info_t *nti) throw (error_c): generic_reader_c(nti) { - mm_io_c *ifo_file; - string sub_name, ifo_name, line; + string sub_name, line; int len; - + try { idx_file = new mm_text_io_c(ti->fname); } catch (...) { @@ -115,34 +109,8 @@ vobsub_reader_c::vobsub_reader_c(track_info_t *nti) throw (error_c): throw error_c(emsg.c_str()); } - ifo_name = ti->fname; - len = ifo_name.rfind("."); - if (len >= 0) - ifo_name.erase(len); - ifo_name += ".ifo"; - - ifo_file = NULL; - try { - ifo_file = new mm_io_c(ifo_name.c_str(), MODE_READ); - ifo_file->setFilePointer(0, seek_end); - ifo_data_size = ifo_file->getFilePointer(); - ifo_file->setFilePointer(0); - ifo_data = (unsigned char *)safemalloc(ifo_data_size); - if (ifo_file->read(ifo_data, ifo_data_size) != ifo_data_size) - mxerror(PFX "Could not read the IFO file.\n"); - delete ifo_file; - } catch (...) { - if (ifo_file != NULL) - delete ifo_file; - ifo_data = NULL; - ifo_data_size = 0; - } - idx_data = ""; - last_filepos = -1; - last_timestamp = -1; act_wchar = 0; - done = false; len = strlen("# VobSub index file, v"); if (!idx_file->getline2(line) || @@ -156,143 +124,189 @@ vobsub_reader_c::vobsub_reader_c(track_info_t *nti) throw (error_c): version = version * 10 + line[len] - '0'; len++; } - mxverb(2, PFX "Version: %u\n", version); + if (version < 7) + mxerror(PFX "Only v7 and newer VobSub files are supported. If you have an " + "older version then use the VSConv utility from " + "http://sourceforge.net/projects/guliverkli/ to convert these " + "files to v7 files.\n"); - if ((version != 3) && (version != 7)) - mxerror(PFX "Unsupported file type version %d.\n", version); - - if (!parse_headers()) - throw error_c(PFX "The input file is not a valid VobSub file."); - - packetizer = new vobsub_packetizer_c(this, idx_data.c_str(), - idx_data.length(), ifo_data, - ifo_data_size, COMPRESSION_ZLIB, - COMPRESSION_NONE, ti); - - if (verbose) { - mxinfo("Using VobSub subtitle reader for '%s'/'%s'. ", ti->fname, - sub_name.c_str()); - if (ifo_data_size != 0) - mxinfo("Using IFO file '%s'.", ifo_name.c_str()); - else - mxinfo("No IFO file found."); - mxinfo("\n+-> Using VobSub subtitle output module for subtitles.\n"); - } + parse_headers(); + mxinfo("Using VobSub subtitle reader for '%s' & '%s'.\n", ti->fname, + sub_name.c_str()); + create_packetizers(); } vobsub_reader_c::~vobsub_reader_c() { + uint32_t i; + + for (i = 0; i < tracks.size(); i++) + delete tracks[i]; delete sub_file; delete idx_file; - safefree(ifo_data); - delete packetizer; } -bool vobsub_reader_c::parse_headers() { - int64_t pos; +void vobsub_reader_c::create_packetizers() { + uint32_t i, k; + int64_t avg_duration; + + for (i = 0; i < tracks.size(); i++) { + if (!demuxing_requested('s', i)) + continue; + + ti->id = i; +// ti.language = tracks[i]->language; + tracks[i]->packetizer = + new vobsub_packetizer_c(this, idx_data.c_str(), idx_data.length(), + COMPRESSION_ZLIB, COMPRESSION_NONE, ti); + avg_duration = 0; + for (k = 0; k < (tracks[i]->timecodes.size() - 1); k++) { + tracks[i]->durations.push_back(tracks[i]->timecodes[k + 1] - + tracks[i]->timecodes[k]); + avg_duration += tracks[i]->timecodes[k + 1] - tracks[i]->timecodes[k]; + } + if (tracks[i]->timecodes.size() == 0) + avg_duration = 1000; + else + avg_duration /= (tracks[i]->timecodes.size() - 1); + tracks[i]->durations.push_back(avg_duration); + + if (verbose) + mxinfo("+-> Using VobSub subtitle output module for subtitle track " + "%u (language: %s).\n", i, tracks[i]->language); + } +} + +void vobsub_reader_c::parse_headers() { string line; const char *sline; + char language[3]; + vobsub_track_c *track, *last_track; + int64_t filepos, last_pos, timestamp; + int hour, minute, second, msecond, idx; + uint32_t i; + + language[0] = 0; + track = NULL; + last_track = NULL; + last_pos = -1; while (1) { - pos = idx_file->getFilePointer(); - if (!idx_file->getline2(line)) - return false; + break; if ((line.length() == 0) || (line[0] == '#')) { - idx_data += line; - idx_data += "\n"; +// idx_data += line; +// idx_data += "\n"; continue; } sline = line.c_str(); - if (((version == 3) && isvobsubline_v3(sline)) || - ((version == 7) && isvobsubline_v7(sline))) { - idx_file->setFilePointer(pos); - return true; + + if (!strncasecmp(sline, "id:", 3)) { + if (line.length() >= 6) { + language[0] = sline[4]; + language[1] = sline[5]; + language[2] = 0; + } else + language[0] = 0; + last_track = track; + track = new vobsub_track_c(language); + tracks.push_back(track); + continue; + } + + if (!strncasecmp(sline, "alt:", 4)) + continue; + + if ((version == 7) && isvobsubline_v7(sline)) { + if (track == NULL) + mxerror(PFX ".idx file does not contain an 'id: ...' line to indicate " + "the language.\n"); + + idx = 34; + filepos = hexvalue(sline[idx]); + idx++; + while ((idx < line.length()) && ishexdigit(sline[idx])) { + filepos = filepos * 16 + hexvalue(sline[idx]); + idx++; + } + + if (last_pos != -1) { + if (last_track != NULL) { + last_track->sizes.push_back(filepos - last_pos); + last_track = NULL; + } else + track->sizes.push_back(filepos - last_pos); + } + last_pos = filepos; + track->positions.push_back(filepos); + + sscanf(&sline[11], "%02d:%02d:%02d:%03d", &hour, &minute, &second, + &msecond); + timestamp = (int64_t)hour * 60 * 60 * 1000 + + (int64_t)minute * 60 * 1000 + (int64_t)second * 1000 + + (int64_t)msecond; + track->timecodes.push_back(timestamp); + + continue; } idx_data += line; idx_data += "\n"; } + + if ((last_pos != -1) && (track != NULL)) { + sub_file->setFilePointer(0, seek_end); + track->sizes.push_back(sub_file->getFilePointer() - last_pos); + sub_file->setFilePointer(0); + } + + for (i = 0; i < tracks.size(); i++) + if ((tracks[i]->positions.size() != tracks[i]->timecodes.size()) || + (tracks[i]->positions.size() != tracks[i]->sizes.size())) + mxerror(PFX "Have %u positions, %u sizes and %u timecodes. This should " + "not have happened. Please file a bug report.\n", + tracks[i]->positions.size(), tracks[i]->sizes.size(), + tracks[i]->timecodes.size()); } -int vobsub_reader_c::read(generic_packetizer_c *) { - string line; - const char *s; - int64_t filepos, timestamp; - int hour, minute, second, msecond, timestamp_offset, filepos_offset, idx; - unsigned char *buffer; - int size; +int vobsub_reader_c::read(generic_packetizer_c *ptzr) { + vobsub_track_c *track; + unsigned char *data; + uint32_t i, id; - if (done) - return 0; - - if (!idx_file->getline2(line)) { - if (last_filepos != -1) { - sub_file->setFilePointer(0, seek_end); - size = sub_file->getFilePointer() - last_filepos; - sub_file->setFilePointer(last_filepos); - buffer = (unsigned char *)safemalloc(size); - if (sub_file->read(buffer, size) != size) - mxerror(PFX "Could not read %u bytes from the VobSub file.\n", size); - packetizer->process(buffer, size, last_timestamp, 1000); - safefree(buffer); + track = NULL; + for (i = 0; i < tracks.size(); i++) + if (tracks[i]->packetizer == ptzr) { + track = tracks[i]; + break; } - done = true; + if ((track == NULL) || (track->idx >= track->positions.size())) + return 0; + + id = i; + i = track->idx; + sub_file->setFilePointer(track->positions[i]); + data = (unsigned char *)safemalloc(track->sizes[i]); + if (sub_file->read(data, track->sizes[i]) != track->sizes[i]) { + mxwarn(PFX "Could not read %lld bytes from the .sub file. Aborting.\n", + track->sizes[i]); + safefree(data); return 0; } + mxverb(2, PFX "track: %u, size: %lld, at: %lld, timecode: %lld, duration: " + "%lld\n", id, track->sizes[i], track->positions[i], + track->timecodes[i], track->durations[i]); + track->packetizer->process(data, track->sizes[i], track->timecodes[i], + track->durations[i]); + safefree(data); + track->idx++; - s = line.c_str(); - - if ((version == 3) && !isvobsubline_v3(s)) + if (track->idx >= track->sizes.size()) + return 0; + else return EMOREDATA; - if ((version == 7) && !isvobsubline_v7(s)) - return EMOREDATA; - - if (version == 3) { -// timestamp: 00:01:43:603, filepos: 000000000 - timestamp_offset = 11; - filepos_offset = 34; - } else if (version == 7) { - die(PFX "Version 7 has not been implemented yet."); - } else - die(PFX "Unknown version. Should not have happened."); - - sscanf(&s[timestamp_offset], "%02d:%02d:%02d:%03d", &hour, &minute, &second, - &msecond); - timestamp = (int64_t)hour * 60 * 60 * 1000 + - (int64_t)minute * 60 * 1000 + (int64_t)second * 1000 + (int64_t)msecond; - - idx = filepos_offset; - filepos = hexvalue(s[idx]); - idx++; - while ((idx < line.length()) && ishexdigit(s[idx])) { - filepos = filepos * 16 + hexvalue(s[idx]); - idx++; - } - - mxverb(3, PFX "Timestamp: %lld, file pos: %lld\n", timestamp, filepos); - - if (last_filepos == -1) { - last_filepos = filepos; - last_timestamp = timestamp; - return EMOREDATA; - } - - // Now process the stuff... - sub_file->setFilePointer(last_filepos); - size = filepos - last_filepos; - buffer = (unsigned char *)safemalloc(size); - if (sub_file->read(buffer, size) != size) - mxerror(PFX "Could not read %u bytes from the VobSub file.\n", size); - packetizer->process(buffer, size, last_timestamp, - timestamp - last_timestamp); - safefree(buffer); - last_timestamp = timestamp; - last_filepos = filepos; - - return EMOREDATA; } int vobsub_reader_c::display_priority() { @@ -309,10 +323,17 @@ void vobsub_reader_c::display_progress(bool final) { } void vobsub_reader_c::identify() { + uint32_t i; + mxinfo("File '%s': container: VobSub\n", ti->fname); - mxinfo("Track ID 0: subtitles (VobSub)\n"); + for (i = 0; i < tracks.size(); i++) + mxinfo("Track ID %u: subtitles (VobSub)\n", i); } void vobsub_reader_c::set_headers() { - packetizer->set_headers(); + uint32_t i; + + for (i = 0; i < tracks.size(); i++) + if (tracks[i]->packetizer != NULL) + tracks[i]->packetizer->set_headers(); } diff --git a/src/r_vobsub.h b/src/r_vobsub.h index aa90ef3c1..894a66493 100644 --- a/src/r_vobsub.h +++ b/src/r_vobsub.h @@ -30,6 +30,26 @@ #include "pr_generic.h" #include "p_vobsub.h" +class vobsub_track_c { +public: + char *language; + vobsub_packetizer_c *packetizer; + vector positions, sizes, timecodes, durations; + int idx; + +public: + vobsub_track_c(const char *new_language) { + language = safestrdup(new_language); + packetizer = NULL; + idx = 0; + }; + ~vobsub_track_c() { + safefree(language); + if (packetizer != NULL) + delete packetizer; + } +}; + class vobsub_reader_c: public generic_reader_c { private: mm_io_c *sub_file; @@ -38,10 +58,7 @@ private: int act_wchar, version, ifo_data_size; string idx_data; - int64_t last_filepos, last_timestamp; - bool done; - - vobsub_packetizer_c *packetizer; + vector tracks; public: vobsub_reader_c(track_info_t *nti) throw (error_c); @@ -57,7 +74,8 @@ public: static int probe_file(mm_io_c *mm_io, int64_t size); protected: - virtual bool parse_headers(); + virtual void parse_headers(); + virtual void create_packetizers(); }; #endif // __R_VOBSUB_H