From 3fcd47dc6a9e5a9a22a0ad985f2fab2f761f5e86 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Thu, 11 Sep 2003 19:43:32 +0000 Subject: [PATCH] Implemented the VobSub reader and packetizer. Implemented three compression algos which are selectable via the (undocumented) command line option --compression. --- ChangeLog | 16 +++++ src/common.h | 1 + src/mkvmerge.cpp | 47 +++++++++++++ src/p_vobsub.cpp | 165 ++++++++++++++++++++++++++++++++++++++++++--- src/p_vobsub.h | 13 ++++ src/pr_generic.cpp | 12 ++++ src/pr_generic.h | 3 + src/r_vobsub.cpp | 25 ++++++- 8 files changed, 273 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 154683803..eb273b65d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2003-09-11 Moritz Bunkus + + * mkvmerge: Implemented an experimental VobSub reader and + packetizer. No specs exist for these yet, though. + + * mkvmerge: Improved the support for Matroska files with tracks + with big gaps between entries, e.g. subtitle tracks whose entries + are a minute or more apart. + + * mkvmerge: When splitting is active and the source is a Matroska + file then splitpoints were borked, and the first pass was slow as + your average mole. + + * mkvmerge: The track UIDs are kept when reading Matroska files + even when splitting is active. + 2003-09-09 Moritz Bunkus * mkvmerge: Added a QuickTime/MP4 reader. Can handle several diff --git a/src/common.h b/src/common.h index da991873f..e28fdd2d7 100644 --- a/src/common.h +++ b/src/common.h @@ -64,6 +64,7 @@ using namespace libebml; #define TYPEQTMP4 15 /* compression types */ +#define COMPRESSION_UNSPECIFIED -1 #define COMPRESSION_NONE 0 #define COMPRESSION_LZO 1 #define COMPRESSION_ZLIB 2 diff --git a/src/mkvmerge.cpp b/src/mkvmerge.cpp index 400d6b606..887254bab 100644 --- a/src/mkvmerge.cpp +++ b/src/mkvmerge.cpp @@ -656,6 +656,39 @@ static void parse_cues(char *s, cue_creation_t &cues) { mxerror("'%s' is an unsupported argument for --cues.\n", orig.c_str()); } +static void parse_compression(char *s, cue_creation_t &compression) { + char *colon; + string orig = s; + + // Extract the track number. + if ((colon = strchr(s, ':')) == NULL) + mxerror("Invalid compression option. No track ID specified in " + "'--compression %s'.\n", s); + + *colon = 0; + if (!parse_int(s, compression.id)) + mxerror("Invalid track ID specified in '--compression %s'.\n", + orig.c_str()); + + s = &colon[1]; + if (*s == 0) + mxerror("Invalid compression option specified in '--compression %s'.\n", + orig.c_str()); + + if (!strcasecmp(s, "lzo")) + compression.cues = COMPRESSION_LZO; + else if (!strcasecmp(s, "zlib")) + compression.cues = COMPRESSION_ZLIB; + else if (!strcasecmp(s, "bz2")) + compression.cues = COMPRESSION_BZ2; + else if (!strcmp(s, "none")) + compression.cues = COMPRESSION_NONE; + else + mxerror("'%s' is an unsupported argument for --compression. Available " + "compression methods are 'none', 'lzo', 'zlib' and 'bz2'.\n", + orig.c_str()); +} + static void parse_language(char *s, language_t &lang) { char *colon; string orig = s; @@ -971,6 +1004,7 @@ static void identify(const char *filename) { ti.vtracks = new vector; ti.stracks = new vector; ti.aac_is_sbr = new vector; + ti.compression_list = new vector; file = (filelist_t *)safemalloc(sizeof(filelist_t)); @@ -1027,6 +1061,7 @@ static void parse_args(int argc, char **argv) { ti.atracks = new vector; ti.vtracks = new vector; ti.stracks = new vector; + ti.compression_list = new vector; attachment = (attachment_t *)safemalloc(sizeof(attachment_t)); memset(attachment, 0, sizeof(attachment_t)); memset(&tags, 0, sizeof(tags_t)); @@ -1442,6 +1477,15 @@ static void parse_args(int argc, char **argv) { ti.aac_is_sbr->push_back(id); i++; + + } else if (!strcmp(this_arg, "--compression")) { + if (next_arg == NULL) + mxerror("'--compression' lacks its argument.\n"); + + parse_compression(next_arg, cues); + ti.compression_list->push_back(cues); + i++; + } // The argument is an input file. @@ -1487,6 +1531,7 @@ static void parse_args(int argc, char **argv) { delete ti.sub_charsets; delete ti.all_tags; delete ti.aac_is_sbr; + delete ti.compression_list; memset(&ti, 0, sizeof(track_info_t)); ti.audio_syncs = new vector; ti.cue_creations = new vector; @@ -1499,6 +1544,7 @@ static void parse_args(int argc, char **argv) { ti.atracks = new vector; ti.vtracks = new vector; ti.stracks = new vector; + ti.compression_list = new vector; } } @@ -1532,6 +1578,7 @@ static void parse_args(int argc, char **argv) { delete ti.atracks; delete ti.vtracks; delete ti.stracks; + delete ti.compression_list; safefree(attachment); } diff --git a/src/p_vobsub.cpp b/src/p_vobsub.cpp index 9d7e8e86a..c79a9ed7f 100644 --- a/src/p_vobsub.cpp +++ b/src/p_vobsub.cpp @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include "common.h" #include "p_vobsub.h" @@ -40,6 +42,8 @@ vobsub_packetizer_c::vobsub_packetizer_c(generic_reader_c *nreader, int ncompressed_type, track_info_t *nti) throw (error_c): generic_packetizer_c(nreader, nti) { + int result; + const char *compression; idx_data = (unsigned char *)safememdup(nidx_data, nidx_data_size); idx_data_size = nidx_data_size; @@ -52,10 +56,72 @@ vobsub_packetizer_c::vobsub_packetizer_c(generic_reader_c *nreader, ifo_data_size = 0; } - compression_type = ncompression_type; + if (nti->compression == COMPRESSION_UNSPECIFIED) + compression_type = ncompression_type; + else + compression_type = ti->compression; compressed_type = ncompressed_type; + raw_size = 0; + compressed_size = 0; + items = 0; + set_track_type(track_subtitle); + + if (compression_type == COMPRESSION_LZO) { + if ((result = lzo_init()) != LZO_E_OK) + mxerror("vobsub_packetizer: lzo_init() failed. Result: %d\n", result); + lzo_wrkmem = (lzo_bytep)lzo_malloc(LZO1X_999_MEM_COMPRESS); + if (lzo_wrkmem == NULL) + mxerror("vobsub_packetizer: lzo_malloc(LZO1X_999_MEM_COMPRESS) failed." + "\n"); + compression = "LZO"; + + } else if (compression_type == COMPRESSION_ZLIB) { + zc_stream.zalloc = (alloc_func)0; + zc_stream.zfree = (free_func)0; + zc_stream.opaque = (voidpf)0; + result = deflateInit(&zc_stream, 9); + if (result != Z_OK) + mxerror("vobsub_packetizer: deflateInit() failed. Result: %d\n", result); + compression = "Zlib"; + + } else if (compression_type == COMPRESSION_BZ2) { + bzc_stream.bzalloc = NULL; + bzc_stream.bzfree = NULL; + bzc_stream.opaque = NULL; + + result = BZ2_bzCompressInit(&bzc_stream, 9, 0, 30); + if (result != BZ_OK) + mxerror("vobsub_packetizer: BZ2_bzCompressInit() failed. Result: %d\n", + result); + compression = "bzip2"; + + } else if (compression_type != COMPRESSION_NONE) + die("vobsub_packetizer: Compression schmeme %d not implemented.", + compression_type); + else + compression = "no"; + + mxverb(2, "vobsub_packetizer: Using %s compression.\n", compression); +} + +vobsub_packetizer_c::~vobsub_packetizer_c() { + safefree(idx_data); + safefree(ifo_data); + + if (compression_type == COMPRESSION_LZO) { + safefree(lzo_wrkmem); + } else if (compression_type == COMPRESSION_ZLIB) + deflateEnd(&zc_stream); + else if (compression_type == COMPRESSION_BZ2) + BZ2_bzCompressEnd(&bzc_stream); + + if (items != 0) + mxverb(2, "vobsub_packetizer: Overall stats: raw size: %lld, compressed " + "size: %lld, items: %lld, ratio: %.2f%%, avg bytes per item: " + "%lld\n", raw_size, compressed_size, items, + compressed_size * 100.0 / raw_size, compressed_size / items); } void vobsub_packetizer_c::set_headers() { @@ -80,7 +146,7 @@ void vobsub_packetizer_c::set_headers() { i = 1; if (ifo_data_size > 0) { priv[0] = 1; - size_tmp = ifo_data_size; + size_tmp = idx_data_size; while (size_tmp >= 255) { priv[i] = 0xff; i++; @@ -101,23 +167,106 @@ void vobsub_packetizer_c::set_headers() { track_entry->EnableLacing(false); } +unsigned char *vobsub_packetizer_c::uncompress(unsigned char *buffer, + int &size) { + return NULL; +} + +unsigned char *vobsub_packetizer_c::compress(unsigned char *buffer, + int &size) { + unsigned char *dst; + int result, dstsize; + + dst = (unsigned char *)safemalloc(size * 2); + + if (compression_type == COMPRESSION_LZO) { + lzo_uint lzo_dstsize = size * 2; + if ((result = lzo1x_999_compress(buffer, size, dst, &lzo_dstsize, + lzo_wrkmem)) != LZO_E_OK) + mxerror("vobsub_packetizer: LZO compression failed. Result: %d\n", + result); + dstsize = lzo_dstsize; + + } else if (compression_type == COMPRESSION_ZLIB) { + zc_stream.next_in = (Bytef *)buffer; + zc_stream.next_out = (Bytef *)dst; + zc_stream.avail_in = size; + zc_stream.avail_out = 2 * size; + result = deflate(&zc_stream, Z_FULL_FLUSH); + if (result != Z_OK) + mxerror("vobsub_packetizer: Zlib compression failed. Result: %d\n", + result); + + dstsize = 2 * size - zc_stream.avail_out; + + } else if (compression_type == COMPRESSION_BZ2) { + bzc_stream.next_in = (char *)buffer; + bzc_stream.next_out = (char *)dst; + bzc_stream.avail_in = size; + bzc_stream.avail_out = 2 * size; + result = BZ2_bzCompress(&bzc_stream, BZ_FLUSH); + if (result != BZ_RUN_OK) + mxerror("vobsub_packetizer: bzip2 compression failed. Result: %d\n", + result); + + dstsize = 2 * size - bzc_stream.avail_out; + + } + + mxverb(3, "vobsub_packetizer: Compression from %d to %d, %d%%\n", + size, dstsize, dstsize * 100 / size); + + raw_size += size; + compressed_size += dstsize; + items++; + + dst = (unsigned char *)saferealloc(dst, dstsize); + size = dstsize; + + return dst; +} + int vobsub_packetizer_c::process(unsigned char *buf, int size, int64_t timecode, int64_t duration, int64_t, int64_t) { + unsigned char *uncompressed_buf, *final_buf; + bool del_uncompressed_buf, del_final_buf; + int new_size; + debug_enter("vobsub_packetizer_c::process"); - add_packet(buf, size, timecode, duration, true); + if (compression_type == compressed_type) + add_packet(buf, size, timecode, duration, true); + else { + new_size = size; + if (compressed_type != COMPRESSION_NONE) { + uncompressed_buf = uncompress(buf, new_size); + del_uncompressed_buf = true; + } else { + uncompressed_buf = buf; + del_uncompressed_buf = false; + } + + if (compression_type != COMPRESSION_NONE) { + final_buf = compress(uncompressed_buf, new_size); + del_final_buf = true; + } else { + final_buf = uncompressed_buf; + del_final_buf = false; + } + + add_packet(final_buf, new_size, timecode, duration, true); + if (del_uncompressed_buf) + safefree(uncompressed_buf); + if (del_final_buf) + safefree(final_buf); + } debug_leave("vobsub_packetizer_c::process"); return EMOREDATA; } -vobsub_packetizer_c::~vobsub_packetizer_c() { - safefree(idx_data); - safefree(ifo_data); -} - void vobsub_packetizer_c::dump_debug_info() { mxdebug("vobsub_packetizer_c: queue: %d\n", packet_queue.size()); } diff --git a/src/p_vobsub.h b/src/p_vobsub.h index 17db200be..2f7b1742f 100644 --- a/src/p_vobsub.h +++ b/src/p_vobsub.h @@ -21,6 +21,10 @@ #ifndef __P_VOBSUB_H #define __P_VOBSUB_H +#include +#include +#include + #include "os.h" #include "common.h" @@ -32,6 +36,11 @@ private: int idx_data_size, ifo_data_size; bool compressed; int compression_type, compressed_type; + int64_t raw_size, compressed_size, items; + + lzo_byte *lzo_wrkmem; + z_stream zc_stream; + bz_stream bzc_stream; public: vobsub_packetizer_c(generic_reader_c *nreader, @@ -47,6 +56,10 @@ public: virtual void set_headers(); virtual void dump_debug_info(); + +protected: + virtual unsigned char *uncompress(unsigned char *buffer, int &size); + virtual unsigned char *compress(unsigned char *buffer, int &size); }; #endif // __P_VOBSUB_H diff --git a/src/pr_generic.cpp b/src/pr_generic.cpp index 14092cab8..67a94687a 100644 --- a/src/pr_generic.cpp +++ b/src/pr_generic.cpp @@ -115,6 +115,16 @@ generic_packetizer_c::generic_packetizer_c(generic_reader_c *nreader, } } + // Let's see if the user has specified how this track should be compressed. + ti->compression = COMPRESSION_UNSPECIFIED; + for (i = 0; i < ti->compression_list->size(); i++) { + cc = &(*ti->compression_list)[i]; + if ((cc->id == ti->id) || (cc->id == -1)) { // -1 == all tracks + ti->compression = cc->cues; + break; + } + } + // Set default header values to 'unset'. hserialno = track_number++; huid = 0; @@ -664,6 +674,7 @@ track_info_t *duplicate_track_info(track_info_t *src) { src->private_size); dst->language = safestrdup(src->language); dst->sub_charset = safestrdup(src->sub_charset); + dst->compression_list = new vector(*src->compression_list); dst->tags = NULL; return dst; @@ -692,6 +703,7 @@ void free_track_info(track_info_t *ti) { safefree((*ti->all_tags)[i].file_name); delete ti->all_tags; delete ti->aac_is_sbr; + delete ti->compression_list; safefree(ti->language); safefree(ti->private_data); safefree(ti->sub_charset); diff --git a/src/pr_generic.h b/src/pr_generic.h index 7ce37058b..52c7c940d 100644 --- a/src/pr_generic.h +++ b/src/pr_generic.h @@ -111,6 +111,9 @@ typedef struct { KaxTags *tags; // For this very track vector *aac_is_sbr; // For AAC+/HE-AAC/SBR + + vector *compression_list; // As given on the command line + int compression; // For this very track } track_info_t; class generic_reader_c; diff --git a/src/r_vobsub.cpp b/src/r_vobsub.cpp index 08a75e2cb..0c12af9bb 100644 --- a/src/r_vobsub.cpp +++ b/src/r_vobsub.cpp @@ -141,6 +141,8 @@ vobsub_reader_c::vobsub_reader_c(track_info_t *nti) throw (error_c): idx_data = ""; last_filepos = -1; last_timestamp = -1; + act_wchar = 0; + done = false; len = strlen("# VobSub index file, v"); if (!idx_file->getline2(line) || @@ -219,12 +221,22 @@ int vobsub_reader_c::read(generic_packetizer_c *) { const char *s; int64_t filepos, timestamp; int hour, minute, second, msecond, timestamp_offset, filepos_offset, idx; + unsigned char *buffer; + int size; if (done) return 0; if (!idx_file->getline2(line)) { if (last_filepos != -1) { + sub_file->setFilePointer(0, seek_end); + size = sub_file->getFilePointer() - last_filepos; + sub_file->setFilePointer(last_filepos); + buffer = (unsigned char *)safemalloc(size); + if (sub_file->read(buffer, size) != size) + mxerror(PFX "Could not read %u bytes from the VobSub file.\n", size); + packetizer->process(buffer, size, last_timestamp, 1000); + safefree(buffer); } done = true; @@ -260,7 +272,7 @@ int vobsub_reader_c::read(generic_packetizer_c *) { idx++; } - mxverb(2, PFX "Timestamp: %lld, file pos: %lld\n", timestamp, filepos); + mxverb(3, PFX "Timestamp: %lld, file pos: %lld\n", timestamp, filepos); if (last_filepos == -1) { last_filepos = filepos; @@ -269,6 +281,16 @@ int vobsub_reader_c::read(generic_packetizer_c *) { } // Now process the stuff... + sub_file->setFilePointer(last_filepos); + size = filepos - last_filepos; + buffer = (unsigned char *)safemalloc(size); + if (sub_file->read(buffer, size) != size) + mxerror(PFX "Could not read %u bytes from the VobSub file.\n", size); + packetizer->process(buffer, size, last_timestamp, + timestamp - last_timestamp); + safefree(buffer); + last_timestamp = timestamp; + last_filepos = filepos; return EMOREDATA; } @@ -292,4 +314,5 @@ void vobsub_reader_c::identify() { } void vobsub_reader_c::set_headers() { + packetizer->set_headers(); }