From 72e15986c5be20c285eafe607363b7a39fc5cf08 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Thu, 6 Mar 2003 23:39:40 +0000 Subject: [PATCH] Added support for SRT reading. Added support for outputting the preliminary simple text subtitle format. --- Makefile.am | 5 +- mkvinfo.cpp | 4 +- mkvmerge.cpp | 23 +++---- p_textsubs.cpp | 152 ++++++++++++++++++++++++++++++++++++++++++++++ p_textsubs.h | 41 +++++++++++++ r_srt.cpp | 56 +++++++---------- r_srt.h | 36 ++++++----- subtitles.cpp | 162 +++++++++++++++++++++++++++++++++++++++++++++++++ subtitles.h | 46 ++++++++++++++ 9 files changed, 460 insertions(+), 65 deletions(-) create mode 100644 p_textsubs.cpp create mode 100644 p_textsubs.h create mode 100644 subtitles.cpp create mode 100644 subtitles.h diff --git a/Makefile.am b/Makefile.am index dd0048962..63fc28950 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,6 +13,7 @@ mkvmerge_SOURCES = mkvmerge.cpp mkvmerge.h \ p_ac3.cpp p_ac3.h \ p_mp3.cpp p_mp3.h \ p_pcm.cpp p_pcm.h \ + p_textsubs.cpp p_textsubs.h \ p_video.cpp p_video.h \ p_vorbis.cpp p_vorbis.h \ pr_generic.h pr_generic.cpp \ @@ -20,8 +21,10 @@ mkvmerge_SOURCES = mkvmerge.cpp mkvmerge.h \ r_ac3.cpp r_ac3.h \ r_avi.cpp r_avi.h \ r_mp3.cpp r_mp3.h \ + r_srt.cpp r_srt.h \ r_ogm.cpp r_ogm.h \ - r_wav.cpp r_wav.h + r_wav.cpp r_wav.h \ + subtitles.cpp subtitles.h mkvinfo_SOURCES = mkvinfo.cpp mkvinfo.h \ common.cpp common.h diff --git a/mkvinfo.cpp b/mkvinfo.cpp index 2290db613..352ce22b4 100644 --- a/mkvinfo.cpp +++ b/mkvinfo.cpp @@ -12,7 +12,7 @@ /*! \file - \version \$Id: mkvinfo.cpp,v 1.2 2003/02/25 13:17:33 mosu Exp $ + \version \$Id: mkvinfo.cpp,v 1.3 2003/03/06 23:39:40 mosu Exp $ \brief retrieves and displays information about a Matroska file \author Moritz Bunkus */ @@ -288,6 +288,8 @@ void process_file() { case track_video: fprintf(stdout, "Video"); break; + case track_subtitle: + fprintf(stdout, "Subtitles"); default: fprintf(stdout, "unknown"); break; diff --git a/mkvmerge.cpp b/mkvmerge.cpp index 71c9d0244..5ae89ae00 100644 --- a/mkvmerge.cpp +++ b/mkvmerge.cpp @@ -13,7 +13,7 @@ /*! \file - \version \$Id: mkvmerge.cpp,v 1.22 2003/03/05 17:44:32 mosu Exp $ + \version \$Id: mkvmerge.cpp,v 1.23 2003/03/06 23:39:40 mosu Exp $ \brief command line parameter parsing, looping, output handling \author Moritz Bunkus */ @@ -57,6 +57,7 @@ #ifdef HAVE_OGGVORBIS #include "r_ogm.h" #endif +#include "r_srt.h" #ifdef DMALLOC #include @@ -108,7 +109,7 @@ file_type_t file_types[] = #endif // HAVE_OGGVORBIS {"avi", TYPEAVI, "AVI (Audio/Video Interleaved)"}, {"wav", TYPEWAV, "WAVE (uncompressed PCM)"}, -// {"srt", TYPESRT, "SRT text subtitles"}, + {"srt", TYPESRT, "SRT text subtitles"}, // {" ", TYPEMICRODVD, "MicroDVD text subtitles"}, // {"idx", TYPEVOBSUB, "VobSub subtitles"}, {"mp3", TYPEMP3, "MPEG1 layer III audio (CBR and VBR/ABR)"}, @@ -192,8 +193,8 @@ static int get_type(char *filename) { else if (ogm_reader_c::probe_file(f, size)) return TYPEOGM; #endif // HAVE_OGGVORBIS -// else if (srt_reader_c::probe_file(f, size)) -// return TYPESRT; + else if (srt_reader_c::probe_file(f, size)) + return TYPESRT; else if (mp3_reader_c::probe_file(f, size)) return TYPEMP3; else if (ac3_reader_c::probe_file(f, size)) @@ -617,13 +618,13 @@ static void parse_args(int argc, char **argv) { "WAVE files.\n"); file->reader = new wav_reader_c(&ti); break; -// case TYPESRT: -// if ((astreams != NULL) || (vstreams != NULL) || -// (tstreams != NULL)) -// fprintf(stderr, "Warning: -a/-A/-d/-D/-t/-T are ignored for " \ -// "SRT files.\n"); -// file->reader = new srt_reader_c(file->name, &async); -// break; + case TYPESRT: + if ((ti.astreams != NULL) || (ti.vstreams != NULL) || + (ti.tstreams != NULL)) + fprintf(stderr, "Warning: -a/-A/-d/-D/-t/-T are ignored for " \ + "SRT files.\n"); + file->reader = new srt_reader_c(&ti); + break; case TYPEMP3: if ((ti.astreams != NULL) || (ti.vstreams != NULL) || (ti.tstreams != NULL)) diff --git a/p_textsubs.cpp b/p_textsubs.cpp new file mode 100644 index 000000000..639e4f844 --- /dev/null +++ b/p_textsubs.cpp @@ -0,0 +1,152 @@ +/* + mkvmerge -- utility for splicing together matroska files + from component media subtypes + + r_srt.cpp + + Written by Moritz Bunkus + + Distributed under the GPL + see the file COPYING for details + or visit http://www.gnu.org/copyleft/gpl.html +*/ + +/*! + \file + \version \$Id: p_textsubs.cpp,v 1.1 2003/03/06 23:39:40 mosu Exp $ + \brief Subripper subtitle reader + \author Moritz Bunkus +*/ + +#include +#include +#include +#include + +#include "common.h" +#include "pr_generic.h" +#include "p_textsubs.h" + +#include "KaxTracks.h" +#include "KaxTrackVideo.h" + +#ifdef DMALLOC +#include +#endif + +textsubs_packetizer_c::textsubs_packetizer_c(track_info_t *nti) + throw (error_c): q_c(nti) { + packetno = 0; + set_header(); +} + +textsubs_packetizer_c::~textsubs_packetizer_c() { +} + +#define STEXTSIMPLE "S_TEXT/SIMPLE" + +void textsubs_packetizer_c::set_header() { + using namespace LIBMATROSKA_NAMESPACE; + + if (kax_last_entry == NULL) + track_entry = + &GetChild(static_cast(*kax_tracks)); + else + track_entry = + &GetNextChild(static_cast(*kax_tracks), + static_cast(*kax_last_entry)); + kax_last_entry = track_entry; + + if (serialno == -1) + serialno = track_number++; + KaxTrackNumber &tnumber = + GetChild(static_cast(*track_entry)); + *(static_cast(&tnumber)) = serialno; + + *(static_cast + (&GetChild(static_cast(*track_entry)))) = + track_subtitle; + + KaxCodecID &codec_id = + GetChild(static_cast(*track_entry)); + codec_id.CopyBuffer((binary *)STEXTSIMPLE, countof(STEXTSIMPLE)); +} + +int textsubs_packetizer_c::process(int64_t start, int64_t end, char *_subs) { + int num_newlines; + char *subs, *idx1, *idx2, *tempbuf; + int64_t duration, dlen, tmp; + + // Adjust the start and end values according to the audio adjustment. + start += ti->async.displacement; + start = (int64_t)(ti->async.linear * start); + end += ti->async.displacement; + end = (int64_t)(ti->async.linear * end); + + if (end < 0) + return EMOREDATA; + else if (start < 0) + start = 0; + + duration = end - start; + if (duration < 0) { + fprintf(stderr, "Warning: textsubs_packetizer: Ignoring an entry which " + "starts after it ends.\n"); + return EMOREDATA; + } + + tmp = duration; + dlen = 1; + while (tmp >= 10) { + tmp /= 10; + dlen++; + } + + idx1 = _subs; + subs = NULL; + num_newlines = 0; + while (*idx1 != 0) { + if (*idx1 == '\n') + num_newlines++; + idx1++; + } + subs = (char *)malloc(strlen(_subs) + num_newlines * 2 + 1); + if (subs == NULL) + die("malloc"); + + idx1 = _subs; + idx2 = subs; + while (*idx1 != 0) { + if (*idx1 == '\n') { + *idx2 = '\r'; + idx2++; + *idx2 = '\n'; + idx2++; + } else if (*idx1 != '\r') { + *idx2 = *idx1; + idx2++; + } + idx1++; + } + if (idx2 != subs) { + while (((idx2 - 1) != subs) && + ((*(idx2 - 1) == '\n') || (*(idx2 - 1) == '\r'))) { + *idx2 = 0; + idx2--; + } + } + *idx2 = 0; + + tempbuf = (char *)malloc(strlen(subs) + dlen + 2 + 1); + if (tempbuf == NULL) + die("malloc"); + sprintf(tempbuf, "%lld\r\n", duration); + strcat(tempbuf, subs); + + add_packet((unsigned char *)tempbuf, strlen(tempbuf), start); + + free(tempbuf); + free(subs); + + return EMOREDATA; +} diff --git a/p_textsubs.h b/p_textsubs.h new file mode 100644 index 000000000..69c781163 --- /dev/null +++ b/p_textsubs.h @@ -0,0 +1,41 @@ +/* + mkvmerge -- utility for splicing together matroska files + from component media subtypes + + p_textsubs.h + + Written by Moritz Bunkus + + Distributed under the GPL + see the file COPYING for details + or visit http://www.gnu.org/copyleft/gpl.html +*/ + +/*! + \file + \version \$Id: p_textsubs.h,v 1.1 2003/03/06 23:39:40 mosu Exp $ + \brief class definition for the simple text subtitle packetizer + \author Moritz Bunkus +*/ + +#ifndef __P_TEXTSUBS_H +#define __P_TEXTSUBS_H + +#include "common.h" +#include "pr_generic.h" +#include "queue.h" + +class textsubs_packetizer_c: public q_c { +private: + int packetno; + +public: + textsubs_packetizer_c(track_info_t *nti) throw (error_c); + virtual ~textsubs_packetizer_c(); + + virtual int process(int64_t start, int64_t end, char *_subs); + virtual void set_header(); +}; + + +#endif // __P_TEXTSUBS_H diff --git a/r_srt.cpp b/r_srt.cpp index 12b040231..304d136a8 100644 --- a/r_srt.cpp +++ b/r_srt.cpp @@ -1,30 +1,30 @@ /* - ogmmerge -- utility for splicing together ogg bitstreams + mkvmerge -- utility for splicing together matroska files from component media subtypes r_srt.cpp - SRT text subtitle reader module Written by Moritz Bunkus - Based on Xiph.org's 'oggmerge' found in their CVS repository - See http://www.xiph.org Distributed under the GPL see the file COPYING for details or visit http://www.gnu.org/copyleft/gpl.html */ +/*! + \file + \version \$Id: r_srt.cpp,v 1.4 2003/03/06 23:39:40 mosu Exp $ + \brief Subripper subtitle reader + \author Moritz Bunkus +*/ + #include #include #include #include #include -#include - -#include "ogmmerge.h" -#include "ogmstreams.h" -#include "queue.h" +#include "pr_generic.h" #include "r_srt.h" #include "subtitles.h" @@ -65,24 +65,25 @@ int srt_reader_c::probe_file(FILE *file, u_int64_t size) { return 1; } -srt_reader_c::srt_reader_c(char *fname, audio_sync_t *nasync) throw (error_c) { - if ((file = fopen(fname, "r")) == NULL) +srt_reader_c::srt_reader_c(track_info_t *nti) throw (error_c): + generic_reader_c(nti) { + if ((file = fopen(ti->fname, "r")) == NULL) throw error_c("srt_reader: Could not open source file."); if (!srt_reader_c::probe_file(file, 0)) throw error_c("srt_reader: Source is not a valid SRT file."); - textsubspacketizer = new textsubs_packetizer_c(nasync); + textsubs_packetizer = new textsubs_packetizer_c(ti); if (verbose) fprintf(stdout, "Using SRT subtitle reader for %s.\n+-> Using " \ - "text subtitle output module for subtitles.\n", fname); + "text subtitle output module for subtitles.\n", ti->fname); } srt_reader_c::~srt_reader_c() { - if (textsubspacketizer != NULL) - delete textsubspacketizer; + if (textsubs_packetizer != NULL) + delete textsubs_packetizer; } int srt_reader_c::read() { - ogg_int64_t start, end; + int64_t start, end; char *subtitles; subtitles_c subs; @@ -93,7 +94,8 @@ int srt_reader_c::read() { break; if ((strlen(chunk) < 29) || !issrttimestamp(chunk)) break; - + chunk[2047] = 0; + // 00:00:00,000 --> 00:00:00,000 // 01234567890123456789012345678 // 1 2 @@ -114,6 +116,7 @@ int srt_reader_c::read() { while (1) { if (fgets(chunk, 2047, file) == NULL) break; + chunk[2047] = 0; if ((*chunk == '\n') || (*chunk == '\r')) break; if (subtitles == NULL) { @@ -138,32 +141,19 @@ int srt_reader_c::read() { fprintf(stdout, "srt_reader: Warning: The subtitle file seems to be " \ "badly broken. The output file might not be playable " \ "correctly.\n"); - subs.process(textsubspacketizer); + subs.process(textsubs_packetizer); return 0; } -int srt_reader_c::serial_in_use(int serial) { - return textsubspacketizer->serial_in_use(serial); -} - -ogmmerge_page_t *srt_reader_c::get_header_page(int header_type) { - return textsubspacketizer->get_header_page(header_type); -} - -ogmmerge_page_t *srt_reader_c::get_page() { - return textsubspacketizer->get_page(); +packet_t *srt_reader_c::get_packet() { + return textsubs_packetizer->get_packet(); } int srt_reader_c::display_priority() { return DISPLAYPRIORITY_LOW; } -void srt_reader_c::reset() { - if (textsubspacketizer != NULL) - textsubspacketizer->reset(); -} - static char wchar[] = "-\\|/-\\|/-"; void srt_reader_c::display_progress() { diff --git a/r_srt.h b/r_srt.h index c649c5ac2..5d91d2fec 100644 --- a/r_srt.h +++ b/r_srt.h @@ -1,28 +1,30 @@ /* - ogmmerge -- utility for splicing together ogg bitstreams + mkvmerge -- utility for splicing together matroska files from component media subtypes r_srt.h - class definitions for the SRT text subtitle reader Written by Moritz Bunkus - Based on Xiph.org's 'oggmerge' found in their CVS repository - See http://www.xiph.org Distributed under the GPL see the file COPYING for details or visit http://www.gnu.org/copyleft/gpl.html */ +/*! + \file + \version \$Id: r_srt.h,v 1.6 2003/03/06 23:39:40 mosu Exp $ + \brief class definition for the Subripper subtitle reader + \author Moritz Bunkus +*/ + #ifndef __R_SRT_H #define __R_SRT_H #include -#include - -#include "ogmmerge.h" -#include "queue.h" +#include "common.h" +#include "pr_generic.h" #include "p_textsubs.h" @@ -30,24 +32,20 @@ class srt_reader_c: public generic_reader_c { private: char chunk[2048]; FILE *file; - textsubs_packetizer_c *textsubspacketizer; + textsubs_packetizer_c *textsubs_packetizer; int act_wchar; public: - srt_reader_c(char *fname, audio_sync_t *nasync) throw (error_c); + srt_reader_c(track_info_t *nti) throw (error_c); virtual ~srt_reader_c(); - virtual int read(); - virtual int serial_in_use(int); - virtual ogmmerge_page_t *get_page(); - virtual ogmmerge_page_t *get_header_page(int header_type = - PACKET_TYPE_HEADER); + virtual int read(); + virtual packet_t *get_packet(); - virtual void reset(); - virtual int display_priority(); - virtual void display_progress(); + virtual int display_priority(); + virtual void display_progress(); - static int probe_file(FILE *file, u_int64_t size); + static int probe_file(FILE *file, u_int64_t size); }; #endif // __R_SRT_H diff --git a/subtitles.cpp b/subtitles.cpp new file mode 100644 index 000000000..03a0adb06 --- /dev/null +++ b/subtitles.cpp @@ -0,0 +1,162 @@ +/* + mkvmerge -- utility for splicing together matroska files + from component media subtypes + + subtitles.cpp + + Written by Moritz Bunkus + + Distributed under the GPL + see the file COPYING for details + or visit http://www.gnu.org/copyleft/gpl.html +*/ + +/*! + \file + \version \$Id: subtitles.cpp,v 1.1 2003/03/06 23:39:40 mosu Exp $ + \brief subtitle helper + \author Moritz Bunkus +*/ + +#include +#include +#include +#include +#include + +#include "common.h" +#include "subtitles.h" + +#ifdef DMALLOC +#include +#endif + +subtitles_c::subtitles_c() { + first = NULL; + last = NULL; +} + +subtitles_c::~subtitles_c() { + sub_t *current = first; + + while (current != NULL) { + if (current->subs != NULL) + free(current->subs); + last = current; + current = current->next; + free(last); + } +} + +void subtitles_c::add(int64_t nstart, int64_t nend, char *nsubs) { + sub_t *s; + + s = (sub_t *)malloc(sizeof(sub_t)); + if (s == NULL) + die("malloc"); + s->subs = strdup(nsubs); + s->start = nstart; + s->end = nend; + s->next = NULL; + + if (last == NULL) { + first = s; + last = s; + } else { + last->next = s; + last = s; + } +} + +int subtitles_c::check() { + sub_t *current; + int error = 0; + char *c; + + current = first; + while ((current != NULL) && (current->next != NULL)) { + if (current->end > current->next->start) { + if (verbose) { + char short_subs[21]; + + memset(short_subs, 0, 21); + strncpy(short_subs, current->subs, 20); + for (c = short_subs; *c != 0; c++) + if (*c == '\n') + *c = ' '; + fprintf(stdout, "subtitles: Warning: current entry ends after " + "the next one starts. This end: %02lld:%02lld:%02lld,%03lld" + " next start: %02lld:%02lld:%02lld,%03lld (\"%s\"...)\n", + current->end / (60 * 60 * 1000), + (current->end / (60 * 1000)) % 60, + (current->end / 1000) % 60, + current->end % 1000, + current->next->start / (60 * 60 * 1000), + (current->next->start / (60 * 1000)) % 60, + (current->next->start / 1000) % 60, + current->next->start % 1000, + short_subs); + } + current->end = current->next->start - 1; + } + current = current->next; + } + + current = first; + while (current != NULL) { + if (current->start > current->end) { + error = 1; + if (verbose) { + char short_subs[21]; + + memset(short_subs, 0, 21); + strncpy(short_subs, current->subs, 20); + for (c = short_subs; *c != 0; c++) + if (*c == '\n') + *c = ' '; + fprintf(stdout, "subtitles: Warning: after fixing the time the " + "current entry begins after it ends. This start: " + "%02lld:%02lld:%02lld,%03lld this end: %02lld:%02lld:" + "%02lld,%03lld (\"%s\"...)\n", + current->start / (60 * 60 * 1000), + (current->start / (60 * 1000)) % 60, + (current->start / 1000) % 60, + current->start % 1000, + current->end / (60 * 60 * 1000), + (current->end / (60 * 1000)) % 60, + (current->end / 1000) % 60, + current->end % 1000, + short_subs); + } + } + current = current->next; + } + + return error; +} + +void subtitles_c::process(textsubs_packetizer_c *p) { + sub_t *current; + + while ((current = get_next()) != NULL) { + p->process(current->start, current->end, current->subs); + free(current->subs); + free(current); + } +} + +sub_t *subtitles_c::get_next() { + sub_t *current; + + if (first == NULL) + return NULL; + + current = first; + if (first == last) { + first = NULL; + last = NULL; + } else + first = first->next; + + return current; +} diff --git a/subtitles.h b/subtitles.h new file mode 100644 index 000000000..3c1bc9cf9 --- /dev/null +++ b/subtitles.h @@ -0,0 +1,46 @@ +/* + mkvmerge -- utility for splicing together matroska files + from component media subtypes + + subtitles.h + + Written by Moritz Bunkus + + Distributed under the GPL + see the file COPYING for details + or visit http://www.gnu.org/copyleft/gpl.html +*/ + +/*! + \file + \version \$Id: subtitles.h,v 1.1 2003/03/06 23:39:40 mosu Exp $ + \brief class definition for the subtitle helper + \author Moritz Bunkus +*/ + +#ifndef __SUBTITLES_H +#define __SUBTITLES_H + + +#include "p_textsubs.h" + +typedef struct sub_t { + int64_t start, end; + char *subs; + sub_t *next; +} sub_t; + +class subtitles_c { +private: + sub_t *first, *last; +public: + subtitles_c(); + ~subtitles_c(); + + void add(int64_t, int64_t, char *); + int check(); + void process(textsubs_packetizer_c *); + sub_t *get_next(); +}; + +#endif // __SUBTITLES_H