diff --git a/mkvextract.cpp b/mkvextract.cpp index b5831a454..0cc656ab4 100644 --- a/mkvextract.cpp +++ b/mkvextract.cpp @@ -510,7 +510,7 @@ void create_output_files() { s = (char *)safemalloc(tracks[i].private_size + 1); memcpy(s, tracks[i].private_data, tracks[i].private_size); s[tracks[i].private_size] = 0; - tracks[i].mm_io->writeline_unix_newlines(s); + tracks[i].mm_io->puts_unl(s); safefree(s); } } @@ -596,7 +596,7 @@ void handle_data(KaxBlock *block, int64_t block_duration, bool has_ref) { tracks[i].mm_io->write(buffer, strlen(buffer)); // Print the text itself. - tracks[i].mm_io->writeline_unix_newlines(s); + tracks[i].mm_io->puts_unl(s); safefree(s); break; @@ -623,22 +623,22 @@ void handle_data(KaxBlock *block, int64_t block_duration, bool has_ref) { } // Print "Dialogue: " - tracks[i].mm_io->writeline_unix_newlines("Dialogue: "); + tracks[i].mm_io->puts_unl("Dialogue: "); *s2 = 0; s2++; - tracks[i].mm_io->writeline_unix_newlines(s); - tracks[i].mm_io->writeline_unix_newlines(","); + tracks[i].mm_io->puts_unl(s); + tracks[i].mm_io->puts_unl(","); sprintf(buffer, "%lld:%02lld:%02lld.%02lld", start / 1000 / 60 / 60, (start / 1000 / 60) % 60, (start / 1000) % 60, (start % 1000) / 10); - tracks[i].mm_io->writeline_unix_newlines(buffer); - tracks[i].mm_io->writeline_unix_newlines(","); + tracks[i].mm_io->puts_unl(buffer); + tracks[i].mm_io->puts_unl(","); sprintf(buffer, "%lld:%02lld:%02lld.%02lld", end / 1000 / 60 / 60, (end / 1000 / 60) % 60, (end / 1000) % 60, (end % 1000) / 10); - tracks[i].mm_io->writeline_unix_newlines(buffer); - tracks[i].mm_io->writeline_unix_newlines(","); - tracks[i].mm_io->writeline_unix_newlines(s2); + tracks[i].mm_io->puts_unl(buffer); + tracks[i].mm_io->puts_unl(","); + tracks[i].mm_io->puts_unl(s2); safefree(s); break; diff --git a/mm_io.cpp b/mm_io.cpp index e0cb38934..2550ef409 100644 --- a/mm_io.cpp +++ b/mm_io.cpp @@ -272,7 +272,7 @@ string mm_io_c::getline() { #endif -size_t mm_io_c::writeline_unix_newlines(const char *s) { 
+size_t mm_io_c::puts_unl(const char *s) { int i; size_t bytes_written; diff --git a/mm_io.h b/mm_io.h index 40377329b..1993412be 100644 --- a/mm_io.h +++ b/mm_io.h @@ -48,7 +48,7 @@ public: virtual bool eof(); virtual char *gets(char *buffer, size_t max_size); virtual string getline(); - virtual size_t writeline_unix_newlines(const char *s); + virtual size_t puts_unl(const char *s); }; class mm_null_io_c: public mm_io_c { diff --git a/p_textsubs.cpp b/p_textsubs.cpp index c6dcf63e2..7c80426ca 100644 --- a/p_textsubs.cpp +++ b/p_textsubs.cpp @@ -33,11 +33,13 @@ using namespace LIBMATROSKA_NAMESPACE; textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *nreader, const char *ncodec_id, const void *nglobal_data, - int nglobal_size, + int nglobal_size, bool nrecode, track_info_t *nti) throw (error_c): generic_packetizer_c(nreader, nti) { packetno = 0; - cc_utf8 = utf8_init(ti->sub_charset); + recode = nrecode; + if (recode) + cc_utf8 = utf8_init(ti->sub_charset); global_size = nglobal_size; global_data = safememdup(nglobal_data, global_size); codec_id = safestrdup(ncodec_id); @@ -118,10 +120,13 @@ int textsubs_packetizer_c::process(unsigned char *_subs, int, int64_t start, } *idx2 = 0; - utf8_subs = to_utf8(cc_utf8, subs); - add_packet((unsigned char *)utf8_subs, strlen(utf8_subs), start, length, - 1, -1, -1); - safefree(utf8_subs); + if (recode) { + utf8_subs = to_utf8(cc_utf8, subs); + add_packet((unsigned char *)utf8_subs, strlen(utf8_subs), start, length, + 1, -1, -1); + safefree(utf8_subs); + } else + add_packet((unsigned char *)subs, strlen(subs), start, length, 1, -1, -1); safefree(subs); diff --git a/p_textsubs.h b/p_textsubs.h index bc335db7c..270773b56 100644 --- a/p_textsubs.h +++ b/p_textsubs.h @@ -33,11 +33,12 @@ private: void *global_data; int global_size; char *codec_id; + bool recode; public: textsubs_packetizer_c(generic_reader_c *nreader, const char *ncodec_id, const void *nglobal_data, int nglobal_size, - track_info_t *nti) throw (error_c); 
+ bool nrecode, track_info_t *nti) throw (error_c); virtual ~textsubs_packetizer_c(); virtual int process(unsigned char *_subs, int size, int64_t start = -1, diff --git a/r_matroska.cpp b/r_matroska.cpp index 189bbe40d..2124476b8 100644 --- a/r_matroska.cpp +++ b/r_matroska.cpp @@ -1006,7 +1006,8 @@ void mkv_reader_c::create_packetizers() { nti.sub_charset = "UTF-8"; t->packetizer = new textsubs_packetizer_c(this, t->codec_id, t->private_data, - t->private_size, &nti); + t->private_size, false, + &nti); if (verbose) fprintf(stdout, "Matroska demultiplexer (%s): using the text " "subtitle output module for track ID %u.\n", ti->fname, diff --git a/r_ogm.cpp b/r_ogm.cpp index d0b0688b6..52ac135cf 100644 --- a/r_ogm.cpp +++ b/r_ogm.cpp @@ -343,7 +343,7 @@ void ogm_reader_c::create_packetizers() { case OGM_STREAM_TYPE_TEXT: try { dmx->packetizer = new textsubs_packetizer_c(this, MKV_S_TEXTUTF8, - NULL, 0, ti); + NULL, 0, true, ti); } catch (error_c &error) { fprintf(stderr, "Error: ogm_reader: could not initialize the " "text subtitles packetizer for stream id %d. Will try to " diff --git a/r_srt.cpp b/r_srt.cpp index ff6c27d8b..9a7206b4f 100644 --- a/r_srt.cpp +++ b/r_srt.cpp @@ -75,7 +75,7 @@ srt_reader_c::srt_reader_c(track_info_t *nti) throw (error_c): throw error_c("srt_reader: Source is not a valid SRT file."); ti->id = 0; // ID for this track. 
textsubs_packetizer = new textsubs_packetizer_c(this, MKV_S_TEXTUTF8, NULL, - 0, ti); + 0, true, ti); } catch (exception &ex) { throw error_c("srt_reader: Could not open the source file."); } diff --git a/r_ssa.cpp b/r_ssa.cpp index 248425765..74842924a 100644 --- a/r_ssa.cpp +++ b/r_ssa.cpp @@ -38,6 +38,7 @@ class ssa_line_c { public: char *line; int64_t start, end; + int num; bool operator < (const ssa_line_c &cmp) const; }; @@ -65,8 +66,11 @@ ssa_reader_c::ssa_reader_c(track_info_t *nti) throw (error_c): generic_reader_c(nti) { string line, global; int64_t old_pos; + char section = 0; bool is_ass; + + cc_utf8 = utf8_init(ti->sub_charset); + is_ass = false; try { @@ -90,17 +94,33 @@ ssa_reader_c::ssa_reader_c(track_info_t *nti) throw (error_c): if (!strcasecmp(line.c_str(), "ScriptType: v4.00+") || !strcasecmp(line.c_str(), "[V4+ Styles]")) is_ass = true; + else if (!strcasecmp(line.c_str(), "[Events]")) + section = 'e'; + // Analyze the format string. + else if (!strncasecmp(line.c_str(), "Format: ", strlen("Format: ")) && + (section == 'e')) { + format = split(&line.c_str()[strlen("Format: ")]); + strip(format); + } // Now just append the current line and some DOS style newlines. - global += "\r\n"; - global += line; + // But not if we've already encountered the [Events] section. + if (section != 'e') { + global += "\r\n"; + global += line; + } } + if (format.size() == 0) + throw error_c("ssa_reader: Invalid format. Could not find the " + "\"Format\" line in the \"[Events]\" section."); + textsubs_packetizer = new textsubs_packetizer_c(this, is_ass ? 
MKV_S_TEXTASS : MKV_S_TEXTSSA, global.c_str(), - global.length(), ti); + global.length(), false, + ti); } catch (exception &ex) { throw error_c("ssa_reader: Could not open the source file."); } @@ -114,6 +134,16 @@ ssa_reader_c::~ssa_reader_c() { delete textsubs_packetizer; } +string ssa_reader_c::get_element(const char *index, vector<string> &fields) { + int i; + + for (i = 0; i < format.size(); i++) + if ((format[i] == index) && (i < fields.size())) + return fields[i]; + + return string(""); +} + int64_t ssa_reader_c::parse_time(string &stime) { int64_t th, tm, ts, tds; int pos; @@ -152,13 +182,29 @@ int64_t ssa_reader_c::parse_time(string &stime) { return tds * 10 + ts * 1000 + tm * 60 * 1000 + th * 60 * 60 * 1000; } +string ssa_reader_c::recode_text(vector<string> &fields) { + char *s; + string res; + + // TODO: Handle \fe encoding changes. + res = get_element("Text", fields); + s = to_utf8(cc_utf8, res.c_str()); + res = s; + safefree(s); + + return res; +} + int ssa_reader_c::read() { - string line, stime, orig_line; - int pos1, pos2, i; + string line, stime, orig_line, comma; + int i, num; int64_t start, end; vector<ssa_line_c> clines; + vector<string> fields; ssa_line_c cline; + num = 1; + do { line = mm_io->getline(); orig_line = line; @@ -167,53 +213,45 @@ int ssa_reader_c::read() { line.erase(0, strlen("Dialogue: ")); // Trim the start. - pos1 = line.find(','); // Find and parse the start time. - if (pos1 < 0) { - fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n", - orig_line.c_str()); - continue; - } - pos2 = line.find(',', pos1 + 1); - if (pos2 < 0) { - fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n", - orig_line.c_str()); - continue; - } + // Split the line into fields. + fields = split(line.c_str(), ",", format.size()); - stime = line.substr(pos1 + 1, pos2 - pos1 - 1); + // Parse the start time. + stime = get_element("Start", fields); start = parse_time(stime); if (start < 0) { fprintf(stderr, "ssa_reader: Warning: Malformed line? 
(%s)\n", orig_line.c_str()); continue; } - line.erase(pos1, pos2 - pos1); - pos1 = line.find(','); // Find and parse the end time. - if (pos1 < 0) { - fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n", - orig_line.c_str()); - continue; - } - pos2 = line.find(',', pos1 + 1); - if (pos2 < 0) { - fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n", - orig_line.c_str()); - continue; - } - - stime = line.substr(pos1 + 1, pos2 - pos1 - 1); + // Parse the end time. + stime = get_element("End", fields); end = parse_time(stime); if (end < 0) { fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n", orig_line.c_str()); continue; } - line.erase(pos1, pos2 - pos1); + + // Specs say that the following fields are to be put into the block: + // ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text + + comma = ","; + line = comma + get_element("Layer", fields) + comma + + get_element("Style", fields) + comma + + get_element("Name", fields) + comma + + get_element("MarginL", fields) + comma + + get_element("MarginR", fields) + comma + + get_element("MarginV", fields) + comma + + get_element("Effect", fields) + comma + + recode_text(fields); cline.line = safestrdup(line.c_str()); cline.start = start; cline.end = end; + cline.num = num; + num++; clines.push_back(cline); } while (!mm_io->eof()); @@ -221,8 +259,11 @@ int ssa_reader_c::read() { stable_sort(clines.begin(), clines.end()); for (i = 0; i < clines.size(); i++) { + char buffer[20]; // Let the packetizer handle this line. 
- textsubs_packetizer->process((unsigned char *)clines[i].line, 0, + sprintf(buffer, "%d", clines[i].num); + line = string(buffer) + string(clines[i].line); + textsubs_packetizer->process((unsigned char *)line.c_str(), 0, clines[i].start, clines[i].end - clines[i].start); safefree(clines[i].line); diff --git a/r_ssa.h b/r_ssa.h index a354e5320..f8e9290cf 100644 --- a/r_ssa.h +++ b/r_ssa.h @@ -25,17 +25,24 @@ #include +#include +#include + #include "mm_io.h" #include "common.h" #include "pr_generic.h" #include "p_textsubs.h" +using namespace std; + class ssa_reader_c: public generic_reader_c { private: mm_io_c *mm_io; textsubs_packetizer_c *textsubs_packetizer; int act_wchar; + vector format; + int cc_utf8; public: ssa_reader_c(track_info_t *nti) throw (error_c); @@ -53,6 +60,8 @@ public: protected: virtual int64_t parse_time(string &time); + virtual string get_element(const char *index, vector &fields); + virtual string recode_text(vector &fields); }; #endif // __R_SSA_H