Made the SSA reader more spec compliant. Made the charset recoding in the textsubs packetizer optional: it is only done if the source is an SRT or OGM file, not if the source is a Matroska file (already UTF-8) or an SSA/ASS file (the reader will recode). Renamed the long writeline_unix_newlines() to puts_unl().

This commit is contained in:
Moritz Bunkus 2003-06-21 23:24:07 +00:00
parent d52e2f51bf
commit 95a6f7f9d4
10 changed files with 115 additions and 58 deletions

View File

@ -510,7 +510,7 @@ void create_output_files() {
s = (char *)safemalloc(tracks[i].private_size + 1);
memcpy(s, tracks[i].private_data, tracks[i].private_size);
s[tracks[i].private_size] = 0;
tracks[i].mm_io->writeline_unix_newlines(s);
tracks[i].mm_io->puts_unl(s);
safefree(s);
}
}
@ -596,7 +596,7 @@ void handle_data(KaxBlock *block, int64_t block_duration, bool has_ref) {
tracks[i].mm_io->write(buffer, strlen(buffer));
// Print the text itself.
tracks[i].mm_io->writeline_unix_newlines(s);
tracks[i].mm_io->puts_unl(s);
safefree(s);
break;
@ -623,22 +623,22 @@ void handle_data(KaxBlock *block, int64_t block_duration, bool has_ref) {
}
// Print "Dialogue: "
tracks[i].mm_io->writeline_unix_newlines("Dialogue: ");
tracks[i].mm_io->puts_unl("Dialogue: ");
*s2 = 0;
s2++;
tracks[i].mm_io->writeline_unix_newlines(s);
tracks[i].mm_io->writeline_unix_newlines(",");
tracks[i].mm_io->puts_unl(s);
tracks[i].mm_io->puts_unl(",");
sprintf(buffer, "%lld:%02lld:%02lld.%02lld",
start / 1000 / 60 / 60, (start / 1000 / 60) % 60,
(start / 1000) % 60, (start % 1000) / 10);
tracks[i].mm_io->writeline_unix_newlines(buffer);
tracks[i].mm_io->writeline_unix_newlines(",");
tracks[i].mm_io->puts_unl(buffer);
tracks[i].mm_io->puts_unl(",");
sprintf(buffer, "%lld:%02lld:%02lld.%02lld",
end / 1000 / 60 / 60, (end / 1000 / 60) % 60,
(end / 1000) % 60, (end % 1000) / 10);
tracks[i].mm_io->writeline_unix_newlines(buffer);
tracks[i].mm_io->writeline_unix_newlines(",");
tracks[i].mm_io->writeline_unix_newlines(s2);
tracks[i].mm_io->puts_unl(buffer);
tracks[i].mm_io->puts_unl(",");
tracks[i].mm_io->puts_unl(s2);
safefree(s);
break;

View File

@ -272,7 +272,7 @@ string mm_io_c::getline() {
#endif
size_t mm_io_c::writeline_unix_newlines(const char *s) {
size_t mm_io_c::puts_unl(const char *s) {
int i;
size_t bytes_written;

View File

@ -48,7 +48,7 @@ public:
virtual bool eof();
virtual char *gets(char *buffer, size_t max_size);
virtual string getline();
virtual size_t writeline_unix_newlines(const char *s);
virtual size_t puts_unl(const char *s);
};
class mm_null_io_c: public mm_io_c {

View File

@ -33,11 +33,13 @@ using namespace LIBMATROSKA_NAMESPACE;
textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *nreader,
const char *ncodec_id,
const void *nglobal_data,
int nglobal_size,
int nglobal_size, bool nrecode,
track_info_t *nti)
throw (error_c): generic_packetizer_c(nreader, nti) {
packetno = 0;
cc_utf8 = utf8_init(ti->sub_charset);
recode = nrecode;
if (recode)
cc_utf8 = utf8_init(ti->sub_charset);
global_size = nglobal_size;
global_data = safememdup(nglobal_data, global_size);
codec_id = safestrdup(ncodec_id);
@ -118,10 +120,13 @@ int textsubs_packetizer_c::process(unsigned char *_subs, int, int64_t start,
}
*idx2 = 0;
utf8_subs = to_utf8(cc_utf8, subs);
add_packet((unsigned char *)utf8_subs, strlen(utf8_subs), start, length,
1, -1, -1);
safefree(utf8_subs);
if (recode) {
utf8_subs = to_utf8(cc_utf8, subs);
add_packet((unsigned char *)utf8_subs, strlen(utf8_subs), start, length,
1, -1, -1);
safefree(utf8_subs);
} else
add_packet((unsigned char *)subs, strlen(subs), start, length, 1, -1, -1);
safefree(subs);

View File

@ -33,11 +33,12 @@ private:
void *global_data;
int global_size;
char *codec_id;
bool recode;
public:
textsubs_packetizer_c(generic_reader_c *nreader, const char *ncodec_id,
const void *nglobal_data, int nglobal_size,
track_info_t *nti) throw (error_c);
bool nrecode, track_info_t *nti) throw (error_c);
virtual ~textsubs_packetizer_c();
virtual int process(unsigned char *_subs, int size, int64_t start = -1,

View File

@ -1006,7 +1006,8 @@ void mkv_reader_c::create_packetizers() {
nti.sub_charset = "UTF-8";
t->packetizer = new textsubs_packetizer_c(this, t->codec_id,
t->private_data,
t->private_size, &nti);
t->private_size, false,
&nti);
if (verbose)
fprintf(stdout, "Matroska demultiplexer (%s): using the text "
"subtitle output module for track ID %u.\n", ti->fname,

View File

@ -343,7 +343,7 @@ void ogm_reader_c::create_packetizers() {
case OGM_STREAM_TYPE_TEXT:
try {
dmx->packetizer = new textsubs_packetizer_c(this, MKV_S_TEXTUTF8,
NULL, 0, ti);
NULL, 0, true, ti);
} catch (error_c &error) {
fprintf(stderr, "Error: ogm_reader: could not initialize the "
"text subtitles packetizer for stream id %d. Will try to "

View File

@ -75,7 +75,7 @@ srt_reader_c::srt_reader_c(track_info_t *nti) throw (error_c):
throw error_c("srt_reader: Source is not a valid SRT file.");
ti->id = 0; // ID for this track.
textsubs_packetizer = new textsubs_packetizer_c(this, MKV_S_TEXTUTF8, NULL,
0, ti);
0, true, ti);
} catch (exception &ex) {
throw error_c("srt_reader: Could not open the source file.");
}

113
r_ssa.cpp
View File

@ -38,6 +38,7 @@ class ssa_line_c {
public:
char *line;
int64_t start, end;
int num;
bool operator < (const ssa_line_c &cmp) const;
};
@ -65,8 +66,11 @@ ssa_reader_c::ssa_reader_c(track_info_t *nti) throw (error_c):
generic_reader_c(nti) {
string line, global;
int64_t old_pos;
char section;
bool is_ass;
cc_utf8 = utf8_init(ti->sub_charset);
is_ass = false;
try {
@ -90,17 +94,33 @@ ssa_reader_c::ssa_reader_c(track_info_t *nti) throw (error_c):
if (!strcasecmp(line.c_str(), "ScriptType: v4.00+") ||
!strcasecmp(line.c_str(), "[V4+ Styles]"))
is_ass = true;
else if (!strcasecmp(line.c_str(), "[Events]"))
section = 'e';
// Analyze the format string.
else if (!strncasecmp(line.c_str(), "Format: ", strlen("Format: ")) &&
(section == 'e')) {
format = split(&line.c_str()[strlen("Format: ")]);
strip(format);
}
// Now just append the current line and some DOS style newlines.
global += "\r\n";
global += line;
// But not if we've already encountered the [Events] section.
if (section != 'e') {
global += "\r\n";
global += line;
}
}
if (format.size() == 0)
throw error_c("ssa_reader: Invalid format. Could not find the "
"\"Format\" line in the \"[Events]\" section.");
textsubs_packetizer = new textsubs_packetizer_c(this, is_ass ?
MKV_S_TEXTASS :
MKV_S_TEXTSSA,
global.c_str(),
global.length(), ti);
global.length(), false,
ti);
} catch (exception &ex) {
throw error_c("ssa_reader: Could not open the source file.");
}
@ -114,6 +134,16 @@ ssa_reader_c::~ssa_reader_c() {
delete textsubs_packetizer;
}
// Returns the field of a split line that belongs to the column called
// 'index'. The column position is looked up in 'format' (exact,
// case-sensitive comparison) and the entry at the same position in
// 'fields' is returned.
//
// index:  name of the wanted column, e.g. "Start" or "Text".
// fields: the elements of one already split line.
//
// Returns an empty string if 'format' contains no such column or if
// 'fields' has fewer entries than the column's position requires.
string ssa_reader_c::get_element(const char *index, vector<string> &fields) {
  size_t i;

  for (i = 0; i < format.size(); i++)
    if (format[i] == index) {
      // A short (malformed) line may contain fewer fields than 'format'
      // has columns; never index past the end of 'fields'.
      if (i < fields.size())
        return fields[i];
      break;
    }

  return string("");
}
int64_t ssa_reader_c::parse_time(string &stime) {
int64_t th, tm, ts, tds;
int pos;
@ -152,13 +182,29 @@ int64_t ssa_reader_c::parse_time(string &stime) {
return tds * 10 + ts * 1000 + tm * 60 * 1000 + th * 60 * 60 * 1000;
}
// Fetches the "Text" column from 'fields' via get_element(), passes it
// through to_utf8() with this reader's 'cc_utf8' handle and returns the
// converted text as a string. The buffer handed back by to_utf8() is
// released with safefree() before returning.
string ssa_reader_c::recode_text(vector<string> &fields) {
  // TODO: Handle \fe encoding changes.
  string text = get_element("Text", fields);
  char *converted = to_utf8(cc_utf8, text.c_str());

  text = converted;
  safefree(converted);

  return text;
}
int ssa_reader_c::read() {
string line, stime, orig_line;
int pos1, pos2, i;
string line, stime, orig_line, comma;
int i, num;
int64_t start, end;
vector<ssa_line_c> clines;
vector<string> fields;
ssa_line_c cline;
num = 1;
do {
line = mm_io->getline();
orig_line = line;
@ -167,53 +213,45 @@ int ssa_reader_c::read() {
line.erase(0, strlen("Dialogue: ")); // Trim the start.
pos1 = line.find(','); // Find and parse the start time.
if (pos1 < 0) {
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
orig_line.c_str());
continue;
}
pos2 = line.find(',', pos1 + 1);
if (pos2 < 0) {
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
orig_line.c_str());
continue;
}
// Split the line into fields.
fields = split(line.c_str(), ",", format.size());
stime = line.substr(pos1 + 1, pos2 - pos1 - 1);
// Parse the start time.
stime = get_element("Start", fields);
start = parse_time(stime);
if (start < 0) {
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
orig_line.c_str());
continue;
}
line.erase(pos1, pos2 - pos1);
pos1 = line.find(','); // Find and parse the end time.
if (pos1 < 0) {
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
orig_line.c_str());
continue;
}
pos2 = line.find(',', pos1 + 1);
if (pos2 < 0) {
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
orig_line.c_str());
continue;
}
stime = line.substr(pos1 + 1, pos2 - pos1 - 1);
// Parse the end time.
stime = get_element("Start", fields);
end = parse_time(stime);
if (end < 0) {
if (start < 0) {
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
orig_line.c_str());
continue;
}
line.erase(pos1, pos2 - pos1);
// Specs say that the following fields are to put into the block:
// ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text
comma = ",";
line = comma + get_element("Layer", fields) + comma +
get_element("Style", fields) + comma +
get_element("Name", fields) + comma +
get_element("MarginL", fields) + comma +
get_element("MarginR", fields) + comma +
get_element("MarginV", fields) + comma +
get_element("Effect", fields) + comma +
recode_text(fields);
cline.line = safestrdup(line.c_str());
cline.start = start;
cline.end = end;
cline.num = num;
num++;
clines.push_back(cline);
} while (!mm_io->eof());
@ -221,8 +259,11 @@ int ssa_reader_c::read() {
stable_sort(clines.begin(), clines.end());
for (i = 0; i < clines.size(); i++) {
char buffer[20];
// Let the packetizer handle this line.
textsubs_packetizer->process((unsigned char *)clines[i].line, 0,
sprintf(buffer, "%d", clines[i].num);
line = string(buffer) + string(clines[i].line);
textsubs_packetizer->process((unsigned char *)line.c_str(), 0,
clines[i].start,
clines[i].end - clines[i].start);
safefree(clines[i].line);

View File

@ -25,17 +25,24 @@
#include <stdio.h>
#include <string>
#include <vector>
#include "mm_io.h"
#include "common.h"
#include "pr_generic.h"
#include "p_textsubs.h"
using namespace std;
class ssa_reader_c: public generic_reader_c {
private:
mm_io_c *mm_io;
textsubs_packetizer_c *textsubs_packetizer;
int act_wchar;
vector<string> format;
int cc_utf8;
public:
ssa_reader_c(track_info_t *nti) throw (error_c);
@ -53,6 +60,8 @@ public:
protected:
virtual int64_t parse_time(string &time);
virtual string get_element(const char *index, vector<string> &fields);
virtual string recode_text(vector<string> &fields);
};
#endif // __R_SSA_H