mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-24 20:01:53 +00:00
Made the SSA reader more spec compliant. Made the charset recoding in the textsubs packetizer optional: it is only done if the source is an SRT or OGM file, not if the source is a Matroska file (already UTF-8) or an SSA/ASS file (the reader recodes those itself). Renamed the long writeline_unix_newlines() to puts_unl().
This commit is contained in:
parent
d52e2f51bf
commit
95a6f7f9d4
@ -510,7 +510,7 @@ void create_output_files() {
|
||||
s = (char *)safemalloc(tracks[i].private_size + 1);
|
||||
memcpy(s, tracks[i].private_data, tracks[i].private_size);
|
||||
s[tracks[i].private_size] = 0;
|
||||
tracks[i].mm_io->writeline_unix_newlines(s);
|
||||
tracks[i].mm_io->puts_unl(s);
|
||||
safefree(s);
|
||||
}
|
||||
}
|
||||
@ -596,7 +596,7 @@ void handle_data(KaxBlock *block, int64_t block_duration, bool has_ref) {
|
||||
tracks[i].mm_io->write(buffer, strlen(buffer));
|
||||
|
||||
// Print the text itself.
|
||||
tracks[i].mm_io->writeline_unix_newlines(s);
|
||||
tracks[i].mm_io->puts_unl(s);
|
||||
safefree(s);
|
||||
break;
|
||||
|
||||
@ -623,22 +623,22 @@ void handle_data(KaxBlock *block, int64_t block_duration, bool has_ref) {
|
||||
}
|
||||
|
||||
// Print "Dialogue: "
|
||||
tracks[i].mm_io->writeline_unix_newlines("Dialogue: ");
|
||||
tracks[i].mm_io->puts_unl("Dialogue: ");
|
||||
*s2 = 0;
|
||||
s2++;
|
||||
tracks[i].mm_io->writeline_unix_newlines(s);
|
||||
tracks[i].mm_io->writeline_unix_newlines(",");
|
||||
tracks[i].mm_io->puts_unl(s);
|
||||
tracks[i].mm_io->puts_unl(",");
|
||||
sprintf(buffer, "%lld:%02lld:%02lld.%02lld",
|
||||
start / 1000 / 60 / 60, (start / 1000 / 60) % 60,
|
||||
(start / 1000) % 60, (start % 1000) / 10);
|
||||
tracks[i].mm_io->writeline_unix_newlines(buffer);
|
||||
tracks[i].mm_io->writeline_unix_newlines(",");
|
||||
tracks[i].mm_io->puts_unl(buffer);
|
||||
tracks[i].mm_io->puts_unl(",");
|
||||
sprintf(buffer, "%lld:%02lld:%02lld.%02lld",
|
||||
end / 1000 / 60 / 60, (end / 1000 / 60) % 60,
|
||||
(end / 1000) % 60, (end % 1000) / 10);
|
||||
tracks[i].mm_io->writeline_unix_newlines(buffer);
|
||||
tracks[i].mm_io->writeline_unix_newlines(",");
|
||||
tracks[i].mm_io->writeline_unix_newlines(s2);
|
||||
tracks[i].mm_io->puts_unl(buffer);
|
||||
tracks[i].mm_io->puts_unl(",");
|
||||
tracks[i].mm_io->puts_unl(s2);
|
||||
safefree(s);
|
||||
break;
|
||||
|
||||
|
@ -272,7 +272,7 @@ string mm_io_c::getline() {
|
||||
|
||||
#endif
|
||||
|
||||
size_t mm_io_c::writeline_unix_newlines(const char *s) {
|
||||
size_t mm_io_c::puts_unl(const char *s) {
|
||||
int i;
|
||||
size_t bytes_written;
|
||||
|
||||
|
2
mm_io.h
2
mm_io.h
@ -48,7 +48,7 @@ public:
|
||||
virtual bool eof();
|
||||
virtual char *gets(char *buffer, size_t max_size);
|
||||
virtual string getline();
|
||||
virtual size_t writeline_unix_newlines(const char *s);
|
||||
virtual size_t puts_unl(const char *s);
|
||||
};
|
||||
|
||||
class mm_null_io_c: public mm_io_c {
|
||||
|
@ -33,11 +33,13 @@ using namespace LIBMATROSKA_NAMESPACE;
|
||||
textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *nreader,
|
||||
const char *ncodec_id,
|
||||
const void *nglobal_data,
|
||||
int nglobal_size,
|
||||
int nglobal_size, bool nrecode,
|
||||
track_info_t *nti)
|
||||
throw (error_c): generic_packetizer_c(nreader, nti) {
|
||||
packetno = 0;
|
||||
cc_utf8 = utf8_init(ti->sub_charset);
|
||||
recode = nrecode;
|
||||
if (recode)
|
||||
cc_utf8 = utf8_init(ti->sub_charset);
|
||||
global_size = nglobal_size;
|
||||
global_data = safememdup(nglobal_data, global_size);
|
||||
codec_id = safestrdup(ncodec_id);
|
||||
@ -118,10 +120,13 @@ int textsubs_packetizer_c::process(unsigned char *_subs, int, int64_t start,
|
||||
}
|
||||
*idx2 = 0;
|
||||
|
||||
utf8_subs = to_utf8(cc_utf8, subs);
|
||||
add_packet((unsigned char *)utf8_subs, strlen(utf8_subs), start, length,
|
||||
1, -1, -1);
|
||||
safefree(utf8_subs);
|
||||
if (recode) {
|
||||
utf8_subs = to_utf8(cc_utf8, subs);
|
||||
add_packet((unsigned char *)utf8_subs, strlen(utf8_subs), start, length,
|
||||
1, -1, -1);
|
||||
safefree(utf8_subs);
|
||||
} else
|
||||
add_packet((unsigned char *)subs, strlen(subs), start, length, 1, -1, -1);
|
||||
|
||||
safefree(subs);
|
||||
|
||||
|
@ -33,11 +33,12 @@ private:
|
||||
void *global_data;
|
||||
int global_size;
|
||||
char *codec_id;
|
||||
bool recode;
|
||||
|
||||
public:
|
||||
textsubs_packetizer_c(generic_reader_c *nreader, const char *ncodec_id,
|
||||
const void *nglobal_data, int nglobal_size,
|
||||
track_info_t *nti) throw (error_c);
|
||||
bool nrecode, track_info_t *nti) throw (error_c);
|
||||
virtual ~textsubs_packetizer_c();
|
||||
|
||||
virtual int process(unsigned char *_subs, int size, int64_t start = -1,
|
||||
|
@ -1006,7 +1006,8 @@ void mkv_reader_c::create_packetizers() {
|
||||
nti.sub_charset = "UTF-8";
|
||||
t->packetizer = new textsubs_packetizer_c(this, t->codec_id,
|
||||
t->private_data,
|
||||
t->private_size, &nti);
|
||||
t->private_size, false,
|
||||
&nti);
|
||||
if (verbose)
|
||||
fprintf(stdout, "Matroska demultiplexer (%s): using the text "
|
||||
"subtitle output module for track ID %u.\n", ti->fname,
|
||||
|
@ -343,7 +343,7 @@ void ogm_reader_c::create_packetizers() {
|
||||
case OGM_STREAM_TYPE_TEXT:
|
||||
try {
|
||||
dmx->packetizer = new textsubs_packetizer_c(this, MKV_S_TEXTUTF8,
|
||||
NULL, 0, ti);
|
||||
NULL, 0, true, ti);
|
||||
} catch (error_c &error) {
|
||||
fprintf(stderr, "Error: ogm_reader: could not initialize the "
|
||||
"text subtitles packetizer for stream id %d. Will try to "
|
||||
|
@ -75,7 +75,7 @@ srt_reader_c::srt_reader_c(track_info_t *nti) throw (error_c):
|
||||
throw error_c("srt_reader: Source is not a valid SRT file.");
|
||||
ti->id = 0; // ID for this track.
|
||||
textsubs_packetizer = new textsubs_packetizer_c(this, MKV_S_TEXTUTF8, NULL,
|
||||
0, ti);
|
||||
0, true, ti);
|
||||
} catch (exception &ex) {
|
||||
throw error_c("srt_reader: Could not open the source file.");
|
||||
}
|
||||
|
113
r_ssa.cpp
113
r_ssa.cpp
@ -38,6 +38,7 @@ class ssa_line_c {
|
||||
public:
|
||||
char *line;
|
||||
int64_t start, end;
|
||||
int num;
|
||||
|
||||
bool operator < (const ssa_line_c &cmp) const;
|
||||
};
|
||||
@ -65,8 +66,11 @@ ssa_reader_c::ssa_reader_c(track_info_t *nti) throw (error_c):
|
||||
generic_reader_c(nti) {
|
||||
string line, global;
|
||||
int64_t old_pos;
|
||||
char section;
|
||||
bool is_ass;
|
||||
|
||||
cc_utf8 = utf8_init(ti->sub_charset);
|
||||
|
||||
is_ass = false;
|
||||
|
||||
try {
|
||||
@ -90,17 +94,33 @@ ssa_reader_c::ssa_reader_c(track_info_t *nti) throw (error_c):
|
||||
if (!strcasecmp(line.c_str(), "ScriptType: v4.00+") ||
|
||||
!strcasecmp(line.c_str(), "[V4+ Styles]"))
|
||||
is_ass = true;
|
||||
else if (!strcasecmp(line.c_str(), "[Events]"))
|
||||
section = 'e';
|
||||
// Analyze the format string.
|
||||
else if (!strncasecmp(line.c_str(), "Format: ", strlen("Format: ")) &&
|
||||
(section == 'e')) {
|
||||
format = split(&line.c_str()[strlen("Format: ")]);
|
||||
strip(format);
|
||||
}
|
||||
|
||||
// Now just append the current line and some DOS style newlines.
|
||||
global += "\r\n";
|
||||
global += line;
|
||||
// But not if we've already encountered the [Events] section.
|
||||
if (section != 'e') {
|
||||
global += "\r\n";
|
||||
global += line;
|
||||
}
|
||||
}
|
||||
|
||||
if (format.size() == 0)
|
||||
throw error_c("ssa_reader: Invalid format. Could not find the "
|
||||
"\"Format\" line in the \"[Events]\" section.");
|
||||
|
||||
textsubs_packetizer = new textsubs_packetizer_c(this, is_ass ?
|
||||
MKV_S_TEXTASS :
|
||||
MKV_S_TEXTSSA,
|
||||
global.c_str(),
|
||||
global.length(), ti);
|
||||
global.length(), false,
|
||||
ti);
|
||||
} catch (exception &ex) {
|
||||
throw error_c("ssa_reader: Could not open the source file.");
|
||||
}
|
||||
@ -114,6 +134,16 @@ ssa_reader_c::~ssa_reader_c() {
|
||||
delete textsubs_packetizer;
|
||||
}
|
||||
|
||||
// Looks up the value of the column named `index` in one split event line.
//
// `format` holds the column names parsed from the "Format: " line of the
// [Events] section; `fields` holds the values of a single line in the same
// order. Returns the value at the position of the first column whose name
// equals `index`, or an empty string if no such column exists or the line
// has no field at that position.
string ssa_reader_c::get_element(const char *index, vector<string> &fields) {
  size_t i;

  for (i = 0; i < format.size(); i++)
    if (format[i] == index) {
      // `fields` is produced from the raw line, so it can presumably be
      // shorter than `format` when the line has too few commas. Never
      // index past its end.
      if (i < fields.size())
        return fields[i];
      break;
    }

  return string("");
}
|
||||
|
||||
int64_t ssa_reader_c::parse_time(string &stime) {
|
||||
int64_t th, tm, ts, tds;
|
||||
int pos;
|
||||
@ -152,13 +182,29 @@ int64_t ssa_reader_c::parse_time(string &stime) {
|
||||
return tds * 10 + ts * 1000 + tm * 60 * 1000 + th * 60 * 60 * 1000;
|
||||
}
|
||||
|
||||
// Returns the "Text" column of an event line after passing it through
// to_utf8() with this reader's conversion handle (cc_utf8).
string ssa_reader_c::recode_text(vector<string> &fields) {
  // TODO: Handle \fe encoding changes.
  const string text = get_element("Text", fields);

  // to_utf8() hands back a buffer that this function releases with
  // safefree() once its contents have been copied into the result.
  char *utf8 = to_utf8(cc_utf8, text.c_str());
  string recoded(utf8);
  safefree(utf8);

  return recoded;
}
|
||||
|
||||
int ssa_reader_c::read() {
|
||||
string line, stime, orig_line;
|
||||
int pos1, pos2, i;
|
||||
string line, stime, orig_line, comma;
|
||||
int i, num;
|
||||
int64_t start, end;
|
||||
vector<ssa_line_c> clines;
|
||||
vector<string> fields;
|
||||
ssa_line_c cline;
|
||||
|
||||
num = 1;
|
||||
|
||||
do {
|
||||
line = mm_io->getline();
|
||||
orig_line = line;
|
||||
@ -167,53 +213,45 @@ int ssa_reader_c::read() {
|
||||
|
||||
line.erase(0, strlen("Dialogue: ")); // Trim the start.
|
||||
|
||||
pos1 = line.find(','); // Find and parse the start time.
|
||||
if (pos1 < 0) {
|
||||
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
|
||||
orig_line.c_str());
|
||||
continue;
|
||||
}
|
||||
pos2 = line.find(',', pos1 + 1);
|
||||
if (pos2 < 0) {
|
||||
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
|
||||
orig_line.c_str());
|
||||
continue;
|
||||
}
|
||||
// Split the line into fields.
|
||||
fields = split(line.c_str(), ",", format.size());
|
||||
|
||||
stime = line.substr(pos1 + 1, pos2 - pos1 - 1);
|
||||
// Parse the start time.
|
||||
stime = get_element("Start", fields);
|
||||
start = parse_time(stime);
|
||||
if (start < 0) {
|
||||
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
|
||||
orig_line.c_str());
|
||||
continue;
|
||||
}
|
||||
line.erase(pos1, pos2 - pos1);
|
||||
|
||||
pos1 = line.find(','); // Find and parse the end time.
|
||||
if (pos1 < 0) {
|
||||
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
|
||||
orig_line.c_str());
|
||||
continue;
|
||||
}
|
||||
pos2 = line.find(',', pos1 + 1);
|
||||
if (pos2 < 0) {
|
||||
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
|
||||
orig_line.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
stime = line.substr(pos1 + 1, pos2 - pos1 - 1);
|
||||
// Parse the end time.
|
||||
stime = get_element("Start", fields);
|
||||
end = parse_time(stime);
|
||||
if (end < 0) {
|
||||
if (start < 0) {
|
||||
fprintf(stderr, "ssa_reader: Warning: Malformed line? (%s)\n",
|
||||
orig_line.c_str());
|
||||
continue;
|
||||
}
|
||||
line.erase(pos1, pos2 - pos1);
|
||||
|
||||
// Specs say that the following fields are to put into the block:
|
||||
// ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
comma = ",";
|
||||
line = comma + get_element("Layer", fields) + comma +
|
||||
get_element("Style", fields) + comma +
|
||||
get_element("Name", fields) + comma +
|
||||
get_element("MarginL", fields) + comma +
|
||||
get_element("MarginR", fields) + comma +
|
||||
get_element("MarginV", fields) + comma +
|
||||
get_element("Effect", fields) + comma +
|
||||
recode_text(fields);
|
||||
|
||||
cline.line = safestrdup(line.c_str());
|
||||
cline.start = start;
|
||||
cline.end = end;
|
||||
cline.num = num;
|
||||
num++;
|
||||
|
||||
clines.push_back(cline);
|
||||
} while (!mm_io->eof());
|
||||
@ -221,8 +259,11 @@ int ssa_reader_c::read() {
|
||||
stable_sort(clines.begin(), clines.end());
|
||||
|
||||
for (i = 0; i < clines.size(); i++) {
|
||||
char buffer[20];
|
||||
// Let the packetizer handle this line.
|
||||
textsubs_packetizer->process((unsigned char *)clines[i].line, 0,
|
||||
sprintf(buffer, "%d", clines[i].num);
|
||||
line = string(buffer) + string(clines[i].line);
|
||||
textsubs_packetizer->process((unsigned char *)line.c_str(), 0,
|
||||
clines[i].start,
|
||||
clines[i].end - clines[i].start);
|
||||
safefree(clines[i].line);
|
||||
|
9
r_ssa.h
9
r_ssa.h
@ -25,17 +25,24 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "mm_io.h"
|
||||
#include "common.h"
|
||||
#include "pr_generic.h"
|
||||
|
||||
#include "p_textsubs.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class ssa_reader_c: public generic_reader_c {
|
||||
private:
|
||||
mm_io_c *mm_io;
|
||||
textsubs_packetizer_c *textsubs_packetizer;
|
||||
int act_wchar;
|
||||
vector<string> format;
|
||||
int cc_utf8;
|
||||
|
||||
public:
|
||||
ssa_reader_c(track_info_t *nti) throw (error_c);
|
||||
@ -53,6 +60,8 @@ public:
|
||||
|
||||
protected:
|
||||
virtual int64_t parse_time(string &time);
|
||||
virtual string get_element(const char *index, vector<string> &fields);
|
||||
virtual string recode_text(vector<string> &fields);
|
||||
};
|
||||
|
||||
#endif // __R_SSA_H
|
||||
|
Loading…
Reference in New Issue
Block a user