Some small bugfixes related to subtitle handling.

This commit is contained in:
Moritz Bunkus 2003-09-16 17:34:14 +00:00
parent 5812cb615e
commit 53417a48bb
9 changed files with 107 additions and 15 deletions

View File

@ -1,5 +1,11 @@
2003-09-16 Moritz Bunkus <moritz@bunkus.org>
* mkvmerge: bugfix: The SRT reader would abort if there was more
than one empty line between subtitle entries.
* mkvextract: bugfix: Proper BOMs are written according to the
desired charset when extracting text subtitles.
* Released v0.7.0.
* mkvextract: Add an UTF-8 BOM to extracted SSA/ASS and SRT

View File

@ -531,6 +531,16 @@ char *to_utf8(int handle, const char *local) {
return convert_charset(kax_convs[handle].ict_to_utf8, local);
}
/*
 * In-place variant of to_utf8(): runs the contents of 'local' through the
 * C string overload for the given conversion handle, stores the result
 * back into 'local' and returns a reference to that same string.
 */
string &to_utf8(int handle, string &local) {
  // The C string overload hands back a separate buffer; it is released
  // below once its contents have been copied into 'local'.
  char *converted = to_utf8(handle, local.c_str());

  local.assign(converted);
  safefree(converted);

  return local;
}
char *from_utf8(int handle, const char *utf8) {
char *copy;
@ -546,6 +556,16 @@ char *from_utf8(int handle, const char *utf8) {
return convert_charset(kax_convs[handle].ict_from_utf8, utf8);
}
/*
 * In-place variant of from_utf8(): runs the contents of 'utf8' through the
 * C string overload for the given conversion handle, stores the result
 * back into 'utf8' and returns a reference to that same string.
 */
string &from_utf8(int handle, string &utf8) {
  // The C string overload hands back a separate buffer; it is released
  // below once its contents have been copied into 'utf8'.
  char *converted = from_utf8(handle, utf8.c_str());

  utf8.assign(converted);
  safefree(converted);

  return utf8;
}
/*
* Random unique uint32_t numbers
*/

View File

@ -116,6 +116,8 @@ int utf8_init(const char *charset);
void utf8_done();
// Converts 'local' with the to-UTF-8 converter stored for 'handle' and
// returns the result as a C string. The string overload below releases
// that result with safefree() after copying it.
char *to_utf8(int handle, const char *local);
// Converts 'utf8' with the from-UTF-8 converter stored for 'handle' and
// returns the result as a C string. The string overload below releases
// that result with safefree() after copying it.
char *from_utf8(int handle, const char *utf8);
// In-place variant: replaces the contents of 'local' with the converted
// text and returns a reference to 'local'.
string &to_utf8(int handle, string &local);
// In-place variant: replaces the contents of 'utf8' with the converted
// text and returns a reference to 'utf8'.
string &from_utf8(int handle, string &utf8);
void clear_list_of_unique_uint32();
bool is_unique_uint32(uint32_t number);

View File

@ -154,11 +154,12 @@ static bool chapter_format_simple = false;
void parse_args(int argc, char **argv, char *&file_name, int &mode) {
int i, conv_handle;
char *colon, *copy;
char *colon, *copy, *sub_charset;
int64_t tid;
kax_track_t track;
file_name = NULL;
sub_charset = NULL;
verbose = 0;
if (argc < 2) {
@ -208,6 +209,7 @@ void parse_args(int argc, char **argv, char *&file_name, int &mode) {
mxerror("-c lacks a charset.\n");
conv_handle = utf8_init(argv[i + 1]);
sub_charset = argv[i + 1];
i++;
} else if (mode == MODE_TAGS)
@ -239,6 +241,7 @@ void parse_args(int argc, char **argv, char *&file_name, int &mode) {
track.tid = tid;
track.out_name = safestrdup(colon);
track.conv_handle = conv_handle;
track.sub_charset = safestrdup(sub_charset);
tracks.push_back(track);
safefree(copy);
}

View File

@ -70,6 +70,7 @@ typedef struct {
int64_t default_duration;
int srt_num;
char *sub_charset;
int conv_handle;
vector<ssa_line_c> ssa_lines;
bool warning_printed;

View File

@ -111,7 +111,6 @@ static void create_output_files() {
bool something_to_do, is_ok;
unsigned char *c;
ogg_packet op;
const unsigned char utf8_bom[3] = {0xef, 0xbb, 0xbf};
something_to_do = false;
@ -393,24 +392,47 @@ static void create_output_files() {
} else if (tracks[i].type == TYPESRT) {
tracks[i].srt_num = 1;
tracks[i].out->write(utf8_bom, 3);
tracks[i].out->write_bom(tracks[i].sub_charset);
} else if (tracks[i].type == TYPESSA) {
char *s;
unsigned char *pd;
int bom_len;
string sconv;
pd = (unsigned char *)tracks[i].private_data;
bom_len = 0;
// Skip any BOM that might be present.
if ((tracks[i].private_size > 3) &&
(pd[0] == 0xef) && (pd[1] == 0xbb) && (pd[2] == 0xbf))
bom_len = 3;
else if ((tracks[i].private_size > 4) &&
(pd[0] == 0xff) && (pd[1] == 0xfe) &&
(pd[2] == 0x00) && (pd[3] == 0x00))
bom_len = 4;
else if ((tracks[i].private_size > 4) &&
(pd[0] == 0x00) && (pd[1] == 0x00) &&
(pd[2] == 0xfe) && (pd[3] == 0xff))
bom_len = 4;
else if ((tracks[i].private_size > 2) &&
(pd[0] == 0xff) && (pd[1] == 0xfe))
bom_len = 2;
else if ((tracks[i].private_size > 2) &&
(pd[0] == 0xfe) && (pd[1] == 0xff))
bom_len = 2;
pd += bom_len;
tracks[i].private_size -= bom_len;
s = (char *)safemalloc(tracks[i].private_size + 1);
memcpy(s, tracks[i].private_data, tracks[i].private_size);
memcpy(s, pd, tracks[i].private_size);
s[tracks[i].private_size] = 0;
pd = (unsigned char *)tracks[i].private_data;
if ((pd[0] != 0x00) && (pd[0] != 0xef) && (pd[0] != 0xff))
tracks[i].out->write(utf8_bom, 3);
tracks[i].out->puts_unl(s);
tracks[i].out->puts_unl("\n[Events]\nFormat: Marked, Start, End, "
"Style, Name, MarginL, MarginR, MarginV, "
"Effect, Text\n");
sconv = s;
safefree(s);
tracks[i].out->write_bom(tracks[i].sub_charset);
sconv += "\n[Events]\nFormat: Marked, Start, End, "
"Style, Name, MarginL, MarginR, MarginV, Effect, Text\n";
from_utf8(tracks[i].conv_handle, sconv);
tracks[i].out->puts_unl(sconv.c_str());
}
}
}
@ -583,9 +605,8 @@ static void handle_data(KaxBlock *block, int64_t block_duration,
fields[7] + comma; // Effect
// Do the charset conversion.
s = from_utf8(tracks[i].conv_handle, fields[8].c_str());
line += string(s) + "\n";
safefree(s);
line += fields[8] + "\n";
from_utf8(tracks[i].conv_handle, line);
// Now store that entry.
ssa_line.num = num;

View File

@ -374,6 +374,41 @@ bool mm_io_c::restore_pos() {
return true;
}
/*
 * Writes the byte order mark (BOM) that belongs to 'charset' via write().
 *
 * Recognized names (compared case sensitively with strcmp): UTF-8, UTF-16,
 * UTF-16LE, UTF-16BE, UTF-32, UTF-32LE, UTF-32BE and the same names
 * without the dash. The names without an explicit byte order get the
 * little endian BOM.
 *
 * Returns true if the complete BOM was written. Returns false without
 * writing anything if 'charset' is NULL or not one of the names above,
 * and false if write() did not report the full BOM length.
 */
bool mm_io_c::write_bom(const char *charset) {
  const unsigned char utf8_bom[3] = {0xef, 0xbb, 0xbf};
  const unsigned char utf16le_bom[2] = {0xff, 0xfe};
  const unsigned char utf16be_bom[2] = {0xfe, 0xff};
  const unsigned char utf32le_bom[4] = {0xff, 0xfe, 0x00, 0x00};
  // U+FEFF encoded as big endian UTF-32 is 00 00 FE FF (the last two
  // bytes were swapped before, which produced an invalid code point).
  const unsigned char utf32be_bom[4] = {0x00, 0x00, 0xfe, 0xff};
  const unsigned char *bom;
  int bom_len;

  if (charset == NULL)
    return false;

  if (!strcmp(charset, "UTF-8") || !strcmp(charset, "UTF8")) {
    bom_len = 3;
    bom = utf8_bom;
  } else if (!strcmp(charset, "UTF-16") || !strcmp(charset, "UTF-16LE") ||
             !strcmp(charset, "UTF16") || !strcmp(charset, "UTF16LE")) {
    bom_len = 2;
    bom = utf16le_bom;
  } else if (!strcmp(charset, "UTF-16BE") || !strcmp(charset, "UTF16BE")) {
    bom_len = 2;
    bom = utf16be_bom;
  } else if (!strcmp(charset, "UTF-32") || !strcmp(charset, "UTF-32LE") ||
             !strcmp(charset, "UTF32") || !strcmp(charset, "UTF32LE")) {
    bom_len = 4;
    bom = utf32le_bom;
  } else if (!strcmp(charset, "UTF-32BE") || !strcmp(charset, "UTF32BE")) {
    bom_len = 4;
    bom = utf32be_bom;
  } else
    return false;

  return (write(bom, bom_len) == bom_len);
}
/*
* Dummy class for output to /dev/null. Needed for two pass stuff.
*/

View File

@ -66,6 +66,7 @@ public:
virtual string getline();
virtual bool getline2(string &s);
virtual size_t puts_unl(const char *s);
virtual bool write_bom(const char *charset);
virtual void save_pos(int64_t new_pos = -1);
virtual bool restore_pos();

View File

@ -99,6 +99,9 @@ int srt_reader_c::read(generic_packetizer_c *) {
while (1) {
if (!mm_io->getline2(s))
break;
strip(s);
if (s.length() == 0)
continue;
if (!mm_io->getline2(s))
break;
if ((s.length() < 29) || !issrttimecode(s.c_str()))