mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-24 11:54:01 +00:00
Some small bugfixes related to subtitle handling.
This commit is contained in:
parent
5812cb615e
commit
53417a48bb
@ -1,5 +1,11 @@
|
||||
2003-09-16 Moritz Bunkus <moritz@bunkus.org>
|
||||
|
||||
* mkvmerge: bugfix: The SRT reader would abort if there was more
|
||||
than one empty line between subtitle entries.
|
||||
|
||||
* mkvextract: bugfix: Proper BOMs are written according to the
|
||||
desired charset when extracting text subtitles.
|
||||
|
||||
* Released v0.7.0.
|
||||
|
||||
* mkvextract: Add an UTF-8 BOM to extracted SSA/ASS and SRT
|
||||
|
@ -531,6 +531,16 @@ char *to_utf8(int handle, const char *local) {
|
||||
return convert_charset(kax_convs[handle].ict_to_utf8, local);
|
||||
}
|
||||
|
||||
/*
 * In-place variant of to_utf8() for string objects: runs the contents of
 * 'local' through the C string overload using the conversion identified
 * by 'handle', stores the result back into 'local' and returns a
 * reference to that same string.
 */
string &to_utf8(int handle, string &local) {
  char *converted = to_utf8(handle, local.c_str());

  // Copy the result into the caller's string before the temporary
  // buffer handed back by the C string overload is released.
  local = converted;
  safefree(converted);

  return local;
}
|
||||
|
||||
char *from_utf8(int handle, const char *utf8) {
|
||||
char *copy;
|
||||
|
||||
@ -546,6 +556,16 @@ char *from_utf8(int handle, const char *utf8) {
|
||||
return convert_charset(kax_convs[handle].ict_from_utf8, utf8);
|
||||
}
|
||||
|
||||
/*
 * In-place variant of from_utf8() for string objects: runs the contents
 * of 'utf8' through the C string overload using the conversion identified
 * by 'handle', stores the result back into 'utf8' and returns a reference
 * to that same string.
 */
string &from_utf8(int handle, string &utf8) {
  char *converted = from_utf8(handle, utf8.c_str());

  // Copy the result into the caller's string before the temporary
  // buffer handed back by the C string overload is released.
  utf8 = converted;
  safefree(converted);

  return utf8;
}
|
||||
|
||||
/*
|
||||
* Random unique uint32_t numbers
|
||||
*/
|
||||
|
@ -116,6 +116,8 @@ int utf8_init(const char *charset);
|
||||
void utf8_done();
|
||||
char *to_utf8(int handle, const char *local);
|
||||
char *from_utf8(int handle, const char *utf8);
|
||||
string &to_utf8(int handle, string &local);
|
||||
string &from_utf8(int handle, string &utf8);
|
||||
|
||||
void clear_list_of_unique_uint32();
|
||||
bool is_unique_uint32(uint32_t number);
|
||||
|
@ -154,11 +154,12 @@ static bool chapter_format_simple = false;
|
||||
|
||||
void parse_args(int argc, char **argv, char *&file_name, int &mode) {
|
||||
int i, conv_handle;
|
||||
char *colon, *copy;
|
||||
char *colon, *copy, *sub_charset;
|
||||
int64_t tid;
|
||||
kax_track_t track;
|
||||
|
||||
file_name = NULL;
|
||||
sub_charset = NULL;
|
||||
verbose = 0;
|
||||
|
||||
if (argc < 2) {
|
||||
@ -208,6 +209,7 @@ void parse_args(int argc, char **argv, char *&file_name, int &mode) {
|
||||
mxerror("-c lacks a charset.\n");
|
||||
|
||||
conv_handle = utf8_init(argv[i + 1]);
|
||||
sub_charset = argv[i + 1];
|
||||
i++;
|
||||
|
||||
} else if (mode == MODE_TAGS)
|
||||
@ -239,6 +241,7 @@ void parse_args(int argc, char **argv, char *&file_name, int &mode) {
|
||||
track.tid = tid;
|
||||
track.out_name = safestrdup(colon);
|
||||
track.conv_handle = conv_handle;
|
||||
track.sub_charset = safestrdup(sub_charset);
|
||||
tracks.push_back(track);
|
||||
safefree(copy);
|
||||
}
|
||||
|
@ -70,6 +70,7 @@ typedef struct {
|
||||
int64_t default_duration;
|
||||
|
||||
int srt_num;
|
||||
char *sub_charset;
|
||||
int conv_handle;
|
||||
vector<ssa_line_c> ssa_lines;
|
||||
bool warning_printed;
|
||||
|
@ -111,7 +111,6 @@ static void create_output_files() {
|
||||
bool something_to_do, is_ok;
|
||||
unsigned char *c;
|
||||
ogg_packet op;
|
||||
const unsigned char utf8_bom[3] = {0xef, 0xbb, 0xbf};
|
||||
|
||||
something_to_do = false;
|
||||
|
||||
@ -393,24 +392,47 @@ static void create_output_files() {
|
||||
|
||||
} else if (tracks[i].type == TYPESRT) {
|
||||
tracks[i].srt_num = 1;
|
||||
tracks[i].out->write(utf8_bom, 3);
|
||||
tracks[i].out->write_bom(tracks[i].sub_charset);
|
||||
|
||||
} else if (tracks[i].type == TYPESSA) {
|
||||
char *s;
|
||||
unsigned char *pd;
|
||||
int bom_len;
|
||||
string sconv;
|
||||
|
||||
pd = (unsigned char *)tracks[i].private_data;
|
||||
bom_len = 0;
|
||||
// Skip any BOM that might be present.
|
||||
if ((tracks[i].private_size > 3) &&
|
||||
(pd[0] == 0xef) && (pd[1] == 0xbb) && (pd[2] == 0xbf))
|
||||
bom_len = 3;
|
||||
else if ((tracks[i].private_size > 4) &&
|
||||
(pd[0] == 0xff) && (pd[1] == 0xfe) &&
|
||||
(pd[2] == 0x00) && (pd[3] == 0x00))
|
||||
bom_len = 4;
|
||||
else if ((tracks[i].private_size > 4) &&
|
||||
(pd[0] == 0x00) && (pd[1] == 0x00) &&
|
||||
(pd[2] == 0xfe) && (pd[3] == 0xff))
|
||||
bom_len = 4;
|
||||
else if ((tracks[i].private_size > 2) &&
|
||||
(pd[0] == 0xff) && (pd[1] == 0xfe))
|
||||
bom_len = 2;
|
||||
else if ((tracks[i].private_size > 2) &&
|
||||
(pd[0] == 0xfe) && (pd[1] == 0xff))
|
||||
bom_len = 2;
|
||||
pd += bom_len;
|
||||
tracks[i].private_size -= bom_len;
|
||||
|
||||
s = (char *)safemalloc(tracks[i].private_size + 1);
|
||||
memcpy(s, tracks[i].private_data, tracks[i].private_size);
|
||||
memcpy(s, pd, tracks[i].private_size);
|
||||
s[tracks[i].private_size] = 0;
|
||||
pd = (unsigned char *)tracks[i].private_data;
|
||||
if ((pd[0] != 0x00) && (pd[0] != 0xef) && (pd[0] != 0xff))
|
||||
tracks[i].out->write(utf8_bom, 3);
|
||||
tracks[i].out->puts_unl(s);
|
||||
tracks[i].out->puts_unl("\n[Events]\nFormat: Marked, Start, End, "
|
||||
"Style, Name, MarginL, MarginR, MarginV, "
|
||||
"Effect, Text\n");
|
||||
|
||||
sconv = s;
|
||||
safefree(s);
|
||||
tracks[i].out->write_bom(tracks[i].sub_charset);
|
||||
sconv += "\n[Events]\nFormat: Marked, Start, End, "
|
||||
"Style, Name, MarginL, MarginR, MarginV, Effect, Text\n";
|
||||
from_utf8(tracks[i].conv_handle, sconv);
|
||||
tracks[i].out->puts_unl(sconv.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -583,9 +605,8 @@ static void handle_data(KaxBlock *block, int64_t block_duration,
|
||||
fields[7] + comma; // Effect
|
||||
|
||||
// Do the charset conversion.
|
||||
s = from_utf8(tracks[i].conv_handle, fields[8].c_str());
|
||||
line += string(s) + "\n";
|
||||
safefree(s);
|
||||
line += fields[8] + "\n";
|
||||
from_utf8(tracks[i].conv_handle, line);
|
||||
|
||||
// Now store that entry.
|
||||
ssa_line.num = num;
|
||||
|
@ -374,6 +374,41 @@ bool mm_io_c::restore_pos() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool mm_io_c::write_bom(const char *charset) {
|
||||
const unsigned char utf8_bom[3] = {0xef, 0xbb, 0xbf};
|
||||
const unsigned char utf16le_bom[2] = {0xff, 0xfe};
|
||||
const unsigned char utf16be_bom[2] = {0xfe, 0xff};
|
||||
const unsigned char utf32le_bom[4] = {0xff, 0xfe, 0x00, 0x00};
|
||||
const unsigned char utf32be_bom[4] = {0x00, 0x00, 0xff, 0xfe};
|
||||
const unsigned char *bom;
|
||||
int bom_len;
|
||||
|
||||
if (charset == NULL)
|
||||
return false;
|
||||
|
||||
if (!strcmp(charset, "UTF-8") || !strcmp(charset, "UTF8")) {
|
||||
bom_len = 3;
|
||||
bom = utf8_bom;
|
||||
} else if (!strcmp(charset, "UTF-16") || !strcmp(charset, "UTF-16LE") ||
|
||||
!strcmp(charset, "UTF16") || !strcmp(charset, "UTF16LE")) {
|
||||
bom_len = 2;
|
||||
bom = utf16le_bom;
|
||||
} else if (!strcmp(charset, "UTF-16BE") || !strcmp(charset, "UTF16BE")) {
|
||||
bom_len = 2;
|
||||
bom = utf16be_bom;
|
||||
} else if (!strcmp(charset, "UTF-32") || !strcmp(charset, "UTF-32LE") ||
|
||||
!strcmp(charset, "UTF32") || !strcmp(charset, "UTF32LE")) {
|
||||
bom_len = 4;
|
||||
bom = utf32le_bom;
|
||||
} else if (!strcmp(charset, "UTF-32BE") || !strcmp(charset, "UTF32BE")) {
|
||||
bom_len = 4;
|
||||
bom = utf32be_bom;
|
||||
} else
|
||||
return false;
|
||||
|
||||
return (write(bom, bom_len) == bom_len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Dummy class for output to /dev/null. Needed for two pass stuff.
|
||||
*/
|
||||
|
@ -66,6 +66,7 @@ public:
|
||||
virtual string getline();
|
||||
virtual bool getline2(string &s);
|
||||
virtual size_t puts_unl(const char *s);
|
||||
virtual bool write_bom(const char *charset);
|
||||
|
||||
virtual void save_pos(int64_t new_pos = -1);
|
||||
virtual bool restore_pos();
|
||||
|
@ -99,6 +99,9 @@ int srt_reader_c::read(generic_packetizer_c *) {
|
||||
while (1) {
|
||||
if (!mm_io->getline2(s))
|
||||
break;
|
||||
strip(s);
|
||||
if (s.length() == 0)
|
||||
continue;
|
||||
if (!mm_io->getline2(s))
|
||||
break;
|
||||
if ((s.length() < 29) || !issrttimecode(s.c_str()))
|
||||
|
Loading…
Reference in New Issue
Block a user