mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2024-12-24 20:01:42 +00:00
Merge branch 'pr/n599_Izaron'
This commit is contained in:
commit
f8aae84bc4
@ -143,6 +143,7 @@ struct ccx_s_options // Options from user parameters
|
||||
unsigned send_to_srv;
|
||||
enum ccx_output_format write_format; // 0=Raw, 1=srt, 2=SMI
|
||||
int use_ass_instead_of_ssa;
|
||||
int use_webvtt_styling;
|
||||
LLONG debug_mask; // dbg_print will use this mask to print or ignore different types
|
||||
LLONG debug_mask_on_debug; // If we're using temp_debug to enable/disable debug "live", this is the mask when temp_debug=1
|
||||
/* Networking */
|
||||
|
@ -325,6 +325,15 @@ unsigned get_decoder_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer
|
||||
return (unsigned)(buffer - orig); // Return length
|
||||
}
|
||||
|
||||
void get_sentence_borders(int *first, int *last, int line_num, struct eia608_screen *data) {
|
||||
*first = 0;
|
||||
*last = 32;
|
||||
while (data->colors[line_num][*first] == COL_TRANSPARENT)
|
||||
(*first)++;
|
||||
while (data->colors[line_num][*last] == COL_TRANSPARENT)
|
||||
(*last)--;
|
||||
}
|
||||
|
||||
/*void delete_all_lines_but_current(ccx_decoder_608_context *context, struct eia608_screen *data, int row)
|
||||
{
|
||||
for (int i=0;i<15;i++)
|
||||
|
@ -28,6 +28,7 @@ int clever_capitalize(struct encoder_ctx *context, int line_num, struct eia608_s
|
||||
void telx_correct_case(char *sub_line);
|
||||
unsigned get_decoder_line_encoded_for_gui(unsigned char *buffer, int line_num, struct eia608_screen *data);
|
||||
unsigned get_decoder_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer, int line_num, struct eia608_screen *data);
|
||||
void get_sentence_borders(int *first, int *last, int line_num, struct eia608_screen *data);
|
||||
|
||||
int string_cmp(const void *p1, const void *p2);
|
||||
int string_cmp_function(const void *p1, const void *p2, void *arg);
|
||||
|
@ -106,8 +106,6 @@ int write_xtimestamp_header(struct encoder_ctx *context)
|
||||
write(context->out->fh, context->buffer, used);
|
||||
|
||||
}
|
||||
// Add the additional CRLF to finish the header
|
||||
write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
|
||||
context->wrote_webvtt_sync_header = 1; // Do it even if couldn't write the header, because it won't be possible anyway
|
||||
}
|
||||
|
||||
@ -193,6 +191,93 @@ int write_cc_subtitle_as_webvtt(struct cc_subtitle *sub, struct encoder_ctx *con
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// TODO: move this repeating function from ccx_encoders_g608.c to the files ccx_encoders_helpers.(c|h)
|
||||
int get_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer, int line_num, struct eia608_screen *data)
|
||||
{
|
||||
unsigned char *orig = buffer;
|
||||
unsigned char *line = data->characters[line_num];
|
||||
for (int i = 0; i < 32; i++)
|
||||
{
|
||||
int bytes = 0;
|
||||
switch (ctx->encoding)
|
||||
{
|
||||
case CCX_ENC_UTF_8:
|
||||
bytes = get_char_in_utf_8(buffer, line[i]);
|
||||
break;
|
||||
case CCX_ENC_LATIN_1:
|
||||
get_char_in_latin_1(buffer, line[i]);
|
||||
bytes = 1;
|
||||
break;
|
||||
case CCX_ENC_UNICODE:
|
||||
get_char_in_unicode(buffer, line[i]);
|
||||
bytes = 2;
|
||||
case CCX_ENC_ASCII:
|
||||
*buffer = line[i];
|
||||
bytes = 1;
|
||||
break;
|
||||
}
|
||||
buffer += bytes;
|
||||
}
|
||||
return (unsigned int)(buffer - orig); // Return length
|
||||
}
|
||||
|
||||
void get_color_events(int *color_events, int line_num, struct eia608_screen *data)
|
||||
{
|
||||
int first, last;
|
||||
get_sentence_borders(&first, &last, line_num, data);
|
||||
|
||||
int last_color = COL_WHITE;
|
||||
for (int i = first; i <= last; i++)
|
||||
{
|
||||
if (data->colors[line_num][i] != last_color)
|
||||
{
|
||||
// It does not make sense to keep the default white color in the events
|
||||
// WebVTT supports colors only is [COL_WHITE..COL_MAGENTA]
|
||||
if (data->colors[line_num][i] <= COL_MAGENTA)
|
||||
color_events[i] |= data->colors[line_num][i]; // Add this new color
|
||||
|
||||
if (last_color != COL_WHITE && last_color <= COL_MAGENTA)
|
||||
color_events[i - 1] |= last_color << 16; // Remove old color (event in the second part of the integer)
|
||||
|
||||
last_color = data->colors[line_num][i];
|
||||
}
|
||||
}
|
||||
|
||||
if (last_color != COL_WHITE)
|
||||
{
|
||||
color_events[last] |= last_color << 16;
|
||||
}
|
||||
}
|
||||
|
||||
void get_font_events(int *font_events, int line_num, struct eia608_screen *data)
|
||||
{
|
||||
int first, last;
|
||||
get_sentence_borders(&first, &last, line_num, data);
|
||||
|
||||
int last_font = FONT_REGULAR;
|
||||
for (int i = first; i <= last; i++)
|
||||
{
|
||||
if (data->fonts[line_num][i] != last_font)
|
||||
{
|
||||
// It does not make sense to keep the regular font in the events
|
||||
// WebVTT supports all fonts from C608
|
||||
if (data->fonts[line_num][i] != FONT_REGULAR) // Really can do it without condition because FONT_REGULAR == 0
|
||||
font_events[i] |= data->fonts[line_num][i]; // Add this new font
|
||||
|
||||
if (last_font != FONT_REGULAR)
|
||||
font_events[i] |= last_font << 16; // Remove old font (event in the second part of the integer)
|
||||
|
||||
last_font = data->fonts[line_num][i];
|
||||
}
|
||||
}
|
||||
|
||||
if (last_font != FONT_REGULAR)
|
||||
{
|
||||
font_events[last] |= last_font << 16;
|
||||
}
|
||||
}
|
||||
|
||||
int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *context)
|
||||
{
|
||||
int used;
|
||||
@ -203,8 +288,6 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co
|
||||
int wrote_something = 0;
|
||||
ms_start = data->start_time;
|
||||
|
||||
int prev_line_start = -1, prev_line_end = -1; // Column in which the previous line started and ended, for autodash
|
||||
int prev_line_center1 = -1, prev_line_center2 = -1; // Center column of previous line text
|
||||
int empty_buf = 1;
|
||||
char timeline[128] = "";
|
||||
for (int i = 0; i<15; i++)
|
||||
@ -244,92 +327,92 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co
|
||||
{
|
||||
if (data->row_used[i])
|
||||
{
|
||||
if (context->sentence_cap)
|
||||
{
|
||||
if (clever_capitalize(context, i, data))
|
||||
correct_case_with_dictionary(i, data);
|
||||
}
|
||||
if (context->autodash && context->trim_subs)
|
||||
{
|
||||
int first = 0, last = 31, center1 = -1, center2 = -1;
|
||||
unsigned char *line = data->characters[i];
|
||||
int do_dash = 1, colon_pos = -1;
|
||||
find_limit_characters(line, &first, &last, CCX_DECODER_608_SCREEN_WIDTH);
|
||||
if (first == -1 || last == -1) // Probably a bug somewhere though
|
||||
break;
|
||||
// Is there a speaker named, for example: TOM: What are you doing?
|
||||
for (int j = first; j <= last; j++)
|
||||
{
|
||||
if (line[j] == ':')
|
||||
{
|
||||
colon_pos = j;
|
||||
break;
|
||||
}
|
||||
if (!isupper(line[j]))
|
||||
break;
|
||||
}
|
||||
if (prev_line_start == -1)
|
||||
do_dash = 0;
|
||||
if (first == prev_line_start) // Case of left alignment
|
||||
do_dash = 0;
|
||||
if (last == prev_line_end) // Right align
|
||||
do_dash = 0;
|
||||
if (first>prev_line_start && last<prev_line_end) // Fully contained
|
||||
do_dash = 0;
|
||||
if ((first>prev_line_start && first<prev_line_end) || // Overlap
|
||||
(last>prev_line_start && last<prev_line_end))
|
||||
do_dash = 0;
|
||||
int length = get_line_encoded(context, context->subline, i, data);
|
||||
|
||||
center1 = (first + last) / 2;
|
||||
if (colon_pos != -1)
|
||||
{
|
||||
while (colon_pos<CCX_DECODER_608_SCREEN_WIDTH &&
|
||||
(line[colon_pos] == ':' ||
|
||||
line[colon_pos] == ' ' ||
|
||||
line[colon_pos] == 0x89))
|
||||
colon_pos++; // Find actual text
|
||||
center2 = (colon_pos + last) / 2;
|
||||
}
|
||||
else
|
||||
center2 = center1;
|
||||
|
||||
if (center1 >= prev_line_center1 - 1 && center1 <= prev_line_center1 + 1 && center1 != -1) // Center align
|
||||
do_dash = 0;
|
||||
if (center2 >= prev_line_center2 - 2 && center1 <= prev_line_center2 + 2 && center1 != -1) // Center align
|
||||
do_dash = 0;
|
||||
|
||||
if (do_dash)
|
||||
{
|
||||
written = write(context->out->fh, "- ", 2);
|
||||
if (written != 2)
|
||||
return -1;
|
||||
}
|
||||
prev_line_start = first;
|
||||
prev_line_end = last;
|
||||
prev_line_center1 = center1;
|
||||
prev_line_center2 = center2;
|
||||
|
||||
}
|
||||
int length = get_decoder_line_encoded(context, context->subline, i, data);
|
||||
if (context->encoding != CCX_ENC_UNICODE)
|
||||
{
|
||||
dbg_print(CCX_DMT_DECODER_608, "\r");
|
||||
dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline);
|
||||
}
|
||||
written = write(context->out->fh, context->subline, length);
|
||||
if (written != length)
|
||||
return -1;
|
||||
|
||||
int *color_events;
|
||||
int *font_events;
|
||||
if (ccx_options.use_webvtt_styling)
|
||||
{
|
||||
color_events = (int *)malloc(sizeof(int) * length);
|
||||
font_events = (int *)malloc(sizeof(int) * length);
|
||||
memset(color_events, 0, sizeof(int) * length);
|
||||
memset(font_events, 0, sizeof(int) * length);
|
||||
|
||||
get_color_events(color_events, i, data);
|
||||
get_font_events(font_events, i, data);
|
||||
}
|
||||
|
||||
// Write symbol by symbol with events
|
||||
for (int j = 0; j < length; j++)
|
||||
{
|
||||
if (ccx_options.use_webvtt_styling)
|
||||
{
|
||||
// opening events for fonts
|
||||
int open_font = font_events[j] & 0xFF; // Last 16 bytes
|
||||
if (open_font != FONT_REGULAR)
|
||||
{
|
||||
if (open_font & FONT_ITALICS)
|
||||
write(context->out->fh, strdup("<i>"), 3);
|
||||
if (open_font & FONT_UNDERLINED)
|
||||
write(context->out->fh, strdup("<u>"), 3);
|
||||
}
|
||||
|
||||
// opening events for colors
|
||||
int open_color = color_events[j] & 0xFF; // Last 16 bytes
|
||||
if (open_color != COL_WHITE)
|
||||
{
|
||||
write(context->out->fh, strdup("<c."), 3);
|
||||
write(context->out->fh, color_text[open_color][0], strlen(color_text[open_color][0]));
|
||||
write(context->out->fh, ">", 1);
|
||||
}
|
||||
}
|
||||
|
||||
// write current text symbol
|
||||
write(context->out->fh, &(context->subline[j]), 1);
|
||||
|
||||
if (ccx_options.use_webvtt_styling)
|
||||
{
|
||||
// closing events for colors
|
||||
int close_color = color_events[j] >> 16; // First 16 bytes
|
||||
if (close_color != COL_WHITE)
|
||||
{
|
||||
write(context->out->fh, strdup("</c>"), 4);
|
||||
}
|
||||
|
||||
// closing events for fonts
|
||||
int close_font = font_events[j] >> 16; // First 16 bytes
|
||||
if (close_font != FONT_REGULAR)
|
||||
{
|
||||
if (close_font & FONT_ITALICS)
|
||||
write(context->out->fh, strdup("</i>"), 4);
|
||||
if (close_font & FONT_UNDERLINED)
|
||||
write(context->out->fh, strdup("</u>"), 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ccx_options.use_webvtt_styling)
|
||||
{
|
||||
free(color_events);
|
||||
free(font_events);
|
||||
}
|
||||
|
||||
written = write(context->out->fh,
|
||||
context->encoded_crlf, context->encoded_crlf_length);
|
||||
if (written != context->encoded_crlf_length)
|
||||
return -1;
|
||||
|
||||
wrote_something = 1;
|
||||
// fprintf (wb->fh,encoded_crlf);
|
||||
}
|
||||
}
|
||||
dbg_print(CCX_DMT_DECODER_608, "- - - - - - - - - - - -\r\n");
|
||||
|
||||
// fprintf (wb->fh, encoded_crlf);
|
||||
written = write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
|
||||
if (written != context->encoded_crlf_length)
|
||||
return -1;
|
||||
|
@ -1650,7 +1650,9 @@ int dvbsub_decode(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, co
|
||||
enc_ctx->srt_counter = enc_ctx->prev->srt_counter; //for dvb subs we need to update the current srt counter because we always encode the previous subtitle (and the counter is increased for the previous context)
|
||||
enc_ctx->prev_start = enc_ctx->prev->prev_start;
|
||||
sub->prev->got_output = 0;
|
||||
|
||||
if (enc_ctx->write_format == CCX_OF_WEBVTT) { // we already wrote header, but since we encoded last sub, we must prevent multiple headers in future
|
||||
enc_ctx->wrote_webvtt_sync_header = 1;
|
||||
}
|
||||
}
|
||||
memcpy(enc_ctx->prev, enc_ctx, sizeof(struct encoder_ctx)); //we save the current encoder context
|
||||
memcpy(sub->prev, sub, sizeof(struct cc_subtitle)); //we save the current subtitle
|
||||
|
@ -180,9 +180,13 @@ void set_output_format (struct ccx_s_options *opt, const char *format)
|
||||
opt->write_format = CCX_OF_SSA;
|
||||
if (strcmp (format,"ass")==0)
|
||||
opt->use_ass_instead_of_ssa = 1;
|
||||
} else if (strcmp(format, "webvtt") == 0)
|
||||
}
|
||||
else if (strcmp(format, "webvtt")==0 || strcmp(format, "webvtt-full")==0) {
|
||||
opt->write_format = CCX_OF_WEBVTT;
|
||||
else if (strcmp (format,"sami")==0 || strcmp (format,"smi")==0)
|
||||
if (strcmp(format, "webvtt-full")==0)
|
||||
opt->use_webvtt_styling = 1;
|
||||
}
|
||||
else if (strcmp(format, "sami") == 0 || strcmp(format, "smi") == 0)
|
||||
opt->write_format=CCX_OF_SAMI;
|
||||
else if (strcmp (format,"transcript")==0 || strcmp (format,"txt")==0)
|
||||
{
|
||||
@ -384,6 +388,7 @@ void print_usage (void)
|
||||
mprint (" srt -> SubRip (default, so not actually needed).\n");
|
||||
mprint (" ass/ssa -> SubStation Alpha.\n");
|
||||
mprint (" webvtt -> WebVTT format\n");
|
||||
mprint (" webvtt-full -> WebVTT format with styling\n");
|
||||
mprint (" sami -> MS Synchronized Accesible Media Interface.\n");
|
||||
mprint (" bin -> CC data in CCExtractor's own binary format.\n");
|
||||
mprint (" raw -> CC data in McPoodle's Broadcast format.\n");
|
||||
|
Loading…
Reference in New Issue
Block a user