Merge branch 'pr/n599_Izaron'

This commit is contained in:
cfsmp3 2017-01-04 17:59:06 +01:00
commit f8aae84bc4
6 changed files with 179 additions and 78 deletions

View File

@ -143,6 +143,7 @@ struct ccx_s_options // Options from user parameters
unsigned send_to_srv;
enum ccx_output_format write_format; // 0=Raw, 1=srt, 2=SMI
int use_ass_instead_of_ssa;
int use_webvtt_styling;
LLONG debug_mask; // dbg_print will use this mask to print or ignore different types
LLONG debug_mask_on_debug; // If we're using temp_debug to enable/disable debug "live", this is the mask when temp_debug=1
/* Networking */

View File

@ -325,6 +325,15 @@ unsigned get_decoder_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer
return (unsigned)(buffer - orig); // Return length
}
/**
 * Locate the visible (non-transparent) span of one caption row.
 *
 * @param first    out: index of the first column whose color is not COL_TRANSPARENT
 * @param last     out: index of the last column whose color is not COL_TRANSPARENT
 * @param line_num row of the screen to inspect
 * @param data     caption screen whose colors[][] grid is scanned
 *
 * Columns 0..32 are examined. If every one of them is transparent, the
 * function returns with *first > *last (33 and 32), so callers iterating
 * `for (i = *first; i <= *last; i++)` simply do nothing instead of the scan
 * walking off either end of the row.
 */
void get_sentence_borders(int *first, int *last, int line_num, struct eia608_screen *data) {
	*first = 0;
	*last = 32;
	// Bounded forward scan: a fully transparent row must not be read past column 32.
	while (*first <= 32 && data->colors[line_num][*first] == COL_TRANSPARENT)
		(*first)++;
	// Never scan backwards past *first, so the index cannot drop below 0.
	while (*last >= *first && data->colors[line_num][*last] == COL_TRANSPARENT)
		(*last)--;
}
/*void delete_all_lines_but_current(ccx_decoder_608_context *context, struct eia608_screen *data, int row)
{
for (int i=0;i<15;i++)

View File

@ -28,6 +28,7 @@ int clever_capitalize(struct encoder_ctx *context, int line_num, struct eia608_s
void telx_correct_case(char *sub_line);
unsigned get_decoder_line_encoded_for_gui(unsigned char *buffer, int line_num, struct eia608_screen *data);
unsigned get_decoder_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer, int line_num, struct eia608_screen *data);
// Stores in *first / *last the index of the first / last column of row line_num
// whose entry in data->colors is not COL_TRANSPARENT.
void get_sentence_borders(int *first, int *last, int line_num, struct eia608_screen *data);
int string_cmp(const void *p1, const void *p2);
int string_cmp_function(const void *p1, const void *p2, void *arg);

View File

@ -106,8 +106,6 @@ int write_xtimestamp_header(struct encoder_ctx *context)
write(context->out->fh, context->buffer, used);
}
// Add the additional CRLF to finish the header
write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
context->wrote_webvtt_sync_header = 1; // Do it even if couldn't write the header, because it won't be possible anyway
}
@ -193,6 +191,93 @@ int write_cc_subtitle_as_webvtt(struct cc_subtitle *sub, struct encoder_ctx *con
return ret;
}
// TODO: move this repeating function from ccx_encoders_g608.c to the files ccx_encoders_helpers.(c|h)
/**
 * Encode the first 32 characters of one caption row into `buffer` using the
 * encoder's configured output encoding.
 *
 * @param ctx      encoder context; only ctx->encoding is read
 * @param buffer   destination for the encoded bytes (caller provides the space)
 * @param line_num row of data->characters to encode
 * @param data     caption screen holding the row
 * @return number of bytes written to buffer
 */
int get_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer, int line_num, struct eia608_screen *data)
{
	unsigned char *orig = buffer;
	const unsigned char *line = data->characters[line_num];
	for (int i = 0; i < 32; i++)
	{
		int bytes = 0;
		switch (ctx->encoding)
		{
			case CCX_ENC_UTF_8:
				bytes = get_char_in_utf_8(buffer, line[i]);
				break;
			case CCX_ENC_LATIN_1:
				get_char_in_latin_1(buffer, line[i]);
				bytes = 1;
				break;
			case CCX_ENC_UNICODE:
				get_char_in_unicode(buffer, line[i]);
				bytes = 2;
				// Without this break the ASCII case below overwrote the first
				// byte of the character just encoded and advanced by only 1.
				break;
			case CCX_ENC_ASCII:
				*buffer = line[i];
				bytes = 1;
				break;
			default:
				// Unknown encoding: emit nothing for this character (bytes stays 0).
				break;
		}
		buffer += bytes;
	}
	return (int)(buffer - orig); // Return length
}
/**
 * Record, per column, where a color span starts and ends on one caption row.
 *
 * Each entry of color_events packs two events into one int:
 *   - low 16 bits:  color that starts at this column (0 / COL_WHITE = none)
 *   - bits 16 up:   color that ends after this column (0 / COL_WHITE = none)
 * Values are OR-ed into the array, so the caller is expected to pass it
 * zero-initialized and large enough to be indexed up to the row's last
 * non-transparent column.
 *
 * @param color_events in/out event array, indexed by column
 * @param line_num     row of data->colors to analyse
 * @param data         caption screen holding the row
 */
void get_color_events(int *color_events, int line_num, struct eia608_screen *data)
{
	int first, last;
	get_sentence_borders(&first, &last, line_num, data);

	int last_color = COL_WHITE;
	for (int i = first; i <= last; i++)
	{
		int color = data->colors[line_num][i];
		if (color == last_color)
			continue;

		// It does not make sense to keep the default white color in the events.
		// WebVTT supports colors only in [COL_WHITE..COL_MAGENTA].
		if (color <= COL_MAGENTA)
			color_events[i] |= color; // Open the new color at this column

		// last_color is non-white only after an earlier iteration set it, so i > first here.
		if (last_color != COL_WHITE && last_color <= COL_MAGENTA)
			color_events[i - 1] |= last_color << 16; // Close the old color after the previous column

		last_color = color;
	}

	// Close whatever is still open at the end of the row. Apply the same
	// "supported color" filter as inside the loop: a color that never produced
	// an open event must not produce a close event either.
	if (last_color != COL_WHITE && last_color <= COL_MAGENTA)
	{
		color_events[last] |= last_color << 16;
	}
}
/**
 * Record, per column, where a font style span starts and ends on one caption row.
 *
 * Each entry of font_events packs two events into one int:
 *   - low 16 bits:  font bits that start at this column (FONT_REGULAR = none)
 *   - bits 16 up:   font bits that end after this column (FONT_REGULAR = none)
 * Values are OR-ed into the array, so the caller is expected to pass it
 * zero-initialized and large enough to be indexed up to the row's last
 * non-transparent column.
 *
 * @param font_events in/out event array, indexed by column
 * @param line_num    row of data->fonts to analyse
 * @param data        caption screen holding the row
 */
void get_font_events(int *font_events, int line_num, struct eia608_screen *data)
{
	int first, last;
	get_sentence_borders(&first, &last, line_num, data);

	int last_font = FONT_REGULAR;
	for (int i = first; i <= last; i++)
	{
		int font = data->fonts[line_num][i];
		if (font == last_font)
			continue;

		// It does not make sense to keep the regular font in the events.
		if (font != FONT_REGULAR)
			font_events[i] |= font; // Open the new font at this column

		// The old font covers columns up to i - 1, so its close event belongs
		// on the previous column (same convention as get_color_events).
		// Putting it on column i would style one character too many and
		// overlap with a font opened at i.
		// last_font is non-regular only after an earlier iteration set it, so i > first here.
		if (last_font != FONT_REGULAR)
			font_events[i - 1] |= last_font << 16;

		last_font = font;
	}

	// Close whatever style is still active at the end of the row.
	if (last_font != FONT_REGULAR)
	{
		font_events[last] |= last_font << 16;
	}
}
int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *context)
{
int used;
@ -203,8 +288,6 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co
int wrote_something = 0;
ms_start = data->start_time;
int prev_line_start = -1, prev_line_end = -1; // Column in which the previous line started and ended, for autodash
int prev_line_center1 = -1, prev_line_center2 = -1; // Center column of previous line text
int empty_buf = 1;
char timeline[128] = "";
for (int i = 0; i<15; i++)
@ -244,92 +327,92 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co
{
if (data->row_used[i])
{
if (context->sentence_cap)
{
if (clever_capitalize(context, i, data))
correct_case_with_dictionary(i, data);
}
if (context->autodash && context->trim_subs)
{
int first = 0, last = 31, center1 = -1, center2 = -1;
unsigned char *line = data->characters[i];
int do_dash = 1, colon_pos = -1;
find_limit_characters(line, &first, &last, CCX_DECODER_608_SCREEN_WIDTH);
if (first == -1 || last == -1) // Probably a bug somewhere though
break;
// Is there a speaker named, for example: TOM: What are you doing?
for (int j = first; j <= last; j++)
{
if (line[j] == ':')
{
colon_pos = j;
break;
}
if (!isupper(line[j]))
break;
}
if (prev_line_start == -1)
do_dash = 0;
if (first == prev_line_start) // Case of left alignment
do_dash = 0;
if (last == prev_line_end) // Right align
do_dash = 0;
if (first>prev_line_start && last<prev_line_end) // Fully contained
do_dash = 0;
if ((first>prev_line_start && first<prev_line_end) || // Overlap
(last>prev_line_start && last<prev_line_end))
do_dash = 0;
int length = get_line_encoded(context, context->subline, i, data);
center1 = (first + last) / 2;
if (colon_pos != -1)
{
while (colon_pos<CCX_DECODER_608_SCREEN_WIDTH &&
(line[colon_pos] == ':' ||
line[colon_pos] == ' ' ||
line[colon_pos] == 0x89))
colon_pos++; // Find actual text
center2 = (colon_pos + last) / 2;
}
else
center2 = center1;
if (center1 >= prev_line_center1 - 1 && center1 <= prev_line_center1 + 1 && center1 != -1) // Center align
do_dash = 0;
if (center2 >= prev_line_center2 - 2 && center1 <= prev_line_center2 + 2 && center1 != -1) // Center align
do_dash = 0;
if (do_dash)
{
written = write(context->out->fh, "- ", 2);
if (written != 2)
return -1;
}
prev_line_start = first;
prev_line_end = last;
prev_line_center1 = center1;
prev_line_center2 = center2;
}
int length = get_decoder_line_encoded(context, context->subline, i, data);
if (context->encoding != CCX_ENC_UNICODE)
{
dbg_print(CCX_DMT_DECODER_608, "\r");
dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline);
}
written = write(context->out->fh, context->subline, length);
if (written != length)
return -1;
int *color_events;
int *font_events;
if (ccx_options.use_webvtt_styling)
{
color_events = (int *)malloc(sizeof(int) * length);
font_events = (int *)malloc(sizeof(int) * length);
memset(color_events, 0, sizeof(int) * length);
memset(font_events, 0, sizeof(int) * length);
get_color_events(color_events, i, data);
get_font_events(font_events, i, data);
}
// Write symbol by symbol with events
for (int j = 0; j < length; j++)
{
if (ccx_options.use_webvtt_styling)
{
// opening events for fonts
int open_font = font_events[j] & 0xFF; // Last 16 bytes
if (open_font != FONT_REGULAR)
{
if (open_font & FONT_ITALICS)
write(context->out->fh, strdup("<i>"), 3);
if (open_font & FONT_UNDERLINED)
write(context->out->fh, strdup("<u>"), 3);
}
// opening events for colors
int open_color = color_events[j] & 0xFF; // Last 16 bytes
if (open_color != COL_WHITE)
{
write(context->out->fh, strdup("<c."), 3);
write(context->out->fh, color_text[open_color][0], strlen(color_text[open_color][0]));
write(context->out->fh, ">", 1);
}
}
// write current text symbol
write(context->out->fh, &(context->subline[j]), 1);
if (ccx_options.use_webvtt_styling)
{
// closing events for colors
int close_color = color_events[j] >> 16; // First 16 bytes
if (close_color != COL_WHITE)
{
write(context->out->fh, strdup("</c>"), 4);
}
// closing events for fonts
int close_font = font_events[j] >> 16; // First 16 bytes
if (close_font != FONT_REGULAR)
{
if (close_font & FONT_ITALICS)
write(context->out->fh, strdup("</i>"), 4);
if (close_font & FONT_UNDERLINED)
write(context->out->fh, strdup("</u>"), 4);
}
}
}
if (ccx_options.use_webvtt_styling)
{
free(color_events);
free(font_events);
}
written = write(context->out->fh,
context->encoded_crlf, context->encoded_crlf_length);
if (written != context->encoded_crlf_length)
return -1;
wrote_something = 1;
// fprintf (wb->fh,encoded_crlf);
}
}
dbg_print(CCX_DMT_DECODER_608, "- - - - - - - - - - - -\r\n");
// fprintf (wb->fh, encoded_crlf);
written = write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
if (written != context->encoded_crlf_length)
return -1;

View File

@ -1650,7 +1650,9 @@ int dvbsub_decode(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, co
enc_ctx->srt_counter = enc_ctx->prev->srt_counter; //for dvb subs we need to update the current srt counter because we always encode the previous subtitle (and the counter is increased for the previous context)
enc_ctx->prev_start = enc_ctx->prev->prev_start;
sub->prev->got_output = 0;
if (enc_ctx->write_format == CCX_OF_WEBVTT) { // we already wrote header, but since we encoded last sub, we must prevent multiple headers in future
enc_ctx->wrote_webvtt_sync_header = 1;
}
}
memcpy(enc_ctx->prev, enc_ctx, sizeof(struct encoder_ctx)); //we save the current encoder context
memcpy(sub->prev, sub, sizeof(struct cc_subtitle)); //we save the current subtitle

View File

@ -180,9 +180,13 @@ void set_output_format (struct ccx_s_options *opt, const char *format)
opt->write_format = CCX_OF_SSA;
if (strcmp (format,"ass")==0)
opt->use_ass_instead_of_ssa = 1;
} else if (strcmp(format, "webvtt") == 0)
}
else if (strcmp(format, "webvtt")==0 || strcmp(format, "webvtt-full")==0) {
opt->write_format = CCX_OF_WEBVTT;
else if (strcmp (format,"sami")==0 || strcmp (format,"smi")==0)
if (strcmp(format, "webvtt-full")==0)
opt->use_webvtt_styling = 1;
}
else if (strcmp(format, "sami") == 0 || strcmp(format, "smi") == 0)
opt->write_format=CCX_OF_SAMI;
else if (strcmp (format,"transcript")==0 || strcmp (format,"txt")==0)
{
@ -384,6 +388,7 @@ void print_usage (void)
mprint (" srt -> SubRip (default, so not actually needed).\n");
mprint (" ass/ssa -> SubStation Alpha.\n");
mprint (" webvtt -> WebVTT format\n");
mprint (" webvtt-full -> WebVTT format with styling\n");
mprint (" sami -> MS Synchronized Accesible Media Interface.\n");
mprint (" bin -> CC data in CCExtractor's own binary format.\n");
mprint (" raw -> CC data in McPoodle's Broadcast format.\n");