Merge branch 'pr/n599_Izaron'

2024-12-24 20:01:42 +00:00 · 2017-01-04 17:59:06 +01:00 · 2017-01-04 17:59:06 +01:00 · f8aae84bc4
commit f8aae84bc4
parent dfb7d8472c 34a21a931d
6 changed files with 179 additions and 78 deletions
--- a/src/lib_ccx/ccx_common_option.h
+++ b/src/lib_ccx/ccx_common_option.h
@ -143,6 +143,7 @@ struct ccx_s_options // Options from user parameters
 	unsigned send_to_srv;
 	enum ccx_output_format write_format;                // 0=Raw, 1=srt, 2=SMI
 	int use_ass_instead_of_ssa;
+	int use_webvtt_styling;
 	LLONG debug_mask;                                   // dbg_print will use this mask to print or ignore different types
 	LLONG debug_mask_on_debug;                          // If we're using temp_debug to enable/disable debug "live", this is the mask when temp_debug=1
 	/* Networking */
--- a/src/lib_ccx/ccx_encoders_helpers.c
+++ b/src/lib_ccx/ccx_encoders_helpers.c
@ -325,6 +325,15 @@ unsigned get_decoder_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer
 	return (unsigned)(buffer - orig); // Return length
 }

+/*
+ * Finds the first and last non-transparent columns of one caption row.
+ *
+ * first    - out: index of the leftmost column whose color is not COL_TRANSPARENT
+ * last     - out: index of the rightmost column whose color is not COL_TRANSPARENT
+ * line_num - row of the screen to inspect
+ * data     - screen whose colors[line_num][] entries are examined
+ *
+ * Columns 0..32 are scanned, the same range this function has always used.
+ * NOTE(review): this assumes every colors[] row holds at least 33 entries -
+ * confirm against the declaration of struct eia608_screen.
+ *
+ * When every scanned column is transparent the result is *first == 33 and
+ * *last == 32, i.e. an empty range (*first > *last), rather than reading
+ * outside the row.
+ */
+void get_sentence_borders(int *first, int *last, int line_num, struct eia608_screen *data) {
+	*first = 0;
+	*last = 32;
+	// Stop at the right edge so a fully transparent row cannot run past the array
+	while (*first <= 32 && data->colors[line_num][*first] == COL_TRANSPARENT)
+		(*first)++;
+	// Never step left of *first: that column is either non-transparent or already out of range
+	while (*last >= *first && data->colors[line_num][*last] == COL_TRANSPARENT)
+		(*last)--;
+}
+
 /*void delete_all_lines_but_current(ccx_decoder_608_context *context, struct eia608_screen *data, int row)
 {
 for (int i=0;i<15;i++)
--- a/src/lib_ccx/ccx_encoders_helpers.h
+++ b/src/lib_ccx/ccx_encoders_helpers.h
@ -28,6 +28,7 @@ int clever_capitalize(struct encoder_ctx *context, int line_num, struct eia608_s
 void telx_correct_case(char *sub_line);
 unsigned get_decoder_line_encoded_for_gui(unsigned char *buffer, int line_num, struct eia608_screen *data);
 unsigned get_decoder_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer, int line_num, struct eia608_screen *data);
+void get_sentence_borders(int *first, int *last, int line_num, struct eia608_screen *data);

 int string_cmp(const void *p1, const void *p2);
 int string_cmp_function(const void *p1, const void *p2, void *arg);
--- a/src/lib_ccx/ccx_encoders_webvtt.c
+++ b/src/lib_ccx/ccx_encoders_webvtt.c
@ -106,8 +106,6 @@ int write_xtimestamp_header(struct encoder_ctx *context)
 		write(context->out->fh, context->buffer, used);

 	}
-	// Add the additional CRLF to finish the header
-	write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
 	context->wrote_webvtt_sync_header = 1; // Do it even if couldn't write the header, because it won't be possible anyway
 }

@ -193,6 +191,93 @@ int write_cc_subtitle_as_webvtt(struct cc_subtitle *sub, struct encoder_ctx *con

 	return ret;
 }
+
+// TODO: this function duplicates one in ccx_encoders_g608.c; move the shared code into ccx_encoders_helpers.(c|h)
+/*
+ * Encodes the 32 characters of one caption row into buffer, using the
+ * encoding selected in ctx->encoding.
+ *
+ * ctx      - encoder context; only ctx->encoding is read here
+ * buffer   - destination for the encoded bytes. No size is passed in, so the
+ *            caller must supply a buffer large enough for 32 encoded characters.
+ * line_num - row of data->characters to encode
+ * data     - screen holding the characters
+ *
+ * Returns the number of bytes written to buffer. Nothing is written for a
+ * character when ctx->encoding is none of the handled values.
+ */
+int get_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer, int line_num, struct eia608_screen *data)
+{
+	unsigned char *orig = buffer;
+	unsigned char *line = data->characters[line_num];
+	for (int i = 0; i < 32; i++)
+	{
+		int bytes = 0;
+		switch (ctx->encoding)
+		{
+		case CCX_ENC_UTF_8:
+			bytes = get_char_in_utf_8(buffer, line[i]);
+			break;
+		case CCX_ENC_LATIN_1:
+			get_char_in_latin_1(buffer, line[i]);
+			bytes = 1;
+			break;
+		case CCX_ENC_UNICODE:
+			get_char_in_unicode(buffer, line[i]);
+			bytes = 2;
+			// Without this break the ASCII case below overwrote the first
+			// encoded byte and advanced the buffer by 1 instead of 2
+			break;
+		case CCX_ENC_ASCII:
+			*buffer = line[i];
+			bytes = 1;
+			break;
+		default:
+			// Unknown encoding: emit nothing for this character
+			break;
+		}
+		buffer += bytes;
+	}
+	return (int)(buffer - orig); // Return length
+}
+
+/*
+ * Records, per column, which color has to be opened before and closed after
+ * the character in that column of one caption row.
+ *
+ * color_events - in/out: one int per column, expected to be zeroed by the caller.
+ *                The color to open is OR-ed into the low bits of the entry;
+ *                the color to close is OR-ed in shifted left by 16.
+ *                NOTE(review): entries up to index `last` (as reported by
+ *                get_sentence_borders) are written, so the array must be at
+ *                least that long - confirm at the call site.
+ * line_num     - row of the screen to inspect
+ * data         - screen whose colors[line_num][] entries are examined
+ *
+ * Only colors in [COL_WHITE..COL_MAGENTA] produce events; COL_WHITE is the
+ * default and is never opened or closed.
+ */
+void get_color_events(int *color_events, int line_num, struct eia608_screen *data)
+{
+	int first, last;
+	get_sentence_borders(&first, &last, line_num, data);
+
+	int last_color = COL_WHITE;
+	for (int i = first; i <= last; i++)
+	{
+		if (data->colors[line_num][i] != last_color)
+		{
+			// It does not make sense to keep the default white color in the events
+			// WebVTT supports colors only in [COL_WHITE..COL_MAGENTA]
+			if (data->colors[line_num][i] <= COL_MAGENTA)
+				color_events[i] |= data->colors[line_num][i];	// Add this new color
+
+			// last_color is only non-white after an earlier iteration, so i - 1 >= first here
+			if (last_color != COL_WHITE && last_color <= COL_MAGENTA)
+				color_events[i - 1] |= last_color << 16;	// Remove old color (event in the second part of the integer)
+
+			last_color = data->colors[line_num][i];
+		}
+	}
+
+	// Close the color still active at the end of the row. Apply the same
+	// range check as inside the loop: a color above COL_MAGENTA was never
+	// opened, so closing it would leave an unmatched closing event.
+	if (last_color != COL_WHITE && last_color <= COL_MAGENTA)
+	{
+		color_events[last] |= last_color << 16;
+	}
+}
+
+/*
+ * Records, per column, which font style has to be opened before and closed
+ * after the character in that column of one caption row.
+ *
+ * font_events - in/out: one int per column, expected to be zeroed by the caller.
+ *               The font to open is OR-ed into the low bits of the entry;
+ *               the font to close is OR-ed in shifted left by 16.
+ *               NOTE(review): entries up to index `last` (as reported by
+ *               get_sentence_borders) are written, so the array must be at
+ *               least that long - confirm at the call site.
+ * line_num    - row of the screen to inspect
+ * data        - screen whose fonts[line_num][] entries are examined
+ *
+ * FONT_REGULAR is the default and is never opened or closed.
+ */
+void get_font_events(int *font_events, int line_num, struct eia608_screen *data)
+{
+	int first, last;
+	get_sentence_borders(&first, &last, line_num, data);
+
+	int last_font = FONT_REGULAR;
+	for (int i = first; i <= last; i++)
+	{
+		if (data->fonts[line_num][i] != last_font)
+		{
+			// It does not make sense to keep the regular font in the events
+			// WebVTT supports all fonts from C608
+			if (data->fonts[line_num][i] != FONT_REGULAR)	// Really can do it without condition because FONT_REGULAR == 0
+				font_events[i] |= data->fonts[line_num][i];		// Add this new font
+
+			// The old font ends on the previous column, exactly as in get_color_events.
+			// Storing the close event at i would close it one character late, after the
+			// new font has already been opened. last_font is only non-regular after an
+			// earlier iteration, so i - 1 >= first here.
+			if (last_font != FONT_REGULAR)
+				font_events[i - 1] |= last_font << 16;	// Remove old font (event in the second part of the integer)
+
+			last_font = data->fonts[line_num][i];
+		}
+	}
+
+	// Close the font still active at the end of the row
+	if (last_font != FONT_REGULAR)
+	{
+		font_events[last] |= last_font << 16;
+	}
+}
+
 int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *context)
 {
 	int used;
@ -203,8 +288,6 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co
 	int wrote_something = 0;
 	ms_start = data->start_time;

-	int prev_line_start = -1, prev_line_end = -1; // Column in which the previous line started and ended, for autodash
-	int prev_line_center1 = -1, prev_line_center2 = -1; // Center column of previous line text
 	int empty_buf = 1;
 	char timeline[128] = "";
 	for (int i = 0; i<15; i++)
@ -244,92 +327,92 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co
 	{
 		if (data->row_used[i])
 		{
-			if (context->sentence_cap)
-			{
-				if (clever_capitalize(context, i, data))
-					correct_case_with_dictionary(i, data);
-			}
-			if (context->autodash && context->trim_subs)
-			{
-				int first = 0, last = 31, center1 = -1, center2 = -1;
-				unsigned char *line = data->characters[i];
-				int do_dash = 1, colon_pos = -1;
-				find_limit_characters(line, &first, &last, CCX_DECODER_608_SCREEN_WIDTH);
-				if (first == -1 || last == -1)  // Probably a bug somewhere though
-					break;
-				// Is there a speaker named, for example: TOM: What are you doing?
-				for (int j = first; j <= last; j++)
-				{
-					if (line[j] == ':')
-					{
-						colon_pos = j;
-						break;
-					}
-					if (!isupper(line[j]))
-						break;
-				}
-				if (prev_line_start == -1)
-					do_dash = 0;
-				if (first == prev_line_start) // Case of left alignment
-					do_dash = 0;
-				if (last == prev_line_end)  // Right align
-					do_dash = 0;
-				if (first>prev_line_start && last<prev_line_end) // Fully contained
-					do_dash = 0;
-				if ((first>prev_line_start && first<prev_line_end) || // Overlap
-					(last>prev_line_start && last<prev_line_end))
-					do_dash = 0;
+			int length = get_line_encoded(context, context->subline, i, data);

-				center1 = (first + last) / 2;
-				if (colon_pos != -1)
-				{
-					while (colon_pos<CCX_DECODER_608_SCREEN_WIDTH &&
-						(line[colon_pos] == ':' ||
-						line[colon_pos] == ' ' ||
-						line[colon_pos] == 0x89))
-						colon_pos++; // Find actual text
-					center2 = (colon_pos + last) / 2;
-				}
-				else
-					center2 = center1;
-
-				if (center1 >= prev_line_center1 - 1 && center1 <= prev_line_center1 + 1 && center1 != -1) // Center align
-					do_dash = 0;
-				if (center2 >= prev_line_center2 - 2 && center1 <= prev_line_center2 + 2 && center1 != -1) // Center align
-					do_dash = 0;
-
-				if (do_dash)
-				{
-					written = write(context->out->fh, "- ", 2);
-					if (written != 2)
-						return -1;
-				}
-				prev_line_start = first;
-				prev_line_end = last;
-				prev_line_center1 = center1;
-				prev_line_center2 = center2;
-
-			}
-			int length = get_decoder_line_encoded(context, context->subline, i, data);
 			if (context->encoding != CCX_ENC_UNICODE)
 			{
 				dbg_print(CCX_DMT_DECODER_608, "\r");
 				dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline);
 			}
-			written = write(context->out->fh, context->subline, length);
-			if (written != length)
-				return -1;
+
+			int *color_events;
+			int *font_events;
+			if (ccx_options.use_webvtt_styling)
+			{
+				color_events = (int *)malloc(sizeof(int) * length);
+				font_events = (int *)malloc(sizeof(int) * length);
+				memset(color_events, 0, sizeof(int) * length);
+				memset(font_events, 0, sizeof(int) * length);
+
+				get_color_events(color_events, i, data);
+				get_font_events(font_events, i, data);
+			}
+
+			// Write symbol by symbol with events
+			for (int j = 0; j < length; j++)
+			{
+				if (ccx_options.use_webvtt_styling)
+				{
+					// opening events for fonts
+					int open_font = font_events[j] & 0xFF;	// Last 16 bytes
+					if (open_font != FONT_REGULAR)
+					{
+						if (open_font & FONT_ITALICS)
+							write(context->out->fh, strdup("<i>"), 3);
+						if (open_font & FONT_UNDERLINED)
+							write(context->out->fh, strdup("<u>"), 3);
+					}
+
+					// opening events for colors
+					int open_color = color_events[j] & 0xFF;	// Last 16 bytes
+					if (open_color != COL_WHITE)
+					{
+						write(context->out->fh, strdup("<c."), 3);
+						write(context->out->fh, color_text[open_color][0], strlen(color_text[open_color][0]));
+						write(context->out->fh, ">", 1);
+					}
+				}
+
+				// write current text symbol
+				write(context->out->fh, &(context->subline[j]), 1);
+
+				if (ccx_options.use_webvtt_styling)
+				{
+					// closing events for colors
+					int close_color = color_events[j] >> 16;	// First 16 bytes
+					if (close_color != COL_WHITE)
+					{
+						write(context->out->fh, strdup("</c>"), 4);
+					}
+
+					// closing events for fonts
+					int close_font = font_events[j] >> 16;	// First 16 bytes
+					if (close_font != FONT_REGULAR)
+					{
+						if (close_font & FONT_ITALICS)
+							write(context->out->fh, strdup("</i>"), 4);
+						if (close_font & FONT_UNDERLINED)
+							write(context->out->fh, strdup("</u>"), 4);
+					}
+				}
+			}
+
+			if (ccx_options.use_webvtt_styling)
+			{
+				free(color_events);
+				free(font_events);
+			}
+
 			written = write(context->out->fh,
 				context->encoded_crlf, context->encoded_crlf_length);
 			if (written != context->encoded_crlf_length)
 				return -1;
+
 			wrote_something = 1;
-			// fprintf (wb->fh,encoded_crlf);
 		}
 	}
 	dbg_print(CCX_DMT_DECODER_608, "- - - - - - - - - - - -\r\n");

-	// fprintf (wb->fh, encoded_crlf);
 	written = write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
 	if (written != context->encoded_crlf_length)
 		return -1;
--- a/src/lib_ccx/dvb_subtitle_decoder.c
+++ b/src/lib_ccx/dvb_subtitle_decoder.c
@ -1650,7 +1650,9 @@ int dvbsub_decode(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, co
 					enc_ctx->srt_counter = enc_ctx->prev->srt_counter; //for dvb subs we need to update the current srt counter because we always encode the previous subtitle (and the counter is increased for the previous context)
 					enc_ctx->prev_start = enc_ctx->prev->prev_start;
 					sub->prev->got_output = 0;
-
+					if (enc_ctx->write_format == CCX_OF_WEBVTT) {	// we already wrote header, but since we encoded last sub, we must prevent multiple headers in future
+						enc_ctx->wrote_webvtt_sync_header = 1;
+					}
 				}
 				memcpy(enc_ctx->prev, enc_ctx, sizeof(struct encoder_ctx)); //we save the current encoder context
 				memcpy(sub->prev, sub, sizeof(struct cc_subtitle)); //we save the current subtitle
--- a/src/lib_ccx/params.c
+++ b/src/lib_ccx/params.c
@ -180,9 +180,13 @@ void set_output_format (struct ccx_s_options *opt, const char *format)
 		opt->write_format = CCX_OF_SSA;
 		if (strcmp (format,"ass")==0)
 			opt->use_ass_instead_of_ssa = 1;
-	} else if (strcmp(format, "webvtt") == 0)
+	}
+	else if (strcmp(format, "webvtt")==0 || strcmp(format, "webvtt-full")==0) {
 		opt->write_format = CCX_OF_WEBVTT;
-	else if (strcmp (format,"sami")==0 || strcmp (format,"smi")==0)
+		if (strcmp(format, "webvtt-full")==0)
+			opt->use_webvtt_styling = 1;
+	}
+	else if (strcmp(format, "sami") == 0 || strcmp(format, "smi") == 0)
 		opt->write_format=CCX_OF_SAMI;
 	else if (strcmp (format,"transcript")==0 || strcmp (format,"txt")==0)
 	{
@ -384,6 +388,7 @@ void print_usage (void)
 	mprint ("                      srt     -> SubRip (default, so not actually needed).\n");
 	mprint ("                      ass/ssa -> SubStation Alpha.\n");
 	mprint ("                      webvtt  -> WebVTT format\n");
+	mprint ("                      webvtt-full -> WebVTT format with styling\n");
 	mprint ("                      sami    -> MS Synchronized Accesible Media Interface.\n");
 	mprint ("                      bin     -> CC data in CCExtractor's own binary format.\n");
 	mprint ("                      raw     -> CC data in McPoodle's Broadcast format.\n");