Italic Detection and improved documentation

2024-12-25 12:23:59 +00:00 · 2016-08-10 09:33:08 -07:00 · 2016-08-10 09:33:08 -07:00 · e2f850192f
commit e2f850192f
parent 5a6dfd0c18
4 changed files with 131 additions and 1 deletions
--- a/docs/HARDSUBX.txt
+++ b/docs/HARDSUBX.txt
@ -1,6 +1,9 @@
 Overview
 ========
 Subtitles that are burned into the video (hard subbed) can be extracted using the -hardsubx flag.
 The system works by processing video frames and extracting only the subtitles from them, followed
 by optical character recognition (OCR) using Tesseract.
 Dependencies
 ============
@ -17,7 +20,23 @@ Linux
 Make sure Tesseract, Leptonica and FFmpeg are installed, and that their libraries can be found using pkg-config.
 Refer to OCR.txt for installation details.
-Run:-
+To install FFmpeg (libav), follow the steps at:-
 https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu - For Ubuntu, Debian and Linux Mint
 https://trac.ffmpeg.org/wiki/CompilationGuide/Generic - For generic Linux compilation
 To validate your FFmpeg installation, make sure you can run the following commands on your terminal:-
 pkg-config --cflags libavcodec
 pkg-config --cflags libavformat
 pkg-config --cflags libavutil
 pkg-config --cflags libswscale
 pkg-config --libs libavcodec
 pkg-config --libs libavformat
 pkg-config --libs libavutil
 pkg-config --libs libswscale
 On success, you should see the correct include directory path and the linker flags.
 To build the program with hardsubx support, from the linux directory run:-
 make ENABLE_HARDSUBX=yes
 Windows
--- a/src/lib_ccx/hardsubx_classifier.c
+++ b/src/lib_ccx/hardsubx_classifier.c
@ -45,6 +45,8 @@ char *get_ocr_text_wordwise(struct lib_hardsubx_ctx *ctx, PIX *image)
 	TessResultIterator *it = TessBaseAPIGetIterator(ctx->tess_handle);
 	TessPageIteratorLevel level = RIL_WORD;
 	int prev_ital = 0;
 	if(it!=0)
 	{
 		do
@ -54,11 +56,53 @@ char *get_ocr_text_wordwise(struct lib_hardsubx_ctx *ctx, PIX *image)
 				continue;
 			if(text_out == NULL)
 			{
 				if(ctx->detect_italics)
 				{
 					int italic=0;
 					int dummy=0;
 					TessResultIteratorWordFontAttributes(it, &dummy, &italic,&dummy, &dummy, &dummy,&dummy, &dummy, &dummy);
 					if(italic==1 && prev_ital==0)
 					{
 						char *word_copy = strdup(word);
 						word = realloc(word, strlen(word)+strlen("<i>")+2);
 						strcpy(word,"<i>");
 						strcat(word, word_copy);
 						free(word_copy);
 						prev_ital = 1;
 					}
 					else if(italic == 0 && prev_ital == 1)
 					{
 						word = realloc(word, strlen(word)+strlen("</i>")+2);
 						strcat(word, "</i>");
 						prev_ital = 0;
 					}	
 				}
 				text_out = strdup(word);
 				text_out = realloc(text_out, strlen(text_out)+2);
 				strcat(text_out, " ");
 				continue;
 			}
 			if(ctx->detect_italics)
 			{
 				int italic=0;
 				int dummy=0;
 				TessResultIteratorWordFontAttributes(it, &dummy, &italic,&dummy, &dummy, &dummy,&dummy, &dummy, &dummy);
 				if(italic==1 && prev_ital==0)
 				{
 					char *word_copy = strdup(word);
 					word = realloc(word, strlen(word)+strlen("<i>")+2);
 					strcpy(word,"<i>");
 					strcat(word, word_copy);
 					free(word_copy);
 					prev_ital = 1;
 				}
 				else if(italic == 0 && prev_ital == 1)
 				{
 					word = realloc(word, strlen(word)+strlen("</i>")+2);
 					strcat(word, "</i>");
 					prev_ital = 0;
 				}
 			}
 			text_out = realloc(text_out, strlen(text_out)+strlen(word)+2);
 			strcat(text_out, word);
 			strcat(text_out, " ");
@ -66,6 +110,12 @@ char *get_ocr_text_wordwise(struct lib_hardsubx_ctx *ctx, PIX *image)
 		} while(TessPageIteratorNext((TessPageIterator *)it, level));
 	}
 	if(ctx->detect_italics && prev_ital == 1)
 	{
 		text_out = realloc(text_out, strlen(text_out)+strlen("</i>")+2);
 		strcat(text_out, "</i>");
 	}
 	TessResultIteratorDelete(it);
 	return text_out;
@ -141,6 +191,8 @@ char *get_ocr_text_wordwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image,
 	TessResultIterator *it = TessBaseAPIGetIterator(ctx->tess_handle);
 	TessPageIteratorLevel level = RIL_WORD;
 	int prev_ital = 0;
 	if(it!=0)
 	{
 		do
@ -153,11 +205,53 @@ char *get_ocr_text_wordwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image,
 				continue;
 			if(text_out == NULL)
 			{
 				if(ctx->detect_italics)
 				{
 					int italic=0;
 					int dummy=0;
 					TessResultIteratorWordFontAttributes(it, &dummy, &italic,&dummy, &dummy, &dummy,&dummy, &dummy, &dummy);
 					if(italic==1 && prev_ital==0)
 					{
 						char *word_copy = strdup(word);
 						word = realloc(word, strlen(word)+strlen("<i>")+2);
 						strcpy(word,"<i>");
 						strcat(word, word_copy);
 						free(word_copy);
 						prev_ital = 1;
 					}
 					else if(italic == 0 && prev_ital == 1)
 					{
 						word = realloc(word, strlen(word)+strlen("</i>")+2);
 						strcat(word, "</i>");
 						prev_ital = 0;
 					}	
 				}
 				text_out = strdup(word);
 				text_out = realloc(text_out, strlen(text_out)+2);
 				strcat(text_out, " ");
 				continue;
 			}
 			if(ctx->detect_italics)
 			{
 				int italic=0;
 				int dummy=0;
 				TessResultIteratorWordFontAttributes(it, &dummy, &italic,&dummy, &dummy, &dummy,&dummy, &dummy, &dummy);
 				if(italic==1 && prev_ital==0)
 				{
 					char *word_copy = strdup(word);
 					word = realloc(word, strlen(word)+strlen("<i>")+2);
 					strcpy(word,"<i>");
 					strcat(word, word_copy);
 					free(word_copy);
 					prev_ital = 1;
 				}
 				else if(italic == 0 && prev_ital == 1)
 				{
 					word = realloc(word, strlen(word)+strlen("</i>")+2);
 					strcat(word, "</i>");
 					prev_ital = 0;
 				}
 			}
 			text_out = realloc(text_out, strlen(text_out)+strlen(word)+2);
 			strcat(text_out, word);
 			strcat(text_out, " ");
@ -165,6 +259,12 @@ char *get_ocr_text_wordwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image,
 		} while(TessPageIteratorNext((TessPageIterator *)it, level));
 	}
 	if(ctx->detect_italics && prev_ital == 1)
 	{
 		text_out = realloc(text_out, strlen(text_out)+strlen("</i>")+2);
 		strcat(text_out, "</i>");
 	}
 	TessResultIteratorDelete(it);
 	return text_out;
--- a/src/lib_ccx/hardsubx_decoder.c
+++ b/src/lib_ccx/hardsubx_decoder.c
@ -40,6 +40,11 @@ char* _process_frame_white_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, i
 		}
 	}
 	if(ctx->detect_italics)
 	{
 		ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
 	}
 	// TESSERACT OCR FOR THE FRAME HERE
 	switch(ctx->ocr_mode)
 	{
@ -117,6 +122,11 @@ char *_process_frame_color_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, i
 		}
 	}
 	if(ctx->detect_italics)
 	{
 		ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
 	}
 	// TESSERACT OCR FOR THE FRAME HERE
 	switch(ctx->ocr_mode)
 	{
--- a/src/lib_ccx/params.c
+++ b/src/lib_ccx/params.c
@ -752,6 +752,7 @@ void usage (void)
 	mprint("                     e.g. -min_sub_duration 1.0 (for a duration of 1 second)\n");
 	mprint("\n");
 	mprint("   -detect_italics : Specify whether italics are to be detected from the OCR text.\n");
 	mprint("                     Italic detection automatically enforces the OCR mode to be word-wise");
 	mprint("\n");
 	mprint("      -conf_thresh : Specify the classifier confidence threshold between 1 and 100.\n");
 	mprint("                     Try and use a threshold which works for you if you get a lot of garbage text.\n");