Italic Detection and improved documentation

This commit is contained in:
Abhinav Shukla 2016-08-10 09:33:08 -07:00
parent 5a6dfd0c18
commit e2f850192f
4 changed files with 131 additions and 1 deletions

View File

@ -1,6 +1,9 @@
Overview
========
Subtitles which are burned into the video (i.e. hard subbed) can be extracted using the -hardsubx flag.
The system works by processing video frames and extracting only the subtitle regions from them,
and then running OCR on those regions using Tesseract.
Dependencies
============
@ -17,7 +20,23 @@ Linux
Make sure Tesseract, Leptonica and FFmpeg are installed, and that their libraries can be found using pkg-config.
Refer to OCR.txt for installation details.
Run:-
To install FFmpeg (libav), follow the steps at:-
https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu - For Ubuntu, Debian and Linux Mint
https://trac.ffmpeg.org/wiki/CompilationGuide/Generic - For generic Linux compilation
To validate your FFmpeg installation, make sure you can run the following commands in your terminal:-
pkg-config --cflags libavcodec
pkg-config --cflags libavformat
pkg-config --cflags libavutil
pkg-config --cflags libswscale
pkg-config --libs libavcodec
pkg-config --libs libavformat
pkg-config --libs libavutil
pkg-config --libs libswscale
On success, you should see the correct include directory path and the linker flags.
To build the program with hardsubx support, from the linux directory run:-
make ENABLE_HARDSUBX=yes
Windows

View File

@ -45,6 +45,8 @@ char *get_ocr_text_wordwise(struct lib_hardsubx_ctx *ctx, PIX *image)
TessResultIterator *it = TessBaseAPIGetIterator(ctx->tess_handle);
TessPageIteratorLevel level = RIL_WORD;
int prev_ital = 0;
if(it!=0)
{
do
@ -54,11 +56,53 @@ char *get_ocr_text_wordwise(struct lib_hardsubx_ctx *ctx, PIX *image)
continue;
if(text_out == NULL)
{
if(ctx->detect_italics)
{
int italic=0;
int dummy=0;
TessResultIteratorWordFontAttributes(it, &dummy, &italic,&dummy, &dummy, &dummy,&dummy, &dummy, &dummy);
if(italic==1 && prev_ital==0)
{
char *word_copy = strdup(word);
word = realloc(word, strlen(word)+strlen("<i>")+2);
strcpy(word,"<i>");
strcat(word, word_copy);
free(word_copy);
prev_ital = 1;
}
else if(italic == 0 && prev_ital == 1)
{
word = realloc(word, strlen(word)+strlen("</i>")+2);
strcat(word, "</i>");
prev_ital = 0;
}
}
text_out = strdup(word);
text_out = realloc(text_out, strlen(text_out)+2);
strcat(text_out, " ");
continue;
}
if(ctx->detect_italics)
{
int italic=0;
int dummy=0;
TessResultIteratorWordFontAttributes(it, &dummy, &italic,&dummy, &dummy, &dummy,&dummy, &dummy, &dummy);
if(italic==1 && prev_ital==0)
{
char *word_copy = strdup(word);
word = realloc(word, strlen(word)+strlen("<i>")+2);
strcpy(word,"<i>");
strcat(word, word_copy);
free(word_copy);
prev_ital = 1;
}
else if(italic == 0 && prev_ital == 1)
{
word = realloc(word, strlen(word)+strlen("</i>")+2);
strcat(word, "</i>");
prev_ital = 0;
}
}
text_out = realloc(text_out, strlen(text_out)+strlen(word)+2);
strcat(text_out, word);
strcat(text_out, " ");
@ -66,6 +110,12 @@ char *get_ocr_text_wordwise(struct lib_hardsubx_ctx *ctx, PIX *image)
} while(TessPageIteratorNext((TessPageIterator *)it, level));
}
if(ctx->detect_italics && prev_ital == 1)
{
text_out = realloc(text_out, strlen(text_out)+strlen("</i>")+2);
strcat(text_out, "</i>");
}
TessResultIteratorDelete(it);
return text_out;
@ -141,6 +191,8 @@ char *get_ocr_text_wordwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image,
TessResultIterator *it = TessBaseAPIGetIterator(ctx->tess_handle);
TessPageIteratorLevel level = RIL_WORD;
int prev_ital = 0;
if(it!=0)
{
do
@ -153,11 +205,53 @@ char *get_ocr_text_wordwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image,
continue;
if(text_out == NULL)
{
if(ctx->detect_italics)
{
int italic=0;
int dummy=0;
TessResultIteratorWordFontAttributes(it, &dummy, &italic,&dummy, &dummy, &dummy,&dummy, &dummy, &dummy);
if(italic==1 && prev_ital==0)
{
char *word_copy = strdup(word);
word = realloc(word, strlen(word)+strlen("<i>")+2);
strcpy(word,"<i>");
strcat(word, word_copy);
free(word_copy);
prev_ital = 1;
}
else if(italic == 0 && prev_ital == 1)
{
word = realloc(word, strlen(word)+strlen("</i>")+2);
strcat(word, "</i>");
prev_ital = 0;
}
}
text_out = strdup(word);
text_out = realloc(text_out, strlen(text_out)+2);
strcat(text_out, " ");
continue;
}
if(ctx->detect_italics)
{
int italic=0;
int dummy=0;
TessResultIteratorWordFontAttributes(it, &dummy, &italic,&dummy, &dummy, &dummy,&dummy, &dummy, &dummy);
if(italic==1 && prev_ital==0)
{
char *word_copy = strdup(word);
word = realloc(word, strlen(word)+strlen("<i>")+2);
strcpy(word,"<i>");
strcat(word, word_copy);
free(word_copy);
prev_ital = 1;
}
else if(italic == 0 && prev_ital == 1)
{
word = realloc(word, strlen(word)+strlen("</i>")+2);
strcat(word, "</i>");
prev_ital = 0;
}
}
text_out = realloc(text_out, strlen(text_out)+strlen(word)+2);
strcat(text_out, word);
strcat(text_out, " ");
@ -165,6 +259,12 @@ char *get_ocr_text_wordwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image,
} while(TessPageIteratorNext((TessPageIterator *)it, level));
}
if(ctx->detect_italics && prev_ital == 1)
{
text_out = realloc(text_out, strlen(text_out)+strlen("</i>")+2);
strcat(text_out, "</i>");
}
TessResultIteratorDelete(it);
return text_out;

View File

@ -40,6 +40,11 @@ char* _process_frame_white_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, i
}
}
if(ctx->detect_italics)
{
ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
}
// TESSERACT OCR FOR THE FRAME HERE
switch(ctx->ocr_mode)
{
@ -117,6 +122,11 @@ char *_process_frame_color_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, i
}
}
if(ctx->detect_italics)
{
ctx->ocr_mode = HARDSUBX_OCRMODE_WORD;
}
// TESSERACT OCR FOR THE FRAME HERE
switch(ctx->ocr_mode)
{

View File

@ -752,6 +752,7 @@ void usage (void)
mprint(" e.g. -min_sub_duration 1.0 (for a duration of 1 second)\n");
mprint("\n");
mprint(" -detect_italics : Specify whether italics are to be detected from the OCR text.\n");
mprint(" Italic detection automatically enforces the OCR mode to be word-wise");
mprint("\n");
mprint(" -conf_thresh : Specify the classifier confidence threshold between 1 and 100.\n");
mprint(" Try and use a threshold which works for you if you get a lot of garbage text.\n");