mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2024-12-25 04:11:38 +00:00
Merge pull request #681 from Abhinav95/master
Template for upcoming additions to burned in extraction
This commit is contained in:
commit
e79506b303
@ -60,6 +60,7 @@ void init_options (struct ccx_s_options *options)
|
||||
options->force_flush = 0; // Don't flush whenever content is written.
|
||||
options->append_mode = 0; //By default, files are overwritten.
|
||||
options->ucla = 0; // By default, -UCLA not used
|
||||
options->tickertext = 0; // By default, do not assume ticker style text
|
||||
options->hardsubx = 0; // By default, don't try to extract hard subtitles
|
||||
options->dvbcolor = 1; // By default, attempt to detect both text and color
|
||||
options->dvblang = NULL; // By default, autodetect DVB language
|
||||
|
@ -126,6 +126,7 @@ struct ccx_s_options // Options from user parameters
|
||||
int force_flush; // Force flush on content write
|
||||
int append_mode; // Append mode for output files
|
||||
int ucla; // 1 if UCLA used, 0 if not
|
||||
int tickertext; // 1 if ticker text style burned in subs, 0 if not
|
||||
int hardsubx; // 1 if burned-in subtitles to be extracted
|
||||
int dvbcolor; // 1 if Color to be detected for DVB
|
||||
char *dvblang; // The name of the language stream for DVB
|
||||
|
@ -101,7 +101,11 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx)
|
||||
enc_ctx = init_encoder(&ccx_options.enc_cfg);
|
||||
|
||||
mprint("Beginning burned-in subtitle detection...\n");
|
||||
hardsubx_process_frames_linear(ctx, enc_ctx);
|
||||
|
||||
if(ctx->tickertext)
|
||||
hardsubx_process_frames_tickertext(ctx, enc_ctx);
|
||||
else
|
||||
hardsubx_process_frames_linear(ctx, enc_ctx);
|
||||
|
||||
dinit_encoder(&enc_ctx, 0); //TODO: Replace 0 with end timestamp
|
||||
|
||||
@ -217,13 +221,29 @@ struct lib_hardsubx_ctx* _init_hardsubx(struct ccx_s_options *options)
|
||||
ctx->tess_handle = TessBaseAPICreate();
|
||||
char* pars_vec = strdup("debug_file");
|
||||
char* pars_values = strdup("/dev/null");
|
||||
int res = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", ccx_options.ocr_oem, NULL, 0, &pars_vec,
|
||||
&pars_values, 1, false);
|
||||
char *tessdata_dir_path=".";
|
||||
|
||||
int ret = -1;
|
||||
if(options->ocrlang)
|
||||
{
|
||||
ret = TessBaseAPIInit4(ctx->tess_handle, NULL, options->ocrlang, ccx_options.ocr_oem, NULL, 0, &pars_vec,
|
||||
&pars_values, 1, false);
|
||||
if(ret != 0)
|
||||
{
|
||||
mprint("Failed loading language: %s, trying to load eng\n", options->ocrlang);
|
||||
}
|
||||
}
|
||||
if(ret != 0)
|
||||
{
|
||||
ret = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", ccx_options.ocr_oem, NULL, 0, &pars_vec,
|
||||
&pars_values, 1, false);
|
||||
}
|
||||
|
||||
free(pars_vec);
|
||||
free(pars_values);
|
||||
if(res != 0)
|
||||
if(ret != 0)
|
||||
{
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "Not enough memory to initialize Tesseract!");
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "Tesseract not initialized!");
|
||||
}
|
||||
|
||||
//Initialize attributes common to lib_ccx context
|
||||
@ -237,6 +257,7 @@ struct lib_hardsubx_ctx* _init_hardsubx(struct ccx_s_options *options)
|
||||
ctx->cc_to_stdout = options->cc_to_stdout;
|
||||
|
||||
//Initialize subtitle text parameters
|
||||
ctx->tickertext = options->tickertext;
|
||||
ctx->cur_conf = 0.0;
|
||||
ctx->prev_conf = 0.0;
|
||||
ctx->ocr_mode = options->hardsubx_ocr_mode;
|
||||
|
@ -68,6 +68,7 @@ struct lib_hardsubx_ctx
|
||||
float prev_conf;
|
||||
|
||||
// Subtitle text parameters
|
||||
int tickertext;
|
||||
struct cc_subtitle *dec_sub;
|
||||
int ocr_mode;
|
||||
int subcolor;
|
||||
@ -84,6 +85,7 @@ void hardsubx(struct ccx_s_options *options);
|
||||
|
||||
//hardsubx_decoder.c
|
||||
int hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx);
|
||||
int hardsubx_process_frames_tickertext(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx);
|
||||
int hardsubx_process_frames_binary(struct lib_hardsubx_ctx *ctx);
|
||||
|
||||
//hardsubx_imgops.c
|
||||
|
@ -260,6 +260,117 @@ void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int
|
||||
pixDestroy(&pixd);
|
||||
}
|
||||
|
||||
/*
 * Run ticker-text OCR on the bottom strip of a single video frame.
 *
 * The frame is read as packed 3-bytes-per-pixel data from frame->data[0]
 * (the caller converts to RGB24 with sws_scale before calling). Only rows
 * from 92% of the frame height down to the last row are examined.
 *
 * Three working images are built:
 *   im      - copy of the strip's RGB pixels
 *   lum_im  - white where the Lab lightness exceeds ctx->lum_thresh, else black
 *   feat_im - white where lum_im is set and the binarized edge image is 0
 *
 * OCR is performed on lum_im via get_ocr_text_simple(); feat_im is built but
 * not consumed yet. As a debugging aid, lum_im and im are written to
 * ./lum_imNNNN.jpg and ./imNNNN.jpg in the current directory.
 *
 * Parameters:
 *   ctx    - hardsubx context (supplies lum_thresh and the OCR handle)
 *   frame  - frame holding the RGB pixel data
 *   width  - frame width in pixels
 *   height - frame height in pixels
 *   index  - frame number, used only to name the debug images
 *
 * Returns whatever get_ocr_text_simple() returns for lum_im.
 * NOTE(review): ownership of the returned string is not visible here;
 * confirm whether the caller must release it.
 */
char* _process_frame_tickertext(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
{
	PIX *im;
	PIX *edge_im;
	PIX *lum_im;
	PIX *feat_im;
	PIX *next_im;
	char *subtitle_text=NULL;
	char write_path[100];
	int i,j;
	// First row of the ticker strip; shared by both passes so they cover the same rows
	const int strip_top=(92*height)/100;

	im = pixCreate(width,height,32);
	lum_im = pixCreate(width,height,32);
	feat_im = pixCreate(width,height,32);

	for(i=strip_top;i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			int p=j*3+i*frame->linesize[0];
			int r=frame->data[0][p];
			int g=frame->data[0][p+1];
			int b=frame->data[0][p+2];
			pixSetRGBPixel(im,j,i,r,g,b);
			float L,A,B;
			rgb_to_lab((float)r,(float)g,(float)b,&L,&A,&B);
			if(L > ctx->lum_thresh)
				pixSetRGBPixel(lum_im,j,i,255,255,255);
			else
				pixSetRGBPixel(lum_im,j,i,0,0,0);
		}
	}

	//Handle the edge image
	// Each stage returns a new PIX, so the previous stage is destroyed
	// before the pointer is advanced; otherwise every intermediate leaks.
	edge_im = pixConvertRGBToGray(im,0.0,0.0,0.0);

	next_im = pixSobelEdgeFilter(edge_im, L_VERTICAL_EDGES);
	pixDestroy(&edge_im);
	edge_im = next_im;

	next_im = pixDilateGray(edge_im, 21, 11);
	pixDestroy(&edge_im);
	edge_im = next_im;

	next_im = pixThresholdToBinary(edge_im,50);
	pixDestroy(&edge_im);
	edge_im = next_im;

	for(i=strip_top;i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			// Zero-initialized so a failed pixGetPixel() is read as "not set"
			unsigned int p1=0,p3=0;
			pixGetPixel(edge_im,j,i,&p1);
			pixGetPixel(lum_im,j,i,&p3);
			if(p1==0&&p3>0)
				pixSetRGBPixel(feat_im,j,i,255,255,255);
			else
				pixSetRGBPixel(feat_im,j,i,0,0,0);
		}
	}

	// Tesseract OCR for the ticker text here
	subtitle_text = get_ocr_text_simple(ctx, lum_im);

	// Debug output of the intermediate images
	snprintf(write_path,sizeof(write_path),"./lum_im%04d.jpg",index);
	pixWrite(write_path,lum_im,IFF_JFIF_JPEG);
	snprintf(write_path,sizeof(write_path),"./im%04d.jpg",index);
	pixWrite(write_path,im,IFF_JFIF_JPEG);

	pixDestroy(&lum_im);
	pixDestroy(&im);
	pixDestroy(&edge_im);
	pixDestroy(&feat_im);

	return subtitle_text;
}
|
||||
|
||||
int hardsubx_process_frames_tickertext(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx)
|
||||
{
|
||||
// Search for ticker text at the bottom of the screen, such as in Russia TV1 or stock prices
|
||||
int got_frame;
|
||||
int cur_sec,total_sec,progress;
|
||||
int frame_number = 0;
|
||||
char *ticker_text = NULL;
|
||||
|
||||
while(av_read_frame(ctx->format_ctx, &ctx->packet)>=0)
|
||||
{
|
||||
if(ctx->packet.stream_index == ctx->video_stream_id)
|
||||
{
|
||||
frame_number++;
|
||||
//Decode the video stream packet
|
||||
avcodec_decode_video2(ctx->codec_ctx, ctx->frame, &got_frame, &ctx->packet);
|
||||
if(got_frame && frame_number % 1000 == 0)
|
||||
{
|
||||
// sws_scale is used to convert the pixel format to RGB24 from all other cases
|
||||
sws_scale(
|
||||
ctx->sws_ctx,
|
||||
(uint8_t const * const *)ctx->frame->data,
|
||||
ctx->frame->linesize,
|
||||
0,
|
||||
ctx->codec_ctx->height,
|
||||
ctx->rgb_frame->data,
|
||||
ctx->rgb_frame->linesize
|
||||
);
|
||||
|
||||
ticker_text = _process_frame_tickertext(ctx,ctx->rgb_frame,ctx->codec_ctx->width,ctx->codec_ctx->height,frame_number);
|
||||
printf("frame_number: %d\n", frame_number);
|
||||
|
||||
if(strlen(ticker_text)>0)printf("%s\n", ticker_text);
|
||||
|
||||
cur_sec = (int)convert_pts_to_s(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base);
|
||||
total_sec = (int)convert_pts_to_s(ctx->format_ctx->duration, AV_TIME_BASE_Q);
|
||||
progress = (cur_sec*100)/total_sec;
|
||||
activity_progress(progress,cur_sec/60,cur_sec%60);
|
||||
}
|
||||
}
|
||||
}
|
||||
activity_progress(100,cur_sec/60,cur_sec%60);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx)
|
||||
{
|
||||
// Do an exhaustive linear search over the video
|
||||
|
@ -1930,6 +1930,7 @@ int parse_parameters (struct ccx_s_options *opt, int argc, char *argv[])
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
/* Red Hen/ UCLA Specific stuff */
|
||||
if (strcmp (argv[i],"-UCLA")==0 || strcmp (argv[i],"-ucla")==0)
|
||||
{
|
||||
opt->ucla = 1;
|
||||
@ -1945,6 +1946,11 @@ int parse_parameters (struct ccx_s_options *opt, int argc, char *argv[])
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (strcmp (argv[i],"-tickertext")==0 || strcmp (argv[i],"-tickertape")==0)
|
||||
{
|
||||
opt->tickertext = 1;
|
||||
continue;
|
||||
}
|
||||
if (strcmp (argv[i],"-lf")==0 || strcmp (argv[i],"-LF")==0)
|
||||
{
|
||||
opt->enc_cfg.line_terminator_lf = 1;
|
||||
|
Loading…
Reference in New Issue
Block a user