Improve the way Tesseract is initialized in hardsubx. Fix a segfault when cleaning up the frame data in hardsubx. (#1083)

This commit is contained in:
Artem Fedoskin 2019-03-26 00:20:45 +01:00 committed by Carlos Fernandez Sanz
parent 414a57d97e
commit 116f308a0b
3 changed files with 61 additions and 20 deletions

View File

@ -110,10 +110,10 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx)
dinit_encoder(&enc_ctx, 0); //TODO: Replace 0 with end timestamp
// Free the allocated memory for frame processing
av_free(ctx->rgb_buffer);
av_frame_free(ctx->rgb_frame);
av_frame_free(ctx->frame);
avcodec_close(ctx->codec_ctx);
av_free(ctx->rgb_buffer);
if(ctx->frame) av_frame_free(&ctx->frame);
if(ctx->rgb_frame) av_frame_free(&ctx->rgb_frame);
avcodec_close(ctx->codec_ctx);
avformat_close_input(&ctx->format_ctx);
}
@ -221,23 +221,44 @@ struct lib_hardsubx_ctx* _init_hardsubx(struct ccx_s_options *options)
ctx->tess_handle = TessBaseAPICreate();
char* pars_vec = strdup("debug_file");
char* pars_values = strdup("/dev/null");
char *tessdata_dir_path=".";
char* tessdata_path = NULL;
int ret = -1;
if(options->ocrlang)
{
ret = TessBaseAPIInit4(ctx->tess_handle, NULL, options->ocrlang, ccx_options.ocr_oem, NULL, 0, &pars_vec,
&pars_values, 1, false);
if(ret != 0)
{
mprint("Failed loading language: %s, trying to load eng\n", options->ocrlang);
}
}
if(ret != 0)
{
ret = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", ccx_options.ocr_oem, NULL, 0, &pars_vec,
&pars_values, 1, false);
}
char* lang = options->ocrlang;
if(!lang) lang = "eng"; // English is default language
tessdata_path = probe_tessdata_location_string(lang);
if(!tessdata_path)
{
if (strcmp(lang, "eng") == 0)
{
mprint("eng.traineddata not found! No Switching Possible\n");
return NULL;
}
mprint("%s.traineddata not found! Switching to English\n", lang);
lang = "eng";
tessdata_path = probe_tessdata_location_string("eng");
if(!tessdata_path)
{
mprint("eng.traineddata not found! No Switching Possible\n");
return NULL;
}
}
int ret = -1;
if (!strncmp("4.", TessVersion(), 2))
{
char tess_path [1024];
snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata");
// ccx_options.ocr_oem is deprecated here; the only supported mode is OEM_LSTM_ONLY
ret = TessBaseAPIInit4(ctx->tess_handle, tess_path, lang, 1, NULL, 0, &pars_vec,
&pars_values, 1, false);
}
else
{
ret = TessBaseAPIInit4(ctx->tess_handle, tessdata_path, lang, ccx_options.ocr_oem, NULL, 0, &pars_vec,
&pars_values, 1, false);
}
free(pars_vec);
free(pars_values);

View File

@ -129,6 +129,25 @@ char* probe_tessdata_location(int lang_index)
return NULL;
}
/**
 * probe_tessdata_location_string
 *
 * Returns the Tesseract data location for a language given by name.
 *
 * The name is compared exactly (strcmp) against the entries of the global
 * language[] table; the index of the match is handed to
 * probe_tessdata_location().
 *
 * @param lang  Language name to look up; may be NULL.
 * @return      Whatever probe_tessdata_location() returns for the matching
 *              index, or NULL when lang is NULL or is not in the table.
 */
char* probe_tessdata_location_string(char* lang)
{
	// strcmp() on a NULL pointer is undefined behavior, so a missing
	// language name is treated the same way as an unknown one.
	if (lang == NULL)
		return NULL;

	// Scan from the end: the highest matching index wins (the same result
	// as a full forward scan that keeps its last hit), and the search can
	// stop as soon as a match is found.
	for (int i = NB_LANGUAGE - 1; i >= 0; i--)
	{
		if (language[i] && strcmp(lang, language[i]) == 0)
			return probe_tessdata_location(i);
	}

	return NULL; // No such language found
}
void* init_ocr(int lang_index)
{
int ret = -1;

View File

@ -12,6 +12,7 @@ struct image_copy //A copy of the original OCR image, used for color detection
};
void delete_ocr (void** arg);
/* Looks up lang in the language[] table and returns the result of
 * probe_tessdata_location() for the matching index; returns NULL when the
 * language name is not in the table. */
char* probe_tessdata_location_string(char* lang);
void* init_ocr(int lang_index);
char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h, struct image_copy *copy);
int ocr_rect(void* arg, struct cc_bitmap *rect, char **str, int bgcolor, int ocr_quantmode);