mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2025-01-13 13:40:54 +00:00
Add support for 4.0 tesseract
This commit is contained in:
parent
5df1dbb922
commit
5dbbe654f0
@ -1,5 +1,6 @@
|
||||
0.88(2018-10-24) (unreleased)
|
||||
-----------------
|
||||
- New: Add support for tesseract 4.0
|
||||
- Optimize: Remove multiple RGB to grey conversion in OCR.
|
||||
- Fix: Update UTF8Proc to 2.2.0
|
||||
- Fix: Warn instead of fatal when a 0xFF marker is missing
|
||||
|
@ -1661,7 +1661,8 @@ static int write_dvb_sub(struct lib_cc_decode *dec_ctx, struct cc_subtitle *sub)
|
||||
// Perform OCR
|
||||
#ifdef ENABLE_OCR
|
||||
char *ocr_str = NULL;
|
||||
if (ctx->ocr_ctx) {
|
||||
if (ctx->ocr_ctx)
|
||||
{
|
||||
ret = ocr_rect(ctx->ocr_ctx, rect, &ocr_str, region->bgcolor, dec_ctx->ocr_quantmode);
|
||||
if (ret >= 0)
|
||||
rect->ocr_text = ocr_str;
|
||||
@ -1669,7 +1670,8 @@ static int write_dvb_sub(struct lib_cc_decode *dec_ctx, struct cc_subtitle *sub)
|
||||
rect->ocr_text = NULL;
|
||||
dbg_print(CCX_DMT_DVB, "\nOCR Result: %s\n", rect->ocr_text ? rect->ocr_text : "NULL");
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
rect->ocr_text = NULL;
|
||||
}
|
||||
#endif
|
||||
@ -1889,7 +1891,7 @@ int parse_dvb_description(struct dvb_config* cfg, unsigned char*data,
|
||||
/* setting language to undefined if not found in language lkup table */
|
||||
char lang_name[4];
|
||||
dbg_print(CCX_DMT_DVB, "DVBSUB - LANGUAGE \"");
|
||||
|
||||
|
||||
for(int char_index = 0; char_index < 3; char_index++)
|
||||
{
|
||||
lang_name[char_index] = cctolower(data[char_index]);
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include "png.h"
|
||||
#include "lib_ccx.h"
|
||||
#ifdef ENABLE_OCR
|
||||
#include "capi.h"
|
||||
#include "tesseract/capi.h"
|
||||
#include "ccx_common_constants.h"
|
||||
#include "allheaders.h"
|
||||
#include <dirent.h>
|
||||
@ -28,14 +28,14 @@ static int check_trans_tn_intensity(const void *p1, const void *p2, void *arg)
|
||||
unsigned char act_i;
|
||||
/** TODO verify that RGB follow ITU-R BT.709
|
||||
* Below formula is valid only for 709 standard
|
||||
* Y = 0.2126 R + 0.7152 G + 0.0722 B
|
||||
*/
|
||||
* Y = 0.2126 R + 0.7152 G + 0.0722 B
|
||||
*/
|
||||
tmp_i = (0.2126 * ti->palette[*tmp].red) + (0.7152 * ti->palette[*tmp].green) + (0.0722 * ti->palette[*tmp].blue);
|
||||
act_i = (0.2126 * ti->palette[*act].red) + (0.7152 * ti->palette[*act].green) + (0.0722 * ti->palette[*act].blue);
|
||||
|
||||
if (ti->t[*tmp] < ti->t[*act] || (ti->t[*tmp] == ti->t[*act] && tmp_i < act_i))
|
||||
if (ti->t[*tmp] < ti->t[*act] || (ti->t[*tmp] == ti->t[*act] && tmp_i < act_i))
|
||||
return -1;
|
||||
else if (ti->t[*tmp] == ti->t[*act] && tmp_i == act_i)
|
||||
else if (ti->t[*tmp] == ti->t[*act] && tmp_i == act_i)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
@ -43,10 +43,13 @@ static int check_trans_tn_intensity(const void *p1, const void *p2, void *arg)
|
||||
|
||||
static int search_language_pack(const char *dir_name,const char *lang_name)
|
||||
{
|
||||
if (!dir_name)
|
||||
return -1;
|
||||
|
||||
//Search for a tessdata folder in the specified directory
|
||||
char *dirname = strdup(dir_name);
|
||||
dirname = realloc(dirname,strlen(dirname)+strlen("/tessdata/")+1);
|
||||
strcat(dirname,"/tessdata/");
|
||||
dirname = realloc(dirname,strlen(dirname)+strlen("tessdata/")+1);
|
||||
strcat(dirname,"tessdata/");
|
||||
|
||||
DIR *dp;
|
||||
struct dirent *dirp;
|
||||
@ -79,15 +82,62 @@ void delete_ocr (void** arg)
|
||||
freep(arg);
|
||||
}
|
||||
|
||||
/**
|
||||
* probe_tessdata_location
|
||||
*
|
||||
* This function probe tesseract data location
|
||||
*
|
||||
* Priority of Tesseract traineddata file search paths:-
|
||||
* 1. tessdata in TESSDATA_PREFIX, if it is specified. Overrides others
|
||||
* 2. tessdata in current working directory
|
||||
* 3. tessdata in /usr/share
|
||||
*/
|
||||
char* probe_tessdata_location(int lang_index)
|
||||
{
|
||||
int ret = 0;
|
||||
char *tessdata_dir_path = getenv("TESSDATA_PREFIX");
|
||||
|
||||
ret = search_language_pack(tessdata_dir_path, language[lang_index]);
|
||||
if (!ret)
|
||||
return tessdata_dir_path;
|
||||
|
||||
tessdata_dir_path = "./";
|
||||
ret = search_language_pack(tessdata_dir_path,language[lang_index]);
|
||||
if (!ret)
|
||||
return tessdata_dir_path;
|
||||
|
||||
tessdata_dir_path = "/usr/share/";
|
||||
ret = search_language_pack(tessdata_dir_path, language[lang_index]);
|
||||
if (!ret)
|
||||
return tessdata_dir_path;
|
||||
|
||||
tessdata_dir_path = "/usr/local/share/";
|
||||
ret = search_language_pack(tessdata_dir_path, language[lang_index]);
|
||||
if (!ret)
|
||||
return tessdata_dir_path;
|
||||
|
||||
tessdata_dir_path = "/usr/share/tesseract-ocr/";
|
||||
ret = search_language_pack(tessdata_dir_path, language[lang_index]);
|
||||
if (!ret)
|
||||
return tessdata_dir_path;
|
||||
|
||||
tessdata_dir_path = "/usr/share/tesseract-ocr/4.00/";
|
||||
ret = search_language_pack(tessdata_dir_path, language[lang_index]);
|
||||
if (!ret)
|
||||
return tessdata_dir_path;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void* init_ocr(int lang_index)
|
||||
{
|
||||
int ret = -1;
|
||||
struct ocrCtx* ctx;
|
||||
const char* lang = NULL, *tessdata_path = NULL;
|
||||
|
||||
ctx = (struct ocrCtx*)malloc(sizeof(struct ocrCtx));
|
||||
if(!ctx)
|
||||
return NULL;
|
||||
ctx->api = TessBaseAPICreate();
|
||||
|
||||
/* if language was undefined use english */
|
||||
if(lang_index == 0)
|
||||
@ -102,53 +152,53 @@ void* init_ocr(int lang_index)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*Priority of Tesseract traineddata file search paths:-
|
||||
1. tessdata in TESSDATA_PREFIX, if it is specified. Overrides others
|
||||
2. tessdata in current working directory
|
||||
3. tessdata in /usr/share
|
||||
*/
|
||||
int data_location = 0;
|
||||
char *tessdata_dir_path=".";
|
||||
if(!getenv("TESSDATA_PREFIX"))
|
||||
{
|
||||
ret = search_language_pack(tessdata_dir_path,language[lang_index]);
|
||||
}
|
||||
if(ret < 0)
|
||||
{
|
||||
data_location = 1;
|
||||
if(getenv("TESSDATA_PREFIX"))
|
||||
ret = search_language_pack(getenv("TESSDATA_PREFIX"), language[lang_index]);
|
||||
else
|
||||
ret = search_language_pack("/usr/share", language[lang_index]);
|
||||
}
|
||||
if(ret < 0 && lang_index != 1 && ccx_options.ocrlang==NULL)
|
||||
tessdata_path = probe_tessdata_location(lang_index);
|
||||
if(!tessdata_path)
|
||||
{
|
||||
if (lang_index == 1)
|
||||
{
|
||||
mprint("eng.traineddata not found! No Switching Possible\n");
|
||||
return NULL;
|
||||
}
|
||||
mprint("%s.traineddata not found! Switching to English\n",language[lang_index]);
|
||||
/* select english */
|
||||
lang_index = 1;
|
||||
tessdata_path = probe_tessdata_location(lang_index);
|
||||
if(!tessdata_path)
|
||||
{
|
||||
mprint("eng.traineddata not found! No Switching Possible\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
const char* lang = NULL, *tessdata_path = NULL;
|
||||
if (ccx_options.ocrlang)
|
||||
lang = ccx_options.ocrlang;
|
||||
else if (data_location == 1)
|
||||
else
|
||||
lang = language[lang_index];
|
||||
else {
|
||||
lang = language[lang_index];
|
||||
tessdata_path = tessdata_dir_path;
|
||||
}
|
||||
|
||||
char* pars_vec = strdup("debug_file");
|
||||
char* pars_values = strdup("/dev/null");
|
||||
char* pars_values = strdup("tess.log");
|
||||
|
||||
ret = TessBaseAPIInit4(ctx->api, tessdata_path, lang, ccx_options.ocr_oem, NULL, 0, &pars_vec,
|
||||
&pars_values, 1, false);
|
||||
ctx->api = TessBaseAPICreate();
|
||||
if (!strncmp("4.", TessVersion(), 2))
|
||||
{
|
||||
char tess_path [1024];
|
||||
snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata");
|
||||
// ccx_options.ocr_oem is deprecated with Tesseract 4.x; the only supported mode is OEM_LSTM_ONLY
|
||||
ret = TessBaseAPIInit4(ctx->api, tess_path, lang, 1, NULL, 0, &pars_vec,
|
||||
&pars_values, 1, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = TessBaseAPIInit4(ctx->api, tessdata_path, lang, ccx_options.ocr_oem, NULL, 0, &pars_vec,
|
||||
&pars_values, 1, false);
|
||||
}
|
||||
|
||||
free(pars_vec);
|
||||
free(pars_values);
|
||||
|
||||
if(ret < 0)
|
||||
{
|
||||
mprint("Failed TessBaseAPIInit4 %d\n", ret);
|
||||
goto fail;
|
||||
}
|
||||
return ctx;
|
||||
@ -297,18 +347,21 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
TessPageIteratorLevel level = RIL_WORD;
|
||||
TessBaseAPISetImage2(ctx->api, color_pix_out);
|
||||
tess_ret = TessBaseAPIRecognize(ctx->api, NULL);
|
||||
if (tess_ret != 0) {
|
||||
if (tess_ret != 0)
|
||||
{
|
||||
mprint("\nTessBaseAPIRecognize returned %d, skipping this bitmap.\n", tess_ret);
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
ri = TessBaseAPIGetIterator(ctx->api);
|
||||
}
|
||||
|
||||
if(!tess_ret && ri!=0)
|
||||
{
|
||||
do
|
||||
{
|
||||
char* word = TessResultIteratorGetUTF8Text(ri,level);
|
||||
float conf = TessResultIteratorConfidence(ri,level);
|
||||
// float conf = TessResultIteratorConfidence(ri,level);
|
||||
int x1, y1, x2, y2;
|
||||
if (!TessPageIteratorBoundingBox((TessPageIterator *)ri, level, &x1, &y1, &x2, &y2))
|
||||
continue;
|
||||
@ -325,7 +378,6 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
uint32_t *histogram = NULL;
|
||||
uint8_t *iot = NULL;
|
||||
uint32_t *mcit = NULL;
|
||||
int ret = 0;
|
||||
int max_color=2;
|
||||
|
||||
histogram = (uint32_t*) malloc(copy->nb_colors * sizeof(uint32_t));
|
||||
@ -334,7 +386,7 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
struct transIntensity ti = {copy->alpha,copy->palette};
|
||||
memset(histogram, 0, copy->nb_colors * sizeof(uint32_t));
|
||||
|
||||
/* initializing intensity ordered table with serial order of unsorted color table */
|
||||
/* initializing intensity ordered table with serial order of unsorted color table */
|
||||
for (int i = 0; i < copy->nb_colors; i++)
|
||||
{
|
||||
iot[i] = i;
|
||||
@ -342,7 +394,7 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
memset(mcit, 0, copy->nb_colors * sizeof(uint32_t));
|
||||
|
||||
/* calculate histogram of image */
|
||||
int firstpixel = copy->data[0]; //TODO: Verify this border pixel assumption holds
|
||||
int firstpixel = copy->data[0]; //TODO: Verify this border pixel assumption holds
|
||||
for(int i=y1;i<=y2;i++)
|
||||
{
|
||||
for(int j=x1;j<=x2;j++)
|
||||
@ -360,7 +412,7 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
// i, iot[i], histogram[iot[i]]);
|
||||
// }
|
||||
/**
|
||||
* using selection sort since need to find only max_color
|
||||
* using selection sort since need to find only max_color
|
||||
* Histogram becomes invalid in this loop
|
||||
*/
|
||||
for (int i = 0; i < max_color; i++)
|
||||
@ -390,7 +442,7 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
palette[i].blue = copy->palette[i].blue;
|
||||
alpha[i]=copy->alpha[i];
|
||||
}
|
||||
|
||||
|
||||
for (int i = 0, mxi = 0; i < copy->nb_colors; i++)
|
||||
{
|
||||
int step, inc;
|
||||
@ -419,13 +471,13 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Detecting the color present in quantized word image
|
||||
|
||||
// Detecting the color present in quantized word image
|
||||
int r_avg=0,g_avg=0,b_avg=0,denom=0;
|
||||
for (int i = 0; i < copy->nb_colors; i++)
|
||||
{
|
||||
if(palette[i].red == ((copy->bgcolor >> 16) & 0xff) &&
|
||||
palette[i].green == ((copy->bgcolor >> 8) & 0xff) &&
|
||||
palette[i].green == ((copy->bgcolor >> 8) & 0xff) &&
|
||||
palette[i].blue == ((copy->bgcolor >> 0) & 0xff))
|
||||
continue;
|
||||
denom++;
|
||||
@ -510,7 +562,7 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
int length_closing_font = 7; // exclude '\0'
|
||||
|
||||
char *line_start = text_out;
|
||||
int length = strlen(text_out) + length_closing_font * 10; // usually enough
|
||||
int length = strlen(text_out) + length_closing_font * 10; // usually enough
|
||||
char *new_text_out = malloc(length);
|
||||
char *new_text_out_iter = new_text_out;
|
||||
|
||||
@ -543,7 +595,6 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
long diff = new_text_out_iter - new_text_out;
|
||||
new_text_out = realloc(new_text_out, length);
|
||||
new_text_out_iter = new_text_out + diff;
|
||||
|
||||
}
|
||||
|
||||
// Add <font> to the beginning of the line if it is missing
|
||||
@ -575,7 +626,7 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
// Add </font> if it is indeed missing
|
||||
if (line_end - line_start < length_closing_font ||
|
||||
strncmp(line_start, closing_font, length_closing_font)) {
|
||||
|
||||
|
||||
memcpy(new_text_out_iter, closing_font, length_closing_font);
|
||||
new_text_out_iter += length_closing_font;
|
||||
|
||||
@ -600,7 +651,7 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
pixDestroy(&cpix_gs);
|
||||
pixDestroy(&color_pix);
|
||||
pixDestroy(&color_pix_out);
|
||||
|
||||
|
||||
return text_out;
|
||||
}
|
||||
|
||||
@ -653,7 +704,7 @@ static int quantize_map(png_byte *alpha, png_color *palette,
|
||||
|
||||
memset(histogram, 0, nb_color * sizeof(uint32_t));
|
||||
|
||||
/* initializing intensity ordered table with serial order of unsorted color table */
|
||||
/* initializing intensity ordered table with serial order of unsorted color table */
|
||||
for (int i = 0; i < nb_color; i++)
|
||||
{
|
||||
iot[i] = i;
|
||||
@ -677,7 +728,7 @@ static int quantize_map(png_byte *alpha, png_color *palette,
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* using selection sort since need to find only max_color
|
||||
* using selection sort since need to find only max_color
|
||||
* Histogram becomes invalid in this loop
|
||||
*/
|
||||
for (int i = 0; i < max_color; i++)
|
||||
@ -702,7 +753,7 @@ static int quantize_map(png_byte *alpha, png_color *palette,
|
||||
}
|
||||
|
||||
#ifdef OCR_DEBUG
|
||||
ccx_common_logging.log_ftn("max redundant intensities table\n");
|
||||
ccx_common_logging.log_ftn("max redundant intensities table\n");
|
||||
for (int i = 0; i < max_color; i++)
|
||||
{
|
||||
ccx_common_logging.log_ftn("%02d) mcit %02d\n",
|
||||
@ -756,7 +807,7 @@ int ocr_rect(void* arg, struct cc_bitmap *rect, char **str, int bgcolor, int ocr
|
||||
int ret = 0;
|
||||
png_color *palette = NULL;
|
||||
png_byte *alpha = NULL;
|
||||
|
||||
|
||||
struct image_copy *copy;
|
||||
copy = (struct image_copy *)malloc(sizeof(struct image_copy));
|
||||
copy->nb_colors = rect->nb_colors;
|
||||
@ -843,10 +894,7 @@ int compare_rect_by_ypos(const void*p1, const void *p2, void*arg)
|
||||
if(r1->x > r2->x)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void add_ocrtext2str(char *dest, char *src, const char *crlf, unsigned crlf_length)
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include "utf8proc/utf8proc.h"
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
#include "capi.h"
|
||||
#include "tesseract/capi.h"
|
||||
#include "allheaders.h"
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user