From c8f65bb6862bf9d4b9367f1f42cf99d80129726e Mon Sep 17 00:00:00 2001 From: Anshul Maheshwari Date: Wed, 25 Jun 2014 10:58:10 +0530 Subject: [PATCH] Binary search for dictionary --- .gitignore | 2 ++ src/608_helpers.c | 60 +++++++++++++++++--------------------- src/dvb_subtitle_decoder.c | 53 ++++++++++++--------------------- src/params.c | 6 +++- src/utility.c | 40 +++++++++++++++++++++++++ src/utility.h | 3 ++ 6 files changed, 94 insertions(+), 70 deletions(-) diff --git a/.gitignore b/.gitignore index 617d5152..4c3ecccb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ CVS linux/ccextractor linux/depend windows/debug/* +*.sdf +*.suo diff --git a/src/608_helpers.c b/src/608_helpers.c index 873d88e8..0a661f3b 100644 --- a/src/608_helpers.c +++ b/src/608_helpers.c @@ -1,5 +1,5 @@ #include "ccextractor.h" - +#include "utility.h" //extern unsigned char encoded_crlf[16]; // Encodes a generic string. Note that since we use the encoders for closed caption @@ -30,40 +30,32 @@ unsigned encode_line (unsigned char *buffer, unsigned char *text) return bytes; } -#define ISSEPARATOR(c) (c==' ' || c==0x89 || ispunct(c) \ - || c==0x99) // This is the apostrofe. We get it here in CC encoding, not ASCII - - -void correct_case (int line_num, struct eia608_screen *data) +void correct_case(int line_num,struct eia608_screen *data) { - int i=0; - while (icharacters[line_num]; - size_t len=strlen (spell_correct[i]); - while ((c=strstr (c,spell_lower[i]))!=NULL) - { - // Make sure it's a whole word (start of line or - // preceded by space, and end of line or followed by - // space) - unsigned char prev; - if (c==(char *) data->characters[line_num]) // Beginning of line... - prev=' '; // ...Pretend we had a blank before - else - prev=*(c-1); - unsigned char next; - if (c-(char *) data->characters[line_num]+len==CC608_SCREEN_WIDTH) // End of line... - next=' '; // ... pretend we have a blank later - else - next=*(c+len); - if ( ISSEPARATOR(prev) && ISSEPARATOR(next)) - { - memcpy (c,spell_correct[i],len); - } - c++; - } - i++; - } + char delim[64] = { + ' ' ,'\n','\r', 0x89,0x99, + '!' , '"', '#', '%' , '&', + '\'', '(', ')', ';' , '<', + '=' , '>', '?', '[' ,'\\', + ']' , '*', '+', ',' , '-', + '.' , '/', ':', '^' , '_', + '{' , '|', '}', '~' ,'\0' }; + + char *line = strdup(((char*)data->characters[line_num])); + char *oline = (char*)data->characters[line_num]; + char *c = strtok(line,delim); + do + { + char **index = bsearch(&c,spell_lower,spell_words,sizeof(*spell_lower),string_cmp); + + if(index) + { + char *correct_c = *(spell_correct + (index - spell_lower)); + size_t len=strlen (correct_c); + memcpy(oline + (c - line),correct_c,len); + } + } while ( ( c = strtok(NULL,delim) ) != NULL ); + free(line); } void capitalize (int line_num, struct eia608_screen *data) diff --git a/src/dvb_subtitle_decoder.c b/src/dvb_subtitle_decoder.c index e9de11fe..f24c8340 100644 --- a/src/dvb_subtitle_decoder.c +++ b/src/dvb_subtitle_decoder.c @@ -199,41 +199,25 @@ static void freep(void *arg) } #ifdef DEBUG -int check_trans_tn_intensity(int p1,int p2,int p3,int p4) +struct transIntensity { - return p1 < p2 || (p1 == p2 && p3 < p4); -} -/** - * @param t array stating transparency of image - * @param i array stating intensity - * @param lout output array which orederd list hash table - * this array must be initialized with valid index of array - * @param nb number of element in array - */ -void sort_intensity_wise(uint8_t *t, uint8_t *i, uint8_t *lout, int nb) + uint8_t *t; + uint8_t *i; +}; +int check_trans_tn_intensity(const void *p1, const void *p2, void *arg) { - for (int gap = nb / 2; gap > 0; gap = gap / 2) - { - int p, j, tmp; - for (p = gap; p < nb; p++) - { -#define CHECK_TRANSPARENCY ( t[tmp] < t[lout[j - gap]] ) - /* Transparency have major role in intensity so priority to transperency over Y */ -#define CHECK_INTENSITY (t[tmp] == t[lout[j - gap]]&& i[tmp] < i[lout[j - gap]]) -#define CHECK_TRANS_TN_INTEN (CHECK_TRANSPARENCY || CHECK_INTENSITY) - tmp = lout[p]; - for (j = p; j >= gap && check_trans_tn_intensity(t[tmp],t[lout[j - gap]],i[tmp],i[lout[j - gap]]); j -= gap) - { - lout[j] = lout[j - gap]; - } - lout[j] = tmp; -#undef CHECK_TRANSPARENCY -#undef CHECK_INTENSITY -#undef CHECK_TRANS_TN_INTEN - } - } -} + struct transIntensity *ti = arg; + unsigned char* tmp = (unsigned char*)p1; + unsigned char* act = (unsigned char*)p2; + if (ti->t[*tmp] < ti->t[*act] || (ti->t[*tmp] == ti->t[*act] && ti->i[*tmp] < ti->i[*act])) + return -1; + else if (ti->t[*tmp] == ti->t[*act] && ti->i[*tmp] == ti->i[*act]) + return 0; + + + return 1; +} int mapclut_paletee(png_color *palette, png_byte *alpha, uint32_t *clut, uint8_t depth) { @@ -268,6 +252,7 @@ int quantize_map(png_byte *alpha, uint8_t *intensity, png_color *palette, * save index of intensity order table */ uint32_t *mcit = NULL; + struct transIntensity ti = { alpha,intensity}; int ret = 0; @@ -304,7 +289,7 @@ int quantize_map(png_byte *alpha, uint8_t *intensity, png_color *palette, { histogram[bitmap[i]]++; } - sort_intensity_wise((uint8_t*) alpha, (uint8_t*) intensity, iot, nb_color); + shell_sort((void*)iot, nb_color, sizeof(*iot), check_trans_tn_intensity, (void*)&ti); /* using selection sort since need to find only max_color */ for (int i = 0; i < max_color; i++) @@ -1754,7 +1739,6 @@ static void dvbsub_parse_page_segment(void *dvb_ctx, const uint8_t *buf, int timeout; int version; long long start = get_visible_start(); - char *filename; void *sp = ctx->out->spupng_data; @@ -1774,7 +1758,6 @@ static void dvbsub_parse_page_segment(void *dvb_ctx, const uint8_t *buf, ctx->time_out = timeout; ctx->version = version; - filename = get_spupng_filename(sp); if(ctx->prev_start == 0) { write_sputag(sp, ctx->prev_start, start); diff --git a/src/params.c b/src/params.c index 962f6308..fb121af9 100644 --- a/src/params.c +++ b/src/params.c @@ -1,4 +1,5 @@ #include "ccextractor.h" +#include "utility.h" static int inputfile_capacity=0; static int spell_builtin_added=0; // so we don't do it twice @@ -158,7 +159,6 @@ int process_cap_file (char *filename) fclose (fi); return 0; } - int isanumber (char *s) { while (*s) @@ -1058,6 +1058,8 @@ void parse_parameters (int argc, char *argv[]) if (add_built_in_words()) fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory for word list"); ccx_options.sentence_cap=1; + shell_sort(spell_lower,spell_words,sizeof(*spell_lower),string_cmp2,NULL); + shell_sort(spell_correct,spell_words,sizeof(*spell_correct),string_cmp2,NULL); continue; } if ((strcmp (argv[i],"--capfile")==0 || @@ -1071,6 +1073,8 @@ void parse_parameters (int argc, char *argv[]) ccx_options.sentence_cap=1; ccx_options.sentence_cap_file=argv[i+1]; i++; + shell_sort(spell_lower,spell_words,sizeof(*spell_lower),string_cmp2,NULL); + shell_sort(spell_correct,spell_words,sizeof(*spell_correct),string_cmp2,NULL); continue; } if (strcmp (argv[i],"--program-number")==0 || diff --git a/src/utility.c b/src/utility.c index 140db4c8..1e3c387f 100644 --- a/src/utility.c +++ b/src/utility.c @@ -1,5 +1,9 @@ #include "ccextractor.h" +#ifdef _MSC_VER +#define strcasecmp stricmp +#endif + static char *text; static int text_size=0; @@ -296,3 +300,39 @@ int hex2int (char high, char low) return -1; return h*16+l; } +/** + * @param base points to the start of the array + * @param nb number of element in array + * @param size size of each element + * @param compar Comparison function, which is called with three argument + * that point to the objects being compared and arg. + * @param arg argument passed as it is to compare function + */ +void shell_sort(void *base, int nb,size_t size,int (*compar)(const void*p1,const void *p2,void*arg),void *arg) +{ + unsigned char *lbase = (unsigned char*)base; + unsigned char *tmp = (unsigned char*)malloc(size); + for (int gap = nb / 2; gap > 0; gap = gap / 2) + { + int p, j; + for (p = gap; p < nb; p++) + { + memcpy(tmp, lbase + (p *size), size); + for (j = p; j >= gap && ( compar(tmp,lbase + ( (j - gap) * size),arg) < 0); j -= gap) + { + memcpy(lbase + (j*size),lbase + ( (j - gap) * size),size); + } + memcpy(lbase + (j *size),tmp, size); + } + } + free(tmp); +} + +int string_cmp2(const void *p1,const void *p2,void *arg) +{ + return strcasecmp(*(char**)p1,*(char**)p2); +} +int string_cmp(const void *p1,const void *p2) +{ + return string_cmp2(p1, p2, NULL); +} diff --git a/src/utility.h b/src/utility.h index 7e0a9b9d..c5ab031f 100644 --- a/src/utility.h +++ b/src/utility.h @@ -10,5 +10,8 @@ #define RL16(x) (*(unsigned short int*)(x)) #define RB16(x) (ntohs(*(unsigned short int*)(x))) +void shell_sort(void *base, int nb,size_t size,int (*compar)(const void*p1,const void *p2,void*arg),void *arg); +int string_cmp(const void *p1,const void *p2); +int string_cmp2(const void *p1,const void *p2,void *arg); #endif