Binary search for dictionary

This commit is contained in:
Anshul Maheshwari 2014-06-25 10:58:10 +05:30
parent 9d8bb65e2a
commit c8f65bb686
6 changed files with 94 additions and 70 deletions

2
.gitignore vendored
View File

@ -5,3 +5,5 @@ CVS
linux/ccextractor
linux/depend
windows/debug/*
*.sdf
*.suo

View File

@ -1,5 +1,5 @@
#include "ccextractor.h"
#include "utility.h"
//extern unsigned char encoded_crlf[16];
// Encodes a generic string. Note that since we use the encoders for closed caption
@ -30,40 +30,32 @@ unsigned encode_line (unsigned char *buffer, unsigned char *text)
return bytes;
}
#define ISSEPARATOR(c) (c==' ' || c==0x89 || ispunct(c) \
|| c==0x99) // This is the apostrophe. We get it here in CC encoding, not ASCII
void correct_case (int line_num, struct eia608_screen *data)
void correct_case(int line_num,struct eia608_screen *data)
{
int i=0;
while (i<spell_words)
{
char *c=(char *) data->characters[line_num];
size_t len=strlen (spell_correct[i]);
while ((c=strstr (c,spell_lower[i]))!=NULL)
{
// Make sure it's a whole word (start of line or
// preceded by space, and end of line or followed by
// space)
unsigned char prev;
if (c==(char *) data->characters[line_num]) // Beginning of line...
prev=' '; // ...Pretend we had a blank before
else
prev=*(c-1);
unsigned char next;
if (c-(char *) data->characters[line_num]+len==CC608_SCREEN_WIDTH) // End of line...
next=' '; // ... pretend we have a blank later
else
next=*(c+len);
if ( ISSEPARATOR(prev) && ISSEPARATOR(next))
{
memcpy (c,spell_correct[i],len);
}
c++;
}
i++;
}
char delim[64] = {
' ' ,'\n','\r', 0x89,0x99,
'!' , '"', '#', '%' , '&',
'\'', '(', ')', ';' , '<',
'=' , '>', '?', '[' ,'\\',
']' , '*', '+', ',' , '-',
'.' , '/', ':', '^' , '_',
'{' , '|', '}', '~' ,'\0' };
char *line = strdup(((char*)data->characters[line_num]));
char *oline = (char*)data->characters[line_num];
char *c = strtok(line,delim);
do
{
char **index = bsearch(&c,spell_lower,spell_words,sizeof(*spell_lower),string_cmp);
if(index)
{
char *correct_c = *(spell_correct + (index - spell_lower));
size_t len=strlen (correct_c);
memcpy(oline + (c - line),correct_c,len);
}
} while ( ( c = strtok(NULL,delim) ) != NULL );
free(line);
}
void capitalize (int line_num, struct eia608_screen *data)

View File

@ -199,41 +199,25 @@ static void freep(void *arg)
}
#ifdef DEBUG
int check_trans_tn_intensity(int p1,int p2,int p3,int p4)
struct transIntensity
{
return p1 < p2 || (p1 == p2 && p3 < p4);
}
/**
* @param t array stating transparency of image
* @param i array stating intensity
* @param lout output array that receives the ordered index list;
* this array must be initialized with valid indices into t and i
* @param nb number of element in array
*/
void sort_intensity_wise(uint8_t *t, uint8_t *i, uint8_t *lout, int nb)
uint8_t *t;
uint8_t *i;
};
int check_trans_tn_intensity(const void *p1, const void *p2, void *arg)
{
for (int gap = nb / 2; gap > 0; gap = gap / 2)
{
int p, j, tmp;
for (p = gap; p < nb; p++)
{
#define CHECK_TRANSPARENCY ( t[tmp] < t[lout[j - gap]] )
/* Transparency has the major role in intensity, so transparency takes priority over Y */
#define CHECK_INTENSITY (t[tmp] == t[lout[j - gap]]&& i[tmp] < i[lout[j - gap]])
#define CHECK_TRANS_TN_INTEN (CHECK_TRANSPARENCY || CHECK_INTENSITY)
tmp = lout[p];
for (j = p; j >= gap && check_trans_tn_intensity(t[tmp],t[lout[j - gap]],i[tmp],i[lout[j - gap]]); j -= gap)
{
lout[j] = lout[j - gap];
}
lout[j] = tmp;
#undef CHECK_TRANSPARENCY
#undef CHECK_INTENSITY
#undef CHECK_TRANS_TN_INTEN
}
}
}
struct transIntensity *ti = arg;
unsigned char* tmp = (unsigned char*)p1;
unsigned char* act = (unsigned char*)p2;
if (ti->t[*tmp] < ti->t[*act] || (ti->t[*tmp] == ti->t[*act] && ti->i[*tmp] < ti->i[*act]))
return -1;
else if (ti->t[*tmp] == ti->t[*act] && ti->i[*tmp] == ti->i[*act])
return 0;
return 1;
}
int mapclut_paletee(png_color *palette, png_byte *alpha, uint32_t *clut,
uint8_t depth)
{
@ -268,6 +252,7 @@ int quantize_map(png_byte *alpha, uint8_t *intensity, png_color *palette,
* save index of intensity order table
*/
uint32_t *mcit = NULL;
struct transIntensity ti = { alpha,intensity};
int ret = 0;
@ -304,7 +289,7 @@ int quantize_map(png_byte *alpha, uint8_t *intensity, png_color *palette,
{
histogram[bitmap[i]]++;
}
sort_intensity_wise((uint8_t*) alpha, (uint8_t*) intensity, iot, nb_color);
shell_sort((void*)iot, nb_color, sizeof(*iot), check_trans_tn_intensity, (void*)&ti);
/* using selection sort since need to find only max_color */
for (int i = 0; i < max_color; i++)
@ -1754,7 +1739,6 @@ static void dvbsub_parse_page_segment(void *dvb_ctx, const uint8_t *buf,
int timeout;
int version;
long long start = get_visible_start();
char *filename;
void *sp = ctx->out->spupng_data;
@ -1774,7 +1758,6 @@ static void dvbsub_parse_page_segment(void *dvb_ctx, const uint8_t *buf,
ctx->time_out = timeout;
ctx->version = version;
filename = get_spupng_filename(sp);
if(ctx->prev_start == 0)
{
write_sputag(sp, ctx->prev_start, start);

View File

@ -1,4 +1,5 @@
#include "ccextractor.h"
#include "utility.h"
static int inputfile_capacity=0;
static int spell_builtin_added=0; // so we don't do it twice
@ -158,7 +159,6 @@ int process_cap_file (char *filename)
fclose (fi);
return 0;
}
int isanumber (char *s)
{
while (*s)
@ -1058,6 +1058,8 @@ void parse_parameters (int argc, char *argv[])
if (add_built_in_words())
fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory for word list");
ccx_options.sentence_cap=1;
shell_sort(spell_lower,spell_words,sizeof(*spell_lower),string_cmp2,NULL);
shell_sort(spell_correct,spell_words,sizeof(*spell_correct),string_cmp2,NULL);
continue;
}
if ((strcmp (argv[i],"--capfile")==0 ||
@ -1071,6 +1073,8 @@ void parse_parameters (int argc, char *argv[])
ccx_options.sentence_cap=1;
ccx_options.sentence_cap_file=argv[i+1];
i++;
shell_sort(spell_lower,spell_words,sizeof(*spell_lower),string_cmp2,NULL);
shell_sort(spell_correct,spell_words,sizeof(*spell_correct),string_cmp2,NULL);
continue;
}
if (strcmp (argv[i],"--program-number")==0 ||

View File

@ -1,5 +1,9 @@
#include "ccextractor.h"
#ifdef _MSC_VER
#define strcasecmp stricmp
#endif
static char *text;
static int text_size=0;
@ -296,3 +300,39 @@ int hex2int (char high, char low)
return -1;
return h*16+l;
}
/**
 * Sort an array in place using Shell sort (insertion sort over shrinking gaps).
 *
 * The sort works by swapping elements byte-wise inside the array, so it needs
 * no scratch allocation and cannot fail. It is not a stable sort. Elements are
 * only ever handed to compar by address inside the array; compar must decide
 * purely on the pointed-to values.
 *
 * @param base points to the start of the array
 * @param nb number of elements in the array; nothing is done when nb < 2
 * @param size size of each element in bytes
 * @param compar Comparison function, which is called with three arguments:
 *               pointers to the two objects being compared, followed by arg.
 *               It returns a negative value when the first object must be
 *               placed before the second.
 * @param arg argument passed unchanged to the comparison function
 */
void shell_sort(void *base, int nb,size_t size,int (*compar)(const void*p1,const void *p2,void*arg),void *arg)
{
	unsigned char *lbase = (unsigned char*)base;

	for (int gap = nb / 2; gap > 0; gap /= 2)
	{
		for (int p = gap; p < nb; p++)
		{
			/* Sink element p towards the front of its gap-spaced chain
			 * until its predecessor no longer compares greater. */
			for (int j = p; j >= gap; j -= gap)
			{
				unsigned char *cur = lbase + (size_t)j * size;
				unsigned char *prev = lbase + (size_t)(j - gap) * size;

				if (compar(cur, prev, arg) >= 0)
					break;

				for (size_t k = 0; k < size; k++)
				{
					unsigned char byte = cur[k];
					cur[k] = prev[k];
					prev[k] = byte;
				}
			}
		}
	}
}

/**
 * Case-insensitive string comparator in the three-argument form taken by
 * shell_sort().
 *
 * @param p1 points to a char* (an array element, not the string itself)
 * @param p2 points to a char*
 * @param arg unused
 * @return the result of strcasecmp() on the two strings
 */
int string_cmp2(const void *p1,const void *p2,void *arg)
{
	(void)arg;
	return strcasecmp(*(const char *const *)p1, *(const char *const *)p2);
}

/**
 * Two-argument form of string_cmp2(), matching the comparator signature
 * expected by bsearch().
 *
 * @param p1 points to a char*
 * @param p2 points to a char*
 * @return the result of string_cmp2(p1, p2, NULL)
 */
int string_cmp(const void *p1,const void *p2)
{
	return string_cmp2(p1, p2, NULL);
}

View File

@ -10,5 +10,8 @@
#define RL16(x) (*(unsigned short int*)(x))
#define RB16(x) (ntohs(*(unsigned short int*)(x)))
void shell_sort(void *base, int nb,size_t size,int (*compar)(const void*p1,const void *p2,void*arg),void *arg);
int string_cmp(const void *p1,const void *p2);
int string_cmp2(const void *p1,const void *p2,void *arg);
#endif