mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-02-17 17:25:10 +00:00
Avoid one transpose (730 -> 680 decicycles on Duron)
Originally committed as revision 4332 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
85bbfcd4ee
commit
e4b36d4434
@ -333,6 +333,8 @@ typedef struct H264Context{
|
||||
uint8_t *direct_table;
|
||||
uint8_t direct_cache[5*8];
|
||||
|
||||
uint8_t zigzag_scan[16];
|
||||
uint8_t field_scan[16];
|
||||
}H264Context;
|
||||
|
||||
static VLC coeff_token_vlc[4];
|
||||
@ -2721,6 +2723,18 @@ static int decode_init(AVCodecContext *avctx){
|
||||
s->low_delay= 1;
|
||||
avctx->pix_fmt= PIX_FMT_YUV420P;
|
||||
|
||||
if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
|
||||
memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
|
||||
memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
|
||||
}else{
|
||||
int i;
|
||||
for(i=0; i<16; i++){
|
||||
/* Swap the two low bits of a 4-bit value with its two high bits, so an
 * index of the form 4*a+b becomes 4*b+a (presumably the transposed position
 * inside a 4x4 block -- confirm against the IDCT in use).
 * The argument and the whole expansion are parenthesized so the macro keeps
 * its meaning when passed a compound expression or used inside a larger one. */
#define T(x) ((((x)>>2)) | ((((x)<<2)) & 0xF))
|
||||
h->zigzag_scan[i] = T(zigzag_scan[i]);
|
||||
h-> field_scan[i] = T( field_scan[i]);
|
||||
}
|
||||
}
|
||||
|
||||
decode_init_vlc(h);
|
||||
|
||||
if(avctx->extradata_size > 0 && avctx->extradata &&
|
||||
@ -4591,10 +4605,10 @@ decode_intra_mb:
|
||||
// fill_non_zero_count_cache(h);
|
||||
|
||||
if(IS_INTERLACED(mb_type)){
|
||||
scan= field_scan;
|
||||
scan= h->field_scan;
|
||||
dc_scan= luma_dc_field_scan;
|
||||
}else{
|
||||
scan= zigzag_scan;
|
||||
scan= h->zigzag_scan;
|
||||
dc_scan= luma_dc_zigzag_scan;
|
||||
}
|
||||
|
||||
@ -5575,10 +5589,10 @@ decode_intra_mb:
|
||||
int dqp;
|
||||
|
||||
if(IS_INTERLACED(mb_type)){
|
||||
scan= field_scan;
|
||||
scan= h->field_scan;
|
||||
dc_scan= luma_dc_field_scan;
|
||||
}else{
|
||||
scan= zigzag_scan;
|
||||
scan= h->zigzag_scan;
|
||||
dc_scan= luma_dc_zigzag_scan;
|
||||
}
|
||||
|
||||
|
@ -673,14 +673,11 @@ void ff_h264_idct_add_mmx2(uint8_t *dst, int16_t *block, int stride)
|
||||
/* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */
|
||||
IDCT4_1D( %%mm3, %%mm2, %%mm1, %%mm0, %%mm4, %%mm5 )
|
||||
|
||||
/* in: 2,4,1,3 out: 2,3,0,1 */
|
||||
TRANSPOSE4( %%mm2, %%mm4, %%mm1, %%mm3, %%mm0 )
|
||||
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
:: "m"(ff_pw_32));
|
||||
|
||||
STORE_DIFF_4P( %%mm2, %%mm4, %%mm7, &dst[0*stride] );
|
||||
STORE_DIFF_4P( %%mm3, %%mm4, %%mm7, &dst[1*stride] );
|
||||
STORE_DIFF_4P( %%mm0, %%mm4, %%mm7, &dst[2*stride] );
|
||||
STORE_DIFF_4P( %%mm1, %%mm4, %%mm7, &dst[3*stride] );
|
||||
STORE_DIFF_4P( %%mm2, %%mm0, %%mm7, &dst[0*stride] );
|
||||
STORE_DIFF_4P( %%mm4, %%mm0, %%mm7, &dst[1*stride] );
|
||||
STORE_DIFF_4P( %%mm1, %%mm0, %%mm7, &dst[2*stride] );
|
||||
STORE_DIFF_4P( %%mm3, %%mm0, %%mm7, &dst[3*stride] );
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user