From 8ecd38312210d48ec9e50d78fc223d60e71a30ed Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Fri, 29 Mar 2024 03:49:56 +0100 Subject: [PATCH] avcodec/huffyuvencdsp: Fix load of misaligned values Affected many ffvhuff FATE tests. Signed-off-by: Andreas Rheinhardt --- libavcodec/huffyuvencdsp.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c index 36e8f6130b..27428635af 100644 --- a/libavcodec/huffyuvencdsp.c +++ b/libavcodec/huffyuvencdsp.c @@ -18,16 +18,32 @@ #include "config.h" #include "libavutil/attributes.h" +#include "libavutil/intreadwrite.h" #include "huffyuvencdsp.h" #include "mathops.h" +#if HAVE_FAST_64BIT +#define BITS 64 +typedef uint64_t uint_native; +#else +#define BITS 32 +typedef uint32_t uint_native; +#endif +#define RN AV_JOIN(AV_RN, BITS) +#define RNA AV_JOIN(AV_JOIN(AV_RN, BITS), A) +#define WNA AV_JOIN(AV_JOIN(AV_WN, BITS), A) + +// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size +#define pb_7f (~(uint_native)0 / 255 * 0x7f) +#define pb_80 (~(uint_native)0 / 255 * 0x80) + // 0x00010001 or 0x0001000100010001 or whatever, depending on the cpu's native arithmetic size -#define pw_1 (ULONG_MAX / UINT16_MAX) +#define pw_1 ((uint_native)-1 / UINT16_MAX) static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ long i; #if !HAVE_FAST_UNALIGNED - if((long)src2 & (sizeof(long)-1)){ + if ((uintptr_t)src2 & (sizeof(uint_native) - 1)) { for(i=0; i+3> 1) * pw_1; - unsigned long pw_msb = pw_lsb + pw_1; + uint_native pw_lsb = (mask >> 1) * pw_1; + uint_native pw_msb = pw_lsb + pw_1; - for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { - long a = *(long*)(src1+i); - long b = *(long*)(src2+i); - *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb); + for (i = 0; i <= w - (int)sizeof(uint_native)/2; i += sizeof(uint_native)/2) { + uint_native a = RNA(src1 + i); + uint_native b = RN (src2 + i); + WNA(dst + i, ((a | pw_msb) - (b & pw_lsb)) ^ ((a^b^pw_msb) & pw_msb)); } } for (; i