From ce467421dc9e2061b8af22973ba4ba6248f16de9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?=
Date: Sat, 11 Nov 2023 17:08:45 +0200
Subject: [PATCH] lavc/exrdsp: unroll predictor

With explicit unrolling, we can skip half of the sign bit flips, and the
compiler is then better able to optimise the scalar loop:

predictor_c: 31376.0 (before)
predictor_c: 23703.0 (after)
---
Review note: the even-size peel is guarded with `size >= 2` so that an
empty buffer is left untouched, exactly as the original loop did.

 libavcodec/exrdsp.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/libavcodec/exrdsp.c b/libavcodec/exrdsp.c
index 752e1eb553..248cb93c5a 100644
--- a/libavcodec/exrdsp.c
+++ b/libavcodec/exrdsp.c
@@ -40,10 +40,21 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
 
 static void predictor_scalar(uint8_t *src, ptrdiff_t size)
 {
-    ptrdiff_t i;
+    /* Unrolled: `src[i + 1] += src[i] - 128;` */
+    /* Peel one element when size is even; size < 2 has no src[1] to peel. */
+    if (size >= 2 && (size & 1) == 0) {
+        src[1] += src[0] ^ 0x80;
+        src++;
+        size--;
+    }
 
-    for (i = 1; i < size; i++)
-        src[i] += src[i-1] - 128;
+    for (ptrdiff_t i = 1; i < size; i += 2) {
+        uint8_t a = src[i] + src[i - 1];
+
+        src[i] = a;
+        src[i + 1] += a;
+        src[i] ^= 0x80;
+    }
 }
 
 av_cold void ff_exrdsp_init(ExrDSPContext *c)