ffmpeg: fix General protection fault in ff_yuv_420_rgb32_ssse3

This commit is contained in:
Sergei Reznikov
2020-06-20 17:58:33 +00:00
parent e597819386
commit c0bfd8fe55
2 changed files with 312 additions and 9 deletions

View File

@@ -6,7 +6,7 @@ HOMEPAGE="https://ffmpeg.org/"
COPYRIGHT="2000-2003 Fabrice Bellard
2003-2020 the FFmpeg developers"
LICENSE="GNU LGPL v3"
REVISION="1"
REVISION="2"
SOURCE_URI="https://ffmpeg.org/releases/ffmpeg-$portVersion.tar.xz"
CHECKSUM_SHA256="1d0ad06484f44bcb97eba5e93c40bcb893890f9f64aeb43e46cd9bb4cbd6795d"
PATCHES="ffmpeg-$portVersion.patchset"

View File

@@ -1,14 +1,14 @@
From b23b1b46b089645399db6650af4494cc845d8320 Mon Sep 17 00:00:00 2001
From 44a9e0dfeb696ba37406d0d078283da29542a05e Mon Sep 17 00:00:00 2001
From: Jerome Duval <jerome.duval@gmail.com>
Date: Wed, 7 Aug 2019 16:21:12 +0300
Subject: disable ebx on x86.
diff --git a/configure b/configure
index 34c2adb..ff4f783 100755
index 8569a60..e3a0c31 100755
--- a/configure
+++ b/configure
@@ -6923,6 +6923,7 @@ case $target_os in
@@ -7039,6 +7039,7 @@ case $target_os in
haiku)
disable memalign
disable posix_memalign
@@ -17,10 +17,10 @@ index 34c2adb..ff4f783 100755
*-dos|freedos|opendos)
if test_cpp_condition sys/version.h "defined(__DJGPP__) && __DJGPP__ == 2 && __DJGPP_MINOR__ == 5"; then
--
2.21.0
2.27.0
From eea5a92a4edbc3b9343c668d19051dda3f223f32 Mon Sep 17 00:00:00 2001
From 984fba03b111be8ad8adf67e131576b198658921 Mon Sep 17 00:00:00 2001
From: Adrien Destugues <pulkomandy@pulkomandy.tk>
Date: Wed, 7 Aug 2019 16:21:32 +0300
Subject: Re-enable memalign for Haiku
@@ -32,10 +32,10 @@ If there are still problems with it we should rather fix them on Haiku
side.
diff --git a/configure b/configure
index ff4f783..7ddfa31 100755
index e3a0c31..35a4dc9 100755
--- a/configure
+++ b/configure
@@ -6921,8 +6921,6 @@ enabled avresample && warn "Building with deprecated library libavresample"
@@ -7037,8 +7037,6 @@ enabled avresample && warn "Building with deprecated library libavresample"
case $target_os in
haiku)
@@ -45,5 +45,308 @@ index ff4f783..7ddfa31 100755
;;
*-dos|freedos|opendos)
--
2.21.0
2.27.0
From d84c5b8eaa14eb8339d426b4bb8d9a0c36cd4d8b Mon Sep 17 00:00:00 2001
From: Sergei Reznikov <diver@gelios.net>
Date: Sat, 20 Jun 2020 17:52:36 +0000
Subject: Revert libswscale/x86/yuv2rgb: add ssse3 version
* this reverts fc6a5883d6af8cae0e96af84dda0ad74b360a084
* fixes a General protection fault in ff_yuv_420_rgb32_ssse3
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index c12e88c..c7668f4 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -67,15 +67,6 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
#include "yuv2rgb_template.c"
#endif /* HAVE_MMXEXT */
-//SSSE3 versions
-#if HAVE_SSSE3
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _ssse3
-#include "yuv2rgb_template.c"
-#endif
-
#endif /* HAVE_X86ASM */
av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
@@ -83,35 +74,6 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
#if HAVE_X86ASM
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_SSSE3(cpu_flags)) {
- switch (c->dstFormat) {
- case AV_PIX_FMT_RGB32:
- if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if CONFIG_SWSCALE_ALPHA
- return yuva420_rgb32_ssse3;
-#endif
- break;
- } else
- return yuv420_rgb32_ssse3;
- case AV_PIX_FMT_BGR32:
- if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if CONFIG_SWSCALE_ALPHA
- return yuva420_bgr32_ssse3;
-#endif
- break;
- } else
- return yuv420_bgr32_ssse3;
- case AV_PIX_FMT_RGB24:
- return yuv420_rgb24_ssse3;
- case AV_PIX_FMT_BGR24:
- return yuv420_bgr24_ssse3;
- case AV_PIX_FMT_RGB565:
- return yuv420_rgb16_ssse3;
- case AV_PIX_FMT_RGB555:
- return yuv420_rgb15_ssse3;
- }
- }
-
if (EXTERNAL_MMXEXT(cpu_flags)) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGB24:
diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index 575a84d..36a7baa 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -25,18 +25,11 @@
SECTION_RODATA
-; below variables are named like mask_dwXY, which means to preserve dword No.X & No.Y
-mask_dw036 : db -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0
-mask_dw147 : db 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1
-mask_dw25 : db 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0
-rgb24_shuf1: db 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11
-rgb24_shuf2: db 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5
-rgb24_shuf3: db 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15
-pw_00ff: times 8 dw 255
-pb_f8: times 16 db 248
-pb_e0: times 16 db 224
-pb_03: times 16 db 3
-pb_07: times 16 db 7
+pw_00ff: times 4 dw 255
+pb_f8: times 8 db 248
+pb_e0: times 8 db 224
+pb_03: times 8 db 3
+pb_07: times 8 db 7
mask_1101: dw -1, -1, 0, -1
mask_0010: dw 0, 0, -1, 0
@@ -56,11 +49,7 @@ SECTION .text
;-----------------------------------------------------------------------------
%macro MOV_H2L 1
-%if mmsize == 8
- psrlq %1, 32
-%else ; mmsize == 16
- psrldq %1, 8
-%endif
+psrlq %1, 32
%endmacro
%macro yuv2rgb_fn 3
@@ -88,7 +77,6 @@ SECTION .text
%define m_blue m1
%endif
-%if mmsize == 8
%define time_num 1
%define reg_num 8
%define y_offset [pointer_c_ditherq + 8 * 8]
@@ -99,45 +87,11 @@ SECTION .text
%define y_coff [pointer_c_ditherq + 3 * 8]
%define ub_coff [pointer_c_ditherq + 5 * 8]
%define vr_coff [pointer_c_ditherq + 4 * 8]
-%elif mmsize == 16
-%define time_num 2
-%if ARCH_X86_32
-%define reg_num 8
-%define my_offset [pointer_c_ditherq + 8 * 8]
-%define mu_offset [pointer_c_ditherq + 9 * 8]
-%define mv_offset [pointer_c_ditherq + 10 * 8]
-%define mug_coff [pointer_c_ditherq + 7 * 8]
-%define mvg_coff [pointer_c_ditherq + 6 * 8]
-%define my_coff [pointer_c_ditherq + 3 * 8]
-%define mub_coff [pointer_c_ditherq + 5 * 8]
-%define mvr_coff [pointer_c_ditherq + 4 * 8]
-%else ; ARCH_X86_64
-%define reg_num 16
-%define y_offset m8
-%define u_offset m9
-%define v_offset m10
-%define ug_coff m11
-%define vg_coff m12
-%define y_coff m13
-%define ub_coff m14
-%define vr_coff m15
-%endif ; ARCH_X86_32/64
-%endif ; coeff define mmsize == 8/16
cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
%if ARCH_X86_64
movsxd indexq, indexd
-%if mmsize == 16
- VBROADCASTSD y_offset, [pointer_c_ditherq + 8 * 8]
- VBROADCASTSD u_offset, [pointer_c_ditherq + 9 * 8]
- VBROADCASTSD v_offset, [pointer_c_ditherq + 10 * 8]
- VBROADCASTSD ug_coff, [pointer_c_ditherq + 7 * 8]
- VBROADCASTSD vg_coff, [pointer_c_ditherq + 6 * 8]
- VBROADCASTSD y_coff, [pointer_c_ditherq + 3 * 8]
- VBROADCASTSD ub_coff, [pointer_c_ditherq + 5 * 8]
- VBROADCASTSD vr_coff, [pointer_c_ditherq + 4 * 8]
-%endif
%endif
movu m_y, [py_2indexq + 2 * indexq]
movh m_u, [pu_indexq + indexq]
@@ -154,30 +108,10 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
psllw m1, 3
psllw m6, 3
psllw m7, 3
-%if (ARCH_X86_32 && mmsize == 16)
- VBROADCASTSD m2, mu_offset
- VBROADCASTSD m3, mv_offset
- VBROADCASTSD m4, my_offset
- psubsw m0, m2 ; U = U - 128
- psubsw m1, m3 ; V = V - 128
- psubw m6, m4
- psubw m7, m4
- VBROADCASTSD m2, mug_coff
- VBROADCASTSD m3, mvg_coff
- VBROADCASTSD m4, my_coff
- VBROADCASTSD m5, mub_coff
- pmulhw m2, m0
- pmulhw m3, m1
- pmulhw m6, m4
- pmulhw m7, m4
- pmulhw m0, m5
- VBROADCASTSD m4, mvr_coff
- pmulhw m1, m4
-%else ; ARCH_X86_64 || mmsize == 8
psubsw m0, u_offset ; U = U - 128
psubsw m1, v_offset ; V = V - 128
- psubw m6, y_offset
- psubw m7, y_offset
+ psubw m6, y_offset
+ psubw m7, y_offset
mova m2, m0
mova m3, m1
pmulhw m2, ug_coff
@@ -186,7 +120,6 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
pmulhw m7, y_coff
pmulhw m0, ub_coff
pmulhw m1, vr_coff
-%endif
paddsw m2, m3
mova m3, m7
mova m5, m7
@@ -209,7 +142,6 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
punpcklbw m6, m_red ; B0 R1 B2 R3 B4 R5 B6 R7 B8 R9 ...
mova m5, m3
punpckhbw m2, m_blue ; G1 B1 G3 B3 G5 B5 G7 B7 G9 B9 ...
-%if mmsize == 8
punpcklwd m3 ,m6 ; R0 G0 B0 R1 R2 G2 B2 R3
punpckhwd m5, m6 ; R4 G4 B4 R5 R6 G6 B6 R7
%if cpuflag(mmxext)
@@ -245,33 +177,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
psrlq m5, 32
movd [imageq + 20], m2 ; -- -- G7 B7
movd [imageq + 18], m5 ; R6 G6 B6 R7
-%endif ; mmsize = 8
-%else ; mmsize == 16
- pshufb m3, [rgb24_shuf1] ; r0 g0 r6 g6 r12 g12 r2 g2 r8 g8 r14 g14 r4 g4 r10 g10
- pshufb m6, [rgb24_shuf2] ; b10 r11 b0 r1 b6 r7 b12 r13 b2 r3 b8 r9 b14 r15 b4 r5
- pshufb m2, [rgb24_shuf3] ; g5 b5 g11 b11 g1 b1 g7 b7 g13 b13 g3 b3 g9 b9 g15 b15
- mova m7, [mask_dw036]
- mova m4, [mask_dw147]
- mova m5, [mask_dw25]
- pand m0, m7, m3 ; r0 g0 --- --- --- --- r2 g2 --- --- --- --- r4 g4 --- ---
- pand m1, m4, m6 ; --- --- b0 r1 --- --- --- --- b2 r3 --- --- --- --- b4 r5
- por m0, m1
- pand m1, m5, m2 ; --- --- --- --- g1 b1 --- --- --- --- g3 b3 --- --- --- ---
- por m0, m1 ; r0 g0 b0 r1 g1 b1 r2 g2 b2 r3 g3 b3 r4 g4 b4 r5
- pand m1, m7, m2 ; g5 b5 --- --- --- --- g7 b7 --- --- --- --- g9 b9 --- ---
- pand m7, m6 ; b10 r11 --- --- --- --- b12 r13 --- --- --- --- b14 r15 --- ---
- pand m6, m5 ; --- --- --- --- b6 r7 --- --- --- --- b8 r9 --- --- --- ---
- por m1, m6
- pand m6, m4, m3 ; --- --- r6 g6 --- --- --- --- r8 g8 --- --- --- --- r10 g10
- pand m2, m4 ; --- --- g11 b11 --- --- --- --- g13 b13 --- --- --- --- g15 b15
- pand m3, m5 ; --- --- --- --- r12 g12 --- --- --- --- r14 g14 --- --- --- ---
- por m2, m7
- por m1, m6 ; g5 b5 r6 g6 b6 r7 g7 b7 r8 g8 b8 r9 g9 b9 r10 g10
- por m2, m3 ; b10 r11 g11 b11 r12 g12 b12 r13 g13 b13 r14 g14 b14 r15 g15 b15
- mova [imageq], m0
- mova [imageq + 16], m1
- mova [imageq + 32], m2
-%endif ; mmsize = 16
+%endif
%else ; PACK RGB15/16/32
packuswb m0, m1
packuswb m3, m5
@@ -290,10 +196,10 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
%endif
mova m5, m_blue
mova m6, m_red
- punpckhbw m5, m_green
+ punpckhbw m5, m_green
punpcklbw m_blue, m_green
- punpckhbw m6, m_alpha
- punpcklbw m_red, m_alpha
+ punpckhbw m6, m_alpha
+ punpcklbw m_red, m_alpha
mova m_green, m_blue
mova m_alpha, m5
punpcklwd m_blue, m_red
@@ -301,23 +207,14 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
punpcklwd m5, m6
punpckhwd m_alpha, m6
mova [imageq + 0], m_blue
- mova [imageq + 8 * time_num], m_green
+ mova [imageq + 8 * time_num], m_green
mova [imageq + 16 * time_num], m5
mova [imageq + 24 * time_num], m_alpha
%else ; PACK RGB15/16
%define depth 2
-%if cpuflag(ssse3)
- %define red_dither m3
- %define green_dither m4
- %define blue_dither m5
- VBROADCASTSD red_dither, [pointer_c_ditherq + 0 * 8]
- VBROADCASTSD green_dither, [pointer_c_ditherq + 1 * 8]
- VBROADCASTSD blue_dither, [pointer_c_ditherq + 2 * 8]
-%else ; cpuflag(mmx/mmxext)
-%define blue_dither [pointer_c_ditherq + 2 * 8]
-%define green_dither [pointer_c_ditherq + 1 * 8]
-%define red_dither [pointer_c_ditherq + 0 * 8]
-%endif
+%define blue_dither [pointer_c_ditherq + 2 * 8]
+%define green_dither [pointer_c_ditherq + 1 * 8]
+%define red_dither [pointer_c_ditherq + 0 * 8]
%if %3 == 15
%define gmask pb_03
%define isRGB15 1
@@ -371,13 +268,3 @@ yuv2rgb_fn yuv, rgb, 16
INIT_MMX mmxext
yuv2rgb_fn yuv, rgb, 24
yuv2rgb_fn yuv, bgr, 24
-
-INIT_XMM ssse3
-yuv2rgb_fn yuv, rgb, 24
-yuv2rgb_fn yuv, bgr, 24
-yuv2rgb_fn yuv, rgb, 32
-yuv2rgb_fn yuv, bgr, 32
-yuv2rgb_fn yuva, rgb, 32
-yuv2rgb_fn yuva, bgr, 32
-yuv2rgb_fn yuv, rgb, 15
-yuv2rgb_fn yuv, rgb, 16
--
2.27.0