mirror of
https://github.com/yann64/haikuports.git
synced 2026-04-09 05:10:05 +02:00
1497 lines
73 KiB
Plaintext
1497 lines
73 KiB
Plaintext
diff --git a/Setup.in b/Setup.in
|
|
index 4bb6c1c..9236df4 100644
|
|
--- a/Setup.in
|
|
+++ b/Setup.in
|
|
@@ -33,9 +33,9 @@ mixer_music src/music.c $(SDL) $(MIXER) $(DEBUG)
|
|
_numericsurfarray src/_numericsurfarray.c $(SDL) $(DEBUG)
|
|
_numericsndarray src/_numericsndarray.c $(SDL) $(MIXER) $(DEBUG)
|
|
movie src/movie.c $(SDL) $(SMPEG) $(DEBUG)
|
|
-scrap src/scrap.c $(SDL) $(SCRAP) $(DEBUG)
|
|
+#scrap src/scrap.c $(SDL) $(SCRAP) $(DEBUG)
|
|
_camera src/_camera.c src/camera_v4l2.c src/camera_v4l.c $(SDL) $(DEBUG)
|
|
-pypm src/pypm.c $(SDL) $(PORTMIDI) $(PORTTIME) $(DEBUG)
|
|
+#pypm src/pypm.c $(SDL) $(PORTMIDI) $(PORTTIME) $(DEBUG)
|
|
|
|
GFX = src/SDL_gfx/SDL_gfxPrimitives.c
|
|
#GFX = src/SDL_gfx/SDL_gfxBlitFunc.c src/SDL_gfx/SDL_gfxPrimitives.c
|
|
@@ -64,7 +64,7 @@ joystick src/joystick.c $(SDL) $(DEBUG)
|
|
draw src/draw.c $(SDL) $(DEBUG)
|
|
image src/image.c $(SDL) $(DEBUG)
|
|
overlay src/overlay.c $(SDL) $(DEBUG)
|
|
-transform src/transform.c src/rotozoom.c src/scale2x.c src/scale_mmx.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
|
|
+transform src/transform.c src/rotozoom.c src/scale2x.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
|
|
mask src/mask.c src/bitmask.c $(SDL) $(DEBUG)
|
|
bufferproxy src/bufferproxy.c $(SDL) $(DEBUG)
|
|
pixelarray src/pixelarray.c $(SDL) $(DEBUG)
|
|
diff --git a/config.py b/config.py
|
|
index f60d64c..6e0d766 100644
|
|
--- a/config.py
|
|
+++ b/config.py
|
|
@@ -119,12 +119,16 @@ def main():
|
|
elif sys.platform == 'win32':
|
|
print_('Using WINDOWS mingw/msys configuration...\n')
|
|
import config_msys as CFG
|
|
+ elif sys.platform == 'haiku1' or sys.platform == 'haiku1_x86':
|
|
+ print_('Using Haiku configuration...\n')
|
|
+ import config_haiku as CFG
|
|
elif sys.platform == 'darwin':
|
|
print_('Using Darwin configuration...\n')
|
|
import config_darwin as CFG
|
|
additional_platform_setup = open("Setup_Darwin.in", "r").readlines()
|
|
else:
|
|
print_('Using UNIX configuration...\n')
|
|
+ print_(sys.platform)
|
|
import config_unix as CFG
|
|
|
|
if os.path.isfile('Setup'):
|
|
diff --git a/pygame.egg-info/SOURCES.txt b/pygame.egg-info/SOURCES.txt
|
|
index a7ec677..39c2a55 100644
|
|
--- a/pygame.egg-info/SOURCES.txt
|
|
+++ b/pygame.egg-info/SOURCES.txt
|
|
@@ -301,11 +301,7 @@ src/rect.c
|
|
src/rect.doc
|
|
src/rotozoom.c
|
|
src/rwobject.c
|
|
-src/scale.h
|
|
src/scale2x.c
|
|
-src/scale_mmx.c
|
|
-src/scale_mmx32.c
|
|
-src/scale_mmx64.c
|
|
src/scrap.c
|
|
src/scrap.doc
|
|
src/scrap.h
|
|
@@ -465,4 +461,4 @@ test/util/build_page/results/.htaccess
|
|
test/util/build_page/results/index.py
|
|
test/util/build_page/results/results.css
|
|
test/util/build_page/upload_results/.htaccess
|
|
-test/util/build_page/upload_results/index.py
|
|
\ No newline at end of file
|
|
+test/util/build_page/upload_results/index.py
|
|
diff --git a/setup.py b/setup.py
|
|
index 45af61f..bf352dc 100644
|
|
--- a/setup.py
|
|
+++ b/setup.py
|
|
@@ -116,7 +116,7 @@ else:
|
|
#headers to install
|
|
headers = glob.glob(os.path.join('src', '*.h'))
|
|
headers.remove(os.path.join('src', 'numeric_arrayobject.h'))
|
|
-headers.remove(os.path.join('src', 'scale.h'))
|
|
+#headers.remove(os.path.join('src', 'scale.h'))
|
|
|
|
#sanity check for any arguments
|
|
if len(sys.argv) == 1:
|
|
@@ -354,17 +354,6 @@ if sys.platform == 'win32':
|
|
cmdclass['build_ext'] = WinBuildExt
|
|
|
|
# Add the precompiled smooth scale MMX functions to transform.
|
|
- def replace_scale_mmx():
|
|
- for e in extensions:
|
|
- if e.name == 'transform':
|
|
- e.extra_objects.append(
|
|
- os.path.join('obj', 'win32', 'scale_mmx.obj'))
|
|
- for i in range(len(e.sources)):
|
|
- if e.sources[i].endswith('scale_mmx.c'):
|
|
- del e.sources[i]
|
|
- return
|
|
- replace_scale_mmx()
|
|
-
|
|
|
|
#clean up the list of extensions
|
|
for e in extensions[:]:
|
|
diff --git a/src/pgcompat.h b/src/pgcompat.h
|
|
index 6b9eea0..e34d2ba 100644
|
|
--- a/src/pgcompat.h
|
|
+++ b/src/pgcompat.h
|
|
@@ -69,9 +69,7 @@
|
|
#define DECREF_MOD(mod)
|
|
|
|
/* Type header differs. */
|
|
-#define TYPE_HEAD(x,y) \
|
|
- PyObject_HEAD_INIT(x) \
|
|
- 0,
|
|
+#define TYPE_HEAD(x,y) PyObject_HEAD_INIT(x) 0,
|
|
|
|
/* Text interface. Use ascii strings. */
|
|
#define Text_Type PyString_Type
|
|
diff --git a/src/scale.h b/src/scale.h
|
|
deleted file mode 100644
|
|
index 0bb0eb2..0000000
|
|
--- a/src/scale.h
|
|
+++ /dev/null
|
|
@@ -1,61 +0,0 @@
|
|
-/*
|
|
- pygame - Python Game Library
|
|
- Copyright (C) 2000-2001 Pete Shinners
|
|
- Copyright (C) 2007 Rene Dudfield, Richard Goedeken
|
|
-
|
|
- This library is free software; you can redistribute it and/or
|
|
- modify it under the terms of the GNU Library General Public
|
|
- License as published by the Free Software Foundation; either
|
|
- version 2 of the License, or (at your option) any later version.
|
|
-
|
|
- This library is distributed in the hope that it will be useful,
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
- Library General Public License for more details.
|
|
-
|
|
- You should have received a copy of the GNU Library General Public
|
|
- License along with this library; if not, write to the Free
|
|
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-
|
|
- Pete Shinners
|
|
- pete@shinners.org
|
|
-*/
|
|
-
|
|
-/* Pentium MMX/SSE smoothscale routines
|
|
- * Available on Win32 or GCC on a Pentium.
|
|
- * Sorry, no Win64 support yet for Visual C builds, but it can be added.
|
|
- */
|
|
-
|
|
-#if !defined(SCALE_HEADER)
|
|
-#define SCALE_HEADER
|
|
-
|
|
-#if (defined(__GNUC__) && ((defined(__x86_64__) && !defined(_NO_MMX_FOR_X86_64)) || defined(__i386__))) || defined(MS_WIN32)
|
|
-#define SCALE_MMX_SUPPORT
|
|
-
|
|
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
|
|
- */
|
|
-void filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
|
|
-
|
|
-void filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
|
|
-
|
|
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
|
|
- */
|
|
-void filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
|
|
-
|
|
-void filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
|
|
-
|
|
-/* These functions implement a bilinear filter in the X-dimension.
|
|
- */
|
|
-void filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
|
|
-
|
|
-void filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
|
|
-
|
|
-/* These functions implement a bilinear filter in the Y-dimension.
|
|
- */
|
|
-void filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
|
|
-
|
|
-void filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
|
|
-
|
|
-#endif /* #if (defined(__GNUC__) && .....) */
|
|
-
|
|
-#endif /* #if !defined(SCALE_HEADER) */
|
|
diff --git a/src/scale_mmx.c b/src/scale_mmx.c
|
|
deleted file mode 100644
|
|
index 36e7af0..0000000
|
|
--- a/src/scale_mmx.c
|
|
+++ /dev/null
|
|
@@ -1,37 +0,0 @@
|
|
-/*
|
|
- pygame - Python Game Library
|
|
- Copyright (C) 2000-2001 Pete Shinners
|
|
- Copyright (C) 2007 Rene Dudfield, Richard Goedeken
|
|
-
|
|
- This library is free software; you can redistribute it and/or
|
|
- modify it under the terms of the GNU Library General Public
|
|
- License as published by the Free Software Foundation; either
|
|
- version 2 of the License, or (at your option) any later version.
|
|
-
|
|
- This library is distributed in the hope that it will be useful,
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
- Library General Public License for more details.
|
|
-
|
|
- You should have received a copy of the GNU Library General Public
|
|
- License along with this library; if not, write to the Free
|
|
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-
|
|
- Pete Shinners
|
|
- pete@shinners.org
|
|
-*/
|
|
-
|
|
-/* Pentium MMX/SSE smoothscale routines
|
|
- * These are only compiled with GCC.
|
|
- */
|
|
-#if defined(__GNUC__)
|
|
-/* Choose between the 32 bit and 64 bit versions.
|
|
- * Including source code like this may be frowned upon by some,
|
|
- * but the alternative is ungainly conditionally compiled code.
|
|
- */
|
|
-# if defined(__x86_64__)
|
|
-# include "scale_mmx64.c"
|
|
-# elif defined(__i386__)
|
|
-# include "scale_mmx32.c"
|
|
-# endif
|
|
-#endif
|
|
diff --git a/src/scale_mmx32.c b/src/scale_mmx32.c
|
|
deleted file mode 100644
|
|
index 14cd8d2..0000000
|
|
--- a/src/scale_mmx32.c
|
|
+++ /dev/null
|
|
@@ -1,620 +0,0 @@
|
|
-/*
|
|
- pygame - Python Game Library
|
|
- Copyright (C) 2000-2001 Pete Shinners
|
|
- Copyright (C) 2007 Rene Dudfield, Richard Goedeken
|
|
-
|
|
- This library is free software; you can redistribute it and/or
|
|
- modify it under the terms of the GNU Library General Public
|
|
- License as published by the Free Software Foundation; either
|
|
- version 2 of the License, or (at your option) any later version.
|
|
-
|
|
- This library is distributed in the hope that it will be useful,
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
- Library General Public License for more details.
|
|
-
|
|
- You should have received a copy of the GNU Library General Public
|
|
- License along with this library; if not, write to the Free
|
|
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-
|
|
- Pete Shinners
|
|
- pete@shinners.org
|
|
-*/
|
|
-
|
|
-/* Pentium 32 bit SSE/MMX smoothscale filter routines
|
|
- * These are written for compilation with GCC only.
|
|
- *
|
|
- * This file should not depend on anything but the C standard library.
|
|
- */
|
|
-
|
|
-#if !defined(__GNUC__) || !defined(__i386__) || defined(__x86_64__)
|
|
-#error "Pygame build bug: should not be compiling this file!"
|
|
-#endif
|
|
-
|
|
-#include <stdint.h>
|
|
-typedef uint8_t Uint8; /* SDL convension */
|
|
-typedef uint16_t Uint16; /* SDL convension */
|
|
-#include <stdlib.h>
|
|
-#include <memory.h>
|
|
-#include "scale.h"
|
|
-
|
|
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
|
|
- */
|
|
-void
|
|
-filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
|
|
-{
|
|
- int srcdiff = srcpitch - (srcwidth * 4);
|
|
- int dstdiff = dstpitch - (dstwidth * 4);
|
|
-
|
|
- int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
|
|
- int xrecip = 0x40000000 / xspace;
|
|
- long long One64 = 0x4000400040004000ULL;
|
|
-
|
|
- asm __volatile__(" /* MMX code for X-shrink area average filter */ "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movd %6, %%mm7; " /* mm7 == xrecipmmx */
|
|
- " punpcklwd %%mm7, %%mm7; "
|
|
- " punpckldq %%mm7, %%mm7; "
|
|
- "1: " /* outer Y-loop */
|
|
- " movl %5, %%ecx; " /* ecx == xcounter */
|
|
- " pxor %%mm1, %%mm1; " /* mm1 == accumulator */
|
|
- " movl %4, %%edx; " /* edx == width */
|
|
- "2: " /* inner X-loop */
|
|
- " cmpl $0x4000, %%ecx; "
|
|
- " jbe 3f; "
|
|
- " movd (%0), %%mm2; " /* mm2 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm2; "
|
|
- " paddw %%mm2, %%mm1; " /* accumulator += srcpix */
|
|
- " subl $0x4000, %%ecx; "
|
|
- " jmp 4f; "
|
|
- "3: " /* prepare to output a pixel */
|
|
- " movd %%ecx, %%mm2; "
|
|
- " movq %2, %%mm3; " /* mm3 = 2^14 */
|
|
- " punpcklwd %%mm2, %%mm2; "
|
|
- " punpckldq %%mm2, %%mm2; "
|
|
- " movd (%0), %%mm4; " /* mm4 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " psubw %%mm2, %%mm3; " /* mm3 = xfrac */
|
|
- " psllw $2, %%mm4; "
|
|
- " movq %%mm4, %%mm5; " /* mm2 = (srcpix * xcounter >> 16) */
|
|
- " psraw $15, %%mm5; "
|
|
- " pand %%mm2, %%mm5; "
|
|
- " movq %%mm2, %%mm6; "
|
|
- " psraw $15, %%mm6; "
|
|
- " pand %%mm4, %%mm6; "
|
|
- " pmulhw %%mm4, %%mm2; "
|
|
- " paddw %%mm5, %%mm2; "
|
|
- " paddw %%mm6, %%mm2; "
|
|
- " movq %%mm4, %%mm5; " /* mm3 = (srcpix * xfrac) >> 16) */
|
|
- " psraw $15, %%mm5; "
|
|
- " pand %%mm3, %%mm5; "
|
|
- " movq %%mm3, %%mm6; "
|
|
- " psraw $15, %%mm6; "
|
|
- " pand %%mm4, %%mm6; "
|
|
- " pmulhw %%mm4, %%mm3; "
|
|
- " paddw %%mm5, %%mm3; "
|
|
- " paddw %%mm6, %%mm3; "
|
|
- " paddw %%mm1, %%mm2; "
|
|
- " movq %%mm3, %%mm1; " /* accumulator = (srcpix * xfrac) >> 16 */
|
|
- " movq %%mm7, %%mm5; "
|
|
- " psraw $15, %%mm5; "
|
|
- " pand %%mm2, %%mm5; "
|
|
- " movq %%mm2, %%mm6; "
|
|
- " psraw $15, %%mm6; "
|
|
- " pand %%mm7, %%mm6; "
|
|
- " pmulhw %%mm7, %%mm2; "
|
|
- " paddw %%mm5, %%mm2; "
|
|
- " paddw %%mm6, %%mm2; "
|
|
- " packuswb %%mm0, %%mm2; "
|
|
- " movd %%mm2, (%1); "
|
|
- " add %5, %%ecx; "
|
|
- " add $4, %1; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- "4: " /* tail of inner X-loop */
|
|
- " decl %%edx; "
|
|
- " jne 2b; "
|
|
- " add %7, %0; " /* srcpix += srcdiff */
|
|
- " add %8, %1; " /* dstpix += dstdiff */
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
|
|
- : "m"(One64), "m"(height), "m"(srcwidth),
|
|
- "m"(xspace), "m"(xrecip), "m"(srcdiff), "m"(dstdiff) /* input */
|
|
- : "%ecx","%edx" /* clobbered */
|
|
- );
|
|
-}
|
|
-
|
|
-void
|
|
-filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
|
|
-{
|
|
- int srcdiff = srcpitch - (srcwidth * 4);
|
|
- int dstdiff = dstpitch - (dstwidth * 4);
|
|
-
|
|
- int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
|
|
- int xrecip = 0x40000000 / xspace;
|
|
- long long One64 = 0x4000400040004000ULL;
|
|
-
|
|
- asm __volatile__(" /* MMX code for X-shrink area average filter */ "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movd %6, %%mm7; " /* mm7 == xrecipmmx */
|
|
- " movq %2, %%mm6; " /* mm6 = 2^14 */
|
|
- " pshufw $0, %%mm7, %%mm7; "
|
|
- "1: " /* outer Y-loop */
|
|
- " movl %5, %%ecx; " /* ecx == xcounter */
|
|
- " pxor %%mm1, %%mm1; " /* mm1 == accumulator */
|
|
- " movl %4, %%edx; " /* edx == width */
|
|
- "2: " /* inner X-loop */
|
|
- " cmpl $0x4000, %%ecx; "
|
|
- " jbe 3f; "
|
|
- " movd (%0), %%mm2; " /* mm2 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm2; "
|
|
- " paddw %%mm2, %%mm1; " /* accumulator += srcpix */
|
|
- " subl $0x4000, %%ecx; "
|
|
- " jmp 4f; "
|
|
- "3: " /* prepare to output a pixel */
|
|
- " movd %%ecx, %%mm2; "
|
|
- " movq %%mm6, %%mm3; " /* mm3 = 2^14 */
|
|
- " pshufw $0, %%mm2, %%mm2; "
|
|
- " movd (%0), %%mm4; " /* mm4 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " psubw %%mm2, %%mm3; " /* mm3 = xfrac */
|
|
- " psllw $2, %%mm4; "
|
|
- " pmulhuw %%mm4, %%mm2; " /* mm2 = (srcpix * xcounter >> 16) */
|
|
- " pmulhuw %%mm4, %%mm3; " /* mm3 = (srcpix * xfrac) >> 16 */
|
|
- " paddw %%mm1, %%mm2; "
|
|
- " movq %%mm3, %%mm1; " /* accumulator = (srcpix * xfrac) >> 16 */
|
|
- " pmulhuw %%mm7, %%mm2; "
|
|
- " packuswb %%mm0, %%mm2; "
|
|
- " movd %%mm2, (%1); "
|
|
- " add %5, %%ecx; "
|
|
- " add $4, %1; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- "4: " /* tail of inner X-loop */
|
|
- " decl %%edx; "
|
|
- " jne 2b; "
|
|
- " add %7, %0; " /* srcpix += srcdiff */
|
|
- " add %8, %1; " /* dstpix += dstdiff */
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
|
|
- : "m"(One64), "m"(height), "m"(srcwidth),
|
|
- "m"(xspace), "m"(xrecip), "m"(srcdiff), "m"(dstdiff) /* input */
|
|
- : "%ecx","%edx" /* clobbered */
|
|
- );
|
|
-}
|
|
-
|
|
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
|
|
- */
|
|
-void
|
|
-filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
|
|
-{
|
|
- Uint16 *templine;
|
|
- int srcdiff = srcpitch - (width * 4);
|
|
- int dstdiff = dstpitch - (width * 4);
|
|
- int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
|
|
- int yrecip = 0x40000000 / yspace;
|
|
- long long One64 = 0x4000400040004000ULL;
|
|
-
|
|
- /* allocate and clear a memory area for storing the accumulator line */
|
|
- templine = (Uint16 *) malloc(dstpitch * 2);
|
|
- if (templine == 0) return;
|
|
- memset(templine, 0, dstpitch * 2);
|
|
-
|
|
- asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
|
|
- " movl %5, %%ecx; " /* ecx == ycounter */
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movd %6, %%mm7; " /* mm7 == yrecipmmx */
|
|
- " punpcklwd %%mm7, %%mm7; "
|
|
- " punpckldq %%mm7, %%mm7; "
|
|
- "1: " /* outer Y-loop */
|
|
- " movl %2, %%eax; " /* rax == accumulate */
|
|
- " cmpl $0x4000, %%ecx; "
|
|
- " jbe 3f; "
|
|
- " movl %4, %%edx; " /* edx == width */
|
|
- "2: "
|
|
- " movd (%0), %%mm1; "
|
|
- " add $4, %0; "
|
|
- " movq (%%eax), %%mm2; "
|
|
- " punpcklbw %%mm0, %%mm1; "
|
|
- " paddw %%mm1, %%mm2; "
|
|
- " movq %%mm2, (%%eax); "
|
|
- " add $8, %%eax; "
|
|
- " decl %%edx; "
|
|
- " jne 2b; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- " jmp 6f; "
|
|
- "3: " /* prepare to output a line */
|
|
- " movd %%ecx, %%mm1; "
|
|
- " movl %4, %%edx; " /* edx = width */
|
|
- " movq %9, %%mm6; " /* mm6 = 2^14 */
|
|
- " punpcklwd %%mm1, %%mm1; "
|
|
- " punpckldq %%mm1, %%mm1; "
|
|
- " psubw %%mm1, %%mm6; " /* mm6 = yfrac */
|
|
- "4: "
|
|
- " movd (%0), %%mm4; " /* mm4 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " movq (%%eax), %%mm5; " /* mm5 = accumulate */
|
|
- " movq %%mm6, %%mm3; "
|
|
- " psllw $2, %%mm4; "
|
|
- " movq %%mm4, %%mm0; " /* mm3 = (srcpix * yfrac) >> 16) */
|
|
- " psraw $15, %%mm0; "
|
|
- " pand %%mm3, %%mm0; "
|
|
- " movq %%mm3, %%mm2; "
|
|
- " psraw $15, %%mm2; "
|
|
- " pand %%mm4, %%mm2; "
|
|
- " pmulhw %%mm4, %%mm3; "
|
|
- " paddw %%mm0, %%mm3; "
|
|
- " paddw %%mm2, %%mm3; "
|
|
- " movq %%mm1, %%mm0; " /* mm4 = (srcpix * ycounter >> 16) */
|
|
- " psraw $15, %%mm0; "
|
|
- " pand %%mm4, %%mm0; "
|
|
- " movq %%mm4, %%mm2; "
|
|
- " psraw $15, %%mm2; "
|
|
- " pand %%mm1, %%mm2; "
|
|
- " pmulhw %%mm1, %%mm4; "
|
|
- " paddw %%mm0, %%mm4; "
|
|
- " paddw %%mm2, %%mm4; "
|
|
- " movq %%mm3, (%%eax); "
|
|
- " paddw %%mm5, %%mm4; "
|
|
- " add $8, %%eax; "
|
|
- " movq %%mm7, %%mm0; "
|
|
- " psraw $15, %%mm0; "
|
|
- " pand %%mm4, %%mm0; "
|
|
- " movq %%mm4, %%mm2; "
|
|
- " psraw $15, %%mm2; "
|
|
- " pand %%mm7, %%mm2; "
|
|
- " pmulhw %%mm7, %%mm4; "
|
|
- " paddw %%mm0, %%mm4; "
|
|
- " paddw %%mm2, %%mm4; "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " packuswb %%mm0, %%mm4; "
|
|
- " movd %%mm4, (%1); "
|
|
- " add $4, %1; "
|
|
- " decl %%edx; "
|
|
- " jne 4b; "
|
|
- " add %8, %1; " /* dstpix += dstdiff */
|
|
- " addl %5, %%ecx; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- "6: " /* tail of outer Y-loop */
|
|
- " add %7, %0; " /* srcpix += srcdiff */
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
|
|
- : "m"(templine), "m"(srcheight), "m"(width), "m"(yspace),
|
|
- "m"(yrecip), "m"(srcdiff), "m"(dstdiff),"m"(One64) /* input */
|
|
- : "%ecx","%edx","%eax" /* clobbered */
|
|
- );
|
|
-
|
|
- /* free the temporary memory */
|
|
- free(templine);
|
|
-}
|
|
-
|
|
-void
|
|
-filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
|
|
-{
|
|
- Uint16 *templine;
|
|
- int srcdiff = srcpitch - (width * 4);
|
|
- int dstdiff = dstpitch - (width * 4);
|
|
- int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
|
|
- int yrecip = 0x40000000 / yspace;
|
|
- long long One64 = 0x4000400040004000ULL;
|
|
-
|
|
- /* allocate and clear a memory area for storing the accumulator line */
|
|
- templine = (Uint16 *) malloc(dstpitch * 2);
|
|
- if (templine == 0) return;
|
|
- memset(templine, 0, dstpitch * 2);
|
|
- asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
|
|
- " movl %5, %%ecx; " /* ecx == ycounter */
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movd %6, %%mm7; " /* mm7 == yrecipmmx */
|
|
- " pshufw $0, %%mm7, %%mm7; "
|
|
- "1: " /* outer Y-loop */
|
|
- " movl %2, %%eax; " /* rax == accumulate */
|
|
- " cmpl $0x4000, %%ecx; "
|
|
- " jbe 3f; "
|
|
- " movl %4, %%edx; " /* edx == width */
|
|
- "2: "
|
|
- " movd (%0), %%mm1; "
|
|
- " add $4, %0; "
|
|
- " movq (%%eax), %%mm2; "
|
|
- " punpcklbw %%mm0, %%mm1; "
|
|
- " paddw %%mm1, %%mm2; "
|
|
- " movq %%mm2, (%%eax); "
|
|
- " add $8, %%eax; "
|
|
- " decl %%edx; "
|
|
- " jne 2b; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- " jmp 6f; "
|
|
- "3: " /* prepare to output a line */
|
|
- " movd %%ecx, %%mm1; "
|
|
- " movl %4, %%edx; " /* edx = width */
|
|
- " movq %9, %%mm6; " /* mm6 = 2^14 */
|
|
- " pshufw $0, %%mm1, %%mm1; "
|
|
- " psubw %%mm1, %%mm6; " /* mm6 = yfrac */
|
|
- "4: "
|
|
- " movd (%0), %%mm4; " /* mm4 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " movq (%%eax), %%mm5; " /* mm5 = accumulate */
|
|
- " movq %%mm6, %%mm3; "
|
|
- " psllw $2, %%mm4; "
|
|
- " pmulhuw %%mm4, %%mm3; " /* mm3 = (srcpix * yfrac) >> 16 */
|
|
- " pmulhuw %%mm1, %%mm4; " /* mm4 = (srcpix * ycounter >> 16) */
|
|
- " movq %%mm3, (%%eax); "
|
|
- " paddw %%mm5, %%mm4; "
|
|
- " add $8, %%eax; "
|
|
- " pmulhuw %%mm7, %%mm4; "
|
|
- " packuswb %%mm0, %%mm4; "
|
|
- " movd %%mm4, (%1); "
|
|
- " add $4, %1; "
|
|
- " decl %%edx; "
|
|
- " jne 4b; "
|
|
- " add %8, %1; " /* dstpix += dstdiff */
|
|
- " addl %5, %%ecx; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- "6: " /* tail of outer Y-loop */
|
|
- " add %7, %0; " /* srcpix += srcdiff */
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
|
|
- : "m"(templine), "m"(srcheight), "m"(width), "m"(yspace),
|
|
- "m"(yrecip), "m"(srcdiff), "m"(dstdiff),"m"(One64) /* input */
|
|
- : "%ecx","%edx","%eax" /* clobbered */
|
|
- );
|
|
-
|
|
- /* free the temporary memory */
|
|
- free(templine);
|
|
-}
|
|
-
|
|
-/* These functions implement a bilinear filter in the X-dimension.
|
|
- */
|
|
-void
|
|
-filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
|
|
-{
|
|
- int *xidx0, *xmult0, *xmult1;
|
|
- int x, y;
|
|
- int factorwidth = 8;
|
|
- long long One64 = 0x0100010001000100ULL;
|
|
-
|
|
- /* Allocate memory for factors */
|
|
- xidx0 = malloc(dstwidth * 4);
|
|
- if (xidx0 == 0) return;
|
|
- xmult0 = (int *) malloc(dstwidth * factorwidth);
|
|
- xmult1 = (int *) malloc(dstwidth * factorwidth);
|
|
- if (xmult0 == 0 || xmult1 == 0)
|
|
- {
|
|
- free(xidx0);
|
|
- if (xmult0) free(xmult0);
|
|
- if (xmult1) free(xmult1);
|
|
- }
|
|
-
|
|
- /* Create multiplier factors and starting indices and put them in arrays */
|
|
- for (x = 0; x < dstwidth; x++)
|
|
- {
|
|
- int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
|
|
- int xm0 = 0x100 - xm1;
|
|
- xidx0[x] = x * (srcwidth - 1) / dstwidth;
|
|
- xmult1[x*2] = xm1 | (xm1 << 16);
|
|
- xmult1[x*2+1] = xm1 | (xm1 << 16);
|
|
- xmult0[x*2] = xm0 | (xm0 << 16);
|
|
- xmult0[x*2+1] = xm0 | (xm0 << 16);
|
|
- }
|
|
-
|
|
- /* Do the scaling in raster order so we don't trash the cache */
|
|
- for (y = 0; y < height; y++)
|
|
- {
|
|
- Uint8 *srcrow0 = srcpix + y * srcpitch;
|
|
- Uint8 *dstrow = dstpix + y * dstpitch;
|
|
- int *xm0 = xmult0;
|
|
- int *x0 = xidx0;
|
|
- int width = dstwidth;
|
|
- asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movq %5, %%mm7; "
|
|
- "1: "
|
|
- " movl (%2), %%eax; " /* get xidx0[x] */
|
|
- " add $4, %2; "
|
|
- " movq %%mm7, %%mm2; "
|
|
- " movq (%0), %%mm1; " /* load mult0 */
|
|
- " add $8, %0; "
|
|
- " psubw %%mm1, %%mm2; " /* load mult1 */
|
|
- " movd (%4,%%eax,4), %%mm4; "
|
|
- " movd 4(%4,%%eax,4), %%mm5; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " punpcklbw %%mm0, %%mm5; "
|
|
- " pmullw %%mm1, %%mm4; "
|
|
- " pmullw %%mm2, %%mm5; "
|
|
- " paddw %%mm4, %%mm5; "
|
|
- " psrlw $8, %%mm5; "
|
|
- " packuswb %%mm0, %%mm5; "
|
|
- " movd %%mm5, (%1); "
|
|
- " add $4, %1; "
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(xm0), "+r"(dstrow), "+r"(x0), "+m"(width) /* outputs */
|
|
- : "S"(srcrow0), "m"(One64) /* input */
|
|
- : "%eax" /* clobbered */
|
|
- );
|
|
- }
|
|
-
|
|
- /* free memory */
|
|
- free(xidx0);
|
|
- free(xmult0);
|
|
- free(xmult1);
|
|
-}
|
|
-
|
|
-void
|
|
-filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
|
|
-{
|
|
- int *xidx0, *xmult0, *xmult1;
|
|
- int x, y;
|
|
- int factorwidth = 8;
|
|
- long long One64 = 0x0100010001000100ULL;
|
|
-
|
|
- /* Allocate memory for factors */
|
|
- xidx0 = malloc(dstwidth * 4);
|
|
- if (xidx0 == 0) return;
|
|
- xmult0 = (int *) malloc(dstwidth * factorwidth);
|
|
- xmult1 = (int *) malloc(dstwidth * factorwidth);
|
|
- if (xmult0 == 0 || xmult1 == 0)
|
|
- {
|
|
- free(xidx0);
|
|
- if (xmult0) free(xmult0);
|
|
- if (xmult1) free(xmult1);
|
|
- }
|
|
-
|
|
- /* Create multiplier factors and starting indices and put them in arrays */
|
|
- for (x = 0; x < dstwidth; x++)
|
|
- {
|
|
- int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
|
|
- int xm0 = 0x100 - xm1;
|
|
- xidx0[x] = x * (srcwidth - 1) / dstwidth;
|
|
- xmult1[x*2] = xm1 | (xm1 << 16);
|
|
- xmult1[x*2+1] = xm1 | (xm1 << 16);
|
|
- xmult0[x*2] = xm0 | (xm0 << 16);
|
|
- xmult0[x*2+1] = xm0 | (xm0 << 16);
|
|
- }
|
|
-
|
|
- /* Do the scaling in raster order so we don't trash the cache */
|
|
- for (y = 0; y < height; y++)
|
|
- {
|
|
- Uint8 *srcrow0 = srcpix + y * srcpitch;
|
|
- Uint8 *dstrow = dstpix + y * dstpitch;
|
|
- int *xm0 = xmult0;
|
|
- int *x0 = xidx0;
|
|
- int width = dstwidth;
|
|
- asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movq %5, %%mm7; "
|
|
- "1: "
|
|
- " movl (%2), %%eax; " /* get xidx0[x] */
|
|
- " add $4, %2; "
|
|
- " movq %%mm7, %%mm2; "
|
|
- " movq (%0), %%mm1; " /* load mult0 */
|
|
- " add $8, %0; "
|
|
- " psubw %%mm1, %%mm2; " /* load mult1 */
|
|
- " movd (%4,%%eax,4), %%mm4; "
|
|
- " movd 4(%4,%%eax,4), %%mm5; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " punpcklbw %%mm0, %%mm5; "
|
|
- " pmullw %%mm1, %%mm4; "
|
|
- " pmullw %%mm2, %%mm5; "
|
|
- " paddw %%mm4, %%mm5; "
|
|
- " psrlw $8, %%mm5; "
|
|
- " packuswb %%mm0, %%mm5; "
|
|
- " movd %%mm5, (%1); "
|
|
- " add $4, %1; "
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(xm0), "+r"(dstrow), "+r"(x0), "+m"(width) /* outputs */
|
|
- : "S"(srcrow0), "m"(One64) /* input */
|
|
- : "%eax" /* clobbered */
|
|
- );
|
|
- }
|
|
-
|
|
- /* free memory */
|
|
- free(xidx0);
|
|
- free(xmult0);
|
|
- free(xmult1);
|
|
-}
|
|
-
|
|
-/* These functions implement a bilinear filter in the Y-dimension.
|
|
- */
|
|
-void filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
|
|
-{
|
|
- int y;
|
|
-
|
|
- for (y = 0; y < dstheight; y++)
|
|
- {
|
|
- int yidx0 = y * (srcheight - 1) / dstheight;
|
|
- Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
|
|
- Uint8 *srcrow1 = srcrow0 + srcpitch;
|
|
- int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
|
|
- int ymult0 = 0x0100 - ymult1;
|
|
- Uint8 *dstrow = dstpix + y * dstpitch;
|
|
- asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
|
|
- " movl %5, %%eax; "
|
|
- " movd %3, %%mm1; "
|
|
- " movd %4, %%mm2; "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " punpcklwd %%mm1, %%mm1; "
|
|
- " punpckldq %%mm1, %%mm1; "
|
|
- " punpcklwd %%mm2, %%mm2; "
|
|
- " punpckldq %%mm2, %%mm2; "
|
|
- "1: "
|
|
- " movd (%0), %%mm4; "
|
|
- " add $4, %0; "
|
|
- " movd (%1), %%mm5; "
|
|
- " add $4, %1; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " punpcklbw %%mm0, %%mm5; "
|
|
- " pmullw %%mm1, %%mm4; "
|
|
- " pmullw %%mm2, %%mm5; "
|
|
- " paddw %%mm4, %%mm5; "
|
|
- " psrlw $8, %%mm5; "
|
|
- " packuswb %%mm0, %%mm5; "
|
|
- " movd %%mm5, (%2); "
|
|
- " add $4, %2; "
|
|
- " decl %%eax; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcrow0), "+r"(srcrow1),"+r"(dstrow) /* no outputs */
|
|
- : "m"(ymult0), "m"(ymult1), "m"(width) /* input */
|
|
- : "%eax" /* clobbered */
|
|
- );
|
|
- }
|
|
-}
|
|
-
|
|
-void filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
|
|
-{
|
|
- int y;
|
|
-
|
|
- for (y = 0; y < dstheight; y++)
|
|
- {
|
|
- int yidx0 = y * (srcheight - 1) / dstheight;
|
|
- Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
|
|
- Uint8 *srcrow1 = srcrow0 + srcpitch;
|
|
- int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
|
|
- int ymult0 = 0x0100 - ymult1;
|
|
- Uint8 *dstrow = dstpix + y * dstpitch;
|
|
- asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
|
|
- " movl %5, %%eax; "
|
|
- " movd %3, %%mm1; "
|
|
- " movd %4, %%mm2; "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " pshufw $0, %%mm1, %%mm1; "
|
|
- " pshufw $0, %%mm2, %%mm2; "
|
|
- "1: "
|
|
- " movd (%0), %%mm4; "
|
|
- " add $4, %0; "
|
|
- " movd (%1), %%mm5; "
|
|
- " add $4, %1; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " punpcklbw %%mm0, %%mm5; "
|
|
- " pmullw %%mm1, %%mm4; "
|
|
- " pmullw %%mm2, %%mm5; "
|
|
- " paddw %%mm4, %%mm5; "
|
|
- " psrlw $8, %%mm5; "
|
|
- " packuswb %%mm0, %%mm5; "
|
|
- " movd %%mm5, (%2); "
|
|
- " add $4, %2; "
|
|
- " decl %%eax; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcrow0), "+r"(srcrow1),"+r"(dstrow) /* no outputs */
|
|
- : "m"(ymult0), "m"(ymult1), "m"(width) /* input */
|
|
- : "%eax" /* clobbered */
|
|
- );
|
|
- }
|
|
-}
|
|
diff --git a/src/scale_mmx64.c b/src/scale_mmx64.c
|
|
deleted file mode 100644
|
|
index e897f76..0000000
|
|
--- a/src/scale_mmx64.c
|
|
+++ /dev/null
|
|
@@ -1,626 +0,0 @@
|
|
-/*
|
|
- pygame - Python Game Library
|
|
- Copyright (C) 2000-2001 Pete Shinners
|
|
- Copyright (C) 2007 Rene Dudfield, Richard Goedeken
|
|
-
|
|
- This library is free software; you can redistribute it and/or
|
|
- modify it under the terms of the GNU Library General Public
|
|
- License as published by the Free Software Foundation; either
|
|
- version 2 of the License, or (at your option) any later version.
|
|
-
|
|
- This library is distributed in the hope that it will be useful,
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
- Library General Public License for more details.
|
|
-
|
|
- You should have received a copy of the GNU Library General Public
|
|
- License along with this library; if not, write to the Free
|
|
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-
|
|
- Pete Shinners
|
|
- pete@shinners.org
|
|
-*/
|
|
-
|
|
-/* Pentium 64 bit SSE/MMX smoothscale routines
|
|
- * These are written for compilation with GCC only.
|
|
- *
|
|
- * This file should not depend on anything but the C standard library.
|
|
- */
|
|
-
|
|
-#if !defined(__GNUC__) || !defined(__x86_64__)
|
|
-#error "Pygame build bug: should not be compiling this file!"
|
|
-#endif
|
|
-
|
|
-#include <stdint.h>
|
|
-typedef uint8_t Uint8; /* SDL convension */
|
|
-typedef uint16_t Uint16; /* SDL convension */
|
|
-#include <stdlib.h>
|
|
-#include <memory.h>
|
|
-#include "scale.h"
|
|
-
|
|
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
|
|
- */
|
|
-void
|
|
-filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
|
|
-{
|
|
- int srcdiff = srcpitch - (srcwidth * 4);
|
|
- int dstdiff = dstpitch - (dstwidth * 4);
|
|
-
|
|
- int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
|
|
- int xrecip = 0x40000000 / xspace;
|
|
- long long One64 = 0x4000400040004000ULL;
|
|
- long long srcdiff64 = srcdiff;
|
|
- long long dstdiff64 = dstdiff;
|
|
- asm __volatile__(" /* MMX code for X-shrink area average filter */ "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movd %6, %%mm7; " /* mm7 == xrecipmmx */
|
|
- " punpcklwd %%mm7, %%mm7; "
|
|
- " punpckldq %%mm7, %%mm7; "
|
|
- "1: " /* outer Y-loop */
|
|
- " movl %5, %%ecx; " /* ecx == xcounter */
|
|
- " pxor %%mm1, %%mm1; " /* mm1 == accumulator */
|
|
- " movl %4, %%edx; " /* edx == width */
|
|
- "2: " /* inner X-loop */
|
|
- " cmpl $0x4000, %%ecx; "
|
|
- " jbe 3f; "
|
|
- " movd (%0), %%mm2; " /* mm2 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm2; "
|
|
- " paddw %%mm2, %%mm1; " /* accumulator += srcpix */
|
|
- " subl $0x4000, %%ecx; "
|
|
- " jmp 4f; "
|
|
- "3: " /* prepare to output a pixel */
|
|
- " movd %%ecx, %%mm2; "
|
|
- " movq %2, %%mm3; " /* mm3 = 2^14 */
|
|
- " punpcklwd %%mm2, %%mm2; "
|
|
- " punpckldq %%mm2, %%mm2; "
|
|
- " movd (%0), %%mm4; " /* mm4 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " psubw %%mm2, %%mm3; " /* mm3 = xfrac */
|
|
- " psllw $2, %%mm4; "
|
|
- " movq %%mm4, %%mm5; " /* mm2 = (srcpix * xcounter >> 16) */
|
|
- " psraw $15, %%mm5; "
|
|
- " pand %%mm2, %%mm5; "
|
|
- " movq %%mm2, %%mm6; "
|
|
- " psraw $15, %%mm6; "
|
|
- " pand %%mm4, %%mm6; "
|
|
- " pmulhw %%mm4, %%mm2; "
|
|
- " paddw %%mm5, %%mm2; "
|
|
- " paddw %%mm6, %%mm2; "
|
|
- " movq %%mm4, %%mm5; " /* mm3 = (srcpix * xfrac) >> 16) */
|
|
- " psraw $15, %%mm5; "
|
|
- " pand %%mm3, %%mm5; "
|
|
- " movq %%mm3, %%mm6; "
|
|
- " psraw $15, %%mm6; "
|
|
- " pand %%mm4, %%mm6; "
|
|
- " pmulhw %%mm4, %%mm3; "
|
|
- " paddw %%mm5, %%mm3; "
|
|
- " paddw %%mm6, %%mm3; "
|
|
- " paddw %%mm1, %%mm2; "
|
|
- " movq %%mm3, %%mm1; " /* accumulator = (srcpix * xfrac) >> 16 */
|
|
- " movq %%mm7, %%mm5; "
|
|
- " psraw $15, %%mm5; "
|
|
- " pand %%mm2, %%mm5; "
|
|
- " movq %%mm2, %%mm6; "
|
|
- " psraw $15, %%mm6; "
|
|
- " pand %%mm7, %%mm6; "
|
|
- " pmulhw %%mm7, %%mm2; "
|
|
- " paddw %%mm5, %%mm2; "
|
|
- " paddw %%mm6, %%mm2; "
|
|
- " packuswb %%mm0, %%mm2; "
|
|
- " movd %%mm2, (%1); "
|
|
- " add %5, %%ecx; "
|
|
- " add $4, %1; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- "4: " /* tail of inner X-loop */
|
|
- " decl %%edx; "
|
|
- " jne 2b; "
|
|
- " add %7, %0; " /* srcpix += srcdiff */
|
|
- " add %8, %1; " /* dstpix += dstdiff */
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
|
|
- : "m"(One64), "m"(height), "m"(srcwidth),
|
|
- "m"(xspace), "m"(xrecip), "m"(srcdiff64), "m"(dstdiff64) /* inputs */
|
|
- : "%ecx","%edx" /* clobbered */
|
|
- );
|
|
-}
|
|
-
|
|
-void
|
|
-filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
|
|
-{
|
|
- int srcdiff = srcpitch - (srcwidth * 4);
|
|
- int dstdiff = dstpitch - (dstwidth * 4);
|
|
-
|
|
- int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
|
|
- int xrecip = 0x40000000 / xspace;
|
|
- long long One64 = 0x4000400040004000ULL;
|
|
- long long srcdiff64 = srcdiff;
|
|
- long long dstdiff64 = dstdiff;
|
|
- asm __volatile__(" /* MMX code for X-shrink area average filter */ "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movd %6, %%mm7; " /* mm7 == xrecipmmx */
|
|
- " movq %2, %%mm6; " /* mm6 = 2^14 */
|
|
- " pshufw $0, %%mm7, %%mm7; "
|
|
- "1: " /* outer Y-loop */
|
|
- " movl %5, %%ecx; " /* ecx == xcounter */
|
|
- " pxor %%mm1, %%mm1; " /* mm1 == accumulator */
|
|
- " movl %4, %%edx; " /* edx == width */
|
|
- "2: " /* inner X-loop */
|
|
- " cmpl $0x4000, %%ecx; "
|
|
- " jbe 3f; "
|
|
- " movd (%0), %%mm2; " /* mm2 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm2; "
|
|
- " paddw %%mm2, %%mm1; " /* accumulator += srcpix */
|
|
- " subl $0x4000, %%ecx; "
|
|
- " jmp 4f; "
|
|
- "3: " /* prepare to output a pixel */
|
|
- " movd %%ecx, %%mm2; "
|
|
- " movq %%mm6, %%mm3; " /* mm3 = 2^14 */
|
|
- " pshufw $0, %%mm2, %%mm2; "
|
|
- " movd (%0), %%mm4; " /* mm4 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " psubw %%mm2, %%mm3; " /* mm3 = xfrac */
|
|
- " psllw $2, %%mm4; "
|
|
- " pmulhuw %%mm4, %%mm2; " /* mm2 = (srcpix * xcounter >> 16) */
|
|
- " pmulhuw %%mm4, %%mm3; " /* mm3 = (srcpix * xfrac) >> 16 */
|
|
- " paddw %%mm1, %%mm2; "
|
|
- " movq %%mm3, %%mm1; " /* accumulator = (srcpix * xfrac) >> 16 */
|
|
- " pmulhuw %%mm7, %%mm2; "
|
|
- " packuswb %%mm0, %%mm2; "
|
|
- " movd %%mm2, (%1); "
|
|
- " add %5, %%ecx; "
|
|
- " add $4, %1; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- "4: " /* tail of inner X-loop */
|
|
- " decl %%edx; "
|
|
- " jne 2b; "
|
|
- " add %7, %0; " /* srcpix += srcdiff */
|
|
- " add %8, %1; " /* dstpix += dstdiff */
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
|
|
- : "m"(One64), "m"(height), "m"(srcwidth),
|
|
- "m"(xspace), "m"(xrecip), "m"(srcdiff64), "m"(dstdiff64) /* inputs */
|
|
- : "%ecx","%edx" /* clobbered */
|
|
- );
|
|
-}
|
|
-
|
|
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
|
|
- */
|
|
-void
|
|
-filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
|
|
-{
|
|
- Uint16 *templine;
|
|
- int srcdiff = srcpitch - (width * 4);
|
|
- int dstdiff = dstpitch - (width * 4);
|
|
- int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
|
|
- int yrecip = 0x40000000 / yspace;
|
|
- long long One64 = 0x4000400040004000ULL;
|
|
-
|
|
- /* allocate and clear a memory area for storing the accumulator line */
|
|
- templine = (Uint16 *) malloc(dstpitch * 2);
|
|
- if (templine == 0) return;
|
|
- memset(templine, 0, dstpitch * 2);
|
|
- long long srcdiff64 = srcdiff;
|
|
- long long dstdiff64 = dstdiff;
|
|
- asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
|
|
- " movl %5, %%ecx; " /* ecx == ycounter */
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movd %6, %%mm7; " /* mm7 == yrecipmmx */
|
|
- " punpcklwd %%mm7, %%mm7; "
|
|
- " punpckldq %%mm7, %%mm7; "
|
|
- "1: " /* outer Y-loop */
|
|
- " mov %2, %%rax; " /* rax == accumulate */
|
|
- " cmpl $0x4000, %%ecx; "
|
|
- " jbe 3f; "
|
|
- " movl %4, %%edx; " /* edx == width */
|
|
- "2: "
|
|
- " movd (%0), %%mm1; "
|
|
- " add $4, %0; "
|
|
- " movq (%%rax), %%mm2; "
|
|
- " punpcklbw %%mm0, %%mm1; "
|
|
- " paddw %%mm1, %%mm2; "
|
|
- " movq %%mm2, (%%rax); "
|
|
- " add $8, %%rax; "
|
|
- " decl %%edx; "
|
|
- " jne 2b; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- " jmp 6f; "
|
|
- "3: " /* prepare to output a line */
|
|
- " movd %%ecx, %%mm1; "
|
|
- " movl %4, %%edx; " /* edx = width */
|
|
- " movq %9, %%mm6; " /* mm6 = 2^14 */
|
|
- " punpcklwd %%mm1, %%mm1; "
|
|
- " punpckldq %%mm1, %%mm1; "
|
|
- " psubw %%mm1, %%mm6; " /* mm6 = yfrac */
|
|
- "4: "
|
|
- " movd (%0), %%mm4; " /* mm4 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " movq (%%rax), %%mm5; " /* mm5 = accumulate */
|
|
- " movq %%mm6, %%mm3; "
|
|
- " psllw $2, %%mm4; "
|
|
- " movq %%mm4, %%mm0; " /* mm3 = (srcpix * yfrac) >> 16) */
|
|
- " psraw $15, %%mm0; "
|
|
- " pand %%mm3, %%mm0; "
|
|
- " movq %%mm3, %%mm2; "
|
|
- " psraw $15, %%mm2; "
|
|
- " pand %%mm4, %%mm2; "
|
|
- " pmulhw %%mm4, %%mm3; "
|
|
- " paddw %%mm0, %%mm3; "
|
|
- " paddw %%mm2, %%mm3; "
|
|
- " movq %%mm1, %%mm0; " /* mm4 = (srcpix * ycounter >> 16) */
|
|
- " psraw $15, %%mm0; "
|
|
- " pand %%mm4, %%mm0; "
|
|
- " movq %%mm4, %%mm2; "
|
|
- " psraw $15, %%mm2; "
|
|
- " pand %%mm1, %%mm2; "
|
|
- " pmulhw %%mm1, %%mm4; "
|
|
- " paddw %%mm0, %%mm4; "
|
|
- " paddw %%mm2, %%mm4; "
|
|
- " movq %%mm3, (%%rax); "
|
|
- " paddw %%mm5, %%mm4; "
|
|
- " add $8, %%rax; "
|
|
- " movq %%mm7, %%mm0; "
|
|
- " psraw $15, %%mm0; "
|
|
- " pand %%mm4, %%mm0; "
|
|
- " movq %%mm4, %%mm2; "
|
|
- " psraw $15, %%mm2; "
|
|
- " pand %%mm7, %%mm2; "
|
|
- " pmulhw %%mm7, %%mm4; "
|
|
- " paddw %%mm0, %%mm4; "
|
|
- " paddw %%mm2, %%mm4; "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " packuswb %%mm0, %%mm4; "
|
|
- " movd %%mm4, (%1); "
|
|
- " add $4, %1; "
|
|
- " decl %%edx; "
|
|
- " jne 4b; "
|
|
- " add %8, %1; " /* dstpix += dstdiff */
|
|
- " addl %5, %%ecx; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- "6: " /* tail of outer Y-loop */
|
|
- " add %7, %0; " /* srcpix += srcdiff */
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
|
|
- : "m"(templine),"m"(srcheight), "m"(width), "m"(yspace),
|
|
- "m"(yrecip), "m"(srcdiff64), "m"(dstdiff64), "m"(One64) /* input */
|
|
- : "%ecx","%edx","%rax" /* clobbered */
|
|
- );
|
|
-
|
|
- /* free the temporary memory */
|
|
- free(templine);
|
|
-}
|
|
-
|
|
-void
|
|
-filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
|
|
-{
|
|
- Uint16 *templine;
|
|
- int srcdiff = srcpitch - (width * 4);
|
|
- int dstdiff = dstpitch - (width * 4);
|
|
- int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
|
|
- int yrecip = 0x40000000 / yspace;
|
|
- long long One64 = 0x4000400040004000ULL;
|
|
-
|
|
- /* allocate and clear a memory area for storing the accumulator line */
|
|
- templine = (Uint16 *) malloc(dstpitch * 2);
|
|
- if (templine == 0) return;
|
|
- memset(templine, 0, dstpitch * 2);
|
|
- long long srcdiff64 = srcdiff;
|
|
- long long dstdiff64 = dstdiff;
|
|
- asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
|
|
- " movl %5, %%ecx; " /* ecx == ycounter */
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " movd %6, %%mm7; " /* mm7 == yrecipmmx */
|
|
- " pshufw $0, %%mm7, %%mm7; "
|
|
- "1: " /* outer Y-loop */
|
|
- " mov %2, %%rax; " /* rax == accumulate */
|
|
- " cmpl $0x4000, %%ecx; "
|
|
- " jbe 3f; "
|
|
- " movl %4, %%edx; " /* edx == width */
|
|
- "2: "
|
|
- " movd (%0), %%mm1; "
|
|
- " add $4, %0; "
|
|
- " movq (%%rax), %%mm2; "
|
|
- " punpcklbw %%mm0, %%mm1; "
|
|
- " paddw %%mm1, %%mm2; "
|
|
- " movq %%mm2, (%%rax); "
|
|
- " add $8, %%rax; "
|
|
- " decl %%edx; "
|
|
- " jne 2b; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- " jmp 6f; "
|
|
- "3: " /* prepare to output a line */
|
|
- " movd %%ecx, %%mm1; "
|
|
- " movl %4, %%edx; " /* edx = width */
|
|
- " movq %9, %%mm6; " /* mm6 = 2^14 */
|
|
- " pshufw $0, %%mm1, %%mm1; "
|
|
- " psubw %%mm1, %%mm6; " /* mm6 = yfrac */
|
|
- "4: "
|
|
- " movd (%0), %%mm4; " /* mm4 = srcpix */
|
|
- " add $4, %0; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " movq (%%rax), %%mm5; " /* mm5 = accumulate */
|
|
- " movq %%mm6, %%mm3; "
|
|
- " psllw $2, %%mm4; "
|
|
- " pmulhuw %%mm4, %%mm3; " /* mm3 = (srcpix * yfrac) >> 16 */
|
|
- " pmulhuw %%mm1, %%mm4; " /* mm4 = (srcpix * ycounter >> 16) */
|
|
- " movq %%mm3, (%%rax); "
|
|
- " paddw %%mm5, %%mm4; "
|
|
- " add $8, %%rax; "
|
|
- " pmulhuw %%mm7, %%mm4; "
|
|
- " packuswb %%mm0, %%mm4; "
|
|
- " movd %%mm4, (%1); "
|
|
- " add $4, %1; "
|
|
- " decl %%edx; "
|
|
- " jne 4b; "
|
|
- " add %8, %1; " /* dstpix += dstdiff */
|
|
- " addl %5, %%ecx; "
|
|
- " subl $0x4000, %%ecx; "
|
|
- "6: " /* tail of outer Y-loop */
|
|
- " add %7, %0; " /* srcpix += srcdiff */
|
|
- " decl %3; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
|
|
- : "m"(templine),"m"(srcheight), "m"(width), "m"(yspace),
|
|
- "m"(yrecip), "m"(srcdiff64), "m"(dstdiff64), "m"(One64) /* input */
|
|
- : "%ecx","%edx","%rax" /* clobbered */
|
|
- );
|
|
-
|
|
- /* free the temporary memory */
|
|
- free(templine);
|
|
-}
|
|
-
|
|
-/* These functions implement a bilinear filter in the X-dimension.
|
|
- */
|
|
-void
|
|
-filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
|
|
-{
|
|
- int *xidx0, *xmult0, *xmult1;
|
|
- int x, y;
|
|
- int factorwidth = 8;
|
|
-
|
|
- /* Allocate memory for factors */
|
|
- xidx0 = malloc(dstwidth * 4);
|
|
- if (xidx0 == 0) return;
|
|
- xmult0 = (int *) malloc(dstwidth * factorwidth);
|
|
- xmult1 = (int *) malloc(dstwidth * factorwidth);
|
|
- if (xmult0 == 0 || xmult1 == 0)
|
|
- {
|
|
- free(xidx0);
|
|
- if (xmult0) free(xmult0);
|
|
- if (xmult1) free(xmult1);
|
|
- }
|
|
-
|
|
- /* Create multiplier factors and starting indices and put them in arrays */
|
|
- for (x = 0; x < dstwidth; x++)
|
|
- {
|
|
- int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
|
|
- int xm0 = 0x100 - xm1;
|
|
- xidx0[x] = x * (srcwidth - 1) / dstwidth;
|
|
- xmult1[x*2] = xm1 | (xm1 << 16);
|
|
- xmult1[x*2+1] = xm1 | (xm1 << 16);
|
|
- xmult0[x*2] = xm0 | (xm0 << 16);
|
|
- xmult0[x*2+1] = xm0 | (xm0 << 16);
|
|
- }
|
|
-
|
|
- /* Do the scaling in raster order so we don't trash the cache */
|
|
- for (y = 0; y < height; y++)
|
|
- {
|
|
- Uint8 *srcrow0 = srcpix + y * srcpitch;
|
|
- Uint8 *dstrow = dstpix + y * dstpitch;
|
|
- int *xm0 = xmult0;
|
|
- int *xm1 = xmult1;
|
|
- int *x0 = xidx0;
|
|
- asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
|
|
- " movl %5, %%ecx; "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- "1: "
|
|
- " movsxl (%3), %%rax; " /* get xidx0[x] */
|
|
- " add $4, %3; "
|
|
- " movq (%0), %%mm1; " /* load mult0 */
|
|
- " add $8, %0; "
|
|
- " movq (%1), %%mm2; " /* load mult1 */
|
|
- " add $8, %1; "
|
|
- " movd (%4,%%rax,4), %%mm4; "
|
|
- " movd 4(%4,%%rax,4), %%mm5; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " punpcklbw %%mm0, %%mm5; "
|
|
- " pmullw %%mm1, %%mm4; "
|
|
- " pmullw %%mm2, %%mm5; "
|
|
- " paddw %%mm4, %%mm5; "
|
|
- " psrlw $8, %%mm5; "
|
|
- " packuswb %%mm0, %%mm5; "
|
|
- " movd %%mm5, (%2); "
|
|
- " add $4, %2; "
|
|
- " decl %%ecx; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(xm0), "+r"(xm1), "+r"(dstrow), "+r"(x0) /* outputs */
|
|
- : "r"(srcrow0),"m"(dstwidth) /* input */
|
|
- : "%ecx","%rax" /* clobbered */
|
|
- );
|
|
- }
|
|
-
|
|
- /* free memory */
|
|
- free(xidx0);
|
|
- free(xmult0);
|
|
- free(xmult1);
|
|
-}
|
|
-
|
|
-void
|
|
-filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
|
|
-{
|
|
- int *xidx0, *xmult0, *xmult1;
|
|
- int x, y;
|
|
- int factorwidth = 8;
|
|
-
|
|
- /* Allocate memory for factors */
|
|
- xidx0 = malloc(dstwidth * 4);
|
|
- if (xidx0 == 0) return;
|
|
- xmult0 = (int *) malloc(dstwidth * factorwidth);
|
|
- xmult1 = (int *) malloc(dstwidth * factorwidth);
|
|
- if (xmult0 == 0 || xmult1 == 0)
|
|
- {
|
|
- free(xidx0);
|
|
- if (xmult0) free(xmult0);
|
|
- if (xmult1) free(xmult1);
|
|
- }
|
|
-
|
|
- /* Create multiplier factors and starting indices and put them in arrays */
|
|
- for (x = 0; x < dstwidth; x++)
|
|
- {
|
|
- int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
|
|
- int xm0 = 0x100 - xm1;
|
|
- xidx0[x] = x * (srcwidth - 1) / dstwidth;
|
|
- xmult1[x*2] = xm1 | (xm1 << 16);
|
|
- xmult1[x*2+1] = xm1 | (xm1 << 16);
|
|
- xmult0[x*2] = xm0 | (xm0 << 16);
|
|
- xmult0[x*2+1] = xm0 | (xm0 << 16);
|
|
- }
|
|
-
|
|
- /* Do the scaling in raster order so we don't trash the cache */
|
|
- for (y = 0; y < height; y++)
|
|
- {
|
|
- Uint8 *srcrow0 = srcpix + y * srcpitch;
|
|
- Uint8 *dstrow = dstpix + y * dstpitch;
|
|
- int *xm0 = xmult0;
|
|
- int *xm1 = xmult1;
|
|
- int *x0 = xidx0;
|
|
- asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
|
|
- " movl %5, %%ecx; "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- "1: "
|
|
- " movsxl (%3), %%rax; " /* get xidx0[x] */
|
|
- " add $4, %3; "
|
|
- " movq (%0), %%mm1; " /* load mult0 */
|
|
- " add $8, %0; "
|
|
- " movq (%1), %%mm2; " /* load mult1 */
|
|
- " add $8, %1; "
|
|
- " movd (%4,%%rax,4), %%mm4; "
|
|
- " movd 4(%4,%%rax,4), %%mm5; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " punpcklbw %%mm0, %%mm5; "
|
|
- " pmullw %%mm1, %%mm4; "
|
|
- " pmullw %%mm2, %%mm5; "
|
|
- " paddw %%mm4, %%mm5; "
|
|
- " psrlw $8, %%mm5; "
|
|
- " packuswb %%mm0, %%mm5; "
|
|
- " movd %%mm5, (%2); "
|
|
- " add $4, %2; "
|
|
- " decl %%ecx; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(xm0), "+r"(xm1), "+r"(dstrow), "+r"(x0) /* outputs */
|
|
- : "r"(srcrow0),"m"(dstwidth) /* input */
|
|
- : "%ecx","%rax" /* clobbered */
|
|
- );
|
|
- }
|
|
-
|
|
- /* free memory */
|
|
- free(xidx0);
|
|
- free(xmult0);
|
|
- free(xmult1);
|
|
-}
|
|
-
|
|
-/* These functions implement a bilinear filter in the Y-dimension
|
|
- */
|
|
-void
|
|
-filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
|
|
-{
|
|
- int y;
|
|
-
|
|
- for (y = 0; y < dstheight; y++)
|
|
- {
|
|
- int yidx0 = y * (srcheight - 1) / dstheight;
|
|
- Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
|
|
- Uint8 *srcrow1 = srcrow0 + srcpitch;
|
|
- int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
|
|
- int ymult0 = 0x0100 - ymult1;
|
|
- Uint8 *dstrow = dstpix + y * dstpitch;
|
|
- asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
|
|
- " movl %5, %%ecx; "
|
|
- " movd %3, %%mm1; "
|
|
- " movd %4, %%mm2; "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " punpcklwd %%mm1, %%mm1; "
|
|
- " punpckldq %%mm1, %%mm1; "
|
|
- " punpcklwd %%mm2, %%mm2; "
|
|
- " punpckldq %%mm2, %%mm2; "
|
|
- "1: "
|
|
- " movd (%0), %%mm4; "
|
|
- " add $4, %0; "
|
|
- " movd (%1), %%mm5; "
|
|
- " add $4, %1; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " punpcklbw %%mm0, %%mm5; "
|
|
- " pmullw %%mm1, %%mm4; "
|
|
- " pmullw %%mm2, %%mm5; "
|
|
- " paddw %%mm4, %%mm5; "
|
|
- " psrlw $8, %%mm5; "
|
|
- " packuswb %%mm0, %%mm5; "
|
|
- " movd %%mm5, (%2); "
|
|
- " add $4, %2; "
|
|
- " decl %%ecx; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcrow0), "+r"(srcrow1), "+r"(dstrow) /* outputs */
|
|
- : "m"(ymult0), "m"(ymult1), "m"(width) /* input */
|
|
- : "%ecx" /* clobbered */
|
|
- );
|
|
- }
|
|
-}
|
|
-
|
|
-void
|
|
-filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
|
|
-{
|
|
- int y;
|
|
-
|
|
- for (y = 0; y < dstheight; y++)
|
|
- {
|
|
- int yidx0 = y * (srcheight - 1) / dstheight;
|
|
- Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
|
|
- Uint8 *srcrow1 = srcrow0 + srcpitch;
|
|
- int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
|
|
- int ymult0 = 0x0100 - ymult1;
|
|
- Uint8 *dstrow = dstpix + y * dstpitch;
|
|
- asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
|
|
- " movl %5, %%ecx; "
|
|
- " movd %3, %%mm1; "
|
|
- " movd %4, %%mm2; "
|
|
- " pxor %%mm0, %%mm0; "
|
|
- " pshufw $0, %%mm1, %%mm1; "
|
|
- " pshufw $0, %%mm2, %%mm2; "
|
|
- "1: "
|
|
- " movd (%0), %%mm4; "
|
|
- " add $4, %0; "
|
|
- " movd (%1), %%mm5; "
|
|
- " add $4, %1; "
|
|
- " punpcklbw %%mm0, %%mm4; "
|
|
- " punpcklbw %%mm0, %%mm5; "
|
|
- " pmullw %%mm1, %%mm4; "
|
|
- " pmullw %%mm2, %%mm5; "
|
|
- " paddw %%mm4, %%mm5; "
|
|
- " psrlw $8, %%mm5; "
|
|
- " packuswb %%mm0, %%mm5; "
|
|
- " movd %%mm5, (%2); "
|
|
- " add $4, %2; "
|
|
- " decl %%ecx; "
|
|
- " jne 1b; "
|
|
- " emms; "
|
|
- : "+r"(srcrow0), "+r"(srcrow1), "+r"(dstrow) /* outputs */
|
|
- : "m"(ymult0), "m"(ymult1), "m"(width) /* input */
|
|
- : "%ecx" /* clobbered */
|
|
- );
|
|
- }
|
|
-}
|
|
-
|
|
diff --git a/src/transform.c b/src/transform.c
|
|
index c997deb..ee0d03d 100644
|
|
--- a/src/transform.c
|
|
+++ b/src/transform.c
|
|
@@ -29,7 +29,7 @@
|
|
#include "pygamedocs.h"
|
|
#include <math.h>
|
|
#include <string.h>
|
|
-#include "scale.h"
|
|
+//#include "scale.h"
|
|
|
|
|
|
typedef void (* SMOOTHSCALE_FILTER_P)(Uint8 *, Uint8 *, int, int, int, int, int);
|