Files
haikuports/dev-python/pygame/patches/pygame-1.9.1.patchset

1497 lines
73 KiB
Plaintext

diff --git a/Setup.in b/Setup.in
index 4bb6c1c..9236df4 100644
--- a/Setup.in
+++ b/Setup.in
@@ -33,9 +33,9 @@ mixer_music src/music.c $(SDL) $(MIXER) $(DEBUG)
_numericsurfarray src/_numericsurfarray.c $(SDL) $(DEBUG)
_numericsndarray src/_numericsndarray.c $(SDL) $(MIXER) $(DEBUG)
movie src/movie.c $(SDL) $(SMPEG) $(DEBUG)
-scrap src/scrap.c $(SDL) $(SCRAP) $(DEBUG)
+#scrap src/scrap.c $(SDL) $(SCRAP) $(DEBUG)
_camera src/_camera.c src/camera_v4l2.c src/camera_v4l.c $(SDL) $(DEBUG)
-pypm src/pypm.c $(SDL) $(PORTMIDI) $(PORTTIME) $(DEBUG)
+#pypm src/pypm.c $(SDL) $(PORTMIDI) $(PORTTIME) $(DEBUG)
GFX = src/SDL_gfx/SDL_gfxPrimitives.c
#GFX = src/SDL_gfx/SDL_gfxBlitFunc.c src/SDL_gfx/SDL_gfxPrimitives.c
@@ -64,7 +64,7 @@ joystick src/joystick.c $(SDL) $(DEBUG)
draw src/draw.c $(SDL) $(DEBUG)
image src/image.c $(SDL) $(DEBUG)
overlay src/overlay.c $(SDL) $(DEBUG)
-transform src/transform.c src/rotozoom.c src/scale2x.c src/scale_mmx.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
+transform src/transform.c src/rotozoom.c src/scale2x.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
mask src/mask.c src/bitmask.c $(SDL) $(DEBUG)
bufferproxy src/bufferproxy.c $(SDL) $(DEBUG)
pixelarray src/pixelarray.c $(SDL) $(DEBUG)
diff --git a/config.py b/config.py
index f60d64c..6e0d766 100644
--- a/config.py
+++ b/config.py
@@ -119,12 +119,16 @@ def main():
elif sys.platform == 'win32':
print_('Using WINDOWS mingw/msys configuration...\n')
import config_msys as CFG
+ elif sys.platform == 'haiku1' or sys.platform == 'haiku1_x86':
+ print_('Using Haiku configuration...\n')
+ import config_haiku as CFG
elif sys.platform == 'darwin':
print_('Using Darwin configuration...\n')
import config_darwin as CFG
additional_platform_setup = open("Setup_Darwin.in", "r").readlines()
else:
print_('Using UNIX configuration...\n')
+ print_(sys.platform)
import config_unix as CFG
if os.path.isfile('Setup'):
diff --git a/pygame.egg-info/SOURCES.txt b/pygame.egg-info/SOURCES.txt
index a7ec677..39c2a55 100644
--- a/pygame.egg-info/SOURCES.txt
+++ b/pygame.egg-info/SOURCES.txt
@@ -301,11 +301,7 @@ src/rect.c
src/rect.doc
src/rotozoom.c
src/rwobject.c
-src/scale.h
src/scale2x.c
-src/scale_mmx.c
-src/scale_mmx32.c
-src/scale_mmx64.c
src/scrap.c
src/scrap.doc
src/scrap.h
@@ -465,4 +461,4 @@ test/util/build_page/results/.htaccess
test/util/build_page/results/index.py
test/util/build_page/results/results.css
test/util/build_page/upload_results/.htaccess
-test/util/build_page/upload_results/index.py
\ No newline at end of file
+test/util/build_page/upload_results/index.py
diff --git a/setup.py b/setup.py
index 45af61f..bf352dc 100644
--- a/setup.py
+++ b/setup.py
@@ -116,7 +116,7 @@ else:
#headers to install
headers = glob.glob(os.path.join('src', '*.h'))
headers.remove(os.path.join('src', 'numeric_arrayobject.h'))
-headers.remove(os.path.join('src', 'scale.h'))
+#headers.remove(os.path.join('src', 'scale.h'))
#sanity check for any arguments
if len(sys.argv) == 1:
@@ -354,17 +354,6 @@ if sys.platform == 'win32':
cmdclass['build_ext'] = WinBuildExt
# Add the precompiled smooth scale MMX functions to transform.
- def replace_scale_mmx():
- for e in extensions:
- if e.name == 'transform':
- e.extra_objects.append(
- os.path.join('obj', 'win32', 'scale_mmx.obj'))
- for i in range(len(e.sources)):
- if e.sources[i].endswith('scale_mmx.c'):
- del e.sources[i]
- return
- replace_scale_mmx()
-
#clean up the list of extensions
for e in extensions[:]:
diff --git a/src/pgcompat.h b/src/pgcompat.h
index 6b9eea0..e34d2ba 100644
--- a/src/pgcompat.h
+++ b/src/pgcompat.h
@@ -69,9 +69,7 @@
#define DECREF_MOD(mod)
/* Type header differs. */
-#define TYPE_HEAD(x,y) \
- PyObject_HEAD_INIT(x) \
- 0,
+#define TYPE_HEAD(x,y) PyObject_HEAD_INIT(x) 0,
/* Text interface. Use ascii strings. */
#define Text_Type PyString_Type
diff --git a/src/scale.h b/src/scale.h
deleted file mode 100644
index 0bb0eb2..0000000
--- a/src/scale.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- pygame - Python Game Library
- Copyright (C) 2000-2001 Pete Shinners
- Copyright (C) 2007 Rene Dudfield, Richard Goedeken
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
- Pete Shinners
- pete@shinners.org
-*/
-
-/* Pentium MMX/SSE smoothscale routines
- * Available on Win32 or GCC on a Pentium.
- * Sorry, no Win64 support yet for Visual C builds, but it can be added.
- */
-
-#if !defined(SCALE_HEADER)
-#define SCALE_HEADER
-
-#if (defined(__GNUC__) && ((defined(__x86_64__) && !defined(_NO_MMX_FOR_X86_64)) || defined(__i386__))) || defined(MS_WIN32)
-#define SCALE_MMX_SUPPORT
-
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
- */
-void filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
-
-void filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
-
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
- */
-void filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
-
-void filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
-
-/* These functions implement a bilinear filter in the X-dimension.
- */
-void filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
-
-void filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
-
-/* These functions implement a bilinear filter in the Y-dimension.
- */
-void filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
-
-void filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
-
-#endif /* #if (defined(__GNUC__) && .....) */
-
-#endif /* #if !defined(SCALE_HEADER) */
diff --git a/src/scale_mmx.c b/src/scale_mmx.c
deleted file mode 100644
index 36e7af0..0000000
--- a/src/scale_mmx.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- pygame - Python Game Library
- Copyright (C) 2000-2001 Pete Shinners
- Copyright (C) 2007 Rene Dudfield, Richard Goedeken
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
- Pete Shinners
- pete@shinners.org
-*/
-
-/* Pentium MMX/SSE smoothscale routines
- * These are only compiled with GCC.
- */
-#if defined(__GNUC__)
-/* Choose between the 32 bit and 64 bit versions.
- * Including source code like this may be frowned upon by some,
- * but the alternative is ungainly conditionally compiled code.
- */
-# if defined(__x86_64__)
-# include "scale_mmx64.c"
-# elif defined(__i386__)
-# include "scale_mmx32.c"
-# endif
-#endif
diff --git a/src/scale_mmx32.c b/src/scale_mmx32.c
deleted file mode 100644
index 14cd8d2..0000000
--- a/src/scale_mmx32.c
+++ /dev/null
@@ -1,620 +0,0 @@
-/*
- pygame - Python Game Library
- Copyright (C) 2000-2001 Pete Shinners
- Copyright (C) 2007 Rene Dudfield, Richard Goedeken
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
- Pete Shinners
- pete@shinners.org
-*/
-
-/* Pentium 32 bit SSE/MMX smoothscale filter routines
- * These are written for compilation with GCC only.
- *
- * This file should not depend on anything but the C standard library.
- */
-
-#if !defined(__GNUC__) || !defined(__i386__) || defined(__x86_64__)
-#error "Pygame build bug: should not be compiling this file!"
-#endif
-
-#include <stdint.h>
-typedef uint8_t Uint8; /* SDL convension */
-typedef uint16_t Uint16; /* SDL convension */
-#include <stdlib.h>
-#include <memory.h>
-#include "scale.h"
-
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
- */
-void
-filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
- int srcdiff = srcpitch - (srcwidth * 4);
- int dstdiff = dstpitch - (dstwidth * 4);
-
- int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
- int xrecip = 0x40000000 / xspace;
- long long One64 = 0x4000400040004000ULL;
-
- asm __volatile__(" /* MMX code for X-shrink area average filter */ "
- " pxor %%mm0, %%mm0; "
- " movd %6, %%mm7; " /* mm7 == xrecipmmx */
- " punpcklwd %%mm7, %%mm7; "
- " punpckldq %%mm7, %%mm7; "
- "1: " /* outer Y-loop */
- " movl %5, %%ecx; " /* ecx == xcounter */
- " pxor %%mm1, %%mm1; " /* mm1 == accumulator */
- " movl %4, %%edx; " /* edx == width */
- "2: " /* inner X-loop */
- " cmpl $0x4000, %%ecx; "
- " jbe 3f; "
- " movd (%0), %%mm2; " /* mm2 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm2; "
- " paddw %%mm2, %%mm1; " /* accumulator += srcpix */
- " subl $0x4000, %%ecx; "
- " jmp 4f; "
- "3: " /* prepare to output a pixel */
- " movd %%ecx, %%mm2; "
- " movq %2, %%mm3; " /* mm3 = 2^14 */
- " punpcklwd %%mm2, %%mm2; "
- " punpckldq %%mm2, %%mm2; "
- " movd (%0), %%mm4; " /* mm4 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm4; "
- " psubw %%mm2, %%mm3; " /* mm3 = xfrac */
- " psllw $2, %%mm4; "
- " movq %%mm4, %%mm5; " /* mm2 = (srcpix * xcounter >> 16) */
- " psraw $15, %%mm5; "
- " pand %%mm2, %%mm5; "
- " movq %%mm2, %%mm6; "
- " psraw $15, %%mm6; "
- " pand %%mm4, %%mm6; "
- " pmulhw %%mm4, %%mm2; "
- " paddw %%mm5, %%mm2; "
- " paddw %%mm6, %%mm2; "
- " movq %%mm4, %%mm5; " /* mm3 = (srcpix * xfrac) >> 16) */
- " psraw $15, %%mm5; "
- " pand %%mm3, %%mm5; "
- " movq %%mm3, %%mm6; "
- " psraw $15, %%mm6; "
- " pand %%mm4, %%mm6; "
- " pmulhw %%mm4, %%mm3; "
- " paddw %%mm5, %%mm3; "
- " paddw %%mm6, %%mm3; "
- " paddw %%mm1, %%mm2; "
- " movq %%mm3, %%mm1; " /* accumulator = (srcpix * xfrac) >> 16 */
- " movq %%mm7, %%mm5; "
- " psraw $15, %%mm5; "
- " pand %%mm2, %%mm5; "
- " movq %%mm2, %%mm6; "
- " psraw $15, %%mm6; "
- " pand %%mm7, %%mm6; "
- " pmulhw %%mm7, %%mm2; "
- " paddw %%mm5, %%mm2; "
- " paddw %%mm6, %%mm2; "
- " packuswb %%mm0, %%mm2; "
- " movd %%mm2, (%1); "
- " add %5, %%ecx; "
- " add $4, %1; "
- " subl $0x4000, %%ecx; "
- "4: " /* tail of inner X-loop */
- " decl %%edx; "
- " jne 2b; "
- " add %7, %0; " /* srcpix += srcdiff */
- " add %8, %1; " /* dstpix += dstdiff */
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
- : "m"(One64), "m"(height), "m"(srcwidth),
- "m"(xspace), "m"(xrecip), "m"(srcdiff), "m"(dstdiff) /* input */
- : "%ecx","%edx" /* clobbered */
- );
-}
-
-void
-filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
- int srcdiff = srcpitch - (srcwidth * 4);
- int dstdiff = dstpitch - (dstwidth * 4);
-
- int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
- int xrecip = 0x40000000 / xspace;
- long long One64 = 0x4000400040004000ULL;
-
- asm __volatile__(" /* MMX code for X-shrink area average filter */ "
- " pxor %%mm0, %%mm0; "
- " movd %6, %%mm7; " /* mm7 == xrecipmmx */
- " movq %2, %%mm6; " /* mm6 = 2^14 */
- " pshufw $0, %%mm7, %%mm7; "
- "1: " /* outer Y-loop */
- " movl %5, %%ecx; " /* ecx == xcounter */
- " pxor %%mm1, %%mm1; " /* mm1 == accumulator */
- " movl %4, %%edx; " /* edx == width */
- "2: " /* inner X-loop */
- " cmpl $0x4000, %%ecx; "
- " jbe 3f; "
- " movd (%0), %%mm2; " /* mm2 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm2; "
- " paddw %%mm2, %%mm1; " /* accumulator += srcpix */
- " subl $0x4000, %%ecx; "
- " jmp 4f; "
- "3: " /* prepare to output a pixel */
- " movd %%ecx, %%mm2; "
- " movq %%mm6, %%mm3; " /* mm3 = 2^14 */
- " pshufw $0, %%mm2, %%mm2; "
- " movd (%0), %%mm4; " /* mm4 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm4; "
- " psubw %%mm2, %%mm3; " /* mm3 = xfrac */
- " psllw $2, %%mm4; "
- " pmulhuw %%mm4, %%mm2; " /* mm2 = (srcpix * xcounter >> 16) */
- " pmulhuw %%mm4, %%mm3; " /* mm3 = (srcpix * xfrac) >> 16 */
- " paddw %%mm1, %%mm2; "
- " movq %%mm3, %%mm1; " /* accumulator = (srcpix * xfrac) >> 16 */
- " pmulhuw %%mm7, %%mm2; "
- " packuswb %%mm0, %%mm2; "
- " movd %%mm2, (%1); "
- " add %5, %%ecx; "
- " add $4, %1; "
- " subl $0x4000, %%ecx; "
- "4: " /* tail of inner X-loop */
- " decl %%edx; "
- " jne 2b; "
- " add %7, %0; " /* srcpix += srcdiff */
- " add %8, %1; " /* dstpix += dstdiff */
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
- : "m"(One64), "m"(height), "m"(srcwidth),
- "m"(xspace), "m"(xrecip), "m"(srcdiff), "m"(dstdiff) /* input */
- : "%ecx","%edx" /* clobbered */
- );
-}
-
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
- */
-void
-filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
- Uint16 *templine;
- int srcdiff = srcpitch - (width * 4);
- int dstdiff = dstpitch - (width * 4);
- int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
- int yrecip = 0x40000000 / yspace;
- long long One64 = 0x4000400040004000ULL;
-
- /* allocate and clear a memory area for storing the accumulator line */
- templine = (Uint16 *) malloc(dstpitch * 2);
- if (templine == 0) return;
- memset(templine, 0, dstpitch * 2);
-
- asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
- " movl %5, %%ecx; " /* ecx == ycounter */
- " pxor %%mm0, %%mm0; "
- " movd %6, %%mm7; " /* mm7 == yrecipmmx */
- " punpcklwd %%mm7, %%mm7; "
- " punpckldq %%mm7, %%mm7; "
- "1: " /* outer Y-loop */
- " movl %2, %%eax; " /* rax == accumulate */
- " cmpl $0x4000, %%ecx; "
- " jbe 3f; "
- " movl %4, %%edx; " /* edx == width */
- "2: "
- " movd (%0), %%mm1; "
- " add $4, %0; "
- " movq (%%eax), %%mm2; "
- " punpcklbw %%mm0, %%mm1; "
- " paddw %%mm1, %%mm2; "
- " movq %%mm2, (%%eax); "
- " add $8, %%eax; "
- " decl %%edx; "
- " jne 2b; "
- " subl $0x4000, %%ecx; "
- " jmp 6f; "
- "3: " /* prepare to output a line */
- " movd %%ecx, %%mm1; "
- " movl %4, %%edx; " /* edx = width */
- " movq %9, %%mm6; " /* mm6 = 2^14 */
- " punpcklwd %%mm1, %%mm1; "
- " punpckldq %%mm1, %%mm1; "
- " psubw %%mm1, %%mm6; " /* mm6 = yfrac */
- "4: "
- " movd (%0), %%mm4; " /* mm4 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm4; "
- " movq (%%eax), %%mm5; " /* mm5 = accumulate */
- " movq %%mm6, %%mm3; "
- " psllw $2, %%mm4; "
- " movq %%mm4, %%mm0; " /* mm3 = (srcpix * yfrac) >> 16) */
- " psraw $15, %%mm0; "
- " pand %%mm3, %%mm0; "
- " movq %%mm3, %%mm2; "
- " psraw $15, %%mm2; "
- " pand %%mm4, %%mm2; "
- " pmulhw %%mm4, %%mm3; "
- " paddw %%mm0, %%mm3; "
- " paddw %%mm2, %%mm3; "
- " movq %%mm1, %%mm0; " /* mm4 = (srcpix * ycounter >> 16) */
- " psraw $15, %%mm0; "
- " pand %%mm4, %%mm0; "
- " movq %%mm4, %%mm2; "
- " psraw $15, %%mm2; "
- " pand %%mm1, %%mm2; "
- " pmulhw %%mm1, %%mm4; "
- " paddw %%mm0, %%mm4; "
- " paddw %%mm2, %%mm4; "
- " movq %%mm3, (%%eax); "
- " paddw %%mm5, %%mm4; "
- " add $8, %%eax; "
- " movq %%mm7, %%mm0; "
- " psraw $15, %%mm0; "
- " pand %%mm4, %%mm0; "
- " movq %%mm4, %%mm2; "
- " psraw $15, %%mm2; "
- " pand %%mm7, %%mm2; "
- " pmulhw %%mm7, %%mm4; "
- " paddw %%mm0, %%mm4; "
- " paddw %%mm2, %%mm4; "
- " pxor %%mm0, %%mm0; "
- " packuswb %%mm0, %%mm4; "
- " movd %%mm4, (%1); "
- " add $4, %1; "
- " decl %%edx; "
- " jne 4b; "
- " add %8, %1; " /* dstpix += dstdiff */
- " addl %5, %%ecx; "
- " subl $0x4000, %%ecx; "
- "6: " /* tail of outer Y-loop */
- " add %7, %0; " /* srcpix += srcdiff */
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
- : "m"(templine), "m"(srcheight), "m"(width), "m"(yspace),
- "m"(yrecip), "m"(srcdiff), "m"(dstdiff),"m"(One64) /* input */
- : "%ecx","%edx","%eax" /* clobbered */
- );
-
- /* free the temporary memory */
- free(templine);
-}
-
-void
-filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
- Uint16 *templine;
- int srcdiff = srcpitch - (width * 4);
- int dstdiff = dstpitch - (width * 4);
- int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
- int yrecip = 0x40000000 / yspace;
- long long One64 = 0x4000400040004000ULL;
-
- /* allocate and clear a memory area for storing the accumulator line */
- templine = (Uint16 *) malloc(dstpitch * 2);
- if (templine == 0) return;
- memset(templine, 0, dstpitch * 2);
- asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
- " movl %5, %%ecx; " /* ecx == ycounter */
- " pxor %%mm0, %%mm0; "
- " movd %6, %%mm7; " /* mm7 == yrecipmmx */
- " pshufw $0, %%mm7, %%mm7; "
- "1: " /* outer Y-loop */
- " movl %2, %%eax; " /* rax == accumulate */
- " cmpl $0x4000, %%ecx; "
- " jbe 3f; "
- " movl %4, %%edx; " /* edx == width */
- "2: "
- " movd (%0), %%mm1; "
- " add $4, %0; "
- " movq (%%eax), %%mm2; "
- " punpcklbw %%mm0, %%mm1; "
- " paddw %%mm1, %%mm2; "
- " movq %%mm2, (%%eax); "
- " add $8, %%eax; "
- " decl %%edx; "
- " jne 2b; "
- " subl $0x4000, %%ecx; "
- " jmp 6f; "
- "3: " /* prepare to output a line */
- " movd %%ecx, %%mm1; "
- " movl %4, %%edx; " /* edx = width */
- " movq %9, %%mm6; " /* mm6 = 2^14 */
- " pshufw $0, %%mm1, %%mm1; "
- " psubw %%mm1, %%mm6; " /* mm6 = yfrac */
- "4: "
- " movd (%0), %%mm4; " /* mm4 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm4; "
- " movq (%%eax), %%mm5; " /* mm5 = accumulate */
- " movq %%mm6, %%mm3; "
- " psllw $2, %%mm4; "
- " pmulhuw %%mm4, %%mm3; " /* mm3 = (srcpix * yfrac) >> 16 */
- " pmulhuw %%mm1, %%mm4; " /* mm4 = (srcpix * ycounter >> 16) */
- " movq %%mm3, (%%eax); "
- " paddw %%mm5, %%mm4; "
- " add $8, %%eax; "
- " pmulhuw %%mm7, %%mm4; "
- " packuswb %%mm0, %%mm4; "
- " movd %%mm4, (%1); "
- " add $4, %1; "
- " decl %%edx; "
- " jne 4b; "
- " add %8, %1; " /* dstpix += dstdiff */
- " addl %5, %%ecx; "
- " subl $0x4000, %%ecx; "
- "6: " /* tail of outer Y-loop */
- " add %7, %0; " /* srcpix += srcdiff */
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
- : "m"(templine), "m"(srcheight), "m"(width), "m"(yspace),
- "m"(yrecip), "m"(srcdiff), "m"(dstdiff),"m"(One64) /* input */
- : "%ecx","%edx","%eax" /* clobbered */
- );
-
- /* free the temporary memory */
- free(templine);
-}
-
-/* These functions implement a bilinear filter in the X-dimension.
- */
-void
-filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
- int *xidx0, *xmult0, *xmult1;
- int x, y;
- int factorwidth = 8;
- long long One64 = 0x0100010001000100ULL;
-
- /* Allocate memory for factors */
- xidx0 = malloc(dstwidth * 4);
- if (xidx0 == 0) return;
- xmult0 = (int *) malloc(dstwidth * factorwidth);
- xmult1 = (int *) malloc(dstwidth * factorwidth);
- if (xmult0 == 0 || xmult1 == 0)
- {
- free(xidx0);
- if (xmult0) free(xmult0);
- if (xmult1) free(xmult1);
- }
-
- /* Create multiplier factors and starting indices and put them in arrays */
- for (x = 0; x < dstwidth; x++)
- {
- int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
- int xm0 = 0x100 - xm1;
- xidx0[x] = x * (srcwidth - 1) / dstwidth;
- xmult1[x*2] = xm1 | (xm1 << 16);
- xmult1[x*2+1] = xm1 | (xm1 << 16);
- xmult0[x*2] = xm0 | (xm0 << 16);
- xmult0[x*2+1] = xm0 | (xm0 << 16);
- }
-
- /* Do the scaling in raster order so we don't trash the cache */
- for (y = 0; y < height; y++)
- {
- Uint8 *srcrow0 = srcpix + y * srcpitch;
- Uint8 *dstrow = dstpix + y * dstpitch;
- int *xm0 = xmult0;
- int *x0 = xidx0;
- int width = dstwidth;
- asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
- " pxor %%mm0, %%mm0; "
- " movq %5, %%mm7; "
- "1: "
- " movl (%2), %%eax; " /* get xidx0[x] */
- " add $4, %2; "
- " movq %%mm7, %%mm2; "
- " movq (%0), %%mm1; " /* load mult0 */
- " add $8, %0; "
- " psubw %%mm1, %%mm2; " /* load mult1 */
- " movd (%4,%%eax,4), %%mm4; "
- " movd 4(%4,%%eax,4), %%mm5; "
- " punpcklbw %%mm0, %%mm4; "
- " punpcklbw %%mm0, %%mm5; "
- " pmullw %%mm1, %%mm4; "
- " pmullw %%mm2, %%mm5; "
- " paddw %%mm4, %%mm5; "
- " psrlw $8, %%mm5; "
- " packuswb %%mm0, %%mm5; "
- " movd %%mm5, (%1); "
- " add $4, %1; "
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(xm0), "+r"(dstrow), "+r"(x0), "+m"(width) /* outputs */
- : "S"(srcrow0), "m"(One64) /* input */
- : "%eax" /* clobbered */
- );
- }
-
- /* free memory */
- free(xidx0);
- free(xmult0);
- free(xmult1);
-}
-
-void
-filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
- int *xidx0, *xmult0, *xmult1;
- int x, y;
- int factorwidth = 8;
- long long One64 = 0x0100010001000100ULL;
-
- /* Allocate memory for factors */
- xidx0 = malloc(dstwidth * 4);
- if (xidx0 == 0) return;
- xmult0 = (int *) malloc(dstwidth * factorwidth);
- xmult1 = (int *) malloc(dstwidth * factorwidth);
- if (xmult0 == 0 || xmult1 == 0)
- {
- free(xidx0);
- if (xmult0) free(xmult0);
- if (xmult1) free(xmult1);
- }
-
- /* Create multiplier factors and starting indices and put them in arrays */
- for (x = 0; x < dstwidth; x++)
- {
- int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
- int xm0 = 0x100 - xm1;
- xidx0[x] = x * (srcwidth - 1) / dstwidth;
- xmult1[x*2] = xm1 | (xm1 << 16);
- xmult1[x*2+1] = xm1 | (xm1 << 16);
- xmult0[x*2] = xm0 | (xm0 << 16);
- xmult0[x*2+1] = xm0 | (xm0 << 16);
- }
-
- /* Do the scaling in raster order so we don't trash the cache */
- for (y = 0; y < height; y++)
- {
- Uint8 *srcrow0 = srcpix + y * srcpitch;
- Uint8 *dstrow = dstpix + y * dstpitch;
- int *xm0 = xmult0;
- int *x0 = xidx0;
- int width = dstwidth;
- asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
- " pxor %%mm0, %%mm0; "
- " movq %5, %%mm7; "
- "1: "
- " movl (%2), %%eax; " /* get xidx0[x] */
- " add $4, %2; "
- " movq %%mm7, %%mm2; "
- " movq (%0), %%mm1; " /* load mult0 */
- " add $8, %0; "
- " psubw %%mm1, %%mm2; " /* load mult1 */
- " movd (%4,%%eax,4), %%mm4; "
- " movd 4(%4,%%eax,4), %%mm5; "
- " punpcklbw %%mm0, %%mm4; "
- " punpcklbw %%mm0, %%mm5; "
- " pmullw %%mm1, %%mm4; "
- " pmullw %%mm2, %%mm5; "
- " paddw %%mm4, %%mm5; "
- " psrlw $8, %%mm5; "
- " packuswb %%mm0, %%mm5; "
- " movd %%mm5, (%1); "
- " add $4, %1; "
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(xm0), "+r"(dstrow), "+r"(x0), "+m"(width) /* outputs */
- : "S"(srcrow0), "m"(One64) /* input */
- : "%eax" /* clobbered */
- );
- }
-
- /* free memory */
- free(xidx0);
- free(xmult0);
- free(xmult1);
-}
-
-/* These functions implement a bilinear filter in the Y-dimension.
- */
-void filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
- int y;
-
- for (y = 0; y < dstheight; y++)
- {
- int yidx0 = y * (srcheight - 1) / dstheight;
- Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
- Uint8 *srcrow1 = srcrow0 + srcpitch;
- int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
- int ymult0 = 0x0100 - ymult1;
- Uint8 *dstrow = dstpix + y * dstpitch;
- asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
- " movl %5, %%eax; "
- " movd %3, %%mm1; "
- " movd %4, %%mm2; "
- " pxor %%mm0, %%mm0; "
- " punpcklwd %%mm1, %%mm1; "
- " punpckldq %%mm1, %%mm1; "
- " punpcklwd %%mm2, %%mm2; "
- " punpckldq %%mm2, %%mm2; "
- "1: "
- " movd (%0), %%mm4; "
- " add $4, %0; "
- " movd (%1), %%mm5; "
- " add $4, %1; "
- " punpcklbw %%mm0, %%mm4; "
- " punpcklbw %%mm0, %%mm5; "
- " pmullw %%mm1, %%mm4; "
- " pmullw %%mm2, %%mm5; "
- " paddw %%mm4, %%mm5; "
- " psrlw $8, %%mm5; "
- " packuswb %%mm0, %%mm5; "
- " movd %%mm5, (%2); "
- " add $4, %2; "
- " decl %%eax; "
- " jne 1b; "
- " emms; "
- : "+r"(srcrow0), "+r"(srcrow1),"+r"(dstrow) /* no outputs */
- : "m"(ymult0), "m"(ymult1), "m"(width) /* input */
- : "%eax" /* clobbered */
- );
- }
-}
-
-void filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
- int y;
-
- for (y = 0; y < dstheight; y++)
- {
- int yidx0 = y * (srcheight - 1) / dstheight;
- Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
- Uint8 *srcrow1 = srcrow0 + srcpitch;
- int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
- int ymult0 = 0x0100 - ymult1;
- Uint8 *dstrow = dstpix + y * dstpitch;
- asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
- " movl %5, %%eax; "
- " movd %3, %%mm1; "
- " movd %4, %%mm2; "
- " pxor %%mm0, %%mm0; "
- " pshufw $0, %%mm1, %%mm1; "
- " pshufw $0, %%mm2, %%mm2; "
- "1: "
- " movd (%0), %%mm4; "
- " add $4, %0; "
- " movd (%1), %%mm5; "
- " add $4, %1; "
- " punpcklbw %%mm0, %%mm4; "
- " punpcklbw %%mm0, %%mm5; "
- " pmullw %%mm1, %%mm4; "
- " pmullw %%mm2, %%mm5; "
- " paddw %%mm4, %%mm5; "
- " psrlw $8, %%mm5; "
- " packuswb %%mm0, %%mm5; "
- " movd %%mm5, (%2); "
- " add $4, %2; "
- " decl %%eax; "
- " jne 1b; "
- " emms; "
- : "+r"(srcrow0), "+r"(srcrow1),"+r"(dstrow) /* no outputs */
- : "m"(ymult0), "m"(ymult1), "m"(width) /* input */
- : "%eax" /* clobbered */
- );
- }
-}
diff --git a/src/scale_mmx64.c b/src/scale_mmx64.c
deleted file mode 100644
index e897f76..0000000
--- a/src/scale_mmx64.c
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- pygame - Python Game Library
- Copyright (C) 2000-2001 Pete Shinners
- Copyright (C) 2007 Rene Dudfield, Richard Goedeken
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
- Pete Shinners
- pete@shinners.org
-*/
-
-/* Pentium 64 bit SSE/MMX smoothscale routines
- * These are written for compilation with GCC only.
- *
- * This file should not depend on anything but the C standard library.
- */
-
-#if !defined(__GNUC__) || !defined(__x86_64__)
-#error "Pygame build bug: should not be compiling this file!"
-#endif
-
-#include <stdint.h>
-typedef uint8_t Uint8; /* SDL convension */
-typedef uint16_t Uint16; /* SDL convension */
-#include <stdlib.h>
-#include <memory.h>
-#include "scale.h"
-
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
- */
-void
-filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
- int srcdiff = srcpitch - (srcwidth * 4);
- int dstdiff = dstpitch - (dstwidth * 4);
-
- int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
- int xrecip = 0x40000000 / xspace;
- long long One64 = 0x4000400040004000ULL;
- long long srcdiff64 = srcdiff;
- long long dstdiff64 = dstdiff;
- asm __volatile__(" /* MMX code for X-shrink area average filter */ "
- " pxor %%mm0, %%mm0; "
- " movd %6, %%mm7; " /* mm7 == xrecipmmx */
- " punpcklwd %%mm7, %%mm7; "
- " punpckldq %%mm7, %%mm7; "
- "1: " /* outer Y-loop */
- " movl %5, %%ecx; " /* ecx == xcounter */
- " pxor %%mm1, %%mm1; " /* mm1 == accumulator */
- " movl %4, %%edx; " /* edx == width */
- "2: " /* inner X-loop */
- " cmpl $0x4000, %%ecx; "
- " jbe 3f; "
- " movd (%0), %%mm2; " /* mm2 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm2; "
- " paddw %%mm2, %%mm1; " /* accumulator += srcpix */
- " subl $0x4000, %%ecx; "
- " jmp 4f; "
- "3: " /* prepare to output a pixel */
- " movd %%ecx, %%mm2; "
- " movq %2, %%mm3; " /* mm3 = 2^14 */
- " punpcklwd %%mm2, %%mm2; "
- " punpckldq %%mm2, %%mm2; "
- " movd (%0), %%mm4; " /* mm4 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm4; "
- " psubw %%mm2, %%mm3; " /* mm3 = xfrac */
- " psllw $2, %%mm4; "
- " movq %%mm4, %%mm5; " /* mm2 = (srcpix * xcounter >> 16) */
- " psraw $15, %%mm5; "
- " pand %%mm2, %%mm5; "
- " movq %%mm2, %%mm6; "
- " psraw $15, %%mm6; "
- " pand %%mm4, %%mm6; "
- " pmulhw %%mm4, %%mm2; "
- " paddw %%mm5, %%mm2; "
- " paddw %%mm6, %%mm2; "
- " movq %%mm4, %%mm5; " /* mm3 = (srcpix * xfrac) >> 16) */
- " psraw $15, %%mm5; "
- " pand %%mm3, %%mm5; "
- " movq %%mm3, %%mm6; "
- " psraw $15, %%mm6; "
- " pand %%mm4, %%mm6; "
- " pmulhw %%mm4, %%mm3; "
- " paddw %%mm5, %%mm3; "
- " paddw %%mm6, %%mm3; "
- " paddw %%mm1, %%mm2; "
- " movq %%mm3, %%mm1; " /* accumulator = (srcpix * xfrac) >> 16 */
- " movq %%mm7, %%mm5; "
- " psraw $15, %%mm5; "
- " pand %%mm2, %%mm5; "
- " movq %%mm2, %%mm6; "
- " psraw $15, %%mm6; "
- " pand %%mm7, %%mm6; "
- " pmulhw %%mm7, %%mm2; "
- " paddw %%mm5, %%mm2; "
- " paddw %%mm6, %%mm2; "
- " packuswb %%mm0, %%mm2; "
- " movd %%mm2, (%1); "
- " add %5, %%ecx; "
- " add $4, %1; "
- " subl $0x4000, %%ecx; "
- "4: " /* tail of inner X-loop */
- " decl %%edx; "
- " jne 2b; "
- " add %7, %0; " /* srcpix += srcdiff */
- " add %8, %1; " /* dstpix += dstdiff */
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
- : "m"(One64), "m"(height), "m"(srcwidth),
- "m"(xspace), "m"(xrecip), "m"(srcdiff64), "m"(dstdiff64) /* inputs */
- : "%ecx","%edx" /* clobbered */
- );
-}
-
-void
-filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
- int srcdiff = srcpitch - (srcwidth * 4);
- int dstdiff = dstpitch - (dstwidth * 4);
-
- int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
- int xrecip = 0x40000000 / xspace;
- long long One64 = 0x4000400040004000ULL;
- long long srcdiff64 = srcdiff;
- long long dstdiff64 = dstdiff;
- asm __volatile__(" /* MMX code for X-shrink area average filter */ "
- " pxor %%mm0, %%mm0; "
- " movd %6, %%mm7; " /* mm7 == xrecipmmx */
- " movq %2, %%mm6; " /* mm6 = 2^14 */
- " pshufw $0, %%mm7, %%mm7; "
- "1: " /* outer Y-loop */
- " movl %5, %%ecx; " /* ecx == xcounter */
- " pxor %%mm1, %%mm1; " /* mm1 == accumulator */
- " movl %4, %%edx; " /* edx == width */
- "2: " /* inner X-loop */
- " cmpl $0x4000, %%ecx; "
- " jbe 3f; "
- " movd (%0), %%mm2; " /* mm2 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm2; "
- " paddw %%mm2, %%mm1; " /* accumulator += srcpix */
- " subl $0x4000, %%ecx; "
- " jmp 4f; "
- "3: " /* prepare to output a pixel */
- " movd %%ecx, %%mm2; "
- " movq %%mm6, %%mm3; " /* mm3 = 2^14 */
- " pshufw $0, %%mm2, %%mm2; "
- " movd (%0), %%mm4; " /* mm4 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm4; "
- " psubw %%mm2, %%mm3; " /* mm3 = xfrac */
- " psllw $2, %%mm4; "
- " pmulhuw %%mm4, %%mm2; " /* mm2 = (srcpix * xcounter >> 16) */
- " pmulhuw %%mm4, %%mm3; " /* mm3 = (srcpix * xfrac) >> 16 */
- " paddw %%mm1, %%mm2; "
- " movq %%mm3, %%mm1; " /* accumulator = (srcpix * xfrac) >> 16 */
- " pmulhuw %%mm7, %%mm2; "
- " packuswb %%mm0, %%mm2; "
- " movd %%mm2, (%1); "
- " add %5, %%ecx; "
- " add $4, %1; "
- " subl $0x4000, %%ecx; "
- "4: " /* tail of inner X-loop */
- " decl %%edx; "
- " jne 2b; "
- " add %7, %0; " /* srcpix += srcdiff */
- " add %8, %1; " /* dstpix += dstdiff */
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
- : "m"(One64), "m"(height), "m"(srcwidth),
- "m"(xspace), "m"(xrecip), "m"(srcdiff64), "m"(dstdiff64) /* inputs */
- : "%ecx","%edx" /* clobbered */
- );
-}
-
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
- */
-void
-filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
- Uint16 *templine;
- int srcdiff = srcpitch - (width * 4);
- int dstdiff = dstpitch - (width * 4);
- int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
- int yrecip = 0x40000000 / yspace;
- long long One64 = 0x4000400040004000ULL;
-
- /* allocate and clear a memory area for storing the accumulator line */
- templine = (Uint16 *) malloc(dstpitch * 2);
- if (templine == 0) return;
- memset(templine, 0, dstpitch * 2);
- long long srcdiff64 = srcdiff;
- long long dstdiff64 = dstdiff;
- asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
- " movl %5, %%ecx; " /* ecx == ycounter */
- " pxor %%mm0, %%mm0; "
- " movd %6, %%mm7; " /* mm7 == yrecipmmx */
- " punpcklwd %%mm7, %%mm7; "
- " punpckldq %%mm7, %%mm7; "
- "1: " /* outer Y-loop */
- " mov %2, %%rax; " /* rax == accumulate */
- " cmpl $0x4000, %%ecx; "
- " jbe 3f; "
- " movl %4, %%edx; " /* edx == width */
- "2: "
- " movd (%0), %%mm1; "
- " add $4, %0; "
- " movq (%%rax), %%mm2; "
- " punpcklbw %%mm0, %%mm1; "
- " paddw %%mm1, %%mm2; "
- " movq %%mm2, (%%rax); "
- " add $8, %%rax; "
- " decl %%edx; "
- " jne 2b; "
- " subl $0x4000, %%ecx; "
- " jmp 6f; "
- "3: " /* prepare to output a line */
- " movd %%ecx, %%mm1; "
- " movl %4, %%edx; " /* edx = width */
- " movq %9, %%mm6; " /* mm6 = 2^14 */
- " punpcklwd %%mm1, %%mm1; "
- " punpckldq %%mm1, %%mm1; "
- " psubw %%mm1, %%mm6; " /* mm6 = yfrac */
- "4: "
- " movd (%0), %%mm4; " /* mm4 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm4; "
- " movq (%%rax), %%mm5; " /* mm5 = accumulate */
- " movq %%mm6, %%mm3; "
- " psllw $2, %%mm4; "
- " movq %%mm4, %%mm0; " /* mm3 = (srcpix * yfrac) >> 16) */
- " psraw $15, %%mm0; "
- " pand %%mm3, %%mm0; "
- " movq %%mm3, %%mm2; "
- " psraw $15, %%mm2; "
- " pand %%mm4, %%mm2; "
- " pmulhw %%mm4, %%mm3; "
- " paddw %%mm0, %%mm3; "
- " paddw %%mm2, %%mm3; "
- " movq %%mm1, %%mm0; " /* mm4 = (srcpix * ycounter >> 16) */
- " psraw $15, %%mm0; "
- " pand %%mm4, %%mm0; "
- " movq %%mm4, %%mm2; "
- " psraw $15, %%mm2; "
- " pand %%mm1, %%mm2; "
- " pmulhw %%mm1, %%mm4; "
- " paddw %%mm0, %%mm4; "
- " paddw %%mm2, %%mm4; "
- " movq %%mm3, (%%rax); "
- " paddw %%mm5, %%mm4; "
- " add $8, %%rax; "
- " movq %%mm7, %%mm0; "
- " psraw $15, %%mm0; "
- " pand %%mm4, %%mm0; "
- " movq %%mm4, %%mm2; "
- " psraw $15, %%mm2; "
- " pand %%mm7, %%mm2; "
- " pmulhw %%mm7, %%mm4; "
- " paddw %%mm0, %%mm4; "
- " paddw %%mm2, %%mm4; "
- " pxor %%mm0, %%mm0; "
- " packuswb %%mm0, %%mm4; "
- " movd %%mm4, (%1); "
- " add $4, %1; "
- " decl %%edx; "
- " jne 4b; "
- " add %8, %1; " /* dstpix += dstdiff */
- " addl %5, %%ecx; "
- " subl $0x4000, %%ecx; "
- "6: " /* tail of outer Y-loop */
- " add %7, %0; " /* srcpix += srcdiff */
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
- : "m"(templine),"m"(srcheight), "m"(width), "m"(yspace),
- "m"(yrecip), "m"(srcdiff64), "m"(dstdiff64), "m"(One64) /* input */
- : "%ecx","%edx","%rax" /* clobbered */
- );
-
- /* free the temporary memory */
- free(templine);
-}
-
-void
-filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
- Uint16 *templine;
- int srcdiff = srcpitch - (width * 4);
- int dstdiff = dstpitch - (width * 4);
- int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
- int yrecip = 0x40000000 / yspace;
- long long One64 = 0x4000400040004000ULL;
-
- /* allocate and clear a memory area for storing the accumulator line */
- templine = (Uint16 *) malloc(dstpitch * 2);
- if (templine == 0) return;
- memset(templine, 0, dstpitch * 2);
- long long srcdiff64 = srcdiff;
- long long dstdiff64 = dstdiff;
- asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
- " movl %5, %%ecx; " /* ecx == ycounter */
- " pxor %%mm0, %%mm0; "
- " movd %6, %%mm7; " /* mm7 == yrecipmmx */
- " pshufw $0, %%mm7, %%mm7; "
- "1: " /* outer Y-loop */
- " mov %2, %%rax; " /* rax == accumulate */
- " cmpl $0x4000, %%ecx; "
- " jbe 3f; "
- " movl %4, %%edx; " /* edx == width */
- "2: "
- " movd (%0), %%mm1; "
- " add $4, %0; "
- " movq (%%rax), %%mm2; "
- " punpcklbw %%mm0, %%mm1; "
- " paddw %%mm1, %%mm2; "
- " movq %%mm2, (%%rax); "
- " add $8, %%rax; "
- " decl %%edx; "
- " jne 2b; "
- " subl $0x4000, %%ecx; "
- " jmp 6f; "
- "3: " /* prepare to output a line */
- " movd %%ecx, %%mm1; "
- " movl %4, %%edx; " /* edx = width */
- " movq %9, %%mm6; " /* mm6 = 2^14 */
- " pshufw $0, %%mm1, %%mm1; "
- " psubw %%mm1, %%mm6; " /* mm6 = yfrac */
- "4: "
- " movd (%0), %%mm4; " /* mm4 = srcpix */
- " add $4, %0; "
- " punpcklbw %%mm0, %%mm4; "
- " movq (%%rax), %%mm5; " /* mm5 = accumulate */
- " movq %%mm6, %%mm3; "
- " psllw $2, %%mm4; "
- " pmulhuw %%mm4, %%mm3; " /* mm3 = (srcpix * yfrac) >> 16 */
- " pmulhuw %%mm1, %%mm4; " /* mm4 = (srcpix * ycounter >> 16) */
- " movq %%mm3, (%%rax); "
- " paddw %%mm5, %%mm4; "
- " add $8, %%rax; "
- " pmulhuw %%mm7, %%mm4; "
- " packuswb %%mm0, %%mm4; "
- " movd %%mm4, (%1); "
- " add $4, %1; "
- " decl %%edx; "
- " jne 4b; "
- " add %8, %1; " /* dstpix += dstdiff */
- " addl %5, %%ecx; "
- " subl $0x4000, %%ecx; "
- "6: " /* tail of outer Y-loop */
- " add %7, %0; " /* srcpix += srcdiff */
- " decl %3; "
- " jne 1b; "
- " emms; "
- : "+r"(srcpix), "+r"(dstpix) /* outputs */
- : "m"(templine),"m"(srcheight), "m"(width), "m"(yspace),
- "m"(yrecip), "m"(srcdiff64), "m"(dstdiff64), "m"(One64) /* input */
- : "%ecx","%edx","%rax" /* clobbered */
- );
-
- /* free the temporary memory */
- free(templine);
-}
-
-/* These functions implement a bilinear filter in the X-dimension.
- */
-void
-filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
- int *xidx0, *xmult0, *xmult1;
- int x, y;
- int factorwidth = 8;
-
- /* Allocate memory for factors */
- xidx0 = malloc(dstwidth * 4);
- if (xidx0 == 0) return;
- xmult0 = (int *) malloc(dstwidth * factorwidth);
- xmult1 = (int *) malloc(dstwidth * factorwidth);
- if (xmult0 == 0 || xmult1 == 0)
- {
- free(xidx0);
- if (xmult0) free(xmult0);
- if (xmult1) free(xmult1);
- }
-
- /* Create multiplier factors and starting indices and put them in arrays */
- for (x = 0; x < dstwidth; x++)
- {
- int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
- int xm0 = 0x100 - xm1;
- xidx0[x] = x * (srcwidth - 1) / dstwidth;
- xmult1[x*2] = xm1 | (xm1 << 16);
- xmult1[x*2+1] = xm1 | (xm1 << 16);
- xmult0[x*2] = xm0 | (xm0 << 16);
- xmult0[x*2+1] = xm0 | (xm0 << 16);
- }
-
- /* Do the scaling in raster order so we don't trash the cache */
- for (y = 0; y < height; y++)
- {
- Uint8 *srcrow0 = srcpix + y * srcpitch;
- Uint8 *dstrow = dstpix + y * dstpitch;
- int *xm0 = xmult0;
- int *xm1 = xmult1;
- int *x0 = xidx0;
- asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
- " movl %5, %%ecx; "
- " pxor %%mm0, %%mm0; "
- "1: "
- " movsxl (%3), %%rax; " /* get xidx0[x] */
- " add $4, %3; "
- " movq (%0), %%mm1; " /* load mult0 */
- " add $8, %0; "
- " movq (%1), %%mm2; " /* load mult1 */
- " add $8, %1; "
- " movd (%4,%%rax,4), %%mm4; "
- " movd 4(%4,%%rax,4), %%mm5; "
- " punpcklbw %%mm0, %%mm4; "
- " punpcklbw %%mm0, %%mm5; "
- " pmullw %%mm1, %%mm4; "
- " pmullw %%mm2, %%mm5; "
- " paddw %%mm4, %%mm5; "
- " psrlw $8, %%mm5; "
- " packuswb %%mm0, %%mm5; "
- " movd %%mm5, (%2); "
- " add $4, %2; "
- " decl %%ecx; "
- " jne 1b; "
- " emms; "
- : "+r"(xm0), "+r"(xm1), "+r"(dstrow), "+r"(x0) /* outputs */
- : "r"(srcrow0),"m"(dstwidth) /* input */
- : "%ecx","%rax" /* clobbered */
- );
- }
-
- /* free memory */
- free(xidx0);
- free(xmult0);
- free(xmult1);
-}
-
-void
-filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
- int *xidx0, *xmult0, *xmult1;
- int x, y;
- int factorwidth = 8;
-
- /* Allocate memory for factors */
- xidx0 = malloc(dstwidth * 4);
- if (xidx0 == 0) return;
- xmult0 = (int *) malloc(dstwidth * factorwidth);
- xmult1 = (int *) malloc(dstwidth * factorwidth);
- if (xmult0 == 0 || xmult1 == 0)
- {
- free(xidx0);
- if (xmult0) free(xmult0);
- if (xmult1) free(xmult1);
- }
-
- /* Create multiplier factors and starting indices and put them in arrays */
- for (x = 0; x < dstwidth; x++)
- {
- int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
- int xm0 = 0x100 - xm1;
- xidx0[x] = x * (srcwidth - 1) / dstwidth;
- xmult1[x*2] = xm1 | (xm1 << 16);
- xmult1[x*2+1] = xm1 | (xm1 << 16);
- xmult0[x*2] = xm0 | (xm0 << 16);
- xmult0[x*2+1] = xm0 | (xm0 << 16);
- }
-
- /* Do the scaling in raster order so we don't trash the cache */
- for (y = 0; y < height; y++)
- {
- Uint8 *srcrow0 = srcpix + y * srcpitch;
- Uint8 *dstrow = dstpix + y * dstpitch;
- int *xm0 = xmult0;
- int *xm1 = xmult1;
- int *x0 = xidx0;
- asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
- " movl %5, %%ecx; "
- " pxor %%mm0, %%mm0; "
- "1: "
- " movsxl (%3), %%rax; " /* get xidx0[x] */
- " add $4, %3; "
- " movq (%0), %%mm1; " /* load mult0 */
- " add $8, %0; "
- " movq (%1), %%mm2; " /* load mult1 */
- " add $8, %1; "
- " movd (%4,%%rax,4), %%mm4; "
- " movd 4(%4,%%rax,4), %%mm5; "
- " punpcklbw %%mm0, %%mm4; "
- " punpcklbw %%mm0, %%mm5; "
- " pmullw %%mm1, %%mm4; "
- " pmullw %%mm2, %%mm5; "
- " paddw %%mm4, %%mm5; "
- " psrlw $8, %%mm5; "
- " packuswb %%mm0, %%mm5; "
- " movd %%mm5, (%2); "
- " add $4, %2; "
- " decl %%ecx; "
- " jne 1b; "
- " emms; "
- : "+r"(xm0), "+r"(xm1), "+r"(dstrow), "+r"(x0) /* outputs */
- : "r"(srcrow0),"m"(dstwidth) /* input */
- : "%ecx","%rax" /* clobbered */
- );
- }
-
- /* free memory */
- free(xidx0);
- free(xmult0);
- free(xmult1);
-}
-
-/* These functions implement a bilinear filter in the Y-dimension
- */
-void
-filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
- int y;
-
- for (y = 0; y < dstheight; y++)
- {
- int yidx0 = y * (srcheight - 1) / dstheight;
- Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
- Uint8 *srcrow1 = srcrow0 + srcpitch;
- int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
- int ymult0 = 0x0100 - ymult1;
- Uint8 *dstrow = dstpix + y * dstpitch;
- asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
- " movl %5, %%ecx; "
- " movd %3, %%mm1; "
- " movd %4, %%mm2; "
- " pxor %%mm0, %%mm0; "
- " punpcklwd %%mm1, %%mm1; "
- " punpckldq %%mm1, %%mm1; "
- " punpcklwd %%mm2, %%mm2; "
- " punpckldq %%mm2, %%mm2; "
- "1: "
- " movd (%0), %%mm4; "
- " add $4, %0; "
- " movd (%1), %%mm5; "
- " add $4, %1; "
- " punpcklbw %%mm0, %%mm4; "
- " punpcklbw %%mm0, %%mm5; "
- " pmullw %%mm1, %%mm4; "
- " pmullw %%mm2, %%mm5; "
- " paddw %%mm4, %%mm5; "
- " psrlw $8, %%mm5; "
- " packuswb %%mm0, %%mm5; "
- " movd %%mm5, (%2); "
- " add $4, %2; "
- " decl %%ecx; "
- " jne 1b; "
- " emms; "
- : "+r"(srcrow0), "+r"(srcrow1), "+r"(dstrow) /* outputs */
- : "m"(ymult0), "m"(ymult1), "m"(width) /* input */
- : "%ecx" /* clobbered */
- );
- }
-}
-
-void
-filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
- int y;
-
- for (y = 0; y < dstheight; y++)
- {
- int yidx0 = y * (srcheight - 1) / dstheight;
- Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
- Uint8 *srcrow1 = srcrow0 + srcpitch;
- int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
- int ymult0 = 0x0100 - ymult1;
- Uint8 *dstrow = dstpix + y * dstpitch;
- asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
- " movl %5, %%ecx; "
- " movd %3, %%mm1; "
- " movd %4, %%mm2; "
- " pxor %%mm0, %%mm0; "
- " pshufw $0, %%mm1, %%mm1; "
- " pshufw $0, %%mm2, %%mm2; "
- "1: "
- " movd (%0), %%mm4; "
- " add $4, %0; "
- " movd (%1), %%mm5; "
- " add $4, %1; "
- " punpcklbw %%mm0, %%mm4; "
- " punpcklbw %%mm0, %%mm5; "
- " pmullw %%mm1, %%mm4; "
- " pmullw %%mm2, %%mm5; "
- " paddw %%mm4, %%mm5; "
- " psrlw $8, %%mm5; "
- " packuswb %%mm0, %%mm5; "
- " movd %%mm5, (%2); "
- " add $4, %2; "
- " decl %%ecx; "
- " jne 1b; "
- " emms; "
- : "+r"(srcrow0), "+r"(srcrow1), "+r"(dstrow) /* outputs */
- : "m"(ymult0), "m"(ymult1), "m"(width) /* input */
- : "%ecx" /* clobbered */
- );
- }
-}
-
diff --git a/src/transform.c b/src/transform.c
index c997deb..ee0d03d 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -29,7 +29,7 @@
#include "pygamedocs.h"
#include <math.h>
#include <string.h>
-#include "scale.h"
+//#include "scale.h"
typedef void (* SMOOTHSCALE_FILTER_P)(Uint8 *, Uint8 *, int, int, int, int, int);