From 34766c430d884038a4743c86aa1b3f2460c51351 Mon Sep 17 00:00:00 2001
From: Humdinger <humdingerb@gmail.com>
Date: Thu, 23 Jul 2015 07:04:01 +0200
Subject: Haiku port: use config_haiku, drop scrap/pypm and MMX scaler

Existing patch, now formatted as a git patch.

diff --git a/Setup.in b/Setup.in
index 4bb6c1c..9236df4 100644
--- a/Setup.in
+++ b/Setup.in
@@ -33,9 +33,9 @@ mixer_music src/music.c $(SDL) $(MIXER) $(DEBUG)
 _numericsurfarray src/_numericsurfarray.c $(SDL) $(DEBUG)
 _numericsndarray src/_numericsndarray.c $(SDL) $(MIXER) $(DEBUG)
 movie src/movie.c $(SDL) $(SMPEG) $(DEBUG)
-scrap src/scrap.c $(SDL) $(SCRAP) $(DEBUG)
+#scrap src/scrap.c $(SDL) $(SCRAP) $(DEBUG)
 _camera src/_camera.c src/camera_v4l2.c src/camera_v4l.c $(SDL) $(DEBUG)
-pypm src/pypm.c $(SDL) $(PORTMIDI) $(PORTTIME) $(DEBUG)
+#pypm src/pypm.c $(SDL) $(PORTMIDI) $(PORTTIME) $(DEBUG)
 
 GFX = src/SDL_gfx/SDL_gfxPrimitives.c 
 #GFX = src/SDL_gfx/SDL_gfxBlitFunc.c src/SDL_gfx/SDL_gfxPrimitives.c 
@@ -64,7 +64,7 @@ joystick src/joystick.c $(SDL) $(DEBUG)
 draw src/draw.c $(SDL) $(DEBUG)
 image src/image.c $(SDL) $(DEBUG)
 overlay src/overlay.c $(SDL) $(DEBUG)
-transform src/transform.c src/rotozoom.c src/scale2x.c src/scale_mmx.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
+transform src/transform.c src/rotozoom.c src/scale2x.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
 mask src/mask.c src/bitmask.c $(SDL) $(DEBUG)
 bufferproxy src/bufferproxy.c $(SDL) $(DEBUG)
 pixelarray src/pixelarray.c $(SDL) $(DEBUG)
diff --git a/config.py b/config.py
index f60d64c..6e0d766 100644
--- a/config.py
+++ b/config.py
@@ -119,12 +119,16 @@ def main():
     elif sys.platform == 'win32':
         print_('Using WINDOWS mingw/msys configuration...\n')
         import config_msys as CFG
+    elif sys.platform.startswith('haiku'):
+        # Prefix match covers both 'haiku1' and 'haiku1_x86'.
+        print_('Using Haiku configuration...\n')
+        import config_haiku as CFG
     elif sys.platform == 'darwin':
         print_('Using Darwin configuration...\n')
         import config_darwin as CFG
         additional_platform_setup = open("Setup_Darwin.in", "r").readlines()
     else:
         print_('Using UNIX configuration...\n')
         import config_unix as CFG
     
     if os.path.isfile('Setup'):
diff --git a/pygame.egg-info/SOURCES.txt b/pygame.egg-info/SOURCES.txt
index a7ec677..39c2a55 100644
--- a/pygame.egg-info/SOURCES.txt
+++ b/pygame.egg-info/SOURCES.txt
@@ -301,11 +301,7 @@ src/rect.c
 src/rect.doc
 src/rotozoom.c
 src/rwobject.c
-src/scale.h
 src/scale2x.c
-src/scale_mmx.c
-src/scale_mmx32.c
-src/scale_mmx64.c
 src/scrap.c
 src/scrap.doc
 src/scrap.h
@@ -465,4 +461,4 @@ test/util/build_page/results/.htaccess
 test/util/build_page/results/index.py
 test/util/build_page/results/results.css
 test/util/build_page/upload_results/.htaccess
-test/util/build_page/upload_results/index.py
\ No newline at end of file
+test/util/build_page/upload_results/index.py
diff --git a/setup.py b/setup.py
index 45af61f..bf352dc 100644
--- a/setup.py
+++ b/setup.py
@@ -116,7 +116,7 @@ else:
 #headers to install
 headers = glob.glob(os.path.join('src', '*.h'))
 headers.remove(os.path.join('src', 'numeric_arrayobject.h'))
-headers.remove(os.path.join('src', 'scale.h'))
+#headers.remove(os.path.join('src', 'scale.h'))
 
 #sanity check for any arguments
 if len(sys.argv) == 1:
@@ -354,17 +354,5 @@ if sys.platform == 'win32':
     cmdclass['build_ext'] = WinBuildExt
 
-    # Add the precompiled smooth scale MMX functions to transform.
-    def replace_scale_mmx():
-        for e in extensions:
-            if e.name == 'transform':
-                e.extra_objects.append(
-                    os.path.join('obj', 'win32', 'scale_mmx.obj'))
-                for i in range(len(e.sources)):
-                    if e.sources[i].endswith('scale_mmx.c'):
-                        del e.sources[i]
-                        return
-    replace_scale_mmx()
-
 
 #clean up the list of extensions
 for e in extensions[:]:
diff --git a/src/pgcompat.h b/src/pgcompat.h
index 6b9eea0..e34d2ba 100644
--- a/src/pgcompat.h
+++ b/src/pgcompat.h
@@ -69,9 +69,7 @@
 #define DECREF_MOD(mod)
 
 /* Type header differs. */
-#define TYPE_HEAD(x,y)                          \
-    PyObject_HEAD_INIT(x)                       \
-    0,
+#define TYPE_HEAD(x,y) PyObject_HEAD_INIT(x) 0,
 
 /* Text interface. Use ascii strings. */
 #define Text_Type PyString_Type
diff --git a/src/scale.h b/src/scale.h
deleted file mode 100644
index 0bb0eb2..0000000
--- a/src/scale.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-  pygame - Python Game Library
-  Copyright (C) 2000-2001  Pete Shinners
-  Copyright (C) 2007  Rene Dudfield, Richard Goedeken 
-
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Library General Public
-  License as published by the Free Software Foundation; either
-  version 2 of the License, or (at your option) any later version.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Library General Public License for more details.
-
-  You should have received a copy of the GNU Library General Public
-  License along with this library; if not, write to the Free
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-  Pete Shinners
-  pete@shinners.org
-*/
-
-/* Pentium MMX/SSE smoothscale routines
- * Available on Win32 or GCC on a Pentium.
- * Sorry, no Win64 support yet for Visual C builds, but it can be added.
- */
-
-#if !defined(SCALE_HEADER)
-#define SCALE_HEADER
-
-#if (defined(__GNUC__) && ((defined(__x86_64__) && !defined(_NO_MMX_FOR_X86_64)) || defined(__i386__))) || defined(MS_WIN32)
-#define SCALE_MMX_SUPPORT
-
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
- */
-void filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
-
-void filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
-
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
- */
-void filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
-
-void filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
-
-/* These functions implement a bilinear filter in the X-dimension.
- */
-void filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
-
-void filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
-
-/* These functions implement a bilinear filter in the Y-dimension.
- */
-void filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
-
-void filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
-
-#endif /* #if (defined(__GNUC__) && .....) */
-
-#endif /* #if !defined(SCALE_HEADER) */
diff --git a/src/scale_mmx.c b/src/scale_mmx.c
deleted file mode 100644
index 36e7af0..0000000
--- a/src/scale_mmx.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
-  pygame - Python Game Library
-  Copyright (C) 2000-2001  Pete Shinners
-  Copyright (C) 2007  Rene Dudfield, Richard Goedeken 
-
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Library General Public
-  License as published by the Free Software Foundation; either
-  version 2 of the License, or (at your option) any later version.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Library General Public License for more details.
-
-  You should have received a copy of the GNU Library General Public
-  License along with this library; if not, write to the Free
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-  Pete Shinners
-  pete@shinners.org
-*/
-
-/* Pentium MMX/SSE smoothscale routines
- * These are only compiled with GCC.
- */
-#if defined(__GNUC__)
-/* Choose between the 32 bit and 64 bit versions.
- * Including source code like this may be frowned upon by some,
- * but the alternative is ungainly conditionally compiled code.
- */
-#   if defined(__x86_64__)
-#       include "scale_mmx64.c"
-#   elif defined(__i386__)
-#       include "scale_mmx32.c"
-#   endif
-#endif
diff --git a/src/scale_mmx32.c b/src/scale_mmx32.c
deleted file mode 100644
index 14cd8d2..0000000
--- a/src/scale_mmx32.c
+++ /dev/null
@@ -1,620 +0,0 @@
-/*
-  pygame - Python Game Library
-  Copyright (C) 2000-2001  Pete Shinners
-  Copyright (C) 2007  Rene Dudfield, Richard Goedeken 
-
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Library General Public
-  License as published by the Free Software Foundation; either
-  version 2 of the License, or (at your option) any later version.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Library General Public License for more details.
-
-  You should have received a copy of the GNU Library General Public
-  License along with this library; if not, write to the Free
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-  Pete Shinners
-  pete@shinners.org
-*/
-
-/* Pentium 32 bit SSE/MMX smoothscale filter routines
- * These are written for compilation with GCC only.
- *
- * This file should not depend on anything but the C standard library.
- */
-
-#if !defined(__GNUC__) || !defined(__i386__) || defined(__x86_64__)
-#error "Pygame build bug: should not be compiling this file!"
-#endif
-
-#include <stdint.h>
-typedef uint8_t Uint8;    /* SDL convension */
-typedef uint16_t Uint16;  /* SDL convension */
-#include <stdlib.h>
-#include <memory.h>
-#include "scale.h"
-
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
- */
-void
-filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
-    int srcdiff = srcpitch - (srcwidth * 4);
-    int dstdiff = dstpitch - (dstwidth * 4);
-
-    int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
-    int xrecip = 0x40000000 / xspace;
-    long long One64 = 0x4000400040004000ULL;
-
-    asm __volatile__(" /* MMX code for X-shrink area average filter */ "
-        " pxor          %%mm0,      %%mm0;           "
-        " movd             %6,      %%mm7;           " /* mm7 == xrecipmmx */
-        " punpcklwd     %%mm7,      %%mm7;           "
-        " punpckldq     %%mm7,      %%mm7;           "
-        "1:                                          " /* outer Y-loop */
-        " movl             %5,      %%ecx;           " /* ecx == xcounter */
-        " pxor          %%mm1,      %%mm1;           " /* mm1 == accumulator */
-        " movl             %4,      %%edx;           " /* edx == width */
-        "2:                                          " /* inner X-loop */
-        " cmpl        $0x4000,      %%ecx;           "
-        " jbe              3f;                       "
-        " movd           (%0),      %%mm2;           " /* mm2 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm2;           "
-        " paddw         %%mm2,      %%mm1;           " /* accumulator += srcpix */
-        " subl        $0x4000,      %%ecx;           "
-        " jmp              4f;                       "
-        "3:                                          " /* prepare to output a pixel */
-        " movd          %%ecx,      %%mm2;           "
-        " movq             %2,      %%mm3;           " /* mm3 = 2^14  */
-        " punpcklwd     %%mm2,      %%mm2;           "
-        " punpckldq     %%mm2,      %%mm2;           "
-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm4;           "
-        " psubw         %%mm2,      %%mm3;           " /* mm3 = xfrac */
-        " psllw            $2,      %%mm4;           "
-        " movq          %%mm4,      %%mm5;           " /* mm2 = (srcpix * xcounter >> 16) */
-        " psraw           $15,      %%mm5;           "
-        " pand          %%mm2,      %%mm5;           "
-        " movq          %%mm2,      %%mm6;           "
-        " psraw           $15,      %%mm6;           "
-        " pand          %%mm4,      %%mm6;           "
-        " pmulhw        %%mm4,      %%mm2;           "
-        " paddw         %%mm5,      %%mm2;           "
-        " paddw         %%mm6,      %%mm2;           "
-        " movq          %%mm4,      %%mm5;           " /* mm3 = (srcpix * xfrac) >> 16) */
-        " psraw           $15,      %%mm5;           "
-        " pand          %%mm3,      %%mm5;           "
-        " movq          %%mm3,      %%mm6;           "
-        " psraw           $15,      %%mm6;           "
-        " pand          %%mm4,      %%mm6;           "
-        " pmulhw        %%mm4,      %%mm3;           "
-        " paddw         %%mm5,      %%mm3;           "
-        " paddw         %%mm6,      %%mm3;           "
-        " paddw         %%mm1,      %%mm2;           "
-        " movq          %%mm3,      %%mm1;           " /* accumulator = (srcpix * xfrac) >> 16 */
-        " movq          %%mm7,      %%mm5;           "
-        " psraw           $15,      %%mm5;           "
-        " pand          %%mm2,      %%mm5;           "
-        " movq          %%mm2,      %%mm6;           "
-        " psraw           $15,      %%mm6;           "
-        " pand          %%mm7,      %%mm6;           "
-        " pmulhw        %%mm7,      %%mm2;           "
-        " paddw         %%mm5,      %%mm2;           "
-        " paddw         %%mm6,      %%mm2;           "
-        " packuswb      %%mm0,      %%mm2;           "
-        " movd          %%mm2,       (%1);           "
-        " add              %5,      %%ecx;           "
-        " add              $4,         %1;           "
-        " subl        $0x4000,      %%ecx;           "
-        "4:                                          " /* tail of inner X-loop */
-        " decl          %%edx;                       "
-        " jne              2b;                       "
-        " add              %7,         %0;           " /* srcpix += srcdiff */
-        " add              %8,         %1;           " /* dstpix += dstdiff */
-        " decl             %3;                       "
-        " jne              1b;                       "
-        " emms;                                      "
-        : "+r"(srcpix), "+r"(dstpix)                   /* outputs */
-        : "m"(One64),   "m"(height), "m"(srcwidth),
-          "m"(xspace),  "m"(xrecip), "m"(srcdiff),  "m"(dstdiff)  /* input */
-        : "%ecx","%edx"     /* clobbered */
-        );
-}
-
-void
-filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
-    int srcdiff = srcpitch - (srcwidth * 4);
-    int dstdiff = dstpitch - (dstwidth * 4);
-
-    int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
-    int xrecip = 0x40000000 / xspace;
-    long long One64 = 0x4000400040004000ULL;
-
-    asm __volatile__(" /* MMX code for X-shrink area average filter */ "
-        " pxor          %%mm0,      %%mm0;           "
-        " movd             %6,      %%mm7;           " /* mm7 == xrecipmmx */
-        " movq             %2,      %%mm6;           " /* mm6 = 2^14  */
-        " pshufw    $0, %%mm7,      %%mm7;           "
-        "1:                                          " /* outer Y-loop */
-        " movl             %5,      %%ecx;           " /* ecx == xcounter */
-        " pxor          %%mm1,      %%mm1;           " /* mm1 == accumulator */
-        " movl             %4,      %%edx;           " /* edx == width */
-        "2:                                          " /* inner X-loop */
-        " cmpl        $0x4000,      %%ecx;           "
-        " jbe              3f;                       "
-        " movd           (%0),      %%mm2;           " /* mm2 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm2;           "
-        " paddw         %%mm2,      %%mm1;           " /* accumulator += srcpix */
-        " subl        $0x4000,      %%ecx;           "
-        " jmp              4f;                       "
-        "3:                                          " /* prepare to output a pixel */
-        " movd          %%ecx,      %%mm2;           "
-        " movq          %%mm6,      %%mm3;           " /* mm3 = 2^14  */
-        " pshufw    $0, %%mm2,      %%mm2;           "
-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm4;           "
-        " psubw         %%mm2,      %%mm3;           " /* mm3 = xfrac */
-        " psllw            $2,      %%mm4;           "
-        " pmulhuw       %%mm4,      %%mm2;           " /* mm2 = (srcpix * xcounter >> 16) */
-        " pmulhuw       %%mm4,      %%mm3;           " /* mm3 = (srcpix * xfrac) >> 16 */
-        " paddw         %%mm1,      %%mm2;           "
-        " movq          %%mm3,      %%mm1;           " /* accumulator = (srcpix * xfrac) >> 16 */
-        " pmulhuw       %%mm7,      %%mm2;           "
-        " packuswb      %%mm0,      %%mm2;           "
-        " movd          %%mm2,       (%1);           "
-        " add              %5,      %%ecx;           "
-        " add              $4,         %1;           "
-        " subl        $0x4000,      %%ecx;           "
-        "4:                                          " /* tail of inner X-loop */
-        " decl          %%edx;                       "
-        " jne              2b;                       "
-        " add              %7,         %0;           " /* srcpix += srcdiff */
-        " add              %8,         %1;           " /* dstpix += dstdiff */
-        " decl             %3;                       "
-        " jne              1b;                       "
-        " emms;                                      "
-        : "+r"(srcpix), "+r"(dstpix)                   /* outputs */
-        : "m"(One64),   "m"(height), "m"(srcwidth),
-          "m"(xspace),  "m"(xrecip), "m"(srcdiff),  "m"(dstdiff)  /* input */
-        : "%ecx","%edx"     /* clobbered */
-        );
-}
-
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
- */
-void
-filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
-    Uint16 *templine;
-    int srcdiff = srcpitch - (width * 4);
-    int dstdiff = dstpitch - (width * 4);
-    int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
-    int yrecip = 0x40000000 / yspace;
-    long long One64 = 0x4000400040004000ULL;
-
-    /* allocate and clear a memory area for storing the accumulator line */
-    templine = (Uint16 *) malloc(dstpitch * 2);
-    if (templine == 0) return;
-    memset(templine, 0, dstpitch * 2);
-
-    asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
-        " movl             %5,      %%ecx;           " /* ecx == ycounter */
-        " pxor          %%mm0,      %%mm0;           "
-        " movd             %6,      %%mm7;           " /* mm7 == yrecipmmx */
-        " punpcklwd     %%mm7,      %%mm7;           "
-        " punpckldq     %%mm7,      %%mm7;           "
-        "1:                                          " /* outer Y-loop */
-        " movl             %2,      %%eax;           " /* rax == accumulate */
-        " cmpl        $0x4000,      %%ecx;           "
-        " jbe              3f;                       "
-        " movl             %4,      %%edx;           " /* edx == width */
-        "2:                                          "
-        " movd           (%0),      %%mm1;           "
-        " add              $4,         %0;           "
-        " movq        (%%eax),      %%mm2;           "
-        " punpcklbw     %%mm0,      %%mm1;           "
-        " paddw         %%mm1,      %%mm2;           "
-        " movq          %%mm2,    (%%eax);           "
-        " add              $8,      %%eax;           "
-        " decl          %%edx;                       "
-        " jne              2b;                       "
-        " subl        $0x4000,      %%ecx;           "
-        " jmp              6f;                       "
-        "3:                                          " /* prepare to output a line */
-        " movd          %%ecx,      %%mm1;           "
-        " movl             %4,      %%edx;           " /* edx = width */
-        " movq             %9,      %%mm6;           " /* mm6 = 2^14  */
-        " punpcklwd     %%mm1,      %%mm1;           "
-        " punpckldq     %%mm1,      %%mm1;           "
-        " psubw         %%mm1,      %%mm6;           " /* mm6 = yfrac */
-        "4:                                          "
-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm4;           "
-        " movq        (%%eax),      %%mm5;           " /* mm5 = accumulate */
-        " movq          %%mm6,      %%mm3;           "
-        " psllw            $2,      %%mm4;           "
-        " movq          %%mm4,      %%mm0;           " /* mm3 = (srcpix * yfrac) >> 16) */
-        " psraw           $15,      %%mm0;           "
-        " pand          %%mm3,      %%mm0;           "
-        " movq          %%mm3,      %%mm2;           "
-        " psraw           $15,      %%mm2;           "
-        " pand          %%mm4,      %%mm2;           "
-        " pmulhw        %%mm4,      %%mm3;           "
-        " paddw         %%mm0,      %%mm3;           "
-        " paddw         %%mm2,      %%mm3;           "
-        " movq          %%mm1,      %%mm0;           " /* mm4 = (srcpix * ycounter >> 16) */
-        " psraw           $15,      %%mm0;           "
-        " pand          %%mm4,      %%mm0;           "
-        " movq          %%mm4,      %%mm2;           "
-        " psraw           $15,      %%mm2;           "
-        " pand          %%mm1,      %%mm2;           "
-        " pmulhw        %%mm1,      %%mm4;           "
-        " paddw         %%mm0,      %%mm4;           "
-        " paddw         %%mm2,      %%mm4;           "
-        " movq          %%mm3,    (%%eax);           "
-        " paddw         %%mm5,      %%mm4;           "
-        " add              $8,      %%eax;           "
-        " movq          %%mm7,      %%mm0;           "
-        " psraw           $15,      %%mm0;           "
-        " pand          %%mm4,      %%mm0;           "
-        " movq          %%mm4,      %%mm2;           "
-        " psraw           $15,      %%mm2;           "
-        " pand          %%mm7,      %%mm2;           "
-        " pmulhw        %%mm7,      %%mm4;           "
-        " paddw         %%mm0,      %%mm4;           "
-        " paddw         %%mm2,      %%mm4;           "
-        " pxor          %%mm0,      %%mm0;           "
-        " packuswb      %%mm0,      %%mm4;           "
-        " movd          %%mm4,       (%1);           "
-        " add              $4,         %1;           "
-        " decl          %%edx;                       "
-        " jne              4b;                       "
-        " add              %8,         %1;           " /* dstpix += dstdiff */
-        " addl             %5,      %%ecx;           "
-        " subl        $0x4000,      %%ecx;           "
-        "6:                                          " /* tail of outer Y-loop */
-        " add              %7,         %0;           " /* srcpix += srcdiff */
-        " decl             %3;                       "
-        " jne              1b;                       "
-        " emms;                                      "
-        : "+r"(srcpix),  "+r"(dstpix)     /* outputs */
-        : "m"(templine), "m"(srcheight), "m"(width),  "m"(yspace),
-          "m"(yrecip),   "m"(srcdiff),   "m"(dstdiff),"m"(One64)  /* input */
-        : "%ecx","%edx","%eax"           /* clobbered */
-        );
-
-    /* free the temporary memory */
-    free(templine);
-}
-
-void
-filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
-    Uint16 *templine;
-    int srcdiff = srcpitch - (width * 4);
-    int dstdiff = dstpitch - (width * 4);
-    int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
-    int yrecip = 0x40000000 / yspace;
-    long long One64 = 0x4000400040004000ULL;
-
-    /* allocate and clear a memory area for storing the accumulator line */
-    templine = (Uint16 *) malloc(dstpitch * 2);
-    if (templine == 0) return;
-    memset(templine, 0, dstpitch * 2);
-    asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
-        " movl             %5,      %%ecx;           " /* ecx == ycounter */
-        " pxor          %%mm0,      %%mm0;           "
-        " movd             %6,      %%mm7;           " /* mm7 == yrecipmmx */
-        " pshufw    $0, %%mm7,      %%mm7;           "
-        "1:                                          " /* outer Y-loop */
-        " movl             %2,      %%eax;           " /* rax == accumulate */
-        " cmpl        $0x4000,      %%ecx;           "
-        " jbe              3f;                       "
-        " movl             %4,      %%edx;           " /* edx == width */
-        "2:                                          "
-        " movd           (%0),      %%mm1;           "
-        " add              $4,         %0;           "
-        " movq        (%%eax),      %%mm2;           "
-        " punpcklbw     %%mm0,      %%mm1;           "
-        " paddw         %%mm1,      %%mm2;           "
-        " movq          %%mm2,    (%%eax);           "
-        " add              $8,      %%eax;           "
-        " decl          %%edx;                       "
-        " jne              2b;                       "
-        " subl        $0x4000,      %%ecx;           "
-        " jmp              6f;                       "
-        "3:                                          " /* prepare to output a line */
-        " movd          %%ecx,      %%mm1;           "
-        " movl             %4,      %%edx;           " /* edx = width */
-        " movq             %9,      %%mm6;           " /* mm6 = 2^14  */
-        " pshufw    $0, %%mm1,      %%mm1;           "
-        " psubw         %%mm1,      %%mm6;           " /* mm6 = yfrac */
-        "4:                                          "
-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm4;           "
-        " movq        (%%eax),      %%mm5;           " /* mm5 = accumulate */
-        " movq          %%mm6,      %%mm3;           "
-        " psllw            $2,      %%mm4;           "
-        " pmulhuw       %%mm4,      %%mm3;           " /* mm3 = (srcpix * yfrac) >> 16 */
-        " pmulhuw       %%mm1,      %%mm4;           " /* mm4 = (srcpix * ycounter >> 16) */
-        " movq          %%mm3,    (%%eax);           "
-        " paddw         %%mm5,      %%mm4;           "
-        " add              $8,      %%eax;           "
-        " pmulhuw       %%mm7,      %%mm4;           "
-        " packuswb      %%mm0,      %%mm4;           "
-        " movd          %%mm4,       (%1);           "
-        " add              $4,         %1;           "
-        " decl          %%edx;                       "
-        " jne              4b;                       "
-        " add              %8,         %1;           " /* dstpix += dstdiff */
-        " addl             %5,      %%ecx;           "
-        " subl        $0x4000,      %%ecx;           "
-        "6:                                          " /* tail of outer Y-loop */
-        " add              %7,         %0;           " /* srcpix += srcdiff */
-        " decl             %3;                       "
-        " jne              1b;                       "
-        " emms;                                      "
-        : "+r"(srcpix),  "+r"(dstpix)     /* outputs */
-        : "m"(templine), "m"(srcheight), "m"(width),  "m"(yspace),
-          "m"(yrecip),   "m"(srcdiff),   "m"(dstdiff),"m"(One64)  /* input */
-        : "%ecx","%edx","%eax"           /* clobbered */
-        );
-
-    /* free the temporary memory */
-    free(templine);
-}
-
-/* These functions implement a bilinear filter in the X-dimension.
- */
-void
-filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
-    int *xidx0, *xmult0, *xmult1;
-    int x, y;
-    int factorwidth = 8;
-  	long long One64 = 0x0100010001000100ULL;
-
-    /* Allocate memory for factors */
-    xidx0 = malloc(dstwidth * 4);
-    if (xidx0 == 0) return;
-    xmult0 = (int *) malloc(dstwidth * factorwidth);
-    xmult1 = (int *) malloc(dstwidth * factorwidth);
-    if (xmult0 == 0 || xmult1 == 0)
-    {
-        free(xidx0);
-        if (xmult0) free(xmult0);
-        if (xmult1) free(xmult1);
-    }
-
-    /* Create multiplier factors and starting indices and put them in arrays */
-    for (x = 0; x < dstwidth; x++)
-    {
-        int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
-        int xm0 = 0x100 - xm1;
-        xidx0[x] = x * (srcwidth - 1) / dstwidth;
-        xmult1[x*2]   = xm1 | (xm1 << 16);
-        xmult1[x*2+1] = xm1 | (xm1 << 16);
-        xmult0[x*2]   = xm0 | (xm0 << 16);
-        xmult0[x*2+1] = xm0 | (xm0 << 16);
-    }
-
-    /* Do the scaling in raster order so we don't trash the cache */
-    for (y = 0; y < height; y++)
-    {
-        Uint8 *srcrow0 = srcpix + y * srcpitch;
-        Uint8 *dstrow = dstpix + y * dstpitch;
-        int *xm0 = xmult0;
-        int *x0 = xidx0;
-    	int width = dstwidth;
-        asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
-             " pxor          %%mm0,      %%mm0;           "
-             " movq             %5,      %%mm7;           "
-             "1:                                          "
-             " movl           (%2),      %%eax;           " /* get xidx0[x] */
-             " add              $4,         %2;           "
-             " movq          %%mm7,      %%mm2;           "
-             " movq           (%0),      %%mm1;           " /* load mult0 */
-             " add              $8,         %0;           "
-             " psubw         %%mm1,      %%mm2;           " /* load mult1 */
-             " movd   (%4,%%eax,4),      %%mm4;           "
-             " movd  4(%4,%%eax,4),      %%mm5;           "
-             " punpcklbw     %%mm0,      %%mm4;           "
-             " punpcklbw     %%mm0,      %%mm5;           "
-             " pmullw        %%mm1,      %%mm4;           "
-             " pmullw        %%mm2,      %%mm5;           "
-             " paddw         %%mm4,      %%mm5;           "
-             " psrlw            $8,      %%mm5;           "
-             " packuswb      %%mm0,      %%mm5;           "
-             " movd          %%mm5,       (%1);           "
-             " add              $4,         %1;           "
-             " decl             %3;                       "
-             " jne              1b;                       "
-             " emms;                                      "
-             : "+r"(xm0),    "+r"(dstrow), "+r"(x0), "+m"(width)  /* outputs */
-             : "S"(srcrow0), "m"(One64)    /* input */
-             : "%eax"            /* clobbered */
-             );
-    }
-
-    /* free memory */
-    free(xidx0);
-    free(xmult0);
-    free(xmult1);
-}
-
-void
-filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
-    int *xidx0, *xmult0, *xmult1;
-    int x, y;
-    int factorwidth = 8;
-  	long long One64 = 0x0100010001000100ULL;
-
-    /* Allocate memory for factors */
-    xidx0 = malloc(dstwidth * 4);
-    if (xidx0 == 0) return;
-    xmult0 = (int *) malloc(dstwidth * factorwidth);
-    xmult1 = (int *) malloc(dstwidth * factorwidth);
-    if (xmult0 == 0 || xmult1 == 0)
-    {
-        free(xidx0);
-        if (xmult0) free(xmult0);
-        if (xmult1) free(xmult1);
-    }
-
-    /* Create multiplier factors and starting indices and put them in arrays */
-    for (x = 0; x < dstwidth; x++)
-    {
-        int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
-        int xm0 = 0x100 - xm1;
-        xidx0[x] = x * (srcwidth - 1) / dstwidth;
-        xmult1[x*2]   = xm1 | (xm1 << 16);
-        xmult1[x*2+1] = xm1 | (xm1 << 16);
-        xmult0[x*2]   = xm0 | (xm0 << 16);
-        xmult0[x*2+1] = xm0 | (xm0 << 16);
-    }
-
-    /* Do the scaling in raster order so we don't trash the cache */
-    for (y = 0; y < height; y++)
-    {
-        Uint8 *srcrow0 = srcpix + y * srcpitch;
-        Uint8 *dstrow = dstpix + y * dstpitch;
-        int *xm0 = xmult0;
-        int *x0 = xidx0;
-    	int width = dstwidth;
-        asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
-             " pxor          %%mm0,      %%mm0;           "
-             " movq             %5,      %%mm7;           "
-             "1:                                          "
-             " movl           (%2),      %%eax;           " /* get xidx0[x] */
-             " add              $4,         %2;           "
-             " movq          %%mm7,      %%mm2;           "
-             " movq           (%0),      %%mm1;           " /* load mult0 */
-             " add              $8,         %0;           "
-             " psubw         %%mm1,      %%mm2;           " /* load mult1 */
-             " movd   (%4,%%eax,4),      %%mm4;           "
-             " movd  4(%4,%%eax,4),      %%mm5;           "
-             " punpcklbw     %%mm0,      %%mm4;           "
-             " punpcklbw     %%mm0,      %%mm5;           "
-             " pmullw        %%mm1,      %%mm4;           "
-             " pmullw        %%mm2,      %%mm5;           "
-             " paddw         %%mm4,      %%mm5;           "
-             " psrlw            $8,      %%mm5;           "
-             " packuswb      %%mm0,      %%mm5;           "
-             " movd          %%mm5,       (%1);           "
-             " add              $4,         %1;           "
-             " decl             %3;                       "
-             " jne              1b;                       "
-             " emms;                                      "
-             : "+r"(xm0),    "+r"(dstrow), "+r"(x0), "+m"(width)  /* outputs */
-             : "S"(srcrow0), "m"(One64)    /* input */
-             : "%eax"            /* clobbered */
-             );
-    }
-
-    /* free memory */
-    free(xidx0);
-    free(xmult0);
-    free(xmult1);
-}
-
-/* These functions implement a bilinear filter in the Y-dimension.
- */
-void filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
-    int y;
-
-    for (y = 0; y < dstheight; y++)
-    {
-        int yidx0 = y * (srcheight - 1) / dstheight;
-        Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
-        Uint8 *srcrow1 = srcrow0 + srcpitch;
-        int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
-        int ymult0 = 0x0100 - ymult1;
-        Uint8 *dstrow = dstpix + y * dstpitch;
-        asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
-             " movl          %5,      %%eax;                      "
-             " movd          %3,      %%mm1;                      "
-             " movd          %4,      %%mm2;                      "
-             " pxor       %%mm0,      %%mm0;                      "
-             " punpcklwd  %%mm1,      %%mm1;                      "
-             " punpckldq  %%mm1,      %%mm1;                      "
-             " punpcklwd  %%mm2,      %%mm2;                      "
-             " punpckldq  %%mm2,      %%mm2;                      "
-             "1:                                                  "
-             " movd        (%0),      %%mm4;                      "
-             " add           $4,         %0;                      "
-             " movd        (%1),      %%mm5;                      "
-             " add           $4,         %1;                      "
-             " punpcklbw  %%mm0,     %%mm4;                       "
-             " punpcklbw  %%mm0,     %%mm5;                       "
-             " pmullw     %%mm1,     %%mm4;                       "
-             " pmullw     %%mm2,     %%mm5;                       "
-             " paddw      %%mm4,     %%mm5;                       "
-             " psrlw         $8,     %%mm5;                       "
-             " packuswb   %%mm0,     %%mm5;                       "
-             " movd       %%mm5,      (%2);                       "
-             " add           $4,        %2;                       "
-             " decl       %%eax;                                  "
-             " jne           1b;                                  "
-             " emms;                                              "
-             : "+r"(srcrow0), "+r"(srcrow1),"+r"(dstrow)   /* no outputs */
-             : "m"(ymult0),   "m"(ymult1),  "m"(width)    /* input */
-             : "%eax"        /* clobbered */
-             );
-    }
-}
-
-void filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
-    int y;
-
-    for (y = 0; y < dstheight; y++)
-    {
-        int yidx0 = y * (srcheight - 1) / dstheight;
-        Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
-        Uint8 *srcrow1 = srcrow0 + srcpitch;
-        int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
-        int ymult0 = 0x0100 - ymult1;
-        Uint8 *dstrow = dstpix + y * dstpitch;
-        asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
-             " movl          %5,      %%eax;                      "
-             " movd          %3,      %%mm1;                      "
-             " movd          %4,      %%mm2;                      "
-             " pxor       %%mm0,      %%mm0;                      "
-             " pshufw      $0, %%mm1, %%mm1;                      "
-             " pshufw      $0, %%mm2, %%mm2;                      "
-             "1:                                                  "
-             " movd        (%0),      %%mm4;                      "
-             " add           $4,         %0;                      "
-             " movd        (%1),      %%mm5;                      "
-             " add           $4,         %1;                      "
-             " punpcklbw  %%mm0,     %%mm4;                       "
-             " punpcklbw  %%mm0,     %%mm5;                       "
-             " pmullw     %%mm1,     %%mm4;                       "
-             " pmullw     %%mm2,     %%mm5;                       "
-             " paddw      %%mm4,     %%mm5;                       "
-             " psrlw         $8,     %%mm5;                       "
-             " packuswb   %%mm0,     %%mm5;                       "
-             " movd       %%mm5,      (%2);                       "
-             " add           $4,        %2;                       "
-             " decl       %%eax;                                  "
-             " jne           1b;                                  "
-             " emms;                                              "
-             : "+r"(srcrow0), "+r"(srcrow1),"+r"(dstrow)   /* no outputs */
-             : "m"(ymult0),   "m"(ymult1),  "m"(width)    /* input */
-             : "%eax"        /* clobbered */
-             );
-    }
-}
diff --git a/src/scale_mmx64.c b/src/scale_mmx64.c
deleted file mode 100644
index e897f76..0000000
--- a/src/scale_mmx64.c
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
-  pygame - Python Game Library
-  Copyright (C) 2000-2001  Pete Shinners
-  Copyright (C) 2007  Rene Dudfield, Richard Goedeken 
-
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Library General Public
-  License as published by the Free Software Foundation; either
-  version 2 of the License, or (at your option) any later version.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Library General Public License for more details.
-
-  You should have received a copy of the GNU Library General Public
-  License along with this library; if not, write to the Free
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-  Pete Shinners
-  pete@shinners.org
-*/
-
-/* Pentium 64 bit SSE/MMX smoothscale routines
- * These are written for compilation with GCC only.
- *
- * This file should not depend on anything but the C standard library.
- */
-
-#if !defined(__GNUC__) || !defined(__x86_64__)
-#error "Pygame build bug: should not be compiling this file!"
-#endif
-
-#include <stdint.h>
-typedef uint8_t Uint8;    /* SDL convension */
-typedef uint16_t Uint16;  /* SDL convension */
-#include <stdlib.h>
-#include <memory.h>
-#include "scale.h"
-
-/* These functions implement an area-averaging shrinking filter in the X-dimension.
- */
-void
-filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
-    int srcdiff = srcpitch - (srcwidth * 4);
-    int dstdiff = dstpitch - (dstwidth * 4);
-
-    int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
-    int xrecip = 0x40000000 / xspace;
-    long long One64 = 0x4000400040004000ULL;
-    long long srcdiff64 = srcdiff;
-    long long dstdiff64 = dstdiff;
-    asm __volatile__(" /* MMX code for X-shrink area average filter */ "
-        " pxor          %%mm0,      %%mm0;           "
-        " movd             %6,      %%mm7;           " /* mm7 == xrecipmmx */
-        " punpcklwd     %%mm7,      %%mm7;           "
-        " punpckldq     %%mm7,      %%mm7;           "
-        "1:                                          " /* outer Y-loop */
-        " movl             %5,      %%ecx;           " /* ecx == xcounter */
-        " pxor          %%mm1,      %%mm1;           " /* mm1 == accumulator */
-        " movl             %4,      %%edx;           " /* edx == width */
-        "2:                                          " /* inner X-loop */
-        " cmpl        $0x4000,      %%ecx;           "
-        " jbe              3f;                       "
-        " movd           (%0),      %%mm2;           " /* mm2 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm2;           "
-        " paddw         %%mm2,      %%mm1;           " /* accumulator += srcpix */
-        " subl        $0x4000,      %%ecx;           "
-        " jmp              4f;                       "
-        "3:                                          " /* prepare to output a pixel */
-        " movd          %%ecx,      %%mm2;           "
-        " movq             %2,      %%mm3;           " /* mm3 = 2^14  */
-        " punpcklwd     %%mm2,      %%mm2;           "
-        " punpckldq     %%mm2,      %%mm2;           "
-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm4;           "
-        " psubw         %%mm2,      %%mm3;           " /* mm3 = xfrac */
-        " psllw            $2,      %%mm4;           "
-        " movq          %%mm4,      %%mm5;           " /* mm2 = (srcpix * xcounter >> 16) */
-        " psraw           $15,      %%mm5;           "
-        " pand          %%mm2,      %%mm5;           "
-        " movq          %%mm2,      %%mm6;           "
-        " psraw           $15,      %%mm6;           "
-        " pand          %%mm4,      %%mm6;           "
-        " pmulhw        %%mm4,      %%mm2;           "
-        " paddw         %%mm5,      %%mm2;           "
-        " paddw         %%mm6,      %%mm2;           "
-        " movq          %%mm4,      %%mm5;           " /* mm3 = (srcpix * xfrac) >> 16) */
-        " psraw           $15,      %%mm5;           "
-        " pand          %%mm3,      %%mm5;           "
-        " movq          %%mm3,      %%mm6;           "
-        " psraw           $15,      %%mm6;           "
-        " pand          %%mm4,      %%mm6;           "
-        " pmulhw        %%mm4,      %%mm3;           "
-        " paddw         %%mm5,      %%mm3;           "
-        " paddw         %%mm6,      %%mm3;           "
-        " paddw         %%mm1,      %%mm2;           "
-        " movq          %%mm3,      %%mm1;           " /* accumulator = (srcpix * xfrac) >> 16 */
-        " movq          %%mm7,      %%mm5;           "
-        " psraw           $15,      %%mm5;           "
-        " pand          %%mm2,      %%mm5;           "
-        " movq          %%mm2,      %%mm6;           "
-        " psraw           $15,      %%mm6;           "
-        " pand          %%mm7,      %%mm6;           "
-        " pmulhw        %%mm7,      %%mm2;           "
-        " paddw         %%mm5,      %%mm2;           "
-        " paddw         %%mm6,      %%mm2;           "
-        " packuswb      %%mm0,      %%mm2;           "
-        " movd          %%mm2,       (%1);           "
-        " add              %5,      %%ecx;           "
-        " add              $4,         %1;           "
-        " subl        $0x4000,      %%ecx;           "
-        "4:                                          " /* tail of inner X-loop */
-        " decl          %%edx;                       "
-        " jne              2b;                       "
-        " add              %7,         %0;           " /* srcpix += srcdiff */
-        " add              %8,         %1;           " /* dstpix += dstdiff */
-        " decl             %3;                       "
-        " jne              1b;                       "
-        " emms;                                      "
-        : "+r"(srcpix), "+r"(dstpix)  /* outputs */
-        : "m"(One64),   "m"(height), "m"(srcwidth),
-          "m"(xspace),  "m"(xrecip), "m"(srcdiff64), "m"(dstdiff64)     /* inputs */
-        : "%ecx","%edx"               /* clobbered */
-        );
-}
-
-void
-filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
-    int srcdiff = srcpitch - (srcwidth * 4);
-    int dstdiff = dstpitch - (dstwidth * 4);
-
-    int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
-    int xrecip = 0x40000000 / xspace;
-    long long One64 = 0x4000400040004000ULL;
-    long long srcdiff64 = srcdiff;
-    long long dstdiff64 = dstdiff;
-    asm __volatile__(" /* MMX code for X-shrink area average filter */ "
-        " pxor          %%mm0,      %%mm0;           "
-        " movd             %6,      %%mm7;           " /* mm7 == xrecipmmx */
-        " movq             %2,      %%mm6;           " /* mm6 = 2^14  */
-        " pshufw    $0, %%mm7,      %%mm7;           "
-        "1:                                          " /* outer Y-loop */
-        " movl             %5,      %%ecx;           " /* ecx == xcounter */
-        " pxor          %%mm1,      %%mm1;           " /* mm1 == accumulator */
-        " movl             %4,      %%edx;           " /* edx == width */
-        "2:                                          " /* inner X-loop */
-        " cmpl        $0x4000,      %%ecx;           "
-        " jbe              3f;                       "
-        " movd           (%0),      %%mm2;           " /* mm2 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm2;           "
-        " paddw         %%mm2,      %%mm1;           " /* accumulator += srcpix */
-        " subl        $0x4000,      %%ecx;           "
-        " jmp              4f;                       "
-        "3:                                          " /* prepare to output a pixel */
-        " movd          %%ecx,      %%mm2;           "
-        " movq          %%mm6,      %%mm3;           " /* mm3 = 2^14  */
-        " pshufw    $0, %%mm2,      %%mm2;           "
-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm4;           "
-        " psubw         %%mm2,      %%mm3;           " /* mm3 = xfrac */
-        " psllw            $2,      %%mm4;           "
-        " pmulhuw       %%mm4,      %%mm2;           " /* mm2 = (srcpix * xcounter >> 16) */
-        " pmulhuw       %%mm4,      %%mm3;           " /* mm3 = (srcpix * xfrac) >> 16 */
-        " paddw         %%mm1,      %%mm2;           "
-        " movq          %%mm3,      %%mm1;           " /* accumulator = (srcpix * xfrac) >> 16 */
-        " pmulhuw       %%mm7,      %%mm2;           "
-        " packuswb      %%mm0,      %%mm2;           "
-        " movd          %%mm2,       (%1);           "
-        " add              %5,      %%ecx;           "
-        " add              $4,         %1;           "
-        " subl        $0x4000,      %%ecx;           "
-        "4:                                          " /* tail of inner X-loop */
-        " decl          %%edx;                       "
-        " jne              2b;                       "
-        " add              %7,         %0;           " /* srcpix += srcdiff */
-        " add              %8,         %1;           " /* dstpix += dstdiff */
-        " decl             %3;                       "
-        " jne              1b;                       "
-        " emms;                                      "
-        : "+r"(srcpix), "+r"(dstpix)  /* outputs */
-        : "m"(One64),   "m"(height), "m"(srcwidth),
-          "m"(xspace),  "m"(xrecip), "m"(srcdiff64), "m"(dstdiff64)     /* inputs */
-        : "%ecx","%edx"               /* clobbered */
-        );
-}
-
-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
- */
-void
-filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
-    Uint16 *templine;
-    int srcdiff = srcpitch - (width * 4);
-    int dstdiff = dstpitch - (width * 4);
-    int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
-    int yrecip = 0x40000000 / yspace;
-    long long One64 = 0x4000400040004000ULL;
-
-    /* allocate and clear a memory area for storing the accumulator line */
-    templine = (Uint16 *) malloc(dstpitch * 2);
-    if (templine == 0) return;
-    memset(templine, 0, dstpitch * 2);
-    long long srcdiff64 = srcdiff;
-    long long dstdiff64 = dstdiff;
-    asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
-        " movl             %5,      %%ecx;           " /* ecx == ycounter */
-        " pxor          %%mm0,      %%mm0;           "
-        " movd             %6,      %%mm7;           " /* mm7 == yrecipmmx */
-        " punpcklwd     %%mm7,      %%mm7;           "
-        " punpckldq     %%mm7,      %%mm7;           "
-        "1:                                          " /* outer Y-loop */
-        " mov              %2,      %%rax;           " /* rax == accumulate */
-        " cmpl        $0x4000,      %%ecx;           "
-        " jbe              3f;                       "
-        " movl             %4,      %%edx;           " /* edx == width */
-        "2:                                          "
-        " movd           (%0),      %%mm1;           "
-        " add              $4,         %0;           "
-        " movq        (%%rax),      %%mm2;           "
-        " punpcklbw     %%mm0,      %%mm1;           "
-        " paddw         %%mm1,      %%mm2;           "
-        " movq          %%mm2,    (%%rax);           "
-        " add              $8,      %%rax;           "
-        " decl          %%edx;                       "
-        " jne              2b;                       "
-        " subl        $0x4000,      %%ecx;           "
-        " jmp              6f;                       "
-        "3:                                          " /* prepare to output a line */
-        " movd          %%ecx,      %%mm1;           "
-        " movl             %4,      %%edx;           " /* edx = width */
-        " movq             %9,      %%mm6;           " /* mm6 = 2^14  */
-        " punpcklwd     %%mm1,      %%mm1;           "
-        " punpckldq     %%mm1,      %%mm1;           "
-        " psubw         %%mm1,      %%mm6;           " /* mm6 = yfrac */
-        "4:                                          "
-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm4;           "
-        " movq        (%%rax),      %%mm5;           " /* mm5 = accumulate */
-        " movq          %%mm6,      %%mm3;           "
-        " psllw            $2,      %%mm4;           "
-        " movq          %%mm4,      %%mm0;           " /* mm3 = (srcpix * yfrac) >> 16) */
-        " psraw           $15,      %%mm0;           "
-        " pand          %%mm3,      %%mm0;           "
-        " movq          %%mm3,      %%mm2;           "
-        " psraw           $15,      %%mm2;           "
-        " pand          %%mm4,      %%mm2;           "
-        " pmulhw        %%mm4,      %%mm3;           "
-        " paddw         %%mm0,      %%mm3;           "
-        " paddw         %%mm2,      %%mm3;           "
-        " movq          %%mm1,      %%mm0;           " /* mm4 = (srcpix * ycounter >> 16) */
-        " psraw           $15,      %%mm0;           "
-        " pand          %%mm4,      %%mm0;           "
-        " movq          %%mm4,      %%mm2;           "
-        " psraw           $15,      %%mm2;           "
-        " pand          %%mm1,      %%mm2;           "
-        " pmulhw        %%mm1,      %%mm4;           "
-        " paddw         %%mm0,      %%mm4;           "
-        " paddw         %%mm2,      %%mm4;           "
-        " movq          %%mm3,    (%%rax);           "
-        " paddw         %%mm5,      %%mm4;           "
-        " add              $8,      %%rax;           "
-        " movq          %%mm7,      %%mm0;           "
-        " psraw           $15,      %%mm0;           "
-        " pand          %%mm4,      %%mm0;           "
-        " movq          %%mm4,      %%mm2;           "
-        " psraw           $15,      %%mm2;           "
-        " pand          %%mm7,      %%mm2;           "
-        " pmulhw        %%mm7,      %%mm4;           "
-        " paddw         %%mm0,      %%mm4;           "
-        " paddw         %%mm2,      %%mm4;           "
-        " pxor          %%mm0,      %%mm0;           "
-        " packuswb      %%mm0,      %%mm4;           "
-        " movd          %%mm4,       (%1);           "
-        " add              $4,         %1;           "
-        " decl          %%edx;                       "
-        " jne              4b;                       "
-        " add              %8,         %1;           " /* dstpix += dstdiff */
-        " addl             %5,      %%ecx;           "
-        " subl        $0x4000,      %%ecx;           "
-        "6:                                          " /* tail of outer Y-loop */
-        " add              %7,         %0;           " /* srcpix += srcdiff */
-        " decl             %3;                       "
-        " jne              1b;                       "
-        " emms;                                      "
-        : "+r"(srcpix), "+r"(dstpix)    /* outputs */
-        : "m"(templine),"m"(srcheight), "m"(width),     "m"(yspace),  
-          "m"(yrecip),  "m"(srcdiff64), "m"(dstdiff64), "m"(One64)  /* input */
-        : "%ecx","%edx","%rax"          /* clobbered */
-        );
-
-    /* free the temporary memory */
-    free(templine);
-}
-
-void
-filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
-    Uint16 *templine;
-    int srcdiff = srcpitch - (width * 4);
-    int dstdiff = dstpitch - (width * 4);
-    int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
-    int yrecip = 0x40000000 / yspace;
-    long long One64 = 0x4000400040004000ULL;
-
-    /* allocate and clear a memory area for storing the accumulator line */
-    templine = (Uint16 *) malloc(dstpitch * 2);
-    if (templine == 0) return;
-    memset(templine, 0, dstpitch * 2);
-    long long srcdiff64 = srcdiff;
-    long long dstdiff64 = dstdiff;
-    asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
-        " movl             %5,      %%ecx;           " /* ecx == ycounter */
-        " pxor          %%mm0,      %%mm0;           "
-        " movd             %6,      %%mm7;           " /* mm7 == yrecipmmx */
-        " pshufw    $0, %%mm7,      %%mm7;           "
-        "1:                                          " /* outer Y-loop */
-        " mov              %2,      %%rax;           " /* rax == accumulate */
-        " cmpl        $0x4000,      %%ecx;           "
-        " jbe              3f;                       "
-        " movl             %4,      %%edx;           " /* edx == width */
-        "2:                                          "
-        " movd           (%0),      %%mm1;           "
-        " add              $4,         %0;           "
-        " movq        (%%rax),      %%mm2;           "
-        " punpcklbw     %%mm0,      %%mm1;           "
-        " paddw         %%mm1,      %%mm2;           "
-        " movq          %%mm2,    (%%rax);           "
-        " add              $8,      %%rax;           "
-        " decl          %%edx;                       "
-        " jne              2b;                       "
-        " subl        $0x4000,      %%ecx;           "
-        " jmp              6f;                       "
-        "3:                                          " /* prepare to output a line */
-        " movd          %%ecx,      %%mm1;           "
-        " movl             %4,      %%edx;           " /* edx = width */
-        " movq             %9,      %%mm6;           " /* mm6 = 2^14  */
-        " pshufw    $0, %%mm1,      %%mm1;           "
-        " psubw         %%mm1,      %%mm6;           " /* mm6 = yfrac */
-        "4:                                          "
-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
-        " add              $4,         %0;           "
-        " punpcklbw     %%mm0,      %%mm4;           "
-        " movq        (%%rax),      %%mm5;           " /* mm5 = accumulate */
-        " movq          %%mm6,      %%mm3;           "
-        " psllw            $2,      %%mm4;           "
-        " pmulhuw       %%mm4,      %%mm3;           " /* mm3 = (srcpix * yfrac) >> 16 */
-        " pmulhuw       %%mm1,      %%mm4;           " /* mm4 = (srcpix * ycounter >> 16) */
-        " movq          %%mm3,    (%%rax);           "
-        " paddw         %%mm5,      %%mm4;           "
-        " add              $8,      %%rax;           "
-        " pmulhuw       %%mm7,      %%mm4;           "
-        " packuswb      %%mm0,      %%mm4;           "
-        " movd          %%mm4,       (%1);           "
-        " add              $4,         %1;           "
-        " decl          %%edx;                       "
-        " jne              4b;                       "
-        " add              %8,         %1;           " /* dstpix += dstdiff */
-        " addl             %5,      %%ecx;           "
-        " subl        $0x4000,      %%ecx;           "
-        "6:                                          " /* tail of outer Y-loop */
-        " add              %7,         %0;           " /* srcpix += srcdiff */
-        " decl             %3;                       "
-        " jne              1b;                       "
-        " emms;                                      "
-        : "+r"(srcpix), "+r"(dstpix)    /* outputs */
-        : "m"(templine),"m"(srcheight), "m"(width),     "m"(yspace),  
-          "m"(yrecip),  "m"(srcdiff64), "m"(dstdiff64), "m"(One64)  /* input */
-        : "%ecx","%edx","%rax"          /* clobbered */
-        );
-
-    /* free the temporary memory */
-    free(templine);
-}
-
-/* These functions implement a bilinear filter in the X-dimension.
- */
-void
-filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
-    int *xidx0, *xmult0, *xmult1;
-    int x, y;
-    int factorwidth = 8;
-
-    /* Allocate memory for factors */
-    xidx0 = malloc(dstwidth * 4);
-    if (xidx0 == 0) return;
-    xmult0 = (int *) malloc(dstwidth * factorwidth);
-    xmult1 = (int *) malloc(dstwidth * factorwidth);
-    if (xmult0 == 0 || xmult1 == 0)
-    {
-        free(xidx0);
-        if (xmult0) free(xmult0);
-        if (xmult1) free(xmult1);
-    }
-
-    /* Create multiplier factors and starting indices and put them in arrays */
-    for (x = 0; x < dstwidth; x++)
-    {
-        int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
-        int xm0 = 0x100 - xm1;
-        xidx0[x] = x * (srcwidth - 1) / dstwidth;
-        xmult1[x*2]   = xm1 | (xm1 << 16);
-        xmult1[x*2+1] = xm1 | (xm1 << 16);
-        xmult0[x*2]   = xm0 | (xm0 << 16);
-        xmult0[x*2+1] = xm0 | (xm0 << 16);
-    }
-
-    /* Do the scaling in raster order so we don't trash the cache */
-    for (y = 0; y < height; y++)
-    {
-        Uint8 *srcrow0 = srcpix + y * srcpitch;
-        Uint8 *dstrow = dstpix + y * dstpitch;
-        int *xm0 = xmult0;
-		int *xm1 = xmult1;
-        int *x0 = xidx0;
-        asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
-             " movl             %5,      %%ecx;           "
-             " pxor          %%mm0,      %%mm0;           "
-             "1:                                          "
-             " movsxl         (%3),      %%rax;           " /* get xidx0[x] */
-             " add              $4,         %3;           "
-             " movq           (%0),      %%mm1;           " /* load mult0 */
-             " add              $8,         %0;           "
-             " movq           (%1),      %%mm2;           " /* load mult1 */
-             " add              $8,         %1;           "
-             " movd   (%4,%%rax,4),      %%mm4;           "
-             " movd  4(%4,%%rax,4),      %%mm5;           "
-             " punpcklbw     %%mm0,      %%mm4;           "
-             " punpcklbw     %%mm0,      %%mm5;           "
-             " pmullw        %%mm1,      %%mm4;           "
-             " pmullw        %%mm2,      %%mm5;           "
-             " paddw         %%mm4,      %%mm5;           "
-             " psrlw            $8,      %%mm5;           "
-             " packuswb      %%mm0,      %%mm5;           "
-             " movd          %%mm5,       (%2);           "
-             " add              $4,         %2;           "
-             " decl          %%ecx;                       "
-             " jne              1b;                       "
-             " emms;                                      "
-             : "+r"(xm0),   "+r"(xm1), "+r"(dstrow), "+r"(x0) /* outputs */
-             : "r"(srcrow0),"m"(dstwidth)  /* input */
-             : "%ecx","%rax"                /* clobbered */
-             );
-    }
-
-    /* free memory */
-    free(xidx0);
-    free(xmult0);
-    free(xmult1);
-}
-
-void
-filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
-{
-    int *xidx0, *xmult0, *xmult1;
-    int x, y;
-    int factorwidth = 8;
-
-    /* Allocate memory for factors */
-    xidx0 = malloc(dstwidth * 4);
-    if (xidx0 == 0) return;
-    xmult0 = (int *) malloc(dstwidth * factorwidth);
-    xmult1 = (int *) malloc(dstwidth * factorwidth);
-    if (xmult0 == 0 || xmult1 == 0)
-    {
-        free(xidx0);
-        if (xmult0) free(xmult0);
-        if (xmult1) free(xmult1);
-    }
-
-    /* Create multiplier factors and starting indices and put them in arrays */
-    for (x = 0; x < dstwidth; x++)
-    {
-        int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
-        int xm0 = 0x100 - xm1;
-        xidx0[x] = x * (srcwidth - 1) / dstwidth;
-        xmult1[x*2]   = xm1 | (xm1 << 16);
-        xmult1[x*2+1] = xm1 | (xm1 << 16);
-        xmult0[x*2]   = xm0 | (xm0 << 16);
-        xmult0[x*2+1] = xm0 | (xm0 << 16);
-    }
-
-    /* Do the scaling in raster order so we don't trash the cache */
-    for (y = 0; y < height; y++)
-    {
-        Uint8 *srcrow0 = srcpix + y * srcpitch;
-        Uint8 *dstrow = dstpix + y * dstpitch;
-        int *xm0 = xmult0;
-		int *xm1 = xmult1;
-        int *x0 = xidx0;
-        asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
-             " movl             %5,      %%ecx;           "
-             " pxor          %%mm0,      %%mm0;           "
-             "1:                                          "
-             " movsxl         (%3),      %%rax;           " /* get xidx0[x] */
-             " add              $4,         %3;           "
-             " movq           (%0),      %%mm1;           " /* load mult0 */
-             " add              $8,         %0;           "
-             " movq           (%1),      %%mm2;           " /* load mult1 */
-             " add              $8,         %1;           "
-             " movd   (%4,%%rax,4),      %%mm4;           "
-             " movd  4(%4,%%rax,4),      %%mm5;           "
-             " punpcklbw     %%mm0,      %%mm4;           "
-             " punpcklbw     %%mm0,      %%mm5;           "
-             " pmullw        %%mm1,      %%mm4;           "
-             " pmullw        %%mm2,      %%mm5;           "
-             " paddw         %%mm4,      %%mm5;           "
-             " psrlw            $8,      %%mm5;           "
-             " packuswb      %%mm0,      %%mm5;           "
-             " movd          %%mm5,       (%2);           "
-             " add              $4,         %2;           "
-             " decl          %%ecx;                       "
-             " jne              1b;                       "
-             " emms;                                      "
-             : "+r"(xm0),   "+r"(xm1), "+r"(dstrow), "+r"(x0) /* outputs */
-             : "r"(srcrow0),"m"(dstwidth)  /* input */
-             : "%ecx","%rax"                /* clobbered */
-             );
-    }
-
-    /* free memory */
-    free(xidx0);
-    free(xmult0);
-    free(xmult1);
-}
-
-/* These functions implement a bilinear filter in the Y-dimension
- */
-void
-filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
-    int y;
-
-    for (y = 0; y < dstheight; y++)
-    {
-        int yidx0 = y * (srcheight - 1) / dstheight;
-        Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
-        Uint8 *srcrow1 = srcrow0 + srcpitch;
-        int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
-        int ymult0 = 0x0100 - ymult1;
-        Uint8 *dstrow = dstpix + y * dstpitch;
-        asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
-             " movl          %5,      %%ecx;                      "
-             " movd          %3,      %%mm1;                      "
-             " movd          %4,      %%mm2;                      "
-             " pxor       %%mm0,      %%mm0;                      "
-             " punpcklwd  %%mm1,      %%mm1;                      "
-             " punpckldq  %%mm1,      %%mm1;                      "
-             " punpcklwd  %%mm2,      %%mm2;                      "
-             " punpckldq  %%mm2,      %%mm2;                      "
-             "1:                                                  "
-             " movd        (%0),      %%mm4;                      "
-             " add           $4,         %0;                      "
-             " movd        (%1),      %%mm5;                      "
-             " add           $4,         %1;                      "
-             " punpcklbw  %%mm0,      %%mm4;                      "
-             " punpcklbw  %%mm0,      %%mm5;                      "
-             " pmullw     %%mm1,      %%mm4;                      "
-             " pmullw     %%mm2,      %%mm5;                      "
-             " paddw      %%mm4,      %%mm5;                      "
-             " psrlw         $8,      %%mm5;                      "
-             " packuswb   %%mm0,      %%mm5;                      "
-             " movd       %%mm5,       (%2);                      "
-             " add           $4,         %2;                      "
-             " decl       %%ecx;                                  "
-             " jne           1b;                                  "
-             " emms;                                              "
-             : "+r"(srcrow0), "+r"(srcrow1), "+r"(dstrow)   /* outputs */
-             : "m"(ymult0),   "m"(ymult1),   "m"(width)    /* input */
-             : "%ecx"         /* clobbered */
-             );
-    }
-}
-
-void
-filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
-{
-    int y;
-
-    for (y = 0; y < dstheight; y++)
-    {
-        int yidx0 = y * (srcheight - 1) / dstheight;
-        Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
-        Uint8 *srcrow1 = srcrow0 + srcpitch;
-        int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
-        int ymult0 = 0x0100 - ymult1;
-        Uint8 *dstrow = dstpix + y * dstpitch;
-        asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
-             " movl          %5,      %%ecx;                      "
-             " movd          %3,      %%mm1;                      "
-             " movd          %4,      %%mm2;                      "
-             " pxor       %%mm0,      %%mm0;                      "
-             " pshufw      $0, %%mm1, %%mm1;                      "
-             " pshufw      $0, %%mm2, %%mm2;                      "
-             "1:                                                  "
-             " movd        (%0),      %%mm4;                      "
-             " add           $4,         %0;                      "
-             " movd        (%1),      %%mm5;                      "
-             " add           $4,         %1;                      "
-             " punpcklbw  %%mm0,      %%mm4;                      "
-             " punpcklbw  %%mm0,      %%mm5;                      "
-             " pmullw     %%mm1,      %%mm4;                      "
-             " pmullw     %%mm2,      %%mm5;                      "
-             " paddw      %%mm4,      %%mm5;                      "
-             " psrlw         $8,      %%mm5;                      "
-             " packuswb   %%mm0,      %%mm5;                      "
-             " movd       %%mm5,       (%2);                      "
-             " add           $4,         %2;                      "
-             " decl       %%ecx;                                  "
-             " jne           1b;                                  "
-             " emms;                                              "
-             : "+r"(srcrow0), "+r"(srcrow1), "+r"(dstrow)   /* outputs */
-             : "m"(ymult0),   "m"(ymult1),   "m"(width)    /* input */
-             : "%ecx"         /* clobbered */
-             );
-    }
-}
-
diff --git a/src/transform.c b/src/transform.c
index c997deb..ee0d03d 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -29,7 +29,7 @@
 #include "pygamedocs.h"
 #include <math.h>
 #include <string.h>
-#include "scale.h"
+//#include "scale.h"
 
 
 typedef void (* SMOOTHSCALE_FILTER_P)(Uint8 *, Uint8 *, int, int, int, int, int);
-- 
2.2.2