From 6fb03418c8a378f3baf84df9b2e600c2f60fbc87 Mon Sep 17 00:00:00 2001
From: Guest One <popov_e_n@mail.ru>
Date: Sat, 5 Jul 2014 18:06:47 +0700
Subject: [PATCH] pygame recipe: x86_gcc2 only. The patchset is in the wrong
 format because haikuporter -e does not work for me.

---
 dev-python/pygame/patches/pygame-1.9.1.patch  |  204 ---
 .../pygame/patches/pygame-1.9.1.patchset      | 1496 +++++++++++++++++
 dev-python/pygame/pygame-1.9.1.recipe         |   95 +-
 3 files changed, 1570 insertions(+), 225 deletions(-)
 delete mode 100644 dev-python/pygame/patches/pygame-1.9.1.patch
 create mode 100644 dev-python/pygame/patches/pygame-1.9.1.patchset

diff --git a/dev-python/pygame/patches/pygame-1.9.1.patch b/dev-python/pygame/patches/pygame-1.9.1.patch
deleted file mode 100644
index 2dfb9f636..000000000
--- a/dev-python/pygame/patches/pygame-1.9.1.patch
+++ /dev/null
@@ -1,204 +0,0 @@
-diff -urN pygame-1.9.1release/config.py pygame-1.9.1release-haiku/config.py
---- pygame-1.9.1release/config.py	2009-07-09 06:13:20.025952256 +0000
-+++ pygame-1.9.1release-haiku/config.py	2010-09-17 09:54:05.000000000 +0000
-@@ -119,6 +119,9 @@
-     elif sys.platform == 'win32':
-         print_('Using WINDOWS mingw/msys configuration...\n')
-         import config_msys as CFG
-+    elif sys.platform == 'haiku1':
-+        print_('Using Haiku configuration...\n')
-+        import config_haiku as CFG
-     elif sys.platform == 'darwin':
-         print_('Using Darwin configuration...\n')
-         import config_darwin as CFG
-diff -urN pygame-1.9.1release/config_haiku.py pygame-1.9.1release-haiku/config_haiku.py
---- pygame-1.9.1release/config_haiku.py	1970-01-01 00:00:00.000000000 +0000
-+++ pygame-1.9.1release-haiku/config_haiku.py	2010-09-17 10:02:14.000000000 +0000
-@@ -0,0 +1,187 @@
-+"""Config on Haiku"""
-+
-+import os, sys
-+from glob import glob
-+from distutils.sysconfig import get_python_inc
-+
-+# Python 2.x/3.x compatibility
-+try:
-+    raw_input
-+except NameError:
-+    raw_input = input
-+
-+configcommand = os.environ.get('SDL_CONFIG', 'sdl-config',)
-+configcommand = configcommand + ' --version --cflags --libs'
-+localbase = os.environ.get('LOCALBASE', '')
-+
-+#these get prefixes with '/usr' and '/usr/local' or the $LOCALBASE
-+origincdirs = ['/include', '/include/SDL', '/include/SDL',
-+               '/include/smpeg' ]
-+origlibdirs = ['/lib','/lib64','/X11R6/lib']
-+
-+def confirm(message):
-+    "ask a yes/no question, return result"
-+    reply = raw_input('\n' + message + ' [Y/n]:')
-+    if reply and (reply[0].lower()) == 'n':
-+        return 0
-+    return 1
-+
-+class DependencyProg:
-+    def __init__(self, name, envname, exename, minver, defaultlibs):
-+        self.name = name
-+        command = os.environ.get(envname, exename)
-+        self.lib_dir = ''
-+        self.inc_dir = ''
-+        self.libs = []
-+        self.cflags = ''
-+        try:
-+            config = os.popen(command + ' --version --cflags --libs').readlines()
-+            flags = ' '.join(config[1:]).split()
-+
-+            # remove this GNU_SOURCE if there... since python has it already,
-+            #   it causes a warning.
-+            if '-D_GNU_SOURCE=1' in flags:
-+                flags.remove('-D_GNU_SOURCE=1')
-+            self.ver = config[0].strip()
-+            if minver and self.ver < minver:
-+                err= 'WARNING: requires %s version %s (%s found)' % (self.name, self.ver, minver)
-+                raise ValueError(err)
-+            self.found = 1
-+            self.cflags = ''
-+            for f in flags:
-+                if f[:2] in ('-l', '-D', '-I', '-L'):
-+                    self.cflags += f + ' '
-+                elif f[:3] == '-Wl':
-+                    self.cflags += '-Xlinker ' + f + ' '
-+            if self.name == 'SDL':
-+                inc = '-I' + '/usr/X11R6/include'
-+                self.cflags = inc + ' ' + self.cflags
-+        except:
-+            print ('WARNING: "%s" failed!' % command)
-+            self.found = 0
-+            self.ver = '0'
-+            self.libs = defaultlibs
-+
-+    def configure(self, incdirs, libdir):
-+        if self.found:
-+            print (self.name + '        '[len(self.name):] + ': found ' + self.ver)
-+            self.found = 1
-+        else:
-+            print (self.name + '        '[len(self.name):] + ': not found')
-+
-+class Dependency:
-+    def __init__(self, name, checkhead, checklib, libs):
-+        self.name = name
-+        self.inc_dir = None
-+        self.lib_dir = None
-+        self.libs = libs
-+        self.found = 0
-+        self.checklib = checklib
-+        self.checkhead = checkhead
-+        self.cflags = ''
-+    
-+    def configure(self, incdirs, libdirs):
-+        incname = self.checkhead
-+        libnames = self.checklib, self.name.lower()
-+        
-+        if incname:
-+            for dir in incdirs:
-+                path = os.path.join(dir, incname)
-+                if os.path.isfile(path):
-+                    self.inc_dir = dir
-+
-+        for dir in libdirs:
-+            for name in libnames:
-+                path = os.path.join(dir, name)
-+                if filter(os.path.isfile, glob(path+'*')):
-+                    self.lib_dir = dir
-+
-+        if (incname and self.lib_dir and self.inc_dir) or (not incname and self.lib_dir):
-+            print (self.name + '        '[len(self.name):] + ': found')
-+            self.found = 1
-+        else:
-+            print (self.name + '        '[len(self.name):] + ': not found')
-+
-+class DependencyPython:
-+    def __init__(self, name, module, header):
-+        self.name = name
-+        self.lib_dir = ''
-+        self.inc_dir = ''
-+        self.libs = []
-+        self.cflags = ''
-+        self.found = 0
-+        self.ver = '0'
-+        self.module = module
-+        self.header = header
-+ 
-+    def configure(self, incdirs, libdirs):
-+        self.found = 1
-+        if self.module:
-+            try:
-+                self.ver = __import__(self.module).__version__
-+            except ImportError:
-+                self.found = 0
-+        if self.found and self.header:
-+            fullpath = os.path.join(get_python_inc(0), self.header)
-+            if not os.path.isfile(fullpath):
-+                self.found = 0
-+            else:
-+                self.inc_dir = os.path.split(fullpath)[0]
-+        if self.found:
-+            print (self.name + '        '[len(self.name):] + ': found', self.ver)
-+        else:
-+            print (self.name + '        '[len(self.name):] + ': not found')
-+
-+sdl_lib_name = 'SDL'
-+
-+def main():
-+    print ('\nHunting dependencies...')
-+    DEPS = [
-+        DependencyProg('SDL', 'SDL_CONFIG', 'sdl-config', '1.2', ['sdl']),
-+        Dependency('FONT', 'SDL_ttf.h', 'libSDL_ttf.so', ['SDL_ttf']),
-+        Dependency('IMAGE', 'SDL_image.h', 'libSDL_image.so', ['SDL_image']),
-+        Dependency('MIXER', 'SDL_mixer.h', 'libSDL_mixer.so', ['SDL_mixer']),
-+        DependencyProg('SMPEG', 'SMPEG_CONFIG', 'smpeg-config', '0.4.3', ['smpeg']),
-+        Dependency('PNG', 'png.h', 'libpng', ['png']),
-+        Dependency('JPEG', 'jpeglib.h', 'libjpeg', ['jpeg']),
-+        Dependency('SCRAP', '', 'libX11', ['X11']),
-+        Dependency('PORTMIDI', 'portmidi.h', 'libportmidi.so', ['portmidi']),
-+        Dependency('PORTTIME', 'porttime.h', 'libporttime.so', ['porttime']),
-+        #Dependency('GFX', 'SDL_gfxPrimitives.h', 'libSDL_gfx.so', ['SDL_gfx']),
-+    ]
-+    if not DEPS[0].found:
-+        print ('Unable to run "sdl-config". Please make sure a development version of SDL is installed.')
-+        raise SystemExit
-+
-+    if localbase:
-+        incdirs = [localbase+d for d in origincdirs]
-+        libdirs = [localbase+d for d in origlibdirs]
-+    else:
-+        incdirs = []
-+        libdirs = []
-+    incdirs += ["/boot/common"+d for d in origincdirs]
-+    libdirs += ["/boot/common"+d for d in origlibdirs]
-+
-+    for arg in DEPS[0].cflags.split():
-+        if arg[:2] == '-I':
-+            incdirs.append(arg[2:])
-+        elif arg[:2] == '-L':
-+            libdirs.append(arg[2:])
-+    for d in DEPS:
-+        d.configure(incdirs, libdirs)
-+
-+    for d in DEPS[1:]:
-+        if not d.found:
-+            if not confirm("""
-+Warning, some of the pygame dependencies were not found. Pygame can still
-+compile and install, but games that depend on those missing dependencies
-+will not run. Would you like to continue the configuration?"""):
-+                raise SystemExit
-+            break
-+
-+    return DEPS
-+
-+if __name__ == '__main__':
-+    print ("""This is the configuration subscript for Unix.
-+Please run "config.py" for full configuration.""")
-+
diff --git a/dev-python/pygame/patches/pygame-1.9.1.patchset b/dev-python/pygame/patches/pygame-1.9.1.patchset
new file mode 100644
index 000000000..83e2d9462
--- /dev/null
+++ b/dev-python/pygame/patches/pygame-1.9.1.patchset
@@ -0,0 +1,1496 @@
+diff --git a/Setup.in b/Setup.in
+index 4bb6c1c..9236df4 100644
+--- a/Setup.in
++++ b/Setup.in
+@@ -33,9 +33,9 @@ mixer_music src/music.c $(SDL) $(MIXER) $(DEBUG)
+ _numericsurfarray src/_numericsurfarray.c $(SDL) $(DEBUG)
+ _numericsndarray src/_numericsndarray.c $(SDL) $(MIXER) $(DEBUG)
+ movie src/movie.c $(SDL) $(SMPEG) $(DEBUG)
+-scrap src/scrap.c $(SDL) $(SCRAP) $(DEBUG)
++#scrap src/scrap.c $(SDL) $(SCRAP) $(DEBUG)
+ _camera src/_camera.c src/camera_v4l2.c src/camera_v4l.c $(SDL) $(DEBUG)
+-pypm src/pypm.c $(SDL) $(PORTMIDI) $(PORTTIME) $(DEBUG)
++#pypm src/pypm.c $(SDL) $(PORTMIDI) $(PORTTIME) $(DEBUG)
+ 
+ GFX = src/SDL_gfx/SDL_gfxPrimitives.c 
+ #GFX = src/SDL_gfx/SDL_gfxBlitFunc.c src/SDL_gfx/SDL_gfxPrimitives.c 
+@@ -64,7 +64,7 @@ joystick src/joystick.c $(SDL) $(DEBUG)
+ draw src/draw.c $(SDL) $(DEBUG)
+ image src/image.c $(SDL) $(DEBUG)
+ overlay src/overlay.c $(SDL) $(DEBUG)
+-transform src/transform.c src/rotozoom.c src/scale2x.c src/scale_mmx.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
++transform src/transform.c src/rotozoom.c src/scale2x.c $(SDL) $(DEBUG) -D_NO_MMX_FOR_X86_64
+ mask src/mask.c src/bitmask.c $(SDL) $(DEBUG)
+ bufferproxy src/bufferproxy.c $(SDL) $(DEBUG)
+ pixelarray src/pixelarray.c $(SDL) $(DEBUG)
+diff --git a/config.py b/config.py
+index f60d64c..6e0d766 100644
+--- a/config.py
++++ b/config.py
+@@ -119,12 +119,16 @@ def main():
+     elif sys.platform == 'win32':
+         print_('Using WINDOWS mingw/msys configuration...\n')
+         import config_msys as CFG
++    elif sys.platform == 'haiku1' or sys.platform == 'haiku1_x86':
++        print_('Using Haiku configuration...\n')
++        import config_haiku as CFG
+     elif sys.platform == 'darwin':
+         print_('Using Darwin configuration...\n')
+         import config_darwin as CFG
+         additional_platform_setup = open("Setup_Darwin.in", "r").readlines()
+     else:
+         print_('Using UNIX configuration...\n')
++        print_(sys.platform)
+         import config_unix as CFG
+     
+     if os.path.isfile('Setup'):
+diff --git a/pygame.egg-info/SOURCES.txt b/pygame.egg-info/SOURCES.txt
+index a7ec677..39c2a55 100644
+--- a/pygame.egg-info/SOURCES.txt
++++ b/pygame.egg-info/SOURCES.txt
+@@ -301,11 +301,7 @@ src/rect.c
+ src/rect.doc
+ src/rotozoom.c
+ src/rwobject.c
+-src/scale.h
+ src/scale2x.c
+-src/scale_mmx.c
+-src/scale_mmx32.c
+-src/scale_mmx64.c
+ src/scrap.c
+ src/scrap.doc
+ src/scrap.h
+@@ -465,4 +461,4 @@ test/util/build_page/results/.htaccess
+ test/util/build_page/results/index.py
+ test/util/build_page/results/results.css
+ test/util/build_page/upload_results/.htaccess
+-test/util/build_page/upload_results/index.py
+\ No newline at end of file
++test/util/build_page/upload_results/index.py
+diff --git a/setup.py b/setup.py
+index 45af61f..bf352dc 100644
+--- a/setup.py
++++ b/setup.py
+@@ -116,7 +116,7 @@ else:
+ #headers to install
+ headers = glob.glob(os.path.join('src', '*.h'))
+ headers.remove(os.path.join('src', 'numeric_arrayobject.h'))
+-headers.remove(os.path.join('src', 'scale.h'))
++#headers.remove(os.path.join('src', 'scale.h'))
+ 
+ #sanity check for any arguments
+ if len(sys.argv) == 1:
+@@ -354,17 +354,6 @@ if sys.platform == 'win32':
+     cmdclass['build_ext'] = WinBuildExt
+ 
+     # Add the precompiled smooth scale MMX functions to transform.
+-    def replace_scale_mmx():
+-        for e in extensions:
+-            if e.name == 'transform':
+-                e.extra_objects.append(
+-                    os.path.join('obj', 'win32', 'scale_mmx.obj'))
+-                for i in range(len(e.sources)):
+-                    if e.sources[i].endswith('scale_mmx.c'):
+-                        del e.sources[i]
+-                        return
+-    replace_scale_mmx()
+-
+ 
+ #clean up the list of extensions
+ for e in extensions[:]:
+diff --git a/src/pgcompat.h b/src/pgcompat.h
+index 6b9eea0..e34d2ba 100644
+--- a/src/pgcompat.h
++++ b/src/pgcompat.h
+@@ -69,9 +69,7 @@
+ #define DECREF_MOD(mod)
+ 
+ /* Type header differs. */
+-#define TYPE_HEAD(x,y)                          \
+-    PyObject_HEAD_INIT(x)                       \
+-    0,
++#define TYPE_HEAD(x,y) PyObject_HEAD_INIT(x) 0,
+ 
+ /* Text interface. Use ascii strings. */
+ #define Text_Type PyString_Type
+diff --git a/src/scale.h b/src/scale.h
+deleted file mode 100644
+index 0bb0eb2..0000000
+--- a/src/scale.h
++++ /dev/null
+@@ -1,61 +0,0 @@
+-/*
+-  pygame - Python Game Library
+-  Copyright (C) 2000-2001  Pete Shinners
+-  Copyright (C) 2007  Rene Dudfield, Richard Goedeken 
+-
+-  This library is free software; you can redistribute it and/or
+-  modify it under the terms of the GNU Library General Public
+-  License as published by the Free Software Foundation; either
+-  version 2 of the License, or (at your option) any later version.
+-
+-  This library is distributed in the hope that it will be useful,
+-  but WITHOUT ANY WARRANTY; without even the implied warranty of
+-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+-  Library General Public License for more details.
+-
+-  You should have received a copy of the GNU Library General Public
+-  License along with this library; if not, write to the Free
+-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-
+-  Pete Shinners
+-  pete@shinners.org
+-*/
+-
+-/* Pentium MMX/SSE smoothscale routines
+- * Available on Win32 or GCC on a Pentium.
+- * Sorry, no Win64 support yet for Visual C builds, but it can be added.
+- */
+-
+-#if !defined(SCALE_HEADER)
+-#define SCALE_HEADER
+-
+-#if (defined(__GNUC__) && ((defined(__x86_64__) && !defined(_NO_MMX_FOR_X86_64)) || defined(__i386__))) || defined(MS_WIN32)
+-#define SCALE_MMX_SUPPORT
+-
+-/* These functions implement an area-averaging shrinking filter in the X-dimension.
+- */
+-void filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
+-
+-void filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
+-
+-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
+- */
+-void filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
+-
+-void filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
+-
+-/* These functions implement a bilinear filter in the X-dimension.
+- */
+-void filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
+-
+-void filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth);
+-
+-/* These functions implement a bilinear filter in the Y-dimension.
+- */
+-void filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
+-
+-void filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight);
+-
+-#endif /* #if (defined(__GNUC__) && .....) */
+-
+-#endif /* #if !defined(SCALE_HEADER) */
+diff --git a/src/scale_mmx.c b/src/scale_mmx.c
+deleted file mode 100644
+index 36e7af0..0000000
+--- a/src/scale_mmx.c
++++ /dev/null
+@@ -1,37 +0,0 @@
+-/*
+-  pygame - Python Game Library
+-  Copyright (C) 2000-2001  Pete Shinners
+-  Copyright (C) 2007  Rene Dudfield, Richard Goedeken 
+-
+-  This library is free software; you can redistribute it and/or
+-  modify it under the terms of the GNU Library General Public
+-  License as published by the Free Software Foundation; either
+-  version 2 of the License, or (at your option) any later version.
+-
+-  This library is distributed in the hope that it will be useful,
+-  but WITHOUT ANY WARRANTY; without even the implied warranty of
+-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+-  Library General Public License for more details.
+-
+-  You should have received a copy of the GNU Library General Public
+-  License along with this library; if not, write to the Free
+-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-
+-  Pete Shinners
+-  pete@shinners.org
+-*/
+-
+-/* Pentium MMX/SSE smoothscale routines
+- * These are only compiled with GCC.
+- */
+-#if defined(__GNUC__)
+-/* Choose between the 32 bit and 64 bit versions.
+- * Including source code like this may be frowned upon by some,
+- * but the alternative is ungainly conditionally compiled code.
+- */
+-#   if defined(__x86_64__)
+-#       include "scale_mmx64.c"
+-#   elif defined(__i386__)
+-#       include "scale_mmx32.c"
+-#   endif
+-#endif
+diff --git a/src/scale_mmx32.c b/src/scale_mmx32.c
+deleted file mode 100644
+index 14cd8d2..0000000
+--- a/src/scale_mmx32.c
++++ /dev/null
+@@ -1,620 +0,0 @@
+-/*
+-  pygame - Python Game Library
+-  Copyright (C) 2000-2001  Pete Shinners
+-  Copyright (C) 2007  Rene Dudfield, Richard Goedeken 
+-
+-  This library is free software; you can redistribute it and/or
+-  modify it under the terms of the GNU Library General Public
+-  License as published by the Free Software Foundation; either
+-  version 2 of the License, or (at your option) any later version.
+-
+-  This library is distributed in the hope that it will be useful,
+-  but WITHOUT ANY WARRANTY; without even the implied warranty of
+-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+-  Library General Public License for more details.
+-
+-  You should have received a copy of the GNU Library General Public
+-  License along with this library; if not, write to the Free
+-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-
+-  Pete Shinners
+-  pete@shinners.org
+-*/
+-
+-/* Pentium 32 bit SSE/MMX smoothscale filter routines
+- * These are written for compilation with GCC only.
+- *
+- * This file should not depend on anything but the C standard library.
+- */
+-
+-#if !defined(__GNUC__) || !defined(__i386__) || defined(__x86_64__)
+-#error "Pygame build bug: should not be compiling this file!"
+-#endif
+-
+-#include <stdint.h>
+-typedef uint8_t Uint8;    /* SDL convension */
+-typedef uint16_t Uint16;  /* SDL convension */
+-#include <stdlib.h>
+-#include <memory.h>
+-#include "scale.h"
+-
+-/* These functions implement an area-averaging shrinking filter in the X-dimension.
+- */
+-void
+-filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
+-{
+-    int srcdiff = srcpitch - (srcwidth * 4);
+-    int dstdiff = dstpitch - (dstwidth * 4);
+-
+-    int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
+-    int xrecip = 0x40000000 / xspace;
+-    long long One64 = 0x4000400040004000ULL;
+-
+-    asm __volatile__(" /* MMX code for X-shrink area average filter */ "
+-        " pxor          %%mm0,      %%mm0;           "
+-        " movd             %6,      %%mm7;           " /* mm7 == xrecipmmx */
+-        " punpcklwd     %%mm7,      %%mm7;           "
+-        " punpckldq     %%mm7,      %%mm7;           "
+-        "1:                                          " /* outer Y-loop */
+-        " movl             %5,      %%ecx;           " /* ecx == xcounter */
+-        " pxor          %%mm1,      %%mm1;           " /* mm1 == accumulator */
+-        " movl             %4,      %%edx;           " /* edx == width */
+-        "2:                                          " /* inner X-loop */
+-        " cmpl        $0x4000,      %%ecx;           "
+-        " jbe              3f;                       "
+-        " movd           (%0),      %%mm2;           " /* mm2 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm2;           "
+-        " paddw         %%mm2,      %%mm1;           " /* accumulator += srcpix */
+-        " subl        $0x4000,      %%ecx;           "
+-        " jmp              4f;                       "
+-        "3:                                          " /* prepare to output a pixel */
+-        " movd          %%ecx,      %%mm2;           "
+-        " movq             %2,      %%mm3;           " /* mm3 = 2^14  */
+-        " punpcklwd     %%mm2,      %%mm2;           "
+-        " punpckldq     %%mm2,      %%mm2;           "
+-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm4;           "
+-        " psubw         %%mm2,      %%mm3;           " /* mm3 = xfrac */
+-        " psllw            $2,      %%mm4;           "
+-        " movq          %%mm4,      %%mm5;           " /* mm2 = (srcpix * xcounter >> 16) */
+-        " psraw           $15,      %%mm5;           "
+-        " pand          %%mm2,      %%mm5;           "
+-        " movq          %%mm2,      %%mm6;           "
+-        " psraw           $15,      %%mm6;           "
+-        " pand          %%mm4,      %%mm6;           "
+-        " pmulhw        %%mm4,      %%mm2;           "
+-        " paddw         %%mm5,      %%mm2;           "
+-        " paddw         %%mm6,      %%mm2;           "
+-        " movq          %%mm4,      %%mm5;           " /* mm3 = (srcpix * xfrac) >> 16) */
+-        " psraw           $15,      %%mm5;           "
+-        " pand          %%mm3,      %%mm5;           "
+-        " movq          %%mm3,      %%mm6;           "
+-        " psraw           $15,      %%mm6;           "
+-        " pand          %%mm4,      %%mm6;           "
+-        " pmulhw        %%mm4,      %%mm3;           "
+-        " paddw         %%mm5,      %%mm3;           "
+-        " paddw         %%mm6,      %%mm3;           "
+-        " paddw         %%mm1,      %%mm2;           "
+-        " movq          %%mm3,      %%mm1;           " /* accumulator = (srcpix * xfrac) >> 16 */
+-        " movq          %%mm7,      %%mm5;           "
+-        " psraw           $15,      %%mm5;           "
+-        " pand          %%mm2,      %%mm5;           "
+-        " movq          %%mm2,      %%mm6;           "
+-        " psraw           $15,      %%mm6;           "
+-        " pand          %%mm7,      %%mm6;           "
+-        " pmulhw        %%mm7,      %%mm2;           "
+-        " paddw         %%mm5,      %%mm2;           "
+-        " paddw         %%mm6,      %%mm2;           "
+-        " packuswb      %%mm0,      %%mm2;           "
+-        " movd          %%mm2,       (%1);           "
+-        " add              %5,      %%ecx;           "
+-        " add              $4,         %1;           "
+-        " subl        $0x4000,      %%ecx;           "
+-        "4:                                          " /* tail of inner X-loop */
+-        " decl          %%edx;                       "
+-        " jne              2b;                       "
+-        " add              %7,         %0;           " /* srcpix += srcdiff */
+-        " add              %8,         %1;           " /* dstpix += dstdiff */
+-        " decl             %3;                       "
+-        " jne              1b;                       "
+-        " emms;                                      "
+-        : "+r"(srcpix), "+r"(dstpix)                   /* outputs */
+-        : "m"(One64),   "m"(height), "m"(srcwidth),
+-          "m"(xspace),  "m"(xrecip), "m"(srcdiff),  "m"(dstdiff)  /* input */
+-        : "%ecx","%edx"     /* clobbered */
+-        );
+-}
+-
+-void
+-filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
+-{
+-    int srcdiff = srcpitch - (srcwidth * 4);
+-    int dstdiff = dstpitch - (dstwidth * 4);
+-
+-    int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
+-    int xrecip = 0x40000000 / xspace;
+-    long long One64 = 0x4000400040004000ULL;
+-
+-    asm __volatile__(" /* MMX code for X-shrink area average filter */ "
+-        " pxor          %%mm0,      %%mm0;           "
+-        " movd             %6,      %%mm7;           " /* mm7 == xrecipmmx */
+-        " movq             %2,      %%mm6;           " /* mm6 = 2^14  */
+-        " pshufw    $0, %%mm7,      %%mm7;           "
+-        "1:                                          " /* outer Y-loop */
+-        " movl             %5,      %%ecx;           " /* ecx == xcounter */
+-        " pxor          %%mm1,      %%mm1;           " /* mm1 == accumulator */
+-        " movl             %4,      %%edx;           " /* edx == width */
+-        "2:                                          " /* inner X-loop */
+-        " cmpl        $0x4000,      %%ecx;           "
+-        " jbe              3f;                       "
+-        " movd           (%0),      %%mm2;           " /* mm2 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm2;           "
+-        " paddw         %%mm2,      %%mm1;           " /* accumulator += srcpix */
+-        " subl        $0x4000,      %%ecx;           "
+-        " jmp              4f;                       "
+-        "3:                                          " /* prepare to output a pixel */
+-        " movd          %%ecx,      %%mm2;           "
+-        " movq          %%mm6,      %%mm3;           " /* mm3 = 2^14  */
+-        " pshufw    $0, %%mm2,      %%mm2;           "
+-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm4;           "
+-        " psubw         %%mm2,      %%mm3;           " /* mm3 = xfrac */
+-        " psllw            $2,      %%mm4;           "
+-        " pmulhuw       %%mm4,      %%mm2;           " /* mm2 = (srcpix * xcounter >> 16) */
+-        " pmulhuw       %%mm4,      %%mm3;           " /* mm3 = (srcpix * xfrac) >> 16 */
+-        " paddw         %%mm1,      %%mm2;           "
+-        " movq          %%mm3,      %%mm1;           " /* accumulator = (srcpix * xfrac) >> 16 */
+-        " pmulhuw       %%mm7,      %%mm2;           "
+-        " packuswb      %%mm0,      %%mm2;           "
+-        " movd          %%mm2,       (%1);           "
+-        " add              %5,      %%ecx;           "
+-        " add              $4,         %1;           "
+-        " subl        $0x4000,      %%ecx;           "
+-        "4:                                          " /* tail of inner X-loop */
+-        " decl          %%edx;                       "
+-        " jne              2b;                       "
+-        " add              %7,         %0;           " /* srcpix += srcdiff */
+-        " add              %8,         %1;           " /* dstpix += dstdiff */
+-        " decl             %3;                       "
+-        " jne              1b;                       "
+-        " emms;                                      "
+-        : "+r"(srcpix), "+r"(dstpix)                   /* outputs */
+-        : "m"(One64),   "m"(height), "m"(srcwidth),
+-          "m"(xspace),  "m"(xrecip), "m"(srcdiff),  "m"(dstdiff)  /* input */
+-        : "%ecx","%edx"     /* clobbered */
+-        );
+-}
+-
+-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
+- */
+-void
+-filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
+-{
+-    Uint16 *templine;
+-    int srcdiff = srcpitch - (width * 4);
+-    int dstdiff = dstpitch - (width * 4);
+-    int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
+-    int yrecip = 0x40000000 / yspace;
+-    long long One64 = 0x4000400040004000ULL;
+-
+-    /* allocate and clear a memory area for storing the accumulator line */
+-    templine = (Uint16 *) malloc(dstpitch * 2);
+-    if (templine == 0) return;
+-    memset(templine, 0, dstpitch * 2);
+-
+-    asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
+-        " movl             %5,      %%ecx;           " /* ecx == ycounter */
+-        " pxor          %%mm0,      %%mm0;           "
+-        " movd             %6,      %%mm7;           " /* mm7 == yrecipmmx */
+-        " punpcklwd     %%mm7,      %%mm7;           "
+-        " punpckldq     %%mm7,      %%mm7;           "
+-        "1:                                          " /* outer Y-loop */
+-        " movl             %2,      %%eax;           " /* rax == accumulate */
+-        " cmpl        $0x4000,      %%ecx;           "
+-        " jbe              3f;                       "
+-        " movl             %4,      %%edx;           " /* edx == width */
+-        "2:                                          "
+-        " movd           (%0),      %%mm1;           "
+-        " add              $4,         %0;           "
+-        " movq        (%%eax),      %%mm2;           "
+-        " punpcklbw     %%mm0,      %%mm1;           "
+-        " paddw         %%mm1,      %%mm2;           "
+-        " movq          %%mm2,    (%%eax);           "
+-        " add              $8,      %%eax;           "
+-        " decl          %%edx;                       "
+-        " jne              2b;                       "
+-        " subl        $0x4000,      %%ecx;           "
+-        " jmp              6f;                       "
+-        "3:                                          " /* prepare to output a line */
+-        " movd          %%ecx,      %%mm1;           "
+-        " movl             %4,      %%edx;           " /* edx = width */
+-        " movq             %9,      %%mm6;           " /* mm6 = 2^14  */
+-        " punpcklwd     %%mm1,      %%mm1;           "
+-        " punpckldq     %%mm1,      %%mm1;           "
+-        " psubw         %%mm1,      %%mm6;           " /* mm6 = yfrac */
+-        "4:                                          "
+-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm4;           "
+-        " movq        (%%eax),      %%mm5;           " /* mm5 = accumulate */
+-        " movq          %%mm6,      %%mm3;           "
+-        " psllw            $2,      %%mm4;           "
+-        " movq          %%mm4,      %%mm0;           " /* mm3 = (srcpix * yfrac) >> 16) */
+-        " psraw           $15,      %%mm0;           "
+-        " pand          %%mm3,      %%mm0;           "
+-        " movq          %%mm3,      %%mm2;           "
+-        " psraw           $15,      %%mm2;           "
+-        " pand          %%mm4,      %%mm2;           "
+-        " pmulhw        %%mm4,      %%mm3;           "
+-        " paddw         %%mm0,      %%mm3;           "
+-        " paddw         %%mm2,      %%mm3;           "
+-        " movq          %%mm1,      %%mm0;           " /* mm4 = (srcpix * ycounter >> 16) */
+-        " psraw           $15,      %%mm0;           "
+-        " pand          %%mm4,      %%mm0;           "
+-        " movq          %%mm4,      %%mm2;           "
+-        " psraw           $15,      %%mm2;           "
+-        " pand          %%mm1,      %%mm2;           "
+-        " pmulhw        %%mm1,      %%mm4;           "
+-        " paddw         %%mm0,      %%mm4;           "
+-        " paddw         %%mm2,      %%mm4;           "
+-        " movq          %%mm3,    (%%eax);           "
+-        " paddw         %%mm5,      %%mm4;           "
+-        " add              $8,      %%eax;           "
+-        " movq          %%mm7,      %%mm0;           "
+-        " psraw           $15,      %%mm0;           "
+-        " pand          %%mm4,      %%mm0;           "
+-        " movq          %%mm4,      %%mm2;           "
+-        " psraw           $15,      %%mm2;           "
+-        " pand          %%mm7,      %%mm2;           "
+-        " pmulhw        %%mm7,      %%mm4;           "
+-        " paddw         %%mm0,      %%mm4;           "
+-        " paddw         %%mm2,      %%mm4;           "
+-        " pxor          %%mm0,      %%mm0;           "
+-        " packuswb      %%mm0,      %%mm4;           "
+-        " movd          %%mm4,       (%1);           "
+-        " add              $4,         %1;           "
+-        " decl          %%edx;                       "
+-        " jne              4b;                       "
+-        " add              %8,         %1;           " /* dstpix += dstdiff */
+-        " addl             %5,      %%ecx;           "
+-        " subl        $0x4000,      %%ecx;           "
+-        "6:                                          " /* tail of outer Y-loop */
+-        " add              %7,         %0;           " /* srcpix += srcdiff */
+-        " decl             %3;                       "
+-        " jne              1b;                       "
+-        " emms;                                      "
+-        : "+r"(srcpix),  "+r"(dstpix)     /* outputs */
+-        : "m"(templine), "m"(srcheight), "m"(width),  "m"(yspace),
+-          "m"(yrecip),   "m"(srcdiff),   "m"(dstdiff),"m"(One64)  /* input */
+-        : "%ecx","%edx","%eax"           /* clobbered */
+-        );
+-
+-    /* free the temporary memory */
+-    free(templine);
+-}
+-
+-void
+-filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
+-{
+-    Uint16 *templine;
+-    int srcdiff = srcpitch - (width * 4);
+-    int dstdiff = dstpitch - (width * 4);
+-    int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
+-    int yrecip = 0x40000000 / yspace;
+-    long long One64 = 0x4000400040004000ULL;
+-
+-    /* allocate and clear a memory area for storing the accumulator line */
+-    templine = (Uint16 *) malloc(dstpitch * 2);
+-    if (templine == 0) return;
+-    memset(templine, 0, dstpitch * 2);
+-    asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
+-        " movl             %5,      %%ecx;           " /* ecx == ycounter */
+-        " pxor          %%mm0,      %%mm0;           "
+-        " movd             %6,      %%mm7;           " /* mm7 == yrecipmmx */
+-        " pshufw    $0, %%mm7,      %%mm7;           "
+-        "1:                                          " /* outer Y-loop */
+-        " movl             %2,      %%eax;           " /* rax == accumulate */
+-        " cmpl        $0x4000,      %%ecx;           "
+-        " jbe              3f;                       "
+-        " movl             %4,      %%edx;           " /* edx == width */
+-        "2:                                          "
+-        " movd           (%0),      %%mm1;           "
+-        " add              $4,         %0;           "
+-        " movq        (%%eax),      %%mm2;           "
+-        " punpcklbw     %%mm0,      %%mm1;           "
+-        " paddw         %%mm1,      %%mm2;           "
+-        " movq          %%mm2,    (%%eax);           "
+-        " add              $8,      %%eax;           "
+-        " decl          %%edx;                       "
+-        " jne              2b;                       "
+-        " subl        $0x4000,      %%ecx;           "
+-        " jmp              6f;                       "
+-        "3:                                          " /* prepare to output a line */
+-        " movd          %%ecx,      %%mm1;           "
+-        " movl             %4,      %%edx;           " /* edx = width */
+-        " movq             %9,      %%mm6;           " /* mm6 = 2^14  */
+-        " pshufw    $0, %%mm1,      %%mm1;           "
+-        " psubw         %%mm1,      %%mm6;           " /* mm6 = yfrac */
+-        "4:                                          "
+-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm4;           "
+-        " movq        (%%eax),      %%mm5;           " /* mm5 = accumulate */
+-        " movq          %%mm6,      %%mm3;           "
+-        " psllw            $2,      %%mm4;           "
+-        " pmulhuw       %%mm4,      %%mm3;           " /* mm3 = (srcpix * yfrac) >> 16 */
+-        " pmulhuw       %%mm1,      %%mm4;           " /* mm4 = (srcpix * ycounter >> 16) */
+-        " movq          %%mm3,    (%%eax);           "
+-        " paddw         %%mm5,      %%mm4;           "
+-        " add              $8,      %%eax;           "
+-        " pmulhuw       %%mm7,      %%mm4;           "
+-        " packuswb      %%mm0,      %%mm4;           "
+-        " movd          %%mm4,       (%1);           "
+-        " add              $4,         %1;           "
+-        " decl          %%edx;                       "
+-        " jne              4b;                       "
+-        " add              %8,         %1;           " /* dstpix += dstdiff */
+-        " addl             %5,      %%ecx;           "
+-        " subl        $0x4000,      %%ecx;           "
+-        "6:                                          " /* tail of outer Y-loop */
+-        " add              %7,         %0;           " /* srcpix += srcdiff */
+-        " decl             %3;                       "
+-        " jne              1b;                       "
+-        " emms;                                      "
+-        : "+r"(srcpix),  "+r"(dstpix)     /* outputs */
+-        : "m"(templine), "m"(srcheight), "m"(width),  "m"(yspace),
+-          "m"(yrecip),   "m"(srcdiff),   "m"(dstdiff),"m"(One64)  /* input */
+-        : "%ecx","%edx","%eax"           /* clobbered */
+-        );
+-
+-    /* free the temporary memory */
+-    free(templine);
+-}
+-
+-/* These functions implement a bilinear filter in the X-dimension.
+- */
+-void
+-filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
+-{
+-    int *xidx0, *xmult0, *xmult1;
+-    int x, y;
+-    int factorwidth = 8;
+-  	long long One64 = 0x0100010001000100ULL;
+-
+-    /* Allocate memory for factors */
+-    xidx0 = malloc(dstwidth * 4);
+-    if (xidx0 == 0) return;
+-    xmult0 = (int *) malloc(dstwidth * factorwidth);
+-    xmult1 = (int *) malloc(dstwidth * factorwidth);
+-    if (xmult0 == 0 || xmult1 == 0)
+-    {
+-        free(xidx0);
+-        if (xmult0) free(xmult0);
+-        if (xmult1) free(xmult1);
+-    }
+-
+-    /* Create multiplier factors and starting indices and put them in arrays */
+-    for (x = 0; x < dstwidth; x++)
+-    {
+-        int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
+-        int xm0 = 0x100 - xm1;
+-        xidx0[x] = x * (srcwidth - 1) / dstwidth;
+-        xmult1[x*2]   = xm1 | (xm1 << 16);
+-        xmult1[x*2+1] = xm1 | (xm1 << 16);
+-        xmult0[x*2]   = xm0 | (xm0 << 16);
+-        xmult0[x*2+1] = xm0 | (xm0 << 16);
+-    }
+-
+-    /* Do the scaling in raster order so we don't trash the cache */
+-    for (y = 0; y < height; y++)
+-    {
+-        Uint8 *srcrow0 = srcpix + y * srcpitch;
+-        Uint8 *dstrow = dstpix + y * dstpitch;
+-        int *xm0 = xmult0;
+-        int *x0 = xidx0;
+-    	int width = dstwidth;
+-        asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
+-             " pxor          %%mm0,      %%mm0;           "
+-             " movq             %5,      %%mm7;           "
+-             "1:                                          "
+-             " movl           (%2),      %%eax;           " /* get xidx0[x] */
+-             " add              $4,         %2;           "
+-             " movq          %%mm7,      %%mm2;           "
+-             " movq           (%0),      %%mm1;           " /* load mult0 */
+-             " add              $8,         %0;           "
+-             " psubw         %%mm1,      %%mm2;           " /* load mult1 */
+-             " movd   (%4,%%eax,4),      %%mm4;           "
+-             " movd  4(%4,%%eax,4),      %%mm5;           "
+-             " punpcklbw     %%mm0,      %%mm4;           "
+-             " punpcklbw     %%mm0,      %%mm5;           "
+-             " pmullw        %%mm1,      %%mm4;           "
+-             " pmullw        %%mm2,      %%mm5;           "
+-             " paddw         %%mm4,      %%mm5;           "
+-             " psrlw            $8,      %%mm5;           "
+-             " packuswb      %%mm0,      %%mm5;           "
+-             " movd          %%mm5,       (%1);           "
+-             " add              $4,         %1;           "
+-             " decl             %3;                       "
+-             " jne              1b;                       "
+-             " emms;                                      "
+-             : "+r"(xm0),    "+r"(dstrow), "+r"(x0), "+m"(width)  /* outputs */
+-             : "S"(srcrow0), "m"(One64)    /* input */
+-             : "%eax"            /* clobbered */
+-             );
+-    }
+-
+-    /* free memory */
+-    free(xidx0);
+-    free(xmult0);
+-    free(xmult1);
+-}
+-
+-void
+-filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
+-{
+-    int *xidx0, *xmult0, *xmult1;
+-    int x, y;
+-    int factorwidth = 8;
+-  	long long One64 = 0x0100010001000100ULL;
+-
+-    /* Allocate memory for factors */
+-    xidx0 = malloc(dstwidth * 4);
+-    if (xidx0 == 0) return;
+-    xmult0 = (int *) malloc(dstwidth * factorwidth);
+-    xmult1 = (int *) malloc(dstwidth * factorwidth);
+-    if (xmult0 == 0 || xmult1 == 0)
+-    {
+-        free(xidx0);
+-        if (xmult0) free(xmult0);
+-        if (xmult1) free(xmult1);
+-    }
+-
+-    /* Create multiplier factors and starting indices and put them in arrays */
+-    for (x = 0; x < dstwidth; x++)
+-    {
+-        int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
+-        int xm0 = 0x100 - xm1;
+-        xidx0[x] = x * (srcwidth - 1) / dstwidth;
+-        xmult1[x*2]   = xm1 | (xm1 << 16);
+-        xmult1[x*2+1] = xm1 | (xm1 << 16);
+-        xmult0[x*2]   = xm0 | (xm0 << 16);
+-        xmult0[x*2+1] = xm0 | (xm0 << 16);
+-    }
+-
+-    /* Do the scaling in raster order so we don't trash the cache */
+-    for (y = 0; y < height; y++)
+-    {
+-        Uint8 *srcrow0 = srcpix + y * srcpitch;
+-        Uint8 *dstrow = dstpix + y * dstpitch;
+-        int *xm0 = xmult0;
+-        int *x0 = xidx0;
+-    	int width = dstwidth;
+-        asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
+-             " pxor          %%mm0,      %%mm0;           "
+-             " movq             %5,      %%mm7;           "
+-             "1:                                          "
+-             " movl           (%2),      %%eax;           " /* get xidx0[x] */
+-             " add              $4,         %2;           "
+-             " movq          %%mm7,      %%mm2;           "
+-             " movq           (%0),      %%mm1;           " /* load mult0 */
+-             " add              $8,         %0;           "
+-             " psubw         %%mm1,      %%mm2;           " /* load mult1 */
+-             " movd   (%4,%%eax,4),      %%mm4;           "
+-             " movd  4(%4,%%eax,4),      %%mm5;           "
+-             " punpcklbw     %%mm0,      %%mm4;           "
+-             " punpcklbw     %%mm0,      %%mm5;           "
+-             " pmullw        %%mm1,      %%mm4;           "
+-             " pmullw        %%mm2,      %%mm5;           "
+-             " paddw         %%mm4,      %%mm5;           "
+-             " psrlw            $8,      %%mm5;           "
+-             " packuswb      %%mm0,      %%mm5;           "
+-             " movd          %%mm5,       (%1);           "
+-             " add              $4,         %1;           "
+-             " decl             %3;                       "
+-             " jne              1b;                       "
+-             " emms;                                      "
+-             : "+r"(xm0),    "+r"(dstrow), "+r"(x0), "+m"(width)  /* outputs */
+-             : "S"(srcrow0), "m"(One64)    /* input */
+-             : "%eax"            /* clobbered */
+-             );
+-    }
+-
+-    /* free memory */
+-    free(xidx0);
+-    free(xmult0);
+-    free(xmult1);
+-}
+-
+-/* These functions implement a bilinear filter in the Y-dimension.
+- */
+-void filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
+-{
+-    int y;
+-
+-    for (y = 0; y < dstheight; y++)
+-    {
+-        int yidx0 = y * (srcheight - 1) / dstheight;
+-        Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
+-        Uint8 *srcrow1 = srcrow0 + srcpitch;
+-        int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
+-        int ymult0 = 0x0100 - ymult1;
+-        Uint8 *dstrow = dstpix + y * dstpitch;
+-        asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
+-             " movl          %5,      %%eax;                      "
+-             " movd          %3,      %%mm1;                      "
+-             " movd          %4,      %%mm2;                      "
+-             " pxor       %%mm0,      %%mm0;                      "
+-             " punpcklwd  %%mm1,      %%mm1;                      "
+-             " punpckldq  %%mm1,      %%mm1;                      "
+-             " punpcklwd  %%mm2,      %%mm2;                      "
+-             " punpckldq  %%mm2,      %%mm2;                      "
+-             "1:                                                  "
+-             " movd        (%0),      %%mm4;                      "
+-             " add           $4,         %0;                      "
+-             " movd        (%1),      %%mm5;                      "
+-             " add           $4,         %1;                      "
+-             " punpcklbw  %%mm0,     %%mm4;                       "
+-             " punpcklbw  %%mm0,     %%mm5;                       "
+-             " pmullw     %%mm1,     %%mm4;                       "
+-             " pmullw     %%mm2,     %%mm5;                       "
+-             " paddw      %%mm4,     %%mm5;                       "
+-             " psrlw         $8,     %%mm5;                       "
+-             " packuswb   %%mm0,     %%mm5;                       "
+-             " movd       %%mm5,      (%2);                       "
+-             " add           $4,        %2;                       "
+-             " decl       %%eax;                                  "
+-             " jne           1b;                                  "
+-             " emms;                                              "
+-             : "+r"(srcrow0), "+r"(srcrow1),"+r"(dstrow)   /* no outputs */
+-             : "m"(ymult0),   "m"(ymult1),  "m"(width)    /* input */
+-             : "%eax"        /* clobbered */
+-             );
+-    }
+-}
+-
+-void filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
+-{
+-    int y;
+-
+-    for (y = 0; y < dstheight; y++)
+-    {
+-        int yidx0 = y * (srcheight - 1) / dstheight;
+-        Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
+-        Uint8 *srcrow1 = srcrow0 + srcpitch;
+-        int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
+-        int ymult0 = 0x0100 - ymult1;
+-        Uint8 *dstrow = dstpix + y * dstpitch;
+-        asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
+-             " movl          %5,      %%eax;                      "
+-             " movd          %3,      %%mm1;                      "
+-             " movd          %4,      %%mm2;                      "
+-             " pxor       %%mm0,      %%mm0;                      "
+-             " pshufw      $0, %%mm1, %%mm1;                      "
+-             " pshufw      $0, %%mm2, %%mm2;                      "
+-             "1:                                                  "
+-             " movd        (%0),      %%mm4;                      "
+-             " add           $4,         %0;                      "
+-             " movd        (%1),      %%mm5;                      "
+-             " add           $4,         %1;                      "
+-             " punpcklbw  %%mm0,     %%mm4;                       "
+-             " punpcklbw  %%mm0,     %%mm5;                       "
+-             " pmullw     %%mm1,     %%mm4;                       "
+-             " pmullw     %%mm2,     %%mm5;                       "
+-             " paddw      %%mm4,     %%mm5;                       "
+-             " psrlw         $8,     %%mm5;                       "
+-             " packuswb   %%mm0,     %%mm5;                       "
+-             " movd       %%mm5,      (%2);                       "
+-             " add           $4,        %2;                       "
+-             " decl       %%eax;                                  "
+-             " jne           1b;                                  "
+-             " emms;                                              "
+-             : "+r"(srcrow0), "+r"(srcrow1),"+r"(dstrow)   /* no outputs */
+-             : "m"(ymult0),   "m"(ymult1),  "m"(width)    /* input */
+-             : "%eax"        /* clobbered */
+-             );
+-    }
+-}
+diff --git a/src/scale_mmx64.c b/src/scale_mmx64.c
+deleted file mode 100644
+index e897f76..0000000
+--- a/src/scale_mmx64.c
++++ /dev/null
+@@ -1,626 +0,0 @@
+-/*
+-  pygame - Python Game Library
+-  Copyright (C) 2000-2001  Pete Shinners
+-  Copyright (C) 2007  Rene Dudfield, Richard Goedeken 
+-
+-  This library is free software; you can redistribute it and/or
+-  modify it under the terms of the GNU Library General Public
+-  License as published by the Free Software Foundation; either
+-  version 2 of the License, or (at your option) any later version.
+-
+-  This library is distributed in the hope that it will be useful,
+-  but WITHOUT ANY WARRANTY; without even the implied warranty of
+-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+-  Library General Public License for more details.
+-
+-  You should have received a copy of the GNU Library General Public
+-  License along with this library; if not, write to the Free
+-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-
+-  Pete Shinners
+-  pete@shinners.org
+-*/
+-
+-/* Pentium 64 bit SSE/MMX smoothscale routines
+- * These are written for compilation with GCC only.
+- *
+- * This file should not depend on anything but the C standard library.
+- */
+-
+-#if !defined(__GNUC__) || !defined(__x86_64__)
+-#error "Pygame build bug: should not be compiling this file!"
+-#endif
+-
+-#include <stdint.h>
+-typedef uint8_t Uint8;    /* SDL convension */
+-typedef uint16_t Uint16;  /* SDL convension */
+-#include <stdlib.h>
+-#include <memory.h>
+-#include "scale.h"
+-
+-/* These functions implement an area-averaging shrinking filter in the X-dimension.
+- */
+-void
+-filter_shrink_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
+-{
+-    int srcdiff = srcpitch - (srcwidth * 4);
+-    int dstdiff = dstpitch - (dstwidth * 4);
+-
+-    int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
+-    int xrecip = 0x40000000 / xspace;
+-    long long One64 = 0x4000400040004000ULL;
+-    long long srcdiff64 = srcdiff;
+-    long long dstdiff64 = dstdiff;
+-    asm __volatile__(" /* MMX code for X-shrink area average filter */ "
+-        " pxor          %%mm0,      %%mm0;           "
+-        " movd             %6,      %%mm7;           " /* mm7 == xrecipmmx */
+-        " punpcklwd     %%mm7,      %%mm7;           "
+-        " punpckldq     %%mm7,      %%mm7;           "
+-        "1:                                          " /* outer Y-loop */
+-        " movl             %5,      %%ecx;           " /* ecx == xcounter */
+-        " pxor          %%mm1,      %%mm1;           " /* mm1 == accumulator */
+-        " movl             %4,      %%edx;           " /* edx == width */
+-        "2:                                          " /* inner X-loop */
+-        " cmpl        $0x4000,      %%ecx;           "
+-        " jbe              3f;                       "
+-        " movd           (%0),      %%mm2;           " /* mm2 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm2;           "
+-        " paddw         %%mm2,      %%mm1;           " /* accumulator += srcpix */
+-        " subl        $0x4000,      %%ecx;           "
+-        " jmp              4f;                       "
+-        "3:                                          " /* prepare to output a pixel */
+-        " movd          %%ecx,      %%mm2;           "
+-        " movq             %2,      %%mm3;           " /* mm3 = 2^14  */
+-        " punpcklwd     %%mm2,      %%mm2;           "
+-        " punpckldq     %%mm2,      %%mm2;           "
+-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm4;           "
+-        " psubw         %%mm2,      %%mm3;           " /* mm3 = xfrac */
+-        " psllw            $2,      %%mm4;           "
+-        " movq          %%mm4,      %%mm5;           " /* mm2 = (srcpix * xcounter >> 16) */
+-        " psraw           $15,      %%mm5;           "
+-        " pand          %%mm2,      %%mm5;           "
+-        " movq          %%mm2,      %%mm6;           "
+-        " psraw           $15,      %%mm6;           "
+-        " pand          %%mm4,      %%mm6;           "
+-        " pmulhw        %%mm4,      %%mm2;           "
+-        " paddw         %%mm5,      %%mm2;           "
+-        " paddw         %%mm6,      %%mm2;           "
+-        " movq          %%mm4,      %%mm5;           " /* mm3 = (srcpix * xfrac) >> 16) */
+-        " psraw           $15,      %%mm5;           "
+-        " pand          %%mm3,      %%mm5;           "
+-        " movq          %%mm3,      %%mm6;           "
+-        " psraw           $15,      %%mm6;           "
+-        " pand          %%mm4,      %%mm6;           "
+-        " pmulhw        %%mm4,      %%mm3;           "
+-        " paddw         %%mm5,      %%mm3;           "
+-        " paddw         %%mm6,      %%mm3;           "
+-        " paddw         %%mm1,      %%mm2;           "
+-        " movq          %%mm3,      %%mm1;           " /* accumulator = (srcpix * xfrac) >> 16 */
+-        " movq          %%mm7,      %%mm5;           "
+-        " psraw           $15,      %%mm5;           "
+-        " pand          %%mm2,      %%mm5;           "
+-        " movq          %%mm2,      %%mm6;           "
+-        " psraw           $15,      %%mm6;           "
+-        " pand          %%mm7,      %%mm6;           "
+-        " pmulhw        %%mm7,      %%mm2;           "
+-        " paddw         %%mm5,      %%mm2;           "
+-        " paddw         %%mm6,      %%mm2;           "
+-        " packuswb      %%mm0,      %%mm2;           "
+-        " movd          %%mm2,       (%1);           "
+-        " add              %5,      %%ecx;           "
+-        " add              $4,         %1;           "
+-        " subl        $0x4000,      %%ecx;           "
+-        "4:                                          " /* tail of inner X-loop */
+-        " decl          %%edx;                       "
+-        " jne              2b;                       "
+-        " add              %7,         %0;           " /* srcpix += srcdiff */
+-        " add              %8,         %1;           " /* dstpix += dstdiff */
+-        " decl             %3;                       "
+-        " jne              1b;                       "
+-        " emms;                                      "
+-        : "+r"(srcpix), "+r"(dstpix)  /* outputs */
+-        : "m"(One64),   "m"(height), "m"(srcwidth),
+-          "m"(xspace),  "m"(xrecip), "m"(srcdiff64), "m"(dstdiff64)     /* inputs */
+-        : "%ecx","%edx"               /* clobbered */
+-        );
+-}
+-
+-void
+-filter_shrink_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
+-{
+-    int srcdiff = srcpitch - (srcwidth * 4);
+-    int dstdiff = dstpitch - (dstwidth * 4);
+-
+-    int xspace = 0x04000 * srcwidth / dstwidth; /* must be > 1 */
+-    int xrecip = 0x40000000 / xspace;
+-    long long One64 = 0x4000400040004000ULL;
+-    long long srcdiff64 = srcdiff;
+-    long long dstdiff64 = dstdiff;
+-    asm __volatile__(" /* MMX code for X-shrink area average filter */ "
+-        " pxor          %%mm0,      %%mm0;           "
+-        " movd             %6,      %%mm7;           " /* mm7 == xrecipmmx */
+-        " movq             %2,      %%mm6;           " /* mm6 = 2^14  */
+-        " pshufw    $0, %%mm7,      %%mm7;           "
+-        "1:                                          " /* outer Y-loop */
+-        " movl             %5,      %%ecx;           " /* ecx == xcounter */
+-        " pxor          %%mm1,      %%mm1;           " /* mm1 == accumulator */
+-        " movl             %4,      %%edx;           " /* edx == width */
+-        "2:                                          " /* inner X-loop */
+-        " cmpl        $0x4000,      %%ecx;           "
+-        " jbe              3f;                       "
+-        " movd           (%0),      %%mm2;           " /* mm2 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm2;           "
+-        " paddw         %%mm2,      %%mm1;           " /* accumulator += srcpix */
+-        " subl        $0x4000,      %%ecx;           "
+-        " jmp              4f;                       "
+-        "3:                                          " /* prepare to output a pixel */
+-        " movd          %%ecx,      %%mm2;           "
+-        " movq          %%mm6,      %%mm3;           " /* mm3 = 2^14  */
+-        " pshufw    $0, %%mm2,      %%mm2;           "
+-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm4;           "
+-        " psubw         %%mm2,      %%mm3;           " /* mm3 = xfrac */
+-        " psllw            $2,      %%mm4;           "
+-        " pmulhuw       %%mm4,      %%mm2;           " /* mm2 = (srcpix * xcounter >> 16) */
+-        " pmulhuw       %%mm4,      %%mm3;           " /* mm3 = (srcpix * xfrac) >> 16 */
+-        " paddw         %%mm1,      %%mm2;           "
+-        " movq          %%mm3,      %%mm1;           " /* accumulator = (srcpix * xfrac) >> 16 */
+-        " pmulhuw       %%mm7,      %%mm2;           "
+-        " packuswb      %%mm0,      %%mm2;           "
+-        " movd          %%mm2,       (%1);           "
+-        " add              %5,      %%ecx;           "
+-        " add              $4,         %1;           "
+-        " subl        $0x4000,      %%ecx;           "
+-        "4:                                          " /* tail of inner X-loop */
+-        " decl          %%edx;                       "
+-        " jne              2b;                       "
+-        " add              %7,         %0;           " /* srcpix += srcdiff */
+-        " add              %8,         %1;           " /* dstpix += dstdiff */
+-        " decl             %3;                       "
+-        " jne              1b;                       "
+-        " emms;                                      "
+-        : "+r"(srcpix), "+r"(dstpix)  /* outputs */
+-        : "m"(One64),   "m"(height), "m"(srcwidth),
+-          "m"(xspace),  "m"(xrecip), "m"(srcdiff64), "m"(dstdiff64)     /* inputs */
+-        : "%ecx","%edx"               /* clobbered */
+-        );
+-}
+-
+-/* These functions implement an area-averaging shrinking filter in the Y-dimension.
+- */
+-void
+-filter_shrink_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
+-{
+-    Uint16 *templine;
+-    int srcdiff = srcpitch - (width * 4);
+-    int dstdiff = dstpitch - (width * 4);
+-    int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
+-    int yrecip = 0x40000000 / yspace;
+-    long long One64 = 0x4000400040004000ULL;
+-
+-    /* allocate and clear a memory area for storing the accumulator line */
+-    templine = (Uint16 *) malloc(dstpitch * 2);
+-    if (templine == 0) return;
+-    memset(templine, 0, dstpitch * 2);
+-    long long srcdiff64 = srcdiff;
+-    long long dstdiff64 = dstdiff;
+-    asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
+-        " movl             %5,      %%ecx;           " /* ecx == ycounter */
+-        " pxor          %%mm0,      %%mm0;           "
+-        " movd             %6,      %%mm7;           " /* mm7 == yrecipmmx */
+-        " punpcklwd     %%mm7,      %%mm7;           "
+-        " punpckldq     %%mm7,      %%mm7;           "
+-        "1:                                          " /* outer Y-loop */
+-        " mov              %2,      %%rax;           " /* rax == accumulate */
+-        " cmpl        $0x4000,      %%ecx;           "
+-        " jbe              3f;                       "
+-        " movl             %4,      %%edx;           " /* edx == width */
+-        "2:                                          "
+-        " movd           (%0),      %%mm1;           "
+-        " add              $4,         %0;           "
+-        " movq        (%%rax),      %%mm2;           "
+-        " punpcklbw     %%mm0,      %%mm1;           "
+-        " paddw         %%mm1,      %%mm2;           "
+-        " movq          %%mm2,    (%%rax);           "
+-        " add              $8,      %%rax;           "
+-        " decl          %%edx;                       "
+-        " jne              2b;                       "
+-        " subl        $0x4000,      %%ecx;           "
+-        " jmp              6f;                       "
+-        "3:                                          " /* prepare to output a line */
+-        " movd          %%ecx,      %%mm1;           "
+-        " movl             %4,      %%edx;           " /* edx = width */
+-        " movq             %9,      %%mm6;           " /* mm6 = 2^14  */
+-        " punpcklwd     %%mm1,      %%mm1;           "
+-        " punpckldq     %%mm1,      %%mm1;           "
+-        " psubw         %%mm1,      %%mm6;           " /* mm6 = yfrac */
+-        "4:                                          "
+-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm4;           "
+-        " movq        (%%rax),      %%mm5;           " /* mm5 = accumulate */
+-        " movq          %%mm6,      %%mm3;           "
+-        " psllw            $2,      %%mm4;           "
+-        " movq          %%mm4,      %%mm0;           " /* mm3 = (srcpix * yfrac) >> 16) */
+-        " psraw           $15,      %%mm0;           "
+-        " pand          %%mm3,      %%mm0;           "
+-        " movq          %%mm3,      %%mm2;           "
+-        " psraw           $15,      %%mm2;           "
+-        " pand          %%mm4,      %%mm2;           "
+-        " pmulhw        %%mm4,      %%mm3;           "
+-        " paddw         %%mm0,      %%mm3;           "
+-        " paddw         %%mm2,      %%mm3;           "
+-        " movq          %%mm1,      %%mm0;           " /* mm4 = (srcpix * ycounter >> 16) */
+-        " psraw           $15,      %%mm0;           "
+-        " pand          %%mm4,      %%mm0;           "
+-        " movq          %%mm4,      %%mm2;           "
+-        " psraw           $15,      %%mm2;           "
+-        " pand          %%mm1,      %%mm2;           "
+-        " pmulhw        %%mm1,      %%mm4;           "
+-        " paddw         %%mm0,      %%mm4;           "
+-        " paddw         %%mm2,      %%mm4;           "
+-        " movq          %%mm3,    (%%rax);           "
+-        " paddw         %%mm5,      %%mm4;           "
+-        " add              $8,      %%rax;           "
+-        " movq          %%mm7,      %%mm0;           "
+-        " psraw           $15,      %%mm0;           "
+-        " pand          %%mm4,      %%mm0;           "
+-        " movq          %%mm4,      %%mm2;           "
+-        " psraw           $15,      %%mm2;           "
+-        " pand          %%mm7,      %%mm2;           "
+-        " pmulhw        %%mm7,      %%mm4;           "
+-        " paddw         %%mm0,      %%mm4;           "
+-        " paddw         %%mm2,      %%mm4;           "
+-        " pxor          %%mm0,      %%mm0;           "
+-        " packuswb      %%mm0,      %%mm4;           "
+-        " movd          %%mm4,       (%1);           "
+-        " add              $4,         %1;           "
+-        " decl          %%edx;                       "
+-        " jne              4b;                       "
+-        " add              %8,         %1;           " /* dstpix += dstdiff */
+-        " addl             %5,      %%ecx;           "
+-        " subl        $0x4000,      %%ecx;           "
+-        "6:                                          " /* tail of outer Y-loop */
+-        " add              %7,         %0;           " /* srcpix += srcdiff */
+-        " decl             %3;                       "
+-        " jne              1b;                       "
+-        " emms;                                      "
+-        : "+r"(srcpix), "+r"(dstpix)    /* outputs */
+-        : "m"(templine),"m"(srcheight), "m"(width),     "m"(yspace),  
+-          "m"(yrecip),  "m"(srcdiff64), "m"(dstdiff64), "m"(One64)  /* input */
+-        : "%ecx","%edx","%rax"          /* clobbered */
+-        );
+-
+-    /* free the temporary memory */
+-    free(templine);
+-}
+-
+-void
+-filter_shrink_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
+-{
+-    Uint16 *templine;
+-    int srcdiff = srcpitch - (width * 4);
+-    int dstdiff = dstpitch - (width * 4);
+-    int yspace = 0x4000 * srcheight / dstheight; /* must be > 1 */
+-    int yrecip = 0x40000000 / yspace;
+-    long long One64 = 0x4000400040004000ULL;
+-
+-    /* allocate and clear a memory area for storing the accumulator line */
+-    templine = (Uint16 *) malloc(dstpitch * 2);
+-    if (templine == 0) return;
+-    memset(templine, 0, dstpitch * 2);
+-    long long srcdiff64 = srcdiff;
+-    long long dstdiff64 = dstdiff;
+-    asm __volatile__(" /* MMX code for Y-shrink area average filter */ "
+-        " movl             %5,      %%ecx;           " /* ecx == ycounter */
+-        " pxor          %%mm0,      %%mm0;           "
+-        " movd             %6,      %%mm7;           " /* mm7 == yrecipmmx */
+-        " pshufw    $0, %%mm7,      %%mm7;           "
+-        "1:                                          " /* outer Y-loop */
+-        " mov              %2,      %%rax;           " /* rax == accumulate */
+-        " cmpl        $0x4000,      %%ecx;           "
+-        " jbe              3f;                       "
+-        " movl             %4,      %%edx;           " /* edx == width */
+-        "2:                                          "
+-        " movd           (%0),      %%mm1;           "
+-        " add              $4,         %0;           "
+-        " movq        (%%rax),      %%mm2;           "
+-        " punpcklbw     %%mm0,      %%mm1;           "
+-        " paddw         %%mm1,      %%mm2;           "
+-        " movq          %%mm2,    (%%rax);           "
+-        " add              $8,      %%rax;           "
+-        " decl          %%edx;                       "
+-        " jne              2b;                       "
+-        " subl        $0x4000,      %%ecx;           "
+-        " jmp              6f;                       "
+-        "3:                                          " /* prepare to output a line */
+-        " movd          %%ecx,      %%mm1;           "
+-        " movl             %4,      %%edx;           " /* edx = width */
+-        " movq             %9,      %%mm6;           " /* mm6 = 2^14  */
+-        " pshufw    $0, %%mm1,      %%mm1;           "
+-        " psubw         %%mm1,      %%mm6;           " /* mm6 = yfrac */
+-        "4:                                          "
+-        " movd           (%0),      %%mm4;           " /* mm4 = srcpix */
+-        " add              $4,         %0;           "
+-        " punpcklbw     %%mm0,      %%mm4;           "
+-        " movq        (%%rax),      %%mm5;           " /* mm5 = accumulate */
+-        " movq          %%mm6,      %%mm3;           "
+-        " psllw            $2,      %%mm4;           "
+-        " pmulhuw       %%mm4,      %%mm3;           " /* mm3 = (srcpix * yfrac) >> 16 */
+-        " pmulhuw       %%mm1,      %%mm4;           " /* mm4 = (srcpix * ycounter >> 16) */
+-        " movq          %%mm3,    (%%rax);           "
+-        " paddw         %%mm5,      %%mm4;           "
+-        " add              $8,      %%rax;           "
+-        " pmulhuw       %%mm7,      %%mm4;           "
+-        " packuswb      %%mm0,      %%mm4;           "
+-        " movd          %%mm4,       (%1);           "
+-        " add              $4,         %1;           "
+-        " decl          %%edx;                       "
+-        " jne              4b;                       "
+-        " add              %8,         %1;           " /* dstpix += dstdiff */
+-        " addl             %5,      %%ecx;           "
+-        " subl        $0x4000,      %%ecx;           "
+-        "6:                                          " /* tail of outer Y-loop */
+-        " add              %7,         %0;           " /* srcpix += srcdiff */
+-        " decl             %3;                       "
+-        " jne              1b;                       "
+-        " emms;                                      "
+-        : "+r"(srcpix), "+r"(dstpix)    /* outputs */
+-        : "m"(templine),"m"(srcheight), "m"(width),     "m"(yspace),  
+-          "m"(yrecip),  "m"(srcdiff64), "m"(dstdiff64), "m"(One64)  /* input */
+-        : "%ecx","%edx","%rax"          /* clobbered */
+-        );
+-
+-    /* free the temporary memory */
+-    free(templine);
+-}
+-
+-/* These functions implement a bilinear filter in the X-dimension.
+- */
+-void
+-filter_expand_X_MMX(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
+-{
+-    int *xidx0, *xmult0, *xmult1;
+-    int x, y;
+-    int factorwidth = 8;
+-
+-    /* Allocate memory for factors */
+-    xidx0 = malloc(dstwidth * 4);
+-    if (xidx0 == 0) return;
+-    xmult0 = (int *) malloc(dstwidth * factorwidth);
+-    xmult1 = (int *) malloc(dstwidth * factorwidth);
+-    if (xmult0 == 0 || xmult1 == 0)
+-    {
+-        free(xidx0);
+-        if (xmult0) free(xmult0);
+-        if (xmult1) free(xmult1);
+-    }
+-
+-    /* Create multiplier factors and starting indices and put them in arrays */
+-    for (x = 0; x < dstwidth; x++)
+-    {
+-        int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
+-        int xm0 = 0x100 - xm1;
+-        xidx0[x] = x * (srcwidth - 1) / dstwidth;
+-        xmult1[x*2]   = xm1 | (xm1 << 16);
+-        xmult1[x*2+1] = xm1 | (xm1 << 16);
+-        xmult0[x*2]   = xm0 | (xm0 << 16);
+-        xmult0[x*2+1] = xm0 | (xm0 << 16);
+-    }
+-
+-    /* Do the scaling in raster order so we don't trash the cache */
+-    for (y = 0; y < height; y++)
+-    {
+-        Uint8 *srcrow0 = srcpix + y * srcpitch;
+-        Uint8 *dstrow = dstpix + y * dstpitch;
+-        int *xm0 = xmult0;
+-		int *xm1 = xmult1;
+-        int *x0 = xidx0;
+-        asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
+-             " movl             %5,      %%ecx;           "
+-             " pxor          %%mm0,      %%mm0;           "
+-             "1:                                          "
+-             " movsxl         (%3),      %%rax;           " /* get xidx0[x] */
+-             " add              $4,         %3;           "
+-             " movq           (%0),      %%mm1;           " /* load mult0 */
+-             " add              $8,         %0;           "
+-             " movq           (%1),      %%mm2;           " /* load mult1 */
+-             " add              $8,         %1;           "
+-             " movd   (%4,%%rax,4),      %%mm4;           "
+-             " movd  4(%4,%%rax,4),      %%mm5;           "
+-             " punpcklbw     %%mm0,      %%mm4;           "
+-             " punpcklbw     %%mm0,      %%mm5;           "
+-             " pmullw        %%mm1,      %%mm4;           "
+-             " pmullw        %%mm2,      %%mm5;           "
+-             " paddw         %%mm4,      %%mm5;           "
+-             " psrlw            $8,      %%mm5;           "
+-             " packuswb      %%mm0,      %%mm5;           "
+-             " movd          %%mm5,       (%2);           "
+-             " add              $4,         %2;           "
+-             " decl          %%ecx;                       "
+-             " jne              1b;                       "
+-             " emms;                                      "
+-             : "+r"(xm0),   "+r"(xm1), "+r"(dstrow), "+r"(x0) /* outputs */
+-             : "r"(srcrow0),"m"(dstwidth)  /* input */
+-             : "%ecx","%rax"                /* clobbered */
+-             );
+-    }
+-
+-    /* free memory */
+-    free(xidx0);
+-    free(xmult0);
+-    free(xmult1);
+-}
+-
+-void
+-filter_expand_X_SSE(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch, int dstpitch, int srcwidth, int dstwidth)
+-{
+-    int *xidx0, *xmult0, *xmult1;
+-    int x, y;
+-    int factorwidth = 8;
+-
+-    /* Allocate memory for factors */
+-    xidx0 = malloc(dstwidth * 4);
+-    if (xidx0 == 0) return;
+-    xmult0 = (int *) malloc(dstwidth * factorwidth);
+-    xmult1 = (int *) malloc(dstwidth * factorwidth);
+-    if (xmult0 == 0 || xmult1 == 0)
+-    {
+-        free(xidx0);
+-        if (xmult0) free(xmult0);
+-        if (xmult1) free(xmult1);
+-    }
+-
+-    /* Create multiplier factors and starting indices and put them in arrays */
+-    for (x = 0; x < dstwidth; x++)
+-    {
+-        int xm1 = 0x100 * ((x * (srcwidth - 1)) % dstwidth) / dstwidth;
+-        int xm0 = 0x100 - xm1;
+-        xidx0[x] = x * (srcwidth - 1) / dstwidth;
+-        xmult1[x*2]   = xm1 | (xm1 << 16);
+-        xmult1[x*2+1] = xm1 | (xm1 << 16);
+-        xmult0[x*2]   = xm0 | (xm0 << 16);
+-        xmult0[x*2+1] = xm0 | (xm0 << 16);
+-    }
+-
+-    /* Do the scaling in raster order so we don't trash the cache */
+-    for (y = 0; y < height; y++)
+-    {
+-        Uint8 *srcrow0 = srcpix + y * srcpitch;
+-        Uint8 *dstrow = dstpix + y * dstpitch;
+-        int *xm0 = xmult0;
+-		int *xm1 = xmult1;
+-        int *x0 = xidx0;
+-        asm __volatile__( " /* MMX code for inner loop of X bilinear filter */ "
+-             " movl             %5,      %%ecx;           "
+-             " pxor          %%mm0,      %%mm0;           "
+-             "1:                                          "
+-             " movsxl         (%3),      %%rax;           " /* get xidx0[x] */
+-             " add              $4,         %3;           "
+-             " movq           (%0),      %%mm1;           " /* load mult0 */
+-             " add              $8,         %0;           "
+-             " movq           (%1),      %%mm2;           " /* load mult1 */
+-             " add              $8,         %1;           "
+-             " movd   (%4,%%rax,4),      %%mm4;           "
+-             " movd  4(%4,%%rax,4),      %%mm5;           "
+-             " punpcklbw     %%mm0,      %%mm4;           "
+-             " punpcklbw     %%mm0,      %%mm5;           "
+-             " pmullw        %%mm1,      %%mm4;           "
+-             " pmullw        %%mm2,      %%mm5;           "
+-             " paddw         %%mm4,      %%mm5;           "
+-             " psrlw            $8,      %%mm5;           "
+-             " packuswb      %%mm0,      %%mm5;           "
+-             " movd          %%mm5,       (%2);           "
+-             " add              $4,         %2;           "
+-             " decl          %%ecx;                       "
+-             " jne              1b;                       "
+-             " emms;                                      "
+-             : "+r"(xm0),   "+r"(xm1), "+r"(dstrow), "+r"(x0) /* outputs */
+-             : "r"(srcrow0),"m"(dstwidth)  /* input */
+-             : "%ecx","%rax"                /* clobbered */
+-             );
+-    }
+-
+-    /* free memory */
+-    free(xidx0);
+-    free(xmult0);
+-    free(xmult1);
+-}
+-
+-/* These functions implement a bilinear filter in the Y-dimension
+- */
+-void
+-filter_expand_Y_MMX(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
+-{
+-    int y;
+-
+-    for (y = 0; y < dstheight; y++)
+-    {
+-        int yidx0 = y * (srcheight - 1) / dstheight;
+-        Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
+-        Uint8 *srcrow1 = srcrow0 + srcpitch;
+-        int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
+-        int ymult0 = 0x0100 - ymult1;
+-        Uint8 *dstrow = dstpix + y * dstpitch;
+-        asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
+-             " movl          %5,      %%ecx;                      "
+-             " movd          %3,      %%mm1;                      "
+-             " movd          %4,      %%mm2;                      "
+-             " pxor       %%mm0,      %%mm0;                      "
+-             " punpcklwd  %%mm1,      %%mm1;                      "
+-             " punpckldq  %%mm1,      %%mm1;                      "
+-             " punpcklwd  %%mm2,      %%mm2;                      "
+-             " punpckldq  %%mm2,      %%mm2;                      "
+-             "1:                                                  "
+-             " movd        (%0),      %%mm4;                      "
+-             " add           $4,         %0;                      "
+-             " movd        (%1),      %%mm5;                      "
+-             " add           $4,         %1;                      "
+-             " punpcklbw  %%mm0,      %%mm4;                      "
+-             " punpcklbw  %%mm0,      %%mm5;                      "
+-             " pmullw     %%mm1,      %%mm4;                      "
+-             " pmullw     %%mm2,      %%mm5;                      "
+-             " paddw      %%mm4,      %%mm5;                      "
+-             " psrlw         $8,      %%mm5;                      "
+-             " packuswb   %%mm0,      %%mm5;                      "
+-             " movd       %%mm5,       (%2);                      "
+-             " add           $4,         %2;                      "
+-             " decl       %%ecx;                                  "
+-             " jne           1b;                                  "
+-             " emms;                                              "
+-             : "+r"(srcrow0), "+r"(srcrow1), "+r"(dstrow)   /* outputs */
+-             : "m"(ymult0),   "m"(ymult1),   "m"(width)    /* input */
+-             : "%ecx"         /* clobbered */
+-             );
+-    }
+-}
+-
+-void
+-filter_expand_Y_SSE(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch, int dstpitch, int srcheight, int dstheight)
+-{
+-    int y;
+-
+-    for (y = 0; y < dstheight; y++)
+-    {
+-        int yidx0 = y * (srcheight - 1) / dstheight;
+-        Uint8 *srcrow0 = srcpix + yidx0 * srcpitch;
+-        Uint8 *srcrow1 = srcrow0 + srcpitch;
+-        int ymult1 = 0x0100 * ((y * (srcheight - 1)) % dstheight) / dstheight;
+-        int ymult0 = 0x0100 - ymult1;
+-        Uint8 *dstrow = dstpix + y * dstpitch;
+-        asm __volatile__( " /* MMX code for inner loop of Y bilinear filter */ "
+-             " movl          %5,      %%ecx;                      "
+-             " movd          %3,      %%mm1;                      "
+-             " movd          %4,      %%mm2;                      "
+-             " pxor       %%mm0,      %%mm0;                      "
+-             " pshufw      $0, %%mm1, %%mm1;                      "
+-             " pshufw      $0, %%mm2, %%mm2;                      "
+-             "1:                                                  "
+-             " movd        (%0),      %%mm4;                      "
+-             " add           $4,         %0;                      "
+-             " movd        (%1),      %%mm5;                      "
+-             " add           $4,         %1;                      "
+-             " punpcklbw  %%mm0,      %%mm4;                      "
+-             " punpcklbw  %%mm0,      %%mm5;                      "
+-             " pmullw     %%mm1,      %%mm4;                      "
+-             " pmullw     %%mm2,      %%mm5;                      "
+-             " paddw      %%mm4,      %%mm5;                      "
+-             " psrlw         $8,      %%mm5;                      "
+-             " packuswb   %%mm0,      %%mm5;                      "
+-             " movd       %%mm5,       (%2);                      "
+-             " add           $4,         %2;                      "
+-             " decl       %%ecx;                                  "
+-             " jne           1b;                                  "
+-             " emms;                                              "
+-             : "+r"(srcrow0), "+r"(srcrow1), "+r"(dstrow)   /* outputs */
+-             : "m"(ymult0),   "m"(ymult1),   "m"(width)    /* input */
+-             : "%ecx"         /* clobbered */
+-             );
+-    }
+-}
+-
+diff --git a/src/transform.c b/src/transform.c
+index c997deb..ee0d03d 100644
+--- a/src/transform.c
++++ b/src/transform.c
+@@ -29,7 +29,7 @@
+ #include "pygamedocs.h"
+ #include <math.h>
+ #include <string.h>
+-#include "scale.h"
++//#include "scale.h"
+ 
+ 
+ typedef void (* SMOOTHSCALE_FILTER_P)(Uint8 *, Uint8 *, int, int, int, int, int);
diff --git a/dev-python/pygame/pygame-1.9.1.recipe b/dev-python/pygame/pygame-1.9.1.recipe
index 545dbeeff..4290a985e 100644
--- a/dev-python/pygame/pygame-1.9.1.recipe
+++ b/dev-python/pygame/pygame-1.9.1.recipe
@@ -1,32 +1,85 @@
+SUMMARY="PyGame - a popular game development module for python"
 DESCRIPTION="
-pygame - python bindings to sdl and other libs that facilitate game production.
+PyGame - python bindings to sdl and other libs that facilitate game production.
 "
-HOMEPAGE="http://www.pygame.org" 
+HOMEPAGE="http://www.pygame.org/" 
 SRC_URI="http://www.pygame.org/ftp/pygame-1.9.1release.tar.gz"
 CHECKSUM_MD5="1c4cdc708d17c8250a2d78ef997222fc"
-REVISION="1"
-STATUS_HAIKU="stable"
-DEPEND="dev-lang/python >= 2.6.4
-	media-libs/libsdl >= 1.2.14
-	media-libs/sdl-image >= 1.2.10
-	media-libs/smpeg >= 0.4.5"
+CHECKSUM_SHA256="a26095472ae4be9631e0d5bfb9a52ac57a3a091e45757913128e4a473807d433"
 
-BUILD()
-{
-	cd pygame-1.9.1release
-	LOCALBASE=/boot/common/
-	python setup.py build
-}
-
-INSTALL()
-{
-	cd pygame-1.9.1release
-	python setup.py install --root=${DESTDIR}
-}
-LICENSE="GNU LGPL v2.1"
 COPYRIGHT="2000-2004, 2007  Pete Shinners
 	2004 Takafumi Mizuno
 	2006-2007 Rene Dudfield
 	2007 Richard Goedeken
 	2007-2008 Marcus von Appen
 	"
+LICENSE="GNU LGPL v2.1"
+REVISION="1"
+
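+# Only built on x86_gcc2 so far. There is no stable python_x86 package, so
+# building for x86 as a secondary architecture is not possible yet.
+# NOTE(review): "x86 ?x86_gcc2" below marks x86_gcc2 as the untested one - confirm.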
+ARCHITECTURES="x86 ?x86_gcc2"
+SECONDARY_ARCHITECTURES="x86_gcc2 ?x86"
+
+#ARCHITECTURES="x86"
+#if [ $effectiveTargetArchitecture != x86_gcc2 ]; then
+# # x86_gcc2 is fine as primary target architecture as long as we're building
+# # for a different secondary architecture.
+# ARCHITECTURES="$ARCHITECTURES x86_gcc2"
+#fi
+#SECONDARY_ARCHITECTURES="x86"
+
+SOURCE_DIR="pygame-1.9.1release"
+
+PROVIDES="
+	pygame${secondaryArchSuffix} = $portVersion
+	"
+#  python$secondaryArchSuffix >= 2.6.4
+REQUIRES="
+	python >= 2.6.4
+	libsdl$secondaryArchSuffix >= 1.2.14
+	sdl_image$secondaryArchSuffix >= 1.2.10
+	sdl_ttf$secondaryArchSuffix >= 1.2
+	sdl_mixer$secondaryArchSuffix >= 1.2
+	smpeg$secondaryArchSuffix >= 0.4.5
+	lib:libpng$secondaryArchSuffix
+	jpeg$secondaryArchSuffix	
+	sdl_gfx$secondaryArchSuffix	
+	"
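+# NOTE(review): portmidi and "portmap" (presumably pygame's porttime dependency)
+# are not listed in REQUIRES above - confirm whether they should be added.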
+
+BUILD_REQUIRES="
+	haiku${secondaryArchSuffix}_devel >= $haikuVersion
+	python >= 2.6.4
+	gcc${secondaryArchSuffix}
+	libsdl${secondaryArchSuffix}_devel >= 1.2.14
+	sdl_image${secondaryArchSuffix}_devel >= 1.2.10
+	sdl_ttf${secondaryArchSuffix}_devel >= 1.2
+	sdl_mixer${secondaryArchSuffix}_devel >= 1.2
+	libpng${secondaryArchSuffix}_devel
+	jpeg${secondaryArchSuffix}_devel
+	smpeg${secondaryArchSuffix}_devel >= 0.4.5
+	sdl_gfx${secondaryArchSuffix}_devel	
+	"
+#libjpeg ^
+#        |
+
+BUILD_PREREQUIRES="
+	"
+	
+BUILD()
+{
+#	$portPackageLinksDir/cmd~python/bin/python setup.py build
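+#	Pipe "n" to setup.py: presumably declines its "continue with missing dependencies?" prompt, so the build fails instead of dropping features.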
+	echo "n" | python setup.py build || exit 1
+}
+
+INSTALL()
+{
+#	Pipe "n" to setup.py so the install step presumably also refuses to continue when features are missing.
+	echo "n" | python setup.py install \
+		--prefix=$prefix || exit 1
+}
+