Commit 37fe3a93 authored by Sam Lantinga

Removed hermes since it's LGPL and not compatible with a commercial license.

Prepping for using MMX and SSE intrinsics instead of inline assembly.
.. except for memcpy equivalents which only get faster if they can
   exploit the parallelism of loading into multiple SIMD registers. :)

--HG--
extra : convert_revision : svn%3Ac70aab31-4412-0410-b14c-859654838e24/trunk%402609
parent 92c5ea48
...@@ -65,12 +65,6 @@ __EOF__ ...@@ -65,12 +65,6 @@ __EOF__
\$(LIBTOOL) --mode=compile \$(CC) \$(CFLAGS) \$(EXTRA_CFLAGS) -c $src -o \$@ \$(LIBTOOL) --mode=compile \$(CC) \$(CFLAGS) \$(EXTRA_CFLAGS) -c $src -o \$@
__EOF__
;;
asm) cat >>${output}.new <<__EOF__
\$(LIBTOOL) --tag=CC --mode=compile \$(auxdir)/strip_fPIC.sh \$(NASM) $src -o \$@
__EOF__ __EOF__
;; ;;
S) cat >>${output}.new <<__EOF__ S) cat >>${output}.new <<__EOF__
......
...@@ -276,6 +276,127 @@ AC_HELP_STRING([--enable-assembly], [Enable assembly routines [[default=yes]]]), ...@@ -276,6 +276,127 @@ AC_HELP_STRING([--enable-assembly], [Enable assembly routines [[default=yes]]]),
, enable_assembly=yes) , enable_assembly=yes)
if test x$enable_assembly = xyes; then if test x$enable_assembly = xyes; then
AC_DEFINE(SDL_ASSEMBLY_ROUTINES) AC_DEFINE(SDL_ASSEMBLY_ROUTINES)
dnl Check for various instruction support
# MMX probe: see whether the compiler accepts -mmmx and can include
# mmintrin.h. On success the flag is appended to EXTRA_CFLAGS.
AC_ARG_ENABLE(mmx,
AC_HELP_STRING([--enable-mmx], [use MMX assembly routines [[default=yes]]]),
              , enable_mmx=yes)
if test x$enable_mmx = xyes; then
    save_CFLAGS="$CFLAGS"
    have_gcc_mmx=no
    AC_MSG_CHECKING(for GCC -mmmx option)
    mmx_CFLAGS="-mmmx"
    CFLAGS="$save_CFLAGS $mmx_CFLAGS"

    AC_TRY_COMPILE([
    #include <mmintrin.h>
    ],[
    ],[
    have_gcc_mmx=yes
    ])
    AC_MSG_RESULT($have_gcc_mmx)

    # Put CFLAGS back so the probe flag does not leak into later configure
    # tests. A compiler that rejects -mmmx would otherwise fail every
    # subsequent compile check.
    CFLAGS="$save_CFLAGS"

    if test x$have_gcc_mmx = xyes; then
        EXTRA_CFLAGS="$EXTRA_CFLAGS $mmx_CFLAGS"
    fi
fi
# SSE probe: see whether the compiler accepts -msse and can include
# xmmintrin.h. On success the flag is appended to EXTRA_CFLAGS.
AC_ARG_ENABLE(sse,
AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]),
              , enable_sse=yes)
if test x$enable_sse = xyes; then
    save_CFLAGS="$CFLAGS"
    have_gcc_sse=no
    AC_MSG_CHECKING(for GCC -msse option)
    sse_CFLAGS="-msse"
    CFLAGS="$save_CFLAGS $sse_CFLAGS"

    AC_TRY_COMPILE([
    #include <xmmintrin.h>
    ],[
    ],[
    have_gcc_sse=yes
    ])
    AC_MSG_RESULT($have_gcc_sse)

    # Put CFLAGS back so the probe flag does not leak into later configure
    # tests. A compiler that rejects -msse would otherwise fail every
    # subsequent compile check.
    CFLAGS="$save_CFLAGS"

    if test x$have_gcc_sse = xyes; then
        EXTRA_CFLAGS="$EXTRA_CFLAGS $sse_CFLAGS"
    fi
fi
AC_ARG_ENABLE(altivec,
AC_HELP_STRING([--enable-altivec], [use Altivec assembly routines [[default=yes]]]),
              , enable_altivec=yes)
if test x$enable_altivec = xyes; then
    # Record whether altivec.h exists. The probe programs include it only
    # when it was found.
    have_altivec_h_hdr=no
    AC_CHECK_HEADER(altivec.h, have_altivec_h_hdr=yes)

    save_CFLAGS="$CFLAGS"
    have_gcc_altivec=no

    # First attempt: the -maltivec spelling of the flag.
    AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
    altivec_CFLAGS="-maltivec"
    CFLAGS="$save_CFLAGS $altivec_CFLAGS"
    if test x$have_altivec_h_hdr = xyes; then
        AC_TRY_COMPILE([
        #include <altivec.h>
        vector unsigned int vzero() {
            return vec_splat_u32(0);
        }
        ],[
        ],[
        have_gcc_altivec=yes
        ])
    else
        AC_TRY_COMPILE([
        vector unsigned int vzero() {
            return vec_splat_u32(0);
        }
        ],[
        ],[
        have_gcc_altivec=yes
        ])
    fi
    AC_MSG_RESULT($have_gcc_altivec)

    # Second attempt, only when the first one failed: the -faltivec spelling.
    if test x$have_gcc_altivec = xno; then
        AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
        altivec_CFLAGS="-faltivec"
        CFLAGS="$save_CFLAGS $altivec_CFLAGS"
        if test x$have_altivec_h_hdr = xyes; then
            AC_TRY_COMPILE([
            #include <altivec.h>
            vector unsigned int vzero() {
                return vec_splat_u32(0);
            }
            ],[
            ],[
            have_gcc_altivec=yes
            ])
        else
            AC_TRY_COMPILE([
            vector unsigned int vzero() {
                return vec_splat_u32(0);
            }
            ],[
            ],[
            have_gcc_altivec=yes
            ])
        fi
        AC_MSG_RESULT($have_gcc_altivec)
    fi

    # The probe flag is removed from CFLAGS again. Whichever spelling worked
    # stays in altivec_CFLAGS and is appended to EXTRA_CFLAGS below.
    CFLAGS="$save_CFLAGS"

    if test x$have_gcc_altivec = xyes; then
        AC_DEFINE(SDL_ALTIVEC_BLITTERS)
        if test x$have_altivec_h_hdr = xyes; then
            AC_DEFINE(HAVE_ALTIVEC_H)
        fi
        EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
    fi
fi
fi fi
dnl See if the OSS audio interface is supported dnl See if the OSS audio interface is supported
...@@ -629,167 +750,6 @@ AC_HELP_STRING([--enable-mintaudio], [support Atari audio driver [[default=yes]] ...@@ -629,167 +750,6 @@ AC_HELP_STRING([--enable-mintaudio], [support Atari audio driver [[default=yes]]
fi fi
} }
dnl See if we can use x86 assembly blitters
# NASM is available from: http://nasm.sourceforge.net
CheckNASM()
{
# Locates an assembler, yasm first and nasm as fallback, and when one is
# found enables the Hermes assembly blitters: defines SDL_HERMES_BLITTERS,
# adds the hermes .asm files to SOURCES and exports NASM and NASMFLAGS.
# Does nothing on hosts that do not match i?86.
dnl Make sure we are running on an x86 platform
case $host in
i?86*)
;;
*)
# Nope, bail early.
return
;;
esac
dnl Check for NASM (for assembly blit routines)
AC_ARG_ENABLE(nasm,
AC_HELP_STRING([--enable-nasm], [use nasm assembly blitters on x86 [[default=yes]]]),
, enable_nasm=yes)
# Video, assembly and nasm support must all be enabled to continue.
if test x$enable_video = xyes -a x$enable_assembly = xyes -a x$enable_nasm = xyes; then
# Helper: assembles the given file with the current NASM and NASMFLAGS,
# sending assembler output to the configure log. The input file and the
# object file are removed afterwards. Exit status is success only when
# the file assembled.
CompileNASM()
{
# Usage: CompileNASM <filename>
AC_MSG_CHECKING(to see if $NASM supports $1)
if $NASM $NASMFLAGS $1 -o $1.o >&AS_MESSAGE_LOG_FD 2>&1; then
CompileNASM_ret="yes"
else
CompileNASM_ret="no"
fi
rm -f $1 $1.o
AC_MSG_RESULT($CompileNASM_ret)
test "$CompileNASM_ret" = "yes"
}
# Pick an object format from ARCH unless the caller already set NASMFLAGS.
if test x"$NASMFLAGS" = x; then
case $ARCH in
win32)
NASMFLAGS="-f win32"
;;
openbsd)
NASMFLAGS="-f aoutb"
;;
macosx)
NASMFLAGS="-f macho"
;;
*)
NASMFLAGS="-f elf"
;;
esac
fi
# Try yasm first. It is kept only if it assembles a section directive
# written without quotes; otherwise NASM is cleared.
AC_PATH_PROG(NASM, yasm)
echo "%ifidn __OUTPUT_FORMAT__,elf" > unquoted-sections
echo "section .note.GNU-stack noalloc noexec nowrite progbits" >> unquoted-sections
echo "%endif" >> unquoted-sections
CompileNASM unquoted-sections || NASM=""
# Fall back to nasm. The cached path result is unset first so that
# AC_PATH_PROG searches again instead of reusing the yasm answer.
# NOTE(review): the second comparison matches the literal text "$NASM"
# in double quotes, presumably an unexpanded value - confirm.
if test "x$NASM" = x -o "x$NASM" = x'"$NASM"'; then
$as_unset ac_cv_path_NASM
AC_PATH_PROG(NASM, nasm)
fi
# An assembler was found: switch on the Hermes blitters.
if test "x$NASM" != x -a "x$NASM" != x'"$NASM"'; then
AC_DEFINE(SDL_HERMES_BLITTERS)
SOURCES="$SOURCES $srcdir/src/hermes/*.asm"
NASMFLAGS="$NASMFLAGS -I $srcdir/src/hermes/"
dnl See if hidden visibility is supported
echo "GLOBAL _bar:function hidden" > symbol-visibility
echo "_bar:" >> symbol-visibility
CompileNASM symbol-visibility && NASMFLAGS="$NASMFLAGS -DHIDDEN_VISIBILITY"
AC_SUBST(NASM)
AC_SUBST(NASMFLAGS)
case "$host" in
# this line is needed for QNX, because it's not defined the __ELF__
*-*-qnx*)
EXTRA_CFLAGS="$EXTRA_CFLAGS -D__ELF__";;
*-*-solaris*)
EXTRA_CFLAGS="$EXTRA_CFLAGS -D__ELF__";;
esac
fi
fi
}
dnl Check for altivec instruction support using gas syntax
CheckAltivec()
{
    # Probes for Altivec support in the compiler, trying -maltivec and then
    # -faltivec. On success defines SDL_ALTIVEC_BLITTERS, defines
    # HAVE_ALTIVEC_H when altivec.h was found, and appends the working flag
    # to EXTRA_CFLAGS.
    AC_ARG_ENABLE(altivec,
AC_HELP_STRING([--enable-altivec], [use altivec assembly blitters on PPC [[default=yes]]]),
                  , enable_altivec=yes)
    if test x$enable_video = xyes -a x$enable_assembly = xyes -a x$enable_altivec = xyes; then
        # Record whether altivec.h exists. The probe programs include it
        # only when it was found.
        have_altivec_h_hdr=no
        AC_CHECK_HEADER(altivec.h, have_altivec_h_hdr=yes)

        save_CFLAGS="$CFLAGS"
        have_gcc_altivec=no

        # First attempt: the -maltivec spelling of the flag.
        AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
        altivec_CFLAGS="-maltivec"
        CFLAGS="$save_CFLAGS $altivec_CFLAGS"
        if test x$have_altivec_h_hdr = xyes; then
            AC_TRY_COMPILE([
            #include <altivec.h>
            vector unsigned int vzero() {
                return vec_splat_u32(0);
            }
            ],[
            ],[
            have_gcc_altivec=yes
            ])
        else
            AC_TRY_COMPILE([
            vector unsigned int vzero() {
                return vec_splat_u32(0);
            }
            ],[
            ],[
            have_gcc_altivec=yes
            ])
        fi
        AC_MSG_RESULT($have_gcc_altivec)

        # Second attempt, only when the first one failed: -faltivec.
        if test x$have_gcc_altivec = xno; then
            AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
            altivec_CFLAGS="-faltivec"
            CFLAGS="$save_CFLAGS $altivec_CFLAGS"
            if test x$have_altivec_h_hdr = xyes; then
                AC_TRY_COMPILE([
                #include <altivec.h>
                vector unsigned int vzero() {
                    return vec_splat_u32(0);
                }
                ],[
                ],[
                have_gcc_altivec=yes
                ])
            else
                AC_TRY_COMPILE([
                vector unsigned int vzero() {
                    return vec_splat_u32(0);
                }
                ],[
                ],[
                have_gcc_altivec=yes
                ])
            fi
            AC_MSG_RESULT($have_gcc_altivec)
        fi

        # The probe flag is removed from CFLAGS again. Whichever spelling
        # worked stays in altivec_CFLAGS for use below.
        CFLAGS="$save_CFLAGS"

        if test x$have_gcc_altivec = xyes; then
            AC_DEFINE(SDL_ALTIVEC_BLITTERS)
            if test x$have_altivec_h_hdr = xyes; then
                AC_DEFINE(HAVE_ALTIVEC_H)
            fi
            EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
        fi
    fi
}
dnl See if GCC's -fvisibility=hidden is supported (gcc4 and later, usually). dnl See if GCC's -fvisibility=hidden is supported (gcc4 and later, usually).
dnl Details of this flag are here: http://gcc.gnu.org/wiki/Visibility dnl Details of this flag are here: http://gcc.gnu.org/wiki/Visibility
CheckVisibilityHidden() CheckVisibilityHidden()
...@@ -2043,8 +2003,6 @@ case "$host" in ...@@ -2043,8 +2003,6 @@ case "$host" in
CheckDiskAudio CheckDiskAudio
CheckDummyAudio CheckDummyAudio
CheckDLOPEN CheckDLOPEN
CheckNASM
CheckAltivec
CheckOSS CheckOSS
CheckDMEDIA CheckDMEDIA
CheckMME CheckMME
...@@ -2153,7 +2111,6 @@ case "$host" in ...@@ -2153,7 +2111,6 @@ case "$host" in
CheckDummyVideo CheckDummyVideo
CheckDiskAudio CheckDiskAudio
CheckDummyAudio CheckDummyAudio
# CheckNASM
CheckDLOPEN CheckDLOPEN
CheckNAS CheckNAS
CheckPHOTON CheckPHOTON
...@@ -2197,7 +2154,6 @@ case "$host" in ...@@ -2197,7 +2154,6 @@ case "$host" in
CheckWIN32 CheckWIN32
CheckWIN32GL CheckWIN32GL
CheckDIRECTX CheckDIRECTX
CheckNASM
# Set up files for the video library # Set up files for the video library
if test x$enable_video = xyes; then if test x$enable_video = xyes; then
AC_DEFINE(SDL_VIDEO_DRIVER_WIN32) AC_DEFINE(SDL_VIDEO_DRIVER_WIN32)
...@@ -2278,7 +2234,6 @@ AC_HELP_STRING([--enable-render-d3d], [enable the Direct3D render driver [[defau ...@@ -2278,7 +2234,6 @@ AC_HELP_STRING([--enable-render-d3d], [enable the Direct3D render driver [[defau
CheckDummyVideo CheckDummyVideo
CheckDiskAudio CheckDiskAudio
CheckDummyAudio CheckDummyAudio
CheckNASM
CheckBWINDOW CheckBWINDOW
CheckBeGL CheckBeGL
# Set up files for the audio library # Set up files for the audio library
...@@ -2344,7 +2299,6 @@ AC_HELP_STRING([--enable-render-d3d], [enable the Direct3D render driver [[defau ...@@ -2344,7 +2299,6 @@ AC_HELP_STRING([--enable-render-d3d], [enable the Direct3D render driver [[defau
CheckDiskAudio CheckDiskAudio
CheckDummyAudio CheckDummyAudio
CheckDLOPEN CheckDLOPEN
CheckNASM
# Set up files for the shared object loading library # Set up files for the shared object loading library
# (this needs to be done before the dynamic X11 check) # (this needs to be done before the dynamic X11 check)
...@@ -2359,7 +2313,6 @@ AC_HELP_STRING([--enable-render-d3d], [enable the Direct3D render driver [[defau ...@@ -2359,7 +2313,6 @@ AC_HELP_STRING([--enable-render-d3d], [enable the Direct3D render driver [[defau
CheckMacGL CheckMacGL
CheckOpenGLX11 CheckOpenGLX11
CheckPTHREAD CheckPTHREAD
CheckAltivec
# Good optimization on Mac OS X, yes... # Good optimization on Mac OS X, yes...
EXTRA_CFLAGS="$EXTRA_CFLAGS -falign-loops=16" EXTRA_CFLAGS="$EXTRA_CFLAGS -falign-loops=16"
......
...@@ -292,7 +292,6 @@ ...@@ -292,7 +292,6 @@
/* Enable assembly routines */ /* Enable assembly routines */
#undef SDL_ASSEMBLY_ROUTINES #undef SDL_ASSEMBLY_ROUTINES
#undef SDL_HERMES_BLITTERS
#undef SDL_ALTIVEC_BLITTERS #undef SDL_ALTIVEC_BLITTERS
#endif /* _SDL_config_h */ #endif /* _SDL_config_h */
GNU LIBRARY GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the library GPL. It is
numbered 2 because it goes with version 2 of the ordinary GPL.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Library General Public License, applies to some
specially designated Free Software Foundation software, and to any
other libraries whose authors decide to use it. You can use it for
your libraries, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if
you distribute copies of the library, or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link a program with the library, you must provide
complete object files to the recipients so that they can relink them
with the library, after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
Our method of protecting your rights has two steps: (1) copyright
the library, and (2) offer you this license which gives you legal
permission to copy, distribute and/or modify the library.
Also, for each distributor's protection, we want to make certain
that everyone understands that there is no warranty for this free
library. If the library is modified by someone else and passed on, we
want its recipients to know that what they have is not the original
version, so that any problems introduced by others will not reflect on
the original authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that companies distributing free
software will individually obtain patent licenses, thus in effect
transforming the program into proprietary software. To prevent this,
we have made it clear that any patent must be licensed for everyone's
free use or not licensed at all.
Most GNU software, including some libraries, is covered by the ordinary
GNU General Public License, which was designed for utility programs. This
license, the GNU Library General Public License, applies to certain
designated libraries. This license is quite different from the ordinary
one; be sure to read it in full, and don't assume that anything in it is
the same as in the ordinary license.
The reason we have a separate public license for some libraries is that
they blur the distinction we usually make between modifying or adding to a
program and simply using it. Linking a program with a library, without
changing the library, is in some sense simply using the library, and is
analogous to running a utility program or application program. However, in
a textual and legal sense, the linked executable is a combined work, a
derivative of the original library, and the ordinary General Public License
treats it as such.
Because of this blurred distinction, using the ordinary General
Public License for libraries did not effectively promote software
sharing, because most developers did not use the libraries. We
concluded that weaker conditions might promote sharing better.
However, unrestricted linking of non-free programs would deprive the
users of those programs of all benefit from the free status of the
libraries themselves. This Library General Public License is intended to
permit developers of non-free programs to use free libraries, while
preserving your freedom as a user of such programs to change the free
libraries that are incorporated in them. (We have not seen how to achieve
this as regards changes in header files, but we have achieved it as regards
changes in the actual functions of the Library.) The hope is that this
will lead to faster development of free libraries.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, while the latter only
works together with the library.
Note that it is possible for a library to be covered by the ordinary
General Public License rather than by this special one.
GNU LIBRARY GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library which
contains a notice placed by the copyright holder or other authorized
party saying it may be distributed under the terms of this Library
General Public License (also called "this License"). Each licensee is
addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also compile or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
c) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
d) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the source code distributed need not include anything that is normally
distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Library General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
/*
Header definitions for the MMX routines for the HERMES library
Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
This source code is licensed under the GNU LGPL
Please refer to the file COPYING.LIB contained in the distribution for
licensing conditions
*/
#include "SDL_config.h"
#ifndef __HERMES_HEAD_MMX__
#define __HERMES_HEAD_MMX__
/* If you cannot stand ifdefs, then please do not look into this file, it's
going to end your life :) */
#ifdef X86_ASSEMBLER
/*
 * Entry points implemented in the MMX assembly sources.
 *
 * ConvertMMX and the ClearMMX_* routines take a pointer to their interface
 * structure.  The ConvertMMXp* routines are per-row converters: they are
 * reached through the converter function pointer stored in the interface
 * structure and receive their operands in registers, which is why they are
 * declared with an empty (unspecified) parameter list.
 */
#ifdef __cplusplus
extern "C"
{
#endif
/* Drivers taking an interface structure */
void STACKCALL ConvertMMX(HermesConverterInterface *);
void STACKCALL ClearMMX_32(HermesClearInterface *);
void STACKCALL ClearMMX_24(HermesClearInterface *);
void STACKCALL ClearMMX_16(HermesClearInterface *);
void STACKCALL ClearMMX_8(HermesClearInterface *);
/* Per-row converters from 32 bit sources (each declared exactly once) */
void ConvertMMXpII32_24RGB888();
void ConvertMMXpII32_16RGB565();
void ConvertMMXpII32_16BGR565();
void ConvertMMXpII32_16RGB555();
void ConvertMMXpII32_16BGR555();
void ConvertMMXp32_16RGB555();
#ifdef __cplusplus
}
#endif
/* Fix the underscore business with ELF compilers.
   The assembly sources export their entry points with a leading underscore
   (e.g. _ConvertMMX).  For GCC on ELF targets, declare those underscored
   names and map the plain C names onto them with macros.
   NOTE(review): ClearMMX_* and ConvertMMXp32_16RGB555 get no mapping here;
   confirm whether that is intentional. */
#if defined(__ELF__) && defined(__GNUC__)
#ifdef __cplusplus
extern "C"
{
#endif
/* Underscored symbols as exported by the assembly */
extern void _ConvertMMX(HermesConverterInterface *);
extern void _ConvertMMXpII32_24RGB888();
extern void _ConvertMMXpII32_16RGB565();
extern void _ConvertMMXpII32_16BGR565();
extern void _ConvertMMXpII32_16RGB555();
extern void _ConvertMMXpII32_16BGR555();
/* Redirect the C-level names to the underscored symbols */
#define ConvertMMX _ConvertMMX
#define ConvertMMXpII32_24RGB888 _ConvertMMXpII32_24RGB888
#define ConvertMMXpII32_16RGB565 _ConvertMMXpII32_16RGB565
#define ConvertMMXpII32_16BGR565 _ConvertMMXpII32_16BGR565
#define ConvertMMXpII32_16RGB555 _ConvertMMXpII32_16RGB555
#define ConvertMMXpII32_16BGR555 _ConvertMMXpII32_16BGR555
#ifdef __cplusplus
}
#endif
#endif /* ELF and GNUC */
/* Make it work with Watcom.
   Each "#pragma aux" gives the routine the external name "_" + C name so it
   matches the symbol exported by the assembly.  The drivers additionally
   carry a "modify" list naming the registers they are declared to change.
   NOTE(review): the purpose of "#pragma warning 601 9" is not evident from
   this file; confirm against the Watcom documentation. */
#ifdef __WATCOMC__
#pragma warning 601 9
/* Drivers: underscore name plus clobbered-register list */
#pragma aux ConvertMMX "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearMMX_32 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearMMX_24 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearMMX_16 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearMMX_8 "_*" modify [EAX EBX ECX EDX ESI EDI]
/* Per-row converters: underscore name only */
#pragma aux ConvertMMXpII32_24RGB888 "_*"
#pragma aux ConvertMMXpII32_16RGB565 "_*"
#pragma aux ConvertMMXpII32_16BGR565 "_*"
#pragma aux ConvertMMXpII32_16RGB555 "_*"
#pragma aux ConvertMMXpII32_16BGR555 "_*"
#pragma aux ConvertMMXp32_16RGB555 "_*"
#endif /* WATCOM */
#endif /* X86_ASSEMBLER */
#endif
/* vi: set ts=4 sw=4 expandtab: */
/*
Header definitions for the x86 routines for the HERMES library
Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
This source code is licensed under the GNU LGPL
Please refer to the file COPYING.LIB contained in the distribution for
licensing conditions
*/
#ifndef __HERMES_HEAD_X86__
#define __HERMES_HEAD_X86__
#ifdef X86_ASSEMBLER
/* If you can't stand IFDEFS, then close your eyes now, please :) */
/* Ok, we start with normal function definitions.
   ConvertX86 and the ClearX86_* routines take a pointer to their interface
   structure.  The ConvertX86p*, CopyX86p_* and ConvertX86pI8_* routines are
   declared with an empty (unspecified) parameter list; the converters defined
   in the assembly sources take source, destination and count in ESI, EDI and
   ECX rather than on the stack. */
#ifdef __cplusplus
extern "C"
{
#endif
/* Drivers and CPU detection */
void STACKCALL ConvertX86(HermesConverterInterface *);
void STACKCALL ClearX86_32(HermesClearInterface *);
void STACKCALL ClearX86_24(HermesClearInterface *);
void STACKCALL ClearX86_16(HermesClearInterface *);
void STACKCALL ClearX86_8(HermesClearInterface *);
int STACKCALL Hermes_X86_CPU();
/* Converters from 32 bit sources */
void ConvertX86p32_32BGR888();
void ConvertX86p32_32RGBA888();
void ConvertX86p32_32BGRA888();
void ConvertX86p32_24RGB888();
void ConvertX86p32_24BGR888();
void ConvertX86p32_16RGB565();
void ConvertX86p32_16BGR565();
void ConvertX86p32_16RGB555();
void ConvertX86p32_16BGR555();
void ConvertX86p32_8RGB332();
/* Converters from 16 bit sources */
void ConvertX86p16_32RGB888();
void ConvertX86p16_32BGR888();
void ConvertX86p16_32RGBA888();
void ConvertX86p16_32BGRA888();
void ConvertX86p16_24RGB888();
void ConvertX86p16_24BGR888();
void ConvertX86p16_16BGR565();
void ConvertX86p16_16RGB555();
void ConvertX86p16_16BGR555();
void ConvertX86p16_8RGB332();
/* Straight copies, by bytes per pixel */
void CopyX86p_4byte();
void CopyX86p_3byte();
void CopyX86p_2byte();
void CopyX86p_1byte();
/* Converters from 8 bit indexed sources */
void ConvertX86pI8_32();
void ConvertX86pI8_24();
void ConvertX86pI8_16();
/* 512-entry lookup tables defined elsewhere; presumably used by the
   16 -> 32 bit converters of the same name -- verify against their source */
extern int ConvertX86p16_32RGB888_LUT_X86[512];
extern int ConvertX86p16_32BGR888_LUT_X86[512];
extern int ConvertX86p16_32RGBA888_LUT_X86[512];
extern int ConvertX86p16_32BGRA888_LUT_X86[512];
#ifdef __cplusplus
}
#endif
/* Now fix up the ELF underscore problem.
   The assembly sources export their entry points with a leading underscore
   (e.g. _ConvertX86).  For GCC on ELF targets, declare those underscored
   names and map the plain C names onto them with macros.
   NOTE(review): only a subset of the routines declared above is mapped
   (no ClearX86_*, CopyX86p_*, ConvertX86pI8_*, ConvertX86p16_32* or
   ConvertX86p16_24*); confirm that the unmapped names are unused here. */
#if defined(__ELF__) && defined(__GNUC__)
#ifdef __cplusplus
extern "C"
{
#endif
/* Underscored symbols as exported by the assembly */
extern int _Hermes_X86_CPU();
extern void _ConvertX86(HermesConverterInterface *);
extern void _ConvertX86p32_32BGR888();
extern void _ConvertX86p32_32RGBA888();
extern void _ConvertX86p32_32BGRA888();
extern void _ConvertX86p32_24RGB888();
extern void _ConvertX86p32_24BGR888();
extern void _ConvertX86p32_16RGB565();
extern void _ConvertX86p32_16BGR565();
extern void _ConvertX86p32_16RGB555();
extern void _ConvertX86p32_16BGR555();
extern void _ConvertX86p32_8RGB332();
extern void _ConvertX86p16_16BGR565();
extern void _ConvertX86p16_16RGB555();
extern void _ConvertX86p16_16BGR555();
extern void _ConvertX86p16_8RGB332();
/* Redirect the C-level names to the underscored symbols */
#define Hermes_X86_CPU _Hermes_X86_CPU
#define ConvertX86 _ConvertX86
#define ConvertX86p32_32BGR888 _ConvertX86p32_32BGR888
#define ConvertX86p32_32RGBA888 _ConvertX86p32_32RGBA888
#define ConvertX86p32_32BGRA888 _ConvertX86p32_32BGRA888
#define ConvertX86p32_24RGB888 _ConvertX86p32_24RGB888
#define ConvertX86p32_24BGR888 _ConvertX86p32_24BGR888
#define ConvertX86p32_16RGB565 _ConvertX86p32_16RGB565
#define ConvertX86p32_16BGR565 _ConvertX86p32_16BGR565
#define ConvertX86p32_16RGB555 _ConvertX86p32_16RGB555
#define ConvertX86p32_16BGR555 _ConvertX86p32_16BGR555
#define ConvertX86p32_8RGB332 _ConvertX86p32_8RGB332
#define ConvertX86p16_16BGR565 _ConvertX86p16_16BGR565
#define ConvertX86p16_16RGB555 _ConvertX86p16_16RGB555
#define ConvertX86p16_16BGR555 _ConvertX86p16_16BGR555
#define ConvertX86p16_8RGB332 _ConvertX86p16_8RGB332
#ifdef __cplusplus
}
#endif
#endif /* ELF & GNU */
/* Make it run with WATCOM C.
   Each "#pragma aux" gives the routine (or table) the external name
   "_" + C name so it matches the symbol exported by the assembly.  The
   drivers additionally carry a "modify" list naming the registers they are
   declared to change.
   NOTE(review): the purpose of "#pragma warning 601 9" is not evident from
   this file; confirm against the Watcom documentation. */
#ifdef __WATCOMC__
#pragma warning 601 9
/* Drivers and CPU detection */
#pragma aux Hermes_X86_CPU "_*"
#pragma aux ConvertX86 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearX86_32 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearX86_24 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearX86_16 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearX86_8 "_*" modify [EAX EBX ECX EDX ESI EDI]
/* Converters from 32 bit sources */
#pragma aux ConvertX86p32_32BGR888 "_*"
#pragma aux ConvertX86p32_32RGBA888 "_*"
#pragma aux ConvertX86p32_32BGRA888 "_*"
#pragma aux ConvertX86p32_24RGB888 "_*"
#pragma aux ConvertX86p32_24BGR888 "_*"
#pragma aux ConvertX86p32_16RGB565 "_*"
#pragma aux ConvertX86p32_16BGR565 "_*"
#pragma aux ConvertX86p32_16RGB555 "_*"
#pragma aux ConvertX86p32_16BGR555 "_*"
#pragma aux ConvertX86p32_8RGB332 "_*"
/* Converters from 16 bit sources */
#pragma aux ConvertX86p16_32RGB888 "_*"
#pragma aux ConvertX86p16_32BGR888 "_*"
#pragma aux ConvertX86p16_32RGBA888 "_*"
#pragma aux ConvertX86p16_32BGRA888 "_*"
#pragma aux ConvertX86p16_24RGB888 "_*"
#pragma aux ConvertX86p16_24BGR888 "_*"
#pragma aux ConvertX86p16_16BGR565 "_*"
#pragma aux ConvertX86p16_16RGB555 "_*"
#pragma aux ConvertX86p16_16BGR555 "_*"
#pragma aux ConvertX86p16_8RGB332 "_*"
/* Straight copies */
#pragma aux CopyX86p_4byte "_*"
#pragma aux CopyX86p_3byte "_*"
#pragma aux CopyX86p_2byte "_*"
#pragma aux CopyX86p_1byte "_*"
/* Converters from 8 bit indexed sources */
#pragma aux ConvertX86pI8_32 "_*"
#pragma aux ConvertX86pI8_24 "_*"
#pragma aux ConvertX86pI8_16 "_*"
/* Lookup tables */
#pragma aux ConvertX86p16_32RGB888_LUT_X86 "_*"
#pragma aux ConvertX86p16_32BGR888_LUT_X86 "_*"
#pragma aux ConvertX86p16_32RGBA888_LUT_X86 "_*"
#pragma aux ConvertX86p16_32BGRA888_LUT_X86 "_*"
#endif /* __WATCOMC__ */
#endif /* X86_ASSEMBLER */
#endif
/* vi: set ts=4 sw=4 expandtab: */
HERMES 1.2.4 (c)1998 Christian Nentwich (brn) (c.nentwich@cs.ucl.ac.uk)
and quite a few assembler routines (c) Glenn Fiedler (gaffer@gaffer.org)
This library and all the files enclosed in this package are free software
under the terms of the GNU Library General Public License (LGPL). Please
refer to the included file COPYING.LIB for the exact terms.
----------------------------------------------------------------------------
This is a stripped down version of HERMES, including only the x86 assembler
converters, for use with Simple DirectMedia Layer.
The full HERMES library is available at: http://hermes.terminal.at/
; Some common macros for hermes nasm code
; SDL_FUNC <symbol>
;   Export <symbol> from the object file.  When HIDDEN_VISIBILITY is defined
;   the symbol is additionally tagged as a function with hidden visibility;
;   otherwise a plain GLOBAL declaration is emitted.
%macro SDL_FUNC 1
%ifndef HIDDEN_VISIBILITY
	GLOBAL %1
%else
	GLOBAL %1:function hidden
%endif
%endmacro
;
; mmx format converter main loops for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
BITS 32
%include "common.inc"
SDL_FUNC _ConvertMMX
SECTION .text
;; _ConvertMMX:
;; [ESP+8] ConverterInfo*
;; --------------------------------------------------------------------------
;; ConverterInfo (ebp+..)
;; 0: void *s_pixels
;; 4: int s_width
;; 8: int s_height
;; 12: int s_add
;; 16: void *d_pixels
;; 20: int d_width
;; 24: int d_height
;; 28: int d_add
;; 32: void (*converter_function)()
;; 36: int32 *lookup
;; _ConvertMMX: run a per-row converter over every row of a surface.
;;
;; In:   [esp+4] on entry (= [ebp+8] after the prologue) -> ConverterInfo
;; Out:  nothing; MMX state is cleared with emms before returning.
;; Side effect: the s_height field of the caller's ConverterInfo is used as
;;   the row counter and is decremented down to zero.
;;
;; For each row the converter at offset 32 is called with ESI = source,
;; EDI = destination, ECX = s_width; afterwards s_add / d_add are added to
;; ESI / EDI to step to the next row.
_ConvertMMX:
	push ebp
	mov ebp,esp
; Save the registers used by the blitters, necessary for optimized code
	pusha
	mov eax,[ebp+8]
; Nothing to do for an empty surface.  Both early exits go through the
; common epilogue so that the pusha/push ebp above are always undone.
	cmp dword [eax+4],BYTE 0
	je endconvert
; A zero height would make the dec/jnz below wrap around and run ~2^32 rows.
	cmp dword [eax+8],BYTE 0
	je endconvert
	mov ebp,eax
	mov esi,[ebp+0]
	mov edi,[ebp+16]
y_loop:
	mov ecx,[ebp+4]
	call [ebp+32]
	add esi,[ebp+12]
	add edi,[ebp+28]
	dec dword [ebp+8]
	jnz y_loop
endconvert:
; Restore the registers used by the blitters, necessary for optimized code.
; popa reloads EBP with the frame pointer saved by pusha, so the following
; pop ebp is correct on both the normal and the early-exit path.
	popa
	pop ebp
	emms
	ret
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
;
; pII-optimised MMX format converters for HERMES
; Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
; and (c) 1999 Jonathan Matthew (jmatthew@uq.net.au)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; COPYRIGHT NOTICE
;
; This file partly contains code that is (c) Intel Corporation, specifically
; the mode detection routine, and the converter to 15 bit (8 pixel
; conversion routine from the mmx programming tutorial pages).
;
;
; These routines aren't exactly pII optimised - it's just that as they
; are, they're terrible on p5 MMXs, but less so on pIIs. Someone needs to
; optimise them for p5 MMXs..
BITS 32
%include "common.inc"
SDL_FUNC _ConvertMMXpII32_24RGB888
SDL_FUNC _ConvertMMXpII32_16RGB565
SDL_FUNC _ConvertMMXpII32_16BGR565
SDL_FUNC _ConvertMMXpII32_16RGB555
SDL_FUNC _ConvertMMXpII32_16BGR555
;; Macros for conversion routines
;;
;; These build 64 bit constants on the stack instead of reading them from a
;; data section: a 32 bit immediate is pushed twice, giving a quadword at
;; [esp] whose two halves are identical.  Every load_immq / pand_immq leaves
;; 8 bytes on the stack; CLEANUP_IMMQ_LOADS(n) releases n of them.

; _push_immq_mask imm32 -- push imm32 twice (quadword now at [esp])
%macro _push_immq_mask 1
push dword %1
push dword %1
%endmacro
; load_immq mmreg, imm32 -- mmreg = imm32:imm32 (leaves 8 bytes on the stack)
%macro load_immq 2
_push_immq_mask %2
movq %1, [esp]
%endmacro
; pand_immq mmreg, imm32 -- mmreg &= imm32:imm32 (leaves 8 bytes on the stack)
%macro pand_immq 2
_push_immq_mask %2
pand %1, [esp]
%endmacro
; CLEANUP_IMMQ_LOADS(num) -- pop the quadwords left by num macro uses above
%define CLEANUP_IMMQ_LOADS(num) \
add esp, byte 8 * num
; Per-pixel (32 bit) masks and multipliers; each is replicated into both
; halves of an MMX register by the macros above.
%define mmx32_rgb888_mask 00ffffffh
%define mmx32_rgb565_b 000000f8h
%define mmx32_rgb565_g 0000fc00h
%define mmx32_rgb565_r 00f80000h
%define mmx32_rgb555_rb 00f800f8h
%define mmx32_rgb555_g 0000f800h
; pmaddwd multipliers: low word scales the blue byte, high word the red byte
%define mmx32_rgb555_mul 20000008h
%define mmx32_bgr555_mul 00082000h
SECTION .text
;; _ConvertMMXpII32_24RGB888: 32 bit pixels -> packed 24 bit pixels.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX, mm0-mm4, mm6, mm7
;;
;; Groups of four pixels (16 source bytes -> 12 destination bytes) are packed
;; with MMX; the remaining 0..3 pixels are copied byte by byte.
_ConvertMMXpII32_24RGB888:
	; set up mm6 as the mask, mm7 as zero
	load_immq mm6, mmx32_rgb888_mask
	CLEANUP_IMMQ_LOADS(1)
	pxor mm7, mm7
	mov edx, ecx                    ; save ecx
	and ecx, 0fffffffch             ; clear lower two bits
	jnz .L1
	jmp .L2
.L1:
	movq mm0, [esi]                 ; A R G B a r g b
	pand mm0, mm6                   ; 0 R G B 0 r g b
	movq mm1, [esi+8]               ; A R G B a r g b
	pand mm1, mm6                   ; 0 R G B 0 r g b
	movq mm2, mm0                   ; 0 R G B 0 r g b
	punpckhdq mm2, mm7              ; 0 0 0 0 0 R G B
	punpckldq mm0, mm7              ; 0 0 0 0 0 r g b
	psllq mm2, 24                   ; 0 0 R G B 0 0 0
	por mm0, mm2                    ; 0 0 R G B r g b
	movq mm3, mm1                   ; 0 R G B 0 r g b
	psllq mm3, 48                   ; g b 0 0 0 0 0 0
	por mm0, mm3                    ; g b R G B r g b
	movq mm4, mm1                   ; 0 R G B 0 r g b
	punpckhdq mm4, mm7              ; 0 0 0 0 0 R G B
	punpckldq mm1, mm7              ; 0 0 0 0 0 r g b
	psrlq mm1, 16                   ; 0 0 0 R G B 0 r
	psllq mm4, 8                    ; 0 0 0 0 R G B 0
	por mm1, mm4                    ; 0 0 0 0 R G B r
	movq [edi], mm0                 ; first 8 output bytes
	add esi, BYTE 16
	movd [edi+8], mm1               ; last 4 output bytes
	add edi, BYTE 12
	sub ecx, BYTE 4
	jnz .L1
.L2:
	; 0..3 leftover pixels, one at a time
	mov ecx, edx
	and ecx, BYTE 3
	jz .L4
.L3:
	mov al, [esi]
	mov bl, [esi+1]
	mov dl, [esi+2]
	mov [edi], al
	mov [edi+1], bl
	mov [edi+2], dl
	add esi, BYTE 4
	add edi, BYTE 3
	dec ecx
	jnz .L3
.L4:
	; This must be a real near return.  The previous text here was "return",
	; which is not an instruction: the assembler takes it as a label and
	; execution then falls through into the routine that follows.
	retn
;; _ConvertMMXpII32_16RGB565: 32 bit pixels -> 16 bit 5-6-5, source red in
;; the top five bits of each output word and source blue in the bottom five.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX, mm0-mm7
;;
;; Four pixels per MMX iteration; the remaining 0..3 pixels are converted
;; with integer code.
_ConvertMMXpII32_16RGB565:
; set up masks
load_immq mm5, mmx32_rgb565_b
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_r
CLEANUP_IMMQ_LOADS(3)
; edx keeps the full count for the tail; ecx = number of 4-pixel groups
mov edx, ecx
shr ecx, 2
jnz .L1
jmp .L2 ; not necessary at the moment, but doesn't hurt (much)
.L1:
; pixels 0 and 1: green and blue end up in mm0, red is isolated in mm3
movq mm0, [esi] ; argb
movq mm1, mm0 ; argb
pand mm0, mm6 ; 00g0
movq mm3, mm1 ; argb
pand mm1, mm5 ; 000b
pand mm3, mm7 ; 0r00
pslld mm1, 2 ; 0 0 000000bb bbb00000
por mm0, mm1 ; 0 0 ggggggbb bbb00000
psrld mm0, 5 ; 0 0 00000ggg gggbbbbb
; pixels 2 and 3: green and blue end up in mm4, red is isolated in mm1
movq mm4, [esi+8] ; argb
movq mm2, mm4 ; argb
pand mm4, mm6 ; 00g0
movq mm1, mm2 ; argb
pand mm2, mm5 ; 000b
pand mm1, mm7 ; 0r00
pslld mm2, 2 ; 0 0 000000bb bbb00000
por mm4, mm2 ; 0 0 ggggggbb bbb00000
psrld mm4, 5 ; 0 0 00000ggg gggbbbbb
; packing the masked red dwords down to bytes leaves each pixel's red bits
; in the upper byte of its 16 bit slot
packuswb mm3, mm1 ; R 0 r 0
packssdw mm0, mm4 ; as above.. ish
por mm0, mm3 ; done.
movq [edi], mm0
add esi, 16
add edi, 8
dec ecx
jnz .L1
.L2:
; 0..3 leftover pixels
mov ecx, edx
and ecx, BYTE 3
jz .L4
.L3:
; al = blue, ah = red, bh = green (bl is stale but is masked away below)
mov al, [esi]
mov bh, [esi+1]
mov ah, [esi+2]
shr al, 3
and eax, 0F81Fh ; BYTE?
shr ebx, 5
and ebx, 07E0h ; BYTE?
add eax, ebx
mov [edi], al
mov [edi+1], ah
add esi, BYTE 4
add edi, BYTE 2
dec ecx
jnz .L3
.L4:
retn
;; _ConvertMMXpII32_16BGR565: 32 bit pixels -> 16 bit 5-6-5, source blue in
;; the top five bits of each output word and source red in the bottom five.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX, mm0-mm7
;;
;; Four pixels per MMX iteration; the remaining 0..3 pixels are converted
;; with integer code.
_ConvertMMXpII32_16BGR565:
; masks: mm5 = red, mm6 = green, mm7 = blue
load_immq mm5, mmx32_rgb565_r
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_b
CLEANUP_IMMQ_LOADS(3)
; edx keeps the full count for the tail; ecx = number of 4-pixel groups
mov edx, ecx
shr ecx, 2
jnz .L1
jmp .L2
.L1:
; pixels 0 and 1: green and red end up in mm0, blue is isolated in mm3
movq mm0, [esi] ; a r g b
movq mm1, mm0 ; a r g b
pand mm0, mm6 ; 0 0 g 0
movq mm3, mm1 ; a r g b
pand mm1, mm5 ; 0 r 0 0
pand mm3, mm7 ; 0 0 0 b
psllq mm3, 16 ; 0 b 0 0
psrld mm1, 14 ; 0 0 000000rr rrr00000
por mm0, mm1 ; 0 0 ggggggrr rrr00000
psrld mm0, 5 ; 0 0 00000ggg gggrrrrr
; pixels 2 and 3: green and red end up in mm4, blue is isolated in mm1
movq mm4, [esi+8] ; a r g b
movq mm2, mm4 ; a r g b
pand mm4, mm6 ; 0 0 g 0
movq mm1, mm2 ; a r g b
pand mm2, mm5 ; 0 r 0 0
pand mm1, mm7 ; 0 0 0 b
psllq mm1, 16 ; 0 b 0 0
psrld mm2, 14 ; 0 0 000000rr rrr00000
por mm4, mm2 ; 0 0 ggggggrr rrr00000
psrld mm4, 5 ; 0 0 00000ggg gggrrrrr
packuswb mm3, mm1 ; BBBBB000 00000000 bbbbb000 00000000
packssdw mm0, mm4 ; 00000GGG GGGRRRRR 00000GGG GGGRRRRR
por mm0, mm3 ; BBBBBGGG GGGRRRRR bbbbbggg gggrrrrr
movq [edi], mm0
add esi, BYTE 16
add edi, BYTE 8
dec ecx
jnz .L1
.L2:
; 0..3 leftover pixels; edx itself is the counter here (dl is not used below)
and edx, BYTE 3
jz .L4
.L3:
; al = red, ah = blue, bh = green (bl is stale but is masked away below)
mov al, [esi+2]
mov bh, [esi+1]
mov ah, [esi]
shr al, 3
and eax, 0F81Fh ; BYTE ?
shr ebx, 5
and ebx, 07E0h ; BYTE ?
add eax, ebx
mov [edi], al
mov [edi+1], ah
add esi, BYTE 4
add edi, BYTE 2
dec edx
jnz .L3
.L4:
retn
;; _ConvertMMXpII32_16BGR555 / _ConvertMMXpII32_16RGB555:
;; 32 bit pixels -> 16 bit 5-5-5.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX, mm0-mm7
;;
;; Both entry points share one MMX loop (eight pixels per iteration); they
;; differ only in the pmaddwd multiplier held in mm7, which decides whether
;; red or blue lands in the high five bits.  The 0..7 leftover pixels are
;; converted with integer code, in the channel order matching the entry
;; point that was used.
_ConvertMMXpII32_16BGR555:
	; the 16BGR555 converter is identical to the RGB555 one,
	; except it uses a different multiplier for the pmaddwd
	; instruction. cool huh.
	load_immq mm7, mmx32_bgr555_mul
	jmp _convert_bgr555_cheat
; This is the same as the Intel version.. they obviously went to
; much more trouble to expand/coil the loop than I did, so theirs
; would almost certainly be faster, even if only a little.
; I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is
; (I think) a more accurate name..
_ConvertMMXpII32_16RGB555:
	load_immq mm7, mmx32_rgb555_mul
_convert_bgr555_cheat:
	load_immq mm6, mmx32_rgb555_g
	CLEANUP_IMMQ_LOADS(2)
	mov edx,ecx                     ; Save ecx
	and ecx,DWORD 0fffffff8h        ; clear lower three bits
	jnz .L_OK
	jmp near .L2
.L_OK:
	; Prologue of the software-pipelined loop: start on pixels 0..3
	movq mm2,[esi+8]
	movq mm0,[esi]
	movq mm3,mm2
	pand_immq mm3, mmx32_rgb555_rb
	movq mm1,mm0
	pand_immq mm1, mmx32_rgb555_rb
	pmaddwd mm3,mm7
	CLEANUP_IMMQ_LOADS(2)
	pmaddwd mm1,mm7
	pand mm2,mm6
.L1:
	; Each pass finishes pixels 0..3, converts pixels 4..7 and then starts
	; on the first four pixels of the NEXT group.
	; NOTE(review): on the final pass that look-ahead reads 16 source bytes
	; beyond the eight pixels consumed -- confirm the buffer allows this.
	movq mm4,[esi+24]
	pand mm0,mm6
	movq mm5,[esi+16]
	por mm3,mm2
	psrld mm3,6
	por mm1,mm0
	movq mm0,mm4
	psrld mm1,6
	pand_immq mm0, mmx32_rgb555_rb
	packssdw mm1,mm3
	movq mm3,mm5
	pmaddwd mm0,mm7
	pand_immq mm3, mmx32_rgb555_rb
	pand mm4,mm6
	movq [edi],mm1
	pmaddwd mm3,mm7
	add esi,BYTE 32
	por mm4,mm0
	pand mm5,mm6
	psrld mm4,6
	movq mm2,[esi+8]
	por mm5,mm3
	movq mm0,[esi]
	psrld mm5,6
	movq mm3,mm2
	movq mm1,mm0
	pand_immq mm3, mmx32_rgb555_rb
	packssdw mm5,mm4
	pand_immq mm1, mmx32_rgb555_rb
	pand mm2,mm6
	CLEANUP_IMMQ_LOADS(4)           ; four pand_immq uses in this pass
	movq [edi+8],mm5
	pmaddwd mm3,mm7
	pmaddwd mm1,mm7
	add edi,BYTE 16
	sub ecx,BYTE 8
	jz .L2
	jmp .L1
.L2:
	mov ecx,edx
	and ecx,BYTE 7
	jz .L4
	; mm7 still holds the multiplier chosen at entry; use it to select the
	; tail that produces the same channel order as the MMX loop.  (The tail
	; used to be RGB555 for both entry points, so BGR555 leftovers came out
	; with red and blue swapped.)
	movd edx,mm7
	cmp edx,mmx32_bgr555_mul
	je .L3bgr
.L3:
	; RGB555 tail: red -> bits 14..10, green -> 9..5, blue -> 4..0
	mov ebx,[esi]
	add esi,BYTE 4
	mov eax,ebx
	mov edx,ebx
	shr eax,3
	shr edx,6
	and eax,BYTE 0000000000011111b
	and edx, 0000001111100000b
	shr ebx,9
	or eax,edx
	and ebx, 0111110000000000b
	or eax,ebx
	mov [edi],ax
	add edi,BYTE 2
	dec ecx
	jnz .L3
	retn
.L3bgr:
	; BGR555 tail: blue -> bits 14..10, green -> 9..5, red -> 4..0
	mov ebx,[esi]
	add esi,BYTE 4
	mov eax,ebx
	mov edx,ebx
	shr eax,19
	shr edx,6
	and eax,BYTE 0000000000011111b
	and edx, 0000001111100000b
	shl ebx,7
	or eax,edx
	and ebx, 0111110000000000b
	or eax,ebx
	mov [edi],ax
	add edi,BYTE 2
	dec ecx
	jnz .L3bgr
.L4:
	retn
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;
BITS 32
%include "common.inc"
SDL_FUNC _ConvertX86
SECTION .text
;; _ConvertX86:
;; [ESP+8] ConverterInfo*
;; --------------------------------------------------------------------------
;; ConverterInfo (ebp+..)
;; 0: void *s_pixels
;; 4: int s_width
;; 8: int s_height
;; 12: int s_add
;; 16: void *d_pixels
;; 20: int d_width
;; 24: int d_height
;; 28: int d_add
;; 32: void (*converter_function)()
;; 36: int32 *lookup
;; _ConvertX86: run a per-row converter over every row of a surface.
;;
;; In:   [esp+4] on entry (= [ebp+8] after the prologue) -> ConverterInfo
;; Out:  nothing
;; Side effect: the s_height field of the caller's ConverterInfo is used as
;;   the row counter and is decremented down to zero.
;;
;; For each row the converter at offset 32 is called with ESI = source,
;; EDI = destination, ECX = s_width; afterwards s_add / d_add are added to
;; ESI / EDI to step to the next row.
_ConvertX86:
	push ebp
	mov ebp,esp
; Save the registers used by the blitters, necessary for optimized code
	pusha
	mov eax,[ebp+8]
; Nothing to do for an empty surface.  Both early exits go through the
; common epilogue so that the pusha/push ebp above are always undone.
	cmp dword [eax+4],BYTE 0
	je endconvert
; A zero height would make the dec/jnz below wrap around and run ~2^32 rows.
	cmp dword [eax+8],BYTE 0
	je endconvert
	mov ebp,eax
	mov esi,[ebp+0]
	mov edi,[ebp+16]
y_loop:
	mov ecx,[ebp+4]
	call [ebp+32]
	add esi,[ebp+12]
	add edi,[ebp+28]
	dec dword [ebp+8]
	jnz y_loop
endconvert:
; Restore the registers used by the blitters, necessary for optimized code.
; popa reloads EBP with the frame pointer saved by pusha, so the following
; pop ebp is correct on both the normal and the early-exit path.
	popa
	pop ebp
	ret
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
;
; x86 format converters for HERMES
; Copyright (c) 1998 Glenn Fiedler (gaffer@gaffer.org)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
; Used with permission.
;
BITS 32
%include "common.inc"
SDL_FUNC _ConvertX86p16_16BGR565
SDL_FUNC _ConvertX86p16_16RGB555
SDL_FUNC _ConvertX86p16_16BGR555
SDL_FUNC _ConvertX86p16_8RGB332
EXTERN _ConvertX86
SECTION .text
;; _ConvertX86p16_16BGR565: swap the two 5 bit end fields (bits 15..11 and
;; bits 4..0) of every 16 bit pixel, leaving the 6 bit middle field in place.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX
;;
;; Counts of 16 or fewer use a pixel-at-a-time loop.  Larger counts convert
;; one pixel first if EDI is not dword aligned, then two pixels per dword,
;; then one trailing pixel if the remaining count was odd.
_ConvertX86p16_16BGR565:
; check short
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
; only the low 16 bits of eax are meaningful; the final stores use al/ah
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
shr eax,11
and eax,BYTE 11111b
and ebx,11111100000b
shl edx,11
add eax,ebx
add eax,edx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
dec ecx
jnz .L1
.L2
retn
.L3 ; head
; convert a single pixel when the destination is not dword aligned
mov eax,edi
and eax,BYTE 11b
jz .L4
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
shr eax,11
and eax,BYTE 11111b
and ebx,11111100000b
shl edx,11
add eax,ebx
add eax,edx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
dec ecx
.L4 ; save count
push ecx
; unroll twice
shr ecx,1
; point arrays to end
lea esi,[esi+ecx*4]
lea edi,[edi+ecx*4]
; negative counter
neg ecx
jmp SHORT .L6
; The store of each result is deferred to the top of the next pass (.L5);
; the store after the loop writes the final result.
.L5 mov [edi+ecx*4-4],eax
.L6 mov eax,[esi+ecx*4]
mov ebx,[esi+ecx*4]
and eax,07E007E0h
mov edx,[esi+ecx*4]
and ebx,0F800F800h
shr ebx,11
and edx,001F001Fh
shl edx,11
add eax,ebx
add eax,edx
inc ecx
jnz .L5
mov [edi+ecx*4-4],eax
; tail
; one more pixel when the saved count was odd
pop ecx
and ecx,BYTE 1
jz .L7
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
shr eax,11
and eax,BYTE 11111b
and ebx,11111100000b
shl edx,11
add eax,ebx
add eax,edx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
.L7
retn
;; _ConvertX86p16_16RGB555: 16 bit 5-6-5 pixels -> 16 bit 5-5-5 pixels.
;; Bits 15..6 of each pixel are shifted down by one (dropping the lowest bit
;; of the 6 bit middle field) and bits 4..0 are kept unchanged.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX (EBP is saved and restored)
;;
;; Counts of 32 or fewer use a pixel-at-a-time loop.  Larger counts convert
;; one pixel first if EDI is not dword aligned, then four pixels per pass,
;; then the 0..3 remaining pixels.
_ConvertX86p16_16RGB555:
; check short
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
shr ebx,1
and ebx, 0111111111100000b
and eax,BYTE 0000000000011111b
add eax,ebx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
dec ecx
jnz .L1
.L2
retn
.L3 ; head
; convert a single pixel when the destination is not dword aligned
mov eax,edi
and eax,BYTE 11b
jz .L4
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
shr ebx,1
and ebx, 0111111111100000b
and eax,BYTE 0000000000011111b
add eax,ebx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
dec ecx
.L4 ; save ebp
push ebp
; save count
push ecx
; unroll four times
shr ecx,2
; point arrays to end
lea esi,[esi+ecx*8]
lea edi,[edi+ecx*8]
; negative counter
xor ebp,ebp
sub ebp,ecx
; two dwords (four pixels) per pass; ebp counts up from -groups to 0
.L5 mov eax,[esi+ebp*8] ; agi?
mov ecx,[esi+ebp*8+4]
mov ebx,eax
mov edx,ecx
and eax,0FFC0FFC0h
and ecx,0FFC0FFC0h
shr eax,1
and ebx,001F001Fh
shr ecx,1
and edx,001F001Fh
add eax,ebx
add ecx,edx
mov [edi+ebp*8],eax
mov [edi+ebp*8+4],ecx
inc ebp
jnz .L5
; tail
pop ecx
; loops back here after each tail pixel; ecx is at most 3 after the first mask
.L6 and ecx,BYTE 11b
jz .L7
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
shr ebx,1
and ebx, 0111111111100000b
and eax,BYTE 0000000000011111b
add eax,ebx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
dec ecx
jmp SHORT .L6
.L7 pop ebp
retn
;; _ConvertX86p16_16BGR555: 16 bit 5-6-5 pixels -> 16 bit 5-5-5 pixels with
;; the two end fields exchanged: source bits 15..11 go to bits 4..0, the top
;; five bits of the middle field go to bits 9..5, and source bits 4..0 go to
;; bits 14..10.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX
;;
;; Counts of 16 or fewer use a pixel-at-a-time loop.  Larger counts convert
;; one pixel first if EDI is not dword aligned, then two pixels per dword,
;; then one trailing pixel if the remaining count was odd.
_ConvertX86p16_16BGR555:
; check short
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
shr eax,11
and eax,BYTE 11111b
shr ebx,1
and ebx,1111100000b
shl edx,10
and edx,0111110000000000b
add eax,ebx
add eax,edx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
dec ecx
jnz .L1
.L2
retn
.L3 ; head
; convert a single pixel when the destination is not dword aligned
mov eax,edi
and eax,BYTE 11b
jz .L4
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
shr eax,11
and eax,BYTE 11111b
shr ebx,1
and ebx,1111100000b
shl edx,10
and edx,0111110000000000b
add eax,ebx
add eax,edx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
dec ecx
.L4 ; save count
push ecx
; unroll twice
shr ecx,1
; point arrays to end
lea esi,[esi+ecx*4]
lea edi,[edi+ecx*4]
; negative counter
neg ecx
jmp SHORT .L6
; The store of each result is deferred to the top of the next pass (.L5);
; the store after the loop writes the final result.
.L5 mov [edi+ecx*4-4],eax
.L6 mov eax,[esi+ecx*4]
shr eax,1
mov ebx,[esi+ecx*4]
and eax,03E003E0h
mov edx,[esi+ecx*4]
and ebx,0F800F800h
shr ebx,11
and edx,001F001Fh
shl edx,10
add eax,ebx
add eax,edx
inc ecx
jnz .L5
mov [edi+ecx*4-4],eax
; tail
; one more pixel when the saved count was odd
pop ecx
and ecx,BYTE 1
jz .L7
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
shr eax,11
and eax,BYTE 11111b
shr ebx,1
and ebx,1111100000b
shl edx,10
and edx,0111110000000000b
add eax,ebx
add eax,edx
mov [edi],al
mov [edi+1],ah
add esi,BYTE 2
add edi,BYTE 2
.L7
retn
;; _ConvertX86p16_8RGB332: 16 bit 5-6-5 pixels -> 8 bit 3-3-2 pixels.
;; Each output byte takes source bits 15..13 (-> bits 7..5), bits 10..8
;; (-> bits 4..2) and bits 4..3 (-> bits 1..0).
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX (EBP is saved and restored but not otherwise
;;       used in this routine)
;;
;; Counts of 16 or fewer use a pixel-at-a-time loop.  Larger counts convert
;; single pixels until EDI is dword aligned, then four pixels per dword
;; store, then the 0..3 remaining pixels.
_ConvertX86p16_8RGB332:
; check short
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
mov al,[esi+0]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
and eax,BYTE 11000b ; blue
shr eax,3
and ebx,11100000000b ; green
shr ebx,6
and edx,1110000000000000b ; red
shr edx,8
add eax,ebx
add eax,edx
mov [edi],al
add esi,BYTE 2
inc edi
dec ecx
jnz .L1
.L2
retn
; head: one pixel per pass until the destination is dword aligned
.L3 mov eax,edi
and eax,BYTE 11b
jz .L4
mov al,[esi+0]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
and eax,BYTE 11000b ; blue
shr eax,3
and ebx,11100000000b ; green
shr ebx,6
and edx,1110000000000000b ; red
shr edx,8
add eax,ebx
add eax,edx
mov [edi],al
add esi,BYTE 2
inc edi
dec ecx
jmp SHORT .L3
.L4 ; save ebp
push ebp
; save count
push ecx
; unroll 4 times
shr ecx,2
; prestep
; edx collects the low bytes of four pixels, ebx the high bytes; the first
; three byte loads of each group are issued ahead of the loop body
mov dl,[esi+0]
mov bl,[esi+1]
mov dh,[esi+2]
.L5 shl edx,16
mov bh,[esi+3]
shl ebx,16
mov dl,[esi+4]
mov dh,[esi+6]
mov bl,[esi+5]
and edx,00011000000110000001100000011000b
mov bh,[esi+7]
ror edx,16+3
mov eax,ebx ; setup eax for reds
and ebx,00000111000001110000011100000111b
and eax,11100000111000001110000011100000b ; reds
ror ebx,16-2
add esi,BYTE 8
ror eax,16
add edi,BYTE 4
add eax,ebx
; NOTE(review): the three loads below belong to the next group, so the last
; pass reads up to three bytes beyond the pixels consumed by this loop.
mov bl,[esi+1] ; greens
add eax,edx
mov dl,[esi+0] ; blues
mov [edi-4],eax
mov dh,[esi+2]
dec ecx
jnz .L5
; check tail
pop ecx
and ecx,BYTE 11b
jz .L7
.L6 ; tail
mov al,[esi+0]
mov ah,[esi+1]
mov ebx,eax
mov edx,eax
and eax,BYTE 11000b ; blue
shr eax,3
and ebx,11100000000b ; green
shr ebx,6
and edx,1110000000000000b ; red
shr edx,8
add eax,ebx
add eax,edx
mov [edi],al
add esi,BYTE 2
inc edi
dec ecx
jnz .L6
.L7 pop ebp
retn
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;
BITS 32
%include "common.inc"
SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332
SECTION .text
;; _Convert_*
;; Parameters:
;; ESI = source
;; EDI = dest
;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;; EAX, EBX, EDX
;; _ConvertX86p32_32BGR888: exchange the lowest and third bytes of every
;; 32 bit pixel and keep the top byte where it is.  bswap reverses all four
;; bytes; ror 8 then rotates the former top byte back to the top.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX (EBP is saved and restored on the long path)
;;
;; Counts of 32 or fewer use the one-pixel loop; larger counts run four
;; pixels per pass followed by the 0..3 remaining pixels.
_ConvertX86p32_32BGR888:
; check short
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
mov edx,[esi]
bswap edx
ror edx,8
mov [edi],edx
add esi,BYTE 4
add edi,BYTE 4
dec ecx
jnz .L1
.L2
retn
.L3 ; save ebp
push ebp
; unroll four times
; ebp = number of 4-pixel groups (ecx is reused as a data register below)
mov ebp,ecx
shr ebp,2
; save count
push ecx
.L4 mov eax,[esi]
mov ebx,[esi+4]
bswap eax
bswap ebx
ror eax,8
mov ecx,[esi+8]
ror ebx,8
mov edx,[esi+12]
bswap ecx
bswap edx
ror ecx,8
mov [edi+0],eax
ror edx,8
mov [edi+4],ebx
mov [edi+8],ecx
mov [edi+12],edx
add esi,BYTE 16
add edi,BYTE 16
dec ebp
jnz .L4
; check tail
pop ecx
and ecx,BYTE 11b
jz .L6
.L5 ; tail loop
mov edx,[esi]
bswap edx
ror edx,8
mov [edi],edx
add esi,BYTE 4
add edi,BYTE 4
dec ecx
jnz .L5
.L6 pop ebp
retn
;; _ConvertX86p32_32RGBA888: rotate every 32 bit pixel left by eight bits,
;; so the low three bytes move up one position and the top byte wraps round
;; to the bottom.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX (EBP is saved and restored on the long path)
;;
;; Counts of 32 or fewer use the one-pixel loop; larger counts run four
;; pixels per pass followed by the 0..3 remaining pixels.
_ConvertX86p32_32RGBA888:
; check short
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
mov edx,[esi]
rol edx,8
mov [edi],edx
add esi,BYTE 4
add edi,BYTE 4
dec ecx
jnz .L1
.L2
retn
.L3 ; save ebp
push ebp
; unroll four times
; ebp = number of 4-pixel groups (ecx is reused as a data register below)
mov ebp,ecx
shr ebp,2
; save count
push ecx
.L4 mov eax,[esi]
mov ebx,[esi+4]
rol eax,8
mov ecx,[esi+8]
rol ebx,8
mov edx,[esi+12]
rol ecx,8
mov [edi+0],eax
rol edx,8
mov [edi+4],ebx
mov [edi+8],ecx
mov [edi+12],edx
add esi,BYTE 16
add edi,BYTE 16
dec ebp
jnz .L4
; check tail
pop ecx
and ecx,BYTE 11b
jz .L6
.L5 ; tail loop
mov edx,[esi]
rol edx,8
mov [edi],edx
add esi,BYTE 4
add edi,BYTE 4
dec ecx
jnz .L5
.L6 pop ebp
retn
;; _ConvertX86p32_32BGRA888: reverse the byte order of every 32 bit pixel
;; (bswap).
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX (EBP is saved and restored on the long path)
;;
;; Counts of 32 or fewer use the one-pixel loop; larger counts run four
;; pixels per pass followed by the 0..3 remaining pixels.
_ConvertX86p32_32BGRA888:
; check short
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
mov edx,[esi]
bswap edx
mov [edi],edx
add esi,BYTE 4
add edi,BYTE 4
dec ecx
jnz .L1
.L2
retn
.L3 ; save ebp
push ebp
; unroll four times
; ebp = number of 4-pixel groups (ecx is reused as a data register below)
mov ebp,ecx
shr ebp,2
; save count
push ecx
.L4 mov eax,[esi]
mov ebx,[esi+4]
mov ecx,[esi+8]
mov edx,[esi+12]
bswap eax
bswap ebx
bswap ecx
bswap edx
mov [edi+0],eax
mov [edi+4],ebx
mov [edi+8],ecx
mov [edi+12],edx
add esi,BYTE 16
add edi,BYTE 16
dec ebp
jnz .L4
; check tail
pop ecx
and ecx,BYTE 11b
jz .L6
.L5 ; tail loop
mov edx,[esi]
bswap edx
mov [edi],edx
add esi,BYTE 4
add edi,BYTE 4
dec ecx
jnz .L5
.L6 pop ebp
retn
;; 32 bit RGB 888 to 24 BIT RGB 888
;;
;; Copies the low three bytes of every 32 bit pixel, in the same order, and
;; drops the fourth.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX (EBP is saved and restored on the long path)
;;
;; Counts of 32 or fewer use the byte-copy loop.  Larger counts copy single
;; pixels until EDI is dword aligned, then turn four pixels into three
;; dword stores per pass, then copy the 0..3 remaining pixels.
_ConvertX86p32_24RGB888:
; check short
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
mov al,[esi]
mov bl,[esi+1]
mov dl,[esi+2]
mov [edi],al
mov [edi+1],bl
mov [edi+2],dl
add esi,BYTE 4
add edi,BYTE 3
dec ecx
jnz .L1
.L2
retn
.L3 ; head
; one pixel per pass until the destination is dword aligned
mov edx,edi
and edx,BYTE 11b
jz .L4
mov al,[esi]
mov bl,[esi+1]
mov dl,[esi+2]
mov [edi],al
mov [edi+1],bl
mov [edi+2],dl
add esi,BYTE 4
add edi,BYTE 3
dec ecx
jmp SHORT .L3
.L4 ; unroll 4 times
push ebp
; ebp = number of 4-pixel groups (ecx is reused as a data register below)
mov ebp,ecx
shr ebp,2
; save count
push ecx
; The register diagrams below list bytes from most to least significant;
; the least significant byte is the one stored first in memory.
.L5 mov eax,[esi] ; first dword eax = [A][R][G][B]
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b]
shl eax,8 ; eax = [R][G][B][.]
mov ecx,[esi+12] ; third dword ecx = [a][r][g][b]
shl ebx,8 ; ebx = [r][g][b][.]
mov al,[esi+4] ; eax = [R][G][B][b]
ror eax,8 ; eax = [b][R][G][B] (done)
mov bh,[esi+8+1] ; ebx = [r][g][G][.]
mov [edi],eax
add edi,BYTE 3*4
shl ecx,8 ; ecx = [r][g][b][.]
mov bl,[esi+8+0] ; ebx = [r][g][G][B]
rol ebx,16 ; ebx = [G][B][r][g] (done)
mov cl,[esi+8+2] ; ecx = [r][g][b][R] (done)
; edi was already advanced by 12, hence the -3*4 in the two stores below
mov [edi+4-3*4],ebx
add esi,BYTE 4*4
mov [edi+8-3*4],ecx
dec ebp
jnz .L5
; check tail
pop ecx
and ecx,BYTE 11b
jz .L7
.L6 ; tail loop
mov al,[esi]
mov bl,[esi+1]
mov dl,[esi+2]
mov [edi],al
mov [edi+1],bl
mov [edi+2],dl
add esi,BYTE 4
add edi,BYTE 3
dec ecx
jnz .L6
.L7 pop ebp
retn
;; 32 bit RGB 888 to 24 bit BGR 888
;;
;; Writes the low three bytes of every 32 bit pixel in reversed order
;; (source byte 2 first, then byte 1, then byte 0) and drops the fourth.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX (EBP is saved and restored on the long path)
;;
;; Counts of 32 or fewer use the byte-copy loop.  Larger counts copy single
;; pixels until EDI is dword aligned, then turn four pixels into three
;; dword stores per pass, then copy the 0..3 remaining pixels.
_ConvertX86p32_24BGR888:
; check short
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
mov dl,[esi]
mov bl,[esi+1]
mov al,[esi+2]
mov [edi],al
mov [edi+1],bl
mov [edi+2],dl
add esi,BYTE 4
add edi,BYTE 3
dec ecx
jnz .L1
.L2
retn
.L3 ; head
; one pixel per pass until the destination is dword aligned
mov edx,edi
and edx,BYTE 11b
jz .L4
mov dl,[esi]
mov bl,[esi+1]
mov al,[esi+2]
mov [edi],al
mov [edi+1],bl
mov [edi+2],dl
add esi,BYTE 4
add edi,BYTE 3
dec ecx
jmp SHORT .L3
.L4 ; unroll 4 times
push ebp
; ebp = number of 4-pixel groups (ecx is reused as a data register below)
mov ebp,ecx
shr ebp,2
; save count
push ecx
; The register diagrams below list bytes from most to least significant;
; the least significant byte is the one stored first in memory.
.L5
mov eax,[esi] ; first dword eax = [A][R][G][B]
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b]
bswap eax ; eax = [B][G][R][A]
bswap ebx ; ebx = [b][g][r][a]
mov al,[esi+4+2] ; eax = [B][G][R][r]
mov bh,[esi+4+4+1] ; ebx = [b][g][G][a]
ror eax,8 ; eax = [r][B][G][R] (done)
mov bl,[esi+4+4+2] ; ebx = [b][g][G][R]
ror ebx,16 ; ebx = [G][R][b][g] (done)
mov [edi],eax
mov [edi+4],ebx
mov ecx,[esi+12] ; third dword ecx = [a][r][g][b]
bswap ecx ; ecx = [b][g][r][a]
mov cl,[esi+8] ; ecx = [b][g][r][B] (done)
add esi,BYTE 4*4
mov [edi+8],ecx
add edi,BYTE 3*4
dec ebp
jnz .L5
; check tail
pop ecx
and ecx,BYTE 11b
jz .L7
.L6 ; tail loop
mov dl,[esi]
mov bl,[esi+1]
mov al,[esi+2]
mov [edi],al
mov [edi+1],bl
mov [edi+2],dl
add esi,BYTE 4
add edi,BYTE 3
dec ecx
jnz .L6
.L7
pop ebp
retn
;; 32 bit RGB 888 to 16 BIT RGB 565
;;
;; Output word: top 5 bits of source byte 2 (red) in bits 15..11, top 6 bits
;; of source byte 1 (green) in bits 10..5, top 5 bits of source byte 0 (blue)
;; in bits 4..0.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX
;;
;; Counts of 16 or fewer use a pixel-at-a-time loop.  Larger counts convert
;; one pixel first if EDI is not dword aligned, then two pixels per dword
;; store, then one trailing pixel if the remaining count was odd.
_ConvertX86p32_16RGB565:
; check short
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
shr ah,3
and al,11111100b
shl eax,3
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
jnz .L1
.L2: ; End of short loop
retn
.L3 ; head
; convert a single pixel when the destination is not dword aligned
mov ebx,edi
and ebx,BYTE 11b
jz .L4
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
shr ah,3
and al,11111100b
shl eax,3
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
.L4:
; save count
push ecx
; unroll twice
shr ecx,1
; point arrays to end
lea esi,[esi+ecx*8]
lea edi,[edi+ecx*4]
; negative counter
neg ecx
jmp SHORT .L6
; The store of each result is deferred to the top of the next pass (.L5);
; the store after the loop writes the final result.
.L5:
mov [edi+ecx*4-4],eax
.L6:
; eax: green+blue of the first pixel (low word of the result)
; ebx: green+blue of the second pixel (high word of the result)
; edx: red of both pixels (dl is loaded with the first pixel's red byte)
mov eax,[esi+ecx*8]
shr ah,2
mov ebx,[esi+ecx*8+4]
shr eax,3
mov edx,[esi+ecx*8+4]
shr bh,2
mov dl,[esi+ecx*8+2]
shl ebx,13
and eax,000007FFh
shl edx,8
and ebx,07FF0000h
and edx,0F800F800h
add eax,ebx
add eax,edx
inc ecx
jnz .L5
mov [edi+ecx*4-4],eax
; tail
; one more pixel when the saved count was odd
pop ecx
test cl,1
jz .L7
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
shr ah,3
and al,11111100b
shl eax,3
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
.L7:
retn
;; 32 bit RGB 888 to 16 BIT BGR 565
;;
;; Output word: top 5 bits of source byte 0 (blue) in bits 15..11, top 6 bits
;; of source byte 1 (green) in bits 10..5, top 5 bits of source byte 2 (red)
;; in bits 4..0.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX
;;
;; Counts of 16 or fewer use a pixel-at-a-time loop.  Larger counts convert
;; one pixel first if EDI is not dword aligned, then two pixels per dword
;; store, then one trailing pixel if the remaining count was odd.
_ConvertX86p32_16BGR565:
; check short
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
shr ah,3
and al,11111100b
shl eax,3
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
jnz .L1
.L2
retn
.L3 ; head
; convert a single pixel when the destination is not dword aligned
mov ebx,edi
and ebx,BYTE 11b
jz .L4
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
shr ah,3
and al,11111100b
shl eax,3
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
.L4 ; save count
push ecx
; unroll twice
shr ecx,1
; point arrays to end
lea esi,[esi+ecx*8]
lea edi,[edi+ecx*4]
; negative count
neg ecx
jmp SHORT .L6
; The store of each result is deferred to the top of the next pass (.L5);
; the store after the loop writes the final result.
.L5
mov [edi+ecx*4-4],eax
.L6
; eax: blue+green of the first pixel (low word of the result)
; ebx: blue+green of the second pixel (high word of the result)
; edx: red of both pixels (dl is loaded with the first pixel's red byte)
mov edx,[esi+ecx*8+4]
mov bh,[esi+ecx*8+4]
mov ah,[esi+ecx*8]
shr bh,3
mov al,[esi+ecx*8+1]
shr ah,3
mov bl,[esi+ecx*8+5]
shl eax,3
mov dl,[esi+ecx*8+2]
shl ebx,19
and eax,0000FFE0h
shr edx,3
and ebx,0FFE00000h
and edx,001F001Fh
add eax,ebx
add eax,edx
inc ecx
jnz .L5
mov [edi+ecx*4-4],eax
; tail
; one more pixel when the saved count was odd
pop ecx
and ecx,BYTE 1
jz .L7
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
shr ah,3
and al,11111100b
shl eax,3
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
.L7
retn
;; 32 BIT RGB TO 16 BIT RGB 555
;;
;; Output word: top 5 bits of source byte 2 (red) in bits 14..10, top 5 bits
;; of source byte 1 (green) in bits 9..5, top 5 bits of source byte 0 (blue)
;; in bits 4..0.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX
;;
;; Counts of 16 or fewer use a pixel-at-a-time loop.  Larger counts convert
;; one pixel first if EDI is not dword aligned, then two pixels per dword
;; store, then one trailing pixel if the remaining count was odd.
_ConvertX86p32_16RGB555:
; check short
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
shr ah,3
and al,11111000b
shl eax,2
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
jnz .L1
.L2
retn
.L3 ; head
; convert a single pixel when the destination is not dword aligned
mov ebx,edi
and ebx,BYTE 11b
jz .L4
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
shr ah,3
and al,11111000b
shl eax,2
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
.L4 ; save count
push ecx
; unroll twice
shr ecx,1
; point arrays to end
lea esi,[esi+ecx*8]
lea edi,[edi+ecx*4]
; negative counter
neg ecx
jmp SHORT .L6
; The store of each result is deferred to the top of the next pass (.L5);
; the store after the loop writes the final result.
.L5
mov [edi+ecx*4-4],eax
.L6
; eax: green+blue of the first pixel (low word of the result)
; ebx: green+blue of the second pixel (high word of the result)
; edx: red of both pixels (dl is loaded with the first pixel's red byte)
mov eax,[esi+ecx*8]
shr ah,3
mov ebx,[esi+ecx*8+4]
shr eax,3
mov edx,[esi+ecx*8+4]
shr bh,3
mov dl,[esi+ecx*8+2]
shl ebx,13
and eax,000007FFh
shl edx,7
and ebx,07FF0000h
and edx,07C007C00h
add eax,ebx
add eax,edx
inc ecx
jnz .L5
mov [edi+ecx*4-4],eax
; tail
; one more pixel when the saved count was odd
pop ecx
and ecx,BYTE 1
jz .L7
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
shr ah,3
and al,11111000b
shl eax,2
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
.L7
retn
;; 32 BIT RGB TO 16 BIT BGR 555
;;
;; Output word: top 5 bits of source byte 0 (blue) in bits 14..10, top 5 bits
;; of source byte 1 (green) in bits 9..5, top 5 bits of source byte 2 (red)
;; in bits 4..0.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count (must not be 0)
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX
;;
;; Counts of 16 or fewer use a pixel-at-a-time loop.  Larger counts convert
;; one pixel first if EDI is not dword aligned, then two pixels per dword
;; store, then one trailing pixel if the remaining count was odd.
_ConvertX86p32_16BGR555:
; check short
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
shr ah,3
and al,11111000b
shl eax,2
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
jnz .L1
.L2
retn
.L3 ; head
; convert a single pixel when the destination is not dword aligned
mov ebx,edi
and ebx,BYTE 11b
jz .L4
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
shr ah,3
and al,11111000b
shl eax,2
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
.L4 ; save count
push ecx
; unroll twice
shr ecx,1
; point arrays to end
lea esi,[esi+ecx*8]
lea edi,[edi+ecx*4]
; negative counter
neg ecx
jmp SHORT .L6
; The store of each result is deferred to the top of the next pass (.L5);
; the store after the loop writes the final result.
.L5
mov [edi+ecx*4-4],eax
.L6
; eax: blue+green of the first pixel (low word of the result)
; ebx: blue+green of the second pixel (high word of the result)
; edx: red of both pixels (dl is loaded with the first pixel's red byte)
mov edx,[esi+ecx*8+4]
mov bh,[esi+ecx*8+4]
mov ah,[esi+ecx*8]
shr bh,3
mov al,[esi+ecx*8+1]
shr ah,3
mov bl,[esi+ecx*8+5]
shl eax,2
mov dl,[esi+ecx*8+2]
shl ebx,18
and eax,00007FE0h
shr edx,3
and ebx,07FE00000h
and edx,001F001Fh
add eax,ebx
add eax,edx
inc ecx
jnz .L5
mov [edi+ecx*4-4],eax
; tail
; one more pixel when the saved count was odd
pop ecx
and ecx,BYTE 1
jz .L7
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
shr ah,3
and al,11111000b
shl eax,2
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
.L7
retn
;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb)
;; This routine writes FOUR pixels at once (dword) and then, if they exist
;; the trailing three pixels
;;
;; Output byte: top 3 bits of source byte 2 in bits 7..5, top 3 bits of
;; source byte 1 in bits 4..2, top 2 bits of source byte 0 in bits 1..0.
;;
;; In:   ESI = source, EDI = destination, ECX = pixel count
;; Out:  ESI / EDI advanced past the converted pixels
;; Uses: EAX, EBX, ECX, EDX
_ConvertX86p32_8RGB332:
.L_ALIGNED
; the full count is kept on the stack; ecx counts 4-pixel groups
push ecx
shr ecx,2 ; We will draw 4 pixels at once
jnz .L1
jmp .L2 ; short jump out of range :(
.L1:
; pixels 0 and 1 are reduced into al and ah
mov eax,[esi] ; first pair of pixels
mov edx,[esi+4]
shr dl,6
mov ebx,eax
shr al,6
and ah,0e0h
shr ebx,16
and dh,0e0h
shr ah,3
and bl,0e0h
shr dh,3
or al,bl
mov ebx,edx
or al,ah
shr ebx,16
or dl,dh
and bl,0e0h
or dl,bl
mov ah,dl
; pixels 2 and 3: ax (pixels 0/1) is rotated into the upper half of eax so
; that al / ah can receive pixels 2 / 3; the final rol 16 restores the order
mov ebx,[esi+8] ; second pair of pixels
mov edx,ebx
and bh,0e0h
shr bl,6
and edx,0e00000h
shr edx,16
shr bh,3
ror eax,16
or bl,dl
mov edx,[esi+12]
or bl,bh
mov al,bl
mov ebx,edx
and dh,0e0h
shr dl,6
and ebx,0e00000h
shr dh,3
mov ah,dl
shr ebx,16
or ah,dh
or ah,bl
rol eax,16
add esi,BYTE 16
mov [edi],eax
add edi,BYTE 4
dec ecx
jz .L2 ; L1 out of range for short jump :(
jmp .L1
.L2:
pop ecx
and ecx,BYTE 3 ; mask out number of pixels to draw
jz .L4 ; Nothing to do anymore
.L3:
mov eax,[esi] ; single pixel conversion for trailing pixels
mov ebx,eax
shr al,6
and ah,0e0h
shr ebx,16
shr ah,3
and bl,0e0h
or al,ah
or al,bl
mov [edi],al
inc edi
add esi,BYTE 4
dec ecx
jnz .L3
.L4:
retn
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "SDL_video.h" #include "SDL_video.h"
#include "SDL_sysvideo.h" #include "SDL_sysvideo.h"
#include "SDL_blit.h" #include "SDL_blit.h"
#include "SDL_blit_copy.h"
#include "SDL_RLEaccel_c.h" #include "SDL_RLEaccel_c.h"
#include "SDL_pixels_c.h" #include "SDL_pixels_c.h"
...@@ -106,111 +107,64 @@ SDL_SoftBlit(SDL_Surface * src, SDL_Rect * srcrect, ...@@ -106,111 +107,64 @@ SDL_SoftBlit(SDL_Surface * src, SDL_Rect * srcrect,
return (okay ? 0 : -1); return (okay ? 0 : -1);
} }
#ifdef MMX_ASMBLIT #ifdef __MACOSX__
static __inline__ void #include <sys/sysctl.h>
SDL_memcpyMMX(Uint8 * to, const Uint8 * from, int len)
static SDL_bool SDL_UseAltivecPrefetch()
{ {
int i; const char key[] = "hw.l3cachesize";
u_int64_t result = 0;
size_t typeSize = sizeof(result);
for (i = 0; i < len / 8; i++) { if (sysctlbyname(key, &result, &typeSize, NULL, 0) == 0 && result > 0) {
__asm__ __volatile__(" movq (%0), %%mm0\n" return SDL_TRUE;
" movq %%mm0, (%1)\n"::"r"(from), } else {
"r"(to):"memory"); return SDL_FALSE;
from += 8;
to += 8;
} }
if (len & 7)
SDL_memcpy(to, from, len & 7);
} }
#else
static SDL_bool SDL_UseAltivecPrefetch()
{
/* Just guess G4 */
return SDL_TRUE;
}
#endif /* __MACOSX__ */
static __inline__ void static SDL_loblit SDL_ChooseBlitFunc(SDL_BlitEntry *entries, int count)
SDL_memcpySSE(Uint8 * to, const Uint8 * from, int len)
{ {
int i; int i;
static Uint32 features = 0xffffffff;
__asm__ __volatile__(" prefetchnta (%0)\n" if (features == 0xffffffff) {
" prefetchnta 64(%0)\n" features = SDL_BLIT_ANY;
" prefetchnta 128(%0)\n"
" prefetchnta 192(%0)\n"::"r"(from));
for (i = 0; i < len / 8; i++) { /* Provide an override for testing .. */
__asm__ __volatile__(" prefetchnta 256(%0)\n" const char *override = SDL_getenv("SDL_BLIT_FEATURES");
" movq (%0), %%mm0\n" if (override) {
" movntq %%mm0, (%1)\n"::"r"(from), SDL_sscanf(override, "%u", &features);
"r"(to):"memory"); } else {
from += 8; if (SDL_HasMMX()) {
to += 8; features |= SDL_BLIT_MMX;
} }
if (len & 7)
SDL_memcpy(to, from, len & 7);
}
#endif
static void
SDL_BlitCopy(SDL_BlitInfo * info)
{
Uint8 *src, *dst;
int w, h;
int srcskip, dstskip;
w = info->d_width * info->dst->BytesPerPixel;
h = info->d_height;
src = info->s_pixels;
dst = info->d_pixels;
srcskip = w + info->s_skip;
dstskip = w + info->d_skip;
#ifdef MMX_ASMBLIT
if (SDL_HasSSE()) { if (SDL_HasSSE()) {
while (h--) { features |= SDL_BLIT_SSE;
SDL_memcpySSE(dst, src, w); }
src += srcskip; if (SDL_HasAltivec()) {
dst += dstskip; if (SDL_UseAltivecPrefetch()) {
features |= SDL_BLIT_ALTIVEC_PREFETCH;
} else {
features |= SDL_BLIT_ALTIVEC_NOPREFETCH;
} }
__asm__ __volatile__(" emms\n"::);
} else if (SDL_HasMMX()) {
while (h--) {
SDL_memcpyMMX(dst, src, w);
src += srcskip;
dst += dstskip;
} }
__asm__ __volatile__(" emms\n"::);
} else
#endif
while (h--) {
SDL_memcpy(dst, src, w);
src += srcskip;
dst += dstskip;
} }
}
static void
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
{
Uint8 *src, *dst;
int w, h;
int srcskip, dstskip;
w = info->d_width * info->dst->BytesPerPixel;
h = info->d_height;
src = info->s_pixels;
dst = info->d_pixels;
srcskip = w + info->s_skip;
dstskip = w + info->d_skip;
if (dst < src) {
while (h--) {
SDL_memcpy(dst, src, w);
src += srcskip;
dst += dstskip;
} }
} else {
src += ((h - 1) * srcskip); for (i = count; i > 0; --i) {
dst += ((h - 1) * dstskip); if (features & entries[i].features) {
while (h--) { return entries[i].blit;
SDL_revcpy(dst, src, w);
src -= srcskip;
dst -= dstskip;
} }
} }
return entries[0].blit;
} }
/* Figure out which of many blit routines to set up on a surface */ /* Figure out which of many blit routines to set up on a surface */
...@@ -237,11 +191,11 @@ SDL_CalculateBlit(SDL_Surface * surface) ...@@ -237,11 +191,11 @@ SDL_CalculateBlit(SDL_Surface * surface)
/* Check for special "identity" case -- copy blit */ /* Check for special "identity" case -- copy blit */
if (surface->map->identity && blit_index == 0) { if (surface->map->identity && blit_index == 0) {
surface->map->sw_data->blit = SDL_BlitCopy;
/* Handle overlapping blits on the same surface */ /* Handle overlapping blits on the same surface */
if (surface == surface->map->dst) { if (surface == surface->map->dst) {
surface->map->sw_data->blit = SDL_BlitCopyOverlap; surface->map->sw_data->blit = SDL_BlitCopyOverlap;
} else {
surface->map->sw_data->blit = SDL_BlitCopy;
} }
} else { } else {
if (surface->format->BitsPerPixel < 8) { if (surface->format->BitsPerPixel < 8) {
......
...@@ -67,6 +67,17 @@ typedef struct SDL_BlitMap ...@@ -67,6 +67,17 @@ typedef struct SDL_BlitMap
unsigned int format_version; unsigned int format_version;
} SDL_BlitMap; } SDL_BlitMap;
/* Feature bits used to tag blit routines (SDL_BlitEntry.features) and to
   record which CPU capabilities were detected at run time. */
/* No feature bits set; the detected feature mask starts from this value. */
#define SDL_BLIT_ANY 0x00000000
/* Set when SDL_HasMMX() reports MMX support. */
#define SDL_BLIT_MMX 0x00000001
/* Set when SDL_HasSSE() reports SSE support. */
#define SDL_BLIT_SSE 0x00000002
/* Altivec is present and SDL_UseAltivecPrefetch() returned SDL_TRUE. */
#define SDL_BLIT_ALTIVEC_PREFETCH 0x00000004
/* Altivec is present and SDL_UseAltivecPrefetch() returned SDL_FALSE. */
#define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008
/* One candidate blit routine together with the SDL_BLIT_* feature bits it is
   tagged with.  SDL_ChooseBlitFunc() tests `features` against the detected
   CPU feature mask and returns `blit` of a matching entry. */
typedef struct SDL_BlitEntry
{
Uint32 features;    /* mask of SDL_BLIT_* bits */
SDL_loblit blit;    /* routine returned when the entry is selected */
} SDL_BlitEntry;
/* Functions found in SDL_blit.c */ /* Functions found in SDL_blit.c */
extern int SDL_CalculateBlit(SDL_Surface * surface); extern int SDL_CalculateBlit(SDL_Surface * surface);
......
...@@ -879,19 +879,6 @@ GetBlitFeatures(void) ...@@ -879,19 +879,6 @@ GetBlitFeatures(void)
#define LO 1 #define LO 1
#endif #endif
#if SDL_HERMES_BLITTERS
/* Heheheh, we coerce Hermes into using SDL blit information */
#define X86_ASSEMBLER
#define HermesConverterInterface SDL_BlitInfo
#define HermesClearInterface void
#define STACKCALL
#include "../hermes/HeadMMX.h"
#include "../hermes/HeadX86.h"
#else
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */ /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
#define RGB888_RGB332(dst, src) { \ #define RGB888_RGB332(dst, src) { \
dst = (Uint8)((((src)&0x00E00000)>>16)| \ dst = (Uint8)((((src)&0x00E00000)>>16)| \
...@@ -1250,8 +1237,6 @@ Blit_RGB888_RGB565(SDL_BlitInfo * info) ...@@ -1250,8 +1237,6 @@ Blit_RGB888_RGB565(SDL_BlitInfo * info)
#endif /* USE_DUFFS_LOOP */ #endif /* USE_DUFFS_LOOP */
} }
#endif /* SDL_HERMES_BLITTERS */
/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */ /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1]) #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
...@@ -2357,17 +2342,7 @@ static const struct blit_table normal_blit_1[] = { ...@@ -2357,17 +2342,7 @@ static const struct blit_table normal_blit_1[] = {
{0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL}, {0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL},
}; };
static const struct blit_table normal_blit_2[] = { static const struct blit_table normal_blit_2[] = {
#if SDL_HERMES_BLITTERS #if SDL_ALTIVEC_BLITTERS
{0x0000F800, 0x000007E0, 0x0000001F, 2, 0x0000001F, 0x000007E0,
0x0000F800,
0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA},
{0x0000F800, 0x000007E0, 0x0000001F, 2, 0x00007C00, 0x000003E0,
0x0000001F,
0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA},
{0x0000F800, 0x000007E0, 0x0000001F, 2, 0x0000001F, 0x000003E0,
0x00007C00,
0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA},
#elif SDL_ALTIVEC_BLITTERS
/* has-altivec */ /* has-altivec */
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
0x00000000, 0x00000000,
...@@ -2397,47 +2372,6 @@ static const struct blit_table normal_blit_3[] = { ...@@ -2397,47 +2372,6 @@ static const struct blit_table normal_blit_3[] = {
{0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0} {0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0}
}; };
static const struct blit_table normal_blit_4[] = { static const struct blit_table normal_blit_4[] = {
#if SDL_HERMES_BLITTERS
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
0x0000001F,
1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
0x0000001F,
0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000007E0,
0x0000F800,
1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000007E0,
0x0000F800,
0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
0x0000001F,
1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
0x0000001F,
0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000003E0,
0x00007C00,
1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000003E0,
0x00007C00,
0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00,
0x000000FF,
0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00,
0x00FF0000,
0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00,
0x00FF0000,
0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0xFF000000, 0x00FF0000,
0x0000FF00,
0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x0000FF00, 0x00FF0000,
0xFF000000,
0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA},
#else
#if SDL_ALTIVEC_BLITTERS #if SDL_ALTIVEC_BLITTERS
/* has-altivec | dont-use-prefetch */ /* has-altivec | dont-use-prefetch */
{0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
...@@ -2460,7 +2394,6 @@ static const struct blit_table normal_blit_4[] = { ...@@ -2460,7 +2394,6 @@ static const struct blit_table normal_blit_4[] = {
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
0x0000001F, 0x0000001F,
0, NULL, Blit_RGB888_RGB555, NO_ALPHA}, 0, NULL, Blit_RGB888_RGB555, NO_ALPHA},
#endif
/* Default for 32-bit RGB source, used if no other blitter matches */ /* Default for 32-bit RGB source, used if no other blitter matches */
{0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0} {0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0}
}; };
...@@ -2529,12 +2462,7 @@ SDL_CalculateBlitN(SDL_Surface * surface, int blit_index) ...@@ -2529,12 +2462,7 @@ SDL_CalculateBlitN(SDL_Surface * surface, int blit_index)
if (surface->map->table) { if (surface->map->table) {
blitfun = Blit_RGB888_index8_map; blitfun = Blit_RGB888_index8_map;
} else { } else {
#if SDL_HERMES_BLITTERS
sdata->aux_data = ConvertX86p32_8RGB332;
blitfun = ConvertX86;
#else
blitfun = Blit_RGB888_index8; blitfun = Blit_RGB888_index8;
#endif
} }
} else { } else {
blitfun = BlitNto1; blitfun = BlitNto1;
...@@ -2575,13 +2503,6 @@ SDL_CalculateBlitN(SDL_Surface * surface, int blit_index) ...@@ -2575,13 +2503,6 @@ SDL_CalculateBlitN(SDL_Surface * surface, int blit_index)
} }
#ifdef DEBUG_ASM #ifdef DEBUG_ASM
#if SDL_HERMES_BLITTERS
if (blitfun == ConvertMMX)
fprintf(stderr, "Using mmx blit\n");
else if (blitfun == ConvertX86)
fprintf(stderr, "Using asm blit\n");
else
#endif
if ((blitfun == BlitNtoN) || (blitfun == BlitNto1)) if ((blitfun == BlitNtoN) || (blitfun == BlitNto1))
fprintf(stderr, "Using C blit\n"); fprintf(stderr, "Using C blit\n");
else else
......
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2006 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#include "SDL_video.h"
#include "SDL_blit.h"
/* The MMX/SSE intrinsics don't give access to specific registers for
the most memory parallelism, so we'll use GCC inline assembly here...
*/
#ifndef __GNUC__
#undef __MMX__
#undef __SSE__
#endif
#ifdef __MMX__
/*
 * Copy `len` bytes from `src` to `dst` using the eight MMX registers.
 *
 * The bulk of the data is moved in 64-byte chunks: all eight quadwords are
 * loaded first and stored afterwards so the loads can overlap.  The
 * remaining (len & 63) bytes are copied with SDL_memcpy().
 *
 * Only base MMX instructions are used here.  prefetchnta and movntq belong
 * to SSE (and AMD's MMX extensions), so using them in the MMX path would
 * raise an invalid-opcode fault on CPUs that support MMX but not SSE, which
 * is exactly the case in which this routine is selected.
 *
 * The routine leaves the FPU in MMX state; the caller must execute `emms`
 * before any floating point code runs.
 */
static __inline__ void
SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
{
    int i;

    for (i = len / 64; i--;) {
        __asm__ __volatile__ (
        "movq (%0), %%mm0\n"
        "movq 8(%0), %%mm1\n"
        "movq 16(%0), %%mm2\n"
        "movq 24(%0), %%mm3\n"
        "movq 32(%0), %%mm4\n"
        "movq 40(%0), %%mm5\n"
        "movq 48(%0), %%mm6\n"
        "movq 56(%0), %%mm7\n"
        "movq %%mm0, (%1)\n"
        "movq %%mm1, 8(%1)\n"
        "movq %%mm2, 16(%1)\n"
        "movq %%mm3, 24(%1)\n"
        "movq %%mm4, 32(%1)\n"
        "movq %%mm5, 40(%1)\n"
        "movq %%mm6, 48(%1)\n"
        "movq %%mm7, 56(%1)\n"
        :: "r" (src), "r" (dst) : "memory");
        src += 64;
        dst += 64;
    }
    /* Tail: fewer than 64 bytes left. */
    if (len & 63) {
        SDL_memcpy(dst, src, len & 63);
    }
}
#endif /* __MMX__ */
#ifdef __SSE__
/*
 * Copy `len` bytes from `src` to `dst` using SSE registers and
 * non-temporal stores.
 *
 * Data is moved in 64-byte chunks (four 16-byte loads followed by four
 * 16-byte streaming stores); the remaining (len & 63) bytes are copied with
 * SDL_memcpy().
 *
 * Both `src` and `dst` must be 16-byte aligned whenever len >= 64:
 * movaps and movntps fault on misaligned addresses.
 *
 * xmm0-xmm3 are listed as clobbers so the compiler does not keep live
 * values in registers that the asm overwrites.
 */
static __inline__ void
SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
{
    int i;

    for (i = len / 64; i--;) {
        __asm__ __volatile__ (
        "prefetchnta (%0)\n"
        "movaps (%0), %%xmm0\n"
        "movaps 16(%0), %%xmm1\n"
        "movaps 32(%0), %%xmm2\n"
        "movaps 48(%0), %%xmm3\n"
        "movntps %%xmm0, (%1)\n"
        "movntps %%xmm1, 16(%1)\n"
        "movntps %%xmm2, 32(%1)\n"
        "movntps %%xmm3, 48(%1)\n"
        :: "r" (src), "r" (dst)
        : "memory", "xmm0", "xmm1", "xmm2", "xmm3");
        src += 64;
        dst += 64;
    }
    /* Tail: fewer than 64 bytes left. */
    if (len & 63) {
        SDL_memcpy(dst, src, len & 63);
    }
}
#endif /* __SSE__ */
/*
 * Straight row-by-row copy blit.
 *
 * Copies info->d_height rows of (info->d_width * BytesPerPixel) bytes from
 * info->s_pixels to info->d_pixels.  info->s_skip / info->d_skip are added
 * to the row width to step from one row to the next.
 *
 * The SSE and MMX fast paths are only taken when every row starts on an
 * aligned address.  Checking the first row alone is not enough: if the row
 * stride is not a multiple of the alignment, later rows become misaligned
 * and the aligned SSE loads/stores would fault.  Therefore both the start
 * pointers and the row strides are tested.
 */
void
SDL_BlitCopy(SDL_BlitInfo * info)
{
    Uint8 *src, *dst;
    int w, h;
    int srcskip, dstskip;

    w = info->d_width * info->dst->BytesPerPixel;
    h = info->d_height;
    src = info->s_pixels;
    dst = info->d_pixels;
    srcskip = w + info->s_skip;
    dstskip = w + info->d_skip;

#ifdef __SSE__
    if (SDL_HasSSE() &&
        !((uintptr_t)src & 15) && !(srcskip & 15) &&
        !((uintptr_t)dst & 15) && !(dstskip & 15)) {
        while (h--) {
            SDL_memcpySSE(dst, src, w);
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif
#ifdef __MMX__
    if (SDL_HasMMX() &&
        !((uintptr_t)src & 7) && !(srcskip & 7) &&
        !((uintptr_t)dst & 7) && !(dstskip & 7)) {
        while (h--) {
            SDL_memcpyMMX(dst, src, w);
            src += srcskip;
            dst += dstskip;
        }
        /* Leave MMX state so later floating point code works. */
        __asm__ __volatile__(" emms\n"::);
        return;
    }
#endif
    /* Portable fallback. */
    while (h--) {
        SDL_memcpy(dst, src, w);
        src += srcskip;
        dst += dstskip;
    }
}
/*
 * Copy blit for source and destination areas that may overlap.
 *
 * When the destination starts before the source, or at/after the end of
 * the source area, the work is handed to SDL_BlitCopy().  Otherwise the
 * rows are copied from the last one to the first one with SDL_revcpy(), so
 * source rows are read before they are overwritten.
 *
 * The same row stride (row width + info->s_skip) is used for both pointers.
 */
void
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
{
    const int rowbytes = info->d_width * info->dst->BytesPerPixel;
    const int rows = info->d_height;
    const int stride = rowbytes + info->s_skip;
    Uint8 *from = info->s_pixels;
    Uint8 *to = info->d_pixels;
    int remaining;

    if (to < from || to >= from + rows * stride) {
        SDL_BlitCopy(info);
        return;
    }

    /* Start at the last row and walk backwards. */
    from += (rows - 1) * stride;
    to += (rows - 1) * stride;
    for (remaining = rows; remaining--;) {
        SDL_revcpy(to, from, rowbytes);
        from -= stride;
        to -= stride;
    }
}
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2006 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
/* Row-by-row copy of info->d_height rows from info->s_pixels to
   info->d_pixels. */
void SDL_BlitCopy(SDL_BlitInfo * info);
/* Same copy, but safe when the source and destination areas overlap: rows
   are copied in reverse order when the destination lies inside the source
   area. */
void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
/* vi: set ts=4 sw=4 expandtab: */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment