Commit e4a0db29 authored by Sam Lantinga

Re-added the 3DNow! and AltiVec instruction support.

parent 92bf0fc2
......@@ -52,7 +52,7 @@
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\include"
PreprocessorDefinitions="_DEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__"
PreprocessorDefinitions="_DEBUG;_WINDOWS"
RuntimeLibrary="2"
BufferSecurityCheck="false"
UsePrecompiledHeader="0"
......@@ -231,7 +231,7 @@
InlineFunctionExpansion="1"
EnableIntrinsicFunctions="false"
AdditionalIncludeDirectories="..\..\include"
PreprocessorDefinitions="NDEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__"
PreprocessorDefinitions="NDEBUG;_WINDOWS"
StringPooling="true"
RuntimeLibrary="2"
BufferSecurityCheck="false"
......
......@@ -52,7 +52,7 @@
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\include"
PreprocessorDefinitions="_DEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__"
PreprocessorDefinitions="_DEBUG;_WINDOWS"
RuntimeLibrary="3"
BufferSecurityCheck="false"
WarningLevel="3"
......@@ -223,7 +223,7 @@
InlineFunctionExpansion="1"
EnableIntrinsicFunctions="false"
AdditionalIncludeDirectories="..\..\include"
PreprocessorDefinitions="NDEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__"
PreprocessorDefinitions="NDEBUG;_WINDOWS"
StringPooling="true"
RuntimeLibrary="2"
BufferSecurityCheck="false"
......
......@@ -83,7 +83,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_DEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
<PrecompiledHeader>
......@@ -152,7 +152,7 @@
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<IntrinsicFunctions>false</IntrinsicFunctions>
<AdditionalIncludeDirectories>..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>NDEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
......@@ -446,4 +446,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
\ No newline at end of file
</Project>
......@@ -1514,8 +1514,10 @@ Optional Features:
--enable-ssemath Allow GCC to use SSE floating point math
[default=no]
--enable-mmx use MMX assembly routines [default=yes]
--enable-3dnow use 3DNow! assembly routines [default=yes]
--enable-sse use SSE assembly routines [default=yes]
--enable-sse2 use SSE2 assembly routines [default=no]
--enable-altivec use Altivec assembly routines [default=yes]
--enable-oss support the OSS audio API [default=yes]
--enable-alsa support the ALSA audio API [default=yes]
--disable-alsatest Do not try to compile and run a test Alsa program
......@@ -3768,13 +3770,13 @@ if test "${lt_cv_nm_interface+set}" = set; then
else
lt_cv_nm_interface="BSD nm"
echo "int some_variable = 0;" > conftest.$ac_ext
(eval echo "\"\$as_me:3771: $ac_compile\"" >&5)
(eval echo "\"\$as_me:3773: $ac_compile\"" >&5)
(eval "$ac_compile" 2>conftest.err)
cat conftest.err >&5
(eval echo "\"\$as_me:3774: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
(eval echo "\"\$as_me:3776: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
(eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
cat conftest.err >&5
(eval echo "\"\$as_me:3777: output\"" >&5)
(eval echo "\"\$as_me:3779: output\"" >&5)
cat conftest.out >&5
if $GREP 'External.*some_variable' conftest.out > /dev/null; then
lt_cv_nm_interface="MS dumpbin"
......@@ -5001,7 +5003,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
echo '#line 5004 "configure"' > conftest.$ac_ext
echo '#line 5006 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
......@@ -7162,11 +7164,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
(eval echo "\"\$as_me:7165: $lt_compile\"" >&5)
(eval echo "\"\$as_me:7167: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
echo "$as_me:7169: \$? = $ac_status" >&5
echo "$as_me:7171: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
......@@ -7501,11 +7503,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
(eval echo "\"\$as_me:7504: $lt_compile\"" >&5)
(eval echo "\"\$as_me:7506: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
echo "$as_me:7508: \$? = $ac_status" >&5
echo "$as_me:7510: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
......@@ -7606,11 +7608,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
(eval echo "\"\$as_me:7609: $lt_compile\"" >&5)
(eval echo "\"\$as_me:7611: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
echo "$as_me:7613: \$? = $ac_status" >&5
echo "$as_me:7615: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
......@@ -7661,11 +7663,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
(eval echo "\"\$as_me:7664: $lt_compile\"" >&5)
(eval echo "\"\$as_me:7666: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
echo "$as_me:7668: \$? = $ac_status" >&5
echo "$as_me:7670: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
......@@ -10419,7 +10421,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
#line 10422 "configure"
#line 10424 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
......@@ -10515,7 +10517,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
#line 10518 "configure"
#line 10520 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
......@@ -14197,11 +14199,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
(eval echo "\"\$as_me:14200: $lt_compile\"" >&5)
(eval echo "\"\$as_me:14202: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
echo "$as_me:14204: \$? = $ac_status" >&5
echo "$as_me:14206: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
......@@ -14296,11 +14298,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
(eval echo "\"\$as_me:14299: $lt_compile\"" >&5)
(eval echo "\"\$as_me:14301: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
echo "$as_me:14303: \$? = $ac_status" >&5
echo "$as_me:14305: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
......@@ -14348,11 +14350,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
(eval echo "\"\$as_me:14351: $lt_compile\"" >&5)
(eval echo "\"\$as_me:14353: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
echo "$as_me:14355: \$? = $ac_status" >&5
echo "$as_me:14357: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
......@@ -20693,6 +20695,79 @@ echo "${ECHO_T}$have_gcc_mmx" >&6; }
fi
fi
# Check whether --enable-3dnow was given.
# When enable_3dnow is unset it defaults to yes.
if test "${enable_3dnow+set}" = set; then
enableval=$enable_3dnow;
else
enable_3dnow=yes
fi
# Probe: compile a program that includes <mm3dnow.h> with -m3dnow appended to
# CFLAGS.  The program refuses to preprocess unless __3dNOW__ is defined.
if test x$enable_3dnow = xyes; then
# CFLAGS is modified only for the duration of the probe and restored below.
save_CFLAGS="$CFLAGS"
have_gcc_3dnow=no
{ echo "$as_me:$LINENO: checking for GCC -m3dnow option" >&5
echo $ECHO_N "checking for GCC -m3dnow option... $ECHO_C" >&6; }
amd3dnow_CFLAGS="-m3dnow"
CFLAGS="$save_CFLAGS $amd3dnow_CFLAGS"
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <mm3dnow.h>
#ifndef __3dNOW__
#error Assembler CPP flag not enabled
#endif
int
main ()
{
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
# Success requires: compile exit status 0, no stderr output when
# ac_c_werror_flag is set, and a non-empty object file.
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
have_gcc_3dnow=yes
else
# Log the failing test program to file descriptor 5.
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ echo "$as_me:$LINENO: result: $have_gcc_3dnow" >&5
echo "${ECHO_T}$have_gcc_3dnow" >&6; }
CFLAGS="$save_CFLAGS"
# Only a successful probe adds -m3dnow to the flags collected in EXTRA_CFLAGS.
if test x$have_gcc_3dnow = xyes; then
EXTRA_CFLAGS="$EXTRA_CFLAGS $amd3dnow_CFLAGS"
fi
fi
# Check whether --enable-sse was given.
if test "${enable_sse+set}" = set; then
enableval=$enable_sse;
......@@ -20856,6 +20931,260 @@ echo "${ECHO_T}$have_gcc_sse2" >&6; }
EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
fi
fi
# Check whether --enable-altivec was given.
# When enable_altivec is unset it defaults to yes.
if test "${enable_altivec+set}" = set; then
enableval=$enable_altivec;
else
enable_altivec=yes
fi
# AltiVec detection tries up to four compile probes, stopping at the first
# success:
#   1. -maltivec with <altivec.h>
#   2. -maltivec without the header
#   3. -faltivec with <altivec.h>
#   4. -faltivec without the header
# have_altivec_h_hdr is set only by the probes that include the header.
if test x$enable_altivec = xyes; then
# CFLAGS is modified only for the duration of the probes and restored below.
save_CFLAGS="$CFLAGS"
have_gcc_altivec=no
have_altivec_h_hdr=no
altivec_CFLAGS="-maltivec"
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
# Probe 1: -maltivec, including <altivec.h>.
{ echo "$as_me:$LINENO: checking for Altivec with GCC altivec.h and -maltivec option" >&5
echo $ECHO_N "checking for Altivec with GCC altivec.h and -maltivec option... $ECHO_C" >&6; }
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <altivec.h>
vector unsigned int vzero() {
return vec_splat_u32(0);
}
int
main ()
{
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
have_gcc_altivec=yes
have_altivec_h_hdr=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ echo "$as_me:$LINENO: result: $have_gcc_altivec" >&5
echo "${ECHO_T}$have_gcc_altivec" >&6; }
# Probe 2: still -maltivec, but the test program no longer includes
# <altivec.h>; have_altivec_h_hdr therefore stays "no" on success.
if test x$have_gcc_altivec = xno; then
{ echo "$as_me:$LINENO: checking for Altivec with GCC -maltivec option" >&5
echo $ECHO_N "checking for Altivec with GCC -maltivec option... $ECHO_C" >&6; }
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
vector unsigned int vzero() {
return vec_splat_u32(0);
}
int
main ()
{
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
have_gcc_altivec=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ echo "$as_me:$LINENO: result: $have_gcc_altivec" >&5
echo "${ECHO_T}$have_gcc_altivec" >&6; }
fi
# Probe 3: switch the candidate flag to -faltivec and retry with <altivec.h>.
if test x$have_gcc_altivec = xno; then
{ echo "$as_me:$LINENO: checking for Altivec with GCC altivec.h and -faltivec option" >&5
echo $ECHO_N "checking for Altivec with GCC altivec.h and -faltivec option... $ECHO_C" >&6; }
altivec_CFLAGS="-faltivec"
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <altivec.h>
vector unsigned int vzero() {
return vec_splat_u32(0);
}
int
main ()
{
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
have_gcc_altivec=yes
have_altivec_h_hdr=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ echo "$as_me:$LINENO: result: $have_gcc_altivec" >&5
echo "${ECHO_T}$have_gcc_altivec" >&6; }
fi
# Probe 4: -faltivec (CFLAGS as set by probe 3) without <altivec.h>.
if test x$have_gcc_altivec = xno; then
{ echo "$as_me:$LINENO: checking for Altivec with GCC -faltivec option" >&5
echo $ECHO_N "checking for Altivec with GCC -faltivec option... $ECHO_C" >&6; }
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
vector unsigned int vzero() {
return vec_splat_u32(0);
}
int
main ()
{
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
have_gcc_altivec=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ echo "$as_me:$LINENO: result: $have_gcc_altivec" >&5
echo "${ECHO_T}$have_gcc_altivec" >&6; }
fi
CFLAGS="$save_CFLAGS"
# On success: define SDL_ALTIVEC_BLITTERS, define HAVE_ALTIVEC_H only when a
# header-including probe passed, and keep whichever flag was last tried.
if test x$have_gcc_altivec = xyes; then
cat >>confdefs.h <<\_ACEOF
#define SDL_ALTIVEC_BLITTERS 1
_ACEOF
if test x$have_altivec_h_hdr = xyes; then
cat >>confdefs.h <<\_ACEOF
#define HAVE_ALTIVEC_H 1
_ACEOF
fi
EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
fi
fi
fi
CheckOSS()
......
......@@ -501,6 +501,33 @@ AC_HELP_STRING([--enable-mmx], [use MMX assembly routines [[default=yes]]]),
fi
fi
dnl 3DNow! support: try to compile a program including mm3dnow.h with
dnl -m3dnow and, on success, append that flag to EXTRA_CFLAGS.
dnl Enabled unless the user turns it off; the help text now names 3DNow!
dnl instead of the MMX wording copied from the option above.
AC_ARG_ENABLE(3dnow,
AC_HELP_STRING([--enable-3dnow], [use 3DNow! assembly routines [[default=yes]]]),
              , enable_3dnow=yes)
if test x$enable_3dnow = xyes; then
    dnl CFLAGS is changed only for the probe and restored right after it
    save_CFLAGS="$CFLAGS"
    have_gcc_3dnow=no
    AC_MSG_CHECKING(for GCC -m3dnow option)
    amd3dnow_CFLAGS="-m3dnow"
    CFLAGS="$save_CFLAGS $amd3dnow_CFLAGS"

    dnl The test program fails to preprocess unless __3dNOW__ is defined
    AC_TRY_COMPILE([
#include <mm3dnow.h>
#ifndef __3dNOW__
#error Assembler CPP flag not enabled
#endif
    ],[
    ],[
    have_gcc_3dnow=yes
    ])
    AC_MSG_RESULT($have_gcc_3dnow)
    CFLAGS="$save_CFLAGS"

    if test x$have_gcc_3dnow = xyes; then
        EXTRA_CFLAGS="$EXTRA_CFLAGS $amd3dnow_CFLAGS"
    fi
fi
AC_ARG_ENABLE(sse,
AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]),
, enable_sse=yes)
......@@ -572,6 +599,82 @@ AC_HELP_STRING([--enable-sse2], [use SSE2 assembly routines [[default=no]]]),
EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
fi
fi
dnl AltiVec support.  Up to four compile probes are tried, stopping at the
dnl first success: -maltivec with altivec.h, -maltivec without the header,
dnl -faltivec with altivec.h, -faltivec without the header.
dnl have_altivec_h_hdr is set only by the probes that include the header.
AC_ARG_ENABLE(altivec,
AC_HELP_STRING([--enable-altivec], [use Altivec assembly routines [[default=yes]]]),
, enable_altivec=yes)
if test x$enable_altivec = xyes; then
dnl CFLAGS is changed only for the probes and restored after the last one
save_CFLAGS="$CFLAGS"
have_gcc_altivec=no
have_altivec_h_hdr=no
altivec_CFLAGS="-maltivec"
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
dnl Probe 1: -maltivec with the header
AC_MSG_CHECKING(for Altivec with GCC altivec.h and -maltivec option)
AC_TRY_COMPILE([
#include <altivec.h>
vector unsigned int vzero() {
return vec_splat_u32(0);
}
],[
],[
have_gcc_altivec=yes
have_altivec_h_hdr=yes
])
AC_MSG_RESULT($have_gcc_altivec)
dnl Probe 2: -maltivec without the header
if test x$have_gcc_altivec = xno; then
AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
AC_TRY_COMPILE([
vector unsigned int vzero() {
return vec_splat_u32(0);
}
],[
],[
have_gcc_altivec=yes
])
AC_MSG_RESULT($have_gcc_altivec)
fi
dnl Probe 3: switch the candidate flag to -faltivec and retry with the header
if test x$have_gcc_altivec = xno; then
AC_MSG_CHECKING(for Altivec with GCC altivec.h and -faltivec option)
altivec_CFLAGS="-faltivec"
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
AC_TRY_COMPILE([
#include <altivec.h>
vector unsigned int vzero() {
return vec_splat_u32(0);
}
],[
],[
have_gcc_altivec=yes
have_altivec_h_hdr=yes
])
AC_MSG_RESULT($have_gcc_altivec)
fi
dnl Probe 4: -faltivec without the header
if test x$have_gcc_altivec = xno; then
AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
AC_TRY_COMPILE([
vector unsigned int vzero() {
return vec_splat_u32(0);
}
],[
],[
have_gcc_altivec=yes
])
AC_MSG_RESULT($have_gcc_altivec)
fi
CFLAGS="$save_CFLAGS"
dnl On success define SDL_ALTIVEC_BLITTERS, define HAVE_ALTIVEC_H only when
dnl a header-including probe passed, and keep the flag that was last tried
if test x$have_gcc_altivec = xyes; then
AC_DEFINE(SDL_ALTIVEC_BLITTERS)
if test x$have_altivec_h_hdr = xyes; then
AC_DEFINE(HAVE_ALTIVEC_H)
fi
EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
fi
fi
fi
dnl See if the OSS audio interface is supported
......
......@@ -34,7 +34,7 @@
/* Make sure that this isn't included by Visual C++ */
#ifdef _MSC_VER
#error You should copy include/SDL_config.h.default to include/SDL_config.h
#error You should run hg revert SDL_config.h
#endif
/* C language features */
......@@ -82,6 +82,7 @@
#undef HAVE_MATH_H
#undef HAVE_ICONV_H
#undef HAVE_SIGNAL_H
#undef HAVE_ALTIVEC_H
/* C library functions */
#undef HAVE_MALLOC
......@@ -302,5 +303,6 @@
/* Enable assembly routines */
#undef SDL_ASSEMBLY_ROUTINES
#undef SDL_ALTIVEC_BLITTERS
#endif /* _SDL_config_h */
......@@ -168,5 +168,8 @@
/* Enable assembly routines */
#define SDL_ASSEMBLY_ROUTINES 1
#ifdef __ppc__
#define SDL_ALTIVEC_BLITTERS 1
#endif
#endif /* _SDL_config_macosx_h */
......@@ -31,6 +31,34 @@
#include "SDL_stdinc.h"
/* Need to do this here because intrin.h has C++ code in it */
/* Visual Studio 2005 has a bug where intrin.h conflicts with winnt.h */
#if defined(_MSC_VER) && (_MSC_VER >= 1500) && !defined(_WIN32_WCE)
#include <intrin.h>
#define __MMX__
#define __3dNOW__
#define __SSE__
#define __SSE2__
#elif defined(__MINGW64_VERSION_MAJOR)
#include <intrin.h>
#else
#ifdef __MMX__
#include <mmintrin.h>
#endif
#ifdef __3dNOW__
#include <mm3dnow.h>
#endif
#ifdef __SSE__
#include <xmmintrin.h>
#endif
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#ifdef HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#endif
#include "begin_code.h"
/* Set up for C function definitions, even when using C++ */
#ifdef __cplusplus
......@@ -64,11 +92,21 @@ extern DECLSPEC int SDLCALL SDL_GetCPUCacheLineSize(void);
*/
extern DECLSPEC SDL_bool SDLCALL SDL_HasRDTSC(void);
/**
* This function returns true if the CPU has AltiVec features.
*/
extern DECLSPEC SDL_bool SDLCALL SDL_HasAltiVec(void);
/**
* This function returns true if the CPU has MMX features.
*/
extern DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void);
/**
* This function returns true if the CPU has 3DNow! features.
*/
extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNow(void);
/**
* This function returns true if the CPU has SSE features.
*/
......
......@@ -32,18 +32,37 @@
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
#include <sys/sysctl.h> /* For AltiVec check */
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
#include <signal.h>
#include <setjmp.h>
#endif
#ifdef __WIN32__
#include "../core/windows/SDL_windows.h"
#endif
#define CPU_HAS_RDTSC 0x00000001
#define CPU_HAS_MMX 0x00000002
#define CPU_HAS_ALTIVEC 0x00000002
#define CPU_HAS_MMX 0x00000004
#define CPU_HAS_3DNOW 0x00000008
#define CPU_HAS_SSE 0x00000010
#define CPU_HAS_SSE2 0x00000020
#define CPU_HAS_SSE3 0x00000040
#define CPU_HAS_SSE41 0x00000080
#define CPU_HAS_SSE42 0x00000100
#define CPU_HAS_SSE41 0x00000100
#define CPU_HAS_SSE42 0x00000200
#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__
/*
 * Brute-force instruction set detection (idea borrowed from the libmpeg2
 * library - thanks!): the probing code runs the instruction in question
 * and this handler jumps back out of it if the CPU raises SIGILL.
 */
static jmp_buf jmpbuf;

/* SIGILL handler: unwind to the setjmp() point saved in jmpbuf. */
static void
illegal_instruction(int sig)
{
    (void) sig;                 /* the signal number is not needed */
    longjmp(jmpbuf, 1);
}
#endif /* SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ */
static __inline__ int
CPU_haveCPUID(void)
......@@ -192,6 +211,29 @@ CPU_haveRDTSC(void)
return 0;
}
/*
 * Run-time AltiVec probe.  Returns 1 when a vector unit is detected and 0
 * otherwise.
 *
 *  - Mac OS X on PowerPC: queries sysctl() with { CTL_HW, HW_VECTORUNIT }.
 *  - Otherwise, when SDL_ALTIVEC_BLITTERS and HAVE_SETJMP are set: executes
 *    the instructions in the asm statement under a temporary SIGILL handler;
 *    if the handler fires, the result stays 0.
 *  - Any other configuration: always 0.
 *
 * NOTE(review): jmpbuf and illegal_instruction are declared under
 * "SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__", which is not the same
 * condition as the #elif below (a non-PowerPC Mac OS X build with both macros
 * set would reach the #elif without them) - confirm that combination cannot
 * occur.
 */
static __inline__ int
CPU_haveAltiVec(void)
{
/* NOTE(review): presumably volatile so the value stays well-defined across
   the longjmp() out of the signal handler - confirm */
volatile int altivec = 0;
#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
int selectors[2] = { CTL_HW, HW_VECTORUNIT };
int hasVectorUnit = 0;
size_t length = sizeof(hasVectorUnit);
int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
/* A failed query leaves altivec at 0 */
if (0 == error)
altivec = (hasVectorUnit != 0);
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
void (*handler) (int sig);
/* Install the probe handler, remembering the previous SIGILL disposition */
handler = signal(SIGILL, illegal_instruction);
/* setjmp() returns 0 on the direct call; a SIGILL makes the handler
   longjmp() back here with a non-zero value, skipping the assignment */
if (setjmp(jmpbuf) == 0) {
/* Writes -1 to special-purpose register 256, then runs a vector AND on v0.
   NOTE(review): SPR 256 is presumably VRSAVE - confirm */
asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
altivec = 1;
}
/* Put the previous SIGILL handler back */
signal(SIGILL, handler);
#endif
return altivec;
}
static __inline__ int
CPU_haveMMX(void)
{
......@@ -201,6 +243,21 @@ CPU_haveMMX(void)
return 0;
}
/*
 * Detect 3DNow! support through CPUID.
 *
 * Extended leaf 0x80000000 is queried first; only if its first output
 * register reports that leaf 0x80000001 exists is that leaf read and bit 31
 * of its fourth output register tested.
 *
 * Returns 1 when the bit is set, 0 when it is clear or when CPUID or the
 * extended leaf is unavailable.  The result is normalized to 0/1 instead of
 * returning the raw mask: 0x80000000 does not fit in an int, so the previous
 * conversion of the masked value was implementation-defined.
 */
static __inline__ int
CPU_have3DNow(void)
{
    int a, b, c, d;

    if (!CPU_haveCPUID()) {
        return 0;
    }

    /* Highest supported extended leaf comes back in 'a' */
    cpuid(0x80000000, a, b, c, d);
    if ((unsigned int) a < 0x80000001u) {
        return 0;
    }

    cpuid(0x80000001, a, b, c, d);
    return (((unsigned int) d & 0x80000000u) != 0);
}
static __inline__ int
CPU_haveSSE(void)
{
......@@ -431,9 +488,15 @@ SDL_GetCPUFeatures(void)
if (CPU_haveRDTSC()) {
SDL_CPUFeatures |= CPU_HAS_RDTSC;
}
if (CPU_haveAltiVec()) {
SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
}
if (CPU_haveMMX()) {
SDL_CPUFeatures |= CPU_HAS_MMX;
}
if (CPU_have3DNow()) {
SDL_CPUFeatures |= CPU_HAS_3DNOW;
}
if (CPU_haveSSE()) {
SDL_CPUFeatures |= CPU_HAS_SSE;
}
......@@ -462,6 +525,15 @@ SDL_HasRDTSC(void)
return SDL_FALSE;
}
/* Report whether the CPU feature mask has the AltiVec bit set. */
SDL_bool
SDL_HasAltiVec(void)
{
    return (SDL_GetCPUFeatures() & CPU_HAS_ALTIVEC) ? SDL_TRUE : SDL_FALSE;
}
SDL_bool
SDL_HasMMX(void)
{
......@@ -471,6 +543,15 @@ SDL_HasMMX(void)
return SDL_FALSE;
}
/* Report whether the CPU feature mask has the 3DNow! bit set. */
SDL_bool
SDL_Has3DNow(void)
{
    return (SDL_GetCPUFeatures() & CPU_HAS_3DNOW) ? SDL_TRUE : SDL_FALSE;
}
SDL_bool
SDL_HasSSE(void)
{
......@@ -528,7 +609,9 @@ main()
printf("CPU name: %s\n", SDL_GetCPUName());
printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
printf("RDTSC: %d\n", SDL_HasRDTSC());
printf("Altivec: %d\n", SDL_HasAltiVec());
printf("MMX: %d\n", SDL_HasMMX());
printf("3DNow: %d\n", SDL_Has3DNow());
printf("SSE: %d\n", SDL_HasSSE());
printf("SSE2: %d\n", SDL_HasSSE2());
printf("SSE3: %d\n", SDL_HasSSE3());
......
......@@ -100,6 +100,30 @@ SDL_SoftBlit(SDL_Surface * src, SDL_Rect * srcrect,
return (okay ? 0 : -1);
}
#ifdef __MACOSX__
#include <sys/sysctl.h>
/*
 * Decide whether the prefetching AltiVec blitters should be selected.
 * Returns SDL_TRUE only when the "hw.l3cachesize" sysctl can be read and
 * reports a non-zero size; a failed query or a zero size gives SDL_FALSE.
 */
static SDL_bool
SDL_UseAltivecPrefetch()
{
    u_int64_t l3_size = 0;
    size_t l3_len = sizeof(l3_size);
    const int rc = sysctlbyname("hw.l3cachesize", &l3_size, &l3_len, NULL, 0);

    if (rc != 0 || l3_size == 0) {
        return SDL_FALSE;
    }
    return SDL_TRUE;
}
#else
/*
 * No query is made on other platforms: just guess G4 and always ask for
 * the prefetching blitters.
 */
static SDL_bool
SDL_UseAltivecPrefetch()
{
    return SDL_TRUE;
}
#endif /* __MACOSX__ */
static SDL_BlitFunc
SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
......@@ -121,12 +145,22 @@ SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
if (SDL_HasMMX()) {
features |= SDL_CPU_MMX;
}
if (SDL_Has3DNow()) {
features |= SDL_CPU_3DNOW;
}
if (SDL_HasSSE()) {
features |= SDL_CPU_SSE;
}
if (SDL_HasSSE2()) {
features |= SDL_CPU_SSE2;
}
if (SDL_HasAltiVec()) {
if (SDL_UseAltivecPrefetch()) {
features |= SDL_CPU_ALTIVEC_PREFETCH;
} else {
features |= SDL_CPU_ALTIVEC_NOPREFETCH;
}
}
}
}
......
......@@ -24,24 +24,6 @@
#ifndef _SDL_blit_h
#define _SDL_blit_h
#ifdef __MINGW32__
#include <_mingw.h>
#endif
#if defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR)
#include <intrin.h>
#else
#ifdef __MMX__
#include <mmintrin.h>
#endif
#ifdef __SSE__
#include <xmmintrin.h>
#endif
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#endif
#include "SDL_cpuinfo.h"
#include "SDL_endian.h"
#include "SDL_surface.h"
......@@ -62,8 +44,11 @@
/* SDL blit CPU flags */
/* Bit flags for CPU features; SDL_ChooseBlitFunc ORs together the ones
   reported by the SDL_Has*() run-time checks. */
#define SDL_CPU_ANY 0x00000000 /* no feature bit set */
#define SDL_CPU_MMX 0x00000001 /* set when SDL_HasMMX() is true */
#define SDL_CPU_3DNOW 0x00000002 /* set when SDL_Has3DNow() is true */
#define SDL_CPU_SSE 0x00000004 /* set when SDL_HasSSE() is true */
#define SDL_CPU_SSE2 0x00000008 /* set when SDL_HasSSE2() is true */
/* With AltiVec present exactly one of the next two is set, chosen by
   SDL_UseAltivecPrefetch() */
#define SDL_CPU_ALTIVEC_PREFETCH 0x00000010
#define SDL_CPU_ALTIVEC_NOPREFETCH 0x00000020
typedef struct
{
......
......@@ -419,6 +419,806 @@ BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
#endif /* __MMX__ */
#if SDL_ALTIVEC_BLITTERS
#if __MWERKS__
#pragma altivec_model on
#endif
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include <assert.h>
/*
 * Vector literal helpers: build a "vector unsigned char" from 16 byte values
 * or a "vector unsigned short" from 8 values.  Mac OS X builds with GCC older
 * than 4 get the parenthesized literal form; every other configuration gets
 * the brace-initializer form.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
(vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
(vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
(vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
(vector unsigned short) { a,b,c,d,e,f,g,h }
#endif
/*
 * Low four bits of the address x: zero when x is 16-byte aligned, otherwise
 * its byte offset past the previous 16-byte boundary.  The argument is
 * parenthesized so an expression such as "p + 1" is evaluated as a pointer
 * before the cast rather than being cast first and then incremented by one.
 */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)
/*
 * Debug helper: prints msg followed by the four 32-bit words of vector v in
 * hexadecimal.  v is first copied into a local vector so its words can be
 * read through an unsigned int pointer.
 */
#define VECPRINT(msg, v) do { \
vector unsigned int tmpvec = (vector unsigned int)(v); \
unsigned int *vp = (unsigned int *)&tmpvec; \
printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
} while (0)
/* the permutation vector that takes the high bytes out of all the appropriate shorts
(vector unsigned char)(
0x00, 0x10, 0x02, 0x12,
0x04, 0x14, 0x06, 0x16,
0x08, 0x18, 0x0A, 0x1A,
0x0C, 0x1C, 0x0E, 0x1E );
Built at run time from vec_lvsl(0, NULL) plus a vector holding 0x000F in
every 16-bit element, viewed as bytes.
NOTE(review): assumes vec_lvsl(0, NULL) yields the bytes 0x00..0x0F - confirm.
*/
#define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))
/* 24 in every 32-bit element, built as 12 + 12.
   NOTE(review): presumably because 24 cannot be given directly as a
   vec_splat_u32 literal - confirm. */
#define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12)))
/* All bits set, shifted left by 24 within each 32-bit element: only the top
   byte of every element remains set */
#define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
/* Permute control for loads from src: vec_lvsl(0, src) when src is not
   16-byte aligned, otherwise vec_lvsl(8, src) with 8 added to every byte.
   src is evaluated more than once. */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
? vec_lvsl(0, src) \
: vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
/*
 * Alpha-blend the byte vector vs into vd using the per-byte alpha vector
 * valpha; the blended result is stored back into vd.
 *
 * Even and odd bytes are multiplied separately into 16-bit lanes
 * (vec_mule / vec_mulo): source * alpha and dest * (255 - alpha) are summed,
 * every sum x becomes (x + 1) + ((x + 1) >> 8), and mergePermute then
 * selects one byte of each 16-bit lane to rebuild a byte vector.
 *
 * v1_16 and v8_16 supply the "+ 1" addend and the ">> 8" shift count as
 * vectors of 16-bit elements.  vs, vd and valpha are expanded more than
 * once, so pass plain variables.
 */
#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
/* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
vector unsigned short vtemp1 = vec_mule(vs, valpha); \
/* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
/* valpha2 is 255-alpha */ \
vector unsigned char valpha2 = vec_nor(valpha, valpha); \
/* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \
vector unsigned short vtemp3 = vec_mule(vd, valpha2); \
/* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \
vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \
/* add source and dest */ \
vtemp1 = vec_add(vtemp1, vtemp3); \
vtemp2 = vec_add(vtemp2, vtemp4); \
/* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \
vtemp1 = vec_add(vtemp1, v1_16); \
vtemp3 = vec_sr(vtemp1, v8_16); \
vtemp1 = vec_add(vtemp1, vtemp3); \
/* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \
vtemp2 = vec_add(vtemp2, v1_16); \
vtemp4 = vec_sr(vtemp2, v8_16); \
vtemp2 = vec_add(vtemp2, vtemp4); \
/* (>>8) and get ARGBARGBARGBARGB */ \
vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
} while (0)
/*
 * Calculate the permute vector used for 32->32 swizzling.
 *
 * srcfmt and dstfmt describe the source and destination pixel layouts; a
 * NULL pointer for either one selects the built-in ARGB layout below.
 *
 * Returns a 16-byte vector: one 4-byte selector word derived from the two
 * formats, replicated into all four words and offset by 0, 4, 8 and 12.
 */
static vector unsigned char
calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
{
/*
* We have to assume that the bits that aren't used by other
* colors is alpha, and it's one complete byte, since some formats
* leave alpha with a zero mask, but we should still swizzle the bits.
*/
/* ARGB */
const static struct SDL_PixelFormat default_pixel_format = {
NULL, 0, 0,
0, 0, 0, 0,
16, 8, 0, 24,
0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
};
if (!srcfmt) {
srcfmt = &default_pixel_format;
}
if (!dstfmt) {
dstfmt = &default_pixel_format;
}
/* Per-word base offsets (0, 4, 8, 12) added to the replicated selector */
const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
0x04, 0x04, 0x04, 0x04,
0x08, 0x08, 0x08, 0x08,
0x0C, 0x0C, 0x0C,
0x0C);
vector unsigned char vswiz;
vector unsigned int srcvec;
/* Maps a channel bit shift of 0/8/16/24 to the value 3/2/1/0 */
#define RESHIFT(X) (3 - ((X) >> 3))
/* Each source channel's RESHIFT value is placed at the destination
   channel's bit position */
Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
Uint32 amask;
/* Use zero for alpha if either surface doesn't have alpha */
if (dstfmt->Amask) {
amask =
((srcfmt->Amask) ? RESHIFT(srcfmt->
Ashift) : 0x10) << (dstfmt->Ashift);
} else {
/* 0x10 in every byte that the destination's R, G and B masks leave clear */
amask =
0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
0xFFFFFFFF);
}
#undef RESHIFT
/* Only word 0 of srcvec is written; vec_splat() below replicates it */
((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
return (vswiz);
}
static void
Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
{
int height = info->dst_h;
Uint8 *src = (Uint8 *) info->src;
int srcskip = info->src_skip;
Uint8 *dst = (Uint8 *) info->dst;
int dstskip = info->dst_skip;
SDL_PixelFormat *srcfmt = info->src_fmt;
vector unsigned char v0 = vec_splat_u8(0);
vector unsigned short v8_16 = vec_splat_u16(8);
vector unsigned short v1_16 = vec_splat_u16(1);
vector unsigned short v2_16 = vec_splat_u16(2);
vector unsigned short v3_16 = vec_splat_u16(3);
vector unsigned int v8_32 = vec_splat_u32(8);
vector unsigned int v16_32 = vec_add(v8_32, v8_32);
vector unsigned short v3f =
VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
0x003f, 0x003f, 0x003f, 0x003f);
vector unsigned short vfc =
VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
0x00fc, 0x00fc, 0x00fc, 0x00fc);
/*
0x10 - 0x1f is the alpha
0x00 - 0x0e evens are the red
0x01 - 0x0f odds are zero
*/
vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
0x10, 0x02, 0x01, 0x01,
0x10, 0x04, 0x01, 0x01,
0x10, 0x06, 0x01,
0x01);
vector unsigned char vredalpha2 =
(vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
vec_sl(v8_32, v16_32))
);
/*
0x00 - 0x0f is ARxx ARxx ARxx ARxx
0x11 - 0x0f odds are blue
*/
vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
0x04, 0x05, 0x06, 0x13,
0x08, 0x09, 0x0a, 0x15,
0x0c, 0x0d, 0x0e, 0x17);
vector unsigned char vblue2 =
(vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32)
);
/*
0x00 - 0x0f is ARxB ARxB ARxB ARxB
0x10 - 0x0e evens are green
*/
vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
0x04, 0x05, 0x12, 0x07,
0x08, 0x09, 0x14, 0x0b,
0x0c, 0x0d, 0x16, 0x0f);
vector unsigned char vgreen2 =
(vector unsigned
char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32))
);
vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
0x00, 0x0a, 0x00, 0x0e,
0x00, 0x12, 0x00, 0x16,
0x00, 0x1a, 0x00, 0x1e);
vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
vector unsigned char valphaPermute =
vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
vf800 = vec_sl(vf800, vec_splat_u16(8));
while (height--) {
int extrawidth;
vector unsigned char valigner;
vector unsigned char vsrc;
vector unsigned char voverflow;
int width = info->dst_w;
#define ONE_PIXEL_BLEND(condition, widthvar) \
while (condition) { \
Uint32 Pixel; \
unsigned sR, sG, sB, dR, dG, dB, sA; \
DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
if(sA) { \
unsigned short dstpixel = *((unsigned short *)dst); \
dR = (dstpixel >> 8) & 0xf8; \
dG = (dstpixel >> 3) & 0xfc; \
dB = (dstpixel << 3) & 0xf8; \
ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
*((unsigned short *)dst) = ( \
((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \
); \
} \
src += 4; \
dst += 2; \
widthvar--; \
}
ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
extrawidth = (width % 8);
valigner = VEC_ALIGNER(src);
vsrc = (vector unsigned char) vec_ld(0, src);
width -= extrawidth;
while (width) {
vector unsigned char valpha;
vector unsigned char vsrc1, vsrc2;
vector unsigned char vdst1, vdst2;
vector unsigned short vR, vG, vB;
vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
/* Load 8 pixels from src as ARGB */
voverflow = (vector unsigned char) vec_ld(15, src);
vsrc = vec_perm(vsrc, voverflow, valigner);
vsrc1 = vec_perm(vsrc, vsrc, vpermute);
src += 16;
vsrc = (vector unsigned char) vec_ld(15, src);
voverflow = vec_perm(voverflow, vsrc, valigner);
vsrc2 = vec_perm(voverflow, voverflow, vpermute);
src += 16;
/* Load 8 pixels from dst as XRGB */
voverflow = vec_ld(0, dst);
vR = vec_and((vector unsigned short) voverflow, vf800);
vB = vec_sl((vector unsigned short) voverflow, v3_16);
vG = vec_sl(vB, v2_16);
vdst1 =
(vector unsigned char) vec_perm((vector unsigned char) vR,
(vector unsigned char) vR,
vredalpha1);
vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
vdst2 =
(vector unsigned char) vec_perm((vector unsigned char) vR,
(vector unsigned char) vR,
vredalpha2);
vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
/* Alpha blend 8 pixels as ARGB */
valpha = vec_perm(vsrc1, v0, valphaPermute);
VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16,
v8_16);
valpha = vec_perm(vsrc2, v0, valphaPermute);
VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16,
v8_16);
/* Convert 8 pixels to 565 */
vpixel = (vector unsigned short) vec_packpx((vector unsigned int)
vdst1,
(vector unsigned int)
vdst2);
vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge);
vgpixel = vec_and(vgpixel, vfc);
vgpixel = vec_sl(vgpixel, v3_16);
vrpixel = vec_sl(vpixel, v1_16);
vrpixel = vec_and(vrpixel, vf800);
vbpixel = vec_and(vpixel, v3f);
vdst1 =
vec_or((vector unsigned char) vrpixel,
(vector unsigned char) vgpixel);
vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel);
/* Store 8 pixels */
vec_st(vdst1, 0, dst);
width -= 8;
dst += 16;
}
ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND
src += srcskip;
dst += dstskip;
}
}
/*
 * 32-bit -> 32-bit blit with one per-surface alpha value (info->a) and a
 * source colour key (info->colorkey), AltiVec version.
 *
 * Every row is processed in three phases:
 *   1. scalar ONE_PIXEL_BLEND until dstp is 16-byte aligned,
 *   2. four pixels at a time with vector instructions,
 *   3. scalar ONE_PIXEL_BLEND for the remaining (width % 4) pixels.
 *
 * A source pixel is treated as transparent when its colour channels
 * (pixel & rgbmask) equal the masked colour key.  The scalar loop applies
 * the same mask as the vector loop so that pixels at the edges of a row
 * are keyed exactly like the pixels in the middle of the row.
 */
static void
Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    int srcskip = info->src_skip >> 2;  /* skips are in bytes; pointers step in pixels */
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip >> 2;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    unsigned sA = info->a;
    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    Uint32 ckey = info->colorkey;
    vector unsigned char mergePermute;
    vector unsigned char vsrcPermute;
    vector unsigned char vdstPermute;
    vector unsigned char vsdstPermute;
    vector unsigned char valpha;
    vector unsigned char valphamask;
    vector unsigned char vbits;
    vector unsigned char v0;
    vector unsigned short v1;
    vector unsigned short v8;
    vector unsigned int vckey;
    vector unsigned int vrgbmask;

    mergePermute = VEC_MERGE_PERMUTE();
    v0 = vec_splat_u8(0);
    v1 = vec_splat_u16(1);
    v8 = vec_splat_u16(8);

    /* set the alpha to 255 on the destination surf */
    valphamask = VEC_ALPHA_MASK();

    vsrcPermute = calc_swizzle32(srcfmt, NULL);
    vdstPermute = calc_swizzle32(NULL, dstfmt);
    vsdstPermute = calc_swizzle32(dstfmt, NULL);

    /* broadcast the surface alpha into every byte of valpha */
    ((unsigned char *) &valpha)[0] = sA;
    valpha = vec_splat(valpha, 0);

    vbits = (vector unsigned char) vec_splat_s8(-1);

    /* only the colour channels take part in the key comparison */
    ckey &= rgbmask;
    ((unsigned int *) (char *) &vckey)[0] = ckey;
    vckey = vec_splat(vckey, 0);
    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
    vrgbmask = vec_splat(vrgbmask, 0);

    while (height--) {
        int width = info->dst_w;
        /* Scalar blend of one pixel at a time; skips colour-keyed pixels. */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, dR, dG, dB; \
            RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \
            if(sA && (Pixel & rgbmask) != ckey) { \
                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
                DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
                ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
            } \
            dstp++; \
            srcp++; \
            widthvar--; \
        }
        /* phase 1: reach a 16-byte aligned destination */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
        if (width > 0) {
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
            width -= extrawidth;
            /* phase 2: four pixels per iteration */
            while (width) {
                vector unsigned char vsel;
                vector unsigned char voverflow;
                vector unsigned char vd;
                vector unsigned char vd_orig;

                /* s = *srcp (source may be unaligned: merge two loads) */
                voverflow = (vector unsigned char) vec_ld(15, srcp);
                vs = vec_perm(vs, voverflow, valigner);

                /* vsel is set for items that match the key */
                vsel =
                    (vector unsigned char) vec_and((vector unsigned int) vs,
                                                   vrgbmask);
                vsel = (vector unsigned char) vec_cmpeq((vector unsigned int)
                                                        vsel, vckey);

                /* permute to source format */
                vs = vec_perm(vs, valpha, vsrcPermute);

                /* d = *dstp */
                vd = (vector unsigned char) vec_ld(0, dstp);
                vd_orig = vd = vec_perm(vd, v0, vsdstPermute);

                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);

                /* set the alpha channel to full on */
                vd = vec_or(vd, valphamask);

                /* mask out color key: keep the untouched destination there */
                vd = vec_sel(vd, vd_orig, vsel);

                /* permute to dest format */
                vd = vec_perm(vd, vbits, vdstPermute);

                /* *dstp = res */
                vec_st((vector unsigned int) vd, 0, dstp);

                srcp += 4;
                dstp += 4;
                width -= 4;
                vs = voverflow;
            }
            /* phase 3: leftover pixels */
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
        }
#undef ONE_PIXEL_BLEND

        srcp += srcskip;
        dstp += dstskip;
    }
}
/*
 * 32-bit -> 32-bit blit with per-pixel source alpha, AltiVec version.
 *
 * Each row is handled in three phases: pixels are blended one at a time
 * by ONE_PIXEL_BLEND until dstp is 16-byte aligned, then four pixels per
 * iteration are blended with vector instructions, and finally the
 * remaining (width % 4) pixels are blended by ONE_PIXEL_BLEND again.
 *
 * In the vector loop the destination's bytes selected by valphamask are
 * saved before blending (vdstalpha) and written back afterwards.
 */
static void
Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
{
int width = info->dst_w;
int height = info->dst_h;
Uint32 *srcp = (Uint32 *) info->src;
/* skips are given in bytes; the pointers step in 32-bit pixels */
int srcskip = info->src_skip >> 2;
Uint32 *dstp = (Uint32 *) info->dst;
int dstskip = info->dst_skip >> 2;
SDL_PixelFormat *srcfmt = info->src_fmt;
SDL_PixelFormat *dstfmt = info->dst_fmt;
vector unsigned char mergePermute;
vector unsigned char valphaPermute;
vector unsigned char vsrcPermute;
vector unsigned char vdstPermute;
vector unsigned char vsdstPermute;
vector unsigned char valphamask;
vector unsigned char vpixelmask;
vector unsigned char v0;
vector unsigned short v1;
vector unsigned short v8;
v0 = vec_splat_u8(0);
v1 = vec_splat_u16(1);
v8 = vec_splat_u16(8);
mergePermute = VEC_MERGE_PERMUTE();
valphamask = VEC_ALPHA_MASK();
/* permute indices 0,0,0,0,4,4,4,4,...: replicates byte 0 of each pixel */
valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
/* complement of valphamask */
vpixelmask = vec_nor(valphamask, v0);
/* source -> ARGB, ARGB -> destination, destination -> ARGB */
vsrcPermute = calc_swizzle32(srcfmt, NULL);
vdstPermute = calc_swizzle32(NULL, dstfmt);
vsdstPermute = calc_swizzle32(dstfmt, NULL);
while (height--) {
width = info->dst_w;
/* Scalar blend of single pixels; pixels with source alpha 0 are skipped. */
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
Uint32 Pixel; \
unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
if(sA) { \
DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \
ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \
} \
++srcp; \
++dstp; \
widthvar--; \
}
/* phase 1: reach a 16-byte aligned destination */
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
if (width > 0) {
/* vsrcPermute */
/* vdstPermute */
int extrawidth = (width % 4);
vector unsigned char valigner = VEC_ALIGNER(srcp);
vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
width -= extrawidth;
/* phase 2: four pixels per iteration */
while (width) {
vector unsigned char voverflow;
vector unsigned char vd;
vector unsigned char valpha;
vector unsigned char vdstalpha;
/* s = *srcp */
/* the source may be unaligned: merge two aligned loads */
voverflow = (vector unsigned char) vec_ld(15, srcp);
vs = vec_perm(vs, voverflow, valigner);
vs = vec_perm(vs, v0, vsrcPermute);
/* spread each pixel's alpha byte over all four of its bytes */
valpha = vec_perm(vs, v0, valphaPermute);
/* d = *dstp */
vd = (vector unsigned char) vec_ld(0, dstp);
vd = vec_perm(vd, v0, vsdstPermute);
vdstalpha = vec_and(vd, valphamask);
VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
/* set the alpha to the dest alpha */
vd = vec_and(vd, vpixelmask);
vd = vec_or(vd, vdstalpha);
vd = vec_perm(vd, v0, vdstPermute);
/* *dstp = res */
vec_st((vector unsigned int) vd, 0, dstp);
srcp += 4;
dstp += 4;
width -= 4;
/* the second load of this iteration is the first of the next */
vs = voverflow;
}
/* phase 3: leftover pixels */
ONE_PIXEL_BLEND((extrawidth), extrawidth);
}
srcp += srcskip;
dstp += dstskip;
#undef ONE_PIXEL_BLEND
}
}
/*
 * fast ARGB888->(A)RGB888 blending with pixel alpha
 *
 * Source and destination are used without any channel swizzling; the
 * scalar loop hard-codes alpha in bits 24-31 and colour in bits 0-23.
 * Each row is blended pixel by pixel until dstp is 16-byte aligned, then
 * four pixels at a time with vector instructions, then pixel by pixel
 * for the remaining (width % 4) pixels.  The destination alpha byte is
 * preserved in both the scalar and the vector path.
 */
static void
BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
{
int width = info->dst_w;
int height = info->dst_h;
Uint32 *srcp = (Uint32 *) info->src;
/* skips are given in bytes; the pointers step in 32-bit pixels */
int srcskip = info->src_skip >> 2;
Uint32 *dstp = (Uint32 *) info->dst;
int dstskip = info->dst_skip >> 2;
vector unsigned char mergePermute;
vector unsigned char valphaPermute;
vector unsigned char valphamask;
vector unsigned char vpixelmask;
vector unsigned char v0;
vector unsigned short v1;
vector unsigned short v8;
v0 = vec_splat_u8(0);
v1 = vec_splat_u16(1);
v8 = vec_splat_u16(8);
mergePermute = VEC_MERGE_PERMUTE();
valphamask = VEC_ALPHA_MASK();
/* permute indices 0,0,0,0,4,4,4,4,...: replicates byte 0 of each pixel */
valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
/* complement of valphamask */
vpixelmask = vec_nor(valphamask, v0);
while (height--) {
width = info->dst_w;
/*
 * Scalar blend: alpha 0 leaves the destination untouched, opaque alpha
 * copies the source colour, anything else blends red/blue together
 * (mask 0xff00ff) and green separately (mask 0xff00).
 */
#define ONE_PIXEL_BLEND(condition, widthvar) \
while ((condition)) { \
Uint32 dalpha; \
Uint32 d; \
Uint32 s1; \
Uint32 d1; \
Uint32 s = *srcp; \
Uint32 alpha = s >> 24; \
if(alpha) { \
if(alpha == SDL_ALPHA_OPAQUE) { \
*dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \
} else { \
d = *dstp; \
dalpha = d & 0xff000000; \
s1 = s & 0xff00ff; \
d1 = d & 0xff00ff; \
d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
s &= 0xff00; \
d &= 0xff00; \
d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
*dstp = d1 | d | dalpha; \
} \
} \
++srcp; \
++dstp; \
widthvar--; \
}
/* phase 1: reach a 16-byte aligned destination */
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
if (width > 0) {
int extrawidth = (width % 4);
vector unsigned char valigner = VEC_ALIGNER(srcp);
vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
width -= extrawidth;
/* phase 2: four pixels per iteration */
while (width) {
vector unsigned char voverflow;
vector unsigned char vd;
vector unsigned char valpha;
vector unsigned char vdstalpha;
/* s = *srcp */
/* the source may be unaligned: merge two aligned loads */
voverflow = (vector unsigned char) vec_ld(15, srcp);
vs = vec_perm(vs, voverflow, valigner);
/* spread each pixel's alpha byte over all four of its bytes */
valpha = vec_perm(vs, v0, valphaPermute);
/* d = *dstp */
vd = (vector unsigned char) vec_ld(0, dstp);
vdstalpha = vec_and(vd, valphamask);
VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
/* set the alpha to the dest alpha */
vd = vec_and(vd, vpixelmask);
vd = vec_or(vd, vdstalpha);
/* *dstp = res */
vec_st((vector unsigned int) vd, 0, dstp);
srcp += 4;
dstp += 4;
width -= 4;
/* the second load of this iteration is the first of the next */
vs = voverflow;
}
/* phase 3: leftover pixels */
ONE_PIXEL_BLEND((extrawidth), extrawidth);
}
srcp += srcskip;
dstp += dstskip;
}
#undef ONE_PIXEL_BLEND
}
/*
 * 32-bit -> 32-bit blit with a single per-surface alpha (info->a),
 * AltiVec version, for arbitrary 32-bit channel layouts.
 *
 * Each row is blended pixel by pixel until dstp is 16-byte aligned, then
 * four pixels at a time with vector instructions, then pixel by pixel
 * for the remaining (width % 4) pixels.  The scalar path writes dA
 * (opaque when the destination has an alpha mask, otherwise 0) as the
 * resulting alpha.
 */
static void
Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
{
/* XXX : 6 */
int height = info->dst_h;
Uint32 *srcp = (Uint32 *) info->src;
/* skips are given in bytes; the pointers step in 32-bit pixels */
int srcskip = info->src_skip >> 2;
Uint32 *dstp = (Uint32 *) info->dst;
int dstskip = info->dst_skip >> 2;
SDL_PixelFormat *srcfmt = info->src_fmt;
SDL_PixelFormat *dstfmt = info->dst_fmt;
unsigned sA = info->a;
unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
vector unsigned char mergePermute;
vector unsigned char vsrcPermute;
vector unsigned char vdstPermute;
vector unsigned char vsdstPermute;
vector unsigned char valpha;
vector unsigned char valphamask;
vector unsigned char vbits;
vector unsigned short v1;
vector unsigned short v8;
mergePermute = VEC_MERGE_PERMUTE();
v1 = vec_splat_u16(1);
v8 = vec_splat_u16(8);
/* set the alpha to 255 on the destination surf */
valphamask = VEC_ALPHA_MASK();
/* source -> ARGB, ARGB -> destination, destination -> ARGB */
vsrcPermute = calc_swizzle32(srcfmt, NULL);
vdstPermute = calc_swizzle32(NULL, dstfmt);
vsdstPermute = calc_swizzle32(dstfmt, NULL);
/* set a vector full of alpha and 255-alpha */
/* (the code below broadcasts sA into every byte of valpha) */
((unsigned char *) &valpha)[0] = sA;
valpha = vec_splat(valpha, 0);
/* all bits set; second operand of the final destination permute */
vbits = (vector unsigned char) vec_splat_s8(-1);
while (height--) {
int width = info->dst_w;
/* Scalar blend of single pixels with the constant surface alpha. */
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
Uint32 Pixel; \
unsigned sR, sG, sB, dR, dG, dB; \
DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
++srcp; \
++dstp; \
widthvar--; \
}
/* phase 1: reach a 16-byte aligned destination */
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
if (width > 0) {
int extrawidth = (width % 4);
vector unsigned char valigner = VEC_ALIGNER(srcp);
vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
width -= extrawidth;
/* phase 2: four pixels per iteration */
while (width) {
vector unsigned char voverflow;
vector unsigned char vd;
/* s = *srcp */
/* the source may be unaligned: merge two aligned loads */
voverflow = (vector unsigned char) vec_ld(15, srcp);
vs = vec_perm(vs, voverflow, valigner);
vs = vec_perm(vs, valpha, vsrcPermute);
/* d = *dstp */
vd = (vector unsigned char) vec_ld(0, dstp);
vd = vec_perm(vd, vd, vsdstPermute);
VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
/* set the alpha channel to full on */
vd = vec_or(vd, valphamask);
vd = vec_perm(vd, vbits, vdstPermute);
/* *dstp = res */
vec_st((vector unsigned int) vd, 0, dstp);
srcp += 4;
dstp += 4;
width -= 4;
/* the second load of this iteration is the first of the next */
vs = voverflow;
}
/* phase 3: leftover pixels */
ONE_PIXEL_BLEND((extrawidth), extrawidth);
}
#undef ONE_PIXEL_BLEND
srcp += srcskip;
dstp += dstskip;
}
}
/*
 * fast RGB888->(A)RGB888 blending
 *
 * Blends with a single per-surface alpha (info->a) and no channel
 * swizzling; the scalar loop hard-codes colour in bits 0-23 and forces
 * the top byte of every written pixel to 0xff.  Each row is blended
 * pixel by pixel until dstp is 16-byte aligned, then four pixels at a
 * time with vector instructions, then pixel by pixel for the remaining
 * (width % 4) pixels.
 */
static void
BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
{
unsigned alpha = info->a;
int height = info->dst_h;
Uint32 *srcp = (Uint32 *) info->src;
/* skips are given in bytes; the pointers step in 32-bit pixels */
int srcskip = info->src_skip >> 2;
Uint32 *dstp = (Uint32 *) info->dst;
int dstskip = info->dst_skip >> 2;
vector unsigned char mergePermute;
vector unsigned char valpha;
vector unsigned char valphamask;
vector unsigned short v1;
vector unsigned short v8;
mergePermute = VEC_MERGE_PERMUTE();
v1 = vec_splat_u16(1);
v8 = vec_splat_u16(8);
/* set the alpha to 255 on the destination surf */
valphamask = VEC_ALPHA_MASK();
/* set a vector full of alpha and 255-alpha */
/* (the code below broadcasts alpha into every byte of valpha) */
((unsigned char *) &valpha)[0] = alpha;
valpha = vec_splat(valpha, 0);
while (height--) {
int width = info->dst_w;
/* Scalar blend: red/blue together (0xff00ff), green separately (0xff00). */
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
Uint32 s = *srcp; \
Uint32 d = *dstp; \
Uint32 s1 = s & 0xff00ff; \
Uint32 d1 = d & 0xff00ff; \
d1 = (d1 + ((s1 - d1) * alpha >> 8)) \
& 0xff00ff; \
s &= 0xff00; \
d &= 0xff00; \
d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
*dstp = d1 | d | 0xff000000; \
++srcp; \
++dstp; \
widthvar--; \
}
/* phase 1: reach a 16-byte aligned destination */
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
if (width > 0) {
int extrawidth = (width % 4);
vector unsigned char valigner = VEC_ALIGNER(srcp);
vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
width -= extrawidth;
/* phase 2: four pixels per iteration */
while (width) {
vector unsigned char voverflow;
vector unsigned char vd;
/* s = *srcp */
/* the source may be unaligned: merge two aligned loads */
voverflow = (vector unsigned char) vec_ld(15, srcp);
vs = vec_perm(vs, voverflow, valigner);
/* d = *dstp */
vd = (vector unsigned char) vec_ld(0, dstp);
VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
/* set the alpha channel to full on */
vd = vec_or(vd, valphamask);
/* *dstp = res */
vec_st((vector unsigned int) vd, 0, dstp);
srcp += 4;
dstp += 4;
width -= 4;
/* the second load of this iteration is the first of the next */
vs = voverflow;
}
/* phase 3: leftover pixels */
ONE_PIXEL_BLEND((extrawidth), extrawidth);
}
#undef ONE_PIXEL_BLEND
srcp += srcskip;
dstp += dstskip;
}
}
#if __MWERKS__
#pragma altivec_model off
#endif
#endif /* SDL_ALTIVEC_BLITTERS */
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
static void
BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
......@@ -538,6 +1338,79 @@ BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info)
}
}
#ifdef __3dNOW__
/*
 * fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha
 *
 * Pixels are handled one at a time: source alpha 0 leaves the destination
 * untouched, fully set alpha copies the colour channels and keeps the
 * destination's remaining bits, anything else is blended with MMX
 * arithmetic on 16-bit lanes.  _mm_empty() is called once after all rows.
 */
static void
BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
{
int width = info->dst_w;
int height = info->dst_h;
Uint32 *srcp = (Uint32 *) info->src;
/* skips are given in bytes; the pointers step in 32-bit pixels */
int srcskip = info->src_skip >> 2;
Uint32 *dstp = (Uint32 *) info->dst;
int dstskip = info->dst_skip >> 2;
SDL_PixelFormat *sf = info->src_fmt;
Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
Uint32 amask = sf->Amask;
Uint32 ashift = sf->Ashift;
Uint64 multmask;
__m64 src1, dst1, mm_alpha, mm_zero, dmask;
mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
/*
 * Build a mask that clears the 16-bit lane holding the alpha channel:
 * after unpacking each byte occupies 16 bits, hence the shift by
 * ashift * 2.
 */
multmask = 0xFFFF;
multmask <<= (ashift * 2);
multmask = ~multmask;
dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP4({
Uint32 alpha;
/* prefetch 16 pixels ahead in both surfaces */
_m_prefetch(srcp + 16);
_m_prefetch(dstp + 16);
alpha = *srcp & amask;
if (alpha == 0) {
/* do nothing */
} else if (alpha == amask) {
/* copy RGB, keep dst alpha */
*dstp = (*srcp & chanmask) | (*dstp & ~chanmask);
} else {
src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */
/* blend */
src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */
src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */
src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */
dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */
dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */
*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
}
++srcp;
++dstp;
}, width);
/* *INDENT-ON* */
srcp += srcskip;
dstp += dstskip;
}
/* leave MMX state so the FPU can be used again */
_mm_empty();
}
#endif /* __3dNOW__ */
/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
/* blend a single 16 bit pixel at 50% */
......@@ -1257,10 +2130,17 @@ SDL_CalculateBlitA(SDL_Surface * surface)
return BlitNto1PixelAlpha;
case 2:
if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
&& sf->Gmask == 0xff00
&& ((sf->Rmask == 0xff && df->Rmask == 0x1f)
|| (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
#if SDL_ALTIVEC_BLITTERS
if (sf->BytesPerPixel == 4
&& df->Gmask == 0x7e0 && df->Bmask == 0x1f
&& SDL_HasAltiVec())
return Blit32to565PixelAlphaAltivec;
else
#endif
if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
&& sf->Gmask == 0xff00
&& ((sf->Rmask == 0xff && df->Rmask == 0x1f)
|| (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
if (df->Gmask == 0x7e0)
return BlitARGBto565PixelAlpha;
else if (df->Gmask == 0x3e0)
......@@ -1272,20 +2152,35 @@ SDL_CalculateBlitA(SDL_Surface * surface)
if (sf->Rmask == df->Rmask
&& sf->Gmask == df->Gmask
&& sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
#if defined(__MMX__)
#if defined(__MMX__) || defined(__3dNOW__)
if (sf->Rshift % 8 == 0
&& sf->Gshift % 8 == 0
&& sf->Bshift % 8 == 0
&& sf->Ashift % 8 == 0 && sf->Aloss == 0) {
#ifdef __3dNOW__
if (SDL_Has3DNow())
return BlitRGBtoRGBPixelAlphaMMX3DNOW;
#endif
#ifdef __MMX__
if (SDL_HasMMX())
return BlitRGBtoRGBPixelAlphaMMX;
#endif
}
#endif /* __MMX__ */
#endif /* __MMX__ || __3dNOW__ */
if (sf->Amask == 0xff000000) {
#if SDL_ALTIVEC_BLITTERS
if (SDL_HasAltiVec())
return BlitRGBtoRGBPixelAlphaAltivec;
#endif
return BlitRGBtoRGBPixelAlpha;
}
}
return BlitNtoNPixelAlpha;
#if SDL_ALTIVEC_BLITTERS
if (sf->Amask && sf->BytesPerPixel == 4 && SDL_HasAltiVec())
return Blit32to32PixelAlphaAltivec;
else
#endif
return BlitNtoNPixelAlpha;
case 3:
default:
......@@ -1331,10 +2226,19 @@ SDL_CalculateBlitA(SDL_Surface * surface)
return BlitRGBtoRGBSurfaceAlphaMMX;
#endif
if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
#if SDL_ALTIVEC_BLITTERS
if (SDL_HasAltiVec())
return BlitRGBtoRGBSurfaceAlphaAltivec;
#endif
return BlitRGBtoRGBSurfaceAlpha;
}
}
return BlitNtoNSurfaceAlpha;
#if SDL_ALTIVEC_BLITTERS
if ((sf->BytesPerPixel == 4) && SDL_HasAltiVec())
return Blit32to32SurfaceAlphaAltivec;
else
#endif
return BlitNtoNSurfaceAlpha;
case 3:
default:
......@@ -1348,6 +2252,12 @@ SDL_CalculateBlitA(SDL_Surface * surface)
if (df->BytesPerPixel == 1)
return BlitNto1SurfaceAlphaKey;
else
#if SDL_ALTIVEC_BLITTERS
if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
SDL_HasAltiVec())
return Blit32to32SurfaceAlphaKeyAltivec;
else
#endif
return BlitNtoNSurfaceAlphaKey;
}
break;
......
......@@ -28,8 +28,840 @@
/* Functions to blit from N-bit surfaces to other surfaces */
#if SDL_ALTIVEC_BLITTERS
#define assert(X)
#ifdef __MACOSX__
#include <sys/sysctl.h>
#endif /* __MACOSX__ */

/*
 * Report the size of the L3 cache in bytes.
 *
 * On Mac OS X the value is read from the "hw.l3cachesize" sysctl and 0 is
 * returned when the query fails.  Everywhere else a fixed 2 MiB guess is
 * returned.
 */
static size_t
GetL3CacheSize(void)
{
#ifdef __MACOSX__
    u_int64_t cachesize = 0;
    size_t len = sizeof(cachesize);

    if (sysctlbyname("hw.l3cachesize", &cachesize, &len, NULL, 0) != 0) {
        return 0;
    }
    return cachesize;
#else
    /* XXX: Just guess G4 */
    return 2097152;
#endif /* __MACOSX__ */
}
/*
 * Vector literals: GCC older than 4 on Mac OS X gets the parenthesized
 * form, every other compiler gets the brace form.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#endif

/*
 * Low four address bits of x: non-zero when x is not 16-byte aligned.
 * The argument is parenthesized so that pointer expressions such as
 * (p + 1) are evaluated before the conversion to size_t.
 */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

/*
 * Permute vector applying the same byte order (a,b,c,d) to each of the
 * four 32-bit pixels in a vector.  Built through VECUINT8_LITERAL so the
 * literal syntax matches the compiler in use.
 */
#define VSWIZZLE32(a,b,c,d) \
        VECUINT8_LITERAL(0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                         0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                         0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                         0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d))

/* Pack four channel values into a 32-bit pixel of format dstfmt. */
#define MAKE8888(dstfmt, r, g, b, a)  \
    ( (((r)<<(dstfmt)->Rshift)&(dstfmt)->Rmask) | \
      (((g)<<(dstfmt)->Gshift)&(dstfmt)->Gmask) | \
      (((b)<<(dstfmt)->Bshift)&(dstfmt)->Bmask) | \
      (((a)<<(dstfmt)->Ashift)&(dstfmt)->Amask) )

/*
 * Data Stream Touch...Altivec cache prefetching.
 *
 *  Don't use this on a G5...however, the speed boost is very significant
 *   on a G4.
 */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2

/* macro to set DST control word value... */
#define DST_CTRL(size, count, stride) \
    (((size) << 24) | ((count) << 16) | (stride))

/*
 * Permute vector that extracts 16 source bytes starting at src from the
 * pair (vec_ld(0, src), vec_ld(15, src)).  When src is already aligned
 * both loads return the same block, so indices 16..31 are used to pick
 * the second operand unchanged.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    ? vec_lvsl(0, src) \
    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
/*
 * Calculate the permute vector used for 32->32 swizzling.
 *
 * srcfmt / dstfmt: pixel formats to convert between; a NULL pointer
 * stands for the built-in ARGB layout (default_pixel_format below).
 *
 * Returns a 16-byte vec_perm control vector.  For each of the four
 * pixels in a vector it names, per destination byte, which source byte
 * to take.  Index 0x10 is used for a missing alpha channel and selects
 * byte 0 of the second vec_perm operand, so the caller decides what
 * value ends up there.
 */
static vector unsigned char
calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
{
    /*
     * We have to assume that the bits that aren't used by other
     *  colors is alpha, and it's one complete byte, since some formats
     *  leave alpha with a zero mask, but we should still swizzle the bits.
     */
    /* ARGB */
    static const struct SDL_PixelFormat default_pixel_format = {
        NULL, 32, 4,
        0, 0, 0, 0,
        16, 8, 0, 24,
        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
    };
    if (!srcfmt) {
        srcfmt = &default_pixel_format;
    }
    if (!dstfmt) {
        dstfmt = &default_pixel_format;
    }
    /* byte offset of each of the four pixels inside the vector */
    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
                                                       0x04, 0x04, 0x04, 0x04,
                                                       0x08, 0x08, 0x08, 0x08,
                                                       0x0C, 0x0C, 0x0C,
                                                       0x0C);
    vector unsigned char vswiz;
    vector unsigned int srcvec;
    /* bit shift -> byte index counted from the most significant byte */
#define RESHIFT(X) (3 - ((X) >> 3))
    /* place each source byte index at the destination channel's position */
    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
    Uint32 amask;
    /* Use index 0x10 for alpha if either surface doesn't have alpha */
    if (dstfmt->Amask) {
        amask =
            ((srcfmt->Amask) ? RESHIFT(srcfmt->
                                       Ashift) : 0x10) << (dstfmt->Ashift);
    } else {
        /* every byte not claimed by R, G or B gets index 0x10 */
        amask =
            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
                          0xFFFFFFFF);
    }
#undef RESHIFT
    /* replicate the one-pixel pattern and add the per-pixel offsets */
    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
    return (vswiz);
}
static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
/*
 * Convert 32-bit pixels to 16-bit RGB565, AltiVec version.
 *
 * Each row is converted pixel by pixel until dst is 16-byte aligned, then
 * eight pixels at a time with vector instructions, then pixel by pixel
 * for the remaining (width % 8) pixels.
 *
 * Vector path: the source is swizzled to ARGB and packed with
 * vec_packpx() into 1/5/5/5 pixels.  Red (shifted up by one bit) and
 * blue are taken from that packed value; green is taken separately from
 * the full 8-bit green bytes so that it keeps six bits.  The blue mask
 * is 0x001f: bit 5 of the packed pixel belongs to the 5-bit green field
 * and must not be OR-ed into the 565 green field, otherwise the vector
 * result differs from the scalar ONE_PIXEL_BLEND result.
 */
static void
Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *) info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *) info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    vector unsigned char valpha = vec_splat_u8(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    /* pairs (byte 0, green byte) -> one 16-bit element per pixel */
    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
                                                    0x00, 0x0a, 0x00, 0x0e,
                                                    0x00, 0x12, 0x00, 0x16,
                                                    0x00, 0x1a, 0x00, 0x1e);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /* five blue bits of the packed pixel */
    vector unsigned short v1f =
        VECUINT16_LITERAL(0x001f, 0x001f, 0x001f, 0x001f,
                          0x001f, 0x001f, 0x001f, 0x001f);
    /* top six bits of the 8-bit green value */
    vector unsigned short vfc =
        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
    /* 0xF8F8 << 8 == 0xF800: the five red bits of a 565 pixel */
    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, sA; \
            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
                          sR, sG, sB, sA); \
            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
                                ((sG << 3) & 0x000007E0) | \
                                ((sB >> 3) & 0x0000001F)); \
            dst += 2; \
            src += 4; \
            widthvar--; \
        }

        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);       /* trailing unaligned stores */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
            vector unsigned int vsrc1, vsrc2;
            vector unsigned char vdst;

            /* first four source pixels, swizzled to ARGB */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
            src += 16;
            vsrc = voverflow;
            /* next four source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
            /* 1555 */
            vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
            /* six-bit green moved to bits 5..10 */
            vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
            vgpixel = vec_and(vgpixel, vfc);
            vgpixel = vec_sl(vgpixel, v3);
            /* red moves from bits 10..14 to bits 11..15 */
            vrpixel = vec_sl(vpixel, v1);
            vrpixel = vec_and(vrpixel, vf800);
            /* blue stays in bits 0..4 */
            vbpixel = vec_and(vpixel, v1f);
            vdst =
                vec_or((vector unsigned char) vrpixel,
                       (vector unsigned char) vgpixel);
            /* 565 */
            vdst = vec_or(vdst, (vector unsigned char) vbpixel);
            vec_st(vdst, 0, dst);

            width -= 8;
            src += 16;
            dst += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* finish the remaining (width % 8) pixels with the scalar loop */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;         /* move to next row, accounting for pitch. */
        dst += dstskip;
    }
}
/*
 * Convert 16-bit RGB565 pixels to a 32-bit format, AltiVec version.
 *
 * Each row is converted pixel by pixel until dst is 16-byte aligned, then
 * eight pixels at a time with vector instructions (two 16-byte stores),
 * then pixel by pixel for the remaining (width % 8) pixels.
 *
 * The alpha written to the destination is info->a when the destination
 * has an alpha mask and info->a is non-zero, otherwise 0.
 *
 * The vector path isolates every channel with an exact mask (0xF800 for
 * red, 0xFC00 for the shifted green) so that it produces the same bytes
 * as the scalar ONE_PIXEL_BLEND loop; without the masks, bits of the
 * neighbouring channel would end up in the low bits of red and green.
 */
static void
Blit_RGB565_32Altivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *) info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *) info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned short vfc00;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v2 = vec_splat_u16(2);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
       Permute indices 0x00-0x0f address the first operand, 0x10-0x1f the
       second one.

       0x10 - 0x1f is the alpha
       0x00 - 0x0e evens are the red
       0x01 - 0x0f odds are zero
     */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01,
                                                       0x01);
    /* same pattern for pixels 4..7: red index advanced by 8 */
    vector unsigned char vredalpha2 =
        (vector unsigned
         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
        );
    /*
       0x00 - 0x0f is ARxx ARxx ARxx ARxx
       0x11 - 0x1f odds are blue
     */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    /* same pattern for pixels 4..7: blue index advanced by 8 */
    vector unsigned char vblue2 =
        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
        );
    /*
       0x00 - 0x0f is ARxB ARxB ARxB ARxB
       0x10 - 0x1e evens are green
     */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    /* same pattern for pixels 4..7: green index advanced by 8 */
    vector unsigned char vgreen2 =
        (vector unsigned
         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
        );

    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /* 0xF8F8 << 8 == 0xF800: the five red bits of a 565 pixel */
    vf800 = (vector unsigned short) vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));
    /* 0xFCFC << 8 == 0xFC00: six green bits once moved to the high byte */
    vfc00 = (vector unsigned short) vec_splat_u8(-4);
    vfc00 = vec_sl(vfc00, vec_splat_u16(8));

    if (dstfmt->Amask && info->a) {
        ((unsigned char *) &valpha)[0] = alpha = info->a;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    /* ARGB -> destination byte order */
    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short Pixel = *((unsigned short *)src); \
            sR = (Pixel >> 8) & 0xf8; \
            sG = (Pixel >> 3) & 0xfc; \
            sB = (Pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);       /* trailing unaligned stores */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            /* eight 565 pixels; the source may be unaligned */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* red in the high byte, blue in the low byte, green in the high byte */
            vR = vec_and((vector unsigned short) vsrc, vf800);
            vB = vec_sl((vector unsigned short) vsrc, v3);
            vG = vec_and(vec_sl(vB, v2), vfc00);

            /* pixels 0..3: assemble ARGB, then swizzle to the destination */
            vdst1 =
                (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* pixels 4..7 */
            vdst2 =
                (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* finish the remaining (width % 8) pixels with the scalar loop */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;         /* move to next row, accounting for pitch. */
        dst += dstskip;
    }
}
/*
 * Convert a 16-bit RGB555 surface to a 32-bit destination using AltiVec.
 *
 * info supplies the source/destination pointers, per-row skips, the pixel
 * formats and the alpha value (info->a) written when the destination format
 * has an alpha mask.
 *
 * Each row is processed in three phases:
 *   1. scalar pixels until dst is vector aligned,
 *   2. eight pixels per iteration with vector permutes,
 *   3. scalar pixels for the remaining (width % 8) tail.
 *
 * The vector phase masks red and green down to their five significant bits
 * so it produces exactly the same component values as the scalar phases
 * (which mask with 0xf8); otherwise pixels in one row would differ depending
 * on which phase happened to handle them.
 */
static void
Blit_RGB555_32Altivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *) info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *) info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
       0x10 - 0x1f is the alpha
       0x00 - 0x0e evens are the red
       0x01 - 0x0f odds are zero
     */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01,
                                                       0x01);
    /* Same pattern for pixels 4-7: add 8 to the red index (byte 1 of each word). */
    vector unsigned char vredalpha2 =
        (vector unsigned
         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
        );
    /*
       0x00 - 0x0f is ARxx ARxx ARxx ARxx
       0x11 - 0x1f odds are blue
     */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    /* Pixels 4-7: add 8 to the blue index (byte 3 of each word). */
    vector unsigned char vblue2 =
        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
        );
    /*
       0x00 - 0x0f is ARxB ARxB ARxB ARxB
       0x10 - 0x1e evens are green
     */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    /* Pixels 4-7: add 8 to the green index (byte 2 of each word). */
    vector unsigned char vgreen2 =
        (vector unsigned
         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
        );

    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /*
     * Build 0xF800 in every halfword: splat the byte 0xF8 (-8 as a 5-bit
     * signed literal), then shift each halfword left by 8.  Splatting -7
     * would give 0xF900 and let one green bit leak into red's low bit.
     */
    vf800 = (vector unsigned short) vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    if (dstfmt->Amask && info->a) {
        /* Write the alpha into byte 0 and replicate it across the vector. */
        ((unsigned char *) &valpha)[0] = alpha = info->a;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    /* Permute control that rearranges the intermediate ARGB bytes into dstfmt. */
    vpermute = calc_swizzle32(NULL, dstfmt);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        /* Scalar conversion of one RGB555 pixel; used for the lead-in and tail. */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short Pixel = *((unsigned short *)src); \
            sR = (Pixel >> 7) & 0xf8; \
            sG = (Pixel >> 2) & 0xf8; \
            sB = (Pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }
        /* do scalar until we can align... */
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);       /* trailing unaligned stores */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            /* Combine two aligned loads into eight (possibly unaligned) source pixels. */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* Red: shift the 5 red bits to the top of the halfword, keep only them. */
            vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
            /* Blue: low byte of each halfword becomes BBBBB000. */
            vB = vec_sl((vector unsigned short) vsrc, v3);
            /*
             * Green: three more bits left puts green at the top of the high
             * byte; mask it so the blue bits that follow do not end up in
             * green's low bits.
             */
            vG = vec_and(vec_sl(vB, v3), vf800);

            /* Pixels 0-3 */
            vdst1 =
                (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* Pixels 4-7 */
            vdst2 =
                (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;   /* reuse the second load as the next first half */
        }

        assert(width == 0);

        /* finish the leftover (width % 8) pixels with scalar code */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;         /* move to next row, accounting for pitch. */
        dst += dstskip;
    }
}
static void BlitNtoNKey(SDL_BlitInfo * info);
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);

/*
 * Colorkeyed blit between two 32-bit surfaces using AltiVec.
 *
 * Source pixels whose RGB bits (alpha bits ignored) equal the colorkey are
 * skipped, leaving the destination untouched; all other pixels are converted
 * to the destination format.  When both formats have an alpha mask the
 * source alpha is carried over, otherwise info->a is used (or 0 when the
 * destination has no alpha mask).
 *
 * Rows narrower than 16 pixels are delegated to the generic scalar
 * colorkey blitters.  Wider rows are processed as: scalar pixels until dstp
 * is vector aligned, four pixels per iteration in the vector loop, then
 * scalar pixels for the (width % 4) tail.
 */
static void
Blit32to32KeyAltivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    int srcskip = info->src_skip / 4;
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->BytesPerPixel;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->BytesPerPixel;
    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
    unsigned alpha = dstfmt->Amask ? info->a : 0;
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    Uint32 ckey = info->colorkey;
    vector unsigned int valpha;
    vector unsigned char vpermute;
    vector unsigned char vzero;
    vector unsigned int vckey;
    vector unsigned int vrgbmask;

    vpermute = calc_swizzle32(srcfmt, dstfmt);

    /* Too narrow for the vector loop to pay off: use the scalar blitters. */
    if (info->dst_w < 16) {
        if (copy_alpha) {
            BlitNtoNKeyCopyAlpha(info);
        } else {
            BlitNtoNKey(info);
        }
        return;
    }

    vzero = vec_splat_u8(0);
    if (alpha) {
        /* Write the alpha into byte 0 and replicate it across the vector. */
        ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
        valpha =
            (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
    } else {
        valpha = (vector unsigned int) vzero;
    }

    /* The key is compared against RGB bits only, so strip anything else. */
    ckey &= rgbmask;
    ((unsigned int *) (char *) &vckey)[0] = ckey;
    vckey = vec_splat(vckey, 0);
    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
    vrgbmask = vec_splat(vrgbmask, 0);

    /*
     * Scalar colorkey copy of one pixel at a time.  Both variants compare
     * (Pixel & rgbmask) against the already-masked key, matching the vector
     * loop, so a pixel's alpha bits never affect whether it is keyed out.
     */
#define ONE_PIXEL_BLEND(condition, widthvar) \
    if (copy_alpha) { \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, sA; \
            DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
                          sR, sG, sB, sA); \
            if ( (Pixel & rgbmask) != ckey ) { \
                ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
                              sR, sG, sB, sA); \
            } \
            dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
            srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
            widthvar--; \
        } \
    } else { \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB; \
            RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
            if ( (Pixel & rgbmask) != ckey ) { \
                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
                ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
                              sR, sG, sB, alpha); \
            } \
            dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
            srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
            widthvar--; \
        } \
    }

    while (height--) {
        int width = info->dst_w;

        /* do scalar until the destination is vector aligned */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);

        /* dst_w >= 16 here, so pixels always remain after the lead-in. */
        assert(width > 0);
        if (width > 0) {
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned int vs = vec_ld(0, srcp);

            width -= extrawidth;
            assert(width >= 4);

            while (width) {
                vector unsigned char vsel;
                vector unsigned int vd;
                vector unsigned int voverflow = vec_ld(15, srcp);

                /* load the source vec */
                vs = vec_perm(vs, voverflow, valigner);

                /*
                 * vsel is set for items that match the key.  The compare
                 * must use the RGB-masked pixels, not the raw source,
                 * because vckey has had its non-RGB bits cleared.
                 */
                vsel = (vector unsigned char) vec_and(vs, vrgbmask);
                vsel = (vector unsigned char)
                    vec_cmpeq((vector unsigned int) vsel, vckey);

                /* permute the src vec to the dest format */
                vs = vec_perm(vs, valpha, vpermute);

                /* load the destination vec */
                vd = vec_ld(0, dstp);

                /* keep dest where the key matched, take source elsewhere */
                vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
                                                   (vector unsigned char) vd,
                                                   vsel);
                vec_st(vd, 0, dstp);

                srcp += 4;
                width -= 4;
                dstp += 4;
                vs = voverflow;
            }

            /* finish the leftover (width % 4) pixels with scalar code */
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
        }

        /* move to the next row, accounting for pitch */
        srcp += srcskip;
        dstp += dstskip;
    }
#undef ONE_PIXEL_BLEND
}
/*
 * Altivec code to swizzle one 32-bit surface to a different 32-bit format.
 * Use this on a G5.
 *
 * This variant issues no data-stream prefetch hints.  Each row is handled
 * as: scalar pixels until dst is vector aligned, four pixels per iteration
 * in the vector loop, then scalar pixels for the (width % 4) tail.
 */
static void
ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
{
int height = info->dst_h;
Uint32 *src = (Uint32 *) info->src;
/* skips are divided by 4 because they are added to Uint32 pointers below
   (presumably info->src_skip / dst_skip are byte counts - confirm) */
int srcskip = info->src_skip / 4;
Uint32 *dst = (Uint32 *) info->dst;
int dstskip = info->dst_skip / 4;
SDL_PixelFormat *srcfmt = info->src_fmt;
SDL_PixelFormat *dstfmt = info->dst_fmt;
/* second operand of the swizzle permute: all zero unless replaced by the
   splatted alpha below */
vector unsigned int vzero = vec_splat_u32(0);
vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
/* destination has alpha but the source does not: fill with info->a */
if (dstfmt->Amask && !srcfmt->Amask) {
if (info->a) {
vector unsigned char valpha;
/* write the alpha into byte 0, then replicate it to every byte */
((unsigned char *) &valpha)[0] = info->a;
vzero = (vector unsigned int) vec_splat(valpha, 0);
}
}
assert(srcfmt->BytesPerPixel == 4);
assert(dstfmt->BytesPerPixel == 4);
while (height--) {
vector unsigned char valigner;
vector unsigned int vbits;
vector unsigned int voverflow;
Uint32 bits;
Uint8 r, g, b, a;
int width = info->dst_w;
int extrawidth;
/* do scalar until we can align... */
while ((UNALIGNED_PTR(dst)) && (width)) {
bits = *(src++);
RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
*(dst++) = MAKE8888(dstfmt, r, g, b, a);
width--;
}
/* After all that work, here's the vector part! */
extrawidth = (width % 4);
width -= extrawidth;
/* src may still be unaligned; valigner is the permute control used to
   stitch two aligned loads into one source vector */
valigner = VEC_ALIGNER(src);
vbits = vec_ld(0, src);
while (width) {
/* second aligned load covering the rest of the four source pixels */
voverflow = vec_ld(15, src);
src += 4;
width -= 4;
vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
vec_st(vbits, 0, dst); /* store it back out. */
dst += 4;
/* the second load becomes the first half of the next iteration */
vbits = voverflow;
}
assert(width == 0);
/* cover pixels at the end of the row that didn't fit in 16 bytes. */
while (extrawidth) {
bits = *(src++); /* at most 3 pixels (width % 4). */
RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
*(dst++) = MAKE8888(dstfmt, r, g, b, a);
extrawidth--;
}
/* advance both pointers to the start of the next row */
src += srcskip;
dst += dstskip;
}
}
/*
 * Altivec code to swizzle one 32-bit surface to a different 32-bit format.
 * Use this on a G4.
 *
 * Same row structure as the non-prefetching variant (scalar lead-in until
 * dst is vector aligned, four pixels per vector iteration, scalar tail),
 * but the lead-in and vector loops also issue AltiVec data-stream touch
 * hints ahead of the current source and destination positions.  The
 * streams are stopped before returning.
 */
static void
ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
{
/* prefetch lead distances; note they are added to Uint32 pointers below,
   so the effective lead is this many Uint32 elements, not bytes */
const int scalar_dst_lead = sizeof(Uint32) * 4;
const int vector_dst_lead = sizeof(Uint32) * 16;
int height = info->dst_h;
Uint32 *src = (Uint32 *) info->src;
/* skips are divided by 4 because they are added to Uint32 pointers below
   (presumably info->src_skip / dst_skip are byte counts - confirm) */
int srcskip = info->src_skip / 4;
Uint32 *dst = (Uint32 *) info->dst;
int dstskip = info->dst_skip / 4;
SDL_PixelFormat *srcfmt = info->src_fmt;
SDL_PixelFormat *dstfmt = info->dst_fmt;
/* second operand of the swizzle permute: all zero unless replaced by the
   splatted alpha below */
vector unsigned int vzero = vec_splat_u32(0);
vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
/* destination has alpha but the source does not: fill with info->a */
if (dstfmt->Amask && !srcfmt->Amask) {
if (info->a) {
vector unsigned char valpha;
/* write the alpha into byte 0, then replicate it to every byte */
((unsigned char *) &valpha)[0] = info->a;
vzero = (vector unsigned int) vec_splat(valpha, 0);
}
}
assert(srcfmt->BytesPerPixel == 4);
assert(dstfmt->BytesPerPixel == 4);
while (height--) {
vector unsigned char valigner;
vector unsigned int vbits;
vector unsigned int voverflow;
Uint32 bits;
Uint8 r, g, b, a;
int width = info->dst_w;
int extrawidth;
/* do scalar until we can align... */
while ((UNALIGNED_PTR(dst)) && (width)) {
/* hint upcoming source reads (transient touch) and destination writes
   (touch for store) on their respective stream channels */
vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
DST_CHAN_SRC);
vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
DST_CHAN_DEST);
bits = *(src++);
RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
*(dst++) = MAKE8888(dstfmt, r, g, b, a);
width--;
}
/* After all that work, here's the vector part! */
extrawidth = (width % 4);
width -= extrawidth;
/* src may still be unaligned; valigner is the permute control used to
   stitch two aligned loads into one source vector */
valigner = VEC_ALIGNER(src);
vbits = vec_ld(0, src);
while (width) {
/* same prefetch hints as above, with the larger vector-loop lead */
vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
DST_CHAN_SRC);
vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
DST_CHAN_DEST);
/* second aligned load covering the rest of the four source pixels */
voverflow = vec_ld(15, src);
src += 4;
width -= 4;
vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
vec_st(vbits, 0, dst); /* store it back out. */
dst += 4;
/* the second load becomes the first half of the next iteration */
vbits = voverflow;
}
assert(width == 0);
/* cover pixels at the end of the row that didn't fit in 16 bytes. */
while (extrawidth) {
bits = *(src++); /* at most 3 pixels (width % 4), don't bother with prefetch. */
RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
*(dst++) = MAKE8888(dstfmt, r, g, b, a);
extrawidth--;
}
/* advance both pointers to the start of the next row */
src += srcskip;
dst += dstskip;
}
/* stop both data streams started by the prefetch hints */
vec_dss(DST_CHAN_SRC);
vec_dss(DST_CHAN_DEST);
}
/*
 * Return the bitmask of CPU features used to select a blitter.
 *
 * The mask is computed on the first call and cached in a function-local
 * static; 0xffffffff marks "not computed yet".  If the environment variable
 * SDL_ALTIVEC_BLIT_FEATURES is set, its value (parsed as an unsigned
 * decimal) replaces the detected mask, which is useful for testing.
 *
 * Bits: 1 = has-MMX, 2 = has-AltiVec, 4 = dont-use-prefetch.
 */
static Uint32
GetBlitFeatures(void)
{
    static Uint32 features = 0xffffffff;
    char *override;
    Uint32 detected;

    /* Already computed on an earlier call. */
    if (features != 0xffffffff) {
        return features;
    }

    /* Provide an override for testing .. */
    override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
    if (override) {
        features = 0;
        SDL_sscanf(override, "%u", &features);
        return features;
    }

    detected = 0;
    /* Feature 1 is has-MMX */
    if (SDL_HasMMX()) {
        detected |= 1;
    }
    /* Feature 2 is has-AltiVec */
    if (SDL_HasAltiVec()) {
        detected |= 2;
    }
    /* Feature 4 is dont-use-prefetch */
    /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
    if (GetL3CacheSize() == 0) {
        detected |= 4;
    }

    features = detected;
    return features;
}
#if __MWERKS__
#pragma altivec_model off
#endif
#else
/* Feature 1 is has-MMX */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
#endif
/* This is now endian dependent */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
......@@ -1508,6 +2340,15 @@ static const struct blit_table normal_blit_1[] = {
};
static const struct blit_table normal_blit_2[] = {
#if SDL_ALTIVEC_BLITTERS
/* has-altivec */
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
0x00000000,
2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
{0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
0x00000000,
2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
#endif
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
0x000000FF,
0, Blit_RGB565_ARGB8888, SET_ALPHA},
......@@ -1531,6 +2372,22 @@ static const struct blit_table normal_blit_3[] = {
};
static const struct blit_table normal_blit_4[] = {
#if SDL_ALTIVEC_BLITTERS
/* has-altivec | dont-use-prefetch */
{0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
0x00000000,
6, ConvertAltivec32to32_noprefetch,
NO_ALPHA | COPY_ALPHA | SET_ALPHA},
/* has-altivec */
{0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
0x00000000,
2, ConvertAltivec32to32_prefetch,
NO_ALPHA | COPY_ALPHA | SET_ALPHA},
/* has-altivec */
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
0x0000001F,
2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
#endif
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
0x0000001F,
0, Blit_RGB888_RGB565, NO_ALPHA},
......@@ -1628,6 +2485,12 @@ SDL_CalculateBlitN(SDL_Surface * surface)
else if (dstfmt->BytesPerPixel == 1)
return BlitNto1Key;
else {
#if SDL_ALTIVEC_BLITTERS
if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
&& SDL_HasAltiVec()) {
return Blit32to32KeyAltivec;
} else
#endif
if (srcfmt->Amask && dstfmt->Amask) {
return BlitNtoNKeyCopyAlpha;
} else {
......
......@@ -158,7 +158,9 @@ int test_platform (void)
SDL_ATprintVerbose( 1, "CPU count: %d\n", SDL_GetCPUCount());
SDL_ATprintVerbose( 1, "Available extensions:\n" );
SDL_ATprintVerbose( 1, " RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected" );
SDL_ATprintVerbose( 1, " AltiVec %s\n", SDL_HasAltiVec()? "detected" : "not detected" );
SDL_ATprintVerbose( 1, " MMX %s\n", SDL_HasMMX()? "detected" : "not detected" );
SDL_ATprintVerbose( 1, " 3DNow! %s\n", SDL_Has3DNow()? "detected" : "not detected" );
SDL_ATprintVerbose( 1, " SSE %s\n", SDL_HasSSE()? "detected" : "not detected" );
SDL_ATprintVerbose( 1, " SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected" );
SDL_ATprintVerbose( 1, " SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected" );
......
......@@ -140,9 +140,11 @@ TestCPUInfo(SDL_bool verbose)
{
if (verbose) {
printf("CPU count: %d\n", SDL_GetCPUCount());
printf("CPU cache line size: %d\n", SDL_GetCPUCacheLineSize());
printf("CPU cache line size: %d\n", SDL_GetCPUCacheLineSize());
printf("RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected");
printf("AltiVec %s\n", SDL_HasAltiVec()? "detected" : "not detected");
printf("MMX %s\n", SDL_HasMMX()? "detected" : "not detected");
printf("3DNow! %s\n", SDL_Has3DNow()? "detected" : "not detected");
printf("SSE %s\n", SDL_HasSSE()? "detected" : "not detected");
printf("SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected");
printf("SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment