Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
L
libSDL
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
PocketInsanity
libSDL
Commits
0314fd4e
Commit
0314fd4e
authored
Feb 11, 2011
by
Sam Lantinga
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Updated CPU detection code for SSE3 and SSE4 and removed obsolete 3DNow! and Altivec support.
parent
e8f3f1e9
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
57 additions
and
2075 deletions
+57
-2075
configure.in
configure.in
+0
-103
SDL_config.h.in
include/SDL_config.h.in
+0
-2
SDL_config_macosx.h
include/SDL_config_macosx.h
+0
-3
SDL_cpuinfo.h
include/SDL_cpuinfo.h
+8
-17
SDL_cpuinfo.c
src/cpuinfo/SDL_cpuinfo.c
+39
-118
SDL_blit.c
src/video/SDL_blit.c
+0
-34
SDL_blit.h
src/video/SDL_blit.h
+0
-6
SDL_blit_A.c
src/video/SDL_blit_A.c
+8
-918
SDL_blit_N.c
src/video/SDL_blit_N.c
+0
-869
testplatform.c
test/testplatform.c
+2
-5
No files found.
configure.in
View file @
0314fd4e
...
...
@@ -501,33 +501,6 @@ AC_HELP_STRING([--enable-mmx], [use MMX assembly routines [[default=yes]]]),
fi
fi
dnl Optional 3DNow! support.  When enabled (the default), check that the
dnl compiler accepts -m3dnow, ships <mm3dnow.h> and defines __3dNOW__ with
dnl that flag; on success the flag is appended to EXTRA_CFLAGS.
AC_ARG_ENABLE(3dnow,
AC_HELP_STRING([--enable-3dnow], [use 3DNow! assembly routines [[default=yes]]]),
, enable_3dnow=yes)
if test x$enable_3dnow = xyes; then
    dnl Probe with the candidate flag on a scratch copy of CFLAGS.
    save_CFLAGS="$CFLAGS"
    have_gcc_3dnow=no
    AC_MSG_CHECKING(for GCC -m3dnow option)
    amd3dnow_CFLAGS="-m3dnow"
    CFLAGS="$save_CFLAGS $amd3dnow_CFLAGS"
    AC_TRY_COMPILE([
#include <mm3dnow.h>
#ifndef __3dNOW__
#error Assembler CPP flag not enabled
#endif
    ],[
    ],[
    have_gcc_3dnow=yes
    ])
    AC_MSG_RESULT($have_gcc_3dnow)
    dnl Always restore the caller's CFLAGS; only EXTRA_CFLAGS keeps the flag.
    CFLAGS="$save_CFLAGS"
    if test x$have_gcc_3dnow = xyes; then
        EXTRA_CFLAGS="$EXTRA_CFLAGS $amd3dnow_CFLAGS"
    fi
fi
AC_ARG_ENABLE(sse,
AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]),
, enable_sse=yes)
...
...
@@ -599,82 +572,6 @@ AC_HELP_STRING([--enable-sse2], [use SSE2 assembly routines [[default=no]]]),
EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
fi
fi
dnl Optional Altivec support.  Up to four compile probes are tried in order,
dnl stopping at the first that succeeds:
dnl   1. -maltivec with <altivec.h>
dnl   2. -maltivec without the header
dnl   3. -faltivec with <altivec.h>
dnl   4. -faltivec without the header
dnl On success SDL_ALTIVEC_BLITTERS is defined, HAVE_ALTIVEC_H is defined if
dnl the successful probe included the header, and the winning flag is
dnl appended to EXTRA_CFLAGS.
AC_ARG_ENABLE(altivec,
AC_HELP_STRING([--enable-altivec], [use Altivec assembly routines [[default=yes]]]),
, enable_altivec=yes)
if test x$enable_altivec = xyes; then
save_CFLAGS="$CFLAGS"
have_gcc_altivec=no
have_altivec_h_hdr=no
dnl Probe 1: -maltivec plus the altivec.h header.
altivec_CFLAGS="-maltivec"
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
AC_MSG_CHECKING(for Altivec with GCC altivec.h and -maltivec option)
AC_TRY_COMPILE([
#include <altivec.h>
vector unsigned int vzero() {
return vec_splat_u32(0);
}
],[
],[
have_gcc_altivec=yes
have_altivec_h_hdr=yes
])
AC_MSG_RESULT($have_gcc_altivec)
dnl Probe 2: same flag, but without including the header.
if test x$have_gcc_altivec = xno; then
AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
AC_TRY_COMPILE([
vector unsigned int vzero() {
return vec_splat_u32(0);
}
],[
],[
have_gcc_altivec=yes
])
AC_MSG_RESULT($have_gcc_altivec)
fi
dnl Probe 3: switch the candidate flag to -faltivec, with the header.
if test x$have_gcc_altivec = xno; then
AC_MSG_CHECKING(for Altivec with GCC altivec.h and -faltivec option)
altivec_CFLAGS="-faltivec"
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
AC_TRY_COMPILE([
#include <altivec.h>
vector unsigned int vzero() {
return vec_splat_u32(0);
}
],[
],[
have_gcc_altivec=yes
have_altivec_h_hdr=yes
])
AC_MSG_RESULT($have_gcc_altivec)
fi
dnl Probe 4: -faltivec (still in CFLAGS from probe 3) without the header.
if test x$have_gcc_altivec = xno; then
AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
AC_TRY_COMPILE([
vector unsigned int vzero() {
return vec_splat_u32(0);
}
],[
],[
have_gcc_altivec=yes
])
AC_MSG_RESULT($have_gcc_altivec)
fi
dnl Restore the original CFLAGS before recording the result.
CFLAGS="$save_CFLAGS"
if test x$have_gcc_altivec = xyes; then
AC_DEFINE(SDL_ALTIVEC_BLITTERS)
if test x$have_altivec_h_hdr = xyes; then
AC_DEFINE(HAVE_ALTIVEC_H)
fi
EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
fi
fi
fi
dnl See if the OSS audio interface is supported
...
...
include/SDL_config.h.in
View file @
0314fd4e
...
...
@@ -82,7 +82,6 @@
#undef HAVE_MATH_H
#undef HAVE_ICONV_H
#undef HAVE_SIGNAL_H
#undef HAVE_ALTIVEC_H
/* C library functions */
#undef HAVE_MALLOC
...
...
@@ -303,6 +302,5 @@
/* Enable assembly routines */
#undef SDL_ASSEMBLY_ROUTINES
#undef SDL_ALTIVEC_BLITTERS
#endif /* _SDL_config_h */
include/SDL_config_macosx.h
View file @
0314fd4e
...
...
@@ -168,8 +168,5 @@
/* Enable assembly routines */
#define SDL_ASSEMBLY_ROUTINES 1
#ifdef __ppc__
#define SDL_ALTIVEC_BLITTERS 1
#endif
#endif
/* _SDL_config_macosx_h */
include/SDL_cpuinfo.h
View file @
0314fd4e
...
...
@@ -69,21 +69,6 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasRDTSC(void);
*/
extern
DECLSPEC
SDL_bool
SDLCALL
SDL_HasMMX
(
void
);
/**
* This function returns true if the CPU has MMX Ext.\ features.
*/
extern
DECLSPEC
SDL_bool
SDLCALL
SDL_HasMMXExt
(
void
);
/**
* This function returns true if the CPU has 3DNow!\ features.
*/
extern
DECLSPEC
SDL_bool
SDLCALL
SDL_Has3DNow
(
void
);
/**
* This function returns true if the CPU has 3DNow!\ Ext.\ features.
*/
extern
DECLSPEC
SDL_bool
SDLCALL
SDL_Has3DNowExt
(
void
);
/**
* This function returns true if the CPU has SSE features.
*/
...
...
@@ -95,9 +80,15 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE(void);
extern
DECLSPEC
SDL_bool
SDLCALL
SDL_HasSSE2
(
void
);
/**
* This function returns true if the CPU has AltiVec features.
* This function returns true if the CPU has SSE3 features.
*/
extern
DECLSPEC
SDL_bool
SDLCALL
SDL_HasSSE3
(
void
);
/**
* This function returns true if the CPU has SSE4 features.
*/
extern
DECLSPEC
SDL_bool
SDLCALL
SDL_HasAltiVec
(
void
);
extern
DECLSPEC
SDL_bool
SDLCALL
SDL_HasSSE4
(
void
);
/* Ends C function definitions when using C++ */
#ifdef __cplusplus
...
...
src/cpuinfo/SDL_cpuinfo.c
View file @
0314fd4e
...
...
@@ -32,36 +32,17 @@
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
#include <sys/sysctl.h>
/* For AltiVec check */
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
#include <signal.h>
#include <setjmp.h>
#endif
#ifdef __WIN32__
#include "../core/windows/SDL_windows.h"
#endif
#define CPU_HAS_RDTSC 0x00000001
#define CPU_HAS_MMX 0x00000002
#define CPU_HAS_MMXEXT 0x00000004
#define CPU_HAS_3DNOW 0x00000010
#define CPU_HAS_3DNOWEXT 0x00000020
#define CPU_HAS_SSE 0x00000040
#define CPU_HAS_SSE2 0x00000080
#define CPU_HAS_ALTIVEC 0x00000100
#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__
/* This is the brute force way of detecting instruction sets...
the idea is borrowed from the libmpeg2 library - thanks!
*/
/* Jump context that illegal_instruction() returns control to. */
static jmp_buf jmpbuf;

/*
 * Signal handler used by the brute-force instruction probe: it abandons the
 * faulting code by jumping back to wherever jmpbuf was last armed with
 * setjmp(), which then observes a return value of 1.
 *
 * sig: the delivered signal number; not inspected.
 */
static void
illegal_instruction(int sig)
{
    (void) sig;
    longjmp(jmpbuf, 1);
}
#endif
/* HAVE_SETJMP */
#define CPU_HAS_SSE 0x00000010
#define CPU_HAS_SSE2 0x00000020
#define CPU_HAS_SSE3 0x00000040
#define CPU_HAS_SSE4 0x00000080
static
__inline__
int
CPU_haveCPUID
(
void
)
...
...
@@ -201,20 +182,6 @@ CPU_getCPUIDFeatures(void)
return
features
;
}
/*
 * Return the fourth output (d) of CPUID function 0x80000001, or 0 when the
 * first output of function 0x80000000 reports that 0x80000001 is not
 * available.
 *
 * NOTE(review): cpuid() is a macro defined elsewhere in this file; it is
 * presumed to store the EAX/EBX/ECX/EDX results into its last four
 * arguments - confirm against its definition.
 */
static __inline__ int
CPU_getCPUIDFeaturesExt(void)
{
    int regA, regB, regC, regD;

    /* First query: regA receives the highest supported extended function. */
    cpuid(0x80000000, regA, regB, regC, regD);
    if (regA < 0x80000001) {
        return 0;
    }

    /* Second query: the extended feature flags come back in regD. */
    cpuid(0x80000001, regA, regB, regC, regD);
    return regD;
}
static
__inline__
int
CPU_haveRDTSC
(
void
)
{
...
...
@@ -234,71 +201,51 @@ CPU_haveMMX(void)
}
static
__inline__
int
CPU_have
MMXExt
(
void
)
CPU_have
SSE
(
void
)
{
if
(
CPU_haveCPUID
())
{
return
(
CPU_getCPUIDFeatures
Ext
()
&
0x004
00000
);
return
(
CPU_getCPUIDFeatures
()
&
0x020
00000
);
}
return
0
;
}
static
__inline__
int
CPU_have
3DNow
(
void
)
CPU_have
SSE2
(
void
)
{
if
(
CPU_haveCPUID
())
{
return
(
CPU_getCPUIDFeatures
Ext
()
&
0x80
000000
);
return
(
CPU_getCPUIDFeatures
()
&
0x04
000000
);
}
return
0
;
}
static
__inline__
int
CPU_have
3DNowExt
(
void
)
CPU_have
SSE3
(
void
)
{
if
(
CPU_haveCPUID
())
{
return
(
CPU_getCPUIDFeaturesExt
()
&
0x40000000
);
}
return
0
;
}
int
a
,
b
,
c
,
d
;
static
__inline__
int
CPU_haveSSE
(
void
)
{
if
(
CPU_haveCPUID
())
{
return
(
CPU_getCPUIDFeatures
()
&
0x02000000
);
cpuid
(
0
,
a
,
b
,
c
,
d
);
if
(
a
>=
1
)
{
cpuid
(
1
,
a
,
b
,
c
,
d
);
return
(
c
&
0x00000001
);
}
}
return
0
;
}
static
__inline__
int
CPU_haveSSE
2
(
void
)
CPU_haveSSE
4
(
void
)
{
if
(
CPU_haveCPUID
())
{
return
(
CPU_getCPUIDFeatures
()
&
0x04000000
);
}
return
0
;
}
int
a
,
b
,
c
,
d
;
static
__inline__
int
CPU_haveAltiVec
(
void
)
{
volatile
int
altivec
=
0
;
#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
int
selectors
[
2
]
=
{
CTL_HW
,
HW_VECTORUNIT
};
int
hasVectorUnit
=
0
;
size_t
length
=
sizeof
(
hasVectorUnit
);
int
error
=
sysctl
(
selectors
,
2
,
&
hasVectorUnit
,
&
length
,
NULL
,
0
);
if
(
0
==
error
)
altivec
=
(
hasVectorUnit
!=
0
);
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
void
(
*
handler
)
(
int
sig
);
handler
=
signal
(
SIGILL
,
illegal_instruction
);
if
(
setjmp
(
jmpbuf
)
==
0
)
{
asm
volatile
(
"mtspr 256, %0
\n\t
"
"vand %%v0, %%v0, %%v0"
::
"r"
(
-
1
));
altivec
=
1
;
cpuid
(
0
,
a
,
b
,
c
,
d
);
if
(
a
>=
1
)
{
cpuid
(
1
,
a
,
b
,
c
,
d
);
return
(
c
&
0x00000100
);
}
}
signal
(
SIGILL
,
handler
);
#endif
return
altivec
;
return
0
;
}
static
int
SDL_CPUCount
=
0
;
...
...
@@ -471,23 +418,17 @@ SDL_GetCPUFeatures(void)
if
(
CPU_haveMMX
())
{
SDL_CPUFeatures
|=
CPU_HAS_MMX
;
}
if
(
CPU_haveMMXExt
())
{
SDL_CPUFeatures
|=
CPU_HAS_MMXEXT
;
}
if
(
CPU_have3DNow
())
{
SDL_CPUFeatures
|=
CPU_HAS_3DNOW
;
}
if
(
CPU_have3DNowExt
())
{
SDL_CPUFeatures
|=
CPU_HAS_3DNOWEXT
;
}
if
(
CPU_haveSSE
())
{
SDL_CPUFeatures
|=
CPU_HAS_SSE
;
}
if
(
CPU_haveSSE2
())
{
SDL_CPUFeatures
|=
CPU_HAS_SSE2
;
}
if
(
CPU_haveAltiVec
())
{
SDL_CPUFeatures
|=
CPU_HAS_ALTIVEC
;
if
(
CPU_haveSSE3
())
{
SDL_CPUFeatures
|=
CPU_HAS_SSE3
;
}
if
(
CPU_haveSSE4
())
{
SDL_CPUFeatures
|=
CPU_HAS_SSE4
;
}
}
return
SDL_CPUFeatures
;
...
...
@@ -512,54 +453,36 @@ SDL_HasMMX(void)
}
SDL_bool
SDL_HasMMXExt
(
void
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_MMXEXT
)
{
return
SDL_TRUE
;
}
return
SDL_FALSE
;
}
SDL_bool
SDL_Has3DNow
(
void
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_3DNOW
)
{
return
SDL_TRUE
;
}
return
SDL_FALSE
;
}
SDL_bool
SDL_Has3DNowExt
(
void
)
SDL_HasSSE
(
void
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_
3DNOWEXT
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_
SSE
)
{
return
SDL_TRUE
;
}
return
SDL_FALSE
;
}
SDL_bool
SDL_HasSSE
(
void
)
SDL_HasSSE
2
(
void
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_SSE
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_SSE
2
)
{
return
SDL_TRUE
;
}
return
SDL_FALSE
;
}
SDL_bool
SDL_HasSSE
2
(
void
)
SDL_HasSSE
3
(
void
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_SSE
2
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_SSE
3
)
{
return
SDL_TRUE
;
}
return
SDL_FALSE
;
}
SDL_bool
SDL_Has
AltiVec
(
void
)
SDL_Has
SSE4
(
void
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_
ALTIVEC
)
{
if
(
SDL_GetCPUFeatures
()
&
CPU_HAS_
SSE4
)
{
return
SDL_TRUE
;
}
return
SDL_FALSE
;
...
...
@@ -578,12 +501,10 @@ main()
printf
(
"CacheLine size: %d
\n
"
,
SDL_GetCPUCacheLineSize
());
printf
(
"RDTSC: %d
\n
"
,
SDL_HasRDTSC
());
printf
(
"MMX: %d
\n
"
,
SDL_HasMMX
());
printf
(
"MMXExt: %d
\n
"
,
SDL_HasMMXExt
());
printf
(
"3DNow: %d
\n
"
,
SDL_Has3DNow
());
printf
(
"3DNowExt: %d
\n
"
,
SDL_Has3DNowExt
());
printf
(
"SSE: %d
\n
"
,
SDL_HasSSE
());
printf
(
"SSE2: %d
\n
"
,
SDL_HasSSE2
());
printf
(
"AltiVec: %d
\n
"
,
SDL_HasAltiVec
());
printf
(
"SSE3: %d
\n
"
,
SDL_HasSSE3
());
printf
(
"SSE4: %d
\n
"
,
SDL_HasSSE4
());
return
0
;
}
...
...
src/video/SDL_blit.c
View file @
0314fd4e
...
...
@@ -100,30 +100,6 @@ SDL_SoftBlit(SDL_Surface * src, SDL_Rect * srcrect,
return
(
okay
?
0
:
-
1
);
}
#ifdef __MACOSX__
#include <sys/sysctl.h>
#endif

/*
 * Report whether the prefetching variants of the AltiVec blitters should be
 * selected.
 *
 * On Mac OS X this is SDL_TRUE only when the "hw.l3cachesize" sysctl can be
 * read and yields a non-zero value; any sysctl failure gives SDL_FALSE.
 * On every other platform the answer is always SDL_TRUE.
 */
static SDL_bool
SDL_UseAltivecPrefetch()
{
#ifdef __MACOSX__
    const char name[] = "hw.l3cachesize";
    u_int64_t l3_bytes = 0;
    size_t l3_len = sizeof(l3_bytes);

    if (sysctlbyname(name, &l3_bytes, &l3_len, NULL, 0) != 0) {
        return SDL_FALSE;
    }
    if (l3_bytes > 0) {
        return SDL_TRUE;
    }
    return SDL_FALSE;
#else
    /* Just guess G4 */
    return SDL_TRUE;
#endif /* __MACOSX__ */
}
static
SDL_BlitFunc
SDL_ChooseBlitFunc
(
Uint32
src_format
,
Uint32
dst_format
,
int
flags
,
...
...
@@ -145,22 +121,12 @@ SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
if
(
SDL_HasMMX
())
{
features
|=
SDL_CPU_MMX
;
}
if
(
SDL_Has3DNow
())
{
features
|=
SDL_CPU_3DNOW
;
}
if
(
SDL_HasSSE
())
{
features
|=
SDL_CPU_SSE
;
}
if
(
SDL_HasSSE2
())
{
features
|=
SDL_CPU_SSE2
;
}
if
(
SDL_HasAltiVec
())
{
if
(
SDL_UseAltivecPrefetch
())
{
features
|=
SDL_CPU_ALTIVEC_PREFETCH
;
}
else
{
features
|=
SDL_CPU_ALTIVEC_NOPREFETCH
;
}
}
}
}
...
...
src/video/SDL_blit.h
View file @
0314fd4e
...
...
@@ -34,9 +34,6 @@
#ifdef __MMX__
#include <mmintrin.h>
#endif
#ifdef __3dNOW__
#include <mm3dnow.h>
#endif
#ifdef __SSE__
#include <xmmintrin.h>
#endif
...
...
@@ -65,11 +62,8 @@
/* SDL blit CPU flags */
#define SDL_CPU_ANY 0x00000000
#define SDL_CPU_MMX 0x00000001
#define SDL_CPU_3DNOW 0x00000002
#define SDL_CPU_SSE 0x00000004
#define SDL_CPU_SSE2 0x00000008
#define SDL_CPU_ALTIVEC_PREFETCH 0x00000010
#define SDL_CPU_ALTIVEC_NOPREFETCH 0x00000020
typedef
struct
{
...
...
src/video/SDL_blit_A.c
View file @
0314fd4e
...
...
@@ -419,806 +419,6 @@ BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
#endif
/* __MMX__ */
#if SDL_ALTIVEC_BLITTERS
#if __MWERKS__
#pragma altivec_model on
#endif
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include <assert.h>
/*
 * Vector literal helpers.  GCC older than 4 on Mac OS X gets the
 * parenthesised literal form; every other compiler gets the brace form.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#endif

/*
 * Low four bits of the address x: zero exactly when x is 16-byte aligned.
 * The argument is parenthesised so that pointer expressions such as
 * UNALIGNED_PTR(p + 1) are evaluated before the cast to size_t.
 */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

/* Debug helper: print a vector as four 32-bit hex words, prefixed by msg. */
#define VECPRINT(msg, v) do { \
    vector unsigned int tmpvec = (vector unsigned int)(v); \
    unsigned int *vp = (unsigned int *)&tmpvec; \
    printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
} while (0)

/* the permutation vector that takes the high bytes out of all the appropriate shorts
    (vector unsigned char)(
        0x00, 0x10, 0x02, 0x12,
        0x04, 0x14, 0x06, 0x16,
        0x08, 0x18, 0x0A, 0x1A,
        0x0C, 0x1C, 0x0E, 0x1E );
*/
#define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))
/* Four 32-bit lanes each holding 24 (built as 12 + 12). */
#define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12)))
/* All-ones shifted left by 24 in every 32-bit lane. */
#define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
/*
 * Permute vector used to realign a source stream: vec_lvsl(0, src) when src
 * is not 16-byte aligned, otherwise vec_lvsl(8, src) with 8 added to every
 * byte.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    ? vec_lvsl(0, src) \
    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
/*
 * Blend vs over vd with the per-byte weights in valpha and leave the result
 * in vd.  For every byte this computes t = vs*valpha + vd*(255 - valpha),
 * then ((t + 1) + ((t + 1) >> 8)) >> 8, i.e. a rounded division by 255.
 *
 *   vs, vd        source and destination pixels (vd is overwritten)
 *   valpha        blend weight for each byte
 *   mergePermute  permute vector that picks the high byte of each 16-bit
 *                 product (see VEC_MERGE_PERMUTE)
 *   v1_16, v8_16  16-bit splats of 1 and 8
 *
 * Every line, including the comment lines, must end in a backslash so the
 * whole body stays inside the macro definition.
 */
#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
    /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
    vector unsigned short vtemp1 = vec_mule(vs, valpha); \
    /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
    vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
    /* valpha2 is 255-alpha */ \
    vector unsigned char valpha2 = vec_nor(valpha, valpha); \
    /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \
    vector unsigned short vtemp3 = vec_mule(vd, valpha2); \
    /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \
    vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \
    /* add source and dest */ \
    vtemp1 = vec_add(vtemp1, vtemp3); \
    vtemp2 = vec_add(vtemp2, vtemp4); \
    /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \
    vtemp1 = vec_add(vtemp1, v1_16); \
    vtemp3 = vec_sr(vtemp1, v8_16); \
    vtemp1 = vec_add(vtemp1, vtemp3); \
    /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \
    vtemp2 = vec_add(vtemp2, v1_16); \
    vtemp4 = vec_sr(vtemp2, v8_16); \
    vtemp2 = vec_add(vtemp2, vtemp4); \
    /* (>>8) and get ARGBARGBARGBARGB */ \
    vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
} while (0)
/* Calculate the permute vector used for 32->32 swizzling */
static
vector
unsigned
char
calc_swizzle32
(
const
SDL_PixelFormat
*
srcfmt
,
const
SDL_PixelFormat
*
dstfmt
)
{
/*
* We have to assume that the bits that aren't used by other
* colors is alpha, and it's one complete byte, since some formats
* leave alpha with a zero mask, but we should still swizzle the bits.
*/
/* ARGB */
const
static
struct
SDL_PixelFormat
default_pixel_format
=
{
NULL
,
0
,
0
,
0
,
0
,
0
,
0
,
16
,
8
,
0
,
24
,
0x00FF0000
,
0x0000FF00
,
0x000000FF
,
0xFF000000
};
if
(
!
srcfmt
)
{
srcfmt
=
&
default_pixel_format
;
}
if
(
!
dstfmt
)
{
dstfmt
=
&
default_pixel_format
;
}
const
vector
unsigned
char
plus
=
VECUINT8_LITERAL
(
0x00
,
0x00
,
0x00
,
0x00
,
0x04
,
0x04
,
0x04
,
0x04
,
0x08
,
0x08
,
0x08
,
0x08
,
0x0C
,
0x0C
,
0x0C
,
0x0C
);
vector
unsigned
char
vswiz
;
vector
unsigned
int
srcvec
;
#define RESHIFT(X) (3 - ((X) >> 3))
Uint32
rmask
=
RESHIFT
(
srcfmt
->
Rshift
)
<<
(
dstfmt
->
Rshift
);
Uint32
gmask
=
RESHIFT
(
srcfmt
->
Gshift
)
<<
(
dstfmt
->
Gshift
);
Uint32
bmask
=
RESHIFT
(
srcfmt
->
Bshift
)
<<
(
dstfmt
->
Bshift
);
Uint32
amask
;
/* Use zero for alpha if either surface doesn't have alpha */
if
(
dstfmt
->
Amask
)
{
amask
=
((
srcfmt
->
Amask
)
?
RESHIFT
(
srcfmt
->
Ashift
)
:
0x10
)
<<
(
dstfmt
->
Ashift
);
}
else
{
amask
=
0x10101010
&
((
dstfmt
->
Rmask
|
dstfmt
->
Gmask
|
dstfmt
->
Bmask
)
^
0xFFFFFFFF
);
}
#undef RESHIFT
((
unsigned
int
*
)
(
char
*
)
&
srcvec
)[
0
]
=
(
rmask
|
gmask
|
bmask
|
amask
);
vswiz
=
vec_add
(
plus
,
(
vector
unsigned
char
)
vec_splat
(
srcvec
,
0
));
return
(
vswiz
);
}
/*
 * Per-pixel alpha blit from a 32-bit source surface onto a 16-bit 5-6-5
 * destination, using AltiVec for the bulk of each row.
 *
 * Each row is handled in three phases: pixels are blended one at a time
 * until dst is 16-byte aligned, then eight pixels per iteration are blended
 * with vector code, and the remaining (width % 8) pixels are again blended
 * one at a time.
 *
 * info: blit description; reads src, dst, src_skip, dst_skip, src_fmt,
 *       dst_w and dst_h.
 */
static void
Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
{
    Uint8 *src = (Uint8 *) info->src;
    Uint8 *dst = (Uint8 *) info->dst;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    int src_pad = info->src_skip;
    int dst_pad = info->dst_skip;
    int rows = info->dst_h;

    /* Splatted constants. */
    vector unsigned char vzero = vec_splat_u8(0);
    vector unsigned short vsh8_16 = vec_splat_u16(8);
    vector unsigned short vone_16 = vec_splat_u16(1);
    vector unsigned short vsh2_16 = vec_splat_u16(2);
    vector unsigned short vsh3_16 = vec_splat_u16(3);
    vector unsigned int veight_32 = vec_splat_u32(8);
    vector unsigned int vsixteen_32 = vec_add(veight_32, veight_32);
    vector unsigned short vmask_003f = VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
                                                         0x003f, 0x003f, 0x003f, 0x003f);
    vector unsigned short vmask_00fc = VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
                                                         0x00fc, 0x00fc, 0x00fc, 0x00fc);

    /*
     * Permute tables that expand 565 pixels to 32 bits.  The "1" tables
     * serve the first four pixels, the "2" tables the last four.
     *
     *  0x10 - 0x1f is the alpha
     *  0x00 - 0x0e evens are the red
     *  0x01 - 0x0f odds are zero
     */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01, 0x01);
    vector unsigned char vredalpha2 =
        (vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
                                        vec_sl(veight_32, vsixteen_32)));
    /*
     *  0x00 - 0x0f is ARxx ARxx ARxx ARxx
     *  0x11 - 0x0f odds are blue
     */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    vector unsigned char vblue2 =
        (vector unsigned char) (vec_add((vector unsigned int) vblue1, veight_32));
    /*
     *  0x00 - 0x0f is ARxB ARxB ARxB ARxB
     *  0x10 - 0x0e evens are green
     */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    vector unsigned char vgreen2 =
        (vector unsigned char) (vec_add((vector unsigned int) vgreen1,
                                        vec_sl(veight_32, veight_32)));
    /* Gathers the green byte of each blended pixel into a 16-bit lane. */
    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
                                                    0x00, 0x0a, 0x00, 0x0e,
                                                    0x00, 0x12, 0x00, 0x16,
                                                    0x00, 0x1a, 0x00, 0x1e);
    vector unsigned char vmerge = VEC_MERGE_PERMUTE();
    vector unsigned char vswizzle = calc_swizzle32(srcfmt, NULL);
    vector unsigned char valpha_sel =
        vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));

    /*
     * Red mask for 565 pixels.
     * NOTE(review): vec_splat_u8(-7) appears to give 0xF9 per byte, which
     * makes this 0xF900 rather than the 0xF800 the scalar path uses -
     * confirm the intent before changing it.
     */
    vector unsigned short vmask_red =
        vec_sl((vector unsigned short) vec_splat_u8(-7), vec_splat_u16(8));

/* Scalar fallback: blend one pixel at a time while `condition` holds. */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, dR, dG, dB, sA; \
            DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
            if(sA) { \
                unsigned short dstpixel = *((unsigned short *)dst); \
                dR = (dstpixel >> 8) & 0xf8; \
                dG = (dstpixel >> 3) & 0xfc; \
                dB = (dstpixel << 3) & 0xf8; \
                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
                *((unsigned short *)dst) = ( \
                    ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \
                ); \
            } \
            src += 4; \
            dst += 2; \
            widthvar--; \
        }

    while (rows--) {
        int width = info->dst_w;
        int tail;
        vector unsigned char valigner;
        vector unsigned char vsrc;
        vector unsigned char vnext;

        /* Phase 1: scalar pixels until dst reaches a 16-byte boundary. */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);

        tail = (width % 8);
        valigner = VEC_ALIGNER(src);
        vsrc = (vector unsigned char) vec_ld(0, src);
        width -= tail;

        /* Phase 2: eight pixels per iteration. */
        while (width) {
            vector unsigned char valpha;
            vector unsigned char vsrc_lo, vsrc_hi;
            vector unsigned char vdst_lo, vdst_hi;
            vector unsigned short vR, vG, vB;
            vector unsigned short vpacked, vred565, vgreen565, vblue565;

            /* Load 8 pixels from src as ARGB */
            vnext = (vector unsigned char) vec_ld(15, src);
            vsrc = vec_perm(vsrc, vnext, valigner);
            vsrc_lo = vec_perm(vsrc, vsrc, vswizzle);
            src += 16;
            vsrc = (vector unsigned char) vec_ld(15, src);
            vnext = vec_perm(vnext, vsrc, valigner);
            vsrc_hi = vec_perm(vnext, vnext, vswizzle);
            src += 16;

            /* Load 8 pixels from dst as XRGB */
            vnext = vec_ld(0, dst);
            vR = vec_and((vector unsigned short) vnext, vmask_red);
            vB = vec_sl((vector unsigned short) vnext, vsh3_16);
            vG = vec_sl(vB, vsh2_16);

            vdst_lo = (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                      (vector unsigned char) vR,
                                                      vredalpha1);
            vdst_lo = vec_perm(vdst_lo, (vector unsigned char) vB, vblue1);
            vdst_lo = vec_perm(vdst_lo, (vector unsigned char) vG, vgreen1);

            vdst_hi = (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                      (vector unsigned char) vR,
                                                      vredalpha2);
            vdst_hi = vec_perm(vdst_hi, (vector unsigned char) vB, vblue2);
            vdst_hi = vec_perm(vdst_hi, (vector unsigned char) vG, vgreen2);

            /* Alpha blend 8 pixels as ARGB */
            valpha = vec_perm(vsrc_lo, vzero, valpha_sel);
            VEC_MULTIPLY_ALPHA(vsrc_lo, vdst_lo, valpha, vmerge, vone_16,
                               vsh8_16);
            valpha = vec_perm(vsrc_hi, vzero, valpha_sel);
            VEC_MULTIPLY_ALPHA(vsrc_hi, vdst_hi, valpha, vmerge, vone_16,
                               vsh8_16);

            /* Convert 8 pixels to 565 */
            vpacked = (vector unsigned short) vec_packpx((vector unsigned int) vdst_lo,
                                                         (vector unsigned int) vdst_hi);
            vgreen565 = (vector unsigned short) vec_perm(vdst_lo, vdst_hi, vgmerge);
            vgreen565 = vec_and(vgreen565, vmask_00fc);
            vgreen565 = vec_sl(vgreen565, vsh3_16);
            vred565 = vec_sl(vpacked, vone_16);
            vred565 = vec_and(vred565, vmask_red);
            vblue565 = vec_and(vpacked, vmask_003f);
            vdst_lo = vec_or((vector unsigned char) vred565,
                             (vector unsigned char) vgreen565);
            vdst_lo = vec_or(vdst_lo, (vector unsigned char) vblue565);

            /* Store 8 pixels */
            vec_st(vdst_lo, 0, dst);

            width -= 8;
            dst += 16;
        }

        /* Phase 3: scalar pixels left over after the vector loop. */
        ONE_PIXEL_BLEND((tail), tail);

        src += src_pad;
        dst += dst_pad;
    }
#undef ONE_PIXEL_BLEND
}
/*
 * 32-bit to 32-bit blit with one surface-wide alpha value and a source
 * colour key, using AltiVec for the bulk of each row.
 *
 * Pixels whose RGB bits match the colour key leave the destination
 * untouched; the rest are blended with alpha info->a and written with
 * the alpha channel fully set.  Each row is blended one pixel at a time
 * until dstp is 16-byte aligned, then four pixels per iteration with vector
 * code, then the (width % 4) remainder one pixel at a time.
 *
 * info: blit description; reads src, dst, src_skip, dst_skip, src_fmt,
 *       dst_fmt, a, colorkey, dst_w and dst_h.
 */
static void
Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
{
    Uint32 *srcp = (Uint32 *) info->src;
    Uint32 *dstp = (Uint32 *) info->dst;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* Row padding is given in bytes; convert to whole 32-bit pixels. */
    int src_pad = info->src_skip >> 2;
    int dst_pad = info->dst_skip >> 2;
    int rows = info->dst_h;
    unsigned sA = info->a;
    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    /* Only the colour bits of the key take part in comparisons. */
    Uint32 ckey = info->colorkey & rgbmask;

    vector unsigned char vmerge = VEC_MERGE_PERMUTE();
    vector unsigned char vzero = vec_splat_u8(0);
    vector unsigned short vone_16 = vec_splat_u16(1);
    vector unsigned short vsh8_16 = vec_splat_u16(8);
    /* set the alpha to 255 on the destination surf */
    vector unsigned char valphamask = VEC_ALPHA_MASK();
    vector unsigned char vsrc_to_argb = calc_swizzle32(srcfmt, NULL);
    vector unsigned char vargb_to_dst = calc_swizzle32(NULL, dstfmt);
    vector unsigned char vdst_to_argb = calc_swizzle32(dstfmt, NULL);
    vector unsigned char vones = (vector unsigned char) vec_splat_s8(-1);
    vector unsigned char valpha;
    vector unsigned int vckey;
    vector unsigned int vrgbmask;

    /* set a vector full of alpha and 255-alpha */
    ((unsigned char *) &valpha)[0] = sA;
    valpha = vec_splat(valpha, 0);

    /* Replicate the key and the RGB mask into every 32-bit lane. */
    ((unsigned int *) (char *) &vckey)[0] = ckey;
    vckey = vec_splat(vckey, 0);
    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
    vrgbmask = vec_splat(vrgbmask, 0);

/* Scalar fallback: blend one pixel at a time while `condition` holds. */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, dR, dG, dB; \
            RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \
            if(sA && Pixel != ckey) { \
                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
                DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
                ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
            } \
            dstp++; \
            srcp++; \
            widthvar--; \
        }

    while (rows--) {
        int width = info->dst_w;

        /* Scalar pixels until dstp reaches a 16-byte boundary. */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);

        if (width > 0) {
            int tail = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);

            width -= tail;
            while (width) {
                vector unsigned char vkeyed;
                vector unsigned char vnext;
                vector unsigned char vd;
                vector unsigned char vd_before;

                /* s = *srcp */
                vnext = (vector unsigned char) vec_ld(15, srcp);
                vs = vec_perm(vs, vnext, valigner);

                /* vkeyed is set for items that match the key */
                vkeyed = (vector unsigned char) vec_and((vector unsigned int) vs,
                                                        vrgbmask);
                vkeyed = (vector unsigned char) vec_cmpeq((vector unsigned int) vkeyed,
                                                          vckey);

                /* permute to source format */
                vs = vec_perm(vs, valpha, vsrc_to_argb);

                /* d = *dstp */
                vd = (vector unsigned char) vec_ld(0, dstp);
                vd = vec_perm(vd, vzero, vdst_to_argb);
                vd_before = vd;

                VEC_MULTIPLY_ALPHA(vs, vd, valpha, vmerge, vone_16, vsh8_16);

                /* set the alpha channel to full on */
                vd = vec_or(vd, valphamask);

                /* mask out color key */
                vd = vec_sel(vd, vd_before, vkeyed);

                /* permute to dest format */
                vd = vec_perm(vd, vones, vargb_to_dst);

                /* *dstp = res */
                vec_st((vector unsigned int) vd, 0, dstp);

                srcp += 4;
                dstp += 4;
                width -= 4;
                /* The vector just loaded becomes the low half next time. */
                vs = vnext;
            }

            /* Scalar pixels left over after the vector loop. */
            ONE_PIXEL_BLEND((tail), tail);
        }

        srcp += src_pad;
        dstp += dst_pad;
    }
#undef ONE_PIXEL_BLEND
}
/*
 * 32-bit to 32-bit blit with per-pixel source alpha for arbitrary channel
 * layouts, using AltiVec for the bulk of each row.
 *
 * Source and destination pixels are swizzled to ARGB, blended with the
 * source pixel's own alpha, given back the destination's original alpha
 * and swizzled to the destination layout.  Each row is blended one pixel at
 * a time until dstp is 16-byte aligned, then four pixels per iteration with
 * vector code, then the (width % 4) remainder one pixel at a time.
 *
 * info: blit description; reads src, dst, src_skip, dst_skip, src_fmt,
 *       dst_fmt, dst_w and dst_h.
 */
static void
Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
{
    Uint32 *srcp = (Uint32 *) info->src;
    Uint32 *dstp = (Uint32 *) info->dst;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* Row padding is given in bytes; convert to whole 32-bit pixels. */
    int src_pad = info->src_skip >> 2;
    int dst_pad = info->dst_skip >> 2;
    int rows = info->dst_h;

    vector unsigned char vzero = vec_splat_u8(0);
    vector unsigned short vone_16 = vec_splat_u16(1);
    vector unsigned short vsh8_16 = vec_splat_u16(8);
    vector unsigned char vmerge = VEC_MERGE_PERMUTE();
    vector unsigned char valphamask = VEC_ALPHA_MASK();
    vector unsigned char valpha_sel =
        vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
    /* Complement of the alpha mask: selects the colour bytes. */
    vector unsigned char vcolormask = vec_nor(valphamask, vzero);
    vector unsigned char vsrc_to_argb = calc_swizzle32(srcfmt, NULL);
    vector unsigned char vargb_to_dst = calc_swizzle32(NULL, dstfmt);
    vector unsigned char vdst_to_argb = calc_swizzle32(dstfmt, NULL);

/* Scalar fallback: blend one pixel at a time while `condition` holds. */
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
            DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
            if(sA) { \
              DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \
              ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
              ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \
            } \
            ++srcp; \
            ++dstp; \
            widthvar--; \
        }

    while (rows--) {
        int width = info->dst_w;

        /* Scalar pixels until dstp reaches a 16-byte boundary. */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);

        if (width > 0) {
            int tail = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);

            width -= tail;
            while (width) {
                vector unsigned char vnext;
                vector unsigned char vd;
                vector unsigned char valpha;
                vector unsigned char vdst_alpha;

                /* s = *srcp */
                vnext = (vector unsigned char) vec_ld(15, srcp);
                vs = vec_perm(vs, vnext, valigner);
                vs = vec_perm(vs, vzero, vsrc_to_argb);

                /* Spread each pixel's alpha byte across its four bytes. */
                valpha = vec_perm(vs, vzero, valpha_sel);

                /* d = *dstp */
                vd = (vector unsigned char) vec_ld(0, dstp);
                vd = vec_perm(vd, vzero, vdst_to_argb);
                vdst_alpha = vec_and(vd, valphamask);

                VEC_MULTIPLY_ALPHA(vs, vd, valpha, vmerge, vone_16, vsh8_16);

                /* set the alpha to the dest alpha */
                vd = vec_and(vd, vcolormask);
                vd = vec_or(vd, vdst_alpha);
                vd = vec_perm(vd, vzero, vargb_to_dst);

                /* *dstp = res */
                vec_st((vector unsigned int) vd, 0, dstp);

                srcp += 4;
                dstp += 4;
                width -= 4;
                /* The vector just loaded becomes the low half next time. */
                vs = vnext;
            }

            /* Scalar pixels left over after the vector loop. */
            ONE_PIXEL_BLEND((tail), tail);
        }

        srcp += src_pad;
        dstp += dst_pad;
    }
#undef ONE_PIXEL_BLEND
}
/*
 * Fast ARGB888 -> (A)RGB888 blit with per-pixel source alpha, using AltiVec
 * for the bulk of each row.  No channel swizzling is done: both surfaces
 * are taken to share the ARGB layout.
 *
 * The destination's own alpha byte is preserved.  Each row is blended one
 * pixel at a time until dstp is 16-byte aligned, then four pixels per
 * iteration with vector code, then the (width % 4) remainder one pixel at a
 * time.
 *
 * info: blit description; reads src, dst, src_skip, dst_skip, dst_w and
 *       dst_h.
 */
static void
BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
{
    Uint32 *srcp = (Uint32 *) info->src;
    Uint32 *dstp = (Uint32 *) info->dst;
    /* Row padding is given in bytes; convert to whole 32-bit pixels. */
    int src_pad = info->src_skip >> 2;
    int dst_pad = info->dst_skip >> 2;
    int rows = info->dst_h;

    vector unsigned char vzero = vec_splat_u8(0);
    vector unsigned short vone_16 = vec_splat_u16(1);
    vector unsigned short vsh8_16 = vec_splat_u16(8);
    vector unsigned char vmerge = VEC_MERGE_PERMUTE();
    vector unsigned char valphamask = VEC_ALPHA_MASK();
    vector unsigned char valpha_sel =
        vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
    /* Complement of the alpha mask: selects the colour bytes. */
    vector unsigned char vcolormask = vec_nor(valphamask, vzero);

/* Scalar fallback: blend one pixel at a time while `condition` holds. */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while ((condition)) { \
            Uint32 dalpha; \
            Uint32 d; \
            Uint32 s1; \
            Uint32 d1; \
            Uint32 s = *srcp; \
            Uint32 alpha = s >> 24; \
            if(alpha) { \
              if(alpha == SDL_ALPHA_OPAQUE) { \
                *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \
              } else { \
                d = *dstp; \
                dalpha = d & 0xff000000; \
                s1 = s & 0xff00ff; \
                d1 = d & 0xff00ff; \
                d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
                s &= 0xff00; \
                d &= 0xff00; \
                d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
                *dstp = d1 | d | dalpha; \
              } \
            } \
            ++srcp; \
            ++dstp; \
            widthvar--; \
        }

    while (rows--) {
        int width = info->dst_w;

        /* Scalar pixels until dstp reaches a 16-byte boundary. */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);

        if (width > 0) {
            int tail = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);

            width -= tail;
            while (width) {
                vector unsigned char vnext;
                vector unsigned char vd;
                vector unsigned char valpha;
                vector unsigned char vdst_alpha;

                /* s = *srcp */
                vnext = (vector unsigned char) vec_ld(15, srcp);
                vs = vec_perm(vs, vnext, valigner);

                /* Spread each pixel's alpha byte across its four bytes. */
                valpha = vec_perm(vs, vzero, valpha_sel);

                /* d = *dstp */
                vd = (vector unsigned char) vec_ld(0, dstp);
                vdst_alpha = vec_and(vd, valphamask);

                VEC_MULTIPLY_ALPHA(vs, vd, valpha, vmerge, vone_16, vsh8_16);

                /* set the alpha to the dest alpha */
                vd = vec_and(vd, vcolormask);
                vd = vec_or(vd, vdst_alpha);

                /* *dstp = res */
                vec_st((vector unsigned int) vd, 0, dstp);

                srcp += 4;
                dstp += 4;
                width -= 4;
                /* The vector just loaded becomes the low half next time. */
                vs = vnext;
            }

            /* Scalar pixels left over after the vector loop. */
            ONE_PIXEL_BLEND((tail), tail);
        }

        srcp += src_pad;
        dstp += dst_pad;
    }
#undef ONE_PIXEL_BLEND
}
/*
 * Blend a 32-bit source onto a 32-bit destination using the single
 * per-surface alpha value info->a.
 *
 * Each row is processed in three phases: scalar pixels while
 * UNALIGNED_PTR(dstp) is non-zero, then groups of four pixels through the
 * AltiVec path, then the remaining (width % 4) pixels in scalar code again.
 * Source and destination are permuted through the vectors returned by
 * calc_swizzle32() so the two surfaces may use different channel layouts.
 */
static void
Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
{
    /* XXX : 6 */
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    /* the skips are shifted right by 2 so they can be added to Uint32 pointers */
    int srcskip = info->src_skip >> 2;
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip >> 2;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    unsigned sA = info->a;
    /* alpha written by the scalar path: opaque if the destination stores alpha */
    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
    vector unsigned char mergePermute;
    vector unsigned char vsrcPermute;
    vector unsigned char vdstPermute;
    vector unsigned char vsdstPermute;
    vector unsigned char valpha;
    vector unsigned char valphamask;
    vector unsigned char vbits;
    vector unsigned short v1;
    vector unsigned short v8;

    mergePermute = VEC_MERGE_PERMUTE();
    v1 = vec_splat_u16(1);
    v8 = vec_splat_u16(8);

    /* set the alpha to 255 on the destination surf */
    valphamask = VEC_ALPHA_MASK();

    /* source -> default layout, default layout -> destination, destination -> default layout */
    vsrcPermute = calc_swizzle32(srcfmt, NULL);
    vdstPermute = calc_swizzle32(NULL, dstfmt);
    vsdstPermute = calc_swizzle32(dstfmt, NULL);

    /* broadcast the surface alpha into every byte of valpha */
    ((unsigned char *) &valpha)[0] = sA;
    valpha = vec_splat(valpha, 0);
    /* all-ones vector, used as the second operand of the final destination permute */
    vbits = (vector unsigned char) vec_splat_s8(-1);

    while (height--) {
        int width = info->dst_w;
        /* scalar blend of one pixel per iteration; advances srcp/dstp and decrements widthvar */
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
    Uint32 Pixel; \
    unsigned sR, sG, sB, dR, dG, dB; \
    DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
    DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
    ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
    ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
    ++srcp; \
    ++dstp; \
    widthvar--; \
}
        /* leading pixels, until the destination pointer is aligned */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
        if (width > 0) {
            /* pixels left over after the last full group of four */
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
            width -= extrawidth;
            while (width) {
                vector unsigned char voverflow;
                vector unsigned char vd;

                /* s = *srcp */
                /* combine the previous and next aligned loads into four source pixels */
                voverflow = (vector unsigned char) vec_ld(15, srcp);
                vs = vec_perm(vs, voverflow, valigner);
                vs = vec_perm(vs, valpha, vsrcPermute);

                /* d = *dstp */
                vd = (vector unsigned char) vec_ld(0, dstp);
                vd = vec_perm(vd, vd, vsdstPermute);

                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);

                /* set the alpha channel to full on */
                vd = vec_or(vd, valphamask);
                vd = vec_perm(vd, vbits, vdstPermute);

                /* *dstp = res */
                vec_st((vector unsigned int) vd, 0, dstp);

                srcp += 4;
                dstp += 4;
                width -= 4;
                /* the load just made becomes the first half of the next iteration */
                vs = voverflow;
            }
            /* trailing pixels that did not fill a whole vector */
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
        }
#undef ONE_PIXEL_BLEND

        srcp += srcskip;
        dstp += dstskip;
    }
}
/*
 * fast RGB888->(A)RGB888 blending
 *
 * Blends with the single per-surface alpha info->a.  No channel swizzling is
 * done here; the scalar path works directly on the 0xff00ff / 0xff00 channel
 * pairs and forces the top byte of every written pixel to 0xff.
 * Rows are handled as: scalar pixels while UNALIGNED_PTR(dstp) is non-zero,
 * four pixels at a time in AltiVec, then (width % 4) trailing scalar pixels.
 */
static void
BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
{
    unsigned alpha = info->a;
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    /* the skips are shifted right by 2 so they can be added to Uint32 pointers */
    int srcskip = info->src_skip >> 2;
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip >> 2;
    vector unsigned char mergePermute;
    vector unsigned char valpha;
    vector unsigned char valphamask;
    vector unsigned short v1;
    vector unsigned short v8;

    mergePermute = VEC_MERGE_PERMUTE();
    v1 = vec_splat_u16(1);
    v8 = vec_splat_u16(8);

    /* set the alpha to 255 on the destination surf */
    valphamask = VEC_ALPHA_MASK();

    /* broadcast the surface alpha into every byte of valpha */
    ((unsigned char *) &valpha)[0] = alpha;
    valpha = vec_splat(valpha, 0);

    while (height--) {
        int width = info->dst_w;
        /* scalar blend: red/blue are blended together as one 0xff00ff pair, green separately */
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
    Uint32 s = *srcp; \
    Uint32 d = *dstp; \
    Uint32 s1 = s & 0xff00ff; \
    Uint32 d1 = d & 0xff00ff; \
    d1 = (d1 + ((s1 - d1) * alpha >> 8)) \
    & 0xff00ff; \
    s &= 0xff00; \
    d &= 0xff00; \
    d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
    *dstp = d1 | d | 0xff000000; \
    ++srcp; \
    ++dstp; \
    widthvar--; \
}
        /* leading pixels, until the destination pointer is aligned */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
        if (width > 0) {
            /* pixels left over after the last full group of four */
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
            width -= extrawidth;
            while (width) {
                vector unsigned char voverflow;
                vector unsigned char vd;

                /* s = *srcp */
                /* combine the previous and next aligned loads into four source pixels */
                voverflow = (vector unsigned char) vec_ld(15, srcp);
                vs = vec_perm(vs, voverflow, valigner);

                /* d = *dstp */
                vd = (vector unsigned char) vec_ld(0, dstp);

                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);

                /* set the alpha channel to full on */
                vd = vec_or(vd, valphamask);

                /* *dstp = res */
                vec_st((vector unsigned int) vd, 0, dstp);

                srcp += 4;
                dstp += 4;
                width -= 4;
                /* the load just made becomes the first half of the next iteration */
                vs = voverflow;
            }
            /* trailing pixels that did not fill a whole vector */
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
        }
#undef ONE_PIXEL_BLEND

        srcp += srcskip;
        dstp += dstskip;
    }
}
#if __MWERKS__
#pragma altivec_model off
#endif
#endif
/* SDL_ALTIVEC_BLITTERS */
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
static
void
BlitRGBtoRGBSurfaceAlpha128
(
SDL_BlitInfo
*
info
)
...
...
@@ -1338,79 +538,6 @@ BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info)
}
}
#ifdef __3dNOW__
/*
 * fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha
 *
 * For every pixel the source alpha (*srcp & amask) selects one of three paths:
 *   - 0:      the destination pixel is left untouched;
 *   - amask:  the source R, G and B bits replace the destination's, all other
 *             destination bits are kept;
 *   - else:   dst += ((src - dst) * alpha) >> 8 per channel using MMX, with
 *             the alpha lane of the multiplier cleared so the destination
 *             alpha survives the add.
 * _mm_empty() is called once at the end, after all MMX work is done.
 */
static void
BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    /* the skips are shifted right by 2 so they can be added to Uint32 pointers */
    int srcskip = info->src_skip >> 2;
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip >> 2;
    SDL_PixelFormat *sf = info->src_fmt;
    Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
    Uint32 amask = sf->Amask;
    Uint32 ashift = sf->Ashift;
    Uint64 multmask;

    __m64 src1, dst1, mm_alpha, mm_zero, dmask;

    mm_zero = _mm_setzero_si64();       /* 0 -> mm_zero */
    /*
     * Build a 64-bit mask with the 16-bit lane of the alpha channel cleared.
     * Each pixel byte becomes a 16-bit lane after unpacking, hence ashift * 2.
     */
    multmask = 0xFFFF;
    multmask <<= (ashift * 2);
    multmask = ~multmask;
    dmask = *(__m64 *) & multmask;      /* dst alpha mask -> dmask */

    while (height--) {
        /* *INDENT-OFF* */
        DUFFS_LOOP4({
        Uint32 alpha;

        /* prefetch hints 16 pixels ahead of the current position */
        _m_prefetch(srcp + 16);
        _m_prefetch(dstp + 16);

        alpha = *srcp & amask;
        if (alpha == 0) {
            /* do nothing */
        } else if (alpha == amask) {
            /* copy RGB, keep dst alpha */
            *dstp = (*srcp & chanmask) | (*dstp & ~chanmask);
        } else {
            src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
            src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */

            dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
            dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */

            mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
            mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
            mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
            mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
            mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */

            /* blend */
            src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */
            src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */
            src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */
            dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */
            dst1 = _mm_packs_pu16(dst1, mm_zero);  /* 0000ARGB -> dst1 */

            *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
        }
        ++srcp;
        ++dstp;
        }, width);
        /* *INDENT-ON* */
        srcp += srcskip;
        dstp += dstskip;
    }
    /* leave MMX state so later floating-point code sees a clean FPU */
    _mm_empty();
}
#endif
/* __3dNOW__ */
/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
/* blend a single 16 bit pixel at 50% */
...
...
@@ -2130,17 +1257,10 @@ SDL_CalculateBlitA(SDL_Surface * surface)
return
BlitNto1PixelAlpha
;
case
2
:
#if SDL_ALTIVEC_BLITTERS
if
(
sf
->
BytesPerPixel
==
4
&&
df
->
Gmask
==
0x7e0
&&
df
->
Bmask
==
0x1f
&&
SDL_HasAltiVec
())
return
Blit32to565PixelAlphaAltivec
;
else
#endif
if
(
sf
->
BytesPerPixel
==
4
&&
sf
->
Amask
==
0xff000000
&&
sf
->
Gmask
==
0xff00
&&
((
sf
->
Rmask
==
0xff
&&
df
->
Rmask
==
0x1f
)
||
(
sf
->
Bmask
==
0xff
&&
df
->
Bmask
==
0x1f
)))
{
if
(
sf
->
BytesPerPixel
==
4
&&
sf
->
Amask
==
0xff000000
&&
sf
->
Gmask
==
0xff00
&&
((
sf
->
Rmask
==
0xff
&&
df
->
Rmask
==
0x1f
)
||
(
sf
->
Bmask
==
0xff
&&
df
->
Bmask
==
0x1f
)))
{
if
(
df
->
Gmask
==
0x7e0
)
return
BlitARGBto565PixelAlpha
;
else
if
(
df
->
Gmask
==
0x3e0
)
...
...
@@ -2152,35 +1272,20 @@ SDL_CalculateBlitA(SDL_Surface * surface)
if
(
sf
->
Rmask
==
df
->
Rmask
&&
sf
->
Gmask
==
df
->
Gmask
&&
sf
->
Bmask
==
df
->
Bmask
&&
sf
->
BytesPerPixel
==
4
)
{
#if defined(__MMX__)
|| defined(__3dNOW__)
#if defined(__MMX__)
if
(
sf
->
Rshift
%
8
==
0
&&
sf
->
Gshift
%
8
==
0
&&
sf
->
Bshift
%
8
==
0
&&
sf
->
Ashift
%
8
==
0
&&
sf
->
Aloss
==
0
)
{
#ifdef __3dNOW__
if
(
SDL_Has3DNow
())
return
BlitRGBtoRGBPixelAlphaMMX3DNOW
;
#endif
#ifdef __MMX__
if
(
SDL_HasMMX
())
return
BlitRGBtoRGBPixelAlphaMMX
;
#endif
}
#endif
/* __MMX__
|| __3dNOW__
*/
#endif
/* __MMX__ */
if
(
sf
->
Amask
==
0xff000000
)
{
#if SDL_ALTIVEC_BLITTERS
if
(
SDL_HasAltiVec
())
return
BlitRGBtoRGBPixelAlphaAltivec
;
#endif
return
BlitRGBtoRGBPixelAlpha
;
}
}
#if SDL_ALTIVEC_BLITTERS
if
(
sf
->
Amask
&&
sf
->
BytesPerPixel
==
4
&&
SDL_HasAltiVec
())
return
Blit32to32PixelAlphaAltivec
;
else
#endif
return
BlitNtoNPixelAlpha
;
return
BlitNtoNPixelAlpha
;
case
3
:
default:
...
...
@@ -2226,19 +1331,10 @@ SDL_CalculateBlitA(SDL_Surface * surface)
return
BlitRGBtoRGBSurfaceAlphaMMX
;
#endif
if
((
sf
->
Rmask
|
sf
->
Gmask
|
sf
->
Bmask
)
==
0xffffff
)
{
#if SDL_ALTIVEC_BLITTERS
if
(
SDL_HasAltiVec
())
return
BlitRGBtoRGBSurfaceAlphaAltivec
;
#endif
return
BlitRGBtoRGBSurfaceAlpha
;
}
}
#if SDL_ALTIVEC_BLITTERS
if
((
sf
->
BytesPerPixel
==
4
)
&&
SDL_HasAltiVec
())
return
Blit32to32SurfaceAlphaAltivec
;
else
#endif
return
BlitNtoNSurfaceAlpha
;
return
BlitNtoNSurfaceAlpha
;
case
3
:
default:
...
...
@@ -2252,12 +1348,6 @@ SDL_CalculateBlitA(SDL_Surface * surface)
if
(
df
->
BytesPerPixel
==
1
)
return
BlitNto1SurfaceAlphaKey
;
else
#if SDL_ALTIVEC_BLITTERS
if
(
sf
->
BytesPerPixel
==
4
&&
df
->
BytesPerPixel
==
4
&&
SDL_HasAltiVec
())
return
Blit32to32SurfaceAlphaKeyAltivec
;
else
#endif
return
BlitNtoNSurfaceAlphaKey
;
}
break
;
...
...
src/video/SDL_blit_N.c
View file @
0314fd4e
...
...
@@ -28,846 +28,8 @@
/* Functions to blit from N-bit surfaces to other surfaces */
#if SDL_ALTIVEC_BLITTERS
#if __MWERKS__
#pragma altivec_model on
#endif
#ifdef HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#define assert(X)
#ifdef __MACOSX__
#include <sys/sysctl.h>
/*
 * Ask the kernel for the L3 cache size through the "hw.l3cachesize" sysctl.
 * Returns the reported value, or 0 when the sysctl call fails.
 */
static size_t
GetL3CacheSize(void)
{
    const char sysctl_name[] = "hw.l3cachesize";
    u_int64_t cache_bytes = 0;
    size_t value_len = sizeof(cache_bytes);

    if (sysctlbyname(sysctl_name, &cache_bytes, &value_len, NULL, 0) != 0) {
        return 0;
    }
    return cache_bytes;
}
#else
/*
 * Fallback used when no platform query is compiled in: report a fixed
 * 2 MiB L3 cache.
 */
static size_t
GetL3CacheSize(void)
{
    /* XXX: Just guess G4 */
    enum { GUESSED_L3_BYTES = 2 * 1024 * 1024 };

    return (size_t) GUESSED_L3_BYTES;
}
#endif
/* __MACOSX__ */
/*
 * Vector literal helpers.  The first branch spells the literal with
 * parentheses, the second with braces; the branch is chosen from
 * __MACOSX__ and the GCC major version.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
(vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
(vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
(vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
(vector unsigned short) { a,b,c,d,e,f,g,h }
#endif
/* Non-zero when the low four address bits of x are set, i.e. x is not 16-byte aligned. */
#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
/* Permute vector that applies the same byte order (a,b,c,d) to each of four 32-bit pixels. */
#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
0x04+a, 0x04+b, 0x04+c, 0x04+d, \
0x08+a, 0x08+b, 0x08+c, 0x08+d, \
0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
/* Pack r, g, b, a into one 32-bit pixel using dstfmt's shifts and masks. */
#define MAKE8888(dstfmt, r, g, b, a) \
( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
((a<<dstfmt->Ashift)&dstfmt->Amask) )
/*
 * Data Stream Touch...Altivec cache prefetching.
 *
 * Don't use this on a G5...however, the speed boost is very significant
 * on a G4.
 */
/* stream channel numbers used for the source and destination prefetch streams */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2
/* macro to set DST control word value... */
#define DST_CTRL(size, count, stride) \
(((size) << 24) | ((count) << 16) | (stride))
/*
 * Permute control used to realign data fetched with a vec_ld(0)/vec_ld(15)
 * pair: vec_lvsl(0, src) when src is unaligned, otherwise vec_lvsl(8, src)
 * with 8 added to every index.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
? vec_lvsl(0, src) \
: vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
/*
 * Calculate the permute vector used for 32->32 swizzling.
 *
 * srcfmt / dstfmt describe the source and destination channel layouts;
 * passing NULL for either selects the built-in ARGB layout.  The returned
 * vector is meant to be the control operand of vec_perm(): for each of the
 * four pixels in a vector it names, per destination byte, which source byte
 * to copy.  An index of 0x10 refers to the first byte of vec_perm's second
 * operand and is used wherever the alpha byte cannot be taken from the
 * source.
 */
static vector unsigned char
calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
{
    /*
     * We have to assume that the bits that aren't used by other
     *  colors is alpha, and it's one complete byte, since some formats
     *  leave alpha with a zero mask, but we should still swizzle the bits.
     */
    /* ARGB */
    static const struct SDL_PixelFormat default_pixel_format = {
        NULL, 32, 4,
        0, 0, 0, 0,
        16, 8, 0, 24,
        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
    };
    if (!srcfmt) {
        srcfmt = &default_pixel_format;
    }
    if (!dstfmt) {
        dstfmt = &default_pixel_format;
    }
    /* per-pixel base offsets: pixel n of the vector starts at byte 4 * n */
    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
                                                       0x04, 0x04, 0x04, 0x04,
                                                       0x08, 0x08, 0x08, 0x08,
                                                       0x0C, 0x0C, 0x0C,
                                                       0x0C);
    vector unsigned char vswiz;
    vector unsigned int srcvec;
    /* turn a channel's bit shift into its byte index: shift 24 -> 0, shift 0 -> 3 */
#define RESHIFT(X) (3 - ((X) >> 3))
    /* place each source byte index at the byte position of the destination channel */
    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
    Uint32 amask;
    /* Use zero for alpha if either surface doesn't have alpha */
    if (dstfmt->Amask) {
        amask =
            ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) <<
            (dstfmt->Ashift);
    } else {
        /* destination has no alpha mask: put 0x10 in every byte not used by R, G or B */
        amask =
            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
                          0xFFFFFFFF);
    }
#undef RESHIFT
    /* write the four indices into element 0, splat them to all pixels, add the offsets */
    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
    return (vswiz);
}
static
void
Blit_RGB888_RGB565
(
SDL_BlitInfo
*
info
);
/*
 * Convert a 32-bit source surface to 16-bit RGB565.
 *
 * Each row is processed as: scalar pixels while UNALIGNED_PTR(dst) is
 * non-zero, then eight pixels per iteration in AltiVec (two source vectors in,
 * one destination vector out), then the remaining (width % 8) pixels in
 * scalar code.  The vector path first swizzles the source to the default
 * ARGB layout via calc_swizzle32(srcfmt, NULL).
 *
 * The vector masks are exact (0xF800 for red, 0x001F for blue) so that the
 * vector path produces the same pixel values as the scalar path; a wider
 * blue mask would let the low green bit of the packed 1555 pixel leak into
 * bit 5 of the 565 result.
 */
static void
Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *) info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *) info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    /* second operand for the swizzle permute: bytes not taken from the source are zero */
    vector unsigned char valpha = vec_splat_u8(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    /* builds eight 16-bit lanes of (byte 0, green byte) from two ARGB vectors */
    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
                                                    0x00, 0x0a, 0x00, 0x0e,
                                                    0x00, 0x12, 0x00, 0x16,
                                                    0x00, 0x1a, 0x00, 0x1e);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /* blue occupies bits 4..0 of both the packed 1555 pixel and the 565 result */
    vector unsigned short v1f =
        VECUINT16_LITERAL(0x001f, 0x001f, 0x001f, 0x001f,
                          0x001f, 0x001f, 0x001f, 0x001f);
    /* keeps the top six bits of the 8-bit green value */
    vector unsigned short vfc =
        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
    /* 0xF8 in every byte, shifted to 0xF800 in every 16-bit lane */
    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, sA; \
            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
                          sR, sG, sB, sA); \
            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
                                ((sG << 3) & 0x000007E0) | \
                                ((sB >> 3) & 0x0000001F)); \
            dst += 2; \
            src += 4; \
            widthvar--; \
        }

        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);       /* trailing unaligned stores */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
            vector unsigned int vsrc1, vsrc2;
            vector unsigned char vdst;

            /* first four source pixels, realigned and swizzled to ARGB */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
            src += 16;
            vsrc = voverflow;
            /* next four source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
            /* 1555 */
            vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
            /* green is rebuilt from the full 8-bit value to get six bits */
            vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
            vgpixel = vec_and(vgpixel, vfc);
            vgpixel = vec_sl(vgpixel, v3);
            /* red: shift the 1555 pixel left by one so red lands in bits 15..11 */
            vrpixel = vec_sl(vpixel, v1);
            vrpixel = vec_and(vrpixel, vf800);
            /* blue is already in bits 4..0 */
            vbpixel = vec_and(vpixel, v1f);
            vdst =
                vec_or((vector unsigned char) vrpixel,
                       (vector unsigned char) vgpixel);
            /* 565 */
            vdst = vec_or(vdst, (vector unsigned char) vbpixel);
            vec_st(vdst, 0, dst);

            width -= 8;
            src += 16;
            dst += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* trailing pixels that did not fill a whole destination vector */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;         /* move to next row, accounting for pitch. */
        dst += dstskip;
    }
}
/*
 * Expand a 16-bit RGB565 source surface to a 32-bit destination.
 *
 * Each row is processed as: scalar pixels while UNALIGNED_PTR(dst) is
 * non-zero, then eight pixels per iteration in AltiVec (one source vector
 * in, two destination vectors out), then the remaining (width % 8) pixels
 * in scalar code.  The vector path assembles ARGB pixels and then swizzles
 * them to the destination layout via calc_swizzle32(NULL, dstfmt).
 *
 * The alpha written is info->a when the destination has an alpha mask and
 * info->a is non-zero, otherwise 0.
 *
 * Red and green are masked to exactly 5 and 6 bits (0xF800 / 0xFC00) before
 * being merged, so the vector path yields the same channel values as the
 * scalar path instead of carrying bits of the neighbouring channel in the
 * low bits.
 */
static void
Blit_RGB565_32Altivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *) info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *) info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned short vfc00;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v2 = vec_splat_u16(2);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
       0x10 - 0x1f is the alpha
       0x00 - 0x0e evens are the red
       0x01 - 0x0f odds are zero
     */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01,
                                                       0x01);
    /* same pattern for pixels 4..7: adds 8 to the red index of every pixel */
    vector unsigned char vredalpha2 =
        (vector unsigned
         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
        );
    /*
       0x00 - 0x0f is ARxx ARxx ARxx ARxx
       0x11 - 0x1f odds are blue
     */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    /* pixels 4..7: adds 8 to the blue index of every pixel */
    vector unsigned char vblue2 =
        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
        );
    /*
       0x00 - 0x0f is ARxB ARxB ARxB ARxB
       0x10 - 0x1e evens are green
     */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    /* pixels 4..7: adds 8 to the green index of every pixel */
    vector unsigned char vgreen2 =
        (vector unsigned
         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
        );

    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /* 0xF8 / 0xFC in every byte, shifted to 0xF800 / 0xFC00 in every 16-bit lane */
    vf800 = (vector unsigned short) vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));
    vfc00 = (vector unsigned short) vec_splat_u8(-4);
    vfc00 = vec_sl(vfc00, vec_splat_u16(8));

    if (dstfmt->Amask && info->a) {
        ((unsigned char *) &valpha)[0] = alpha = info->a;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short Pixel = *((unsigned short *)src); \
            sR = (Pixel >> 8) & 0xf8; \
            sG = (Pixel >> 3) & 0xfc; \
            sB = (Pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);       /* trailing unaligned stores */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* red: top five bits of each lane; high byte becomes RRRRR000 */
            vR = vec_and((vector unsigned short) vsrc, vf800);
            /* blue: after << 3 the low byte of each lane is BBBBB000 */
            vB = vec_sl((vector unsigned short) vsrc, v3);
            /* green: after << 5 in total the top six bits are green; drop the rest */
            vG = vec_and(vec_sl(vB, v2), vfc00);

            /* pixels 0..3: alpha + red, then blue, then green, then swizzle to dstfmt */
            vdst1 =
                (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* pixels 4..7 */
            vdst2 =
                (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* trailing pixels that did not fill a whole source vector */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;         /* move to next row, accounting for pitch. */
        dst += dstskip;
    }
}
/*
 * Expand a 16-bit RGB555 source surface to a 32-bit destination.
 *
 * Each row is processed as: scalar pixels while UNALIGNED_PTR(dst) is
 * non-zero, then eight pixels per iteration in AltiVec (one source vector
 * in, two destination vectors out), then the remaining (width % 8) pixels
 * in scalar code.  The vector path assembles ARGB pixels and then swizzles
 * them to the destination layout via calc_swizzle32(NULL, dstfmt).
 *
 * The alpha written is info->a when the destination has an alpha mask and
 * info->a is non-zero, otherwise 0.
 *
 * Red and green are both masked to exactly five bits (0xF800) before being
 * merged, so the vector path yields the same channel values as the scalar
 * path instead of carrying bits of the neighbouring channel in the low bits.
 */
static void
Blit_RGB555_32Altivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *) info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *) info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
       0x10 - 0x1f is the alpha
       0x00 - 0x0e evens are the red
       0x01 - 0x0f odds are zero
     */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01,
                                                       0x01);
    /* same pattern for pixels 4..7: adds 8 to the red index of every pixel */
    vector unsigned char vredalpha2 =
        (vector unsigned
         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
        );
    /*
       0x00 - 0x0f is ARxx ARxx ARxx ARxx
       0x11 - 0x1f odds are blue
     */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    /* pixels 4..7: adds 8 to the blue index of every pixel */
    vector unsigned char vblue2 =
        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
        );
    /*
       0x00 - 0x0f is ARxB ARxB ARxB ARxB
       0x10 - 0x1e evens are green
     */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    /* pixels 4..7: adds 8 to the green index of every pixel */
    vector unsigned char vgreen2 =
        (vector unsigned
         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
        );

    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /* 0xF8 in every byte, shifted to 0xF800 in every 16-bit lane */
    vf800 = (vector unsigned short) vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    if (dstfmt->Amask && info->a) {
        ((unsigned char *) &valpha)[0] = alpha = info->a;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short Pixel = *((unsigned short *)src); \
            sR = (Pixel >> 7) & 0xf8; \
            sG = (Pixel >> 2) & 0xf8; \
            sB = (Pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);       /* trailing unaligned stores */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* red: << 1 moves it to bits 15..11; high byte becomes RRRRR000 */
            vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
            /* blue: after << 3 the low byte of each lane is BBBBB000 */
            vB = vec_sl((vector unsigned short) vsrc, v3);
            /* green: after << 6 in total the top five bits are green; drop the rest */
            vG = vec_and(vec_sl(vB, v3), vf800);

            /* pixels 0..3: alpha + red, then blue, then green, then swizzle to dstfmt */
            vdst1 =
                (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* pixels 4..7 */
            vdst2 =
                (vector unsigned char) vec_perm((vector unsigned char) vR,
                                                valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* trailing pixels that did not fill a whole source vector */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;         /* move to next row, accounting for pitch. */
        dst += dstskip;
    }
}
static
void
BlitNtoNKey
(
SDL_BlitInfo
*
info
);
static
void
BlitNtoNKeyCopyAlpha
(
SDL_BlitInfo
*
info
);
/*
 * Colour-keyed 32-bit to 32-bit blit.
 *
 * Source pixels whose R, G and B bits equal the colour key
 * (info->colorkey & rgbmask) are skipped; every other pixel is converted to
 * the destination layout and stored.  When both formats have an alpha mask
 * the source alpha is copied, otherwise the alpha written is info->a if the
 * destination has an alpha mask and 0 if it has not.
 *
 * Surfaces narrower than 16 pixels are delegated to BlitNtoNKeyCopyAlpha /
 * BlitNtoNKey.  Wider rows are processed as: scalar pixels while
 * UNALIGNED_PTR(dstp) is non-zero, four pixels per iteration in AltiVec,
 * then the remaining (width % 4) pixels in scalar code.
 *
 * The key comparison is always done on the source pixel masked with
 * rgbmask, in the vector path as well as in both scalar paths, so bits
 * outside the R, G and B masks never prevent a key match.
 */
static void
Blit32to32KeyAltivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    int srcskip = info->src_skip / 4;
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->BytesPerPixel;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->BytesPerPixel;
    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
    unsigned alpha = dstfmt->Amask ? info->a : 0;
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    Uint32 ckey = info->colorkey;
    vector unsigned int valpha;
    vector unsigned char vpermute;
    vector unsigned char vzero;
    vector unsigned int vckey;
    vector unsigned int vrgbmask;

    vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (info->dst_w < 16) {
        /* too narrow to be worth the vector setup: use the generic blitters */
        if (copy_alpha) {
            BlitNtoNKeyCopyAlpha(info);
        } else {
            BlitNtoNKey(info);
        }
        return;
    }
    vzero = vec_splat_u8(0);
    if (alpha) {
        /* broadcast the alpha byte; used as second operand of the swizzle permute */
        ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
        valpha =
            (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
    } else {
        valpha = (vector unsigned int) vzero;
    }
    /* compare only the colour bits of the key */
    ckey &= rgbmask;
    ((unsigned int *) (char *) &vckey)[0] = ckey;
    vckey = vec_splat(vckey, 0);
    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
    vrgbmask = vec_splat(vrgbmask, 0);

    while (height--) {
        /* scalar keyed copy of one pixel per iteration; advances srcp/dstp */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        if (copy_alpha) { \
            while (condition) { \
                Uint32 Pixel; \
                unsigned sR, sG, sB, sA; \
                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
                          sR, sG, sB, sA); \
                if ( (Pixel & rgbmask) != ckey ) { \
                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
                            sR, sG, sB, sA); \
                } \
                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
                widthvar--; \
            } \
        } else { \
            while (condition) { \
                Uint32 Pixel; \
                unsigned sR, sG, sB; \
                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
                if ( (Pixel & rgbmask) != ckey ) { \
                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
                              sR, sG, sB, alpha); \
                } \
                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
                widthvar--; \
            } \
        }
        int width = info->dst_w;

        /* leading pixels, until the destination pointer is aligned */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
        assert(width > 0);
        if (width > 0) {
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned int vs = vec_ld(0, srcp);
            width -= extrawidth;
            assert(width >= 4);
            while (width) {
                vector unsigned char vsel;
                vector unsigned int vd;
                vector unsigned int voverflow = vec_ld(15, srcp);
                /* load the source vec */
                vs = vec_perm(vs, voverflow, valigner);
                /* vsel is set for items that match the key */
                vsel = (vector unsigned char) vec_and(vs, vrgbmask);
                vsel =
                    (vector unsigned char) vec_cmpeq((vector unsigned int)
                                                     vsel, vckey);
                /* permute the src vec to the dest format */
                vs = vec_perm(vs, valpha, vpermute);
                /* load the destination vec */
                vd = vec_ld(0, dstp);
                /* select the source and dest into vs */
                vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
                                                   (vector unsigned char) vd,
                                                   vsel);

                vec_st(vd, 0, dstp);
                srcp += 4;
                width -= 4;
                dstp += 4;
                /* the load just made becomes the first half of the next iteration */
                vs = voverflow;
            }
            /* trailing pixels that did not fill a whole vector */
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
        }
#undef ONE_PIXEL_BLEND
        /* advance to the next row whether or not the vector path ran */
        srcp += srcskip;
        dstp += dstskip;
    }
}
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
/* Use this on a G5 */
/*
 * Each row is processed as: scalar pixels while UNALIGNED_PTR(dst) is
 * non-zero, four pixels per iteration through vec_perm with the control
 * vector from calc_swizzle32(srcfmt, dstfmt), then the remaining
 * (width % 4) pixels in scalar code.  No cache prefetch hints are issued.
 */
static void
ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint32 *src = (Uint32 *) info->src;
    /* the skips are divided by 4 so they can be added to Uint32 pointers */
    int srcskip = info->src_skip / 4;
    Uint32 *dst = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* second operand of the swizzle permute; supplies bytes not taken from the source */
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (dstfmt->Amask && !srcfmt->Amask) {
        /* destination wants alpha but the source has none: fill with info->a if set */
        if (info->a) {
            vector unsigned char valpha;
            ((unsigned char *) &valpha)[0] = info->a;
            vzero = (vector unsigned int) vec_splat(valpha, 0);
        }
    }

    assert(srcfmt->BytesPerPixel == 4);
    assert(dstfmt->BytesPerPixel == 4);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
        while ((UNALIGNED_PTR(dst)) && (width)) {
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        /* After all that work, here's the vector part! */
        extrawidth = (width % 4);
        width -= extrawidth;
        valigner = VEC_ALIGNER(src);
        vbits = vec_ld(0, src);

        while (width) {
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
            vec_st(vbits, 0, dst);      /* store it back out. */
            dst += 4;
            /* the load just made becomes the first half of the next iteration */
            vbits = voverflow;
        }

        assert(width == 0);

        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
        while (extrawidth) {
            bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip;
        dst += dstskip;
    }
}
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
/* Use this on a G4 */
/*
 * Same row structure as the non-prefetching variant (scalar lead-in while
 * UNALIGNED_PTR(dst) is non-zero, four pixels per vector iteration, scalar
 * tail of width % 4 pixels), but the lead-in and vector loops additionally
 * start data-stream prefetches on the source and destination with
 * vec_dstt / vec_dstst.  Both streams are stopped with vec_dss before
 * returning.
 */
static void
ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
{
    /*
     * Prefetch lead distances.
     * NOTE(review): these are computed with sizeof(Uint32) but are added to
     * Uint32 pointers below, so the address offset is scaled by the element
     * size a second time -- confirm that this is intended.
     */
    const int scalar_dst_lead = sizeof(Uint32) * 4;
    const int vector_dst_lead = sizeof(Uint32) * 16;

    int height = info->dst_h;
    Uint32 *src = (Uint32 *) info->src;
    /* the skips are divided by 4 so they can be added to Uint32 pointers */
    int srcskip = info->src_skip / 4;
    Uint32 *dst = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* second operand of the swizzle permute; supplies bytes not taken from the source */
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (dstfmt->Amask && !srcfmt->Amask) {
        /* destination wants alpha but the source has none: fill with info->a if set */
        if (info->a) {
            vector unsigned char valpha;
            ((unsigned char *) &valpha)[0] = info->a;
            vzero = (vector unsigned int) vec_splat(valpha, 0);
        }
    }

    assert(srcfmt->BytesPerPixel == 4);
    assert(dstfmt->BytesPerPixel == 4);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
        while ((UNALIGNED_PTR(dst)) && (width)) {
            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        /* After all that work, here's the vector part! */
        extrawidth = (width % 4);
        width -= extrawidth;
        valigner = VEC_ALIGNER(src);
        vbits = vec_ld(0, src);

        while (width) {
            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
            vec_st(vbits, 0, dst);      /* store it back out. */
            dst += 4;
            /* the load just made becomes the first half of the next iteration */
            vbits = voverflow;
        }

        assert(width == 0);

        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
        while (extrawidth) {
            bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip;
        dst += dstskip;
    }

    /* stop both prefetch streams */
    vec_dss(DST_CHAN_SRC);
    vec_dss(DST_CHAN_DEST);
}
/*
 * Return the bit set of CPU features used to select a blitter:
 *   1 = has-MMX, 2 = has-AltiVec, 4 = dont-use-prefetch.
 *
 * The value is computed on first use and cached in a function-local static.
 * If the environment variable SDL_ALTIVEC_BLIT_FEATURES is set, its value
 * (parsed with "%u") is used instead of probing the CPU.
 */
static Uint32
GetBlitFeatures(void)
{
    static Uint32 features = 0xffffffff;
    char *forced;
    Uint32 has_mmx;
    Uint32 has_altivec;
    Uint32 no_prefetch;

    /* already computed on an earlier call */
    if (features != 0xffffffff) {
        return features;
    }

    /* Provide an override for testing .. */
    forced = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
    if (forced) {
        features = 0;
        SDL_sscanf(forced, "%u", &features);
        return features;
    }

    /* Feature 1 is has-MMX */
    has_mmx = SDL_HasMMX() ? 1 : 0;
    /* Feature 2 is has-AltiVec */
    has_altivec = SDL_HasAltiVec() ? 2 : 0;
    /* Feature 4 is dont-use-prefetch */
    /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
    no_prefetch = (GetL3CacheSize() == 0) ? 4 : 0;

    features = (0 | has_mmx | has_altivec | no_prefetch);
    return features;
}
#if __MWERKS__
#pragma altivec_model off
#endif
#else
/*
 * Fallback GetBlitFeatures() for the #else branch of the enclosing
 * conditional: the only feature bit reported is 1 (has-MMX), which is set
 * when SDL_HasMMX() returns non-zero; otherwise the mask is 0.
 */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
#endif
/* This is now endian dependent */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
...
...
@@ -2346,15 +1508,6 @@ static const struct blit_table normal_blit_1[] = {
};
static
const
struct
blit_table
normal_blit_2
[]
=
{
#if SDL_ALTIVEC_BLITTERS
/* has-altivec */
{
0x0000F800
,
0x000007E0
,
0x0000001F
,
4
,
0x00000000
,
0x00000000
,
0x00000000
,
2
,
Blit_RGB565_32Altivec
,
NO_ALPHA
|
COPY_ALPHA
|
SET_ALPHA
},
{
0x00007C00
,
0x000003E0
,
0x0000001F
,
4
,
0x00000000
,
0x00000000
,
0x00000000
,
2
,
Blit_RGB555_32Altivec
,
NO_ALPHA
|
COPY_ALPHA
|
SET_ALPHA
},
#endif
{
0x0000F800
,
0x000007E0
,
0x0000001F
,
4
,
0x00FF0000
,
0x0000FF00
,
0x000000FF
,
0
,
Blit_RGB565_ARGB8888
,
SET_ALPHA
},
...
...
@@ -2378,22 +1531,6 @@ static const struct blit_table normal_blit_3[] = {
};
static
const
struct
blit_table
normal_blit_4
[]
=
{
#if SDL_ALTIVEC_BLITTERS
/* has-altivec | dont-use-prefetch */
{
0x00000000
,
0x00000000
,
0x00000000
,
4
,
0x00000000
,
0x00000000
,
0x00000000
,
6
,
ConvertAltivec32to32_noprefetch
,
NO_ALPHA
|
COPY_ALPHA
|
SET_ALPHA
},
/* has-altivec */
{
0x00000000
,
0x00000000
,
0x00000000
,
4
,
0x00000000
,
0x00000000
,
0x00000000
,
2
,
ConvertAltivec32to32_prefetch
,
NO_ALPHA
|
COPY_ALPHA
|
SET_ALPHA
},
/* has-altivec */
{
0x00000000
,
0x00000000
,
0x00000000
,
2
,
0x0000F800
,
0x000007E0
,
0x0000001F
,
2
,
Blit_RGB888_RGB565Altivec
,
NO_ALPHA
},
#endif
{
0x00FF0000
,
0x0000FF00
,
0x000000FF
,
2
,
0x0000F800
,
0x000007E0
,
0x0000001F
,
0
,
Blit_RGB888_RGB565
,
NO_ALPHA
},
...
...
@@ -2491,12 +1628,6 @@ SDL_CalculateBlitN(SDL_Surface * surface)
else
if
(
dstfmt
->
BytesPerPixel
==
1
)
return
BlitNto1Key
;
else
{
#if SDL_ALTIVEC_BLITTERS
if
((
srcfmt
->
BytesPerPixel
==
4
)
&&
(
dstfmt
->
BytesPerPixel
==
4
)
&&
SDL_HasAltiVec
())
{
return
Blit32to32KeyAltivec
;
}
else
#endif
if
(
srcfmt
->
Amask
&&
dstfmt
->
Amask
)
{
return
BlitNtoNKeyCopyAlpha
;
}
else
{
...
...
test/testplatform.c
View file @
0314fd4e
...
...
@@ -143,13 +143,10 @@ TestCPUInfo(SDL_bool verbose)
printf
(
"CPU cache line size: %d
\n
"
,
SDL_GetCPUCacheLineSize
());
printf
(
"RDTSC %s
\n
"
,
SDL_HasRDTSC
()
?
"detected"
:
"not detected"
);
printf
(
"MMX %s
\n
"
,
SDL_HasMMX
()
?
"detected"
:
"not detected"
);
printf
(
"MMX Ext %s
\n
"
,
SDL_HasMMXExt
()
?
"detected"
:
"not detected"
);
printf
(
"3DNow %s
\n
"
,
SDL_Has3DNow
()
?
"detected"
:
"not detected"
);
printf
(
"3DNow Ext %s
\n
"
,
SDL_Has3DNowExt
()
?
"detected"
:
"not detected"
);
printf
(
"SSE %s
\n
"
,
SDL_HasSSE
()
?
"detected"
:
"not detected"
);
printf
(
"SSE2 %s
\n
"
,
SDL_HasSSE2
()
?
"detected"
:
"not detected"
);
printf
(
"AltiVec %s
\n
"
,
SDL_HasAltiVec
()
?
"detected"
:
"not detected"
);
printf
(
"SSE3 %s
\n
"
,
SDL_HasSSE3
()
?
"detected"
:
"not detected"
);
printf
(
"SSE4 %s
\n
"
,
SDL_HasSSE4
()
?
"detected"
:
"not detected"
);
}
return
(
0
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment