Commit dc8a2012 authored by Sam Lantinga

Added SSE version of SDL_FillRect() for 32-bit ARGB surfaces

--HG--
extra : convert_revision : svn%3Ac70aab31-4412-0410-b14c-859654838e24/trunk%402601
parent 6d80d8fe
...@@ -587,10 +587,11 @@ SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color) ...@@ -587,10 +587,11 @@ SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
} else { } else {
switch (dst->format->BytesPerPixel) { switch (dst->format->BytesPerPixel) {
case 2: case 2:
for (y = dstrect->h; y; --y) { {
Uint16 *pixels = (Uint16 *) row;
Uint16 c = (Uint16) color; Uint16 c = (Uint16) color;
Uint32 cc = (Uint32) c << 16 | c; Uint32 cc = (Uint32) c << 16 | c;
for (y = dstrect->h; y; --y) {
Uint16 *pixels = (Uint16 *) row;
int n = dstrect->w; int n = dstrect->w;
if ((uintptr_t) pixels & 3) { if ((uintptr_t) pixels & 3) {
*pixels++ = c; *pixels++ = c;
...@@ -602,6 +603,7 @@ SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color) ...@@ -602,6 +603,7 @@ SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
pixels[n - 1] = c; pixels[n - 1] = c;
row += dst->pitch; row += dst->pitch;
} }
}
break; break;
case 3: case 3:
...@@ -619,6 +621,33 @@ SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color) ...@@ -619,6 +621,33 @@ SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
break; break;
case 4: case 4:
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES
if (SDL_HasSSE() && !((uintptr_t) row & 15) && !(dstrect->w & 3)) {
Uint32 cccc[4] __attribute__ ((aligned(16))) = {
color, color, color, color};
int i, n = dstrect->w / 4;
__asm__ __volatile__(" movdqa (%0), %%xmm0\n"::
"r"(cccc):"memory");
for (y = dstrect->h; y; --y) {
Uint8 *pixels = row;
for (i = n / 2; i--;) {
/* *INDENT-OFF* */
__asm__ __volatile__(" prefetchnta 256(%0)\n"
" movdqa %%xmm0, (%0)\n"
" movdqa %%xmm0, 16(%0)\n"::"r"(pixels):"memory");
/* *INDENT-ON* */
pixels += 32;
}
if (n & 1) {
__asm__ __volatile__(" movdqa %%xmm0, (%0)\n"::
"r"(pixels):"memory");
}
row += dst->pitch;
}
__asm__ __volatile__(" emms\n"::);
break;
}
#endif
for (y = dstrect->h; y; --y) { for (y = dstrect->h; y; --y) {
SDL_memset4(row, color, dstrect->w); SDL_memset4(row, color, dstrect->w);
row += dst->pitch; row += dst->pitch;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment