Commit d2b922f5 authored by Sam Lantinga

Fixed bug #1090 (SDL_BlitCopyOverlap() assumes memcpy() operates in order)

Even if we're blitting between two different surfaces, their pixels might still overlap, because SDL_CreateRGBSurfaceFrom() can wrap existing pixel memory, so always use SDL_BlitCopy() and check for overlap in that function.

When handling overlapping surfaces, don't assume that memcpy() iterates forward; instead, use memmove(), and provide a fallback implementation of SDL_memmove() that handles the different cases.

Fixed a bug with SDL_memset() not completely filling lengths that aren't a multiple of 4.
Optimized SDL_memcpy() a bit using the same technique as SDL_memset().
parent dc23c42e
...@@ -352,8 +352,8 @@ do { \ ...@@ -352,8 +352,8 @@ do { \
#endif #endif
/* We can count on memcpy existing on Mac OS X and being well-tuned. */ /* We can count on memcpy existing on Mac OS X and being well-tuned. */
#if defined(__MACH__) && defined(__APPLE__) #if defined(__MACOSX__)
#define SDL_memcpy(dst, src, len) memcpy(dst, src, len) #define SDL_memcpy memcpy
#elif defined(__GNUC__) && defined(i386) #elif defined(__GNUC__) && defined(i386)
#define SDL_memcpy(dst, src, len) \ #define SDL_memcpy(dst, src, len) \
do { \ do { \
...@@ -385,8 +385,8 @@ extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src, ...@@ -385,8 +385,8 @@ extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src,
#endif #endif
/* We can count on memcpy existing on Mac OS X and being well-tuned. */ /* We can count on memcpy existing on Mac OS X and being well-tuned. */
#if defined(__MACH__) && defined(__APPLE__) #if defined(__MACOSX__)
#define SDL_memcpy4(dst, src, len) memcpy(dst, src, (len)*4) #define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2)
#elif defined(__GNUC__) && defined(i386) #elif defined(__GNUC__) && defined(i386)
#define SDL_memcpy4(dst, src, len) \ #define SDL_memcpy4(dst, src, len) \
do { \ do { \
...@@ -400,54 +400,14 @@ do { \ ...@@ -400,54 +400,14 @@ do { \
} while(0) } while(0)
#endif #endif
#ifndef SDL_memcpy4 #ifndef SDL_memcpy4
#define SDL_memcpy4(dst, src, len) SDL_memcpy(dst, src, (len) << 2) #define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2)
#endif
#if defined(__GNUC__) && defined(i386)
#define SDL_revcpy(dst, src, len) \
do { \
int u0, u1, u2; \
char *dstp = SDL_static_cast(char *, dst); \
char *srcp = SDL_static_cast(char *, src); \
int n = (len); \
if ( n >= 4 ) { \
__asm__ __volatile__ ( \
"std\n\t" \
"rep ; movsl\n\t" \
"cld\n\t" \
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
: "0" (n >> 2), \
"1" (dstp+(n-4)), "2" (srcp+(n-4)) \
: "memory" ); \
} \
switch (n & 3) { \
case 3: dstp[2] = srcp[2]; \
case 2: dstp[1] = srcp[1]; \
case 1: dstp[0] = srcp[0]; \
break; \
default: \
break; \
} \
} while(0)
#endif
#ifndef SDL_revcpy
extern DECLSPEC void *SDLCALL SDL_revcpy(void *dst, const void *src,
size_t len);
#endif #endif
#ifdef HAVE_MEMMOVE #ifdef HAVE_MEMMOVE
#define SDL_memmove memmove #define SDL_memmove memmove
#elif defined(HAVE_BCOPY) #else
#define SDL_memmove(d, s, n) bcopy((s), (d), (n)) extern DECLSPEC void *SDLCALL SDL_memmove(void *dst, const void *src,
#else size_t len);
#define SDL_memmove(dst, src, len) \
do { \
if ( dst < src ) { \
SDL_memcpy(dst, src, len); \
} else { \
SDL_revcpy(dst, src, len); \
} \
} while(0)
#endif #endif
#ifdef HAVE_MEMCMP #ifdef HAVE_MEMCMP
......
...@@ -265,31 +265,27 @@ void * ...@@ -265,31 +265,27 @@ void *
SDL_memset(void *dst, int c, size_t len) SDL_memset(void *dst, int c, size_t len)
{ {
size_t left = (len % 4); size_t left = (len % 4);
if (len >= 4) { Uint32 *dstp4;
Uint32 value = 0; Uint8 *dstp1;
Uint32 *dstp = (Uint32 *) dst; Uint32 value4 = (c | (c << 8) | (c << 16) | (c << 24));
int i; Uint8 value1 = (Uint8) c;
for (i = 0; i < 4; ++i) {
value <<= 8; dstp4 = (Uint32 *) dst;
value |= c; len /= 4;
} while (len--) {
len /= 4; *dstp4++ = value4;
while (len--) {
*dstp++ = value;
}
} }
if (left > 0) {
Uint8 value = (Uint8) c; dstp1 = (Uint8 *) dstp4;
Uint8 *dstp = (Uint8 *) dst; switch (left) {
switch (left) { case 3:
case 3: *dstp1++ = value1;
*dstp++ = value; case 2:
case 2: *dstp1++ = value1;
*dstp++ = value; case 1:
case 1: *dstp1++ = value1;
*dstp++ = value;
}
} }
return dst; return dst;
} }
#endif #endif
...@@ -298,25 +294,49 @@ SDL_memset(void *dst, int c, size_t len) ...@@ -298,25 +294,49 @@ SDL_memset(void *dst, int c, size_t len)
void * void *
SDL_memcpy(void *dst, const void *src, size_t len) SDL_memcpy(void *dst, const void *src, size_t len)
{ {
char *srcp = (char *) src; size_t left = (len % 4);
char *dstp = (char *) dst; Uint32 *srcp4, *dstp4;
Uint8 *srcp1, *dstp1;
srcp4 = (Uint32 *) src;
dstp4 = (Uint32 *) dst;
len /= 4;
while (len--) { while (len--) {
*dstp++ = *srcp++; *dstp4++ = *srcp4++;
}
srcp1 = (Uint8 *) srcp4;
dstp1 = (Uint8 *) dstp4;
switch (left) {
case 3:
*dstp1++ = *srcp1++;
case 2:
*dstp1++ = *srcp1++;
case 1:
*dstp1++ = *srcp1++;
} }
return dst; return dst;
} }
#endif #endif
#ifndef SDL_revcpy #ifndef SDL_memmove
void * void *
SDL_revcpy(void *dst, const void *src, size_t len) SDL_memmove(void *dst, const void *src, size_t len)
{ {
char *srcp = (char *) src; char *srcp = (char *) src;
char *dstp = (char *) dst; char *dstp = (char *) dst;
srcp += len - 1;
dstp += len - 1; if (src < dst) {
while (len--) { srcp += len - 1;
*dstp-- = *srcp--; dstp += len - 1;
while (len--) {
*dstp-- = *srcp--;
}
} else {
while (len--) {
*dstp++ = *srcp++;
}
} }
return dst; return dst;
} }
......
...@@ -205,12 +205,7 @@ SDL_CalculateBlit(SDL_Surface * surface) ...@@ -205,12 +205,7 @@ SDL_CalculateBlit(SDL_Surface * surface)
/* Choose a standard blit function */ /* Choose a standard blit function */
if (map->identity && !(map->info.flags & ~SDL_COPY_RLE_DESIRED)) { if (map->identity && !(map->info.flags & ~SDL_COPY_RLE_DESIRED)) {
/* Handle overlapping blits on the same surface */ blit = SDL_BlitCopy;
if (surface == dst) {
blit = SDL_BlitCopyOverlap;
} else {
blit = SDL_BlitCopy;
}
} else if (surface->format->BitsPerPixel < 8) { } else if (surface->format->BitsPerPixel < 8) {
blit = SDL_CalculateBlit0(surface); blit = SDL_CalculateBlit0(surface);
} else if (surface->format->BytesPerPixel == 1) { } else if (surface->format->BytesPerPixel == 1) {
......
...@@ -96,6 +96,7 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len) ...@@ -96,6 +96,7 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
void void
SDL_BlitCopy(SDL_BlitInfo * info) SDL_BlitCopy(SDL_BlitInfo * info)
{ {
SDL_bool overlap;
Uint8 *src, *dst; Uint8 *src, *dst;
int w, h; int w, h;
int srcskip, dstskip; int srcskip, dstskip;
...@@ -107,6 +108,21 @@ SDL_BlitCopy(SDL_BlitInfo * info) ...@@ -107,6 +108,21 @@ SDL_BlitCopy(SDL_BlitInfo * info)
srcskip = info->src_pitch; srcskip = info->src_pitch;
dstskip = info->dst_pitch; dstskip = info->dst_pitch;
/* Properly handle overlapping blits */
if (src < dst) {
overlap = (dst < (src + h*srcskip));
} else {
overlap = (src < (dst + h*dstskip));
}
if (overlap) {
while (h--) {
SDL_memmove(dst, src, w);
src += srcskip;
dst += dstskip;
}
return;
}
#ifdef __SSE__ #ifdef __SSE__
if (SDL_HasSSE() && if (SDL_HasSSE() &&
!((uintptr_t) src & 15) && !(srcskip & 15) && !((uintptr_t) src & 15) && !(srcskip & 15) &&
...@@ -141,29 +157,4 @@ SDL_BlitCopy(SDL_BlitInfo * info) ...@@ -141,29 +157,4 @@ SDL_BlitCopy(SDL_BlitInfo * info)
} }
} }
void
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
{
Uint8 *src, *dst;
int w, h;
int skip;
w = info->dst_w * info->dst_fmt->BytesPerPixel;
h = info->dst_h;
src = info->src;
dst = info->dst;
skip = info->src_pitch;
if ((dst < src) || (dst >= (src + h * skip))) {
SDL_BlitCopy(info);
} else {
src += ((h - 1) * skip);
dst += ((h - 1) * skip);
while (h--) {
SDL_revcpy(dst, src, w);
src -= skip;
dst -= skip;
}
}
}
/* vi: set ts=4 sw=4 expandtab: */ /* vi: set ts=4 sw=4 expandtab: */
...@@ -21,6 +21,5 @@ ...@@ -21,6 +21,5 @@
*/ */
void SDL_BlitCopy(SDL_BlitInfo * info); void SDL_BlitCopy(SDL_BlitInfo * info);
void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
/* vi: set ts=4 sw=4 expandtab: */ /* vi: set ts=4 sw=4 expandtab: */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment