Commit d2b922f5 authored by Sam Lantinga's avatar Sam Lantinga

Fixed bug #1090 (SDL_BlitCopyOverlap() assumes memcpy() operates in order)

Even if we're blitting between two different surfaces their pixels might still overlap, because of SDL_CreateRGBSurfaceFrom(), so always use SDL_BlitCopy() and check for overlap in that function.

When handling overlapping surfaces, don't assume that memcpy() iterates forward, instead use memmove() correctly, and provide a fallback implementation of SDL_memmove() that handles the different cases.

Fixed a bug with SDL_memset() not completely filling lengths that aren't a multiple of 4.
Optimized SDL_memcpy() a bit using the same technique as SDL_memset().
parent dc23c42e
......@@ -352,8 +352,8 @@ do { \
#endif
/* We can count on memcpy existing on Mac OS X and being well-tuned. */
#if defined(__MACH__) && defined(__APPLE__)
#define SDL_memcpy(dst, src, len) memcpy(dst, src, len)
#if defined(__MACOSX__)
#define SDL_memcpy memcpy
#elif defined(__GNUC__) && defined(i386)
#define SDL_memcpy(dst, src, len) \
do { \
......@@ -385,8 +385,8 @@ extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src,
#endif
/* We can count on memcpy existing on Mac OS X and being well-tuned. */
#if defined(__MACH__) && defined(__APPLE__)
#define SDL_memcpy4(dst, src, len) memcpy(dst, src, (len)*4)
#if defined(__MACOSX__)
#define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2)
#elif defined(__GNUC__) && defined(i386)
#define SDL_memcpy4(dst, src, len) \
do { \
......@@ -400,54 +400,14 @@ do { \
} while(0)
#endif
#ifndef SDL_memcpy4
#define SDL_memcpy4(dst, src, len) SDL_memcpy(dst, src, (len) << 2)
#endif
#if defined(__GNUC__) && defined(i386)
#define SDL_revcpy(dst, src, len) \
do { \
int u0, u1, u2; \
char *dstp = SDL_static_cast(char *, dst); \
char *srcp = SDL_static_cast(char *, src); \
int n = (len); \
if ( n >= 4 ) { \
__asm__ __volatile__ ( \
"std\n\t" \
"rep ; movsl\n\t" \
"cld\n\t" \
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
: "0" (n >> 2), \
"1" (dstp+(n-4)), "2" (srcp+(n-4)) \
: "memory" ); \
} \
switch (n & 3) { \
case 3: dstp[2] = srcp[2]; \
case 2: dstp[1] = srcp[1]; \
case 1: dstp[0] = srcp[0]; \
break; \
default: \
break; \
} \
} while(0)
#endif
#ifndef SDL_revcpy
extern DECLSPEC void *SDLCALL SDL_revcpy(void *dst, const void *src,
size_t len);
#define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2)
#endif
#ifdef HAVE_MEMMOVE
#define SDL_memmove memmove
#elif defined(HAVE_BCOPY)
#define SDL_memmove(d, s, n) bcopy((s), (d), (n))
#else
#define SDL_memmove(dst, src, len) \
do { \
if ( dst < src ) { \
SDL_memcpy(dst, src, len); \
} else { \
SDL_revcpy(dst, src, len); \
} \
} while(0)
extern DECLSPEC void *SDLCALL SDL_memmove(void *dst, const void *src,
size_t len);
#endif
#ifdef HAVE_MEMCMP
......
......@@ -265,31 +265,27 @@ void *
SDL_memset(void *dst, int c, size_t len)
{
size_t left = (len % 4);
if (len >= 4) {
Uint32 value = 0;
Uint32 *dstp = (Uint32 *) dst;
int i;
for (i = 0; i < 4; ++i) {
value <<= 8;
value |= c;
}
Uint32 *dstp4;
Uint8 *dstp1;
Uint32 value4 = (c | (c << 8) | (c << 16) | (c << 24));
Uint8 value1 = (Uint8) c;
dstp4 = (Uint32 *) dst;
len /= 4;
while (len--) {
*dstp++ = value;
*dstp4++ = value4;
}
}
if (left > 0) {
Uint8 value = (Uint8) c;
Uint8 *dstp = (Uint8 *) dst;
dstp1 = (Uint8 *) dstp4;
switch (left) {
case 3:
*dstp++ = value;
*dstp1++ = value1;
case 2:
*dstp++ = value;
*dstp1++ = value1;
case 1:
*dstp++ = value;
}
*dstp1++ = value1;
}
return dst;
}
#endif
......@@ -298,26 +294,50 @@ SDL_memset(void *dst, int c, size_t len)
void *
SDL_memcpy(void *dst, const void *src, size_t len)
{
char *srcp = (char *) src;
char *dstp = (char *) dst;
size_t left = (len % 4);
Uint32 *srcp4, *dstp4;
Uint8 *srcp1, *dstp1;
srcp4 = (Uint32 *) src;
dstp4 = (Uint32 *) dst;
len /= 4;
while (len--) {
*dstp++ = *srcp++;
*dstp4++ = *srcp4++;
}
srcp1 = (Uint8 *) srcp4;
dstp1 = (Uint8 *) dstp4;
switch (left) {
case 3:
*dstp1++ = *srcp1++;
case 2:
*dstp1++ = *srcp1++;
case 1:
*dstp1++ = *srcp1++;
}
return dst;
}
#endif
#ifndef SDL_revcpy
#ifndef SDL_memmove
void *
SDL_revcpy(void *dst, const void *src, size_t len)
SDL_memmove(void *dst, const void *src, size_t len)
{
char *srcp = (char *) src;
char *dstp = (char *) dst;
if (src < dst) {
srcp += len - 1;
dstp += len - 1;
while (len--) {
*dstp-- = *srcp--;
}
} else {
while (len--) {
*dstp++ = *srcp++;
}
}
return dst;
}
#endif
......
......@@ -205,12 +205,7 @@ SDL_CalculateBlit(SDL_Surface * surface)
/* Choose a standard blit function */
if (map->identity && !(map->info.flags & ~SDL_COPY_RLE_DESIRED)) {
/* Handle overlapping blits on the same surface */
if (surface == dst) {
blit = SDL_BlitCopyOverlap;
} else {
blit = SDL_BlitCopy;
}
} else if (surface->format->BitsPerPixel < 8) {
blit = SDL_CalculateBlit0(surface);
} else if (surface->format->BytesPerPixel == 1) {
......
......@@ -96,6 +96,7 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
void
SDL_BlitCopy(SDL_BlitInfo * info)
{
SDL_bool overlap;
Uint8 *src, *dst;
int w, h;
int srcskip, dstskip;
......@@ -107,6 +108,21 @@ SDL_BlitCopy(SDL_BlitInfo * info)
srcskip = info->src_pitch;
dstskip = info->dst_pitch;
/* Properly handle overlapping blits */
if (src < dst) {
overlap = (dst < (src + h*srcskip));
} else {
overlap = (src < (dst + h*dstskip));
}
if (overlap) {
while (h--) {
SDL_memmove(dst, src, w);
src += srcskip;
dst += dstskip;
}
return;
}
#ifdef __SSE__
if (SDL_HasSSE() &&
!((uintptr_t) src & 15) && !(srcskip & 15) &&
......@@ -141,29 +157,4 @@ SDL_BlitCopy(SDL_BlitInfo * info)
}
}
void
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
{
Uint8 *src, *dst;
int w, h;
int skip;
w = info->dst_w * info->dst_fmt->BytesPerPixel;
h = info->dst_h;
src = info->src;
dst = info->dst;
skip = info->src_pitch;
if ((dst < src) || (dst >= (src + h * skip))) {
SDL_BlitCopy(info);
} else {
src += ((h - 1) * skip);
dst += ((h - 1) * skip);
while (h--) {
SDL_revcpy(dst, src, w);
src -= skip;
dst -= skip;
}
}
}
/* vi: set ts=4 sw=4 expandtab: */
......@@ -21,6 +21,5 @@
*/
void SDL_BlitCopy(SDL_BlitInfo * info);
void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
/* vi: set ts=4 sw=4 expandtab: */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment