Commit 9cff95c6 authored by Ryan C. Gordon

From Mike Frysinger and/or Gentoo:

- libsdl-PIC-load-mmx-masks-from-stack.patch
this one may be a little controversial ... the fix here is again that you can't
reference the memory addresses like this to load into an MMX register, so the
way to do it is to push two 32-bit words onto the stack, load the 64-bit value
off of the stack into the MMX register, and then adjust the stack so that
it's back to normal.

--HG--
extra : convert_revision : svn%3Ac70aab31-4412-0410-b14c-859654838e24/trunk%401233
parent b1c0273c
......@@ -29,31 +29,43 @@ GLOBAL _ConvertMMXpII32_16BGR555
EXTERN _mmxreturn
SECTION .data
ALIGN 8
;; Macros for conversion routines
; _push_immq_mask imm32
;   Pushes the 32-bit immediate %1 onto the stack twice, so that the 8 bytes
;   at [esp] hold %1 in both the low and the high dword.
;   ESP is lowered by 8 and is NOT restored here; the user of this macro must
;   rebalance the stack afterwards (see CLEANUP_IMMQ_LOADS).
%macro _push_immq_mask 1
%rep 2
push dword %1
%endrep
%endmacro
;; Constants for conversion routines
; load_immq mmreg, imm32
;   Loads MMX register %1 with a 64-bit value whose two dwords are both %2.
;   The value is built on the stack via _push_immq_mask and read back from
;   [esp], so no absolute data address is referenced.
;   Leaves 8 bytes on the stack; the caller must release them later
;   (see CLEANUP_IMMQ_LOADS).
%macro load_immq 2
_push_immq_mask %2
movq %1, qword [esp]
%endmacro
mmx32_rgb888_mask dd 00ffffffh,00ffffffh
; pand_immq mmreg, imm32
;   ANDs MMX register %1 with a 64-bit mask whose two dwords are both %2.
;   The mask is built on the stack via _push_immq_mask and used directly as
;   the memory operand at [esp].
;   Leaves 8 bytes on the stack; the caller must release them later
;   (see CLEANUP_IMMQ_LOADS).
%macro pand_immq 2
_push_immq_mask %2
pand %1, qword [esp]
%endmacro
mmx32_rgb565_b dd 000000f8h, 000000f8h
mmx32_rgb565_g dd 0000fc00h, 0000fc00h
mmx32_rgb565_r dd 00f80000h, 00f80000h
; CLEANUP_IMMQ_LOADS(num)
;   Releases the stack space left behind by `num` earlier load_immq /
;   pand_immq expansions (8 bytes each) by adding 8 * num to ESP.
;   The argument is parenthesized in the expansion so that an expression
;   argument (e.g. 1+1) is scaled as a whole instead of expanding to 8 * 1+1.
;   The `byte` qualifier requests the sign-extended imm8 encoding, so
;   8 * num must fit in a signed byte (num <= 15).
%define CLEANUP_IMMQ_LOADS(num) \
add esp, byte 8 * (num)
mmx32_rgb555_rb dd 00f800f8h,00f800f8h
mmx32_rgb555_g dd 0000f800h,0000f800h
mmx32_rgb555_mul dd 20000008h,20000008h
mmx32_bgr555_mul dd 00082000h,00082000h
%define mmx32_rgb888_mask 00ffffffh
%define mmx32_rgb565_b 000000f8h
%define mmx32_rgb565_g 0000fc00h
%define mmx32_rgb565_r 00f80000h
%define mmx32_rgb555_rb 00f800f8h
%define mmx32_rgb555_g 0000f800h
%define mmx32_rgb555_mul 20000008h
%define mmx32_bgr555_mul 00082000h
SECTION .text
_ConvertMMXpII32_24RGB888:
; set up mm6 as the mask, mm7 as zero
movq mm6, qword [mmx32_rgb888_mask]
load_immq mm6, mmx32_rgb888_mask
CLEANUP_IMMQ_LOADS(1)
pxor mm7, mm7
mov edx, ecx ; save ecx
......@@ -115,9 +127,10 @@ _ConvertMMXpII32_24RGB888:
_ConvertMMXpII32_16RGB565:
; set up masks
movq mm5, [mmx32_rgb565_b]
movq mm6, [mmx32_rgb565_g]
movq mm7, [mmx32_rgb565_r]
load_immq mm5, mmx32_rgb565_b
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_r
CLEANUP_IMMQ_LOADS(3)
mov edx, ecx
shr ecx, 2
......@@ -181,9 +194,10 @@ _ConvertMMXpII32_16RGB565:
_ConvertMMXpII32_16BGR565:
movq mm5, [mmx32_rgb565_r]
movq mm6, [mmx32_rgb565_g]
movq mm7, [mmx32_rgb565_b]
load_immq mm5, mmx32_rgb565_r
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_b
CLEANUP_IMMQ_LOADS(3)
mov edx, ecx
shr ecx, 2
......@@ -253,7 +267,7 @@ _ConvertMMXpII32_16BGR555:
; except it uses a different multiplier for the pmaddwd
; instruction. cool huh.
movq mm7, qword [mmx32_bgr555_mul]
load_immq mm7, mmx32_bgr555_mul
jmp _convert_bgr555_cheat
; This is the same as the Intel version.. they obviously went to
......@@ -263,9 +277,10 @@ _ConvertMMXpII32_16BGR555:
; (I think) a more accurate name..
_ConvertMMXpII32_16RGB555:
movq mm7,qword [mmx32_rgb555_mul]
load_immq mm7, mmx32_rgb555_mul
_convert_bgr555_cheat:
movq mm6,qword [mmx32_rgb555_g]
load_immq mm6, mmx32_rgb555_g
CLEANUP_IMMQ_LOADS(2)
mov edx,ecx ; Save ecx
......@@ -280,12 +295,14 @@ _convert_bgr555_cheat:
movq mm0,[esi]
movq mm3,mm2
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
movq mm1,mm0
pand mm1,qword [mmx32_rgb555_rb]
pand_immq mm1, mmx32_rgb555_rb
pmaddwd mm3,mm7
CLEANUP_IMMQ_LOADS(2)
pmaddwd mm1,mm7
pand mm2,mm6
......@@ -302,13 +319,13 @@ _convert_bgr555_cheat:
movq mm0,mm4
psrld mm1,6
pand mm0,qword [mmx32_rgb555_rb]
pand_immq mm0, mmx32_rgb555_rb
packssdw mm1,mm3
movq mm3,mm5
pmaddwd mm0,mm7
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
pand mm4,mm6
movq [edi],mm1
......@@ -329,12 +346,14 @@ _convert_bgr555_cheat:
movq mm3,mm2
movq mm1,mm0
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
packssdw mm5,mm4
pand mm1,qword [mmx32_rgb555_rb]
pand_immq mm1, mmx32_rgb555_rb
pand mm2,mm6
CLEANUP_IMMQ_LOADS(4)
movq [edi+8],mm5
pmaddwd mm3,mm7
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment