AxibugEmuOnline_old/References/virtuanessrc097-master/nx_SuperEagle.h

1095 lines
44 KiB
C
Raw Normal View History

/*---------------------------------------------------------------------*
* The following (piece of) code, (part of) the 2xSaI engine, *
* copyright (c) 2001 by Derek Liauw Kie Fa. *
* Non-Commercial use of the engine is allowed and is encouraged, *
* provided that appropriate credit be given and that this copyright *
* notice will not be removed under any circumstance. *
* You may freely modify this code, but I request *
* that any improvements to the engine be submitted to me, so *
* that I can implement these improvements in newer versions of *
* the engine. *
* If you need more information, have any comments or suggestions, *
* you can e-mail me. My e-mail: DerekL666@yahoo.com *
*---------------------------------------------------------------------*/
//
// This code was converted into VirtuaNES by Norix.
//
//
// nx_SuperEagleLine_16bpp_mmx
//
// MMX inner loop of the Super Eagle scaler for one source row, 16-bit output.
// Every loop iteration consumes 4 source pixels (8 bytes, 16 bits each) and
// stores two output rows of 8 pixels each: 16 bytes at pDst and 16 bytes at
// pDst+dstPitch.
//
// Parameters
//   pSrc        : source row being scaled. The code works from eax = pSrc - srcPitch,
//                 so it reads the row above pSrc ([eax]), the row at pSrc ([eax+ebx])
//                 and the two rows below ([eax+ebx*2], [eax+ebx*3]).
//   pDlt        : delta buffer compared word-by-word against the source. It is indexed
//                 with the same offsets and the same pitch (srcPitch) as eax, which
//                 already points one row above pSrc.
//                 NOTE(review): presumably the caller passes the matching "row above"
//                 position of a previous-frame copy - confirm with the caller.
//   srcPitch    : byte distance between source rows (also used for pDlt rows).
//   width       : remaining pixel count; decremented by 4 per iteration, loop repeats
//                 while the result is > 0 (signed jg). The body always runs at least once.
//   pDst        : destination for the first output row.
//   dstPitch    : byte distance from the first to the second output row.
//   bForceWrite : non-zero skips the delta comparison and always renders the group.
//
// Names such as colorB0..colorB3, color1..color6, colorS1/S2, colorA0..colorA3 are
// used as byte displacements, and cMask, ONE, I56Pixel, I23Pixel, product1a/1b,
// product2a/2b, Mask35/Mask35b, Mask26/Mask26b and final1a/1b/2a/2b are used as
// 64-bit memory operands; all of them are defined outside this function.
//
// Register roles
//   eax = pSrc - srcPitch (advanced by 8 per iteration)
//   ebx = srcPitch
//   edx = current output position (advanced by 16 per iteration)
//   ecx, esi, edi = scratch;  mm0-mm7 = scratch (emms is issued before returning)
//
static void nx_SuperEagleLine_16bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
__asm {
mov eax, pSrc
mov ebx, srcPitch
mov edx, pDst
// eax now addresses the row above pSrc; all neighbourhood loads are relative to it
sub eax, ebx
nx_SuperEagleLine_16mmx_loop:
mov ecx, bForceWrite
test ecx, ecx
jz nx_SuperEagleLine_16mmx_normal
// Forced write: refresh the delta entry at colorB0 from the source and
// go straight to rendering without comparing.
mov esi, pDlt
movq mm6, [eax+colorB0]
movq [esi+colorB0], mm6
jmp nx_SuperEagleLine_16mmx_forcewrite
nx_SuperEagleLine_16mmx_normal:
// Check delta
mov ecx, pDlt
// load source img
// Eight quadwords are loaded, two from each of the four rows
// (esi+ebx*2 == eax+ebx*3, i.e. the fourth row).
lea esi, [eax+ebx]
movq mm0, [eax+colorB0]
movq mm1, [eax+colorB3]
movq mm2, [eax+ebx+color4]
movq mm3, [eax+ebx+colorS2]
movq mm4, [eax+ebx*2+color1]
movq mm5, [eax+ebx*2+colorS1]
movq mm6, [esi+ebx*2+colorA0]
movq mm7, [esi+ebx*2+colorA3]
// compare to delta
// Same offsets, but relative to pDlt; each word becomes 0xFFFF when equal.
lea esi, [ecx+ebx]
pcmpeqw mm0, [ecx+colorB0]
pcmpeqw mm1, [ecx+colorB3]
pcmpeqw mm2, [ecx+ebx+color4]
pcmpeqw mm3, [ecx+ebx+colorS2]
pcmpeqw mm4, [ecx+ebx*2+color1]
pcmpeqw mm5, [ecx+ebx*2+colorS1]
pcmpeqw mm6, [esi+ebx*2+colorA0]
pcmpeqw mm7, [esi+ebx*2+colorA3]
// compose results
// AND all eight comparison results: a word stays 0xFFFF only if it matched
// in every one of the eight quadwords.
pand mm0, mm1
pand mm2, mm3
pand mm4, mm5
pand mm6, mm7
pand mm0, mm2
pand mm4, mm6
pxor mm7, mm7
pand mm0, mm4
// The delta entry at colorB0 is updated from the source on every pass,
// whether or not the group ends up being rendered.
movq mm6, [eax+colorB0]
// mm7 = words that are zero in mm0, i.e. lanes where something differed
pcmpeqw mm7, mm0
movq [ecx+colorB0], mm6
// Collapse the "changed" mask into 32 bits; zero means nothing changed.
packsswb mm7, mm7
movd ecx, mm7
test ecx, ecx
jz nx_SuperEagleLine_16mmx_skipprocess
// End Delta
nx_SuperEagleLine_16mmx_forcewrite:
//------------------------------
// Interpolate pixels
// (c0&c1)+(((c0^c1)&colorMask)>>1)
//------------------------------
// I56Pixel = interpolate(color5, color6); mm6 keeps cMask, mm4/mm5 keep
// copies of color5/color6 for the next step.
movq mm6, cMask
movq mm0, [eax+ebx+color5]
movq mm1, [eax+ebx+color6]
movq mm2, mm0
movq mm3, mm1
movq mm4, mm0
movq mm5, mm1
pxor mm3, mm2
pand mm0, mm1
pand mm3, mm6
psrlw mm3, 1
paddw mm0, mm3
movq I56Pixel, mm0
movq mm7, mm0
//------------------------------
// product1a = interpolate(I56Pixel, color5)
// product1b = interpolate(I56Pixel, color6)
movq mm0, mm7
movq mm2, mm7
movq mm1, mm7
movq mm3, mm7
pxor mm2, mm4
pxor mm3, mm5
pand mm0, mm4
pand mm2, mm6
pand mm1, mm5
pand mm3, mm6
psrlw mm2, 1
psrlw mm3, 1
paddw mm0, mm2
paddw mm1, mm3
movq product1a, mm0
movq product1b, mm1
//------------------------------
// I23Pixel = interpolate(color2, color3), same formula on the next row
movq mm0, [eax+ebx*2+color2]
movq mm1, [eax+ebx*2+color3]
movq mm2, mm0
movq mm3, mm1
movq mm4, mm0
movq mm5, mm1
pxor mm3, mm2
pand mm0, mm1
pand mm3, mm6
psrlw mm3, 1
paddw mm0, mm3
movq I23Pixel, mm0
movq mm7, mm0
//------------------------------
// product2a = interpolate(I23Pixel, color2)
// product2b = interpolate(I23Pixel, color3)
movq mm0, mm7
movq mm2, mm7
movq mm1, mm7
movq mm3, mm7
pxor mm2, mm4
pxor mm3, mm5
pand mm0, mm4
pand mm2, mm6
pand mm1, mm5
pand mm3, mm6
psrlw mm2, 1
psrlw mm3, 1
paddw mm0, mm2
paddw mm1, mm3
movq product2a, mm0
movq product2b, mm1
//------------------------------
// Decide which "branch" to take
//------------------------------
// From here until the skipguess label: mm4 = color5, mm5 = color6,
// mm6 = color3, mm7 = color2.
movq mm4, [eax+ebx+color5]
movq mm5, [eax+ebx+color6]
movq mm6, [eax+ebx*2+color3]
movq mm7, [eax+ebx*2+color2]
// Mask35 = (color5 == color3) AND NOT (color6 == color2)
pxor mm3, mm3
movq mm0, mm4
movq mm1, mm5
pcmpeqw mm0, mm6
pcmpeqw mm1, mm7
pcmpeqw mm1, mm3
pand mm0, mm1
movq Mask35, mm0
// Mask35b = Mask35 AND ( (colorS1 == color5 AND color4 == color5)
//                     OR (colorA2 == color5 AND colorB1 == color5) )
lea esi, [eax+ebx]
movq mm0, [eax+ebx*2+colorS1]
movq mm1, [eax+ebx+color4]
movq mm2, [esi+ebx*2+colorA2]
movq mm3, [eax+colorB1]
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm4
pcmpeqw mm3, mm4
pand mm0, mm1
pand mm2, mm3
por mm0, mm2
pand mm0, Mask35
movq Mask35b, mm0
//------------------------------
// Mask26 = NOT (color5 == color3) AND (color6 == color2)
pxor mm3, mm3
movq mm0, mm4
movq mm1, mm5
pcmpeqw mm0, mm6
pcmpeqw mm1, mm7
pcmpeqw mm0, mm3
pand mm0, mm1
movq Mask26, mm0
// Mask26b = Mask26 AND ( (color1 == color6 AND colorS2 == color6)
//                     OR (colorA1 == color6 AND colorB2 == color6) )
lea esi, [eax+ebx]
movq mm0, [eax+ebx*2+color1]
movq mm1, [eax+ebx+colorS2]
movq mm2, [esi+ebx*2+colorA1]
movq mm3, [eax+colorB2]
pcmpeqw mm0, mm5
pcmpeqw mm1, mm5
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm1
pand mm2, mm3
por mm0, mm2
pand mm0, Mask26
movq Mask26b, mm0
//------------------------------
// mm0 = (color5 == color3) AND (color6 == color2) AND NOT (color5 == color6):
// lanes where both diagonals match but with different colours.
movq mm0, mm4
movq mm1, mm5
movq mm2, mm0
pcmpeqw mm2, mm1
pcmpeqw mm0, mm6
pcmpeqw mm1, mm7
pand mm0, mm1
pand mm2, mm0
pxor mm0, mm2
movq mm7, mm0
//------------------------------
// If no lane is in that state, the neighbour vote below is skipped.
packsswb mm7, mm7
movd ecx, mm7
test ecx, ecx
jz nx_SuperEagleLine_16mmx_skipguess
//------------------------------
// Map of the pixels: I|E F|J
// G|A B|K
// H|C D|L
// M|N O|P
//------------------------------
// Neighbour vote. mm6 = ONE restricted to the lanes selected above
// (ONE is defined elsewhere; presumably 1 in every 16-bit lane - confirm).
// mm7 is the running score, starting at zero. For each of four neighbour
// pairs, mm0 sums the pair's matches against color5 and mm2 the matches
// against color6 (each match contributes mm6). The pcmpgtw / pcmpeqw / pand
// sequence then yields mm6 in lanes where that sum is NOT greater than mm6,
// and the score gets  mm7 += (color5 term),  mm7 -= (color6 term).
movq mm6, mm0
movq mm4, [eax+ebx+color5]
movq mm5, [eax+ebx+color6]
pxor mm7, mm7
pand mm6, ONE
// pair 1: colorB1 (row above) and color4 (row at pSrc)
movq mm0, [eax+colorB1]
movq mm1, [eax+ebx+color4]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
// pair 2: colorB2 (row above) and colorS2 (row at pSrc)
movq mm0, [eax+colorB2]
movq mm1, [eax+ebx+colorS2]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
// edi = row at pSrc; [edi+ebx] is the next row, [edi+ebx*2] the one after
lea edi, [eax+ebx]
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
// pair 3: color1 (third row) and colorA1 (fourth row)
movq mm0, [edi+ebx+color1]
movq mm1, [edi+ebx*2+colorA1]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
// pair 4: colorS1 (third row) and colorA2 (fourth row)
movq mm0, [edi+ebx+colorS1]
movq mm1, [edi+ebx*2+colorA2]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
// Lanes with a positive score are added to Mask35, lanes with a
// negative score are added to Mask26 (signed word compares against zero).
movq mm1, mm7
pxor mm0, mm0
pcmpgtw mm7, mm0
pcmpgtw mm0, mm1
por mm7, Mask35
por mm0, Mask26
movq Mask35, mm7
movq Mask26, mm0
nx_SuperEagleLine_16mmx_skipguess:
// Start the ASSEMBLY !!!
// Build six per-lane selection masks:
//   mm3 = ((color5 == color2) OR (color6 == color3)) AND NOT (Mask35 OR Mask26)
//   mm4 = Mask35 XOR Mask35b      mm6 = Mask35b
//   mm5 = Mask26 XOR Mask26b      mm7 = Mask26b
//   mm2 = lanes where mm3, Mask35 and Mask26 are all zero
movq mm4, Mask35
movq mm5, Mask26
movq mm6, Mask35b
movq mm7, Mask26b
movq mm0, [eax+ebx+color5]
movq mm1, [eax+ebx+color6]
movq mm2, [eax+ebx*2+color2]
movq mm3, [eax+ebx*2+color3]
pcmpeqw mm0, mm2
pcmpeqw mm1, mm3
movq mm2, mm4
movq mm3, mm5
por mm0, mm1
por mm2, mm3
pand mm2, mm0
pxor mm0, mm2
movq mm3, mm0
movq mm2, mm0
pxor mm0, mm0
por mm2, mm4
pxor mm4, mm6
por mm2, mm5
pxor mm5, mm7
pcmpeqw mm2, mm0
;----------------
// final1a = color5   where mm3|mm4|mm6
//         | I56Pixel  where mm5
//         | product1b where mm7
//         | product1a where mm2
movq mm0, [eax+ebx+color5]
movq mm1, mm3
por mm1, mm4
por mm1, mm6
pand mm0, mm1
movq mm1, mm5
pand mm1, I56Pixel
por mm0, mm1
movq mm1, mm7
pand mm1, product1b
por mm0, mm1
movq mm1, mm2
pand mm1, product1a
por mm0, mm1
movq final1a, mm0
// final1b = color6   where mm3|mm5|mm7
//         | I56Pixel  where mm4
//         | product1a where mm6
//         | product1b where mm2
movq mm0, [eax+ebx+color6]
movq mm1, mm3
por mm1, mm5
por mm1, mm7
pand mm0, mm1
movq mm1, mm4
pand mm1, I56Pixel
por mm0, mm1
movq mm1, mm6
pand mm1, product1a
por mm0, mm1
movq mm1, mm2
pand mm1, product1b
por mm0, mm1
movq final1b, mm0
// final2a = color2   where mm3|mm5|mm7
//         | I23Pixel  where mm4
//         | product2b where mm6
//         | product2a where mm2
movq mm0, [eax+ebx*2+color2]
movq mm1, mm3
por mm1, mm5
por mm1, mm7
pand mm0, mm1
movq mm1, mm4
pand mm1, I23Pixel
por mm0, mm1
movq mm1, mm6
pand mm1, product2b
por mm0, mm1
movq mm1, mm2
pand mm1, product2a
por mm0, mm1
movq final2a, mm0
// final2b = color3   where mm3|mm4|mm6
//         | I23Pixel  where mm5
//         | product2a where mm7
//         | product2b where mm2
movq mm0, [eax+ebx*2+color3]
movq mm1, mm3
por mm1, mm4
por mm1, mm6
pand mm0, mm1
movq mm1, mm5
pand mm1, I23Pixel
por mm0, mm1
movq mm1, mm7
pand mm1, product2a
por mm0, mm1
movq mm1, mm2
pand mm1, product2b
por mm0, mm1
movq final2b, mm0
//------------------------------
// Write final image
//------------------------------
// Interleave the words of final1a/final1b (first output row) and
// final2a/final2b (second output row): 4+4 words become 8 output pixels.
movq mm0, final1a
movq mm2, final1b
movq mm1, mm0
movq mm4, final2a
movq mm6, final2b
movq mm5, mm4
punpcklwd mm0, mm2 // B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
punpckhwd mm1, mm2 // B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)
punpcklwd mm4, mm6 // B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
punpckhwd mm5, mm6 // B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)
mov edi, dstPitch
movq [edx+0], mm0 // 1st line
movq [edx+8], mm1
movq [edi+edx+0], mm4 // 2nd line
movq [edi+edx+8], mm5
nx_SuperEagleLine_16mmx_skipprocess:
// Advance to the next group. pDlt and width are updated in place in the
// parameter slots; the loop repeats while the remaining width is > 0.
add pDlt, 8 // 4 pixels
lea eax, [eax+ 8] // 4 pixels
lea edx, [edx+16] // 8 pixels
sub width, 4 // 4 pixels
jg nx_SuperEagleLine_16mmx_loop
// Leave MMX state so later x87 floating-point code works
emms
}
}
//
// nx_SuperEagleLine_32bpp_mmx
//
// MMX inner loop of the Super Eagle scaler for one source row, 32-bit output.
// The source is still read as 16-bit pixels (4 pixels = 8 bytes per iteration);
// the scaling logic works on 16-bit words, and each resulting pixel is expanded
// to a 32-bit value when it is stored. Every iteration stores two output rows of
// 8 pixels each: 32 bytes at pDst and 32 bytes at pDst+dstPitch.
//
// Parameters
//   pSrc        : source row being scaled. The code works from eax = pSrc - srcPitch,
//                 so it reads the row above pSrc ([eax]), the row at pSrc ([eax+ebx])
//                 and the two rows below ([eax+ebx*2], [eax+ebx*3]).
//   pDlt        : delta buffer compared word-by-word against the source. It is indexed
//                 with the same offsets and the same pitch (srcPitch) as eax, which
//                 already points one row above pSrc.
//                 NOTE(review): presumably the caller passes the matching "row above"
//                 position of a previous-frame copy - confirm with the caller.
//   srcPitch    : byte distance between source rows (also used for pDlt rows).
//   width       : remaining pixel count; decremented by 4 per iteration, loop repeats
//                 while the result is > 0 (signed jg). The body always runs at least once.
//   pDst        : destination for the first output row.
//   dstPitch    : byte distance from the first to the second output row.
//   bForceWrite : non-zero skips the delta comparison and always renders the group.
//
// Names such as colorB0..colorB3, color1..color6, colorS1/S2, colorA0..colorA3 are
// used as byte displacements, and cMask, ONE, I56Pixel, I23Pixel, product1a/1b,
// product2a/2b, Mask35/Mask35b, Mask26/Mask26b and final1a/1b/2a/2b are used as
// 64-bit memory operands; all of them are defined outside this function.
//
// Register roles
//   eax = pSrc - srcPitch (advanced by 8 per iteration)
//   ebx = srcPitch
//   edx = current output position (advanced by 32 per iteration)
//   ecx, esi, edi = scratch;  mm0-mm7 = scratch (emms is issued before returning)
//   During the output stage eax/ebx are reused as scratch and are parked in
//   esi / mm7 until the stage is finished.
//
static void nx_SuperEagleLine_32bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
__asm {
mov eax, pSrc
mov ebx, srcPitch
mov edx, pDst
// eax now addresses the row above pSrc; all neighbourhood loads are relative to it
sub eax, ebx
nx_SuperEagleLine_32mmx_loop:
mov ecx, bForceWrite
test ecx, ecx
jz nx_SuperEagleLine_32mmx_normal
// Forced write: refresh the delta entry at colorB0 from the source and
// go straight to rendering without comparing.
mov esi, pDlt
movq mm6, [eax+colorB0]
movq [esi+colorB0], mm6
jmp nx_SuperEagleLine_32mmx_forcewrite
nx_SuperEagleLine_32mmx_normal:
// Check delta
mov ecx, pDlt
// load source img
// Eight quadwords are loaded, two from each of the four rows
// (esi+ebx*2 == eax+ebx*3, i.e. the fourth row).
lea esi, [eax+ebx]
movq mm0, [eax+colorB0]
movq mm1, [eax+colorB3]
movq mm2, [eax+ebx+color4]
movq mm3, [eax+ebx+colorS2]
movq mm4, [eax+ebx*2+color1]
movq mm5, [eax+ebx*2+colorS1]
movq mm6, [esi+ebx*2+colorA0]
movq mm7, [esi+ebx*2+colorA3]
// compare to delta
// Same offsets, but relative to pDlt; each word becomes 0xFFFF when equal.
lea esi, [ecx+ebx]
pcmpeqw mm0, [ecx+colorB0]
pcmpeqw mm1, [ecx+colorB3]
pcmpeqw mm2, [ecx+ebx+color4]
pcmpeqw mm3, [ecx+ebx+colorS2]
pcmpeqw mm4, [ecx+ebx*2+color1]
pcmpeqw mm5, [ecx+ebx*2+colorS1]
pcmpeqw mm6, [esi+ebx*2+colorA0]
pcmpeqw mm7, [esi+ebx*2+colorA3]
// compose results
// AND all eight comparison results: a word stays 0xFFFF only if it matched
// in every one of the eight quadwords.
pand mm0, mm1
pand mm2, mm3
pand mm4, mm5
pand mm6, mm7
pand mm0, mm2
pand mm4, mm6
pxor mm7, mm7
pand mm0, mm4
// The delta entry at colorB0 is updated from the source on every pass,
// whether or not the group ends up being rendered.
movq mm6, [eax+colorB0]
// mm7 = words that are zero in mm0, i.e. lanes where something differed
pcmpeqw mm7, mm0
movq [ecx+colorB0], mm6
// Collapse the "changed" mask into 32 bits; zero means nothing changed.
packsswb mm7, mm7
movd ecx, mm7
test ecx, ecx
jz nx_SuperEagleLine_32mmx_skipprocess
// End Delta
nx_SuperEagleLine_32mmx_forcewrite:
//------------------------------
// Interpolate pixels
// (c0&c1)+(((c0^c1)&colorMask)>>1)
//------------------------------
// I56Pixel = interpolate(color5, color6); mm6 keeps cMask, mm4/mm5 keep
// copies of color5/color6 for the next step.
movq mm6, cMask
movq mm0, [eax+ebx+color5]
movq mm1, [eax+ebx+color6]
movq mm2, mm0
movq mm3, mm1
movq mm4, mm0
movq mm5, mm1
pxor mm3, mm2
pand mm0, mm1
pand mm3, mm6
psrlw mm3, 1
paddw mm0, mm3
movq I56Pixel, mm0
movq mm7, mm0
//------------------------------
// product1a = interpolate(I56Pixel, color5)
// product1b = interpolate(I56Pixel, color6)
movq mm0, mm7
movq mm2, mm7
movq mm1, mm7
movq mm3, mm7
pxor mm2, mm4
pxor mm3, mm5
pand mm0, mm4
pand mm2, mm6
pand mm1, mm5
pand mm3, mm6
psrlw mm2, 1
psrlw mm3, 1
paddw mm0, mm2
paddw mm1, mm3
movq product1a, mm0
movq product1b, mm1
//------------------------------
// I23Pixel = interpolate(color2, color3), same formula on the next row
movq mm0, [eax+ebx*2+color2]
movq mm1, [eax+ebx*2+color3]
movq mm2, mm0
movq mm3, mm1
movq mm4, mm0
movq mm5, mm1
pxor mm3, mm2
pand mm0, mm1
pand mm3, mm6
psrlw mm3, 1
paddw mm0, mm3
movq I23Pixel, mm0
movq mm7, mm0
//------------------------------
// product2a = interpolate(I23Pixel, color2)
// product2b = interpolate(I23Pixel, color3)
movq mm0, mm7
movq mm2, mm7
movq mm1, mm7
movq mm3, mm7
pxor mm2, mm4
pxor mm3, mm5
pand mm0, mm4
pand mm2, mm6
pand mm1, mm5
pand mm3, mm6
psrlw mm2, 1
psrlw mm3, 1
paddw mm0, mm2
paddw mm1, mm3
movq product2a, mm0
movq product2b, mm1
//------------------------------
// Decide which "branch" to take
//------------------------------
// From here until the skipguess label: mm4 = color5, mm5 = color6,
// mm6 = color3, mm7 = color2.
movq mm4, [eax+ebx+color5]
movq mm5, [eax+ebx+color6]
movq mm6, [eax+ebx*2+color3]
movq mm7, [eax+ebx*2+color2]
// Mask35 = (color5 == color3) AND NOT (color6 == color2)
pxor mm3, mm3
movq mm0, mm4
movq mm1, mm5
pcmpeqw mm0, mm6
pcmpeqw mm1, mm7
pcmpeqw mm1, mm3
pand mm0, mm1
movq Mask35, mm0
// Mask35b = Mask35 AND ( (colorS1 == color5 AND color4 == color5)
//                     OR (colorA2 == color5 AND colorB1 == color5) )
lea esi, [eax+ebx]
movq mm0, [eax+ebx*2+colorS1]
movq mm1, [eax+ebx+color4]
movq mm2, [esi+ebx*2+colorA2]
movq mm3, [eax+colorB1]
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm4
pcmpeqw mm3, mm4
pand mm0, mm1
pand mm2, mm3
por mm0, mm2
pand mm0, Mask35
movq Mask35b, mm0
//------------------------------
// Mask26 = NOT (color5 == color3) AND (color6 == color2)
pxor mm3, mm3
movq mm0, mm4
movq mm1, mm5
pcmpeqw mm0, mm6
pcmpeqw mm1, mm7
pcmpeqw mm0, mm3
pand mm0, mm1
movq Mask26, mm0
// Mask26b = Mask26 AND ( (color1 == color6 AND colorS2 == color6)
//                     OR (colorA1 == color6 AND colorB2 == color6) )
lea esi, [eax+ebx]
movq mm0, [eax+ebx*2+color1]
movq mm1, [eax+ebx+colorS2]
movq mm2, [esi+ebx*2+colorA1]
movq mm3, [eax+colorB2]
pcmpeqw mm0, mm5
pcmpeqw mm1, mm5
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm1
pand mm2, mm3
por mm0, mm2
pand mm0, Mask26
movq Mask26b, mm0
//------------------------------
// mm0 = (color5 == color3) AND (color6 == color2) AND NOT (color5 == color6):
// lanes where both diagonals match but with different colours.
movq mm0, mm4
movq mm1, mm5
movq mm2, mm0
pcmpeqw mm2, mm1
pcmpeqw mm0, mm6
pcmpeqw mm1, mm7
pand mm0, mm1
pand mm2, mm0
pxor mm0, mm2
movq mm7, mm0
//------------------------------
// If no lane is in that state, the neighbour vote below is skipped.
packsswb mm7, mm7
movd ecx, mm7
test ecx, ecx
jz nx_SuperEagleLine_32mmx_skipguess
//------------------------------
// Map of the pixels: I|E F|J
// G|A B|K
// H|C D|L
// M|N O|P
//------------------------------
// Neighbour vote. mm6 = ONE restricted to the lanes selected above
// (ONE is defined elsewhere; presumably 1 in every 16-bit lane - confirm).
// mm7 is the running score, starting at zero. For each of four neighbour
// pairs, mm0 sums the pair's matches against color5 and mm2 the matches
// against color6 (each match contributes mm6). The pcmpgtw / pcmpeqw / pand
// sequence then yields mm6 in lanes where that sum is NOT greater than mm6,
// and the score gets  mm7 += (color5 term),  mm7 -= (color6 term).
movq mm6, mm0
movq mm4, [eax+ebx+color5]
movq mm5, [eax+ebx+color6]
pxor mm7, mm7
pand mm6, ONE
// pair 1: colorB1 (row above) and color4 (row at pSrc)
movq mm0, [eax+colorB1]
movq mm1, [eax+ebx+color4]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
// pair 2: colorB2 (row above) and colorS2 (row at pSrc)
movq mm0, [eax+colorB2]
movq mm1, [eax+ebx+colorS2]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
// edi = row at pSrc; [edi+ebx] is the next row, [edi+ebx*2] the one after
lea edi, [eax+ebx]
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
// pair 3: color1 (third row) and colorA1 (fourth row)
movq mm0, [edi+ebx+color1]
movq mm1, [edi+ebx*2+colorA1]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
// pair 4: colorS1 (third row) and colorA2 (fourth row)
movq mm0, [edi+ebx+colorS1]
movq mm1, [edi+ebx*2+colorA2]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
// Lanes with a positive score are added to Mask35, lanes with a
// negative score are added to Mask26 (signed word compares against zero).
movq mm1, mm7
pxor mm0, mm0
pcmpgtw mm7, mm0
pcmpgtw mm0, mm1
por mm7, Mask35
por mm0, Mask26
movq Mask35, mm7
movq Mask26, mm0
nx_SuperEagleLine_32mmx_skipguess:
// Start the ASSEMBLY !!!
// Build six per-lane selection masks:
//   mm3 = ((color5 == color2) OR (color6 == color3)) AND NOT (Mask35 OR Mask26)
//   mm4 = Mask35 XOR Mask35b      mm6 = Mask35b
//   mm5 = Mask26 XOR Mask26b      mm7 = Mask26b
//   mm2 = lanes where mm3, Mask35 and Mask26 are all zero
movq mm4, Mask35
movq mm5, Mask26
movq mm6, Mask35b
movq mm7, Mask26b
movq mm0, [eax+ebx+color5]
movq mm1, [eax+ebx+color6]
movq mm2, [eax+ebx*2+color2]
movq mm3, [eax+ebx*2+color3]
pcmpeqw mm0, mm2
pcmpeqw mm1, mm3
movq mm2, mm4
movq mm3, mm5
por mm0, mm1
por mm2, mm3
pand mm2, mm0
pxor mm0, mm2
movq mm3, mm0
movq mm2, mm0
pxor mm0, mm0
por mm2, mm4
pxor mm4, mm6
por mm2, mm5
pxor mm5, mm7
pcmpeqw mm2, mm0
;----------------
// final1a = color5   where mm3|mm4|mm6
//         | I56Pixel  where mm5
//         | product1b where mm7
//         | product1a where mm2
movq mm0, [eax+ebx+color5]
movq mm1, mm3
por mm1, mm4
por mm1, mm6
pand mm0, mm1
movq mm1, mm5
pand mm1, I56Pixel
por mm0, mm1
movq mm1, mm7
pand mm1, product1b
por mm0, mm1
movq mm1, mm2
pand mm1, product1a
por mm0, mm1
movq final1a, mm0
// final1b = color6   where mm3|mm5|mm7
//         | I56Pixel  where mm4
//         | product1a where mm6
//         | product1b where mm2
movq mm0, [eax+ebx+color6]
movq mm1, mm3
por mm1, mm5
por mm1, mm7
pand mm0, mm1
movq mm1, mm4
pand mm1, I56Pixel
por mm0, mm1
movq mm1, mm6
pand mm1, product1a
por mm0, mm1
movq mm1, mm2
pand mm1, product1b
por mm0, mm1
movq final1b, mm0
// final2a = color2   where mm3|mm5|mm7
//         | I23Pixel  where mm4
//         | product2b where mm6
//         | product2a where mm2
movq mm0, [eax+ebx*2+color2]
movq mm1, mm3
por mm1, mm5
por mm1, mm7
pand mm0, mm1
movq mm1, mm4
pand mm1, I23Pixel
por mm0, mm1
movq mm1, mm6
pand mm1, product2b
por mm0, mm1
movq mm1, mm2
pand mm1, product2a
por mm0, mm1
movq final2a, mm0
// final2b = color3   where mm3|mm4|mm6
//         | I23Pixel  where mm5
//         | product2a where mm7
//         | product2b where mm2
movq mm0, [eax+ebx*2+color3]
movq mm1, mm3
por mm1, mm4
por mm1, mm6
pand mm0, mm1
movq mm1, mm5
pand mm1, I23Pixel
por mm0, mm1
movq mm1, mm7
pand mm1, product2a
por mm0, mm1
movq mm1, mm2
pand mm1, product2b
por mm0, mm1
movq final2b, mm0
//------------------------------
// Write final image
//------------------------------
// Interleave the words of final1a/final1b (first output row) and
// final2a/final2b (second output row): 4+4 words become 8 output pixels.
movq mm0, final1a
movq mm2, final1b
movq mm1, mm0
movq mm4, final2a
movq mm6, final2b
movq mm5, mm4
punpcklwd mm0, mm2 // B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
punpckhwd mm1, mm2 // B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)
punpcklwd mm4, mm6 // B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
punpckhwd mm5, mm6 // B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)
// Write image RGB1555->RGBx888
// save
// eax (source pointer) and ebx (pitch) are needed as scratch below, so they
// are parked in esi and mm7 (mm7 is no longer needed at this point).
mov esi, eax
movd mm7, ebx
mov edi, dstPitch
mov ecx, 0x00F8F8F8 // mask
// Per-pixel expansion, repeated for each 16-bit word (the word being
// converted is the low word of the MMX register; psrlq 16 moves to the next):
//   eax = word << 3      -> al  = source bits 0-4 in the top 5 bits of byte 0
//   shl ah, 3            -> ah  = source bits 5-9 in the top 5 bits of byte 1
//   ebx = (word << 9) & mask -> source bits 10-14 in the top 5 bits of byte 2,
//                               byte 3 and everything from higher words cleared
//   mov bx, ax           -> low 16 bits of ebx replaced by the two bytes above
// The low 3 bits of each output byte and the whole top byte are zero.
// 1st line
// mm0 supplies output pixels 0-3 of the first row
movd eax, mm0
movd ebx, mm0
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm0, 16
mov [edx+0], ebx
movd eax, mm0
movd ebx, mm0
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm0, 16
mov [edx+4], ebx
movd eax, mm0
movd ebx, mm0
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm0, 16
mov [edx+8], ebx
movd eax, mm0
movd ebx, mm0
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
mov [edx+12], ebx
// mm1 supplies output pixels 4-7 of the first row
movd eax, mm1
movd ebx, mm1
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm1, 16
mov [edx+16], ebx
movd eax, mm1
movd ebx, mm1
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm1, 16
mov [edx+20], ebx
movd eax, mm1
movd ebx, mm1
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm1, 16
mov [edx+24], ebx
movd eax, mm1
movd ebx, mm1
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
mov [edx+28], ebx
// 2nd line
// mm4 supplies output pixels 0-3 of the second row (at edx + dstPitch)
movd eax, mm4
movd ebx, mm4
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm4, 16
mov [edx+edi+ 0], ebx
movd eax, mm4
movd ebx, mm4
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm4, 16
mov [edx+edi+ 4], ebx
movd eax, mm4
movd ebx, mm4
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm4, 16
mov [edx+edi+ 8], ebx
movd eax, mm4
movd ebx, mm4
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
mov [edx+edi+12], ebx
// mm5 supplies output pixels 4-7 of the second row
movd eax, mm5
movd ebx, mm5
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm5, 16
mov [edx+edi+16], ebx
movd eax, mm5
movd ebx, mm5
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm5, 16
mov [edx+edi+20], ebx
movd eax, mm5
movd ebx, mm5
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
psrlq mm5, 16
mov [edx+edi+24], ebx
movd eax, mm5
movd ebx, mm5
shl eax, 3
shl ebx, 9
shl ah, 3
and ebx, ecx
mov bx, ax
mov [edx+edi+28], ebx
// restore
// Bring back the source pointer and pitch parked before the output stage.
mov eax, esi
movd ebx, mm7
nx_SuperEagleLine_32mmx_skipprocess:
// Advance to the next group. pDlt and width are updated in place in the
// parameter slots; the loop repeats while the remaining width is > 0.
// The skip path arrives here with eax/ebx untouched, so no restore is needed.
add pDlt, 8 // 4 pixels
lea eax, [eax+ 8] // 4 pixels
lea edx, [edx+32] // 8 pixels
sub width, 4 // 4 pixels
jg nx_SuperEagleLine_32mmx_loop
// Leave MMX state so later x87 floating-point code works
emms
}
}