forked from sin365/AxibugEmuOnline
1095 lines
21 KiB
C
1095 lines
21 KiB
C
/*---------------------------------------------------------------------*
 * The following (piece of) code, (part of) the 2xSaI engine, *
 * copyright (c) 2001 by Derek Liauw Kie Fa. *
 * Non-Commercial use of the engine is allowed and is encouraged, *
 * provided that appropriate credit be given and that this copyright *
 * notice will not be removed under any circumstance. *
 * You may freely modify this code, but I request *
 * that any improvements to the engine be submitted to me, so *
 * that I can implement these improvements in newer versions of *
 * the engine. *
 * If you need more information, have any comments or suggestions, *
 * you can e-mail me. My e-mail: DerekL666@yahoo.com *
 *---------------------------------------------------------------------*/
//
// This code was converted into VirtuaNES by Norix.
//
//
// nx_SuperEagleLine_16bpp_mmx
//
// Scales one source line of 16-bit pixels to two destination lines of
// 16-bit pixels (2x horizontally and vertically), four source pixels per
// loop iteration, using MMX. Written as MSVC-style 32-bit inline assembly.
//
//   pSrc        source line; the code also reads the line above it and the
//               two lines below it (row -1, +1 and +2 at srcPitch spacing)
//   pDlt        "delta" buffer holding previously seen pixels; addressed
//               with the same pitch and the same color* offsets as pSrc
//   srcPitch    byte distance between source (and delta) lines
//   width       number of source pixels; consumed 4 at a time, the loop
//               runs while the remaining count is > 0
//   pDst        first destination line; second line is pDst + dstPitch
//   dstPitch    byte distance between destination lines
//   bForceWrite non-zero: skip the delta comparison and always render
//
// Registers: eax = pSrc - srcPitch (advances 8 bytes/iteration),
//            ebx = srcPitch, edx = pDst (advances 16 bytes/iteration),
//            ecx/esi/edi = scratch, mm0-mm7 = scratch.
// The copies of pDlt and width held in the parameter slots are modified.
//
// colorB0..colorB3, color4/5/6/S2, color1/2/3/S1, colorA0..colorA3 are
// displacement constants, and cMask, ONE, I56Pixel, I23Pixel, product1a/b,
// product2a/b, Mask35, Mask26, Mask35b, Mask26b, final1a/b, final2a/b are
// 64-bit memory operands, all defined outside this function.
// NOTE(review): if those scratch qwords are shared statics this routine is
// not reentrant - confirm against their definition.
//
static void nx_SuperEagleLine_16bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
	__asm {
		mov		eax, pSrc
		mov		ebx, srcPitch
		mov		edx, pDst
		// eax points one line above pSrc, so below:
		//   [eax+..]        = row -1      [eax+ebx+..]    = row  0
		//   [eax+ebx*2+..]  = row +1      [(eax+ebx)+ebx*2+..] = row +2
		sub		eax, ebx
nx_SuperEagleLine_16mmx_loop:
		mov		ecx, bForceWrite
		test		ecx, ecx
		jz		nx_SuperEagleLine_16mmx_normal

		// Forced redraw: still refresh the delta buffer, then render.
		mov		esi, pDlt
		movq		mm6, [eax+colorB0]
		movq		[esi+colorB0], mm6

		jmp		nx_SuperEagleLine_16mmx_forcewrite
nx_SuperEagleLine_16mmx_normal:
		// Check delta
		mov		ecx, pDlt

		// load source img
		// (two qwords from each of the four rows)
		lea		esi, [eax+ebx]
		movq		mm0, [eax+colorB0]
		movq		mm1, [eax+colorB3]
		movq		mm2, [eax+ebx+color4]
		movq		mm3, [eax+ebx+colorS2]
		movq		mm4, [eax+ebx*2+color1]
		movq		mm5, [eax+ebx*2+colorS1]
		movq		mm6, [esi+ebx*2+colorA0]
		movq		mm7, [esi+ebx*2+colorA3]

		// compare to delta
		// (same offsets in the delta buffer; equal word lanes become 0xFFFF)
		lea		esi, [ecx+ebx]
		pcmpeqw		mm0, [ecx+colorB0]
		pcmpeqw		mm1, [ecx+colorB3]
		pcmpeqw		mm2, [ecx+ebx+color4]
		pcmpeqw		mm3, [ecx+ebx+colorS2]
		pcmpeqw		mm4, [ecx+ebx*2+color1]
		pcmpeqw		mm5, [ecx+ebx*2+colorS1]
		pcmpeqw		mm6, [esi+ebx*2+colorA0]
		pcmpeqw		mm7, [esi+ebx*2+colorA3]

		// compose results
		// mm0 = lanes where all eight comparisons matched;
		// mm7 = its inverse (lanes where something changed).
		pand		mm0, mm1
		pand		mm2, mm3
		pand		mm4, mm5
		pand		mm6, mm7
		pand		mm0, mm2
		pand		mm4, mm6
		pxor		mm7, mm7
		pand		mm0, mm4
		movq		mm6, [eax+colorB0]
		pcmpeqw		mm7, mm0

		// Record the current source qword in the delta buffer.
		movq		[ecx+colorB0], mm6

		// Pack the four lane masks into 32 bits so one test covers them all;
		// zero means nothing changed and the group can be skipped.
		packsswb	mm7, mm7
		movd		ecx, mm7
		test		ecx, ecx
		jz		nx_SuperEagleLine_16mmx_skipprocess
		// End Delta
nx_SuperEagleLine_16mmx_forcewrite:
		//------------------------------
		// Interpolate pixels
		// (c0&c1)+(((c0^c1)&colorMask)>>1)
		//------------------------------
		movq		mm6, cMask

		// I56Pixel = interpolate(color5, color6); mm4/mm5 keep the originals
		movq		mm0, [eax+ebx+color5]
		movq		mm1, [eax+ebx+color6]
		movq		mm2, mm0
		movq		mm3, mm1
		movq		mm4, mm0
		movq		mm5, mm1

		pxor		mm3, mm2
		pand		mm0, mm1
		pand		mm3, mm6
		psrlw		mm3, 1
		paddw		mm0, mm3
		movq		I56Pixel, mm0
		movq		mm7, mm0

		//------------------------------
		// product1a = interpolate(I56Pixel, color5)
		// product1b = interpolate(I56Pixel, color6)
		movq		mm0, mm7
		movq		mm2, mm7
		movq		mm1, mm7
		movq		mm3, mm7
		pxor		mm2, mm4
		pxor		mm3, mm5
		pand		mm0, mm4
		pand		mm2, mm6
		pand		mm1, mm5
		pand		mm3, mm6
		psrlw		mm2, 1
		psrlw		mm3, 1
		paddw		mm0, mm2
		paddw		mm1, mm3
		movq		product1a, mm0
		movq		product1b, mm1

		//------------------------------
		// I23Pixel = interpolate(color2, color3); mm4/mm5 keep the originals
		movq		mm0, [eax+ebx*2+color2]
		movq		mm1, [eax+ebx*2+color3]
		movq		mm2, mm0
		movq		mm3, mm1
		movq		mm4, mm0
		movq		mm5, mm1

		pxor		mm3, mm2
		pand		mm0, mm1
		pand		mm3, mm6
		psrlw		mm3, 1
		paddw		mm0, mm3
		movq		I23Pixel, mm0
		movq		mm7, mm0

		//------------------------------
		// product2a = interpolate(I23Pixel, color2)
		// product2b = interpolate(I23Pixel, color3)
		movq		mm0, mm7
		movq		mm2, mm7
		movq		mm1, mm7
		movq		mm3, mm7
		pxor		mm2, mm4
		pxor		mm3, mm5
		pand		mm0, mm4
		pand		mm2, mm6
		pand		mm1, mm5
		pand		mm3, mm6
		psrlw		mm2, 1
		psrlw		mm3, 1
		paddw		mm0, mm2
		paddw		mm1, mm3
		movq		product2a, mm0
		movq		product2b, mm1

		//------------------------------
		// Decide which "branch" to take
		//------------------------------
		// mm4 = color5, mm5 = color6, mm6 = color3, mm7 = color2
		movq		mm4, [eax+ebx+color5]
		movq		mm5, [eax+ebx+color6]
		movq		mm6, [eax+ebx*2+color3]
		movq		mm7, [eax+ebx*2+color2]

		// Mask35 = (color5 == color3) && (color6 != color2)
		pxor		mm3, mm3
		movq		mm0, mm4
		movq		mm1, mm5

		pcmpeqw		mm0, mm6
		pcmpeqw		mm1, mm7
		pcmpeqw		mm1, mm3
		pand		mm0, mm1
		movq		Mask35, mm0

		// Mask35b = Mask35 && ((colorS1 == color5 && color4 == color5) ||
		//                      (colorA2 == color5 && colorB1 == color5))
		lea		esi, [eax+ebx]
		movq		mm0, [eax+ebx*2+colorS1]
		movq		mm1, [eax+ebx+color4]
		movq		mm2, [esi+ebx*2+colorA2]
		movq		mm3, [eax+colorB1]
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm4
		pcmpeqw		mm3, mm4
		pand		mm0, mm1
		pand		mm2, mm3
		por		mm0, mm2
		pand		mm0, Mask35
		movq		Mask35b, mm0

		//------------------------------
		// Mask26 = (color5 != color3) && (color6 == color2)
		pxor		mm3, mm3
		movq		mm0, mm4
		movq		mm1, mm5

		pcmpeqw		mm0, mm6
		pcmpeqw		mm1, mm7
		pcmpeqw		mm0, mm3
		pand		mm0, mm1
		movq		Mask26, mm0

		// Mask26b = Mask26 && ((color1 == color6 && colorS2 == color6) ||
		//                      (colorA1 == color6 && colorB2 == color6))
		lea		esi, [eax+ebx]
		movq		mm0, [eax+ebx*2+color1]
		movq		mm1, [eax+ebx+colorS2]
		movq		mm2, [esi+ebx*2+colorA1]
		movq		mm3, [eax+colorB2]
		pcmpeqw		mm0, mm5
		pcmpeqw		mm1, mm5
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm1
		pand		mm2, mm3
		por		mm0, mm2
		pand		mm0, Mask26
		movq		Mask26b, mm0

		//------------------------------
		// mm0 = (color5 == color3) && (color6 == color2) && (color5 != color6)
		// i.e. lanes where both pairs match but with two different colours.
		movq		mm0, mm4
		movq		mm1, mm5
		movq		mm2, mm0

		pcmpeqw		mm2, mm1
		pcmpeqw		mm0, mm6
		pcmpeqw		mm1, mm7
		pand		mm0, mm1
		pand		mm2, mm0
		pxor		mm0, mm2
		movq		mm7, mm0

		//------------------------------
		// No such lane in this group: skip the neighbour vote below.
		packsswb	mm7, mm7
		movd		ecx, mm7
		test		ecx, ecx
		jz		nx_SuperEagleLine_16mmx_skipguess

		//------------------------------
		// Map of the pixels: I|E F|J
		// G|A B|K
		// H|C D|L
		// M|N O|P
		//------------------------------
		// Neighbour vote, accumulated per lane in mm7 (starts at 0).
		// mm6 = tie lanes masked with ONE (presumably 0x0001 per word -
		// defined elsewhere); mm4 = color5, mm5 = color6.
		// For each of four neighbour pairs: mm0 / mm2 count how many of the
		// two neighbours equal color5 / color6; when mm6 holds 1 in a tie
		// lane, mm7 gains 1 if the color5 count is <= 1 and loses 1 if the
		// color6 count is <= 1.
		movq		mm6, mm0
		movq		mm4, [eax+ebx+color5]
		movq		mm5, [eax+ebx+color6]
		pxor		mm7, mm7
		pand		mm6, ONE

		// pair 1: colorB1 (row -1) and color4 (row 0)
		movq		mm0, [eax+colorB1]
		movq		mm1, [eax+ebx+color4]
		movq		mm2, mm0
		movq		mm3, mm1
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm6
		pand		mm1, mm6
		pand		mm2, mm6
		pand		mm3, mm6
		paddw		mm0, mm1
		paddw		mm2, mm3

		pxor		mm3, mm3
		pcmpgtw		mm0, mm6
		pcmpgtw		mm2, mm6
		pcmpeqw		mm0, mm3
		pcmpeqw		mm2, mm3
		pand		mm0, mm6
		pand		mm2, mm6
		paddw		mm7, mm0
		psubw		mm7, mm2

		// pair 2: colorB2 (row -1) and colorS2 (row 0)
		movq		mm0, [eax+colorB2]
		movq		mm1, [eax+ebx+colorS2]
		movq		mm2, mm0
		movq		mm3, mm1
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm6
		pand		mm1, mm6
		pand		mm2, mm6
		pand		mm3, mm6
		paddw		mm0, mm1
		paddw		mm2, mm3

		lea		edi, [eax+ebx]		// edi = row 0, used by pairs 3 and 4
		pxor		mm3, mm3
		pcmpgtw		mm0, mm6
		pcmpgtw		mm2, mm6
		pcmpeqw		mm0, mm3
		pcmpeqw		mm2, mm3
		pand		mm0, mm6
		pand		mm2, mm6
		paddw		mm7, mm0
		psubw		mm7, mm2

		// pair 3: color1 (row +1) and colorA1 (row +2)
		movq		mm0, [edi+ebx+color1]
		movq		mm1, [edi+ebx*2+colorA1]
		movq		mm2, mm0
		movq		mm3, mm1
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm6
		pand		mm1, mm6
		pand		mm2, mm6
		pand		mm3, mm6
		paddw		mm0, mm1
		paddw		mm2, mm3

		pxor		mm3, mm3
		pcmpgtw		mm0, mm6
		pcmpgtw		mm2, mm6
		pcmpeqw		mm0, mm3
		pcmpeqw		mm2, mm3
		pand		mm0, mm6
		pand		mm2, mm6
		paddw		mm7, mm0
		psubw		mm7, mm2

		// pair 4: colorS1 (row +1) and colorA2 (row +2)
		movq		mm0, [edi+ebx+colorS1]
		movq		mm1, [edi+ebx*2+colorA2]
		movq		mm2, mm0
		movq		mm3, mm1
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm6
		pand		mm1, mm6
		pand		mm2, mm6
		pand		mm3, mm6
		paddw		mm0, mm1
		paddw		mm2, mm3

		pxor		mm3, mm3
		pcmpgtw		mm0, mm6
		pcmpgtw		mm2, mm6
		pcmpeqw		mm0, mm3
		pcmpeqw		mm2, mm3
		pand		mm0, mm6
		pand		mm2, mm6
		paddw		mm7, mm0
		psubw		mm7, mm2

		// Vote > 0 adds the lane to Mask35, vote < 0 adds it to Mask26.
		movq		mm1, mm7
		pxor		mm0, mm0
		pcmpgtw		mm7, mm0
		pcmpgtw		mm0, mm1

		por		mm7, Mask35
		por		mm0, Mask26
		movq		Mask35, mm7
		movq		Mask26, mm0

nx_SuperEagleLine_16mmx_skipguess:
		// Start the ASSEMBLY !!!

		movq		mm4, Mask35
		movq		mm5, Mask26
		movq		mm6, Mask35b
		movq		mm7, Mask26b

		// mm3 = ((color5 == color2) || (color6 == color3)) && !(Mask35 || Mask26)
		movq		mm0, [eax+ebx+color5]
		movq		mm1, [eax+ebx+color6]
		movq		mm2, [eax+ebx*2+color2]
		movq		mm3, [eax+ebx*2+color3]
		pcmpeqw		mm0, mm2
		pcmpeqw		mm1, mm3
		movq		mm2, mm4
		movq		mm3, mm5
		por		mm0, mm1
		por		mm2, mm3
		pand		mm2, mm0
		pxor		mm0, mm2
		movq		mm3, mm0

		// mm2 = lanes selected by none of mm3, Mask35, Mask26;
		// mm4 = Mask35 ^ Mask35b, mm5 = Mask26 ^ Mask26b.
		movq		mm2, mm0
		pxor		mm0, mm0
		por		mm2, mm4
		pxor		mm4, mm6
		por		mm2, mm5
		pxor		mm5, mm7
		pcmpeqw		mm2, mm0
		;----------------

		// final1a = color5 & (mm3|mm4|mm6) | I56Pixel & mm5
		//         | product1b & mm7       | product1a & mm2
		movq		mm0, [eax+ebx+color5]
		movq		mm1, mm3
		por		mm1, mm4
		por		mm1, mm6
		pand		mm0, mm1
		movq		mm1, mm5
		pand		mm1, I56Pixel
		por		mm0, mm1
		movq		mm1, mm7
		pand		mm1, product1b
		por		mm0, mm1
		movq		mm1, mm2
		pand		mm1, product1a
		por		mm0, mm1
		movq		final1a, mm0

		// final1b = color6 & (mm3|mm5|mm7) | I56Pixel & mm4
		//         | product1a & mm6       | product1b & mm2
		movq		mm0, [eax+ebx+color6]
		movq		mm1, mm3
		por		mm1, mm5
		por		mm1, mm7
		pand		mm0, mm1
		movq		mm1, mm4
		pand		mm1, I56Pixel
		por		mm0, mm1
		movq		mm1, mm6
		pand		mm1, product1a
		por		mm0, mm1
		movq		mm1, mm2
		pand		mm1, product1b
		por		mm0, mm1
		movq		final1b, mm0

		// final2a = color2 & (mm3|mm5|mm7) | I23Pixel & mm4
		//         | product2b & mm6       | product2a & mm2
		movq		mm0, [eax+ebx*2+color2]
		movq		mm1, mm3
		por		mm1, mm5
		por		mm1, mm7
		pand		mm0, mm1
		movq		mm1, mm4
		pand		mm1, I23Pixel
		por		mm0, mm1
		movq		mm1, mm6
		pand		mm1, product2b
		por		mm0, mm1
		movq		mm1, mm2
		pand		mm1, product2a
		por		mm0, mm1
		movq		final2a, mm0

		// final2b = color3 & (mm3|mm4|mm6) | I23Pixel & mm5
		//         | product2a & mm7       | product2b & mm2
		movq		mm0, [eax+ebx*2+color3]
		movq		mm1, mm3
		por		mm1, mm4
		por		mm1, mm6
		pand		mm0, mm1
		movq		mm1, mm5
		pand		mm1, I23Pixel
		por		mm0, mm1
		movq		mm1, mm7
		pand		mm1, product2a
		por		mm0, mm1
		movq		mm1, mm2
		pand		mm1, product2b
		por		mm0, mm1
		movq		final2b, mm0

		//------------------------------
		// Write final image
		//------------------------------
		// Interleave the "a" and "b" results word by word so each source
		// pixel becomes two adjacent destination pixels on each output line.
		movq		mm0, final1a
		movq		mm2, final1b
		movq		mm1, mm0
		movq		mm4, final2a
		movq		mm6, final2b
		movq		mm5, mm4

		punpcklwd	mm0, mm2	// B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
		punpckhwd	mm1, mm2	// B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)
		punpcklwd	mm4, mm6	// B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
		punpckhwd	mm5, mm6	// B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)

		mov		edi, dstPitch
		movq		[edx+0], mm0		// 1st line
		movq		[edx+8], mm1
		movq		[edi+edx+0], mm4	// 2nd line
		movq		[edi+edx+8], mm5

nx_SuperEagleLine_16mmx_skipprocess:
		// Advance all cursors by one group and loop while pixels remain.
		add		pDlt, 8			// 4 pixels
		lea		eax, [eax+ 8]		// 4 pixels
		lea		edx, [edx+16]		// 8 pixels
		sub		width, 4		// 4 pixels
		jg		nx_SuperEagleLine_16mmx_loop

		// Leave MMX state so later x87 floating-point code works.
		emms
	}
}
|
|
|
|
//
// nx_SuperEagleLine_32bpp_mmx
//
// Scales one source line of 16-bit pixels to two destination lines of
// 32-bit pixels (2x horizontally and vertically), four source pixels per
// loop iteration, using MMX. Written as MSVC-style 32-bit inline assembly.
// The filter itself works on 16-bit pixels; only the final store expands
// each result pixel to 32 bits (RGB1555 -> RGBx888 per the comment at the
// write stage).
//
//   pSrc        source line; the code also reads the line above it and the
//               two lines below it (row -1, +1 and +2 at srcPitch spacing)
//   pDlt        "delta" buffer holding previously seen pixels; addressed
//               with the same pitch and the same color* offsets as pSrc
//   srcPitch    byte distance between source (and delta) lines
//   width       number of source pixels; consumed 4 at a time, the loop
//               runs while the remaining count is > 0
//   pDst        first destination line; second line is pDst + dstPitch
//   dstPitch    byte distance between destination lines
//   bForceWrite non-zero: skip the delta comparison and always render
//
// Registers: eax = pSrc - srcPitch (advances 8 bytes/iteration),
//            ebx = srcPitch, edx = pDst (advances 32 bytes/iteration),
//            ecx/esi/edi = scratch, mm0-mm7 = scratch.
//            During the write stage eax/ebx are reused as scratch and are
//            saved in esi / mm7 and restored afterwards.
// The copies of pDlt and width held in the parameter slots are modified.
//
// colorB0..colorB3, color4/5/6/S2, color1/2/3/S1, colorA0..colorA3 are
// displacement constants, and cMask, ONE, I56Pixel, I23Pixel, product1a/b,
// product2a/b, Mask35, Mask26, Mask35b, Mask26b, final1a/b, final2a/b are
// 64-bit memory operands, all defined outside this function.
// NOTE(review): if those scratch qwords are shared statics this routine is
// not reentrant - confirm against their definition.
//
static void nx_SuperEagleLine_32bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
	__asm {
		mov		eax, pSrc
		mov		ebx, srcPitch
		mov		edx, pDst
		// eax points one line above pSrc, so below:
		//   [eax+..]        = row -1      [eax+ebx+..]    = row  0
		//   [eax+ebx*2+..]  = row +1      [(eax+ebx)+ebx*2+..] = row +2
		sub		eax, ebx
nx_SuperEagleLine_32mmx_loop:
		mov		ecx, bForceWrite
		test		ecx, ecx
		jz		nx_SuperEagleLine_32mmx_normal

		// Forced redraw: still refresh the delta buffer, then render.
		mov		esi, pDlt
		movq		mm6, [eax+colorB0]
		movq		[esi+colorB0], mm6

		jmp		nx_SuperEagleLine_32mmx_forcewrite
nx_SuperEagleLine_32mmx_normal:
		// Check delta
		mov		ecx, pDlt

		// load source img
		// (two qwords from each of the four rows)
		lea		esi, [eax+ebx]
		movq		mm0, [eax+colorB0]
		movq		mm1, [eax+colorB3]
		movq		mm2, [eax+ebx+color4]
		movq		mm3, [eax+ebx+colorS2]
		movq		mm4, [eax+ebx*2+color1]
		movq		mm5, [eax+ebx*2+colorS1]
		movq		mm6, [esi+ebx*2+colorA0]
		movq		mm7, [esi+ebx*2+colorA3]

		// compare to delta
		// (same offsets in the delta buffer; equal word lanes become 0xFFFF)
		lea		esi, [ecx+ebx]
		pcmpeqw		mm0, [ecx+colorB0]
		pcmpeqw		mm1, [ecx+colorB3]
		pcmpeqw		mm2, [ecx+ebx+color4]
		pcmpeqw		mm3, [ecx+ebx+colorS2]
		pcmpeqw		mm4, [ecx+ebx*2+color1]
		pcmpeqw		mm5, [ecx+ebx*2+colorS1]
		pcmpeqw		mm6, [esi+ebx*2+colorA0]
		pcmpeqw		mm7, [esi+ebx*2+colorA3]

		// compose results
		// mm0 = lanes where all eight comparisons matched;
		// mm7 = its inverse (lanes where something changed).
		pand		mm0, mm1
		pand		mm2, mm3
		pand		mm4, mm5
		pand		mm6, mm7
		pand		mm0, mm2
		pand		mm4, mm6
		pxor		mm7, mm7
		pand		mm0, mm4
		movq		mm6, [eax+colorB0]
		pcmpeqw		mm7, mm0

		// Record the current source qword in the delta buffer.
		movq		[ecx+colorB0], mm6

		// Pack the four lane masks into 32 bits so one test covers them all;
		// zero means nothing changed and the group can be skipped.
		packsswb	mm7, mm7
		movd		ecx, mm7
		test		ecx, ecx
		jz		nx_SuperEagleLine_32mmx_skipprocess
		// End Delta
nx_SuperEagleLine_32mmx_forcewrite:
		//------------------------------
		// Interpolate pixels
		// (c0&c1)+(((c0^c1)&colorMask)>>1)
		//------------------------------
		movq		mm6, cMask

		// I56Pixel = interpolate(color5, color6); mm4/mm5 keep the originals
		movq		mm0, [eax+ebx+color5]
		movq		mm1, [eax+ebx+color6]
		movq		mm2, mm0
		movq		mm3, mm1
		movq		mm4, mm0
		movq		mm5, mm1

		pxor		mm3, mm2
		pand		mm0, mm1
		pand		mm3, mm6
		psrlw		mm3, 1
		paddw		mm0, mm3
		movq		I56Pixel, mm0
		movq		mm7, mm0

		//------------------------------
		// product1a = interpolate(I56Pixel, color5)
		// product1b = interpolate(I56Pixel, color6)
		movq		mm0, mm7
		movq		mm2, mm7
		movq		mm1, mm7
		movq		mm3, mm7
		pxor		mm2, mm4
		pxor		mm3, mm5
		pand		mm0, mm4
		pand		mm2, mm6
		pand		mm1, mm5
		pand		mm3, mm6
		psrlw		mm2, 1
		psrlw		mm3, 1
		paddw		mm0, mm2
		paddw		mm1, mm3
		movq		product1a, mm0
		movq		product1b, mm1

		//------------------------------
		// I23Pixel = interpolate(color2, color3); mm4/mm5 keep the originals
		movq		mm0, [eax+ebx*2+color2]
		movq		mm1, [eax+ebx*2+color3]
		movq		mm2, mm0
		movq		mm3, mm1
		movq		mm4, mm0
		movq		mm5, mm1

		pxor		mm3, mm2
		pand		mm0, mm1
		pand		mm3, mm6
		psrlw		mm3, 1
		paddw		mm0, mm3
		movq		I23Pixel, mm0
		movq		mm7, mm0

		//------------------------------
		// product2a = interpolate(I23Pixel, color2)
		// product2b = interpolate(I23Pixel, color3)
		movq		mm0, mm7
		movq		mm2, mm7
		movq		mm1, mm7
		movq		mm3, mm7
		pxor		mm2, mm4
		pxor		mm3, mm5
		pand		mm0, mm4
		pand		mm2, mm6
		pand		mm1, mm5
		pand		mm3, mm6
		psrlw		mm2, 1
		psrlw		mm3, 1
		paddw		mm0, mm2
		paddw		mm1, mm3
		movq		product2a, mm0
		movq		product2b, mm1

		//------------------------------
		// Decide which "branch" to take
		//------------------------------
		// mm4 = color5, mm5 = color6, mm6 = color3, mm7 = color2
		movq		mm4, [eax+ebx+color5]
		movq		mm5, [eax+ebx+color6]
		movq		mm6, [eax+ebx*2+color3]
		movq		mm7, [eax+ebx*2+color2]

		// Mask35 = (color5 == color3) && (color6 != color2)
		pxor		mm3, mm3
		movq		mm0, mm4
		movq		mm1, mm5

		pcmpeqw		mm0, mm6
		pcmpeqw		mm1, mm7
		pcmpeqw		mm1, mm3
		pand		mm0, mm1
		movq		Mask35, mm0

		// Mask35b = Mask35 && ((colorS1 == color5 && color4 == color5) ||
		//                      (colorA2 == color5 && colorB1 == color5))
		lea		esi, [eax+ebx]
		movq		mm0, [eax+ebx*2+colorS1]
		movq		mm1, [eax+ebx+color4]
		movq		mm2, [esi+ebx*2+colorA2]
		movq		mm3, [eax+colorB1]
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm4
		pcmpeqw		mm3, mm4
		pand		mm0, mm1
		pand		mm2, mm3
		por		mm0, mm2
		pand		mm0, Mask35
		movq		Mask35b, mm0

		//------------------------------
		// Mask26 = (color5 != color3) && (color6 == color2)
		pxor		mm3, mm3
		movq		mm0, mm4
		movq		mm1, mm5

		pcmpeqw		mm0, mm6
		pcmpeqw		mm1, mm7
		pcmpeqw		mm0, mm3
		pand		mm0, mm1
		movq		Mask26, mm0

		// Mask26b = Mask26 && ((color1 == color6 && colorS2 == color6) ||
		//                      (colorA1 == color6 && colorB2 == color6))
		lea		esi, [eax+ebx]
		movq		mm0, [eax+ebx*2+color1]
		movq		mm1, [eax+ebx+colorS2]
		movq		mm2, [esi+ebx*2+colorA1]
		movq		mm3, [eax+colorB2]
		pcmpeqw		mm0, mm5
		pcmpeqw		mm1, mm5
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm1
		pand		mm2, mm3
		por		mm0, mm2
		pand		mm0, Mask26
		movq		Mask26b, mm0

		//------------------------------
		// mm0 = (color5 == color3) && (color6 == color2) && (color5 != color6)
		// i.e. lanes where both pairs match but with two different colours.
		movq		mm0, mm4
		movq		mm1, mm5
		movq		mm2, mm0

		pcmpeqw		mm2, mm1
		pcmpeqw		mm0, mm6
		pcmpeqw		mm1, mm7
		pand		mm0, mm1
		pand		mm2, mm0
		pxor		mm0, mm2
		movq		mm7, mm0

		//------------------------------
		// No such lane in this group: skip the neighbour vote below.
		packsswb	mm7, mm7
		movd		ecx, mm7
		test		ecx, ecx
		jz		nx_SuperEagleLine_32mmx_skipguess

		//------------------------------
		// Map of the pixels: I|E F|J
		// G|A B|K
		// H|C D|L
		// M|N O|P
		//------------------------------
		// Neighbour vote, accumulated per lane in mm7 (starts at 0).
		// mm6 = tie lanes masked with ONE (presumably 0x0001 per word -
		// defined elsewhere); mm4 = color5, mm5 = color6.
		// For each of four neighbour pairs: mm0 / mm2 count how many of the
		// two neighbours equal color5 / color6; when mm6 holds 1 in a tie
		// lane, mm7 gains 1 if the color5 count is <= 1 and loses 1 if the
		// color6 count is <= 1.
		movq		mm6, mm0
		movq		mm4, [eax+ebx+color5]
		movq		mm5, [eax+ebx+color6]
		pxor		mm7, mm7
		pand		mm6, ONE

		// pair 1: colorB1 (row -1) and color4 (row 0)
		movq		mm0, [eax+colorB1]
		movq		mm1, [eax+ebx+color4]
		movq		mm2, mm0
		movq		mm3, mm1
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm6
		pand		mm1, mm6
		pand		mm2, mm6
		pand		mm3, mm6
		paddw		mm0, mm1
		paddw		mm2, mm3

		pxor		mm3, mm3
		pcmpgtw		mm0, mm6
		pcmpgtw		mm2, mm6
		pcmpeqw		mm0, mm3
		pcmpeqw		mm2, mm3
		pand		mm0, mm6
		pand		mm2, mm6
		paddw		mm7, mm0
		psubw		mm7, mm2

		// pair 2: colorB2 (row -1) and colorS2 (row 0)
		movq		mm0, [eax+colorB2]
		movq		mm1, [eax+ebx+colorS2]
		movq		mm2, mm0
		movq		mm3, mm1
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm6
		pand		mm1, mm6
		pand		mm2, mm6
		pand		mm3, mm6
		paddw		mm0, mm1
		paddw		mm2, mm3

		lea		edi, [eax+ebx]		// edi = row 0, used by pairs 3 and 4
		pxor		mm3, mm3
		pcmpgtw		mm0, mm6
		pcmpgtw		mm2, mm6
		pcmpeqw		mm0, mm3
		pcmpeqw		mm2, mm3
		pand		mm0, mm6
		pand		mm2, mm6
		paddw		mm7, mm0
		psubw		mm7, mm2

		// pair 3: color1 (row +1) and colorA1 (row +2)
		movq		mm0, [edi+ebx+color1]
		movq		mm1, [edi+ebx*2+colorA1]
		movq		mm2, mm0
		movq		mm3, mm1
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm6
		pand		mm1, mm6
		pand		mm2, mm6
		pand		mm3, mm6
		paddw		mm0, mm1
		paddw		mm2, mm3

		pxor		mm3, mm3
		pcmpgtw		mm0, mm6
		pcmpgtw		mm2, mm6
		pcmpeqw		mm0, mm3
		pcmpeqw		mm2, mm3
		pand		mm0, mm6
		pand		mm2, mm6
		paddw		mm7, mm0
		psubw		mm7, mm2

		// pair 4: colorS1 (row +1) and colorA2 (row +2)
		movq		mm0, [edi+ebx+colorS1]
		movq		mm1, [edi+ebx*2+colorA2]
		movq		mm2, mm0
		movq		mm3, mm1
		pcmpeqw		mm0, mm4
		pcmpeqw		mm1, mm4
		pcmpeqw		mm2, mm5
		pcmpeqw		mm3, mm5
		pand		mm0, mm6
		pand		mm1, mm6
		pand		mm2, mm6
		pand		mm3, mm6
		paddw		mm0, mm1
		paddw		mm2, mm3

		pxor		mm3, mm3
		pcmpgtw		mm0, mm6
		pcmpgtw		mm2, mm6
		pcmpeqw		mm0, mm3
		pcmpeqw		mm2, mm3
		pand		mm0, mm6
		pand		mm2, mm6
		paddw		mm7, mm0
		psubw		mm7, mm2

		// Vote > 0 adds the lane to Mask35, vote < 0 adds it to Mask26.
		movq		mm1, mm7
		pxor		mm0, mm0
		pcmpgtw		mm7, mm0
		pcmpgtw		mm0, mm1

		por		mm7, Mask35
		por		mm0, Mask26
		movq		Mask35, mm7
		movq		Mask26, mm0

nx_SuperEagleLine_32mmx_skipguess:
		// Start the ASSEMBLY !!!

		movq		mm4, Mask35
		movq		mm5, Mask26
		movq		mm6, Mask35b
		movq		mm7, Mask26b

		// mm3 = ((color5 == color2) || (color6 == color3)) && !(Mask35 || Mask26)
		movq		mm0, [eax+ebx+color5]
		movq		mm1, [eax+ebx+color6]
		movq		mm2, [eax+ebx*2+color2]
		movq		mm3, [eax+ebx*2+color3]
		pcmpeqw		mm0, mm2
		pcmpeqw		mm1, mm3
		movq		mm2, mm4
		movq		mm3, mm5
		por		mm0, mm1
		por		mm2, mm3
		pand		mm2, mm0
		pxor		mm0, mm2
		movq		mm3, mm0

		// mm2 = lanes selected by none of mm3, Mask35, Mask26;
		// mm4 = Mask35 ^ Mask35b, mm5 = Mask26 ^ Mask26b.
		movq		mm2, mm0
		pxor		mm0, mm0
		por		mm2, mm4
		pxor		mm4, mm6
		por		mm2, mm5
		pxor		mm5, mm7
		pcmpeqw		mm2, mm0
		;----------------

		// final1a = color5 & (mm3|mm4|mm6) | I56Pixel & mm5
		//         | product1b & mm7       | product1a & mm2
		movq		mm0, [eax+ebx+color5]
		movq		mm1, mm3
		por		mm1, mm4
		por		mm1, mm6
		pand		mm0, mm1
		movq		mm1, mm5
		pand		mm1, I56Pixel
		por		mm0, mm1
		movq		mm1, mm7
		pand		mm1, product1b
		por		mm0, mm1
		movq		mm1, mm2
		pand		mm1, product1a
		por		mm0, mm1
		movq		final1a, mm0

		// final1b = color6 & (mm3|mm5|mm7) | I56Pixel & mm4
		//         | product1a & mm6       | product1b & mm2
		movq		mm0, [eax+ebx+color6]
		movq		mm1, mm3
		por		mm1, mm5
		por		mm1, mm7
		pand		mm0, mm1
		movq		mm1, mm4
		pand		mm1, I56Pixel
		por		mm0, mm1
		movq		mm1, mm6
		pand		mm1, product1a
		por		mm0, mm1
		movq		mm1, mm2
		pand		mm1, product1b
		por		mm0, mm1
		movq		final1b, mm0

		// final2a = color2 & (mm3|mm5|mm7) | I23Pixel & mm4
		//         | product2b & mm6       | product2a & mm2
		movq		mm0, [eax+ebx*2+color2]
		movq		mm1, mm3
		por		mm1, mm5
		por		mm1, mm7
		pand		mm0, mm1
		movq		mm1, mm4
		pand		mm1, I23Pixel
		por		mm0, mm1
		movq		mm1, mm6
		pand		mm1, product2b
		por		mm0, mm1
		movq		mm1, mm2
		pand		mm1, product2a
		por		mm0, mm1
		movq		final2a, mm0

		// final2b = color3 & (mm3|mm4|mm6) | I23Pixel & mm5
		//         | product2a & mm7       | product2b & mm2
		movq		mm0, [eax+ebx*2+color3]
		movq		mm1, mm3
		por		mm1, mm4
		por		mm1, mm6
		pand		mm0, mm1
		movq		mm1, mm5
		pand		mm1, I23Pixel
		por		mm0, mm1
		movq		mm1, mm7
		pand		mm1, product2a
		por		mm0, mm1
		movq		mm1, mm2
		pand		mm1, product2b
		por		mm0, mm1
		movq		final2b, mm0

		//------------------------------
		// Write final image
		//------------------------------
		// Interleave the "a" and "b" results word by word so each source
		// pixel becomes two adjacent destination pixels on each output line.
		movq		mm0, final1a
		movq		mm2, final1b
		movq		mm1, mm0
		movq		mm4, final2a
		movq		mm6, final2b
		movq		mm5, mm4

		punpcklwd	mm0, mm2	// B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
		punpckhwd	mm1, mm2	// B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)
		punpcklwd	mm4, mm6	// B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
		punpckhwd	mm5, mm6	// B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)

		// Write image RGB1555->RGBx888
		// Each 16-bit pixel is expanded with integer registers:
		//   shl eax,3 / shl ah,3 : lowest 5-bit field -> bits 3..7 of al,
		//                          middle 5-bit field -> bits 3..7 of ah
		//   shl ebx,9 + and mask : highest 5-bit field -> bits 19..23
		//   mov bx,ax            : merge the two low bytes into the result
		// psrlq by 16 then exposes the next pixel of the same MMX register.
		// save
		// (eax and ebx are needed as scratch; keep src pointer and pitch)
		mov		esi, eax
		movd		mm7, ebx
		mov		edi, dstPitch
		mov		ecx, 0x00F8F8F8		// mask

		// 1st line
		// pixels 0..3 from mm0
		movd		eax, mm0
		movd		ebx, mm0
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm0, 16
		mov		[edx+0], ebx
		movd		eax, mm0
		movd		ebx, mm0
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm0, 16
		mov		[edx+4], ebx
		movd		eax, mm0
		movd		ebx, mm0
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm0, 16
		mov		[edx+8], ebx
		movd		eax, mm0
		movd		ebx, mm0
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		mov		[edx+12], ebx

		// pixels 4..7 from mm1
		movd		eax, mm1
		movd		ebx, mm1
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm1, 16
		mov		[edx+16], ebx
		movd		eax, mm1
		movd		ebx, mm1
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm1, 16
		mov		[edx+20], ebx
		movd		eax, mm1
		movd		ebx, mm1
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm1, 16
		mov		[edx+24], ebx
		movd		eax, mm1
		movd		ebx, mm1
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		mov		[edx+28], ebx

		// 2nd line
		// pixels 0..3 from mm4
		movd		eax, mm4
		movd		ebx, mm4
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm4, 16
		mov		[edx+edi+ 0], ebx
		movd		eax, mm4
		movd		ebx, mm4
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm4, 16
		mov		[edx+edi+ 4], ebx
		movd		eax, mm4
		movd		ebx, mm4
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm4, 16
		mov		[edx+edi+ 8], ebx
		movd		eax, mm4
		movd		ebx, mm4
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		mov		[edx+edi+12], ebx

		// pixels 4..7 from mm5
		movd		eax, mm5
		movd		ebx, mm5
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm5, 16
		mov		[edx+edi+16], ebx
		movd		eax, mm5
		movd		ebx, mm5
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm5, 16
		mov		[edx+edi+20], ebx
		movd		eax, mm5
		movd		ebx, mm5
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		psrlq		mm5, 16
		mov		[edx+edi+24], ebx
		movd		eax, mm5
		movd		ebx, mm5
		shl		eax, 3
		shl		ebx, 9
		shl		ah, 3
		and		ebx, ecx
		mov		bx, ax
		mov		[edx+edi+28], ebx

		// restore
		mov		eax, esi
		movd		ebx, mm7

nx_SuperEagleLine_32mmx_skipprocess:
		// Advance all cursors by one group and loop while pixels remain.
		add		pDlt, 8			// 4 pixels
		lea		eax, [eax+ 8]		// 4 pixels
		lea		edx, [edx+32]		// 8 pixels
		sub		width, 4		// 4 pixels
		jg		nx_SuperEagleLine_32mmx_loop

		// Leave MMX state so later x87 floating-point code works.
		emms
	}
}
|