/*---------------------------------------------------------------------*
 * The following (piece of) code, (part of) the 2xSaI engine,          *
 * copyright (c) 2001 by Derek Liauw Kie Fa.                           *
 * Non-Commercial use of the engine is allowed and is encouraged,      *
 * provided that appropriate credit be given and that this copyright   *
 * notice will not be removed under any circumstance.                  *
 * You may freely modify this code, but I request                      *
 * that any improvements to the engine be submitted to me, so          *
 * that I can implement these improvements in newer versions of        *
 * the engine.                                                         *
 * If you need more information, have any comments or suggestions,     *
 * you can e-mail me. My e-mail: DerekL666@yahoo.com                   *
 *---------------------------------------------------------------------*/
//
// This code was converted into VirtuaNES by Norix.
//
//
// nx_SuperEagleLine_16bpp_mmx
//
// Runs the SuperEagle filter over one source row using MMX and writes two
// destination rows of 16-bit pixels (each source pixel becomes a 2x2 block).
//
//   pSrc        : current source row. The code works from (pSrc - srcPitch)
//                 and reads four consecutive rows starting there.
//   pDlt        : delta buffer. It is addressed with the same row/column
//                 displacements as the source cursor and compared against
//                 the source to detect unchanged pixels; it is updated with
//                 the current source data as the row is walked.
//                 NOTE(review): presumably holds the previous frame with the
//                 same pitch as the source - confirm against the caller.
//   srcPitch    : byte distance between source rows (also used for pDlt rows)
//   width       : pixels to process; consumed 4 per iteration
//   pDst        : first destination row; second row is at pDst + dstPitch
//   dstPitch    : byte distance between destination rows
//   bForceWrite : non-zero = skip the delta test and always render
//
// Register roles inside the loop:
//   eax = source cursor (one row above pSrc), ebx = srcPitch,
//   edx = destination cursor, ecx/esi/edi = scratch.
//
// colorB0..colorA3, color1..color6, colorS1/colorS2, cMask, ONE, Mask35,
// Mask35b, Mask26, Mask26b, I56Pixel, I23Pixel, product1a/b, product2a/b and
// final1a/b, final2a/b are declared outside this function.
// NOTE(review): the color* names look like byte displacements selecting the
// neighbouring pixels of the 4x4 window - confirm against their definitions.
//
static void nx_SuperEagleLine_16bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
    __asm {
        mov       eax, pSrc
        mov       ebx, srcPitch
        mov       edx, pDst
        sub       eax, ebx                      // eax = pSrc - srcPitch (row above)
nx_SuperEagleLine_16mmx_loop:
        mov       ecx, bForceWrite
        test      ecx, ecx
        jz        nx_SuperEagleLine_16mmx_normal

        // Forced write: refresh the delta buffer and render unconditionally
        mov       esi, pDlt
        movq      mm6, [eax+colorB0]
        movq      [esi+colorB0], mm6

        jmp       nx_SuperEagleLine_16mmx_forcewrite
nx_SuperEagleLine_16mmx_normal:
        // Check delta
        mov       ecx, pDlt

        // load source img
        // (eight quadwords: four rows from eax, two displacements per row)
        lea       esi, [eax+ebx]
        movq      mm0, [eax+colorB0]
        movq      mm1, [eax+colorB3]
        movq      mm2, [eax+ebx+color4]
        movq      mm3, [eax+ebx+colorS2]
        movq      mm4, [eax+ebx*2+color1]
        movq      mm5, [eax+ebx*2+colorS1]
        movq      mm6, [esi+ebx*2+colorA0]
        movq      mm7, [esi+ebx*2+colorA3]

        // compare to delta (same displacements, based on pDlt)
        lea       esi, [ecx+ebx]
        pcmpeqw   mm0, [ecx+colorB0]
        pcmpeqw   mm1, [ecx+colorB3]
        pcmpeqw   mm2, [ecx+ebx+color4]
        pcmpeqw   mm3, [ecx+ebx+colorS2]
        pcmpeqw   mm4, [ecx+ebx*2+color1]
        pcmpeqw   mm5, [ecx+ebx*2+colorS1]
        pcmpeqw   mm6, [esi+ebx*2+colorA0]
        pcmpeqw   mm7, [esi+ebx*2+colorA3]

        // compose results
        // mm0 = AND of all eight equality masks; mm7 = lanes where mm0 == 0,
        // i.e. lanes in which at least one compare found a difference
        pand      mm0, mm1
        pand      mm2, mm3
        pand      mm4, mm5
        pand      mm6, mm7
        pand      mm0, mm2
        pand      mm4, mm6
        pxor      mm7, mm7
        pand      mm0, mm4
        movq      mm6, [eax+colorB0]
        pcmpeqw   mm7, mm0

        movq      [ecx+colorB0], mm6            // store current source into the delta buffer

        packsswb  mm7, mm7                      // squeeze the four lane masks into 32 bits
        movd      ecx, mm7
        test      ecx, ecx
        jz        nx_SuperEagleLine_16mmx_skipprocess   // nothing changed: skip rendering
        // End Delta
nx_SuperEagleLine_16mmx_forcewrite:
        //------------------------------
        // Interpolate pixels
        // (c0&c1)+(((c0^c1)&colorMask)>>1)
        //------------------------------
        movq      mm6, cMask

        // I56Pixel = interpolate(color5, color6)
        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color6]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0                      // mm4 keeps color5
        movq      mm5, mm1                      // mm5 keeps color6

        pxor      mm3, mm2
        pand      mm0, mm1
        pand      mm3, mm6
        psrlw     mm3, 1
        paddw     mm0, mm3
        movq      I56Pixel, mm0
        movq      mm7, mm0

        //------------------------------
        // product1a = interpolate(I56Pixel, color5)
        // product1b = interpolate(I56Pixel, color6)
        movq      mm0, mm7
        movq      mm2, mm7
        movq      mm1, mm7
        movq      mm3, mm7
        pxor      mm2, mm4
        pxor      mm3, mm5
        pand      mm0, mm4
        pand      mm2, mm6
        pand      mm1, mm5
        pand      mm3, mm6
        psrlw     mm2, 1
        psrlw     mm3, 1
        paddw     mm0, mm2
        paddw     mm1, mm3
        movq      product1a, mm0
        movq      product1b, mm1

        //------------------------------
        // I23Pixel = interpolate(color2, color3)
        movq      mm0, [eax+ebx*2+color2]
        movq      mm1, [eax+ebx*2+color3]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0                      // mm4 keeps color2
        movq      mm5, mm1                      // mm5 keeps color3

        pxor      mm3, mm2
        pand      mm0, mm1
        pand      mm3, mm6
        psrlw     mm3, 1
        paddw     mm0, mm3
        movq      I23Pixel, mm0
        movq      mm7, mm0

        //------------------------------
        // product2a = interpolate(I23Pixel, color2)
        // product2b = interpolate(I23Pixel, color3)
        movq      mm0, mm7
        movq      mm2, mm7
        movq      mm1, mm7
        movq      mm3, mm7
        pxor      mm2, mm4
        pxor      mm3, mm5
        pand      mm0, mm4
        pand      mm2, mm6
        pand      mm1, mm5
        pand      mm3, mm6
        psrlw     mm2, 1
        psrlw     mm3, 1
        paddw     mm0, mm2
        paddw     mm1, mm3
        movq      product2a, mm0
        movq      product2b, mm1

        //------------------------------
        // Decide which "branch" to take
        //------------------------------
        movq      mm4, [eax+ebx+color5]
        movq      mm5, [eax+ebx+color6]
        movq      mm6, [eax+ebx*2+color3]
        movq      mm7, [eax+ebx*2+color2]

        // Mask35 = (color5 == color3) AND NOT (color6 == color2)
        pxor      mm3, mm3
        movq      mm0, mm4
        movq      mm1, mm5

        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pcmpeqw   mm1, mm3                      // invert (color6 == color2)
        pand      mm0, mm1
        movq      Mask35, mm0

        // Mask35b = Mask35 AND ((colorS1 == color5 AND color4 == color5)
        //                    OR (colorA2 == color5 AND colorB1 == color5))
        lea       esi, [eax+ebx]
        movq      mm0, [eax+ebx*2+colorS1]
        movq      mm1, [eax+ebx+color4]
        movq      mm2, [esi+ebx*2+colorA2]
        movq      mm3, [eax+colorB1]
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm4
        pcmpeqw   mm3, mm4
        pand      mm0, mm1
        pand      mm2, mm3
        por       mm0, mm2
        pand      mm0, Mask35
        movq      Mask35b, mm0

        //------------------------------
        // Mask26 = NOT (color5 == color3) AND (color6 == color2)
        pxor      mm3, mm3
        movq      mm0, mm4
        movq      mm1, mm5

        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pcmpeqw   mm0, mm3                      // invert (color5 == color3)
        pand      mm0, mm1
        movq      Mask26, mm0

        // Mask26b = Mask26 AND ((color1 == color6 AND colorS2 == color6)
        //                    OR (colorA1 == color6 AND colorB2 == color6))
        lea       esi, [eax+ebx]
        movq      mm0, [eax+ebx*2+color1]
        movq      mm1, [eax+ebx+colorS2]
        movq      mm2, [esi+ebx*2+colorA1]
        movq      mm3, [eax+colorB2]
        pcmpeqw   mm0, mm5
        pcmpeqw   mm1, mm5
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm1
        pand      mm2, mm3
        por       mm0, mm2
        pand      mm0, Mask26
        movq      Mask26b, mm0

        //------------------------------
        // mm0 = lanes where both diagonals match (color5 == color3 and
        // color6 == color2) but color5 != color6; those lanes need the
        // neighbour "guess" below
        movq      mm0, mm4
        movq      mm1, mm5
        movq      mm2, mm0

        pcmpeqw   mm2, mm1
        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pand      mm0, mm1
        pand      mm2, mm0
        pxor      mm0, mm2
        movq      mm7, mm0

        //------------------------------
        packsswb  mm7, mm7
        movd      ecx, mm7
        test      ecx, ecx
        jz        nx_SuperEagleLine_16mmx_skipguess     // no lane needs guessing

        //------------------------------
        // Map of the pixels: I|E F|J
        //                    G|A B|K
        //                    H|C D|L
        //                    M|N O|P
        //------------------------------
        // mm6 = guess-lane mask ANDed with ONE; it is used as the per-lane
        // weight below. mm7 accumulates a signed per-lane score over four
        // neighbour pairs: +mm6 when the pair does not match color5 twice,
        // -mm6 when the pair does not match color6 twice.
        // NOTE(review): ONE is presumably 1 in every word lane - confirm.
        movq      mm6, mm0
        movq      mm4, [eax+ebx+color5]
        movq      mm5, [eax+ebx+color6]
        pxor      mm7, mm7
        pand      mm6, ONE

        // pair 1: colorB1, color4
        movq      mm0, [eax+colorB1]
        movq      mm1, [eax+ebx+color4]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair 2: colorB2, colorS2
        movq      mm0, [eax+colorB2]
        movq      mm1, [eax+ebx+colorS2]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        lea       edi, [eax+ebx]                // edi = second row of the window
        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair 3: color1, colorA1
        movq      mm0, [edi+ebx+color1]
        movq      mm1, [edi+ebx*2+colorA1]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair 4: colorS1, colorA2
        movq      mm0, [edi+ebx+colorS1]
        movq      mm1, [edi+ebx*2+colorA2]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // score > 0 adds the lane to Mask35, score < 0 adds it to Mask26
        movq      mm1, mm7
        pxor      mm0, mm0
        pcmpgtw   mm7, mm0
        pcmpgtw   mm0, mm1

        por       mm7, Mask35
        por       mm0, Mask26
        movq      Mask35, mm7
        movq      Mask26, mm0

nx_SuperEagleLine_16mmx_skipguess:
        // Start the ASSEMBLY !!!
        // Build per-lane selection masks in mm2..mm7, then form each of the
        // four output quadwords as an OR of masked candidates (original
        // pixel, I-pixel, product a, product b).

        movq      mm4, Mask35
        movq      mm5, Mask26
        movq      mm6, Mask35b
        movq      mm7, Mask26b

        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color6]
        movq      mm2, [eax+ebx*2+color2]
        movq      mm3, [eax+ebx*2+color3]
        pcmpeqw   mm0, mm2
        pcmpeqw   mm1, mm3
        movq      mm2, mm4
        movq      mm3, mm5
        por       mm0, mm1
        por       mm2, mm3
        pand      mm2, mm0
        pxor      mm0, mm2
        movq      mm3, mm0                      // mm3 = (5==2 or 6==3) and not (Mask35 or Mask26)

        movq      mm2, mm0
        pxor      mm0, mm0
        por       mm2, mm4
        pxor      mm4, mm6                      // mm4 = Mask35 xor Mask35b
        por       mm2, mm5
        pxor      mm5, mm7                      // mm5 = Mask26 xor Mask26b
        pcmpeqw   mm2, mm0                      // mm2 = lanes covered by none of the masks
        //----------------

        // final1a: from color5 / I56Pixel / product1b / product1a
        movq      mm0, [eax+ebx+color5]
        movq      mm1, mm3
        por       mm1, mm4
        por       mm1, mm6
        pand      mm0, mm1
        movq      mm1, mm5
        pand      mm1, I56Pixel
        por       mm0, mm1
        movq      mm1, mm7
        pand      mm1, product1b
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, product1a
        por       mm0, mm1
        movq      final1a, mm0

        // final1b: from color6 / I56Pixel / product1a / product1b
        movq      mm0, [eax+ebx+color6]
        movq      mm1, mm3
        por       mm1, mm5
        por       mm1, mm7
        pand      mm0, mm1
        movq      mm1, mm4
        pand      mm1, I56Pixel
        por       mm0, mm1
        movq      mm1, mm6
        pand      mm1, product1a
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, product1b
        por       mm0, mm1
        movq      final1b, mm0

        // final2a: from color2 / I23Pixel / product2b / product2a
        movq      mm0, [eax+ebx*2+color2]
        movq      mm1, mm3
        por       mm1, mm5
        por       mm1, mm7
        pand      mm0, mm1
        movq      mm1, mm4
        pand      mm1, I23Pixel
        por       mm0, mm1
        movq      mm1, mm6
        pand      mm1, product2b
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, product2a
        por       mm0, mm1
        movq      final2a, mm0

        // final2b: from color3 / I23Pixel / product2a / product2b
        movq      mm0, [eax+ebx*2+color3]
        movq      mm1, mm3
        por       mm1, mm4
        por       mm1, mm6
        pand      mm0, mm1
        movq      mm1, mm5
        pand      mm1, I23Pixel
        por       mm0, mm1
        movq      mm1, mm7
        pand      mm1, product2a
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, product2b
        por       mm0, mm1
        movq      final2b, mm0

        //------------------------------
        // Write final image
        //------------------------------
        movq      mm0, final1a
        movq      mm2, final1b
        movq      mm1, mm0
        movq      mm4, final2a
        movq      mm6, final2b
        movq      mm5, mm4

        // interleave the a/b words so each source pixel yields two output pixels
        punpcklwd mm0, mm2                      // B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
        punpckhwd mm1, mm2                      // B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)
        punpcklwd mm4, mm6                      // B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
        punpckhwd mm5, mm6                      // B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)

        mov       edi, dstPitch
        movq      [edx+0], mm0                  // 1st line
        movq      [edx+8], mm1
        movq      [edi+edx+0], mm4              // 2nd line
        movq      [edi+edx+8], mm5

nx_SuperEagleLine_16mmx_skipprocess:
        add       pDlt, 8                       // 4 pixels
        lea       eax, [eax+ 8]                 // 4 pixels
        lea       edx, [edx+16]                 // 8 pixels
        sub       width, 4                      // 4 pixels
        jg        nx_SuperEagleLine_16mmx_loop  // continue while remaining width > 0 (signed)

        emms                                    // leave MMX state so x87 code can run again
    }
}

//
// nx_SuperEagleLine_32bpp_mmx
//
// Runs the SuperEagle filter over one row of 16-bit source pixels using MMX
// and writes two destination rows of 32-bit pixels. The filtering itself is
// done on 16-bit lanes; each resulting pixel is expanded to 32 bits when it
// is stored.
//
//   pSrc        : current source row. The code works from (pSrc - srcPitch)
//                 and reads four consecutive rows starting there.
//   pDlt        : delta buffer. It is addressed with the same row/column
//                 displacements as the source cursor and compared against
//                 the source to detect unchanged pixels; it is updated with
//                 the current source data as the row is walked.
//                 NOTE(review): presumably holds the previous frame with the
//                 same pitch as the source - confirm against the caller.
//   srcPitch    : byte distance between source rows (also used for pDlt rows)
//   width       : pixels to process; consumed 4 per iteration
//   pDst        : first destination row; second row is at pDst + dstPitch
//   dstPitch    : byte distance between destination rows
//   bForceWrite : non-zero = skip the delta test and always render
//
// Register roles inside the loop:
//   eax = source cursor (one row above pSrc), ebx = srcPitch,
//   edx = destination cursor, ecx/esi/edi = scratch.
//   During the final store eax/ebx are reused as scratch and are parked in
//   esi / mm7 until the store is finished.
//
// colorB0..colorA3, color1..color6, colorS1/colorS2, cMask, ONE, Mask35,
// Mask35b, Mask26, Mask26b, I56Pixel, I23Pixel, product1a/b, product2a/b and
// final1a/b, final2a/b are declared outside this function.
// NOTE(review): the color* names look like byte displacements selecting the
// neighbouring pixels of the 4x4 window - confirm against their definitions.
//
static void nx_SuperEagleLine_32bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
    __asm {
        mov       eax, pSrc
        mov       ebx, srcPitch
        mov       edx, pDst
        sub       eax, ebx                      // eax = pSrc - srcPitch (row above)
nx_SuperEagleLine_32mmx_loop:
        mov       ecx, bForceWrite
        test      ecx, ecx
        jz        nx_SuperEagleLine_32mmx_normal

        // Forced write: refresh the delta buffer and render unconditionally
        mov       esi, pDlt
        movq      mm6, [eax+colorB0]
        movq      [esi+colorB0], mm6

        jmp       nx_SuperEagleLine_32mmx_forcewrite
nx_SuperEagleLine_32mmx_normal:
        // Check delta
        mov       ecx, pDlt

        // load source img
        // (eight quadwords: four rows from eax, two displacements per row)
        lea       esi, [eax+ebx]
        movq      mm0, [eax+colorB0]
        movq      mm1, [eax+colorB3]
        movq      mm2, [eax+ebx+color4]
        movq      mm3, [eax+ebx+colorS2]
        movq      mm4, [eax+ebx*2+color1]
        movq      mm5, [eax+ebx*2+colorS1]
        movq      mm6, [esi+ebx*2+colorA0]
        movq      mm7, [esi+ebx*2+colorA3]

        // compare to delta (same displacements, based on pDlt)
        lea       esi, [ecx+ebx]
        pcmpeqw   mm0, [ecx+colorB0]
        pcmpeqw   mm1, [ecx+colorB3]
        pcmpeqw   mm2, [ecx+ebx+color4]
        pcmpeqw   mm3, [ecx+ebx+colorS2]
        pcmpeqw   mm4, [ecx+ebx*2+color1]
        pcmpeqw   mm5, [ecx+ebx*2+colorS1]
        pcmpeqw   mm6, [esi+ebx*2+colorA0]
        pcmpeqw   mm7, [esi+ebx*2+colorA3]

        // compose results
        // mm0 = AND of all eight equality masks; mm7 = lanes where mm0 == 0,
        // i.e. lanes in which at least one compare found a difference
        pand      mm0, mm1
        pand      mm2, mm3
        pand      mm4, mm5
        pand      mm6, mm7
        pand      mm0, mm2
        pand      mm4, mm6
        pxor      mm7, mm7
        pand      mm0, mm4
        movq      mm6, [eax+colorB0]
        pcmpeqw   mm7, mm0

        movq      [ecx+colorB0], mm6            // store current source into the delta buffer

        packsswb  mm7, mm7                      // squeeze the four lane masks into 32 bits
        movd      ecx, mm7
        test      ecx, ecx
        jz        nx_SuperEagleLine_32mmx_skipprocess   // nothing changed: skip rendering
        // End Delta
nx_SuperEagleLine_32mmx_forcewrite:
        //------------------------------
        // Interpolate pixels
        // (c0&c1)+(((c0^c1)&colorMask)>>1)
        //------------------------------
        movq      mm6, cMask

        // I56Pixel = interpolate(color5, color6)
        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color6]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0                      // mm4 keeps color5
        movq      mm5, mm1                      // mm5 keeps color6

        pxor      mm3, mm2
        pand      mm0, mm1
        pand      mm3, mm6
        psrlw     mm3, 1
        paddw     mm0, mm3
        movq      I56Pixel, mm0
        movq      mm7, mm0

        //------------------------------
        // product1a = interpolate(I56Pixel, color5)
        // product1b = interpolate(I56Pixel, color6)
        movq      mm0, mm7
        movq      mm2, mm7
        movq      mm1, mm7
        movq      mm3, mm7
        pxor      mm2, mm4
        pxor      mm3, mm5
        pand      mm0, mm4
        pand      mm2, mm6
        pand      mm1, mm5
        pand      mm3, mm6
        psrlw     mm2, 1
        psrlw     mm3, 1
        paddw     mm0, mm2
        paddw     mm1, mm3
        movq      product1a, mm0
        movq      product1b, mm1

        //------------------------------
        // I23Pixel = interpolate(color2, color3)
        movq      mm0, [eax+ebx*2+color2]
        movq      mm1, [eax+ebx*2+color3]
        movq      mm2, mm0
        movq      mm3, mm1
        movq      mm4, mm0                      // mm4 keeps color2
        movq      mm5, mm1                      // mm5 keeps color3

        pxor      mm3, mm2
        pand      mm0, mm1
        pand      mm3, mm6
        psrlw     mm3, 1
        paddw     mm0, mm3
        movq      I23Pixel, mm0
        movq      mm7, mm0

        //------------------------------
        // product2a = interpolate(I23Pixel, color2)
        // product2b = interpolate(I23Pixel, color3)
        movq      mm0, mm7
        movq      mm2, mm7
        movq      mm1, mm7
        movq      mm3, mm7
        pxor      mm2, mm4
        pxor      mm3, mm5
        pand      mm0, mm4
        pand      mm2, mm6
        pand      mm1, mm5
        pand      mm3, mm6
        psrlw     mm2, 1
        psrlw     mm3, 1
        paddw     mm0, mm2
        paddw     mm1, mm3
        movq      product2a, mm0
        movq      product2b, mm1

        //------------------------------
        // Decide which "branch" to take
        //------------------------------
        movq      mm4, [eax+ebx+color5]
        movq      mm5, [eax+ebx+color6]
        movq      mm6, [eax+ebx*2+color3]
        movq      mm7, [eax+ebx*2+color2]

        // Mask35 = (color5 == color3) AND NOT (color6 == color2)
        pxor      mm3, mm3
        movq      mm0, mm4
        movq      mm1, mm5

        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pcmpeqw   mm1, mm3                      // invert (color6 == color2)
        pand      mm0, mm1
        movq      Mask35, mm0

        // Mask35b = Mask35 AND ((colorS1 == color5 AND color4 == color5)
        //                    OR (colorA2 == color5 AND colorB1 == color5))
        lea       esi, [eax+ebx]
        movq      mm0, [eax+ebx*2+colorS1]
        movq      mm1, [eax+ebx+color4]
        movq      mm2, [esi+ebx*2+colorA2]
        movq      mm3, [eax+colorB1]
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm4
        pcmpeqw   mm3, mm4
        pand      mm0, mm1
        pand      mm2, mm3
        por       mm0, mm2
        pand      mm0, Mask35
        movq      Mask35b, mm0

        //------------------------------
        // Mask26 = NOT (color5 == color3) AND (color6 == color2)
        pxor      mm3, mm3
        movq      mm0, mm4
        movq      mm1, mm5

        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pcmpeqw   mm0, mm3                      // invert (color5 == color3)
        pand      mm0, mm1
        movq      Mask26, mm0

        // Mask26b = Mask26 AND ((color1 == color6 AND colorS2 == color6)
        //                    OR (colorA1 == color6 AND colorB2 == color6))
        lea       esi, [eax+ebx]
        movq      mm0, [eax+ebx*2+color1]
        movq      mm1, [eax+ebx+colorS2]
        movq      mm2, [esi+ebx*2+colorA1]
        movq      mm3, [eax+colorB2]
        pcmpeqw   mm0, mm5
        pcmpeqw   mm1, mm5
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm1
        pand      mm2, mm3
        por       mm0, mm2
        pand      mm0, Mask26
        movq      Mask26b, mm0

        //------------------------------
        // mm0 = lanes where both diagonals match (color5 == color3 and
        // color6 == color2) but color5 != color6; those lanes need the
        // neighbour "guess" below
        movq      mm0, mm4
        movq      mm1, mm5
        movq      mm2, mm0

        pcmpeqw   mm2, mm1
        pcmpeqw   mm0, mm6
        pcmpeqw   mm1, mm7
        pand      mm0, mm1
        pand      mm2, mm0
        pxor      mm0, mm2
        movq      mm7, mm0

        //------------------------------
        packsswb  mm7, mm7
        movd      ecx, mm7
        test      ecx, ecx
        jz        nx_SuperEagleLine_32mmx_skipguess     // no lane needs guessing

        //------------------------------
        // Map of the pixels: I|E F|J
        //                    G|A B|K
        //                    H|C D|L
        //                    M|N O|P
        //------------------------------
        // mm6 = guess-lane mask ANDed with ONE; it is used as the per-lane
        // weight below. mm7 accumulates a signed per-lane score over four
        // neighbour pairs: +mm6 when the pair does not match color5 twice,
        // -mm6 when the pair does not match color6 twice.
        // NOTE(review): ONE is presumably 1 in every word lane - confirm.
        movq      mm6, mm0
        movq      mm4, [eax+ebx+color5]
        movq      mm5, [eax+ebx+color6]
        pxor      mm7, mm7
        pand      mm6, ONE

        // pair 1: colorB1, color4
        movq      mm0, [eax+colorB1]
        movq      mm1, [eax+ebx+color4]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair 2: colorB2, colorS2
        movq      mm0, [eax+colorB2]
        movq      mm1, [eax+ebx+colorS2]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        lea       edi, [eax+ebx]                // edi = second row of the window
        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair 3: color1, colorA1
        movq      mm0, [edi+ebx+color1]
        movq      mm1, [edi+ebx*2+colorA1]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair 4: colorS1, colorA2
        movq      mm0, [edi+ebx+colorS1]
        movq      mm1, [edi+ebx*2+colorA2]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // score > 0 adds the lane to Mask35, score < 0 adds it to Mask26
        movq      mm1, mm7
        pxor      mm0, mm0
        pcmpgtw   mm7, mm0
        pcmpgtw   mm0, mm1

        por       mm7, Mask35
        por       mm0, Mask26
        movq      Mask35, mm7
        movq      Mask26, mm0

nx_SuperEagleLine_32mmx_skipguess:
        // Start the ASSEMBLY !!!
        // Build per-lane selection masks in mm2..mm7, then form each of the
        // four output quadwords as an OR of masked candidates (original
        // pixel, I-pixel, product a, product b).

        movq      mm4, Mask35
        movq      mm5, Mask26
        movq      mm6, Mask35b
        movq      mm7, Mask26b

        movq      mm0, [eax+ebx+color5]
        movq      mm1, [eax+ebx+color6]
        movq      mm2, [eax+ebx*2+color2]
        movq      mm3, [eax+ebx*2+color3]
        pcmpeqw   mm0, mm2
        pcmpeqw   mm1, mm3
        movq      mm2, mm4
        movq      mm3, mm5
        por       mm0, mm1
        por       mm2, mm3
        pand      mm2, mm0
        pxor      mm0, mm2
        movq      mm3, mm0                      // mm3 = (5==2 or 6==3) and not (Mask35 or Mask26)

        movq      mm2, mm0
        pxor      mm0, mm0
        por       mm2, mm4
        pxor      mm4, mm6                      // mm4 = Mask35 xor Mask35b
        por       mm2, mm5
        pxor      mm5, mm7                      // mm5 = Mask26 xor Mask26b
        pcmpeqw   mm2, mm0                      // mm2 = lanes covered by none of the masks
        //----------------

        // final1a: from color5 / I56Pixel / product1b / product1a
        movq      mm0, [eax+ebx+color5]
        movq      mm1, mm3
        por       mm1, mm4
        por       mm1, mm6
        pand      mm0, mm1
        movq      mm1, mm5
        pand      mm1, I56Pixel
        por       mm0, mm1
        movq      mm1, mm7
        pand      mm1, product1b
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, product1a
        por       mm0, mm1
        movq      final1a, mm0

        // final1b: from color6 / I56Pixel / product1a / product1b
        movq      mm0, [eax+ebx+color6]
        movq      mm1, mm3
        por       mm1, mm5
        por       mm1, mm7
        pand      mm0, mm1
        movq      mm1, mm4
        pand      mm1, I56Pixel
        por       mm0, mm1
        movq      mm1, mm6
        pand      mm1, product1a
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, product1b
        por       mm0, mm1
        movq      final1b, mm0

        // final2a: from color2 / I23Pixel / product2b / product2a
        movq      mm0, [eax+ebx*2+color2]
        movq      mm1, mm3
        por       mm1, mm5
        por       mm1, mm7
        pand      mm0, mm1
        movq      mm1, mm4
        pand      mm1, I23Pixel
        por       mm0, mm1
        movq      mm1, mm6
        pand      mm1, product2b
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, product2a
        por       mm0, mm1
        movq      final2a, mm0

        // final2b: from color3 / I23Pixel / product2a / product2b
        movq      mm0, [eax+ebx*2+color3]
        movq      mm1, mm3
        por       mm1, mm4
        por       mm1, mm6
        pand      mm0, mm1
        movq      mm1, mm5
        pand      mm1, I23Pixel
        por       mm0, mm1
        movq      mm1, mm7
        pand      mm1, product2a
        por       mm0, mm1
        movq      mm1, mm2
        pand      mm1, product2b
        por       mm0, mm1
        movq      final2b, mm0

        //------------------------------
        // Write final image
        //------------------------------
        movq      mm0, final1a
        movq      mm2, final1b
        movq      mm1, mm0
        movq      mm4, final2a
        movq      mm6, final2b
        movq      mm5, mm4

        // interleave the a/b words so each source pixel yields two output pixels
        punpcklwd mm0, mm2                      // B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
        punpckhwd mm1, mm2                      // B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)
        punpcklwd mm4, mm6                      // B1A1B0A0=B3B2B1B0(dst):A3A2A1A0(src)
        punpckhwd mm5, mm6                      // B3A3B2A2=B3B2B1B0(dst):A3A2A1A0(src)

        // Write image RGB1555->RGBx888
        //
        // For every 16-bit pixel taken from the low word of an MMX register:
        //   eax = pixel << 3   -> al = pixel bits 0-4 in the top 5 bits of byte 0
        //   shl ah, 3          -> ah = pixel bits 5-9 in the top 5 bits of byte 1
        //   ebx = pixel << 9, masked with 0x00F8F8F8
        //                      -> pixel bits 10-14 in the top 5 bits of byte 2
        //   mov bx, ax         -> merge bytes 0/1 into the result, byte 3 stays 0
        // psrlq by 16 then brings the next pixel into the low word.

        // save (eax/ebx are needed as scratch for the conversion)
        mov       esi, eax
        movd      mm7, ebx
        mov       edi, dstPitch
        mov       ecx, 0x00F8F8F8               // mask

        // 1st line
        movd      eax, mm0
        movd      ebx, mm0
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm0, 16
        mov       [edx+0], ebx
        movd      eax, mm0
        movd      ebx, mm0
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm0, 16
        mov       [edx+4], ebx
        movd      eax, mm0
        movd      ebx, mm0
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm0, 16
        mov       [edx+8], ebx
        movd      eax, mm0
        movd      ebx, mm0
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        mov       [edx+12], ebx

        movd      eax, mm1
        movd      ebx, mm1
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm1, 16
        mov       [edx+16], ebx
        movd      eax, mm1
        movd      ebx, mm1
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm1, 16
        mov       [edx+20], ebx
        movd      eax, mm1
        movd      ebx, mm1
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm1, 16
        mov       [edx+24], ebx
        movd      eax, mm1
        movd      ebx, mm1
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        mov       [edx+28], ebx

        // 2nd line
        movd      eax, mm4
        movd      ebx, mm4
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm4, 16
        mov       [edx+edi+ 0], ebx
        movd      eax, mm4
        movd      ebx, mm4
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm4, 16
        mov       [edx+edi+ 4], ebx
        movd      eax, mm4
        movd      ebx, mm4
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm4, 16
        mov       [edx+edi+ 8], ebx
        movd      eax, mm4
        movd      ebx, mm4
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        mov       [edx+edi+12], ebx

        movd      eax, mm5
        movd      ebx, mm5
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm5, 16
        mov       [edx+edi+16], ebx
        movd      eax, mm5
        movd      ebx, mm5
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm5, 16
        mov       [edx+edi+20], ebx
        movd      eax, mm5
        movd      ebx, mm5
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm5, 16
        mov       [edx+edi+24], ebx
        movd      eax, mm5
        movd      ebx, mm5
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        mov       [edx+edi+28], ebx

        // restore source cursor and pitch
        mov       eax, esi
        movd      ebx, mm7

nx_SuperEagleLine_32mmx_skipprocess:
        add       pDlt, 8                       // 4 pixels
        lea       eax, [eax+ 8]                 // 4 pixels
        lea       edx, [edx+32]                 // 8 pixels
        sub       width, 4                      // 4 pixels
        jg        nx_SuperEagleLine_32mmx_loop  // continue while remaining width > 0 (signed)

        emms                                    // leave MMX state so x87 code can run again
    }
}