1256 lines
27 KiB
C
1256 lines
27 KiB
C
/*---------------------------------------------------------------------*
 * The following (piece of) code, (part of) the 2xSaI engine,         *
 * copyright (c) 2001 by Derek Liauw Kie Fa.                          *
 * Non-Commercial use of the engine is allowed and is encouraged,     *
 * provided that appropriate credit be given and that this copyright  *
 * notice will not be removed under any circumstance.                 *
 * You may freely modify this code, but I request                     *
 * that any improvements to the engine be submitted to me, so         *
 * that I can implement these improvements in newer versions of       *
 * the engine.                                                        *
 * If you need more information, have any comments or suggestions,    *
 * you can e-mail me. My e-mail: DerekL666@yahoo.com                  *
 *---------------------------------------------------------------------*/

//
// This code was converted into VirtuaNES by Norix.
//
/*
 * nx_2xSaILine_16bpp_mmx
 *
 * Scales one row of 16-bit source pixels to two rows of 16-bit destination
 * pixels with the 2xSaI filter, using inline MMX assembly.  Every loop
 * iteration consumes 8 source bytes (4 pixels) and stores 16 bytes
 * (8 pixels) to each of the two destination rows.
 *
 *   pSrc        current source row; the row above it (pSrc - srcPitch) and
 *               the two rows below it are read as well.
 *   pDlt        delta buffer.  Unless bForceWrite is set, a group whose
 *               sampled neighbourhood equals the delta buffer is skipped and
 *               nothing is stored to pDst for it.  One quadword of the delta
 *               buffer is refreshed from the source on every iteration.
 *               Its rows are addressed with srcPitch.
 *   srcPitch    byte distance between two source rows.
 *   width       number of source pixels; consumed four at a time (the last
 *               group is processed in full even if width is not a multiple
 *               of 4).  A width of 0 returns immediately.
 *   pDst        first destination row.
 *   dstPitch    byte distance from pDst to the second destination row.
 *   bForceWrite non-zero: bypass the delta comparison and always filter.
 *
 * Register roles inside the loop:
 *   eax = pSrc - srcPitch (top row of the 4x4 window), advanced by 8
 *   ebx = srcPitch
 *   edx = destination cursor, advanced by 16
 *   ecx, esi, edi = scratch
 *
 * colorA..colorP are address displacements and Mask1, Mask2, ACPixel, cMask,
 * qMask, lMask and ONE are quadword memory operands; all are defined outside
 * this function.  Mask1, Mask2 and ACPixel are written here as scratch.
 * NOTE(review): if those are plain globals, concurrent calls would share the
 * scratch storage - confirm against their definitions.
 */
static void nx_2xSaILine_16bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
    __asm {
        mov       eax, pSrc
        mov       ebx, srcPitch
        mov       edx, pDst
        sub       eax, ebx                      // eax -> row above pSrc

        // Nothing to do for an empty row: the loop below is bottom-tested
        // and would otherwise filter and store one full 4-pixel group.
        mov       ecx, width
        test      ecx, ecx
        jz        nx_2xSaILine_16mmx_done

    nx_2xSaILine_16mmx_loop:
        mov       ecx, bForceWrite
        test      ecx, ecx
        jz        nx_2xSaILine_16mmx_normal

        // Forced write: refresh the delta buffer and go straight to filtering
        mov       esi, pDlt
        movq      mm6, [eax+colorI]
        movq      [esi+colorI], mm6

        jmp       nx_2xSaILine_16mmx_forcewrite

    nx_2xSaILine_16mmx_normal:
        // Check delta
        mov       ecx, pDlt

        // load source img (two quadwords from each of the four rows)
        lea       esi, [eax+ebx]
        movq      mm0, [eax+colorI]
        movq      mm1, [eax+colorJ]
        movq      mm2, [eax+ebx+colorG]
        movq      mm3, [eax+ebx+colorK]
        movq      mm4, [eax+ebx*2+colorH]
        movq      mm5, [eax+ebx*2+colorL]
        movq      mm6, [esi+ebx*2+colorM]
        movq      mm7, [esi+ebx*2+colorP]

        // compare to delta (word lanes become 0xFFFF where equal)
        lea       esi, [ecx+ebx]
        pcmpeqw   mm0, [ecx+colorI]
        pcmpeqw   mm1, [ecx+colorJ]
        pcmpeqw   mm2, [ecx+ebx+colorG]
        pcmpeqw   mm3, [ecx+ebx+colorK]
        pcmpeqw   mm4, [ecx+ebx*2+colorH]
        pcmpeqw   mm5, [ecx+ebx*2+colorL]
        pcmpeqw   mm6, [esi+ebx*2+colorM]
        pcmpeqw   mm7, [esi+ebx*2+colorP]

        // compose results: mm0 lane stays set only if all eight compares matched
        pand      mm0, mm1
        pand      mm2, mm3
        pand      mm4, mm5
        pand      mm6, mm7
        pand      mm0, mm2
        pand      mm4, mm6
        pxor      mm7, mm7
        pand      mm0, mm4
        movq      mm6, [eax+colorI]
        pcmpeqw   mm7, mm0                      // mm7 lane set where something differed

        movq      [ecx+colorI], mm6             // refresh the delta buffer

        packsswb  mm7, mm7                      // squeeze the four lane flags into 32 bits
        movd      ecx, mm7
        test      ecx, ecx
        jz        nx_2xSaILine_16mmx_skipprocess // no lane differed: keep old output
        // End Delta

    nx_2xSaILine_16mmx_forcewrite:
        //------------------------------
        // 1
        // if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
        movq      mm0, [eax+ebx+colorA]         // mm0 and mm1 contain colorA
        movq      mm2, [eax+ebx+colorB]         // mm2 and mm3 contain colorB

        movq      mm1, mm0
        movq      mm3, mm2

        pcmpeqw   mm0, [eax+ebx*2+colorD]
        pcmpeqw   mm1, [eax+colorE]
        pcmpeqw   mm2, [eax+ebx*2+colorL]
        pcmpeqw   mm3, [eax+ebx*2+colorC]

        pand      mm0, mm1
        pxor      mm1, mm1
        pand      mm0, mm2
        pcmpeqw   mm3, mm1                      // invert: colorB != colorC
        pand      mm0, mm3                      // result in mm0

        // if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
        movq      mm4, [eax+ebx+colorA]         // mm4 and mm5 contain colorA
        movq      mm6, [eax+ebx+colorB]         // mm6 and mm7 contain colorB
        movq      mm5, mm4
        movq      mm7, mm6

        pcmpeqw   mm4, [eax+ebx*2+colorC]
        pcmpeqw   mm5, [eax+colorF]
        pcmpeqw   mm6, [eax+colorJ]
        pcmpeqw   mm7, [eax+colorE]

        pand      mm4, mm5
        pxor      mm5, mm5
        pand      mm4, mm6
        pcmpeqw   mm7, mm5                      // invert: colorB != colorE
        pand      mm4, mm7                      // result in mm4

        por       mm0, mm4                      // combine the masks
        movq      Mask1, mm0

        //------------------------------
        // 2
        // if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
        movq      mm0, [eax+ebx+colorB]         // mm0 and mm1 contain colorB
        movq      mm2, [eax+ebx+colorA]         // mm2 and mm3 contain colorA
        movq      mm1, mm0
        movq      mm3, mm2

        pcmpeqw   mm0, [eax+ebx*2+colorC]
        pcmpeqw   mm1, [eax+colorF]
        pcmpeqw   mm2, [eax+ebx*2+colorH]
        pcmpeqw   mm3, [eax+ebx*2+colorD]

        pand      mm0, mm1
        pxor      mm1, mm1
        pand      mm0, mm2
        pcmpeqw   mm3, mm1                      // invert: colorA != colorD
        pand      mm0, mm3                      // result in mm0

        // if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
        movq      mm4, [eax+ebx+colorB]         // mm4 and mm5 contain colorB
        movq      mm6, [eax+ebx+colorA]         // mm6 and mm7 contain colorA
        movq      mm5, mm4
        movq      mm7, mm6

        pcmpeqw   mm4, [eax+ebx*2+colorD]
        pcmpeqw   mm5, [eax+colorE]
        pcmpeqw   mm6, [eax+colorI]
        pcmpeqw   mm7, [eax+colorF]

        pand      mm4, mm5
        pxor      mm5, mm5
        pand      mm4, mm6
        pcmpeqw   mm7, mm5                      // invert: colorA != colorF
        pand      mm4, mm7                      // result in mm4

        por       mm0, mm4                      // combine the masks
        movq      Mask2, mm0

        //------------------------------
        // interpolate colorA and colorB
        //   (A & B) + (((A ^ B) & cMask) >> 1)
        //------------------------------
        movq      mm0, [eax+ebx+colorA]
        movq      mm1, [eax+ebx+colorB]
        movq      mm2, mm0
        movq      mm3, mm1

        movq      mm6, cMask
        pxor      mm3, mm2
        pand      mm0, mm1
        pand      mm3, mm6
        psrlw     mm3, 1
        paddw     mm0, mm3                      // mm0 contains the interpolated values

        // assemble the pixels:
        //   Mask1 lanes take colorA, Mask2 lanes take colorB,
        //   all remaining lanes take the interpolated value
        movq      mm1, [eax+ebx+colorA]
        movq      mm2, [eax+ebx+colorB]

        movq      mm3, Mask1
        movq      mm5, mm1
        movq      mm4, Mask2
        movq      mm6, mm1

        pand      mm1, mm3
        por       mm3, mm4
        pxor      mm7, mm7
        pand      mm2, mm4

        pcmpeqw   mm3, mm7                      // lanes where neither mask is set
        por       mm1, mm2
        pand      mm0, mm3

        por       mm0, mm1

        punpcklwd mm5, mm0                      // interleave colorA with the computed pixels
        punpckhwd mm6, mm0

        //------------------------------
        // Write image (first destination row)
        //------------------------------
        movq      [edx+0], mm5
        movq      [edx+8], mm6

        //------------------------------
        // Create the Nextline
        //------------------------------
        // 3
        // if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
        lea       esi, [eax+ebx]

        movq      mm0, [eax+ebx+colorA]         // mm0 and mm1 contain colorA
        movq      mm2, [eax+ebx*2+colorC]       // mm2 and mm3 contain colorC
        movq      mm1, mm0
        movq      mm3, mm2

        pcmpeqw   mm0, [esi+ebx+colorD]
        pcmpeqw   mm1, [esi+colorG]
        pcmpeqw   mm2, [esi+ebx*2+colorO]
        pcmpeqw   mm3, [esi+colorB]

        pand      mm0, mm1
        pxor      mm1, mm1
        pand      mm0, mm2
        pcmpeqw   mm3, mm1                      // invert: colorB != colorC
        pand      mm0, mm3                      // result in mm0

        // if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
        movq      mm4, [eax+ebx+colorA]         // mm4 and mm5 contain colorA
        movq      mm6, [eax+ebx*2+colorC]       // mm6 and mm7 contain colorC
        movq      mm5, mm4
        movq      mm7, mm6

        pcmpeqw   mm4, [esi+ebx+colorH]
        pcmpeqw   mm5, [esi+colorB]
        pcmpeqw   mm6, [esi+ebx*2+colorM]
        pcmpeqw   mm7, [esi+colorG]

        pand      mm4, mm5
        pxor      mm5, mm5
        pand      mm4, mm6
        pcmpeqw   mm7, mm5                      // invert: colorG != colorC
        pand      mm4, mm7                      // result in mm4

        por       mm0, mm4                      // combine the masks
        movq      Mask1, mm0

        //------------------------------
        // 4
        // if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
        movq      mm0, [eax+ebx*2+colorC]       // mm0 and mm1 contain colorC
        movq      mm2, [eax+ebx+colorA]         // mm2 and mm3 contain colorA
        movq      mm1, mm0
        movq      mm3, mm2

        pcmpeqw   mm0, [eax+ebx+colorB]
        pcmpeqw   mm1, [eax+ebx*2+colorH]
        pcmpeqw   mm2, [eax+colorF]
        pcmpeqw   mm3, [eax+ebx*2+colorD]

        pand      mm0, mm1
        pxor      mm1, mm1
        pand      mm0, mm2
        pcmpeqw   mm3, mm1                      // invert: colorA != colorD
        pand      mm0, mm3                      // result in mm0

        // if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
        movq      mm4, [eax+ebx*2+colorC]       // mm4 and mm5 contain colorC
        movq      mm6, [eax+ebx+colorA]         // mm6 and mm7 contain colorA
        movq      mm5, mm4
        movq      mm7, mm6

        pcmpeqw   mm4, [eax+ebx*2+colorD]
        pcmpeqw   mm5, [eax+ebx+colorG]
        pcmpeqw   mm6, [eax+colorI]
        pcmpeqw   mm7, [eax+ebx*2+colorH]

        pand      mm4, mm5
        pxor      mm5, mm5
        pand      mm4, mm6
        pcmpeqw   mm7, mm5                      // invert: colorA != colorH
        pand      mm4, mm7                      // result in mm4

        por       mm0, mm4                      // combine the masks
        movq      Mask2, mm0

        //------------------------------
        // interpolate colorA and colorC
        //------------------------------
        movq      mm0, [eax+ebx+colorA]
        movq      mm1, [eax+ebx*2+colorC]
        movq      mm2, mm0
        movq      mm3, mm1

        movq      mm6, cMask
        pxor      mm3, mm2
        pand      mm0, mm1
        pand      mm3, mm6
        psrlw     mm3, 1
        paddw     mm0, mm3                      // mm0 contains the interpolated values

        // assemble the pixels (Mask1 -> colorA, Mask2 -> colorC, else interpolated)
        movq      mm1, [eax+ebx+colorA]
        movq      mm2, [eax+ebx*2+colorC]

        movq      mm3, Mask1
        movq      mm4, Mask2

        pand      mm1, mm3
        pand      mm2, mm4

        por       mm3, mm4
        pxor      mm7, mm7
        por       mm1, mm2

        pcmpeqw   mm3, mm7
        pand      mm0, mm3
        por       mm0, mm1
        movq      ACPixel, mm0                  // kept for the second-row store below

        //------------------------------
        // Decide which "branch" to take
        //------------------------------
        movq      mm0, [eax+ebx+colorA]
        movq      mm1, [eax+ebx+colorB]
        movq      mm6, mm0
        movq      mm7, mm1
        pcmpeqw   mm0, [eax+ebx*2+colorD]
        pcmpeqw   mm1, [eax+ebx*2+colorC]
        pcmpeqw   mm6, mm7                      // colorA == colorB

        movq      mm2, mm0
        movq      mm3, mm0

        pand      mm0, mm1                      // colorA == colorD && colorB == colorC
        pxor      mm7, mm7

        pcmpeqw   mm2, mm7
        pand      mm6, mm0                      // ... and additionally colorA == colorB
        pand      mm2, mm1                      // colorA != colorD && colorB == colorC

        pcmpeqw   mm1, mm7

        pand      mm1, mm3                      // colorA == colorD && colorB != colorC
        pxor      mm0, mm6                      // both diagonals match but colorA != colorB
        por       mm1, mm6
        movq      mm7, mm0
        movq      Mask2, mm2
        packsswb  mm7, mm7
        movq      Mask1, mm1

        movd      ecx, mm7
        test      ecx, ecx
        jz        nx_2xSaILine_16mmx_skipguess  // no ambiguous lane in this group

        //------------------------------
        // Map of the pixels: I|E F|J
        //                    G|A B|K
        //                    H|C D|L
        //                    M|N O|P
        //------------------------------
        // For the ambiguous lanes, vote using four neighbour pairs.  For each
        // pair, count how many of the two pixels equal colorA and how many
        // equal colorB, then mm7 += (countA <= 1) - (countB <= 1).
        movq      mm6, mm0
        movq      mm4, [eax+ebx+colorA]
        movq      mm5, [eax+ebx+colorB]
        pxor      mm7, mm7                      // mm7 = running vote
        pand      mm6, ONE                      // per-lane ONE value, ambiguous lanes only

        // pair E, G
        movq      mm0, [eax+colorE]
        movq      mm1, [eax+ebx+colorG]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair F, K
        movq      mm0, [eax+colorF]
        movq      mm1, [eax+ebx+colorK]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        lea       esi, [eax+ebx]

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair H, N
        movq      mm0, [esi+ebx+colorH]
        movq      mm1, [esi+ebx*2+colorN]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair L, O
        movq      mm0, [esi+ebx+colorL]
        movq      mm1, [esi+ebx*2+colorO]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // Positive vote joins Mask1, negative vote joins Mask2
        movq      mm1, mm7
        pxor      mm0, mm0
        pcmpgtw   mm7, mm0
        pcmpgtw   mm0, mm1

        por       mm7, Mask1
        por       mm0, Mask2
        movq      Mask1, mm7
        movq      Mask2, mm0

    nx_2xSaILine_16mmx_skipguess:
        //------------------------------
        // interpolate A, B, C and D
        //   sum of ((x & qMask) >> 2) over the four pixels, plus
        //   ((sum of (x & lMask)) >> 2) & lMask
        //------------------------------
        movq      mm0, [eax+ebx+colorA]
        movq      mm1, [eax+ebx+colorB]
        movq      mm4, mm0
        movq      mm2, [eax+ebx*2+colorC]
        movq      mm5, mm1
        movq      mm3, qMask                    // qcolorMask
        movq      mm6, mm2
        movq      mm7, lMask                    // qlowcolorMask

        pand      mm0, mm3
        pand      mm1, mm3
        pand      mm2, mm3
        pand      mm3, [eax+ebx*2+colorD]

        psrlw     mm0, 2
        pand      mm4, mm7
        psrlw     mm1, 2
        pand      mm5, mm7
        psrlw     mm2, 2
        pand      mm6, mm7
        psrlw     mm3, 2
        pand      mm7, [eax+ebx*2+colorD]

        paddw     mm0, mm1
        paddw     mm2, mm3

        paddw     mm4, mm5
        paddw     mm6, mm7

        paddw     mm4, mm6
        movq      mm7, lMask                    // qlowcolorMask
        paddw     mm0, mm2
        psrlw     mm4, 2
        pand      mm4, mm7
        paddw     mm0, mm4                      // mm0 contains the interpolated value of A, B, C and D

        // assemble the pixels (Mask1 -> colorA, Mask2 -> colorB, else interpolated)
        movq      mm1, Mask1
        movq      mm2, Mask2
        movq      mm4, [eax+ebx+colorA]
        movq      mm5, [eax+ebx+colorB]
        pand      mm4, mm1
        pand      mm5, mm2

        pxor      mm7, mm7
        por       mm1, mm2
        por       mm4, mm5
        pcmpeqw   mm1, mm7
        pand      mm0, mm1
        por       mm4, mm0                      // mm4 contains the diagonal pixels

        movq      mm0, ACPixel
        mov       edi, dstPitch
        movq      mm1, mm0
        punpcklwd mm0, mm4                      // interleave A/C pixels with the diagonal pixels
        punpckhwd mm1, mm4

        //------------------------------
        // Write image (second destination row)
        //------------------------------
        movq      [edx+edi+0], mm0
        movq      [edx+edi+8], mm1

    nx_2xSaILine_16mmx_skipprocess:
        add       pDlt, 8                       // 4 pixels
        lea       eax, [eax+ 8]                 // 4 pixels
        lea       edx, [edx+16]                 // 8 pixels
        sub       width, 4                      // 4 pixels
        jg        nx_2xSaILine_16mmx_loop

    nx_2xSaILine_16mmx_done:
        emms                                    // leave MMX state so x87 code can run afterwards
    }
}
|
|
|
|
/*
 * nx_2xSaILine_32bpp_mmx
 *
 * Scales one row of 16-bit source pixels to two rows of 32-bit destination
 * pixels with the 2xSaI filter, using inline MMX assembly.  The filter runs
 * on the 16-bit pixels; each resulting pixel is expanded to a 32-bit value
 * when it is stored.  Every loop iteration consumes 8 source bytes
 * (4 pixels) and stores 32 bytes (8 pixels) to each of the two destination
 * rows.
 *
 *   pSrc        current source row; the row above it (pSrc - srcPitch) and
 *               the two rows below it are read as well.
 *   pDlt        delta buffer.  Unless bForceWrite is set, a group whose
 *               sampled neighbourhood equals the delta buffer is skipped and
 *               nothing is stored to pDst for it.  One quadword of the delta
 *               buffer is refreshed from the source on every iteration.
 *               Its rows are addressed with srcPitch.
 *   srcPitch    byte distance between two source rows.
 *   width       number of source pixels; consumed four at a time (the last
 *               group is processed in full even if width is not a multiple
 *               of 4).  A width of 0 returns immediately.
 *   pDst        first destination row.
 *   dstPitch    byte distance from pDst to the second destination row.
 *   bForceWrite non-zero: bypass the delta comparison and always filter.
 *
 * Register roles inside the loop:
 *   eax = pSrc - srcPitch (top row of the 4x4 window), advanced by 8
 *   ebx = srcPitch
 *   edx = destination cursor, advanced by 32
 *   ecx, esi, edi = scratch
 *   (eax and ebx are borrowed during the pixel expansion and restored)
 *
 * colorA..colorP are address displacements and Mask1, Mask2, ACPixel, cMask,
 * qMask, lMask and ONE are quadword memory operands; all are defined outside
 * this function.  Mask1, Mask2 and ACPixel are written here as scratch.
 * NOTE(review): if those are plain globals, concurrent calls would share the
 * scratch storage - confirm against their definitions.
 */
static void nx_2xSaILine_32bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
    __asm {
        mov       eax, pSrc
        mov       ebx, srcPitch
        mov       edx, pDst
        sub       eax, ebx                      // eax -> row above pSrc

        // Nothing to do for an empty row: the loop below is bottom-tested
        // and would otherwise filter and store one full 4-pixel group.
        mov       ecx, width
        test      ecx, ecx
        jz        nx_2xSaILine_32mmx_done

    nx_2xSaILine_32mmx_loop:
        mov       ecx, bForceWrite
        test      ecx, ecx
        jz        nx_2xSaILine_32mmx_normal

        // Forced write: refresh the delta buffer and go straight to filtering
        mov       esi, pDlt
        movq      mm6, [eax+colorI]
        movq      [esi+colorI], mm6

        jmp       nx_2xSaILine_32mmx_forcewrite

    nx_2xSaILine_32mmx_normal:
        // Check delta
        mov       ecx, pDlt

        // load source img (two quadwords from each of the four rows)
        lea       esi, [eax+ebx]
        movq      mm0, [eax+colorI]
        movq      mm1, [eax+colorJ]
        movq      mm2, [eax+ebx+colorG]
        movq      mm3, [eax+ebx+colorK]
        movq      mm4, [eax+ebx*2+colorH]
        movq      mm5, [eax+ebx*2+colorL]
        movq      mm6, [esi+ebx*2+colorM]
        movq      mm7, [esi+ebx*2+colorP]

        // compare to delta (word lanes become 0xFFFF where equal)
        lea       esi, [ecx+ebx]
        pcmpeqw   mm0, [ecx+colorI]
        pcmpeqw   mm1, [ecx+colorJ]
        pcmpeqw   mm2, [ecx+ebx+colorG]
        pcmpeqw   mm3, [ecx+ebx+colorK]
        pcmpeqw   mm4, [ecx+ebx*2+colorH]
        pcmpeqw   mm5, [ecx+ebx*2+colorL]
        pcmpeqw   mm6, [esi+ebx*2+colorM]
        pcmpeqw   mm7, [esi+ebx*2+colorP]

        // compose results: mm0 lane stays set only if all eight compares matched
        pand      mm0, mm1
        pand      mm2, mm3
        pand      mm4, mm5
        pand      mm6, mm7
        pand      mm0, mm2
        pand      mm4, mm6
        pxor      mm7, mm7
        pand      mm0, mm4
        movq      mm6, [eax+colorI]
        pcmpeqw   mm7, mm0                      // mm7 lane set where something differed

        movq      [ecx+colorI], mm6             // refresh the delta buffer

        packsswb  mm7, mm7                      // squeeze the four lane flags into 32 bits
        movd      ecx, mm7
        test      ecx, ecx
        jz        nx_2xSaILine_32mmx_skipprocess // no lane differed: keep old output
        // End Delta

    nx_2xSaILine_32mmx_forcewrite:
        //------------------------------
        // 1
        // if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
        movq      mm0, [eax+ebx+colorA]         // mm0 and mm1 contain colorA
        movq      mm2, [eax+ebx+colorB]         // mm2 and mm3 contain colorB

        movq      mm1, mm0
        movq      mm3, mm2

        pcmpeqw   mm0, [eax+ebx*2+colorD]
        pcmpeqw   mm1, [eax+colorE]
        pcmpeqw   mm2, [eax+ebx*2+colorL]
        pcmpeqw   mm3, [eax+ebx*2+colorC]

        pand      mm0, mm1
        pxor      mm1, mm1
        pand      mm0, mm2
        pcmpeqw   mm3, mm1                      // invert: colorB != colorC
        pand      mm0, mm3                      // result in mm0

        // if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
        movq      mm4, [eax+ebx+colorA]         // mm4 and mm5 contain colorA
        movq      mm6, [eax+ebx+colorB]         // mm6 and mm7 contain colorB
        movq      mm5, mm4
        movq      mm7, mm6

        pcmpeqw   mm4, [eax+ebx*2+colorC]
        pcmpeqw   mm5, [eax+colorF]
        pcmpeqw   mm6, [eax+colorJ]
        pcmpeqw   mm7, [eax+colorE]

        pand      mm4, mm5
        pxor      mm5, mm5
        pand      mm4, mm6
        pcmpeqw   mm7, mm5                      // invert: colorB != colorE
        pand      mm4, mm7                      // result in mm4

        por       mm0, mm4                      // combine the masks
        movq      Mask1, mm0

        //------------------------------
        // 2
        // if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
        movq      mm0, [eax+ebx+colorB]         // mm0 and mm1 contain colorB
        movq      mm2, [eax+ebx+colorA]         // mm2 and mm3 contain colorA
        movq      mm1, mm0
        movq      mm3, mm2

        pcmpeqw   mm0, [eax+ebx*2+colorC]
        pcmpeqw   mm1, [eax+colorF]
        pcmpeqw   mm2, [eax+ebx*2+colorH]
        pcmpeqw   mm3, [eax+ebx*2+colorD]

        pand      mm0, mm1
        pxor      mm1, mm1
        pand      mm0, mm2
        pcmpeqw   mm3, mm1                      // invert: colorA != colorD
        pand      mm0, mm3                      // result in mm0

        // if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
        movq      mm4, [eax+ebx+colorB]         // mm4 and mm5 contain colorB
        movq      mm6, [eax+ebx+colorA]         // mm6 and mm7 contain colorA
        movq      mm5, mm4
        movq      mm7, mm6

        pcmpeqw   mm4, [eax+ebx*2+colorD]
        pcmpeqw   mm5, [eax+colorE]
        pcmpeqw   mm6, [eax+colorI]
        pcmpeqw   mm7, [eax+colorF]

        pand      mm4, mm5
        pxor      mm5, mm5
        pand      mm4, mm6
        pcmpeqw   mm7, mm5                      // invert: colorA != colorF
        pand      mm4, mm7                      // result in mm4

        por       mm0, mm4                      // combine the masks
        movq      Mask2, mm0

        //------------------------------
        // interpolate colorA and colorB
        //   (A & B) + (((A ^ B) & cMask) >> 1)
        //------------------------------
        movq      mm0, [eax+ebx+colorA]
        movq      mm1, [eax+ebx+colorB]
        movq      mm2, mm0
        movq      mm3, mm1

        movq      mm6, cMask
        pxor      mm3, mm2
        pand      mm0, mm1
        pand      mm3, mm6
        psrlw     mm3, 1
        paddw     mm0, mm3                      // mm0 contains the interpolated values

        // assemble the pixels:
        //   Mask1 lanes take colorA, Mask2 lanes take colorB,
        //   all remaining lanes take the interpolated value
        movq      mm1, [eax+ebx+colorA]
        movq      mm2, [eax+ebx+colorB]

        movq      mm3, Mask1
        movq      mm5, mm1
        movq      mm4, Mask2
        movq      mm6, mm1

        pand      mm1, mm3
        por       mm3, mm4
        pxor      mm7, mm7
        pand      mm2, mm4

        pcmpeqw   mm3, mm7                      // lanes where neither mask is set
        por       mm1, mm2
        pand      mm0, mm3

        por       mm0, mm1

        punpcklwd mm5, mm0                      // interleave colorA with the computed pixels
        punpckhwd mm6, mm0

        //------------------------------
        // Write image RGB1555->RGBx888 (first destination row)
        //------------------------------
        // save (eax and ebx are used as scratch by the expansion)
        mov       esi, eax
        mov       edi, ebx
        mov       ecx, 0x00F8F8F8               // mask

        // movq [edx+0], mm5
        //
        // Per pixel (low word of the MMX register):
        //   al  = bits 0-4  << 3   -> byte 0
        //   ah  = bits 5-9  << 3   -> byte 1
        //   ebx = bits 10-14 moved to bits 19-23 -> byte 2, byte 3 cleared
        movd      eax, mm5
        movd      ebx, mm5
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax                        // merge bytes 0 and 1 into the result
        psrlq     mm5, 16                       // bring the next pixel into the low word
        mov       [edx+0], ebx
        movd      eax, mm5
        movd      ebx, mm5
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm5, 16
        mov       [edx+4], ebx
        movd      eax, mm5
        movd      ebx, mm5
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm5, 16
        mov       [edx+8], ebx
        movd      eax, mm5
        movd      ebx, mm5
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        mov       [edx+12], ebx

        // movq [edx+8], mm6
        movd      eax, mm6
        movd      ebx, mm6
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm6, 16
        mov       [edx+16], ebx
        movd      eax, mm6
        movd      ebx, mm6
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm6, 16
        mov       [edx+20], ebx
        movd      eax, mm6
        movd      ebx, mm6
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm6, 16
        mov       [edx+24], ebx
        movd      eax, mm6
        movd      ebx, mm6
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        mov       [edx+28], ebx

        // restore
        mov       eax, esi
        mov       ebx, edi

        //------------------------------
        // Create the Nextline
        //------------------------------
        // 3
        // if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
        lea       esi, [eax+ebx]

        movq      mm0, [eax+ebx+colorA]         // mm0 and mm1 contain colorA
        movq      mm2, [eax+ebx*2+colorC]       // mm2 and mm3 contain colorC
        movq      mm1, mm0
        movq      mm3, mm2

        pcmpeqw   mm0, [esi+ebx+colorD]
        pcmpeqw   mm1, [esi+colorG]
        pcmpeqw   mm2, [esi+ebx*2+colorO]
        pcmpeqw   mm3, [esi+colorB]

        pand      mm0, mm1
        pxor      mm1, mm1
        pand      mm0, mm2
        pcmpeqw   mm3, mm1                      // invert: colorB != colorC
        pand      mm0, mm3                      // result in mm0

        // if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
        movq      mm4, [eax+ebx+colorA]         // mm4 and mm5 contain colorA
        movq      mm6, [eax+ebx*2+colorC]       // mm6 and mm7 contain colorC
        movq      mm5, mm4
        movq      mm7, mm6

        pcmpeqw   mm4, [esi+ebx+colorH]
        pcmpeqw   mm5, [esi+colorB]
        pcmpeqw   mm6, [esi+ebx*2+colorM]
        pcmpeqw   mm7, [esi+colorG]

        pand      mm4, mm5
        pxor      mm5, mm5
        pand      mm4, mm6
        pcmpeqw   mm7, mm5                      // invert: colorG != colorC
        pand      mm4, mm7                      // result in mm4

        por       mm0, mm4                      // combine the masks
        movq      Mask1, mm0

        //------------------------------
        // 4
        // if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
        movq      mm0, [eax+ebx*2+colorC]       // mm0 and mm1 contain colorC
        movq      mm2, [eax+ebx+colorA]         // mm2 and mm3 contain colorA
        movq      mm1, mm0
        movq      mm3, mm2

        pcmpeqw   mm0, [eax+ebx+colorB]
        pcmpeqw   mm1, [eax+ebx*2+colorH]
        pcmpeqw   mm2, [eax+colorF]
        pcmpeqw   mm3, [eax+ebx*2+colorD]

        pand      mm0, mm1
        pxor      mm1, mm1
        pand      mm0, mm2
        pcmpeqw   mm3, mm1                      // invert: colorA != colorD
        pand      mm0, mm3                      // result in mm0

        // if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
        movq      mm4, [eax+ebx*2+colorC]       // mm4 and mm5 contain colorC
        movq      mm6, [eax+ebx+colorA]         // mm6 and mm7 contain colorA
        movq      mm5, mm4
        movq      mm7, mm6

        pcmpeqw   mm4, [eax+ebx*2+colorD]
        pcmpeqw   mm5, [eax+ebx+colorG]
        pcmpeqw   mm6, [eax+colorI]
        pcmpeqw   mm7, [eax+ebx*2+colorH]

        pand      mm4, mm5
        pxor      mm5, mm5
        pand      mm4, mm6
        pcmpeqw   mm7, mm5                      // invert: colorA != colorH
        pand      mm4, mm7                      // result in mm4

        por       mm0, mm4                      // combine the masks
        movq      Mask2, mm0

        //------------------------------
        // interpolate colorA and colorC
        //------------------------------
        movq      mm0, [eax+ebx+colorA]
        movq      mm1, [eax+ebx*2+colorC]
        movq      mm2, mm0
        movq      mm3, mm1

        movq      mm6, cMask
        pxor      mm3, mm2
        pand      mm0, mm1
        pand      mm3, mm6
        psrlw     mm3, 1
        paddw     mm0, mm3                      // mm0 contains the interpolated values

        // assemble the pixels (Mask1 -> colorA, Mask2 -> colorC, else interpolated)
        movq      mm1, [eax+ebx+colorA]
        movq      mm2, [eax+ebx*2+colorC]

        movq      mm3, Mask1
        movq      mm4, Mask2

        pand      mm1, mm3
        pand      mm2, mm4

        por       mm3, mm4
        pxor      mm7, mm7
        por       mm1, mm2

        pcmpeqw   mm3, mm7
        pand      mm0, mm3
        por       mm0, mm1
        movq      ACPixel, mm0                  // kept for the second-row store below

        //------------------------------
        // Decide which "branch" to take
        //------------------------------
        movq      mm0, [eax+ebx+colorA]
        movq      mm1, [eax+ebx+colorB]
        movq      mm6, mm0
        movq      mm7, mm1
        pcmpeqw   mm0, [eax+ebx*2+colorD]
        pcmpeqw   mm1, [eax+ebx*2+colorC]
        pcmpeqw   mm6, mm7                      // colorA == colorB

        movq      mm2, mm0
        movq      mm3, mm0

        pand      mm0, mm1                      // colorA == colorD && colorB == colorC
        pxor      mm7, mm7

        pcmpeqw   mm2, mm7
        pand      mm6, mm0                      // ... and additionally colorA == colorB
        pand      mm2, mm1                      // colorA != colorD && colorB == colorC

        pcmpeqw   mm1, mm7

        pand      mm1, mm3                      // colorA == colorD && colorB != colorC
        pxor      mm0, mm6                      // both diagonals match but colorA != colorB
        por       mm1, mm6
        movq      mm7, mm0
        movq      Mask2, mm2
        packsswb  mm7, mm7
        movq      Mask1, mm1

        movd      ecx, mm7
        test      ecx, ecx
        jz        nx_2xSaILine_32mmx_skipguess  // no ambiguous lane in this group

        //------------------------------
        // Map of the pixels: I|E F|J
        //                    G|A B|K
        //                    H|C D|L
        //                    M|N O|P
        //------------------------------
        // For the ambiguous lanes, vote using four neighbour pairs.  For each
        // pair, count how many of the two pixels equal colorA and how many
        // equal colorB, then mm7 += (countA <= 1) - (countB <= 1).
        movq      mm6, mm0
        movq      mm4, [eax+ebx+colorA]
        movq      mm5, [eax+ebx+colorB]
        pxor      mm7, mm7                      // mm7 = running vote
        pand      mm6, ONE                      // per-lane ONE value, ambiguous lanes only

        // pair E, G
        movq      mm0, [eax+colorE]
        movq      mm1, [eax+ebx+colorG]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair F, K
        movq      mm0, [eax+colorF]
        movq      mm1, [eax+ebx+colorK]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        lea       esi, [eax+ebx]

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair H, N
        movq      mm0, [esi+ebx+colorH]
        movq      mm1, [esi+ebx*2+colorN]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // pair L, O
        movq      mm0, [esi+ebx+colorL]
        movq      mm1, [esi+ebx*2+colorO]
        movq      mm2, mm0
        movq      mm3, mm1
        pcmpeqw   mm0, mm4
        pcmpeqw   mm1, mm4
        pcmpeqw   mm2, mm5
        pcmpeqw   mm3, mm5
        pand      mm0, mm6
        pand      mm1, mm6
        pand      mm2, mm6
        pand      mm3, mm6
        paddw     mm0, mm1
        paddw     mm2, mm3

        pxor      mm3, mm3
        pcmpgtw   mm0, mm6
        pcmpgtw   mm2, mm6
        pcmpeqw   mm0, mm3
        pcmpeqw   mm2, mm3
        pand      mm0, mm6
        pand      mm2, mm6
        paddw     mm7, mm0
        psubw     mm7, mm2

        // Positive vote joins Mask1, negative vote joins Mask2
        movq      mm1, mm7
        pxor      mm0, mm0
        pcmpgtw   mm7, mm0
        pcmpgtw   mm0, mm1

        por       mm7, Mask1
        por       mm0, Mask2
        movq      Mask1, mm7
        movq      Mask2, mm0

    nx_2xSaILine_32mmx_skipguess:
        //------------------------------
        // interpolate A, B, C and D
        //   sum of ((x & qMask) >> 2) over the four pixels, plus
        //   ((sum of (x & lMask)) >> 2) & lMask
        //------------------------------
        movq      mm0, [eax+ebx+colorA]
        movq      mm1, [eax+ebx+colorB]
        movq      mm4, mm0
        movq      mm2, [eax+ebx*2+colorC]
        movq      mm5, mm1
        movq      mm3, qMask                    // qcolorMask
        movq      mm6, mm2
        movq      mm7, lMask                    // qlowcolorMask

        pand      mm0, mm3
        pand      mm1, mm3
        pand      mm2, mm3
        pand      mm3, [eax+ebx*2+colorD]

        psrlw     mm0, 2
        pand      mm4, mm7
        psrlw     mm1, 2
        pand      mm5, mm7
        psrlw     mm2, 2
        pand      mm6, mm7
        psrlw     mm3, 2
        pand      mm7, [eax+ebx*2+colorD]

        paddw     mm0, mm1
        paddw     mm2, mm3

        paddw     mm4, mm5
        paddw     mm6, mm7

        paddw     mm4, mm6
        movq      mm7, lMask                    // qlowcolorMask
        paddw     mm0, mm2
        psrlw     mm4, 2
        pand      mm4, mm7
        paddw     mm0, mm4                      // mm0 contains the interpolated value of A, B, C and D

        // assemble the pixels (Mask1 -> colorA, Mask2 -> colorB, else interpolated)
        movq      mm1, Mask1
        movq      mm2, Mask2
        movq      mm4, [eax+ebx+colorA]
        movq      mm5, [eax+ebx+colorB]
        pand      mm4, mm1
        pand      mm5, mm2

        pxor      mm7, mm7
        por       mm1, mm2
        por       mm4, mm5
        pcmpeqw   mm1, mm7
        pand      mm0, mm1
        por       mm4, mm0                      // mm4 contains the diagonal pixels

        movq      mm0, ACPixel
        mov       edi, dstPitch
        movq      mm1, mm0
        punpcklwd mm0, mm4                      // interleave A/C pixels with the diagonal pixels
        punpckhwd mm1, mm4

        //------------------------------
        // Write image RGB1555->RGBx888 (second destination row)
        //------------------------------
        // save (edi holds dstPitch here, so eax/ebx are parked in MMX registers)
        movd      mm6, eax
        movd      mm7, ebx
        mov       ecx, 0x00F8F8F8               // mask

        // movq [edx+edi+0], mm0
        // (same per-pixel expansion as for the first destination row)
        movd      eax, mm0
        movd      ebx, mm0
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm0, 16
        mov       [edx+edi+0], ebx
        movd      eax, mm0
        movd      ebx, mm0
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm0, 16
        mov       [edx+edi+4], ebx
        movd      eax, mm0
        movd      ebx, mm0
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm0, 16
        mov       [edx+edi+8], ebx
        movd      eax, mm0
        movd      ebx, mm0
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        mov       [edx+edi+12], ebx

        // movq [edx+edi+8], mm1
        movd      eax, mm1
        movd      ebx, mm1
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm1, 16
        mov       [edx+edi+16], ebx
        movd      eax, mm1
        movd      ebx, mm1
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm1, 16
        mov       [edx+edi+20], ebx
        movd      eax, mm1
        movd      ebx, mm1
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        psrlq     mm1, 16
        mov       [edx+edi+24], ebx
        movd      eax, mm1
        movd      ebx, mm1
        shl       eax, 3
        shl       ebx, 9
        shl       ah, 3
        and       ebx, ecx
        mov       bx, ax
        mov       [edx+edi+28], ebx

        // restore
        movd      eax, mm6
        movd      ebx, mm7

    nx_2xSaILine_32mmx_skipprocess:
        add       pDlt, 8                       // 4 pixels
        lea       eax, [eax+ 8]                 // 4 pixels
        lea       edx, [edx+32]                 // 8 pixels
        sub       width, 4                      // 4 pixels
        jg        nx_2xSaILine_32mmx_loop

    nx_2xSaILine_32mmx_done:
        emms                                    // leave MMX state so x87 code can run afterwards
    }
}
|