/*---------------------------------------------------------------------*
 * The following (piece of) code, (part of) the 2xSaI engine,          *
 * copyright (c) 2001 by Derek Liauw Kie Fa.                           *
 * Non-Commercial use of the engine is allowed and is encouraged,      *
 * provided that appropriate credit be given and that this copyright   *
 * notice will not be removed under any circumstance.                  *
 * You may freely modify this code, but I request                      *
 * that any improvements to the engine be submitted to me, so          *
 * that I can implement these improvements in newer versions of        *
 * the engine.                                                         *
 * If you need more information, have any comments or suggestions,     *
 * you can e-mail me. My e-mail: DerekL666@yahoo.com                   *
 *---------------------------------------------------------------------*/
//
// This code was converted into VirtuaNES by Norix.
//
//
// nx_2xSaILine_16bpp_mmx
//
// 2xSaI-scales one row of 16-bit pixels into two output rows of 16-bit
// pixels, four source pixels (one MMX quadword) per loop iteration.
//
//   pSrc        : current source row. The rows at -1, +1 and +2 * srcPitch
//                 are read as well (see the pixel map further down).
//   pDlt        : delta buffer, addressed with the same row/column offsets
//                 as (pSrc - srcPitch). Presumably a copy of the previously
//                 rendered source -- confirm with the caller.
//   srcPitch    : byte distance between rows, used for both pSrc and pDlt.
//   width       : number of source pixels. It is consumed 4 at a time and
//                 the loop runs while the remainder is > 0 (signed), so a
//                 width that is not a multiple of 4 is rounded up.
//   pDst        : first output row; 16 bytes are written per iteration.
//   dstPitch    : byte distance from the first to the second output row.
//   bForceWrite : non-zero = render every group without the delta compare.
//
// Symbols defined outside this function:
//   colorA..colorP          displacements used to address the 4x4 pixels
//   cMask, qMask, lMask, ONE  quadwords that are only read here
//   Mask1, Mask2, ACPixel   quadwords used as scratch storage
//
// Register roles inside the loop:
//   eax = source pointer one row ABOVE the current row
//   ebx = srcPitch      edx = destination pointer
//   ecx/esi/edi = scratch (edi holds dstPitch for the second row)
//
// The parameters pDlt and width are modified in place by the loop.
//
static void nx_2xSaILine_16bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
	__asm {
		mov	eax, pSrc
		mov	ebx, srcPitch
		mov	edx, pDst
		sub	eax, ebx			// eax -> row above the current row
nx_2xSaILine_16mmx_loop:
		mov	ecx, bForceWrite
		test	ecx, ecx
		jz	nx_2xSaILine_16mmx_normal

		// Forced: refresh the delta buffer and render unconditionally
		mov	esi, pDlt
		movq	mm6, [eax+colorI]
		movq	[esi+colorI], mm6

		jmp	nx_2xSaILine_16mmx_forcewrite
nx_2xSaILine_16mmx_normal:
		// Check delta
		mov	ecx, pDlt

		// load source img
		lea	esi, [eax+ebx]
		movq	mm0, [eax+colorI]
		movq	mm1, [eax+colorJ]
		movq	mm2, [eax+ebx+colorG]
		movq	mm3, [eax+ebx+colorK]
		movq	mm4, [eax+ebx*2+colorH]
		movq	mm5, [eax+ebx*2+colorL]
		movq	mm6, [esi+ebx*2+colorM]
		movq	mm7, [esi+ebx*2+colorP]

		// compare to delta
		lea	esi, [ecx+ebx]
		pcmpeqw	mm0, [ecx+colorI]
		pcmpeqw	mm1, [ecx+colorJ]
		pcmpeqw	mm2, [ecx+ebx+colorG]
		pcmpeqw	mm3, [ecx+ebx+colorK]
		pcmpeqw	mm4, [ecx+ebx*2+colorH]
		pcmpeqw	mm5, [ecx+ebx*2+colorL]
		pcmpeqw	mm6, [esi+ebx*2+colorM]
		pcmpeqw	mm7, [esi+ebx*2+colorP]

		// compose results
		pand	mm0, mm1
		pand	mm2, mm3
		pand	mm4, mm5
		pand	mm6, mm7
		pand	mm0, mm2
		pand	mm4, mm6
		pxor	mm7, mm7
		pand	mm0, mm4			// mm0 = lanes where all 8 samples match
		movq	mm6, [eax+colorI]
		pcmpeqw	mm7, mm0			// mm7 = lanes where something changed

		movq	[ecx+colorI], mm6		// update the delta buffer

		packsswb mm7, mm7
		movd	ecx, mm7
		test	ecx, ecx
		jz	nx_2xSaILine_16mmx_skipprocess	// nothing changed: keep old output
		// End Delta
nx_2xSaILine_16mmx_forcewrite:
		//------------------------------
		// 1
		// if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL))
		movq	mm0, [eax+ebx+colorA]		// mm0 and mm1 contain colorA
		movq	mm2, [eax+ebx+colorB]		// mm2 and mm3 contain colorB

		movq	mm1, mm0
		movq	mm3, mm2

		pcmpeqw	mm0, [eax+ebx*2+colorD]
		pcmpeqw	mm1, [eax+colorE]
		pcmpeqw	mm2, [eax+ebx*2+colorL]
		pcmpeqw	mm3, [eax+ebx*2+colorC]

		pand	mm0, mm1
		pxor	mm1, mm1
		pand	mm0, mm2
		pcmpeqw	mm3, mm1			// invert: colorB != colorC
		pand	mm0, mm3			// result in mm0

		// if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ))
		movq	mm4, [eax+ebx+colorA]		// mm4 and mm5 contain colorA
		movq	mm6, [eax+ebx+colorB]		// mm6 and mm7 contain colorB
		movq	mm5, mm4
		movq	mm7, mm6

		pcmpeqw	mm4, [eax+ebx*2+colorC]
		pcmpeqw	mm5, [eax+colorF]
		pcmpeqw	mm6, [eax+colorJ]
		pcmpeqw	mm7, [eax+colorE]

		pand	mm4, mm5
		pxor	mm5, mm5
		pand	mm4, mm6
		pcmpeqw	mm7, mm5			// invert: colorB != colorE
		pand	mm4, mm7			// result in mm4

		por	mm0, mm4			// combine the masks
		movq	Mask1, mm0			// Mask1: lanes that take colorA

		//------------------------------
		// 2
		// if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH))
		movq	mm0, [eax+ebx+colorB]		// mm0 and mm1 contain colorB
		movq	mm2, [eax+ebx+colorA]		// mm2 and mm3 contain colorA
		movq	mm1, mm0
		movq	mm3, mm2

		pcmpeqw	mm0, [eax+ebx*2+colorC]
		pcmpeqw	mm1, [eax+colorF]
		pcmpeqw	mm2, [eax+ebx*2+colorH]
		pcmpeqw	mm3, [eax+ebx*2+colorD]

		pand	mm0, mm1
		pxor	mm1, mm1
		pand	mm0, mm2
		pcmpeqw	mm3, mm1			// invert: colorA != colorD
		pand	mm0, mm3			// result in mm0

		// if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))
		movq	mm4, [eax+ebx+colorB]		// mm4 and mm5 contain colorB
		movq	mm6, [eax+ebx+colorA]		// mm6 and mm7 contain colorA
		movq	mm5, mm4
		movq	mm7, mm6

		pcmpeqw	mm4, [eax+ebx*2+colorD]
		pcmpeqw	mm5, [eax+colorE]
		pcmpeqw	mm6, [eax+colorI]
		pcmpeqw	mm7, [eax+colorF]

		pand	mm4, mm5
		pxor	mm5, mm5
		pand	mm4, mm6
		pcmpeqw	mm7, mm5			// invert: colorA != colorF
		pand	mm4, mm7			// result in mm4

		por	mm0, mm4			// combine the masks
		movq	Mask2, mm0			// Mask2: lanes that take colorB

		//------------------------------
		// interpolate colorA and colorB
		//------------------------------
		movq	mm0, [eax+ebx+colorA]
		movq	mm1, [eax+ebx+colorB]
		movq	mm2, mm0
		movq	mm3, mm1

		// (A & B) + (((A ^ B) & cMask) >> 1)
		movq	mm6, cMask
		pxor	mm3, mm2
		pand	mm0, mm1
		pand	mm3, mm6
		psrlw	mm3, 1
		paddw	mm0, mm3			// mm0 contains the interpolated values

		// assemble the pixels
		// result = (A & Mask1) | (B & Mask2) | (interp & ~(Mask1 | Mask2))
		movq	mm1, [eax+ebx+colorA]
		movq	mm2, [eax+ebx+colorB]

		movq	mm3, Mask1
		movq	mm5, mm1
		movq	mm4, Mask2
		movq	mm6, mm1

		pand	mm1, mm3
		por	mm3, mm4
		pxor	mm7, mm7
		pand	mm2, mm4

		pcmpeqw	mm3, mm7
		por	mm1, mm2
		pand	mm0, mm3

		por	mm0, mm1

		// interleave: A0 r0 A1 r1 | A2 r2 A3 r3
		punpcklwd mm5, mm0
		punpckhwd mm6, mm0

		//------------------------------
		// Write image
		//------------------------------
		movq	[edx+0], mm5
		movq	[edx+8], mm6

		//------------------------------
		// Create the Nextline
		//------------------------------
		// 3
		// if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO))
		lea	esi, [eax+ebx]			// esi -> current row

		movq	mm0, [eax+ebx+colorA]		// mm0 and mm1 contain colorA
		movq	mm2, [eax+ebx*2+colorC]		// mm2 and mm3 contain colorC
		movq	mm1, mm0
		movq	mm3, mm2

		pcmpeqw	mm0, [esi+ebx+colorD]
		pcmpeqw	mm1, [esi+colorG]
		pcmpeqw	mm2, [esi+ebx*2+colorO]
		pcmpeqw	mm3, [esi+colorB]

		pand	mm0, mm1
		pxor	mm1, mm1
		pand	mm0, mm2
		pcmpeqw	mm3, mm1			// invert: colorB != colorC
		pand	mm0, mm3			// result in mm0

		// if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM))
		movq	mm4, [eax+ebx+colorA]		// mm4 and mm5 contain colorA
		movq	mm6, [eax+ebx*2+colorC]		// mm6 and mm7 contain colorC
		movq	mm5, mm4
		movq	mm7, mm6

		pcmpeqw	mm4, [esi+ebx+colorH]
		pcmpeqw	mm5, [esi+colorB]
		pcmpeqw	mm6, [esi+ebx*2+colorM]
		pcmpeqw	mm7, [esi+colorG]

		pand	mm4, mm5
		pxor	mm5, mm5
		pand	mm4, mm6
		pcmpeqw	mm7, mm5			// invert: colorG != colorC
		pand	mm4, mm7			// result in mm4

		por	mm0, mm4			// combine the masks
		movq	Mask1, mm0			// Mask1: lanes that take colorA

		//------------------------------
		// 4
		// if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF))
		movq	mm0, [eax+ebx*2+colorC]		// mm0 and mm1 contain colorC
		movq	mm2, [eax+ebx+colorA]		// mm2 and mm3 contain colorA
		movq	mm1, mm0
		movq	mm3, mm2

		pcmpeqw	mm0, [eax+ebx+colorB]
		pcmpeqw	mm1, [eax+ebx*2+colorH]
		pcmpeqw	mm2, [eax+colorF]
		pcmpeqw	mm3, [eax+ebx*2+colorD]

		pand	mm0, mm1
		pxor	mm1, mm1
		pand	mm0, mm2
		pcmpeqw	mm3, mm1			// invert: colorA != colorD
		pand	mm0, mm3			// result in mm0

		// if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI))
		movq	mm4, [eax+ebx*2+colorC]		// mm4 and mm5 contain colorC
		movq	mm6, [eax+ebx+colorA]		// mm6 and mm7 contain colorA
		movq	mm5, mm4
		movq	mm7, mm6

		pcmpeqw	mm4, [eax+ebx*2+colorD]
		pcmpeqw	mm5, [eax+ebx+colorG]
		pcmpeqw	mm6, [eax+colorI]
		pcmpeqw	mm7, [eax+ebx*2+colorH]

		pand	mm4, mm5
		pxor	mm5, mm5
		pand	mm4, mm6
		pcmpeqw	mm7, mm5			// invert: colorA != colorH
		pand	mm4, mm7			// result in mm4

		por	mm0, mm4			// combine the masks
		movq	Mask2, mm0			// Mask2: lanes that take colorC

		//------------------------------
		// interpolate colorA and colorC
		//------------------------------
		movq	mm0, [eax+ebx+colorA]
		movq	mm1, [eax+ebx*2+colorC]
		movq	mm2, mm0
		movq	mm3, mm1

		movq	mm6, cMask
		pxor	mm3, mm2
		pand	mm0, mm1
		pand	mm3, mm6
		psrlw	mm3, 1
		paddw	mm0, mm3			// mm0 contains the interpolated values

		// assemble the pixels
		movq	mm1, [eax+ebx+colorA]
		movq	mm2, [eax+ebx*2+colorC]

		movq	mm3, Mask1
		movq	mm4, Mask2

		pand	mm1, mm3
		pand	mm2, mm4

		por	mm3, mm4
		pxor	mm7, mm7
		por	mm1, mm2

		pcmpeqw	mm3, mm7
		pand	mm0, mm3
		por	mm0, mm1
		movq	ACPixel, mm0			// left pixels of the second output row

		//------------------------------
		// Decide which "branch" to take
		//------------------------------
		movq	mm0, [eax+ebx+colorA]
		movq	mm1, [eax+ebx+colorB]
		movq	mm6, mm0
		movq	mm7, mm1
		pcmpeqw	mm0, [eax+ebx*2+colorD]
		pcmpeqw	mm1, [eax+ebx*2+colorC]
		pcmpeqw	mm6, mm7

		movq	mm2, mm0
		movq	mm3, mm0

		pand	mm0, mm1			// colorA == colorD && colorB == colorC
		pxor	mm7, mm7

		pcmpeqw	mm2, mm7
		pand	mm6, mm0			// ... and additionally colorA == colorB
		pand	mm2, mm1			// colorA != colorD && colorB == colorC

		pcmpeqw	mm1, mm7

		pand	mm1, mm3			// colorA == colorD && colorB != colorC
		pxor	mm0, mm6			// both diagonals match, colorA != colorB
		por	mm1, mm6
		movq	mm7, mm0
		movq	Mask2, mm2
		packsswb mm7, mm7
		movq	Mask1, mm1

		movd	ecx, mm7
		test	ecx, ecx
		jz	nx_2xSaILine_16mmx_skipguess	// no lane needs the neighbour vote

		//------------------------------
		// Map of the pixels:           I|E F|J
		//                              G|A B|K
		//                              H|C D|L
		//                              M|N O|P
		//------------------------------
		// Neighbour vote, accumulated per lane in mm7. For each neighbour
		// pair: +ONE if at most one of the pair equals colorA, -ONE if at
		// most one equals colorB. mm6 restricts it to the undecided lanes.
		movq	mm6, mm0
		movq	mm4, [eax+ebx+colorA]
		movq	mm5, [eax+ebx+colorB]
		pxor	mm7, mm7
		pand	mm6, ONE

		// pair E, G
		movq	mm0, [eax+colorE]
		movq	mm1, [eax+ebx+colorG]
		movq	mm2, mm0
		movq	mm3, mm1
		pcmpeqw	mm0, mm4
		pcmpeqw	mm1, mm4
		pcmpeqw	mm2, mm5
		pcmpeqw	mm3, mm5
		pand	mm0, mm6
		pand	mm1, mm6
		pand	mm2, mm6
		pand	mm3, mm6
		paddw	mm0, mm1
		paddw	mm2, mm3

		pxor	mm3, mm3
		pcmpgtw	mm0, mm6
		pcmpgtw	mm2, mm6
		pcmpeqw	mm0, mm3
		pcmpeqw	mm2, mm3
		pand	mm0, mm6
		pand	mm2, mm6
		paddw	mm7, mm0
		psubw	mm7, mm2

		// pair F, K
		movq	mm0, [eax+colorF]
		movq	mm1, [eax+ebx+colorK]
		movq	mm2, mm0
		movq	mm3, mm1
		pcmpeqw	mm0, mm4
		pcmpeqw	mm1, mm4
		pcmpeqw	mm2, mm5
		pcmpeqw	mm3, mm5
		pand	mm0, mm6
		pand	mm1, mm6
		pand	mm2, mm6
		pand	mm3, mm6
		paddw	mm0, mm1
		paddw	mm2, mm3

		lea	esi, [eax+ebx]			// esi -> current row

		pxor	mm3, mm3
		pcmpgtw	mm0, mm6
		pcmpgtw	mm2, mm6
		pcmpeqw	mm0, mm3
		pcmpeqw	mm2, mm3
		pand	mm0, mm6
		pand	mm2, mm6
		paddw	mm7, mm0
		psubw	mm7, mm2

		// pair H, N
		movq	mm0, [esi+ebx+colorH]
		movq	mm1, [esi+ebx*2+colorN]
		movq	mm2, mm0
		movq	mm3, mm1
		pcmpeqw	mm0, mm4
		pcmpeqw	mm1, mm4
		pcmpeqw	mm2, mm5
		pcmpeqw	mm3, mm5
		pand	mm0, mm6
		pand	mm1, mm6
		pand	mm2, mm6
		pand	mm3, mm6
		paddw	mm0, mm1
		paddw	mm2, mm3

		pxor	mm3, mm3
		pcmpgtw	mm0, mm6
		pcmpgtw	mm2, mm6
		pcmpeqw	mm0, mm3
		pcmpeqw	mm2, mm3
		pand	mm0, mm6
		pand	mm2, mm6
		paddw	mm7, mm0
		psubw	mm7, mm2

		// pair L, O
		movq	mm0, [esi+ebx+colorL]
		movq	mm1, [esi+ebx*2+colorO]
		movq	mm2, mm0
		movq	mm3, mm1
		pcmpeqw	mm0, mm4
		pcmpeqw	mm1, mm4
		pcmpeqw	mm2, mm5
		pcmpeqw	mm3, mm5
		pand	mm0, mm6
		pand	mm1, mm6
		pand	mm2, mm6
		pand	mm3, mm6
		paddw	mm0, mm1
		paddw	mm2, mm3

		pxor	mm3, mm3
		pcmpgtw	mm0, mm6
		pcmpgtw	mm2, mm6
		pcmpeqw	mm0, mm3
		pcmpeqw	mm2, mm3
		pand	mm0, mm6
		pand	mm2, mm6
		paddw	mm7, mm0
		psubw	mm7, mm2

		// vote > 0 -> add lane to Mask1, vote < 0 -> add lane to Mask2
		movq	mm1, mm7
		pxor	mm0, mm0
		pcmpgtw	mm7, mm0
		pcmpgtw	mm0, mm1

		por	mm7, Mask1
		por	mm0, Mask2
		movq	Mask1, mm7
		movq	Mask2, mm0

nx_2xSaILine_16mmx_skipguess:
		//------------------------------
		// interpolate A, B, C and D
		//------------------------------
		movq	mm0, [eax+ebx+colorA]
		movq	mm1, [eax+ebx+colorB]
		movq	mm4, mm0
		movq	mm2, [eax+ebx*2+colorC]
		movq	mm5, mm1
		movq	mm3, qMask			// qcolorMask
		movq	mm6, mm2
		movq	mm7, lMask			// qlowcolorMask

		pand	mm0, mm3
		pand	mm1, mm3
		pand	mm2, mm3
		pand	mm3, [eax+ebx*2+colorD]

		psrlw	mm0, 2
		pand	mm4, mm7
		psrlw	mm1, 2
		pand	mm5, mm7
		psrlw	mm2, 2
		pand	mm6, mm7
		psrlw	mm3, 2
		pand	mm7, [eax+ebx*2+colorD]

		paddw	mm0, mm1
		paddw	mm2, mm3

		paddw	mm4, mm5
		paddw	mm6, mm7

		paddw	mm4, mm6
		movq	mm7, lMask			// qlowcolorMask
		paddw	mm0, mm2
		psrlw	mm4, 2
		pand	mm4, mm7
		paddw	mm0, mm4			// mm0 contains the interpolated value of A, B, C and D

		// assemble the pixels
		movq	mm1, Mask1
		movq	mm2, Mask2
		movq	mm4, [eax+ebx+colorA]
		movq	mm5, [eax+ebx+colorB]
		pand	mm4, mm1
		pand	mm5, mm2

		pxor	mm7, mm7
		por	mm1, mm2
		por	mm4, mm5
		pcmpeqw	mm1, mm7
		pand	mm0, mm1
		por	mm4, mm0			// mm4 contains the diagonal pixels

		movq	mm0, ACPixel
		mov	edi, dstPitch
		movq	mm1, mm0
		punpcklwd mm0, mm4
		punpckhwd mm1, mm4

		//------------------------------
		// Write image
		//------------------------------
		movq	[edx+edi+0], mm0
		movq	[edx+edi+8], mm1

nx_2xSaILine_16mmx_skipprocess:
		add	pDlt, 8				// 4 pixels
		lea	eax, [eax+ 8]			// 4 pixels
		lea	edx, [edx+16]			// 8 pixels
		sub	width, 4			// 4 pixels
		jg	nx_2xSaILine_16mmx_loop

		emms
	}
}

//
// nx_2xSaILine_32bpp_mmx
//
// 2xSaI-scales one row of 16-bit source pixels into two output rows of
// 32-bit pixels, four source pixels (one MMX quadword) per loop iteration.
// The filter itself runs on the 16-bit data; each 16-bit result pixel is
// expanded on output (the source comment calls this RGB1555->RGBx888):
//   bits  0-4  -> bits 3-7 of byte 0
//   bits  5-9  -> bits 3-7 of byte 1
//   bits 10-14 -> bits 3-7 of byte 2
//   byte 3 and the low 3 bits of every byte are zero.
//
//   pSrc        : current source row. The rows at -1, +1 and +2 * srcPitch
//                 are read as well (see the pixel map further down).
//   pDlt        : delta buffer, addressed with the same row/column offsets
//                 as (pSrc - srcPitch). Presumably a copy of the previously
//                 rendered source -- confirm with the caller.
//   srcPitch    : byte distance between rows, used for both pSrc and pDlt.
//   width       : number of source pixels. It is consumed 4 at a time and
//                 the loop runs while the remainder is > 0 (signed), so a
//                 width that is not a multiple of 4 is rounded up.
//   pDst        : first output row; 32 bytes are written per iteration.
//   dstPitch    : byte distance from the first to the second output row.
//   bForceWrite : non-zero = render every group without the delta compare.
//
// Symbols defined outside this function:
//   colorA..colorP          displacements used to address the 4x4 pixels
//   cMask, qMask, lMask, ONE  quadwords that are only read here
//   Mask1, Mask2, ACPixel   quadwords used as scratch storage
//
// Register roles inside the loop:
//   eax = source pointer one row ABOVE the current row
//   ebx = srcPitch      edx = destination pointer
//   ecx/esi/edi = scratch (edi holds dstPitch for the second row)
//   eax/ebx are borrowed by the pixel expansion and restored afterwards.
//
// The parameters pDlt and width are modified in place by the loop.
//
static void nx_2xSaILine_32bpp_mmx( euI8* pSrc, euI8* pDlt, euI32 srcPitch, euI32 width, euI8* pDst, euI32 dstPitch, euI32 bForceWrite )
{
	__asm {
		mov	eax, pSrc
		mov	ebx, srcPitch
		mov	edx, pDst
		sub	eax, ebx			// eax -> row above the current row
nx_2xSaILine_32mmx_loop:
		mov	ecx, bForceWrite
		test	ecx, ecx
		jz	nx_2xSaILine_32mmx_normal

		// Forced: refresh the delta buffer and render unconditionally
		mov	esi, pDlt
		movq	mm6, [eax+colorI]
		movq	[esi+colorI], mm6

		jmp	nx_2xSaILine_32mmx_forcewrite
nx_2xSaILine_32mmx_normal:
		// Check delta
		mov	ecx, pDlt

		// load source img
		lea	esi, [eax+ebx]
		movq	mm0, [eax+colorI]
		movq	mm1, [eax+colorJ]
		movq	mm2, [eax+ebx+colorG]
		movq	mm3, [eax+ebx+colorK]
		movq	mm4, [eax+ebx*2+colorH]
		movq	mm5, [eax+ebx*2+colorL]
		movq	mm6, [esi+ebx*2+colorM]
		movq	mm7, [esi+ebx*2+colorP]

		// compare to delta
		lea	esi, [ecx+ebx]
		pcmpeqw	mm0, [ecx+colorI]
		pcmpeqw	mm1, [ecx+colorJ]
		pcmpeqw	mm2, [ecx+ebx+colorG]
		pcmpeqw	mm3, [ecx+ebx+colorK]
		pcmpeqw	mm4, [ecx+ebx*2+colorH]
		pcmpeqw	mm5, [ecx+ebx*2+colorL]
		pcmpeqw	mm6, [esi+ebx*2+colorM]
		pcmpeqw	mm7, [esi+ebx*2+colorP]

		// compose results
		pand	mm0, mm1
		pand	mm2, mm3
		pand	mm4, mm5
		pand	mm6, mm7
		pand	mm0, mm2
		pand	mm4, mm6
		pxor	mm7, mm7
		pand	mm0, mm4			// mm0 = lanes where all 8 samples match
		movq	mm6, [eax+colorI]
		pcmpeqw	mm7, mm0			// mm7 = lanes where something changed

		movq	[ecx+colorI], mm6		// update the delta buffer

		packsswb mm7, mm7
		movd	ecx, mm7
		test	ecx, ecx
		jz	nx_2xSaILine_32mmx_skipprocess	// nothing changed: keep old output
		// End Delta
nx_2xSaILine_32mmx_forcewrite:
		//------------------------------
		// 1
		// if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL))
		movq	mm0, [eax+ebx+colorA]		// mm0 and mm1 contain colorA
		movq	mm2, [eax+ebx+colorB]		// mm2 and mm3 contain colorB

		movq	mm1, mm0
		movq	mm3, mm2

		pcmpeqw	mm0, [eax+ebx*2+colorD]
		pcmpeqw	mm1, [eax+colorE]
		pcmpeqw	mm2, [eax+ebx*2+colorL]
		pcmpeqw	mm3, [eax+ebx*2+colorC]

		pand	mm0, mm1
		pxor	mm1, mm1
		pand	mm0, mm2
		pcmpeqw	mm3, mm1			// invert: colorB != colorC
		pand	mm0, mm3			// result in mm0

		// if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ))
		movq	mm4, [eax+ebx+colorA]		// mm4 and mm5 contain colorA
		movq	mm6, [eax+ebx+colorB]		// mm6 and mm7 contain colorB
		movq	mm5, mm4
		movq	mm7, mm6

		pcmpeqw	mm4, [eax+ebx*2+colorC]
		pcmpeqw	mm5, [eax+colorF]
		pcmpeqw	mm6, [eax+colorJ]
		pcmpeqw	mm7, [eax+colorE]

		pand	mm4, mm5
		pxor	mm5, mm5
		pand	mm4, mm6
		pcmpeqw	mm7, mm5			// invert: colorB != colorE
		pand	mm4, mm7			// result in mm4

		por	mm0, mm4			// combine the masks
		movq	Mask1, mm0			// Mask1: lanes that take colorA

		//------------------------------
		// 2
		// if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH))
		movq	mm0, [eax+ebx+colorB]		// mm0 and mm1 contain colorB
		movq	mm2, [eax+ebx+colorA]		// mm2 and mm3 contain colorA
		movq	mm1, mm0
		movq	mm3, mm2

		pcmpeqw	mm0, [eax+ebx*2+colorC]
		pcmpeqw	mm1, [eax+colorF]
		pcmpeqw	mm2, [eax+ebx*2+colorH]
		pcmpeqw	mm3, [eax+ebx*2+colorD]

		pand	mm0, mm1
		pxor	mm1, mm1
		pand	mm0, mm2
		pcmpeqw	mm3, mm1			// invert: colorA != colorD
		pand	mm0, mm3			// result in mm0

		// if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))
		movq	mm4, [eax+ebx+colorB]		// mm4 and mm5 contain colorB
		movq	mm6, [eax+ebx+colorA]		// mm6 and mm7 contain colorA
		movq	mm5, mm4
		movq	mm7, mm6

		pcmpeqw	mm4, [eax+ebx*2+colorD]
		pcmpeqw	mm5, [eax+colorE]
		pcmpeqw	mm6, [eax+colorI]
		pcmpeqw	mm7, [eax+colorF]

		pand	mm4, mm5
		pxor	mm5, mm5
		pand	mm4, mm6
		pcmpeqw	mm7, mm5			// invert: colorA != colorF
		pand	mm4, mm7			// result in mm4

		por	mm0, mm4			// combine the masks
		movq	Mask2, mm0			// Mask2: lanes that take colorB

		//------------------------------
		// interpolate colorA and colorB
		//------------------------------
		movq	mm0, [eax+ebx+colorA]
		movq	mm1, [eax+ebx+colorB]
		movq	mm2, mm0
		movq	mm3, mm1

		// (A & B) + (((A ^ B) & cMask) >> 1)
		movq	mm6, cMask
		pxor	mm3, mm2
		pand	mm0, mm1
		pand	mm3, mm6
		psrlw	mm3, 1
		paddw	mm0, mm3			// mm0 contains the interpolated values

		// assemble the pixels
		// result = (A & Mask1) | (B & Mask2) | (interp & ~(Mask1 | Mask2))
		movq	mm1, [eax+ebx+colorA]
		movq	mm2, [eax+ebx+colorB]

		movq	mm3, Mask1
		movq	mm5, mm1
		movq	mm4, Mask2
		movq	mm6, mm1

		pand	mm1, mm3
		por	mm3, mm4
		pxor	mm7, mm7
		pand	mm2, mm4

		pcmpeqw	mm3, mm7
		por	mm1, mm2
		pand	mm0, mm3

		por	mm0, mm1

		// interleave: A0 r0 A1 r1 | A2 r2 A3 r3
		punpcklwd mm5, mm0
		punpckhwd mm6, mm0

		//------------------------------
		// Write image
		//------------------------------
		// save (eax/ebx are used as scratch by the expansion below)
		mov	esi, eax
		mov	edi, ebx
		mov	ecx, 0x00F8F8F8			// mask

		// 16bpp equivalent: movq [edx+0], mm5

		// output pixel 0
		movd	eax, mm5
		movd	ebx, mm5
		shl	eax, 3				// al = low field << 3
		shl	ebx, 9				// high field -> bits 19-23
		shl	ah, 3				// ah = middle field << 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm5, 16				// next 16-bit pixel
		mov	[edx+0], ebx
		// output pixel 1
		movd	eax, mm5
		movd	ebx, mm5
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm5, 16
		mov	[edx+4], ebx
		// output pixel 2
		movd	eax, mm5
		movd	ebx, mm5
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm5, 16
		mov	[edx+8], ebx
		// output pixel 3
		movd	eax, mm5
		movd	ebx, mm5
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		mov	[edx+12], ebx

		// 16bpp equivalent: movq [edx+8], mm6

		// output pixel 4
		movd	eax, mm6
		movd	ebx, mm6
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm6, 16
		mov	[edx+16], ebx
		// output pixel 5
		movd	eax, mm6
		movd	ebx, mm6
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm6, 16
		mov	[edx+20], ebx
		// output pixel 6
		movd	eax, mm6
		movd	ebx, mm6
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm6, 16
		mov	[edx+24], ebx
		// output pixel 7
		movd	eax, mm6
		movd	ebx, mm6
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		mov	[edx+28], ebx

		// restore
		mov	eax, esi
		mov	ebx, edi

		//------------------------------
		// Create the Nextline
		//------------------------------
		// 3
		// if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO))
		lea	esi, [eax+ebx]			// esi -> current row

		movq	mm0, [eax+ebx+colorA]		// mm0 and mm1 contain colorA
		movq	mm2, [eax+ebx*2+colorC]		// mm2 and mm3 contain colorC
		movq	mm1, mm0
		movq	mm3, mm2

		pcmpeqw	mm0, [esi+ebx+colorD]
		pcmpeqw	mm1, [esi+colorG]
		pcmpeqw	mm2, [esi+ebx*2+colorO]
		pcmpeqw	mm3, [esi+colorB]

		pand	mm0, mm1
		pxor	mm1, mm1
		pand	mm0, mm2
		pcmpeqw	mm3, mm1			// invert: colorB != colorC
		pand	mm0, mm3			// result in mm0

		// if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM))
		movq	mm4, [eax+ebx+colorA]		// mm4 and mm5 contain colorA
		movq	mm6, [eax+ebx*2+colorC]		// mm6 and mm7 contain colorC
		movq	mm5, mm4
		movq	mm7, mm6

		pcmpeqw	mm4, [esi+ebx+colorH]
		pcmpeqw	mm5, [esi+colorB]
		pcmpeqw	mm6, [esi+ebx*2+colorM]
		pcmpeqw	mm7, [esi+colorG]

		pand	mm4, mm5
		pxor	mm5, mm5
		pand	mm4, mm6
		pcmpeqw	mm7, mm5			// invert: colorG != colorC
		pand	mm4, mm7			// result in mm4

		por	mm0, mm4			// combine the masks
		movq	Mask1, mm0			// Mask1: lanes that take colorA

		//------------------------------
		// 4
		// if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF))
		movq	mm0, [eax+ebx*2+colorC]		// mm0 and mm1 contain colorC
		movq	mm2, [eax+ebx+colorA]		// mm2 and mm3 contain colorA
		movq	mm1, mm0
		movq	mm3, mm2

		pcmpeqw	mm0, [eax+ebx+colorB]
		pcmpeqw	mm1, [eax+ebx*2+colorH]
		pcmpeqw	mm2, [eax+colorF]
		pcmpeqw	mm3, [eax+ebx*2+colorD]

		pand	mm0, mm1
		pxor	mm1, mm1
		pand	mm0, mm2
		pcmpeqw	mm3, mm1			// invert: colorA != colorD
		pand	mm0, mm3			// result in mm0

		// if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI))
		movq	mm4, [eax+ebx*2+colorC]		// mm4 and mm5 contain colorC
		movq	mm6, [eax+ebx+colorA]		// mm6 and mm7 contain colorA
		movq	mm5, mm4
		movq	mm7, mm6

		pcmpeqw	mm4, [eax+ebx*2+colorD]
		pcmpeqw	mm5, [eax+ebx+colorG]
		pcmpeqw	mm6, [eax+colorI]
		pcmpeqw	mm7, [eax+ebx*2+colorH]

		pand	mm4, mm5
		pxor	mm5, mm5
		pand	mm4, mm6
		pcmpeqw	mm7, mm5			// invert: colorA != colorH
		pand	mm4, mm7			// result in mm4

		por	mm0, mm4			// combine the masks
		movq	Mask2, mm0			// Mask2: lanes that take colorC

		//------------------------------
		// interpolate colorA and colorC
		//------------------------------
		movq	mm0, [eax+ebx+colorA]
		movq	mm1, [eax+ebx*2+colorC]
		movq	mm2, mm0
		movq	mm3, mm1

		movq	mm6, cMask
		pxor	mm3, mm2
		pand	mm0, mm1
		pand	mm3, mm6
		psrlw	mm3, 1
		paddw	mm0, mm3			// mm0 contains the interpolated values

		// assemble the pixels
		movq	mm1, [eax+ebx+colorA]
		movq	mm2, [eax+ebx*2+colorC]

		movq	mm3, Mask1
		movq	mm4, Mask2

		pand	mm1, mm3
		pand	mm2, mm4

		por	mm3, mm4
		pxor	mm7, mm7
		por	mm1, mm2

		pcmpeqw	mm3, mm7
		pand	mm0, mm3
		por	mm0, mm1
		movq	ACPixel, mm0			// left pixels of the second output row

		//------------------------------
		// Decide which "branch" to take
		//------------------------------
		movq	mm0, [eax+ebx+colorA]
		movq	mm1, [eax+ebx+colorB]
		movq	mm6, mm0
		movq	mm7, mm1
		pcmpeqw	mm0, [eax+ebx*2+colorD]
		pcmpeqw	mm1, [eax+ebx*2+colorC]
		pcmpeqw	mm6, mm7

		movq	mm2, mm0
		movq	mm3, mm0

		pand	mm0, mm1			// colorA == colorD && colorB == colorC
		pxor	mm7, mm7

		pcmpeqw	mm2, mm7
		pand	mm6, mm0			// ... and additionally colorA == colorB
		pand	mm2, mm1			// colorA != colorD && colorB == colorC

		pcmpeqw	mm1, mm7

		pand	mm1, mm3			// colorA == colorD && colorB != colorC
		pxor	mm0, mm6			// both diagonals match, colorA != colorB
		por	mm1, mm6
		movq	mm7, mm0
		movq	Mask2, mm2
		packsswb mm7, mm7
		movq	Mask1, mm1

		movd	ecx, mm7
		test	ecx, ecx
		jz	nx_2xSaILine_32mmx_skipguess	// no lane needs the neighbour vote

		//------------------------------
		// Map of the pixels:           I|E F|J
		//                              G|A B|K
		//                              H|C D|L
		//                              M|N O|P
		//------------------------------
		// Neighbour vote, accumulated per lane in mm7. For each neighbour
		// pair: +ONE if at most one of the pair equals colorA, -ONE if at
		// most one equals colorB. mm6 restricts it to the undecided lanes.
		movq	mm6, mm0
		movq	mm4, [eax+ebx+colorA]
		movq	mm5, [eax+ebx+colorB]
		pxor	mm7, mm7
		pand	mm6, ONE

		// pair E, G
		movq	mm0, [eax+colorE]
		movq	mm1, [eax+ebx+colorG]
		movq	mm2, mm0
		movq	mm3, mm1
		pcmpeqw	mm0, mm4
		pcmpeqw	mm1, mm4
		pcmpeqw	mm2, mm5
		pcmpeqw	mm3, mm5
		pand	mm0, mm6
		pand	mm1, mm6
		pand	mm2, mm6
		pand	mm3, mm6
		paddw	mm0, mm1
		paddw	mm2, mm3

		pxor	mm3, mm3
		pcmpgtw	mm0, mm6
		pcmpgtw	mm2, mm6
		pcmpeqw	mm0, mm3
		pcmpeqw	mm2, mm3
		pand	mm0, mm6
		pand	mm2, mm6
		paddw	mm7, mm0
		psubw	mm7, mm2

		// pair F, K
		movq	mm0, [eax+colorF]
		movq	mm1, [eax+ebx+colorK]
		movq	mm2, mm0
		movq	mm3, mm1
		pcmpeqw	mm0, mm4
		pcmpeqw	mm1, mm4
		pcmpeqw	mm2, mm5
		pcmpeqw	mm3, mm5
		pand	mm0, mm6
		pand	mm1, mm6
		pand	mm2, mm6
		pand	mm3, mm6
		paddw	mm0, mm1
		paddw	mm2, mm3

		lea	esi, [eax+ebx]			// esi -> current row

		pxor	mm3, mm3
		pcmpgtw	mm0, mm6
		pcmpgtw	mm2, mm6
		pcmpeqw	mm0, mm3
		pcmpeqw	mm2, mm3
		pand	mm0, mm6
		pand	mm2, mm6
		paddw	mm7, mm0
		psubw	mm7, mm2

		// pair H, N
		movq	mm0, [esi+ebx+colorH]
		movq	mm1, [esi+ebx*2+colorN]
		movq	mm2, mm0
		movq	mm3, mm1
		pcmpeqw	mm0, mm4
		pcmpeqw	mm1, mm4
		pcmpeqw	mm2, mm5
		pcmpeqw	mm3, mm5
		pand	mm0, mm6
		pand	mm1, mm6
		pand	mm2, mm6
		pand	mm3, mm6
		paddw	mm0, mm1
		paddw	mm2, mm3

		pxor	mm3, mm3
		pcmpgtw	mm0, mm6
		pcmpgtw	mm2, mm6
		pcmpeqw	mm0, mm3
		pcmpeqw	mm2, mm3
		pand	mm0, mm6
		pand	mm2, mm6
		paddw	mm7, mm0
		psubw	mm7, mm2

		// pair L, O
		movq	mm0, [esi+ebx+colorL]
		movq	mm1, [esi+ebx*2+colorO]
		movq	mm2, mm0
		movq	mm3, mm1
		pcmpeqw	mm0, mm4
		pcmpeqw	mm1, mm4
		pcmpeqw	mm2, mm5
		pcmpeqw	mm3, mm5
		pand	mm0, mm6
		pand	mm1, mm6
		pand	mm2, mm6
		pand	mm3, mm6
		paddw	mm0, mm1
		paddw	mm2, mm3

		pxor	mm3, mm3
		pcmpgtw	mm0, mm6
		pcmpgtw	mm2, mm6
		pcmpeqw	mm0, mm3
		pcmpeqw	mm2, mm3
		pand	mm0, mm6
		pand	mm2, mm6
		paddw	mm7, mm0
		psubw	mm7, mm2

		// vote > 0 -> add lane to Mask1, vote < 0 -> add lane to Mask2
		movq	mm1, mm7
		pxor	mm0, mm0
		pcmpgtw	mm7, mm0
		pcmpgtw	mm0, mm1

		por	mm7, Mask1
		por	mm0, Mask2
		movq	Mask1, mm7
		movq	Mask2, mm0

nx_2xSaILine_32mmx_skipguess:
		//------------------------------
		// interpolate A, B, C and D
		//------------------------------
		movq	mm0, [eax+ebx+colorA]
		movq	mm1, [eax+ebx+colorB]
		movq	mm4, mm0
		movq	mm2, [eax+ebx*2+colorC]
		movq	mm5, mm1
		movq	mm3, qMask			// qcolorMask
		movq	mm6, mm2
		movq	mm7, lMask			// qlowcolorMask

		pand	mm0, mm3
		pand	mm1, mm3
		pand	mm2, mm3
		pand	mm3, [eax+ebx*2+colorD]

		psrlw	mm0, 2
		pand	mm4, mm7
		psrlw	mm1, 2
		pand	mm5, mm7
		psrlw	mm2, 2
		pand	mm6, mm7
		psrlw	mm3, 2
		pand	mm7, [eax+ebx*2+colorD]

		paddw	mm0, mm1
		paddw	mm2, mm3

		paddw	mm4, mm5
		paddw	mm6, mm7

		paddw	mm4, mm6
		movq	mm7, lMask			// qlowcolorMask
		paddw	mm0, mm2
		psrlw	mm4, 2
		pand	mm4, mm7
		paddw	mm0, mm4			// mm0 contains the interpolated value of A, B, C and D

		// assemble the pixels
		movq	mm1, Mask1
		movq	mm2, Mask2
		movq	mm4, [eax+ebx+colorA]
		movq	mm5, [eax+ebx+colorB]
		pand	mm4, mm1
		pand	mm5, mm2

		pxor	mm7, mm7
		por	mm1, mm2
		por	mm4, mm5
		pcmpeqw	mm1, mm7
		pand	mm0, mm1
		por	mm4, mm0			// mm4 contains the diagonal pixels

		movq	mm0, ACPixel
		mov	edi, dstPitch
		movq	mm1, mm0
		punpcklwd mm0, mm4
		punpckhwd mm1, mm4

		//------------------------------
		// Write image RGB1555->RGBx888
		//------------------------------
		// save (edi holds dstPitch here, so eax/ebx are parked in mm6/mm7)
		movd	mm6, eax
		movd	mm7, ebx
		mov	ecx, 0x00F8F8F8			// mask

		// 16bpp equivalent: movq [edx+edi+0], mm0
		// output pixel 0
		movd	eax, mm0
		movd	ebx, mm0
		shl	eax, 3				// al = low field << 3
		shl	ebx, 9				// high field -> bits 19-23
		shl	ah, 3				// ah = middle field << 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm0, 16				// next 16-bit pixel
		mov	[edx+edi+0], ebx
		// output pixel 1
		movd	eax, mm0
		movd	ebx, mm0
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm0, 16
		mov	[edx+edi+4], ebx
		// output pixel 2
		movd	eax, mm0
		movd	ebx, mm0
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm0, 16
		mov	[edx+edi+8], ebx
		// output pixel 3
		movd	eax, mm0
		movd	ebx, mm0
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		mov	[edx+edi+12], ebx

		// 16bpp equivalent: movq [edx+edi+8], mm1
		// output pixel 4
		movd	eax, mm1
		movd	ebx, mm1
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm1, 16
		mov	[edx+edi+16], ebx
		// output pixel 5
		movd	eax, mm1
		movd	ebx, mm1
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm1, 16
		mov	[edx+edi+20], ebx
		// output pixel 6
		movd	eax, mm1
		movd	ebx, mm1
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		psrlq	mm1, 16
		mov	[edx+edi+24], ebx
		// output pixel 7
		movd	eax, mm1
		movd	ebx, mm1
		shl	eax, 3
		shl	ebx, 9
		shl	ah, 3
		and	ebx, ecx
		mov	bx, ax
		mov	[edx+edi+28], ebx

		// restore
		movd	eax, mm6
		movd	ebx, mm7

nx_2xSaILine_32mmx_skipprocess:
		add	pDlt, 8				// 4 pixels
		lea	eax, [eax+ 8]			// 4 pixels
		lea	edx, [edx+32]			// 8 pixels
		sub	width, 4			// 4 pixels
		jg	nx_2xSaILine_32mmx_loop

		emms
	}
}