AxibugEmuOnline/References/VirtuaNESex_src_191105/nx_Scale2x.h

443 lines
9.4 KiB
C
Raw Normal View History

2024-08-05 17:58:53 +08:00
/*
* This file is part of the Advance project.
*
* Copyright (C) 1999-2002 Andrea Mazzoleni
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* This file contains a C and MMX implementation of the Scale2x effect.
*
* You can find a high-level description of the effect at:
*
* http://scale2x.sourceforge.net/scale2x.html
*
* Alternatively at the previous license terms, you are allowed to use this
* code in your program with these conditions:
* - the program is not used in commercial activities.
* - the whole source code of the program is released with the binary.
* - derivative works of the program are allowed.
*/
// Note
//
// Most of the original code has been removed from this file.
// The original code is available from the website of the Advance project.
// MS Visual C++ with the Processor Pack is required to compile this code.
//
/*
 * Produce one output row of the Scale2x effect for 16-bit pixels using MMX.
 *
 *   dst   - output row; 16 bytes are stored for every 8 bytes read, i.e. two
 *           output pixels per source pixel
 *   src0  - row used as the "upper" neighbour of the current row
 *   src1  - current source row
 *   src2  - row used as the "lower" neighbour of the current row
 *   count - number of source pixels in the row
 *
 * For a source pixel E with left/right neighbours D/F (from src1), upper
 * neighbour B (from src0) and lower neighbour H (from src2) the two output
 * pixels are:
 *
 *   left  = (D == B && D != H && D != F) ? B : E
 *   right = (F == B && F != H && F != D) ? B : E
 *
 * The pixel to the left of the first pixel and the pixel to the right of the
 * last pixel are taken as 0 ("fake black out of screen").
 *
 * The row is processed in runs of four pixels (one quadword): one first run,
 * (count - 8) / 4 central runs and one final run.  Consequences:
 *   - rows shorter than 8 pixels cannot be processed; the function returns
 *     without writing anything (previously the unsigned subtraction wrapped
 *     and the central loop ran far past the end of the rows);
 *   - if count is not a multiple of 4, the trailing count % 4 pixels are not
 *     processed.
 *
 * NOTE(review): ebx and esi are used as scratch registers; this relies on the
 * compiler saving them around the __asm block - confirm for the toolchain.
 */
static void internal_scale2x_16_mmx_single(euI16* dst, const euI16* src0, const euI16* src1, const euI16* src2, unsigned count)
{
    /* the first and last run are always executed, so at least 2*4 pixels are required */
    if (count < 2*4)
        return;

    /* always do the first and last run */
    count -= 2*4;

    __asm {
        mov eax, src0                   /* eax = upper row */
        mov ebx, src1                   /* ebx = current row */
        mov ecx, src2                   /* ecx = lower row */
        mov edx, dst                    /* edx = output row */
        mov esi, count                  /* esi = pixels left for the central runs */

        /* first run */
        /* set the current, current_pre, current_next registers */
        pxor mm0, mm0                   /* use a fake black out of screen */
        movq mm7, qword ptr [ebx+0]     /* mm7 = current 4 pixels */
        movq mm1, qword ptr [ebx+8]     /* mm1 = following 4 pixels */
        psrlq mm0, 48
        psllq mm1, 48                   /* first following pixel -> top lane */
        movq mm2, mm7
        movq mm3, mm7
        psllq mm2, 16                   /* each lane = pixel to its left */
        psrlq mm3, 16                   /* each lane = pixel to its right */
        por mm0, mm2                    /* mm0 = current_pre  (left neighbours) */
        por mm1, mm3                    /* mm1 = current_next (right neighbours) */
        /* current_upper */
        movq mm6, qword ptr [eax]
        /* compute the upper-left pixel for dst0 on %%mm2 */
        /* compute the upper-right pixel for dst0 on %%mm4 */
        movq mm2, mm0
        movq mm4, mm1
        movq mm3, mm0
        movq mm5, mm1
        pcmpeqw mm2, mm6                /* left  == upper */
        pcmpeqw mm4, mm6                /* right == upper */
        pcmpeqw mm3, qword ptr [ecx]    /* left  == lower */
        pcmpeqw mm5, qword ptr [ecx]    /* right == lower */
        pandn mm3, mm2                  /* left  == upper && left  != lower */
        pandn mm5, mm4                  /* right == upper && right != lower */
        movq mm2, mm0
        movq mm4, mm1
        pcmpeqw mm2, mm1                /* left == right */
        pcmpeqw mm4, mm0                /* right == left */
        pandn mm2, mm3                  /* ... && left != right : left mask */
        pandn mm4, mm5                  /* ... && right != left : right mask */
        movq mm3, mm2
        movq mm5, mm4
        pand mm2, mm6                   /* mask set   -> upper pixel */
        pand mm4, mm6
        pandn mm3, mm7                  /* mask clear -> current pixel */
        pandn mm5, mm7
        por mm2, mm3                    /* mm2 = left output pixels */
        por mm4, mm5                    /* mm4 = right output pixels */
        /* set *dst0 */
        movq mm3, mm2
        punpcklwd mm2, mm4              /* interleave left/right, pixels 0-1 */
        punpckhwd mm3, mm4              /* interleave left/right, pixels 2-3 */
        movq qword ptr [edx+0], mm2
        movq qword ptr [edx+8], mm3
        /* next */
        add eax, 8
        add ebx, 8
        add ecx, 8
        add edx, 16

        /* central runs */
        shr esi, 2                      /* pixels -> number of 4-pixel runs */
        jz label1                       /* no central runs: go to the final run */
        align 4
    label0:
        /* set the current, current_pre, current_next registers */
        movq mm0, qword ptr [ebx-8]     /* previous 4 pixels */
        movq mm7, qword ptr [ebx+0]     /* current 4 pixels */
        movq mm1, qword ptr [ebx+8]     /* following 4 pixels */
        psrlq mm0, 48                   /* last previous pixel -> bottom lane */
        psllq mm1, 48                   /* first following pixel -> top lane */
        movq mm2, mm7
        movq mm3, mm7
        psllq mm2, 16
        psrlq mm3, 16
        por mm0, mm2                    /* mm0 = current_pre */
        por mm1, mm3                    /* mm1 = current_next */
        /* current_upper */
        movq mm6, qword ptr [eax]
        /* compute the upper-left pixel for dst0 on %%mm2 */
        /* compute the upper-right pixel for dst0 on %%mm4 */
        movq mm2, mm0
        movq mm4, mm1
        movq mm3, mm0
        movq mm5, mm1
        pcmpeqw mm2, mm6
        pcmpeqw mm4, mm6
        pcmpeqw mm3, qword ptr [ecx]
        pcmpeqw mm5, qword ptr [ecx]
        pandn mm3, mm2
        pandn mm5, mm4
        movq mm2, mm0
        movq mm4, mm1
        pcmpeqw mm2, mm1
        pcmpeqw mm4, mm0
        pandn mm2, mm3
        pandn mm4, mm5
        movq mm3, mm2
        movq mm5, mm4
        pand mm2, mm6
        pand mm4, mm6
        pandn mm3, mm7
        pandn mm5, mm7
        por mm2, mm3
        por mm4, mm5
        /* set *dst0 */
        movq mm3, mm2
        punpcklwd mm2, mm4
        punpckhwd mm3, mm4
        movq qword ptr [edx+0], mm2
        movq qword ptr [edx+8], mm3
        /* next */
        add eax, 8
        add ebx, 8
        add ecx, 8
        add edx, 16
        dec esi
        jnz label0

    label1:
        /* final run */
        /* set the current, current_pre, current_next registers */
        movq mm0, qword ptr [ebx-8]
        movq mm7, qword ptr [ebx+0]
        pxor mm1, mm1                   /* use a fake black out of screen */
        psrlq mm0, 48
        psllq mm1, 48
        movq mm2, mm7
        movq mm3, mm7
        psllq mm2, 16
        psrlq mm3, 16
        por mm0, mm2
        por mm1, mm3
        /* current_upper */
        movq mm6, qword ptr [eax]
        /* compute the upper-left pixel for dst0 on %%mm2 */
        /* compute the upper-right pixel for dst0 on %%mm4 */
        movq mm2, mm0
        movq mm4, mm1
        movq mm3, mm0
        movq mm5, mm1
        pcmpeqw mm2, mm6
        pcmpeqw mm4, mm6
        pcmpeqw mm3, qword ptr [ecx]
        pcmpeqw mm5, qword ptr [ecx]
        pandn mm3, mm2
        pandn mm5, mm4
        movq mm2, mm0
        movq mm4, mm1
        pcmpeqw mm2, mm1
        pcmpeqw mm4, mm0
        pandn mm2, mm3
        pandn mm4, mm5
        movq mm3, mm2
        movq mm5, mm4
        pand mm2, mm6
        pand mm4, mm6
        pandn mm3, mm7
        pandn mm5, mm7
        por mm2, mm3
        por mm4, mm5
        /* set *dst0 */
        movq mm3, mm2
        punpcklwd mm2, mm4
        punpckhwd mm3, mm4
        movq qword ptr [edx+0], mm2
        movq qword ptr [edx+8], mm3

        /* the advanced pointers are not written back: the parameters are */
        /* passed by value and are not used after this point */
        emms                            /* leave MMX state so the FPU is usable again */
    }
}
/*
 * Produce one output row of the Scale2x effect for 32-bit pixels using MMX.
 *
 *   dst   - output row; 16 bytes are stored for every 8 bytes read, i.e. two
 *           output pixels per source pixel
 *   src0  - row used as the "upper" neighbour of the current row
 *   src1  - current source row
 *   src2  - row used as the "lower" neighbour of the current row
 *   count - number of source pixels in the row
 *
 * For a source pixel E with left/right neighbours D/F (from src1), upper
 * neighbour B (from src0) and lower neighbour H (from src2) the two output
 * pixels are:
 *
 *   left  = (D == B && D != H && D != F) ? B : E
 *   right = (F == B && F != H && F != D) ? B : E
 *
 * The pixel to the left of the first pixel and the pixel to the right of the
 * last pixel are taken as 0.
 *
 * The row is processed in runs of two pixels (one quadword): one first run,
 * (count - 4) / 2 central runs and one final run.  Consequences:
 *   - rows shorter than 4 pixels cannot be processed; the function returns
 *     without writing anything (previously the unsigned subtraction wrapped
 *     and the central loop ran far past the end of the rows);
 *   - if count is odd, the trailing pixel is not processed.
 *
 * NOTE(review): ebx and esi are used as scratch registers; this relies on the
 * compiler saving them around the __asm block - confirm for the toolchain.
 */
static void internal_scale2x_32_mmx_single(euI32* dst, const euI32* src0, const euI32* src1, const euI32* src2, unsigned count)
{
    /* the first and last run are always executed, so at least 2*2 pixels are required */
    if (count < 2*2)
        return;

    /* always do the first and last run */
    count -= 2*2;

    __asm {
        mov eax, src0                   /* eax = upper row */
        mov ebx, src1                   /* ebx = current row */
        mov ecx, src2                   /* ecx = lower row */
        mov edx, dst                    /* edx = output row */
        mov esi, count                  /* esi = pixels left for the central runs */

        /* first run */
        /* set the current, current_pre, current_next registers */
        pxor mm0, mm0                   /* zero pixel left of the row start */
        movq mm7, qword ptr [ebx+0]     /* mm7 = current 2 pixels */
        movq mm1, qword ptr [ebx+8]     /* mm1 = following 2 pixels */
        psrlq mm0, 32
        psllq mm1, 32                   /* first following pixel -> top lane */
        movq mm2, mm7
        movq mm3, mm7
        psllq mm2, 32                   /* each lane = pixel to its left */
        psrlq mm3, 32                   /* each lane = pixel to its right */
        por mm0, mm2                    /* mm0 = current_pre  (left neighbours) */
        por mm1, mm3                    /* mm1 = current_next (right neighbours) */
        /* current_upper */
        movq mm6, qword ptr [eax]
        /* compute the upper-left pixel for dst0 on %%mm2 */
        /* compute the upper-right pixel for dst0 on %%mm4 */
        movq mm2, mm0
        movq mm4, mm1
        movq mm3, mm0
        movq mm5, mm1
        pcmpeqd mm2, mm6                /* left  == upper */
        pcmpeqd mm4, mm6                /* right == upper */
        pcmpeqd mm3, qword ptr [ecx]    /* left  == lower */
        pcmpeqd mm5, qword ptr [ecx]    /* right == lower */
        pandn mm3, mm2                  /* left  == upper && left  != lower */
        pandn mm5, mm4                  /* right == upper && right != lower */
        movq mm2, mm0
        movq mm4, mm1
        pcmpeqd mm2, mm1                /* left == right */
        pcmpeqd mm4, mm0                /* right == left */
        pandn mm2, mm3                  /* ... && left != right : left mask */
        pandn mm4, mm5                  /* ... && right != left : right mask */
        movq mm3, mm2
        movq mm5, mm4
        pand mm2, mm6                   /* mask set   -> upper pixel */
        pand mm4, mm6
        pandn mm3, mm7                  /* mask clear -> current pixel */
        pandn mm5, mm7
        por mm2, mm3                    /* mm2 = left output pixels */
        por mm4, mm5                    /* mm4 = right output pixels */
        /* set *dst0 */
        movq mm3, mm2
        punpckldq mm2, mm4              /* left/right pair for pixel 0 */
        punpckhdq mm3, mm4              /* left/right pair for pixel 1 */
        movq qword ptr [edx+0], mm2
        movq qword ptr [edx+8], mm3
        /* next */
        add eax, 8
        add ebx, 8
        add ecx, 8
        add edx, 16

        /* central runs */
        shr esi, 1                      /* pixels -> number of 2-pixel runs */
        jz label1                       /* no central runs: go to the final run */
    label0:
        /* set the current, current_pre, current_next registers */
        movq mm0, qword ptr [ebx-8]     /* previous 2 pixels */
        movq mm7, qword ptr [ebx+0]     /* current 2 pixels */
        movq mm1, qword ptr [ebx+8]     /* following 2 pixels */
        psrlq mm0, 32                   /* last previous pixel -> bottom lane */
        psllq mm1, 32                   /* first following pixel -> top lane */
        movq mm2, mm7
        movq mm3, mm7
        psllq mm2, 32
        psrlq mm3, 32
        por mm0, mm2                    /* mm0 = current_pre */
        por mm1, mm3                    /* mm1 = current_next */
        /* current_upper */
        movq mm6, qword ptr [eax]
        /* compute the upper-left pixel for dst0 on %%mm2 */
        /* compute the upper-right pixel for dst0 on %%mm4 */
        movq mm2, mm0
        movq mm4, mm1
        movq mm3, mm0
        movq mm5, mm1
        pcmpeqd mm2, mm6
        pcmpeqd mm4, mm6
        pcmpeqd mm3, qword ptr [ecx]
        pcmpeqd mm5, qword ptr [ecx]
        pandn mm3, mm2
        pandn mm5, mm4
        movq mm2, mm0
        movq mm4, mm1
        pcmpeqd mm2, mm1
        pcmpeqd mm4, mm0
        pandn mm2, mm3
        pandn mm4, mm5
        movq mm3, mm2
        movq mm5, mm4
        pand mm2, mm6
        pand mm4, mm6
        pandn mm3, mm7
        pandn mm5, mm7
        por mm2, mm3
        por mm4, mm5
        /* set *dst0 */
        movq mm3, mm2
        punpckldq mm2, mm4
        punpckhdq mm3, mm4
        movq qword ptr [edx+0], mm2
        movq qword ptr [edx+8], mm3
        /* next */
        add eax, 8
        add ebx, 8
        add ecx, 8
        add edx, 16
        dec esi
        jnz label0

    label1:
        /* final run */
        /* set the current, current_pre, current_next registers */
        movq mm0, qword ptr [ebx-8]
        movq mm7, qword ptr [ebx+0]
        pxor mm1, mm1                   /* zero pixel right of the row end */
        psrlq mm0, 32
        psllq mm1, 32
        movq mm2, mm7
        movq mm3, mm7
        psllq mm2, 32
        psrlq mm3, 32
        por mm0, mm2
        por mm1, mm3
        /* current_upper */
        movq mm6, qword ptr [eax]
        /* compute the upper-left pixel for dst0 on %%mm2 */
        /* compute the upper-right pixel for dst0 on %%mm4 */
        movq mm2, mm0
        movq mm4, mm1
        movq mm3, mm0
        movq mm5, mm1
        pcmpeqd mm2, mm6
        pcmpeqd mm4, mm6
        pcmpeqd mm3, qword ptr [ecx]
        pcmpeqd mm5, qword ptr [ecx]
        pandn mm3, mm2
        pandn mm5, mm4
        movq mm2, mm0
        movq mm4, mm1
        pcmpeqd mm2, mm1
        pcmpeqd mm4, mm0
        pandn mm2, mm3
        pandn mm4, mm5
        movq mm3, mm2
        movq mm5, mm4
        pand mm2, mm6
        pand mm4, mm6
        pandn mm3, mm7
        pandn mm5, mm7
        por mm2, mm3
        por mm4, mm5
        /* set *dst0 */
        movq mm3, mm2
        punpckldq mm2, mm4
        punpckhdq mm3, mm4
        movq qword ptr [edx+0], mm2
        movq qword ptr [edx+8], mm3

        /* the advanced pointers are not written back: the parameters are */
        /* passed by value and are not used after this point */
        emms                            /* leave MMX state so the FPU is usable again */
    }
}
/*
 * Scale one row of 16-bit pixels (src1) into two output rows.
 *
 *   dst0, dst1 - the two output rows
 *   src0       - source row passed as the upper neighbour for dst0
 *   src1       - source row being scaled
 *   src2       - source row passed as the lower neighbour for dst0
 *   count      - forwarded unchanged to the single-row kernel
 */
static void internal_scale2x_16_mmx(euI16* dst0, euI16* dst1, const euI16* src0, const euI16* src1, const euI16* src2, unsigned count)
{
    const euI16* const above = src0;
    const euI16* const centre = src1;
    const euI16* const below = src2;

    /* first output row: neighbours in their natural order */
    internal_scale2x_16_mmx_single(dst0, above, centre, below, count);

    /* second output row: same kernel with the outer rows exchanged */
    internal_scale2x_16_mmx_single(dst1, below, centre, above, count);
}
/*
 * Scale one row of 32-bit pixels (src1) into two output rows.
 *
 *   dst0, dst1 - the two output rows
 *   src0       - source row passed as the upper neighbour for dst0
 *   src1       - source row being scaled
 *   src2       - source row passed as the lower neighbour for dst0
 *   count      - forwarded unchanged to the single-row kernel
 */
static void internal_scale2x_32_mmx(euI32* dst0, euI32* dst1, const euI32* src0, const euI32* src1, const euI32* src2, unsigned count)
{
    const euI32* const above = src0;
    const euI32* const centre = src1;
    const euI32* const below = src2;

    /* first output row: neighbours in their natural order */
    internal_scale2x_32_mmx_single(dst0, above, centre, below, count);

    /* second output row: same kernel with the outer rows exchanged */
    internal_scale2x_32_mmx_single(dst1, below, centre, above, count);
}