/*
 * This file is part of the Advance project.
 *
 * Copyright (C) 1999-2002 Andrea Mazzoleni
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * This file contains a C and MMX implementation of the Scale2x effect.
 *
 * You can find a high level description of the effect at:
 *
 * http://scale2x.sourceforge.net/scale2x.html
 *
 * Alternatively at the previous license terms, you are allowed to use this
 * code in your program with these conditions:
 * - the program is not used in commercial activities.
 * - the whole source code of the program is released with the binary.
 * - derivative works of the program are allowed.
 */

// Note
//
// Most of the original code has been removed from this file.
// The original code can be obtained from the website of the Advance project.
// MS Visual C++ with the Processor Pack is required to compile this code.
//
/*
 * Scale2x, 16 bits per pixel, MMX: build ONE output row from three input rows.
 *
 * For every pixel E of src1, with B the pixel in the same column of src0,
 * H the pixel in the same column of src2, D the pixel to the left of E and
 * F the pixel to the right of E (both in src1), two output pixels are stored:
 *
 *   left  = (D == B && D != H && D != F) ? B : E
 *   right = (F == B && F != H && F != D) ? B : E
 *
 * The pixel left of the first column and the pixel right of the last
 * processed column are taken as 0 ("fake black out of screen").
 *
 * dst   - output row, 2 * count pixels are written
 * src0  - row supplying B
 * src1  - row supplying D, E and F
 * src2  - row supplying H
 * count - number of pixels in each input row. Four pixels (one quadword) are
 *         handled per run and a first and a last run are always executed, so
 *         count must be at least 8; smaller values return without touching
 *         dst (the unsigned subtraction below would otherwise wrap around).
 *         count is expected to be a multiple of 4; a remainder is left
 *         unprocessed.
 *
 * The code uses the MSVC __asm block syntax and 32 bit registers.
 */
static void internal_scale2x_16_mmx_single(euI16* dst, const euI16* src0, const euI16* src1, const euI16* src2, unsigned count)
{
	/* a first and a last run of 4 pixels each are unconditional */
	if (count < 2*4)
		return;

	/* always do the first and last run */
	count -= 2*4;

	__asm {
		/*
		 * Register roles:
		 *   eax = src0 cursor      ebx = src1 cursor     ecx = src2 cursor
		 *   edx = dst cursor       esi = central run counter
		 *   mm7 = E (current)      mm0 = D (left)        mm1 = F (right)
		 *   mm6 = B (src0 row)     [ecx] = H (src2 row)
		 *   mm2/mm3 = left result and scratch, mm4/mm5 = right result and scratch
		 */
		mov eax, src0
		mov ebx, src1
		mov ecx, src2
		mov edx, dst
		mov esi, count

		/* ---- first run: nothing on the left, D of pixel 0 is 0 ---- */
		/* set the current, current_pre, current_next registers */
		pxor mm0, mm0 /* use a fake black out of screen */
		movq mm7, qword ptr [ebx+0]
		movq mm1, qword ptr [ebx+8]
		psrlq mm0, 48
		psllq mm1, 48 /* first pixel of the next quadword -> top lane */
		movq mm2, mm7
		movq mm3, mm7
		psllq mm2, 16 /* E moved up one lane */
		psrlq mm3, 16 /* E moved down one lane */
		por mm0, mm2 /* mm0 = D */
		por mm1, mm3 /* mm1 = F */

		/* current_upper */
		movq mm6, qword ptr [eax]

		/* compute the upper-left pixel for dst0 on mm2 */
		/* compute the upper-right pixel for dst0 on mm4 */
		movq mm2, mm0
		movq mm4, mm1
		movq mm3, mm0
		movq mm5, mm1
		pcmpeqw mm2, mm6 /* D == B */
		pcmpeqw mm4, mm6 /* F == B */
		pcmpeqw mm3, qword ptr [ecx] /* D == H */
		pcmpeqw mm5, qword ptr [ecx] /* F == H */
		pandn mm3, mm2 /* D == B && D != H */
		pandn mm5, mm4 /* F == B && F != H */
		movq mm2, mm0
		movq mm4, mm1
		pcmpeqw mm2, mm1 /* D == F */
		pcmpeqw mm4, mm0 /* F == D */
		pandn mm2, mm3 /* left mask: ... && D != F */
		pandn mm4, mm5 /* right mask: ... && F != D */
		movq mm3, mm2
		movq mm5, mm4
		pand mm2, mm6 /* B where the mask is set */
		pand mm4, mm6
		pandn mm3, mm7 /* E where the mask is clear */
		pandn mm5, mm7
		por mm2, mm3 /* mm2 = left output pixels */
		por mm4, mm5 /* mm4 = right output pixels */

		/* set *dst0: interleave left/right results, 8 pixels out */
		movq mm3, mm2
		punpcklwd mm2, mm4
		punpckhwd mm3, mm4
		movq qword ptr [edx+0], mm2
		movq qword ptr [edx+8], mm3

		/* next */
		add eax, 8
		add ebx, 8
		add ecx, 8
		add edx, 16

		/* ---- central runs: one per remaining group of 4 pixels ---- */
		shr esi, 2
		jz label1
		align 4
	label0:

		/* set the current, current_pre, current_next registers */
		movq mm0, qword ptr [ebx-8]
		movq mm7, qword ptr [ebx+0]
		movq mm1, qword ptr [ebx+8]
		psrlq mm0, 48 /* last pixel of the previous quadword -> bottom lane */
		psllq mm1, 48 /* first pixel of the next quadword -> top lane */
		movq mm2, mm7
		movq mm3, mm7
		psllq mm2, 16
		psrlq mm3, 16
		por mm0, mm2 /* mm0 = D */
		por mm1, mm3 /* mm1 = F */

		/* current_upper */
		movq mm6, qword ptr [eax]

		/* compute the upper-left pixel for dst0 on mm2 */
		/* compute the upper-right pixel for dst0 on mm4 */
		movq mm2, mm0
		movq mm4, mm1
		movq mm3, mm0
		movq mm5, mm1
		pcmpeqw mm2, mm6
		pcmpeqw mm4, mm6
		pcmpeqw mm3, qword ptr [ecx]
		pcmpeqw mm5, qword ptr [ecx]
		pandn mm3, mm2
		pandn mm5, mm4
		movq mm2, mm0
		movq mm4, mm1
		pcmpeqw mm2, mm1
		pcmpeqw mm4, mm0
		pandn mm2, mm3
		pandn mm4, mm5
		movq mm3, mm2
		movq mm5, mm4
		pand mm2, mm6
		pand mm4, mm6
		pandn mm3, mm7
		pandn mm5, mm7
		por mm2, mm3
		por mm4, mm5

		/* set *dst0 */
		movq mm3, mm2
		punpcklwd mm2, mm4
		punpckhwd mm3, mm4
		movq qword ptr [edx+0], mm2
		movq qword ptr [edx+8], mm3

		/* next */
		add eax, 8
		add ebx, 8
		add ecx, 8
		add edx, 16

		dec esi
		jnz label0
	label1:

		/* ---- final run: nothing on the right, F of the last pixel is 0 ---- */
		/* set the current, current_pre, current_next registers */
		movq mm0, qword ptr [ebx-8]
		movq mm7, qword ptr [ebx+0]
		pxor mm1, mm1 /* use a fake black out of screen */
		psrlq mm0, 48
		psllq mm1, 48
		movq mm2, mm7
		movq mm3, mm7
		psllq mm2, 16
		psrlq mm3, 16
		por mm0, mm2 /* mm0 = D */
		por mm1, mm3 /* mm1 = F */

		/* current_upper */
		movq mm6, qword ptr [eax]

		/* compute the upper-left pixel for dst0 on mm2 */
		/* compute the upper-right pixel for dst0 on mm4 */
		movq mm2, mm0
		movq mm4, mm1
		movq mm3, mm0
		movq mm5, mm1
		pcmpeqw mm2, mm6
		pcmpeqw mm4, mm6
		pcmpeqw mm3, qword ptr [ecx]
		pcmpeqw mm5, qword ptr [ecx]
		pandn mm3, mm2
		pandn mm5, mm4
		movq mm2, mm0
		movq mm4, mm1
		pcmpeqw mm2, mm1
		pcmpeqw mm4, mm0
		pandn mm2, mm3
		pandn mm4, mm5
		movq mm3, mm2
		movq mm5, mm4
		pand mm2, mm6
		pand mm4, mm6
		pandn mm3, mm7
		pandn mm5, mm7
		por mm2, mm3
		por mm4, mm5

		/* set *dst0 */
		movq mm3, mm2
		punpcklwd mm2, mm4
		punpckhwd mm3, mm4
		movq qword ptr [edx+0], mm2
		movq qword ptr [edx+8], mm3

		/* leave MMX state so that x87 floating point code can run afterwards */
		emms
	}
}

/*
 * Scale2x, 32 bits per pixel, MMX: build ONE output row from three input rows.
 *
 * For every pixel E of src1, with B the pixel in the same column of src0,
 * H the pixel in the same column of src2, D the pixel to the left of E and
 * F the pixel to the right of E (both in src1), two output pixels are stored:
 *
 *   left  = (D == B && D != H && D != F) ? B : E
 *   right = (F == B && F != H && F != D) ? B : E
 *
 * The pixel left of the first column and the pixel right of the last
 * processed column are taken as 0 ("fake black out of screen").
 *
 * dst   - output row, 2 * count pixels are written
 * src0  - row supplying B
 * src1  - row supplying D, E and F
 * src2  - row supplying H
 * count - number of pixels in each input row. Two pixels (one quadword) are
 *         handled per run and a first and a last run are always executed, so
 *         count must be at least 4; smaller values return without touching
 *         dst (the unsigned subtraction below would otherwise wrap around).
 *         count is expected to be a multiple of 2; a remainder is left
 *         unprocessed.
 *
 * The code uses the MSVC __asm block syntax and 32 bit registers.
 */
static void internal_scale2x_32_mmx_single(euI32* dst, const euI32* src0, const euI32* src1, const euI32* src2, unsigned count)
{
	/* a first and a last run of 2 pixels each are unconditional */
	if (count < 2*2)
		return;

	/* always do the first and last run */
	count -= 2*2;

	__asm {
		/*
		 * Register roles:
		 *   eax = src0 cursor      ebx = src1 cursor     ecx = src2 cursor
		 *   edx = dst cursor       esi = central run counter
		 *   mm7 = E (current)      mm0 = D (left)        mm1 = F (right)
		 *   mm6 = B (src0 row)     [ecx] = H (src2 row)
		 *   mm2/mm3 = left result and scratch, mm4/mm5 = right result and scratch
		 */
		mov eax, src0
		mov ebx, src1
		mov ecx, src2
		mov edx, dst
		mov esi, count

		/* ---- first run: nothing on the left, D of pixel 0 is 0 ---- */
		/* set the current, current_pre, current_next registers */
		pxor mm0, mm0 /* use a fake black out of screen */
		movq mm7, qword ptr [ebx+0]
		movq mm1, qword ptr [ebx+8]
		psrlq mm0, 32
		psllq mm1, 32 /* first pixel of the next quadword -> top lane */
		movq mm2, mm7
		movq mm3, mm7
		psllq mm2, 32 /* E moved up one lane */
		psrlq mm3, 32 /* E moved down one lane */
		por mm0, mm2 /* mm0 = D */
		por mm1, mm3 /* mm1 = F */

		/* current_upper */
		movq mm6, qword ptr [eax]

		/* compute the upper-left pixel for dst0 on mm2 */
		/* compute the upper-right pixel for dst0 on mm4 */
		movq mm2, mm0
		movq mm4, mm1
		movq mm3, mm0
		movq mm5, mm1
		pcmpeqd mm2, mm6 /* D == B */
		pcmpeqd mm4, mm6 /* F == B */
		pcmpeqd mm3, qword ptr [ecx] /* D == H */
		pcmpeqd mm5, qword ptr [ecx] /* F == H */
		pandn mm3, mm2 /* D == B && D != H */
		pandn mm5, mm4 /* F == B && F != H */
		movq mm2, mm0
		movq mm4, mm1
		pcmpeqd mm2, mm1 /* D == F */
		pcmpeqd mm4, mm0 /* F == D */
		pandn mm2, mm3 /* left mask: ... && D != F */
		pandn mm4, mm5 /* right mask: ... && F != D */
		movq mm3, mm2
		movq mm5, mm4
		pand mm2, mm6 /* B where the mask is set */
		pand mm4, mm6
		pandn mm3, mm7 /* E where the mask is clear */
		pandn mm5, mm7
		por mm2, mm3 /* mm2 = left output pixels */
		por mm4, mm5 /* mm4 = right output pixels */

		/* set *dst0: interleave left/right results, 4 pixels out */
		movq mm3, mm2
		punpckldq mm2, mm4
		punpckhdq mm3, mm4
		movq qword ptr [edx+0], mm2
		movq qword ptr [edx+8], mm3

		/* next */
		add eax, 8
		add ebx, 8
		add ecx, 8
		add edx, 16

		/* ---- central runs: one per remaining group of 2 pixels ---- */
		shr esi, 1
		jz label1
		align 4 /* same loop-head alignment as the 16 bit variant */
	label0:

		/* set the current, current_pre, current_next registers */
		movq mm0, qword ptr [ebx-8]
		movq mm7, qword ptr [ebx+0]
		movq mm1, qword ptr [ebx+8]
		psrlq mm0, 32 /* last pixel of the previous quadword -> bottom lane */
		psllq mm1, 32 /* first pixel of the next quadword -> top lane */
		movq mm2, mm7
		movq mm3, mm7
		psllq mm2, 32
		psrlq mm3, 32
		por mm0, mm2 /* mm0 = D */
		por mm1, mm3 /* mm1 = F */

		/* current_upper */
		movq mm6, qword ptr [eax]

		/* compute the upper-left pixel for dst0 on mm2 */
		/* compute the upper-right pixel for dst0 on mm4 */
		movq mm2, mm0
		movq mm4, mm1
		movq mm3, mm0
		movq mm5, mm1
		pcmpeqd mm2, mm6
		pcmpeqd mm4, mm6
		pcmpeqd mm3, qword ptr [ecx]
		pcmpeqd mm5, qword ptr [ecx]
		pandn mm3, mm2
		pandn mm5, mm4
		movq mm2, mm0
		movq mm4, mm1
		pcmpeqd mm2, mm1
		pcmpeqd mm4, mm0
		pandn mm2, mm3
		pandn mm4, mm5
		movq mm3, mm2
		movq mm5, mm4
		pand mm2, mm6
		pand mm4, mm6
		pandn mm3, mm7
		pandn mm5, mm7
		por mm2, mm3
		por mm4, mm5

		/* set *dst0 */
		movq mm3, mm2
		punpckldq mm2, mm4
		punpckhdq mm3, mm4
		movq qword ptr [edx+0], mm2
		movq qword ptr [edx+8], mm3

		/* next */
		add eax, 8
		add ebx, 8
		add ecx, 8
		add edx, 16

		dec esi
		jnz label0
	label1:

		/* ---- final run: nothing on the right, F of the last pixel is 0 ---- */
		/* set the current, current_pre, current_next registers */
		movq mm0, qword ptr [ebx-8]
		movq mm7, qword ptr [ebx+0]
		pxor mm1, mm1 /* use a fake black out of screen */
		psrlq mm0, 32
		psllq mm1, 32
		movq mm2, mm7
		movq mm3, mm7
		psllq mm2, 32
		psrlq mm3, 32
		por mm0, mm2 /* mm0 = D */
		por mm1, mm3 /* mm1 = F */

		/* current_upper */
		movq mm6, qword ptr [eax]

		/* compute the upper-left pixel for dst0 on mm2 */
		/* compute the upper-right pixel for dst0 on mm4 */
		movq mm2, mm0
		movq mm4, mm1
		movq mm3, mm0
		movq mm5, mm1
		pcmpeqd mm2, mm6
		pcmpeqd mm4, mm6
		pcmpeqd mm3, qword ptr [ecx]
		pcmpeqd mm5, qword ptr [ecx]
		pandn mm3, mm2
		pandn mm5, mm4
		movq mm2, mm0
		movq mm4, mm1
		pcmpeqd mm2, mm1
		pcmpeqd mm4, mm0
		pandn mm2, mm3
		pandn mm4, mm5
		movq mm3, mm2
		movq mm5, mm4
		pand mm2, mm6
		pand mm4, mm6
		pandn mm3, mm7
		pandn mm5, mm7
		por mm2, mm3
		por mm4, mm5

		/* set *dst0 */
		movq mm3, mm2
		punpckldq mm2, mm4
		punpckhdq mm3, mm4
		movq qword ptr [edx+0], mm2
		movq qword ptr [edx+8], mm3

		/* leave MMX state so that x87 floating point code can run afterwards */
		emms
	}
}

/*
 * Scale2x of one 16 bit row into two output rows.
 *
 * dst0 is produced by the single-row routine with the source rows in the
 * order src0, src1, src2; dst1 is produced with src0 and src2 exchanged.
 * count is forwarded unchanged to both calls.
 */
static void internal_scale2x_16_mmx(euI16* dst0, euI16* dst1, const euI16* src0, const euI16* src1, const euI16* src2, unsigned count)
{
	const euI16* const row_a = src0;
	const euI16* const row_mid = src1;
	const euI16* const row_b = src2;

	/* first output row */
	internal_scale2x_16_mmx_single(dst0, row_a, row_mid, row_b, count);

	/* second output row: same routine, outer rows swapped */
	internal_scale2x_16_mmx_single(dst1, row_b, row_mid, row_a, count);
}

/*
 * Scale2x of one 32 bit row into two output rows.
 *
 * dst0 is produced by the single-row routine with the source rows in the
 * order src0, src1, src2; dst1 is produced with src0 and src2 exchanged.
 * count is forwarded unchanged to both calls.
 */
static void internal_scale2x_32_mmx(euI32* dst0, euI32* dst1, const euI32* src0, const euI32* src1, const euI32* src2, unsigned count)
{
	const euI32* const row_a = src0;
	const euI32* const row_mid = src1;
	const euI32* const row_b = src2;

	/* first output row */
	internal_scale2x_32_mmx_single(dst0, row_a, row_mid, row_b, count);

	/* second output row: same routine, outer rows swapped */
	internal_scale2x_32_mmx_single(dst1, row_b, row_mid, row_a, count);
}