--- cbp_mmx.asm 2002/04/17 10:54:19 1.3 +++ cbp_mmx.asm 2002/11/17 00:57:57 1.7 @@ -1,65 +1,77 @@ ;/************************************************************************** ; * -; * XVID MPEG-4 VIDEO CODEC -; * mmx cbp calc +; * XVID MPEG-4 VIDEO CODEC +; * mmx cbp calc ; * -; * This program is an implementation of a part of one or more MPEG-4 -; * Video tools as specified in ISO/IEC 14496-2 standard. Those intending -; * to use this software module in hardware or software products are -; * advised that its use may infringe existing patents or copyrights, and -; * any such use would be at such party's own risk. The original -; * developer of this software module and his/her company, and subsequent -; * editors and their companies, will have no liability for use of this -; * software or modifications or derivatives thereof. -; * -; * This program is free software; you can redistribute it and/or modify -; * it under the terms of the GNU General Public License as published by -; * the Free Software Foundation; either version 2 of the License, or -; * (at your option) any later version. -; * -; * This program is distributed in the hope that it will be useful, -; * but WITHOUT ANY WARRANTY; without even the implied warranty of -; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; * GNU General Public License for more details. -; * -; * You should have received a copy of the GNU General Public License -; * along with this program; if not, write to the Free Software -; * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +; * This file is part of XviD, a free MPEG-4 video encoder/decoder ; * -; *************************************************************************/ - -;/************************************************************************** -; * -; * History: +; * XviD is free software; you can redistribute it and/or modify it +; * under the terms of the GNU General Public License as published by +; * the Free Software Foundation; either version 2 of the License, or +; * (at your option) any later version. +; * +; * This program is distributed in the hope that it will be useful, +; * but WITHOUT ANY WARRANTY; without even the implied warranty of +; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; * GNU General Public License for more details. +; * +; * You should have received a copy of the GNU General Public License +; * along with this program; if not, write to the Free Software +; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +; * +; * Under section 8 of the GNU General Public License, the copyright +; * holders of XVID explicitly forbid distribution in the following +; * countries: +; * +; * - Japan +; * - United States of America +; * +; * Linking XviD statically or dynamically with other modules is making a +; * combined work based on XviD. Thus, the terms and conditions of the +; * GNU General Public License cover the whole combination. +; * +; * As a special exception, the copyright holders of XviD give you +; * permission to link XviD with independent modules that communicate with +; * XviD solely through the VFW1.1 and DShow interfaces, regardless of the +; * license terms of these independent modules, and to copy and distribute +; * the resulting combined work under terms of your choice, provided that +; * every copy of the combined work is accompanied by a complete copy of +; * the source code of XviD (the version of XviD used to produce the +; * combined work), being distributed under the terms of the GNU General +; * Public License plus this exception. An independent module is a module +; * which is not derived from or based on XviD. +; * +; * Note that people who make modified versions of XviD are not obligated +; * to grant this special exception for their modified versions; it is +; * their choice whether to do so. The GNU General Public License gives +; * permission to release a modified version without this exception; this +; * exception also makes it possible to release a modified version which +; * carries forward this exception. ; * -; * 17.04.2002 sse2 stuff -; * 22.03.2002 0.01 ; Min Chen -; * ; use 386 cpu's 'BTS' to replace 'cbp |= 1 << (edx-1)' -; * 24.11.2001 inital version; (c)2001 peter ross +; * $Id: cbp_mmx.asm,v 1.7 2002/11/17 00:57:57 edgomez Exp $ ; * ; *************************************************************************/ - bits 32 section .data %macro cglobal 1 -%ifdef PREFIX -global _%1 -%define %1 _%1 -%else -global %1 -%endif + %ifdef PREFIX + global _%1 + %define %1 _%1 + %else + global %1 + %endif %endmacro align 16 ignore_dc dw 0, -1, -1, -1, -1, -1, -1, -1 - section .text +cglobal calc_cbp_mmx ;=========================================================================== ; @@ -68,239 +80,59 @@ ;=========================================================================== align 16 -cglobal calc_cbp_mmx -calc_cbp_mmx - push ebx - push ecx - push edx - push esi +calc_cbp_mmx: + push ebx + push esi + + mov esi, [esp + 8 + 4] ; coeff + xor eax, eax ; cbp = 0 + mov edx, (1 << 5) - mov esi, [esp + 16 + 4] ; coeff - movq mm7, [ignore_dc] + movq mm7, [ignore_dc] - xor eax, eax ; cbp = 0 - mov edx, 6 .loop - movq mm0, [esi] - pand mm0, mm7 - movq mm1, [esi+8] - - por mm0, [esi+16] - por mm1, [esi+24] - - por mm0, [esi+32] - por mm1, [esi+40] - - por mm0, [esi+48] - por mm1, [esi+56] - - por mm0, [esi+64] - por mm1, [esi+72] - - por mm0, [esi+80] - por mm1, [esi+88] - - por mm0, [esi+96] - por mm1, [esi+104] - - por mm0, [esi+112] - por mm1, [esi+120] + movq mm0, [esi] + movq mm1, [esi+8] + pand mm0, mm7 - por mm0, mm1 - movq mm1, mm0 - psrlq mm1, 32 - por mm0, mm1 - movd ebx, mm0 - - add esi, 128 - - or ebx, ebx - jz .iterate - - ; cbp |= 1 << (edx-1) - - ; Change by Chenm001 - ;mov ecx, edx - ;dec ecx - ;mov ebx, 1 - ;shl ebx, cl - ;or eax, ebx - lea ebx,[edx-1] - bts eax,ebx - -.iterate dec edx - jnz .loop - - pop esi - pop edx - pop ecx - pop ebx - - ret - - - -;=========================================================================== -; -; uint32_t calc_cbp_sse2(const int16_t coeff[6][64]); -; -; not enabled - slower than mmx? -; -;=========================================================================== - -align 16 -cglobal calc_cbp_sse2 -calc_cbp_sse2 - push esi + por mm0, [esi+16] + por mm1, [esi+24] - mov esi, [esp + 4 + 4] ; coeff - movdqa xmm7, [ignore_dc] ; mask to ignore dc value + por mm0, [esi+32] + por mm1, [esi+40] - xor eax, eax ; cbp = 0 - pxor xmm6, xmm6 ; zeroes to help psadbw + por mm0, [esi+48] + por mm1, [esi+56] -.first movdqa xmm0, [esi] - pand xmm0, xmm7 - movdqa xmm1, [esi+16] + por mm0, [esi+64] + por mm1, [esi+72] - por xmm0, [esi+32] - por xmm1, [esi+48] - por xmm0, [esi+64] - por xmm1, [esi+80] - por xmm0, [esi+96] - por xmm1, [esi+112] + por mm0, [esi+80] + por mm1, [esi+88] - por xmm0, xmm1 ; xmm0 = xmm1 = 128 bits worth of info - psadbw xmm0, xmm6 ; contains 2 dwords with sums - movhlps xmm1, xmm0 ; move high dword from xmm0 to low xmm1 - por xmm0, xmm1 ; combine - movd ecx, xmm0 ; if ecx set, values were found + por mm0, [esi+96] + por mm1, [esi+104] - add esi, 128 + por mm0, [esi+112] + por mm1, [esi+120] - or ecx, ecx - jz .second + por mm0, mm1 + movq mm1, mm0 + psrlq mm1, 32 + lea esi, [esi + 128] - bts eax, 5 + por mm0, mm1 + movd ebx, mm0 -.second movdqa xmm0, [esi] - pand xmm0, xmm7 - movdqa xmm1, [esi+16] + test ebx, ebx + jz .next + or eax, edx ; cbp |= 1 << (5-i) - por xmm0, [esi+32] - por xmm1, [esi+48] - por xmm0, [esi+64] - por xmm1, [esi+80] - por xmm0, [esi+96] - por xmm1, [esi+112] +.next + shr edx,1 + jnc .loop - por xmm0, xmm1 - psadbw xmm0, xmm6 - movhlps xmm1, xmm0 - por xmm0, xmm1 - movd ecx, xmm0 - - add esi, 128 - - or ecx, ecx - jz .third - - bts eax, 4 - -.third movdqa xmm0, [esi] - pand xmm0, xmm7 - movdqa xmm1, [esi+16] - - por xmm0, [esi+32] - por xmm1, [esi+48] - por xmm0, [esi+64] - por xmm1, [esi+80] - por xmm0, [esi+96] - por xmm1, [esi+112] - - por xmm0, xmm1 - psadbw xmm0, xmm6 - movhlps xmm1, xmm0 - por xmm0, xmm1 - movd ecx, xmm0 - - add esi, 128 - - or ecx, ecx - jz .fourth - - bts eax, 3 - -.fourth movdqa xmm0, [esi] - pand xmm0, xmm7 - movdqa xmm1, [esi+16] - - por xmm0, [esi+32] - por xmm1, [esi+48] - por xmm0, [esi+64] - por xmm1, [esi+80] - por xmm0, [esi+96] - por xmm1, [esi+112] - - por xmm0, xmm1 - psadbw xmm0, xmm6 - movhlps xmm1, xmm0 - por xmm0, xmm1 - movd ecx, xmm0 - - add esi, 128 - - or ecx, ecx - jz .fifth - - bts eax, 2 - -.fifth movdqa xmm0, [esi] - pand xmm0, xmm7 - movdqa xmm1, [esi+16] - - por xmm0, [esi+32] - por xmm1, [esi+48] - por xmm0, [esi+64] - por xmm1, [esi+80] - por xmm0, [esi+96] - por xmm1, [esi+112] - - por xmm0, xmm1 - psadbw xmm0, xmm6 - movhlps xmm1, xmm0 - por xmm0, xmm1 - movd ecx, xmm0 - - add esi, 128 - - or ecx, ecx - jz .sixth - - bts eax, 1 - -.sixth movdqa xmm0, [esi] - pand xmm0, xmm7 - movdqa xmm1, [esi+16] - - por xmm0, [esi+32] - por xmm1, [esi+48] - por xmm0, [esi+64] - por xmm1, [esi+80] - por xmm0, [esi+96] - por xmm1, [esi+112] - - por xmm0, xmm1 - psadbw xmm0, xmm6 - movhlps xmm1, xmm0 - por xmm0, xmm1 - movd ecx, xmm0 - - or ecx, ecx - jz .end - - bts eax, 0 - -.end pop esi - - ret + pop esi + pop ebx + + ret \ No newline at end of file