Parent Directory | Revision Log
Revision 1.3 - (view) (download)
1 : | Isibaar | 1.1 | ;/************************************************************************** |
2 : | ; * | ||
3 : | edgomez | 1.3 | ; * XVID MPEG-4 VIDEO CODEC |
4 : | ; * sse2 cbp calc | ||
5 : | Isibaar | 1.1 | ; * |
6 : | edgomez | 1.3 | ; * This program is an implementation of a part of one or more MPEG-4 |
7 : | ; * Video tools as specified in ISO/IEC 14496-2 standard. Those intending | ||
8 : | ; * to use this software module in hardware or software products are | ||
9 : | ; * advised that its use may infringe existing patents or copyrights, and | ||
10 : | ; * any such use would be at such party's own risk. The original | ||
11 : | ; * developer of this software module and his/her company, and subsequent | ||
12 : | ; * editors and their companies, will have no liability for use of this | ||
13 : | ; * software or modifications or derivatives thereof. | ||
14 : | ; * | ||
15 : | ; * This program is free software; you can redistribute it and/or modify | ||
16 : | ; * it under the terms of the GNU General Public License as published by | ||
17 : | ; * the Free Software Foundation; either version 2 of the License, or | ||
18 : | ; * (at your option) any later version. | ||
19 : | ; * | ||
20 : | ; * This program is distributed in the hope that it will be useful, | ||
21 : | ; * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
23 : | ; * GNU General Public License for more details. | ||
24 : | ; * | ||
25 : | ; * You should have received a copy of the GNU General Public License | ||
26 : | ; * along with this program; if not, write to the Free Software | ||
27 : | ; * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
28 : | Isibaar | 1.1 | ; * |
29 : | edgomez | 1.3 | ; *************************************************************************/ |
30 : | |||
31 : | ;/************************************************************************** | ||
32 : | ; * | ||
33 : | ; * History: | ||
34 : | Isibaar | 1.1 | ; * |
35 : | edgomez | 1.3 | ; * 14.06.2002 cleanup -Skal- |
36 : | ; * 24.04.2002 had to use sse2's movdqu instead of movdqa (???) | ||
37 : | ; * 17.04.2002 initial version (c) 2002 Daniel Smith | ||
38 : | Isibaar | 1.1 | ; * |
39 : | ; *************************************************************************/ | ||
40 : | edgomez | 1.3 | |
41 : | Isibaar | 1.1 | |
42 : | bits 32 | ||
43 : | |||
44 : | section .data | ||
45 : | |||
;---------------------------------------------------------------------------
; cglobal <symbol>
;
; Exports <symbol> as a global. When PREFIX is defined at assembly time the
; exported name gets a leading underscore, and <symbol> is redefined to
; expand to that underscored name for the remainder of the file.
;---------------------------------------------------------------------------
%macro cglobal 1
    %ifndef PREFIX
        global %1
    %else
        global _%1
        %define %1 _%1
    %endif
%endmacro
54 : | |||
align 16

; Eight-word AND mask: word 0 is cleared, words 1..7 pass through unchanged.
; calc_cbp_sse2 loads it into xmm7 so that the first coefficient of each
; block (the DC value) does not take part in the "any non-zero?" check.
ignore_dc:
    dw 0
    times 7 dw -1
58 : | |||
59 : | section .text | ||
60 : | |||
61 : | cglobal calc_cbp_sse2 | ||
62 : | |||
63 : | ;=========================================================================== | ||
64 : | ; | ||
65 : | ; uint32_t calc_cbp_sse2(const int16_t coeff[6][64]); | ||
66 : | ; | ||
67 : | ; not enabled - slower than mmx? | ||
68 : | ; | ||
69 : | ;=========================================================================== | ||
70 : | |||
;---------------------------------------------------------------------------
; LOOP_SSE2 <block>
;
; Checks whether block <block> of coeff[6][64] (int16_t, 128 bytes per
; block) holds any non-zero coefficient other than its first one.
;
; In:    edx  = coeff base address; rows are read with movdqa / por memory
;               operands, which require 16-byte aligned addresses
;        xmm6 = all zero
;        xmm7 = mask that clears word 0 of the first row
; Out:   ecx  = zero if nothing was found, non-zero otherwise
;        ZF   = set from the final "test ecx, ecx"
; Clobb: xmm0, xmm1, ecx, flags
;---------------------------------------------------------------------------
%macro LOOP_SSE2 1
    ; Two independent OR accumulators: xmm1 collects the odd rows,
    ; xmm0 the even rows (row 0 with its first word masked off).
    movdqa  xmm1, [edx + (%1)*128 + 1*16]
    por     xmm1, [edx + (%1)*128 + 3*16]
    por     xmm1, [edx + (%1)*128 + 5*16]
    por     xmm1, [edx + (%1)*128 + 7*16]

    movdqa  xmm0, [edx + (%1)*128 + 0*16]
    pand    xmm0, xmm7                      ; drop the first coefficient
    por     xmm0, [edx + (%1)*128 + 2*16]
    por     xmm0, [edx + (%1)*128 + 4*16]
    por     xmm0, [edx + (%1)*128 + 6*16]

    por     xmm0, xmm1                      ; OR of all 64 words (minus the first)
    psadbw  xmm0, xmm6                      ; byte sums vs. zero, one per 64-bit half
    movhlps xmm1, xmm0                      ; upper-half sum -> low half of xmm1
    por     xmm0, xmm1                      ; merge both sums in the low half
    movd    ecx, xmm0                       ; ecx != 0 <=> some byte was non-zero
    test    ecx, ecx                        ; leave ZF for the caller's branch
%endmacro
90 : | |||
91 : | align 16 | ||
92 : | |||
;---------------------------------------------------------------------------
; uint32_t calc_cbp_sse2(const int16_t coeff[6][64]);
;
; Builds the coded block pattern: bit (5 - i) of the result is set when
; block i contains a non-zero coefficient other than its first one.
;
; In:    [esp+4] = coeff (first stack argument)
; Out:   eax     = cbp
; Clobb: ecx, edx, xmm0, xmm1, xmm6, xmm7, flags
;
; Every LOOP_SSE2 expansion ends with "test ecx, ecx", so ZF is already
; valid when the following jz executes; no second test is needed.
;---------------------------------------------------------------------------
calc_cbp_sse2:
    mov     edx, [esp+4]            ; edx = coeff
    xor     eax, eax                ; cbp = 0

    movdqu  xmm7, [ignore_dc]       ; mask clearing the first coefficient;
                                    ; unaligned load, so no alignment
                                    ; requirement is placed on ignore_dc
    pxor    xmm6, xmm6              ; zero reference for psadbw

    LOOP_SSE2 0
    jz      .blk2
    or      eax, (1<<5)
.blk2:
    LOOP_SSE2 1
    jz      .blk3
    or      eax, (1<<4)
.blk3:
    LOOP_SSE2 2
    jz      .blk4
    or      eax, (1<<3)
.blk4:
    LOOP_SSE2 3
    jz      .blk5
    or      eax, (1<<2)
.blk5:
    LOOP_SSE2 4
    jz      .blk6
    or      eax, (1<<1)
.blk6:
    LOOP_SSE2 5
    jz      .finished
    or      eax, (1<<0)
.finished:

    ret
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |