Parent Directory | Revision Log
Revision 1.8 - (view) (download)
1 : | edgomez | 1.3 | ;/**************************************************************************** |
2 : | edgomez | 1.2 | ; * |
3 : | edgomez | 1.3 | ; * XVID MPEG-4 VIDEO CODEC |
4 : | ; * - 3dne CBP computation - | ||
5 : | edgomez | 1.2 | ; * |
6 : | edgomez | 1.3 | ; * Copyright (C) 2002 Jaan Kalda |
7 : | ; * | ||
8 : | ; * This program is free software ; you can redistribute it and/or modify | ||
9 : | ; * it under the terms of the GNU General Public License as published by | ||
10 : | ; * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | ; * (at your option) any later version. | ||
12 : | ; * | ||
13 : | ; * This program is distributed in the hope that it will be useful, | ||
14 : | ; * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | ; * GNU General Public License for more details. | ||
17 : | ; * | ||
18 : | ; * You should have received a copy of the GNU General Public License | ||
19 : | ; * along with this program ; if not, write to the Free Software | ||
20 : | ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | ; * | ||
22 : | Isibaar | 1.8 | ; * $Id: cbp_3dne.asm,v 1.7 2008/11/11 20:46:24 Isibaar Exp $ |
23 : | edgomez | 1.3 | ; * |
24 : | ; ***************************************************************************/ | ||
25 : | |||
26 : | ; these 3dne functions are compatible with iSSE, but are optimized | ||
27 : | ; specifically for K7 pipelines | ||
28 : | edgomez | 1.2 | |
29 : | edgomez | 1.3 | BITS 32 |
30 : | |||
31 : | ;============================================================================= | ||
32 : | ; Macros | ||
33 : | ;============================================================================= | ||
34 : | edgomez | 1.2 | |
;-----------------------------------------------------------------------------
; cglobal NAME
;
; Declares NAME as an exported symbol and sets up the ENDFUNC helper.
;
;   PREFIX defined      The exported symbol is _NAME. NAME becomes a
;                       single-line macro for _NAME, so the label written
;                       later as NAME: (and local labels below it) land on
;                       the prefixed symbol.
;   MARK_FUNCS defined  The symbol is exported with the ELF function type and
;                       a size equal to the distance from the entry label to
;                       its local label .endfunc. ENDFUNC expands to
;                       .endfunc and has to be placed right after the last
;                       instruction of the function.
;   MARK_FUNCS absent   ENDFUNC expands to nothing.
;-----------------------------------------------------------------------------
%macro cglobal 1
	%ifdef PREFIX
		%ifdef MARK_FUNCS
			; The size expression is built from the prefixed names,
			; because those are the labels that really get defined
			; once the alias below is active. The alias itself is
			; only the bare symbol name, so it stays usable as a
			; label and as an operand.
			global _%1:function _%1.endfunc-_%1
			%define %1 _%1
			%define ENDFUNC .endfunc
		%else
			global _%1
			%define %1 _%1
			%define ENDFUNC
		%endif
	%else
		%ifdef MARK_FUNCS
			global %1:function %1.endfunc-%1
			%define ENDFUNC .endfunc
		%else
			global %1
			%define ENDFUNC
		%endif
	%endif
%endmacro
56 : | |||
;-----------------------------------------------------------------------------
; calc_cbp N
;
; In:    eax = address of a 128-byte block (64 words of 16 bits)
;        esp = base of an array of dword slots; slot N is written
; Out:   dword [esp+N*4] is non-zero exactly when at least one of words
;        1..63 of the block is non-zero. Word 0 is ignored (presumably the
;        DC coefficient - confirm against the callers).
;        N != 0 : eax is advanced by 128 to the following block
;        N == 0 : eax and edx are both cleared to zero
; Clobb: mm0-mm7, flags
;
; The instruction order is the original K7 scheduling and is kept as is.
;-----------------------------------------------------------------------------
%macro calc_cbp 1
	; First quadword goes through a shuffle that selects words 1,1,2,3,
	; which drops word 0 from the accumulation.
	pshufw   mm0, [eax], 11100101b
	movq     mm1, [eax+8]
	por      mm0, [eax+64]
	por      mm1, [eax+72]

	; Remaining quadwords: load one from the first 64 bytes, OR in its
	; partner 64 bytes further on.
	movq     mm2, [eax+16]
	movq     mm3, [eax+24]
	por      mm2, [eax+80]
	por      mm3, [eax+88]
	movq     mm4, [eax+32]
	movq     mm5, [eax+40]
	por      mm4, [eax+96]
	por      mm5, [eax+104]
	movq     mm6, [eax+48]
	movq     mm7, [eax+56]
	por      mm6, [eax+112]
	por      mm7, [eax+120]

	por      mm1, mm0
%if %1
	; Adds 128 by subtracting -128, which still fits a sign-extended
	; byte immediate (the original note: 3 bytes needed for alignment).
	sub      eax, byte -128
%else
	; Last block: zero the registers the caller compares against and
	; accumulates into.
	xor      eax, eax
	xor      edx, edx
%endif
	; Fold the eight partial results down into mm7.
	por      mm3, mm2
	por      mm5, mm4
	por      mm7, mm6
	por      mm3, mm1
	por      mm7, mm5
	por      mm7, mm3

	; Signed saturation keeps any non-zero word non-zero as a byte, so the
	; low dword alone carries the whole result.
	packsswb mm7, mm7
	movd     [esp+%1*4], mm7
%endmacro
90 : | |||
91 : | edgomez | 1.3 | ;============================================================================= |
92 : | ; Code | ||
93 : | ;============================================================================= | ||
94 : | |||
SECTION .text

cglobal calc_cbp_3dne

;-----------------------------------------------------------------------------
; uint32_t calc_cbp_3dne(const int16_t coeff[6*64]);
;
; In:    [esp+4] = coeff, six consecutive blocks of 64 words each
; Out:   eax = 6-bit mask. The first block in memory maps to bit 5 and the
;        sixth block to bit 0. A bit is set when the flag dword produced by
;        calc_cbp for that block is non-zero.
; Stack: 24 bytes of scratch (one dword flag per block), released on exit
; Clobb: eax, edx, mm0-mm7, flags. No emms is executed in this routine.
;
; AMD K7, in cache: ca 80 clk
;-----------------------------------------------------------------------------

ALIGN 16
calc_cbp_3dne:
	mov      eax, [esp+ 4]          ; eax = coeff
	lea      esp, [esp-24]          ; reserve six dword flag slots

	; One expansion per block, walking forward through memory while the
	; slot index counts down.
	calc_cbp 5                      ; first block  -> slot 5
	calc_cbp 4                      ; second block -> slot 4
	calc_cbp 3                      ; third block  -> slot 3
	calc_cbp 2                      ; fourth block -> slot 2
	calc_cbp 1                      ; fifth block  -> slot 1
	calc_cbp 0                      ; sixth block  -> slot 0, eax = edx = 0

	; Comparing zero with a slot sets carry exactly when the slot is
	; non-zero; adc eax, eax then shifts the mask left and appends that
	; carry. eax is still zero at the first compare, edx stays zero.
	cmp      eax, [esp+5*4]
	adc      eax, eax
	cmp      edx, [esp+4*4]
	adc      eax, eax
	cmp      edx, [esp+3*4]
	adc      eax, eax
	cmp      edx, [esp+2*4]
	adc      eax, eax
	cmp      edx, [esp+1*4]
	adc      eax, eax
	cmp      edx, [esp+0*4]
	adc      eax, eax

	add      esp, byte 24           ; drop the flag slots
	ret
ENDFUNC
129 : | edgomez | 1.5 | |
130 : | Isibaar | 1.6 | |
; Emit the empty .note.GNU-stack section for ELF objects so the linker does
; not have to assume that this object needs an executable stack. The output
; format name is whatever was passed to -f, so the explicit 32-bit ELF
; spelling is accepted as well as the plain one.
%ifidn __OUTPUT_FORMAT__,elf
section ".note.GNU-stack" noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
134 : |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |