Parent Directory | Revision Log
Revision 1.1 - (view) (download)
1 : | edgomez | 1.1 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - MPEG4 Quantization MPEG implementation with altivec optimization - | ||
5 : | * | ||
6 : | * Copyright(C) 2005 Christoph Naegeli <chn@kbw.ch> | ||
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | * $Id$ | ||
23 : | * | ||
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | #ifdef HAVE_ALTIVEC_H | ||
27 : | #include <altivec.h> | ||
28 : | #endif | ||
29 : | |||
30 : | #include "../../portab.h" | ||
31 : | #include "../../global.h" | ||
32 : | |||
33 : | #include "../quant.h" | ||
34 : | #include "../quant_matrix.h" | ||
35 : | |||
36 : | #undef DEBUG | ||
37 : | #include <stdio.h> | ||
38 : | |||
39 : | /* Some useful typedefs */ | ||
40 : | typedef vector unsigned char vec_uint8_t; | ||
41 : | typedef vector unsigned short vec_uint16_t; | ||
42 : | typedef vector unsigned int vec_uint32_t; | ||
43 : | |||
44 : | typedef vector signed char vec_sint8_t; | ||
45 : | typedef vector signed short vec_sint16_t; | ||
46 : | typedef vector signed int vec_sint32_t; | ||
47 : | |||
48 : | /***************************************************************************** | ||
49 : | * Local data | ||
50 : | ****************************************************************************/ | ||
51 : | |||
#define VM18P 3
#define VM18Q 4

/*
 * Shift width for the divide-by-multiply table: a 17 bit shift is needed
 * (16 causes slight errors when q > 19).
 *
 * NOTE(review): neither such a table nor any use of SCALEBITS, VM18P or
 * VM18Q appears in the visible part of this file - confirm they are still
 * required here.
 */
#define SCALEBITS 17
59 : | |||
/*
 * DEQUANT_MPEG_INTRA: dequantize one group of 8 intra coefficients.
 *
 * The macro works on locals of the function that expands it:
 *   coeff_ptr, intra_matrix, data_ptr       advanced by 8 elements each
 *   level, vintra, even, odd, et, ot, t,
 *   zero_less, overflow                      scratch, clobbered
 *   ox00, vec_2048, vquant, swap             read only
 *
 * For every element it computes
 *   m = (|coeff| * intra_matrix * quant) >> 3
 * clamps m so that the signed result lies in [-2048, 2047], restores the
 * sign of the input coefficient and stores the 8 results with vec_st.
 *
 * The second vec_ld of each unaligned load uses offset 15 rather than 16:
 * for an unaligned pointer it still fetches the following 16-byte block,
 * but for an aligned pointer it re-reads the same block instead of touching
 * memory past the 8 elements that are actually needed.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon behaves as a single statement.
 */
#define DEQUANT_MPEG_INTRA() \
do { \
	/* Load 8 coefficients (any alignment), remember the sign, take |c| */ \
	level = vec_perm(vec_ld(0,coeff_ptr),vec_ld(15,coeff_ptr),vec_lvsl(0,coeff_ptr)); \
	zero_less = vec_cmplt(level,ox00); \
	level = vec_abs(level); \
	/* Load the 8 matching matrix entries (any alignment) */ \
	vintra = vec_perm(vec_ld(0,intra_matrix),vec_ld(15,intra_matrix),vec_lvsl(0,intra_matrix)); \
	/* 16x16 -> 32 bit products, split into even and odd elements */ \
	even = vec_mule((vec_uint16_t)level,vintra); \
	odd = vec_mulo((vec_uint16_t)level,vintra); \
	/* 32 bit multiply by quant built from 16 bit multiplies: */ \
	/* vec_msum with the halfword-swapped quant yields the cross terms, */ \
	/* which are moved up by 16 bits (the shift uses only the low 5 bits */ \
	/* of each count, so the splat of -16 shifts by 16) ... */ \
	t = vec_splat_u32(-16); \
	et = vec_msum( (vec_uint16_t)even, (vec_uint16_t)swap, (vec_uint32_t)ox00); \
	ot = vec_msum( (vec_uint16_t)odd, (vec_uint16_t)swap, (vec_uint32_t)ox00); \
	et = vec_sl(et, t); \
	ot = vec_sl(ot, t); \
	/* ... and added to the low-half product, then everything is >> 3 */ \
	even = vec_mulo( (vec_uint16_t)even, (vec_uint16_t)vquant); \
	odd = vec_mulo( (vec_uint16_t)odd, (vec_uint16_t)vquant); \
	t = vec_splat_u32(3); \
	even = vec_add(even,et); \
	odd = vec_add(odd,ot); \
	even = vec_sr(even,t); \
	odd = vec_sr(odd,t); \
	/* Pack & Clamp to [-2048,2047]: re-interleave even/odd and saturate */ \
	level = vec_packs( (vec_sint32_t)vec_mergeh(even,odd), (vec_sint32_t)vec_mergel(even,odd) ); \
	/* Magnitudes above 2047 become 2048 ... */ \
	t = (vec_uint32_t)vec_splat_s16(-1); \
	overflow = vec_cmpgt(level, vec_add(vec_2048, (vec_sint16_t)t)); \
	level = vec_sel(level, vec_2048, overflow); \
	/* ... and are lowered to 2047 where the input was not negative */ \
	overflow = (vector bool short)vec_and(overflow, vec_xor(zero_less, (vec_sint16_t)t)); \
	t = (vec_uint32_t)vec_splat_s16(1); \
	overflow = (vector bool short)vec_and(overflow, (vec_sint16_t)t); \
	level = vec_sub(level, (vec_sint16_t)overflow); \
	/* Invert the negative ones (two's complement: flip bits, add 1) */ \
	level = vec_xor(level, zero_less); \
	level = vec_add(level, (vec_sint16_t)vec_and(zero_less, (vec_uint16_t)t)); \
	/* Save & Advance Pointers */ \
	vec_st(level,0,data_ptr); \
	data_ptr += 8; \
	intra_matrix += 8; \
	coeff_ptr += 8; \
} while (0)
96 : | |||
97 : | |||
98 : | /* This function assuems: | ||
99 : | * data: 16 Byte aligned | ||
100 : | */ | ||
101 : | |||
102 : | uint32_t | ||
103 : | dequant_mpeg_intra_altivec_c(int16_t * data, | ||
104 : | const int16_t * coeff, | ||
105 : | const uint32_t quant, | ||
106 : | const uint32_t dcscalar, | ||
107 : | const uint16_t * mpeg_quant_matrices) | ||
108 : | { | ||
109 : | register const uint16_t *intra_matrix = get_intra_matrix(mpeg_quant_matrices); | ||
110 : | register const int16_t *coeff_ptr = coeff; | ||
111 : | register int16_t *data_ptr = data; | ||
112 : | |||
113 : | register vec_sint16_t ox00; | ||
114 : | register vec_sint16_t level; | ||
115 : | register vec_sint16_t vec_2048; | ||
116 : | register vec_uint16_t vintra; | ||
117 : | register vec_uint32_t swap; | ||
118 : | register vec_uint32_t even,odd; | ||
119 : | register vec_uint32_t et,ot,t; | ||
120 : | |||
121 : | vec_uint32_t vquant; | ||
122 : | vector bool short zero_less; | ||
123 : | vector bool short overflow; | ||
124 : | |||
125 : | #ifdef DEBUG | ||
126 : | if((long)data & 0xf) | ||
127 : | fprintf(stderr, "xvidcore: error in dequant_mpeg_intra_altivec_c, incorrect align: %x\n", data); | ||
128 : | #endif | ||
129 : | |||
130 : | /* Initialize */ | ||
131 : | ox00 = vec_splat_s16(0); | ||
132 : | *((uint32_t*)&vquant) = quant; | ||
133 : | vquant = vec_splat(vquant,0); | ||
134 : | |||
135 : | swap = vec_rl(vquant, vec_splat_u32(-16)); | ||
136 : | vec_2048 = (vec_sint16_t)vec_rl(vec_splat_u16(8),vec_splat_u16(8)); | ||
137 : | |||
138 : | DEQUANT_MPEG_INTRA(); | ||
139 : | DEQUANT_MPEG_INTRA(); | ||
140 : | DEQUANT_MPEG_INTRA(); | ||
141 : | DEQUANT_MPEG_INTRA(); | ||
142 : | |||
143 : | DEQUANT_MPEG_INTRA(); | ||
144 : | DEQUANT_MPEG_INTRA(); | ||
145 : | DEQUANT_MPEG_INTRA(); | ||
146 : | DEQUANT_MPEG_INTRA(); | ||
147 : | |||
148 : | /* Process the first */ | ||
149 : | data[0] = coeff[0] * dcscalar; | ||
150 : | if (data[0] < -2048) { | ||
151 : | data[0] = -2048; | ||
152 : | } else if (data[0] > 2047) { | ||
153 : | data[0] = 2047; | ||
154 : | } | ||
155 : | |||
156 : | return 0; | ||
157 : | } | ||
158 : | |||
159 : | |||
160 : | |||
/*
 * DEQUANT_MPEG_INTER: dequantize one group of 8 inter coefficients.
 *
 * The macro works on locals of the function that expands it:
 *   coeff, inter_matrix, data               advanced by 8 elements each
 *   level, vinter, hi, lo, sw_hi, sw_lo, t,
 *   zero_eq, zero_less, overflow             scratch, clobbered
 *   vsum                                     XOR-accumulates all results
 *   ox00, v16, v2048, vquant, swap           read only
 *
 * For every element it computes
 *   m = ((2 * |coeff| + 1) * inter_matrix * quant) >> 4
 * clamps m so that the signed result lies in [-2048, 2047], forces the
 * result to 0 where the input was 0, restores the sign of the input and
 * stores the 8 results with vec_st.
 *
 * hi holds elements 0..3 and lo elements 4..7 of the group, so hi must be
 * paired with vec_mergeh(.., vinter) and lo with vec_mergel(.., vinter) in
 * BOTH halves of the split multiplication.
 *
 * The second vec_ld of each unaligned load uses offset 15 rather than 16:
 * for an unaligned pointer it still fetches the following 16-byte block,
 * but for an aligned pointer it re-reads the same block instead of touching
 * memory past the 8 elements that are actually needed.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon behaves as a single statement.
 */
#define DEQUANT_MPEG_INTER() \
do { \
	/* Load 8 coefficients (any alignment); note zeros and signs, take |c| */ \
	level = vec_perm(vec_ld(0,coeff),vec_ld(15,coeff),vec_lvsl(0,coeff)); \
	zero_eq = vec_cmpeq(level,ox00); \
	zero_less = vec_cmplt(level,ox00); \
	level = vec_abs(level); \
	/* Load the 8 matching matrix entries (any alignment) */ \
	vinter = vec_perm(vec_ld(0,inter_matrix),vec_ld(15,inter_matrix),vec_lvsl(0,inter_matrix)); \
	/* Widen to 32 bit and form 2*|c| + 1 */ \
	t = vec_splat_u32(1); \
	hi = (vec_uint32_t)vec_unpackh(level); \
	lo = (vec_uint32_t)vec_unpackl(level); \
	hi = vec_sl(hi, t); \
	lo = vec_sl(lo, t); \
	hi = vec_add(hi, t); \
	lo = vec_add(lo, t); \
	/* Multiplication with vinter: low and high halfword are multiplied */ \
	/* separately (sw_* carries the high halfword), then recombined */ \
	sw_hi = vec_rl(hi, v16); \
	sw_lo = vec_rl(lo, v16); \
	hi = vec_mulo((vec_uint16_t)hi, vec_mergeh((vec_uint16_t)ox00,vinter)); \
	lo = vec_mulo((vec_uint16_t)lo, vec_mergel((vec_uint16_t)ox00,vinter)); \
	sw_hi = vec_mulo((vec_uint16_t)sw_hi, vec_mergeh((vec_uint16_t)ox00,vinter)); \
	sw_lo = vec_mulo((vec_uint16_t)sw_lo, vec_mergel((vec_uint16_t)ox00,vinter)); \
	hi = vec_add(hi, vec_sl(sw_hi,v16)); \
	lo = vec_add(lo, vec_sl(sw_lo,v16)); \
	/* Multiplication with Quant (cross terms via vec_msum), then >> 4 */ \
	t = vec_splat_u32(4); \
	sw_hi = vec_msum( (vec_uint16_t)hi, (vec_uint16_t)swap, (vec_uint32_t)ox00 ); \
	sw_lo = vec_msum( (vec_uint16_t)lo, (vec_uint16_t)swap, (vec_uint32_t)ox00 ); \
	hi = vec_mulo( (vec_uint16_t)hi, (vec_uint16_t)vquant ); \
	lo = vec_mulo( (vec_uint16_t)lo, (vec_uint16_t)vquant ); \
	hi = vec_add(hi, vec_sl(sw_hi,v16)); \
	lo = vec_add(lo, vec_sl(sw_lo,v16)); \
	hi = vec_sr(hi, t); \
	lo = vec_sr(lo, t); \
	/* Pack & Clamp to [-2048,2047] */ \
	t = (vec_uint32_t)vec_splat_s16(-1); \
	level = vec_packs((vec_sint32_t)hi, (vec_sint32_t)lo); \
	/* Magnitudes above 2047 become 2048 ... */ \
	overflow = vec_cmpgt(level, vec_add(v2048, (vec_sint16_t)t)); \
	level = vec_sel(level, v2048, overflow); \
	/* ... and are lowered to 2047 where the input was not negative */ \
	overflow = (vector bool short)vec_and(overflow, vec_xor(zero_less, (vec_sint16_t)t)); \
	t = (vec_uint32_t)vec_splat_s16(1); \
	overflow = (vector bool short)vec_and(overflow, (vec_sint16_t)t); \
	level = vec_sub(level, (vec_sint16_t)overflow); \
	/* Zero input gives zero output */ \
	level = vec_sel(level, ox00, zero_eq); \
	/* Invert the negative ones (two's complement: flip bits, add 1) */ \
	level = vec_xor(level, zero_less); \
	level = vec_add(level, (vec_sint16_t)vec_and(zero_less, (vec_uint16_t)t)); \
	/* Get vsum */ \
	vsum = vec_xor(vsum, (vec_uint32_t)vec_unpackh(level)); \
	vsum = vec_xor(vsum, (vec_uint32_t)vec_unpackl(level)); \
	/* Save & Advance Pointers */ \
	vec_st(level,0,data); \
	data+=8; \
	inter_matrix+=8; \
	coeff+=8; \
} while (0)
213 : | |||
214 : | |||
215 : | /* This function assumes: | ||
216 : | * data: 16 Byte aligned | ||
217 : | */ | ||
218 : | |||
219 : | uint32_t | ||
220 : | dequant_mpeg_inter_altivec_c(int16_t * data, | ||
221 : | const int16_t * coeff, | ||
222 : | const uint32_t quant, | ||
223 : | const uint16_t * mpeg_quant_matrices) | ||
224 : | { | ||
225 : | register uint32_t sum; | ||
226 : | register const uint16_t *inter_matrix = get_inter_matrix(mpeg_quant_matrices); | ||
227 : | |||
228 : | register vec_sint16_t ox00; | ||
229 : | register vec_sint16_t v2048; | ||
230 : | register vec_sint16_t level; | ||
231 : | register vec_uint16_t vinter; | ||
232 : | register vec_uint32_t hi,lo; | ||
233 : | register vec_uint32_t sw_hi,sw_lo; | ||
234 : | register vec_uint32_t swap; | ||
235 : | register vec_uint32_t t,v16; | ||
236 : | |||
237 : | vec_uint32_t vsum; | ||
238 : | vec_uint32_t vquant; | ||
239 : | vector bool short zero_eq; | ||
240 : | vector bool short zero_less; | ||
241 : | vector bool short overflow; | ||
242 : | |||
243 : | #ifdef DEBUG | ||
244 : | if((long)data & 0xf) | ||
245 : | fprintf(stderr, "xvidcore: error in dequant_mpeg_inter_altivec_c, incorrect align: %x\n", data); | ||
246 : | #endif | ||
247 : | |||
248 : | /* Initialization */ | ||
249 : | ox00 = vec_splat_s16(0); | ||
250 : | v16 = vec_splat_u32(-16); | ||
251 : | v2048 = vec_rl(vec_splat_s16(8),vec_splat_u16(8)); | ||
252 : | |||
253 : | vsum = (vec_uint32_t)ox00; | ||
254 : | |||
255 : | *((uint32_t*)&vquant) = quant; | ||
256 : | vquant = vec_splat(vquant,0); | ||
257 : | swap = vec_rl(vquant,v16); | ||
258 : | |||
259 : | DEQUANT_MPEG_INTER(); | ||
260 : | DEQUANT_MPEG_INTER(); | ||
261 : | DEQUANT_MPEG_INTER(); | ||
262 : | DEQUANT_MPEG_INTER(); | ||
263 : | |||
264 : | DEQUANT_MPEG_INTER(); | ||
265 : | DEQUANT_MPEG_INTER(); | ||
266 : | DEQUANT_MPEG_INTER(); | ||
267 : | DEQUANT_MPEG_INTER(); | ||
268 : | |||
269 : | sum = ((uint32_t*)&vsum)[0]; | ||
270 : | sum ^= ((uint32_t*)&vsum)[1]; | ||
271 : | sum ^= ((uint32_t*)&vsum)[2]; | ||
272 : | sum ^= ((uint32_t*)&vsum)[3]; | ||
273 : | |||
274 : | /* mismatch control */ | ||
275 : | if((sum & 1) == 0) { | ||
276 : | data -= 1; | ||
277 : | *data ^= 1; | ||
278 : | } | ||
279 : | |||
280 : | return 0; | ||
281 : | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |