Parent Directory | Revision Log
Revision 1.6 - (view) (download)
1 : | edgomez | 1.2 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - GMC interpolation module - | ||
5 : | * | ||
6 : | * Copyright(C) 2002-2003 Pascal Massimino <skal@planet-d.net> | ||
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | Isibaar | 1.6 | * $Id: gmc.c,v 1.5 2006/06/14 21:44:07 Skal Exp $ |
23 : | edgomez | 1.2 | * |
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | #include "../portab.h" | ||
27 : | #include "../global.h" | ||
28 : | #include "../encoder.h" | ||
29 : | #include "gmc.h" | ||
30 : | |||
31 : | #include <stdio.h> | ||
32 : | |||
33 : | Skal | 1.5 | /* initialized by init_GMC(), for 3points */ |
34 : | static | ||
35 : | void (*Predict_16x16_func)(const NEW_GMC_DATA * const This, | ||
36 : | uint8_t *dst, const uint8_t *src, | ||
37 : | int dststride, int srcstride, int x, int y, int rounding) = 0; | ||
38 : | static | ||
39 : | void (*Predict_8x8_func)(const NEW_GMC_DATA * const This, | ||
40 : | uint8_t *uDst, const uint8_t *uSrc, | ||
41 : | uint8_t *vDst, const uint8_t *vSrc, | ||
42 : | int dststride, int srcstride, int x, int y, int rounding) = 0; | ||
43 : | |||
44 : | /****************************************************************************/ | ||
45 : | /* this is borrowed from bitstream.c until we find a common solution */ | ||
/*
 * Number of significant bits in 'value': the 1-based position of the
 * highest set bit, or 0 when value is 0 (1 -> 1, 255 -> 8, 256 -> 9).
 *
 * A single portable loop is used for every compiler. There is
 * deliberately no inline-assembly BSR shortcut: BSR leaves its
 * destination undefined for a zero input, an __asm block cannot be
 * built by 64-bit MSVC, and a function that "returns" through EAX
 * without a return statement is not valid C.
 */
static uint32_t __inline
log2bin(uint32_t value)
{
	uint32_t n = 0;

	while (value) {
		value >>= 1;
		n++;
	}
	return n;
}
65 : | |||
66 : | /* 16*sizeof(int) -> 1 or 2 cachelines */ | ||
67 : | /* table lookup might be faster! (still to be benchmarked) */ | ||
68 : | |||
69 : | /* | ||
70 : | static int log2bin_table[16] = | ||
71 : | { 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4}; | ||
72 : | */ | ||
73 : | /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 */ | ||
74 : | |||
/*
 * RDIV(a,b): a / b with half of b added to (a > 0) or subtracted from
 * (a <= 0) the numerator first, i.e. division rounded to nearest for a
 * positive b.
 * RSHIFT(a,b): a >> b with a rounding term of 2^(b-1) added first
 * (one less when a <= 0).
 * Both macros evaluate their arguments more than once: pass only
 * side-effect-free expressions.
 */
#define RDIV(a,b) \
	(((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
#define RSHIFT(a,b) \
	( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))

/*
 * Packed bilinear weights for a 1/16-pel fraction i (0..15):
 * entry i holds (16 - i) in its upper 16 bits and i in its lower 16 bits,
 * so a single 32-bit multiply blends two pixels packed the same way.
 */
static const uint32_t MTab[16] = {
	0x00100000, 0x000F0001, 0x000E0002, 0x000D0003,
	0x000C0004, 0x000B0005, 0x000A0006, 0x00090007,
	0x00080008, 0x00070009, 0x0006000A, 0x0005000B,
	0x0004000C, 0x0003000D, 0x0002000E, 0x0001000F
};
84 : | |||
85 : | edgomez | 1.2 | /* ************************************************************ |
86 : | * Pts = 2 or 3 | ||
87 : | * | ||
88 : | * Warning! *src is the global frame pointer (that is: address | ||
89 : | * of pixel 0,0), not the macroblock one. | ||
90 : | * Conversely, *dst is the macroblock top-left address. | ||
91 : | */ | ||
92 : | |||
93 : | Skal | 1.5 | static |
94 : | edgomez | 1.2 | void Predict_16x16_C(const NEW_GMC_DATA * const This, |
95 : | Skal | 1.5 | uint8_t *dst, const uint8_t *src, |
96 : | int dststride, int srcstride, int x, int y, int rounding) | ||
97 : | edgomez | 1.2 | { |
98 : | const int W = This->sW; | ||
99 : | const int H = This->sH; | ||
100 : | const int rho = 3 - This->accuracy; | ||
101 : | const int Rounder = ( (1<<7) - (rounding<<(2*rho)) ) << 16; | ||
102 : | |||
103 : | const int dUx = This->dU[0]; | ||
104 : | const int dVx = This->dV[0]; | ||
105 : | const int dUy = This->dU[1]; | ||
106 : | const int dVy = This->dV[1]; | ||
107 : | |||
108 : | int Uo = This->Uo + 16*(dUy*y + dUx*x); | ||
109 : | int Vo = This->Vo + 16*(dVy*y + dVx*x); | ||
110 : | |||
111 : | int i, j; | ||
112 : | |||
113 : | dst += 16; | ||
114 : | for (j=16; j>0; --j) { | ||
115 : | int U = Uo, V = Vo; | ||
116 : | Uo += dUy; Vo += dVy; | ||
117 : | for (i=-16; i<0; ++i) { | ||
118 : | unsigned int f0, f1, ri = 16, rj = 16; | ||
119 : | int Offset; | ||
120 : | int u = ( U >> 16 ) << rho; | ||
121 : | int v = ( V >> 16 ) << rho; | ||
122 : | |||
123 : | U += dUx; V += dVx; | ||
124 : | |||
125 : | if (u > 0 && u <= W) { ri = MTab[u&15]; Offset = u>>4; } | ||
126 : | else { | ||
127 : | if (u > W) Offset = W>>4; | ||
128 : | else Offset = 0; | ||
129 : | ri = MTab[0]; | ||
130 : | } | ||
131 : | |||
132 : | if (v > 0 && v <= H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; } | ||
133 : | else { | ||
134 : | if (v > H) Offset += (H>>4)*srcstride; | ||
135 : | rj = MTab[0]; | ||
136 : | } | ||
137 : | |||
138 : | f0 = src[Offset + 0]; | ||
139 : | f0 |= src[Offset + 1] << 16; | ||
140 : | f1 = src[Offset + srcstride + 0]; | ||
141 : | f1 |= src[Offset + srcstride + 1] << 16; | ||
142 : | f0 = (ri*f0)>>16; | ||
143 : | f1 = (ri*f1) & 0x0fff0000; | ||
144 : | f0 |= f1; | ||
145 : | f0 = (rj*f0 + Rounder) >> 24; | ||
146 : | |||
147 : | dst[i] = (uint8_t)f0; | ||
148 : | } | ||
149 : | dst += dststride; | ||
150 : | } | ||
151 : | } | ||
152 : | |||
153 : | Skal | 1.5 | static |
154 : | edgomez | 1.2 | void Predict_8x8_C(const NEW_GMC_DATA * const This, |
155 : | Skal | 1.5 | uint8_t *uDst, const uint8_t *uSrc, |
156 : | uint8_t *vDst, const uint8_t *vSrc, | ||
157 : | int dststride, int srcstride, int x, int y, int rounding) | ||
158 : | edgomez | 1.2 | { |
159 : | const int W = This->sW >> 1; | ||
160 : | const int H = This->sH >> 1; | ||
161 : | const int rho = 3-This->accuracy; | ||
162 : | const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; | ||
163 : | |||
164 : | const int32_t dUx = This->dU[0]; | ||
165 : | const int32_t dVx = This->dV[0]; | ||
166 : | const int32_t dUy = This->dU[1]; | ||
167 : | const int32_t dVy = This->dV[1]; | ||
168 : | |||
169 : | int32_t Uo = This->Uco + 8*(dUy*y + dUx*x); | ||
170 : | int32_t Vo = This->Vco + 8*(dVy*y + dVx*x); | ||
171 : | |||
172 : | int i, j; | ||
173 : | |||
174 : | uDst += 8; | ||
175 : | vDst += 8; | ||
176 : | for (j=8; j>0; --j) { | ||
177 : | int32_t U = Uo, V = Vo; | ||
178 : | Uo += dUy; Vo += dVy; | ||
179 : | |||
180 : | for (i=-8; i<0; ++i) { | ||
181 : | int Offset; | ||
182 : | uint32_t f0, f1, ri, rj; | ||
183 : | int32_t u, v; | ||
184 : | |||
185 : | u = ( U >> 16 ) << rho; | ||
186 : | v = ( V >> 16 ) << rho; | ||
187 : | U += dUx; V += dVx; | ||
188 : | |||
189 : | if (u > 0 && u <= W) { | ||
190 : | ri = MTab[u&15]; | ||
191 : | Offset = u>>4; | ||
192 : | } else { | ||
193 : | if (u>W) Offset = W>>4; | ||
194 : | else Offset = 0; | ||
195 : | ri = MTab[0]; | ||
196 : | } | ||
197 : | |||
198 : | if (v > 0 && v <= H) { | ||
199 : | rj = MTab[v&15]; | ||
200 : | Offset += (v>>4)*srcstride; | ||
201 : | } else { | ||
202 : | if (v>H) Offset += (H>>4)*srcstride; | ||
203 : | rj = MTab[0]; | ||
204 : | } | ||
205 : | |||
206 : | f0 = uSrc[Offset + 0]; | ||
207 : | f0 |= uSrc[Offset + 1] << 16; | ||
208 : | f1 = uSrc[Offset + srcstride + 0]; | ||
209 : | f1 |= uSrc[Offset + srcstride + 1] << 16; | ||
210 : | f0 = (ri*f0)>>16; | ||
211 : | f1 = (ri*f1) & 0x0fff0000; | ||
212 : | f0 |= f1; | ||
213 : | f0 = (rj*f0 + Rounder) >> 24; | ||
214 : | |||
215 : | uDst[i] = (uint8_t)f0; | ||
216 : | |||
217 : | f0 = vSrc[Offset + 0]; | ||
218 : | f0 |= vSrc[Offset + 1] << 16; | ||
219 : | f1 = vSrc[Offset + srcstride + 0]; | ||
220 : | f1 |= vSrc[Offset + srcstride + 1] << 16; | ||
221 : | f0 = (ri*f0)>>16; | ||
222 : | f1 = (ri*f1) & 0x0fff0000; | ||
223 : | f0 |= f1; | ||
224 : | f0 = (rj*f0 + Rounder) >> 24; | ||
225 : | |||
226 : | vDst[i] = (uint8_t)f0; | ||
227 : | } | ||
228 : | uDst += dststride; | ||
229 : | vDst += dststride; | ||
230 : | } | ||
231 : | } | ||
232 : | |||
233 : | Skal | 1.5 | static |
234 : | edgomez | 1.2 | void get_average_mv_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
235 : | Skal | 1.5 | int x, int y, int qpel) |
236 : | edgomez | 1.2 | { |
237 : | int i, j; | ||
238 : | int vx = 0, vy = 0; | ||
239 : | int32_t uo = Dsp->Uo + 16*(Dsp->dU[1]*y + Dsp->dU[0]*x); | ||
240 : | int32_t vo = Dsp->Vo + 16*(Dsp->dV[1]*y + Dsp->dV[0]*x); | ||
241 : | for (j=16; j>0; --j) | ||
242 : | { | ||
243 : | int32_t U, V; | ||
244 : | U = uo; uo += Dsp->dU[1]; | ||
245 : | V = vo; vo += Dsp->dV[1]; | ||
246 : | for (i=16; i>0; --i) | ||
247 : | { | ||
248 : | int32_t u,v; | ||
249 : | u = U >> 16; U += Dsp->dU[0]; vx += u; | ||
250 : | v = V >> 16; V += Dsp->dV[0]; vy += v; | ||
251 : | } | ||
252 : | } | ||
253 : | vx -= (256*x+120) << (5+Dsp->accuracy); /* 120 = 15*16/2 */ | ||
254 : | vy -= (256*y+120) << (5+Dsp->accuracy); | ||
255 : | |||
256 : | mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel ); | ||
257 : | mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel ); | ||
258 : | } | ||
259 : | |||
260 : | /* ************************************************************ | ||
261 : | * simplified version for 1 warp point | ||
262 : | */ | ||
263 : | |||
264 : | Skal | 1.5 | static |
265 : | edgomez | 1.2 | void Predict_1pt_16x16_C(const NEW_GMC_DATA * const This, |
266 : | Skal | 1.5 | uint8_t *Dst, const uint8_t *Src, |
267 : | int dststride, int srcstride, int x, int y, int rounding) | ||
268 : | edgomez | 1.2 | { |
269 : | const int W = This->sW; | ||
270 : | const int H = This->sH; | ||
271 : | const int rho = 3-This->accuracy; | ||
272 : | const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; | ||
273 : | |||
274 : | |||
275 : | int32_t uo = This->Uo + (x<<8); /* ((16*x)<<4) */ | ||
276 : | int32_t vo = This->Vo + (y<<8); | ||
277 : | uint32_t ri = MTab[uo & 15]; | ||
278 : | uint32_t rj = MTab[vo & 15]; | ||
279 : | int i, j; | ||
280 : | |||
281 : | int32_t Offset; | ||
282 : | Skal | 1.4 | if (vo>=(-16<<4) && vo<=H) Offset = (vo>>4)*srcstride; |
283 : | edgomez | 1.2 | else { |
284 : | if (vo>H) Offset = ( H>>4)*srcstride; | ||
285 : | else Offset =-16*srcstride; | ||
286 : | rj = MTab[0]; | ||
287 : | } | ||
288 : | Skal | 1.4 | if (uo>=(-16<<4) && uo<=W) Offset += (uo>>4); |
289 : | edgomez | 1.2 | else { |
290 : | if (uo>W) Offset += (W>>4); | ||
291 : | else Offset -= 16; | ||
292 : | ri = MTab[0]; | ||
293 : | } | ||
294 : | |||
295 : | Dst += 16; | ||
296 : | |||
297 : | for(j=16; j>0; --j, Offset+=srcstride-16) | ||
298 : | { | ||
299 : | for(i=-16; i<0; ++i, ++Offset) | ||
300 : | { | ||
301 : | uint32_t f0, f1; | ||
302 : | f0 = Src[ Offset +0 ]; | ||
303 : | f0 |= Src[ Offset +1 ] << 16; | ||
304 : | f1 = Src[ Offset+srcstride +0 ]; | ||
305 : | f1 |= Src[ Offset+srcstride +1 ] << 16; | ||
306 : | f0 = (ri*f0)>>16; | ||
307 : | f1 = (ri*f1) & 0x0fff0000; | ||
308 : | f0 |= f1; | ||
309 : | f0 = ( rj*f0 + Rounder ) >> 24; | ||
310 : | Dst[i] = (uint8_t)f0; | ||
311 : | } | ||
312 : | Dst += dststride; | ||
313 : | } | ||
314 : | } | ||
315 : | |||
316 : | Skal | 1.5 | static |
317 : | edgomez | 1.2 | void Predict_1pt_8x8_C(const NEW_GMC_DATA * const This, |
318 : | Skal | 1.5 | uint8_t *uDst, const uint8_t *uSrc, |
319 : | uint8_t *vDst, const uint8_t *vSrc, | ||
320 : | int dststride, int srcstride, int x, int y, int rounding) | ||
321 : | edgomez | 1.2 | { |
322 : | const int W = This->sW >> 1; | ||
323 : | const int H = This->sH >> 1; | ||
324 : | const int rho = 3-This->accuracy; | ||
325 : | const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; | ||
326 : | |||
327 : | int32_t uo = This->Uco + (x<<7); | ||
328 : | int32_t vo = This->Vco + (y<<7); | ||
329 : | uint32_t rri = MTab[uo & 15]; | ||
330 : | uint32_t rrj = MTab[vo & 15]; | ||
331 : | int i, j; | ||
332 : | |||
333 : | int32_t Offset; | ||
334 : | Skal | 1.4 | if (vo>=(-8<<4) && vo<=H) Offset = (vo>>4)*srcstride; |
335 : | edgomez | 1.2 | else { |
336 : | if (vo>H) Offset = ( H>>4)*srcstride; | ||
337 : | else Offset =-8*srcstride; | ||
338 : | rrj = MTab[0]; | ||
339 : | } | ||
340 : | Skal | 1.4 | if (uo>=(-8<<4) && uo<=W) Offset += (uo>>4); |
341 : | edgomez | 1.2 | else { |
342 : | if (uo>W) Offset += ( W>>4); | ||
343 : | else Offset -= 8; | ||
344 : | rri = MTab[0]; | ||
345 : | } | ||
346 : | |||
347 : | uDst += 8; | ||
348 : | vDst += 8; | ||
349 : | for(j=8; j>0; --j, Offset+=srcstride-8) | ||
350 : | { | ||
351 : | for(i=-8; i<0; ++i, Offset++) | ||
352 : | { | ||
353 : | uint32_t f0, f1; | ||
354 : | f0 = uSrc[ Offset + 0 ]; | ||
355 : | f0 |= uSrc[ Offset + 1 ] << 16; | ||
356 : | f1 = uSrc[ Offset + srcstride + 0 ]; | ||
357 : | f1 |= uSrc[ Offset + srcstride + 1 ] << 16; | ||
358 : | f0 = (rri*f0)>>16; | ||
359 : | f1 = (rri*f1) & 0x0fff0000; | ||
360 : | f0 |= f1; | ||
361 : | f0 = ( rrj*f0 + Rounder ) >> 24; | ||
362 : | uDst[i] = (uint8_t)f0; | ||
363 : | |||
364 : | f0 = vSrc[ Offset + 0 ]; | ||
365 : | f0 |= vSrc[ Offset + 1 ] << 16; | ||
366 : | f1 = vSrc[ Offset + srcstride + 0 ]; | ||
367 : | f1 |= vSrc[ Offset + srcstride + 1 ] << 16; | ||
368 : | f0 = (rri*f0)>>16; | ||
369 : | f1 = (rri*f1) & 0x0fff0000; | ||
370 : | f0 |= f1; | ||
371 : | f0 = ( rrj*f0 + Rounder ) >> 24; | ||
372 : | vDst[i] = (uint8_t)f0; | ||
373 : | } | ||
374 : | uDst += dststride; | ||
375 : | vDst += dststride; | ||
376 : | } | ||
377 : | } | ||
378 : | |||
379 : | Skal | 1.5 | static |
380 : | edgomez | 1.2 | void get_average_mv_1pt_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv, |
381 : | int x, int y, int qpel) | ||
382 : | { | ||
383 : | mv->x = RSHIFT(Dsp->Uo<<qpel, 3); | ||
384 : | mv->y = RSHIFT(Dsp->Vo<<qpel, 3); | ||
385 : | } | ||
386 : | |||
387 : | Skal | 1.5 | #if defined(ARCH_IS_IA32) |
388 : | /* ************************************************************* | ||
389 : | * MMX core function | ||
390 : | */ | ||
391 : | |||
/*
 * Batched 8-pixel kernel used when a run of offsets is linear.
 * It is unset until init_GMC() selects one of the two external
 * implementations declared below.
 */
static void (*GMC_Core_Lin_8)(uint8_t *Dst, const uint16_t * Offsets,
							  const uint8_t * const Src0, const int BpS,
							  const int Rounder) = NULL;

/* external implementations (not defined in this file) */
extern void xvid_GMC_Core_Lin_8_mmx(uint8_t *Dst, const uint16_t * Offsets,
									const uint8_t * const Src0, const int BpS,
									const int Rounder);

extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets,
									 const uint8_t * const Src0, const int BpS,
									 const int Rounder);
401 : | |||
402 : | /* *************************************************************/ | ||
403 : | |||
404 : | static void GMC_Core_Non_Lin_8(uint8_t *Dst, | ||
405 : | const uint16_t * Offsets, | ||
406 : | const uint8_t * const Src0, const int srcstride, | ||
407 : | const int Rounder) | ||
408 : | { | ||
409 : | int i; | ||
410 : | for(i=0; i<8; ++i) | ||
411 : | { | ||
412 : | uint32_t u = Offsets[i ]; | ||
413 : | uint32_t v = Offsets[i+16]; | ||
414 : | const uint32_t ri = MTab[u&0x0f]; | ||
415 : | const uint32_t rj = MTab[v&0x0f]; | ||
416 : | uint32_t f0, f1; | ||
417 : | const uint8_t * const Src = Src0 + (u>>4) + (v>>4)*srcstride; | ||
418 : | f0 = Src[0]; | ||
419 : | f0 |= Src[1] << 16; | ||
420 : | f1 = Src[srcstride +0]; | ||
421 : | f1 |= Src[srcstride +1] << 16; | ||
422 : | f0 = (ri*f0)>>16; | ||
423 : | f1 = (ri*f1) & 0x0fff0000; | ||
424 : | f0 |= f1; | ||
425 : | f0 = ( rj*f0 + Rounder ) >> 24; | ||
426 : | Dst[i] = (uint8_t)f0; | ||
427 : | } | ||
428 : | } | ||
429 : | |||
430 : | ////////////////////////////////////////////////////////// | ||
431 : | |||
432 : | static | ||
433 : | void Predict_16x16_mmx(const NEW_GMC_DATA * const This, | ||
434 : | uint8_t *dst, const uint8_t *src, | ||
435 : | int dststride, int srcstride, int x, int y, int rounding) | ||
436 : | { | ||
437 : | const int W = This->sW; | ||
438 : | const int H = This->sH; | ||
439 : | const int rho = 3 - This->accuracy; | ||
440 : | const int Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; | ||
441 : | const uint32_t W2 = W<<(16-rho); | ||
442 : | const uint32_t H2 = H<<(16-rho); | ||
443 : | |||
444 : | const int dUx = This->dU[0]; | ||
445 : | const int dVx = This->dV[0]; | ||
446 : | const int dUy = This->dU[1]; | ||
447 : | const int dVy = This->dV[1]; | ||
448 : | |||
449 : | int Uo = This->Uo + 16*(dUy*y + dUx*x); | ||
450 : | int Vo = This->Vo + 16*(dVy*y + dVx*x); | ||
451 : | |||
452 : | int i, j; | ||
453 : | |||
454 : | DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE); | ||
455 : | for(j=16; j>0; --j) | ||
456 : | { | ||
457 : | int32_t U = Uo, V = Vo; | ||
458 : | Uo += dUy; Vo += dVy; | ||
459 : | if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && | ||
460 : | H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) | ||
461 : | { | ||
462 : | for(i=0; i<16; ++i) | ||
463 : | { | ||
464 : | uint32_t u = ( U >> 16 ) << rho; | ||
465 : | uint32_t v = ( V >> 16 ) << rho; | ||
466 : | U += dUx; V += dVx; | ||
467 : | Offsets[ i] = u; | ||
468 : | Offsets[16+i] = v; | ||
469 : | } | ||
470 : | Isibaar | 1.6 | |
471 : | { | ||
472 : | Skal | 1.5 | // batch 8 input pixels when linearity says it's ok |
473 : | Isibaar | 1.6 | uint32_t UV1, UV2; |
474 : | UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U; | ||
475 : | UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U; | ||
476 : | if (UV1+7*16==UV2) | ||
477 : | GMC_Core_Lin_8(dst, Offsets, src + (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride, srcstride, Rounder); | ||
478 : | else | ||
479 : | GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder); | ||
480 : | UV1 = (Offsets[ 8] | (Offsets[24]<<16)) & 0xfff0fff0U; | ||
481 : | UV2 = (Offsets[15] | (Offsets[31]<<16)) & 0xfff0fff0U; | ||
482 : | if (UV1+7*16==UV2) | ||
483 : | GMC_Core_Lin_8(dst+8, Offsets+8, src + (Offsets[8]>>4) + (Offsets[24]>>4)*srcstride, srcstride, Rounder); | ||
484 : | else | ||
485 : | GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); | ||
486 : | } | ||
487 : | } | ||
488 : | Skal | 1.5 | else |
489 : | { | ||
490 : | for(i=0; i<16; ++i) | ||
491 : | { | ||
492 : | int u = ( U >> 16 ) << rho; | ||
493 : | int v = ( V >> 16 ) << rho; | ||
494 : | U += dUx; V += dVx; | ||
495 : | |||
496 : | Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u; | ||
497 : | Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v; | ||
498 : | } | ||
499 : | // due to boundary clipping, we cannot infer the 8-pixels batchability | ||
500 : | // simply by using the linearity. Oh well, not a big deal... | ||
501 : | GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder); | ||
502 : | GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder); | ||
503 : | } | ||
504 : | dst += dststride; | ||
505 : | } | ||
506 : | } | ||
507 : | |||
508 : | static | ||
509 : | void Predict_8x8_mmx(const NEW_GMC_DATA * const This, | ||
510 : | uint8_t *uDst, const uint8_t *uSrc, | ||
511 : | uint8_t *vDst, const uint8_t *vSrc, | ||
512 : | int dststride, int srcstride, int x, int y, int rounding) | ||
513 : | { | ||
514 : | const int W = This->sW >> 1; | ||
515 : | const int H = This->sH >> 1; | ||
516 : | const int rho = 3-This->accuracy; | ||
517 : | const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16; | ||
518 : | const uint32_t W2 = W<<(16-rho); | ||
519 : | const uint32_t H2 = H<<(16-rho); | ||
520 : | |||
521 : | const int dUx = This->dU[0]; | ||
522 : | const int dVx = This->dV[0]; | ||
523 : | const int dUy = This->dU[1]; | ||
524 : | const int dVy = This->dV[1]; | ||
525 : | |||
526 : | int Uo = This->Uco + 8*(dUy*y + dUx*x); | ||
527 : | int Vo = This->Vco + 8*(dVy*y + dVx*x); | ||
528 : | |||
529 : | DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE); | ||
530 : | int i, j; | ||
531 : | for(j=8; j>0; --j) | ||
532 : | { | ||
533 : | int32_t U = Uo, V = Vo; | ||
534 : | Uo += dUy; Vo += dVy; | ||
535 : | if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) && | ||
536 : | H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) ) | ||
537 : | { | ||
538 : | for(i=0; i<8; ++i) | ||
539 : | { | ||
540 : | int32_t u = ( U >> 16 ) << rho; | ||
541 : | int32_t v = ( V >> 16 ) << rho; | ||
542 : | U += dUx; V += dVx; | ||
543 : | Offsets[ i] = u; | ||
544 : | Offsets[16+i] = v; | ||
545 : | } | ||
546 : | Isibaar | 1.6 | |
547 : | { | ||
548 : | // batch 8 input pixels when linearity says it's ok | ||
549 : | const uint32_t UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U; | ||
550 : | const uint32_t UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U; | ||
551 : | if (UV1+7*16==UV2) | ||
552 : | { | ||
553 : | const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride; | ||
554 : | GMC_Core_Lin_8(uDst, Offsets, uSrc+Off, srcstride, Rounder); | ||
555 : | GMC_Core_Lin_8(vDst, Offsets, vSrc+Off, srcstride, Rounder); | ||
556 : | } | ||
557 : | else { | ||
558 : | GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder); | ||
559 : | GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder); | ||
560 : | } | ||
561 : | } | ||
562 : | Skal | 1.5 | } |
563 : | else | ||
564 : | { | ||
565 : | for(i=0; i<8; ++i) | ||
566 : | { | ||
567 : | int u = ( U >> 16 ) << rho; | ||
568 : | int v = ( V >> 16 ) << rho; | ||
569 : | U += dUx; V += dVx; | ||
570 : | Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u; | ||
571 : | Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v; | ||
572 : | } | ||
573 : | GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder); | ||
574 : | GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder); | ||
575 : | } | ||
576 : | uDst += dststride; | ||
577 : | vDst += dststride; | ||
578 : | } | ||
579 : | } | ||
580 : | |||
581 : | #endif /* ARCH_IS_IA32 */ | ||
582 : | |||
583 : | /* ************************************************************* | ||
584 : | * will initialize internal pointers | ||
585 : | */ | ||
586 : | |||
587 : | void init_GMC(const unsigned int cpu_flags) | ||
588 : | { | ||
589 : | Predict_16x16_func = Predict_16x16_C; | ||
590 : | Predict_8x8_func = Predict_8x8_C; | ||
591 : | |||
592 : | Isibaar | 1.6 | #if defined(ARCH_IS_IA32) |
593 : | Skal | 1.5 | if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) || |
594 : | (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) || | ||
595 : | (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2)) | ||
596 : | { | ||
597 : | Predict_16x16_func = Predict_16x16_mmx; | ||
598 : | Predict_8x8_func = Predict_8x8_mmx; | ||
599 : | if (cpu_flags & XVID_CPU_SSE2) | ||
600 : | GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2; | ||
601 : | else | ||
602 : | GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx; | ||
603 : | } | ||
604 : | #endif | ||
605 : | } | ||
606 : | |||
607 : | edgomez | 1.2 | /* ************************************************************* |
608 : | * Warning! It's Accuracy being passed, not 'resolution'! | ||
609 : | */ | ||
610 : | |||
611 : | void generate_GMCparameters( int nb_pts, const int accuracy, | ||
612 : | const WARPPOINTS *const pts, | ||
613 : | const int width, const int height, | ||
614 : | NEW_GMC_DATA *const gmc) | ||
615 : | { | ||
616 : | gmc->sW = width << 4; | ||
617 : | gmc->sH = height << 4; | ||
618 : | gmc->accuracy = accuracy; | ||
619 : | gmc->num_wp = nb_pts; | ||
620 : | |||
621 : | /* reduce the number of points, if possible */ | ||
622 : | edgomez | 1.3 | if (nb_pts<2 || (pts->duv[2].x==0 && pts->duv[2].y==0 && pts->duv[1].x==0 && pts->duv[1].y==0 )) { |
623 : | if (nb_pts<2 || (pts->duv[1].x==0 && pts->duv[1].y==0)) { | ||
624 : | if (nb_pts<1 || (pts->duv[0].x==0 && pts->duv[0].y==0)) { | ||
625 : | nb_pts = 0; | ||
626 : | } | ||
627 : | else nb_pts = 1; | ||
628 : | } | ||
629 : | else nb_pts = 2; | ||
630 : | } | ||
631 : | edgomez | 1.2 | |
632 : | /* now, nb_pts stores the actual number of points required for interpolation */ | ||
633 : | |||
634 : | if (nb_pts<=1) | ||
635 : | { | ||
636 : | if (nb_pts==1) { | ||
637 : | /* store as 4b fixed point */ | ||
638 : | gmc->Uo = pts->duv[0].x << accuracy; | ||
639 : | gmc->Vo = pts->duv[0].y << accuracy; | ||
640 : | gmc->Uco = ((pts->duv[0].x>>1) | (pts->duv[0].x&1)) << accuracy; /* DIV2RND() */ | ||
641 : | gmc->Vco = ((pts->duv[0].y>>1) | (pts->duv[0].y&1)) << accuracy; /* DIV2RND() */ | ||
642 : | } | ||
643 : | else { /* zero points?! */ | ||
644 : | gmc->Uo = gmc->Vo = 0; | ||
645 : | gmc->Uco = gmc->Vco = 0; | ||
646 : | } | ||
647 : | |||
648 : | gmc->predict_16x16 = Predict_1pt_16x16_C; | ||
649 : | gmc->predict_8x8 = Predict_1pt_8x8_C; | ||
650 : | gmc->get_average_mv = get_average_mv_1pt_C; | ||
651 : | } | ||
652 : | else { /* 2 or 3 points */ | ||
653 : | const int rho = 3 - accuracy; /* = {3,2,1,0} for Acc={0,1,2,3} */ | ||
654 : | int Alpha = log2bin(width-1); | ||
655 : | int Ws = 1 << Alpha; | ||
656 : | |||
657 : | gmc->dU[0] = 16*Ws + RDIV( 8*Ws*pts->duv[1].x, width ); /* dU/dx */ | ||
658 : | gmc->dV[0] = RDIV( 8*Ws*pts->duv[1].y, width ); /* dV/dx */ | ||
659 : | |||
660 : | if (nb_pts==2) { | ||
661 : | gmc->dU[1] = -gmc->dV[0]; /* -Sin */ | ||
662 : | gmc->dV[1] = gmc->dU[0] ; /* Cos */ | ||
663 : | } | ||
664 : | else | ||
665 : | { | ||
666 : | const int Beta = log2bin(height-1); | ||
667 : | const int Hs = 1<<Beta; | ||
668 : | gmc->dU[1] = RDIV( 8*Hs*pts->duv[2].x, height ); /* dU/dy */ | ||
669 : | gmc->dV[1] = 16*Hs + RDIV( 8*Hs*pts->duv[2].y, height ); /* dV/dy */ | ||
670 : | if (Beta>Alpha) { | ||
671 : | gmc->dU[0] <<= (Beta-Alpha); | ||
672 : | gmc->dV[0] <<= (Beta-Alpha); | ||
673 : | Alpha = Beta; | ||
674 : | Ws = Hs; | ||
675 : | } | ||
676 : | else { | ||
677 : | gmc->dU[1] <<= Alpha - Beta; | ||
678 : | gmc->dV[1] <<= Alpha - Beta; | ||
679 : | } | ||
680 : | } | ||
681 : | /* upscale to 16b fixed-point */ | ||
682 : | gmc->dU[0] <<= (16-Alpha - rho); | ||
683 : | gmc->dU[1] <<= (16-Alpha - rho); | ||
684 : | gmc->dV[0] <<= (16-Alpha - rho); | ||
685 : | gmc->dV[1] <<= (16-Alpha - rho); | ||
686 : | |||
687 : | gmc->Uo = ( pts->duv[0].x <<(16+ accuracy)) + (1<<15); | ||
688 : | gmc->Vo = ( pts->duv[0].y <<(16+ accuracy)) + (1<<15); | ||
689 : | gmc->Uco = ((pts->duv[0].x-1)<<(17+ accuracy)) + (1<<17); | ||
690 : | gmc->Vco = ((pts->duv[0].y-1)<<(17+ accuracy)) + (1<<17); | ||
691 : | gmc->Uco = (gmc->Uco + gmc->dU[0] + gmc->dU[1])>>2; | ||
692 : | gmc->Vco = (gmc->Vco + gmc->dV[0] + gmc->dV[1])>>2; | ||
693 : | |||
694 : | Skal | 1.5 | gmc->predict_16x16 = Predict_16x16_func; |
695 : | gmc->predict_8x8 = Predict_8x8_func; | ||
696 : | edgomez | 1.2 | gmc->get_average_mv = get_average_mv_C; |
697 : | } | ||
698 : | } | ||
699 : | |||
700 : | /* ******************************************************************* | ||
701 : | * quick and dirty routine to generate the full warped image | ||
702 : | * (pGMC != NULL) or just all average Motion Vectors (pGMC == NULL) */ | ||
703 : | |||
704 : | void | ||
705 : | generate_GMCimage( const NEW_GMC_DATA *const gmc_data, /* [input] precalculated data */ | ||
706 : | const IMAGE *const pRef, /* [input] */ | ||
707 : | const int mb_width, | ||
708 : | const int mb_height, | ||
709 : | const int stride, | ||
710 : | const int stride2, | ||
711 : | const int fcode, /* [input] some parameters... */ | ||
712 : | const int32_t quarterpel, /* [input] for rounding avgMV */ | ||
713 : | const int reduced_resolution, /* [input] ignored */ | ||
714 : | const int32_t rounding, /* [input] for rounding image data */ | ||
715 : | MACROBLOCK *const pMBs, /* [output] average motion vectors */ | ||
716 : | IMAGE *const pGMC) /* [output] full warped image */ | ||
717 : | { | ||
718 : | |||
719 : | unsigned int mj,mi; | ||
720 : | VECTOR avgMV; | ||
721 : | |||
722 : | for (mj = 0; mj < (unsigned int)mb_height; mj++) | ||
723 : | for (mi = 0; mi < (unsigned int)mb_width; mi++) { | ||
724 : | const int mbnum = mj*mb_width+mi; | ||
725 : | if (pGMC) | ||
726 : | { | ||
727 : | gmc_data->predict_16x16(gmc_data, | ||
728 : | pGMC->y + mj*16*stride + mi*16, pRef->y, | ||
729 : | stride, stride, mi, mj, rounding); | ||
730 : | |||
731 : | gmc_data->predict_8x8(gmc_data, | ||
732 : | pGMC->u + mj*8*stride2 + mi*8, pRef->u, | ||
733 : | pGMC->v + mj*8*stride2 + mi*8, pRef->v, | ||
734 : | stride2, stride2, mi, mj, rounding); | ||
735 : | } | ||
736 : | gmc_data->get_average_mv(gmc_data, &avgMV, mi, mj, quarterpel); | ||
737 : | |||
738 : | pMBs[mbnum].amv.x = gmc_sanitize(avgMV.x, quarterpel, fcode); | ||
739 : | pMBs[mbnum].amv.y = gmc_sanitize(avgMV.y, quarterpel, fcode); | ||
740 : | |||
741 : | pMBs[mbnum].mcsel = 0; /* until mode decision */ | ||
742 : | } | ||
743 : | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |