1 |
|
/***************************************************************************** |
2 |
|
* |
3 |
|
* XVID MPEG-4 VIDEO CODEC |
4 |
|
* - Motion Estimation for P- and S- VOPs - |
5 |
|
* |
6 |
|
* Copyright(C) 2002 Christoph Lampert <gruel@web.de> |
7 |
|
* 2002 Michael Militzer <michael@xvid.org> |
8 |
|
* 2002-2003 Radoslaw Czyz <xvid@syskin.cjb.net> |
9 |
|
* |
10 |
|
* This program is free software ; you can redistribute it and/or modify |
11 |
|
* it under the terms of the GNU General Public License as published by |
12 |
|
* the Free Software Foundation ; either version 2 of the License, or |
13 |
|
* (at your option) any later version. |
14 |
|
* |
15 |
|
* This program is distributed in the hope that it will be useful, |
16 |
|
* but WITHOUT ANY WARRANTY ; without even the implied warranty of |
17 |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 |
|
* GNU General Public License for more details. |
19 |
|
* |
20 |
|
* You should have received a copy of the GNU General Public License |
21 |
|
* along with this program ; if not, write to the Free Software |
22 |
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
|
* |
24 |
|
* $Id$ |
25 |
|
* |
26 |
|
****************************************************************************/ |
27 |
|
|
28 |
|
#include <assert.h> |
29 |
|
#include <stdio.h> |
30 |
|
#include <stdlib.h> |
31 |
|
#include <string.h> /* memcpy */ |
32 |
|
|
33 |
|
#include "../encoder.h" |
34 |
|
#include "../prediction/mbprediction.h" |
35 |
|
#include "../global.h" |
36 |
|
#include "../utils/timer.h" |
37 |
|
#include "../image/interpolate8x8.h" |
38 |
|
#include "estimation.h" |
39 |
|
#include "motion.h" |
40 |
|
#include "sad.h" |
41 |
|
#include "motion_inlines.h" |
42 |
|
|
43 |
|
static const int xvid_me_lambda_vec8[32] = |
44 |
|
{ 0 ,(int)(1.00235 * NEIGH_TEND_8X8 + 0.5), |
45 |
|
(int)(1.15582 + NEIGH_TEND_8X8 + 0.5), (int)(1.31976*NEIGH_TEND_8X8 + 0.5), |
46 |
|
(int)(1.49591*NEIGH_TEND_8X8 + 0.5), (int)(1.68601*NEIGH_TEND_8X8 + 0.5), |
47 |
|
(int)(1.89187*NEIGH_TEND_8X8 + 0.5), (int)(2.11542*NEIGH_TEND_8X8 + 0.5), |
48 |
|
(int)(2.35878*NEIGH_TEND_8X8 + 0.5), (int)(2.62429*NEIGH_TEND_8X8 + 0.5), |
49 |
|
(int)(2.91455*NEIGH_TEND_8X8 + 0.5), (int)(3.23253*NEIGH_TEND_8X8 + 0.5), |
50 |
|
(int)(3.58158*NEIGH_TEND_8X8 + 0.5), (int)(3.96555*NEIGH_TEND_8X8 + 0.5), |
51 |
|
(int)(4.38887*NEIGH_TEND_8X8 + 0.5), (int)(4.85673*NEIGH_TEND_8X8 + 0.5), |
52 |
|
(int)(5.37519*NEIGH_TEND_8X8 + 0.5), (int)(5.95144*NEIGH_TEND_8X8 + 0.5), |
53 |
|
(int)(6.59408*NEIGH_TEND_8X8 + 0.5), (int)(7.31349*NEIGH_TEND_8X8 + 0.5), |
54 |
|
(int)(8.12242*NEIGH_TEND_8X8 + 0.5), (int)(9.03669*NEIGH_TEND_8X8 + 0.5), |
55 |
|
(int)(10.0763*NEIGH_TEND_8X8 + 0.5), (int)(11.2669*NEIGH_TEND_8X8 + 0.5), |
56 |
|
(int)(12.6426*NEIGH_TEND_8X8 + 0.5), (int)(14.2493*NEIGH_TEND_8X8 + 0.5), |
57 |
|
(int)(16.1512*NEIGH_TEND_8X8 + 0.5), (int)(18.442*NEIGH_TEND_8X8 + 0.5), |
58 |
|
(int)(21.2656*NEIGH_TEND_8X8 + 0.5), (int)(24.8580*NEIGH_TEND_8X8 + 0.5), |
59 |
|
(int)(29.6436*NEIGH_TEND_8X8 + 0.5), (int)(36.4949*NEIGH_TEND_8X8 + 0.5) |
60 |
|
}; |
61 |
|
|
62 |
|
static void |
63 |
|
CheckCandidate16(const int x, const int y, SearchData * const data, const unsigned int Direction) |
64 |
|
{ |
65 |
|
const uint8_t * Reference; |
66 |
|
int32_t sad; uint32_t t; |
67 |
|
|
68 |
|
if ( (x > data->max_dx) || (x < data->min_dx) |
69 |
|
|| (y > data->max_dy) || (y < data->min_dy) ) return; |
70 |
|
|
71 |
|
Reference = GetReference(x, y, data); |
72 |
|
|
73 |
|
sad = sad16v(data->Cur, Reference, data->iEdgedWidth, data->temp); |
74 |
|
t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel, 0); |
75 |
|
|
76 |
|
sad += (data->lambda16 * t * sad)>>10; |
77 |
|
data->temp[0] += (data->lambda8 * t * (data->temp[0] + NEIGH_8X8_BIAS))>>10; |
78 |
|
|
79 |
|
if (data->chroma) { |
80 |
|
if (sad >= data->iMinSAD[0]) goto no16; |
81 |
|
sad += xvid_me_ChromaSAD((x >> 1) + roundtab_79[x & 0x3], |
82 |
|
(y >> 1) + roundtab_79[y & 0x3], data); |
83 |
|
} |
84 |
|
|
85 |
|
if (sad < data->iMinSAD[0]) { |
86 |
|
data->iMinSAD[0] = sad; |
87 |
|
data->currentMV[0].x = x; data->currentMV[0].y = y; |
88 |
|
data->dir = Direction; |
89 |
|
} |
90 |
|
|
91 |
|
no16: |
92 |
|
if (data->temp[0] < data->iMinSAD[1]) { |
93 |
|
data->iMinSAD[1] = data->temp[0]; data->currentMV[1].x = x; data->currentMV[1].y = y; } |
94 |
|
if (data->temp[1] < data->iMinSAD[2]) { |
95 |
|
data->iMinSAD[2] = data->temp[1]; data->currentMV[2].x = x; data->currentMV[2].y = y; } |
96 |
|
if (data->temp[2] < data->iMinSAD[3]) { |
97 |
|
data->iMinSAD[3] = data->temp[2]; data->currentMV[3].x = x; data->currentMV[3].y = y; } |
98 |
|
if (data->temp[3] < data->iMinSAD[4]) { |
99 |
|
data->iMinSAD[4] = data->temp[3]; data->currentMV[4].x = x; data->currentMV[4].y = y; } |
100 |
|
} |
101 |
|
|
102 |
|
static void |
103 |
|
CheckCandidate16_qpel(const int x, const int y, SearchData * const data, const unsigned int Direction) |
104 |
|
{ |
105 |
|
const uint8_t *Reference; |
106 |
|
int32_t sad; uint32_t t; |
107 |
|
|
108 |
|
if ( (x > data->max_dx) || (x < data->min_dx) |
109 |
|
|| (y > data->max_dy) || (y < data->min_dy) ) return; |
110 |
|
|
111 |
|
Reference = xvid_me_interpolate16x16qpel(x, y, 0, data); |
112 |
|
|
113 |
|
sad = sad16v(data->Cur, Reference, data->iEdgedWidth, data->temp); |
114 |
|
t = d_mv_bits(x, y, data->predMV, data->iFcode, 0, 0); |
115 |
|
|
116 |
|
sad += (data->lambda16 * t * sad)>>10; |
117 |
|
data->temp[0] += (data->lambda8 * t * (data->temp[0] + NEIGH_8X8_BIAS))>>10; |
118 |
|
|
119 |
|
if (data->chroma && (sad < data->iMinSAD[0] || sad < data->iMinSAD2) ) |
120 |
|
sad += xvid_me_ChromaSAD(((x/2) >> 1) + roundtab_79[(x/2) & 0x3], |
121 |
|
((y/2) >> 1) + roundtab_79[(y/2) & 0x3], data); |
122 |
|
|
123 |
|
if (data->temp[0] < data->iMinSAD[1]) { |
124 |
|
data->iMinSAD[1] = data->temp[0]; data->currentQMV[1].x = x; data->currentQMV[1].y = y; } |
125 |
|
if (data->temp[1] < data->iMinSAD[2]) { |
126 |
|
data->iMinSAD[2] = data->temp[1]; data->currentQMV[2].x = x; data->currentQMV[2].y = y; } |
127 |
|
if (data->temp[2] < data->iMinSAD[3]) { |
128 |
|
data->iMinSAD[3] = data->temp[2]; data->currentQMV[3].x = x; data->currentQMV[3].y = y; } |
129 |
|
if (data->temp[3] < data->iMinSAD[4]) { |
130 |
|
data->iMinSAD[4] = data->temp[3]; data->currentQMV[4].x = x; data->currentQMV[4].y = y; } |
131 |
|
|
132 |
|
if (sad < data->iMinSAD[0]) { |
133 |
|
data->iMinSAD2 = *(data->iMinSAD); |
134 |
|
data->currentQMV2.x = data->currentQMV->x; |
135 |
|
data->currentQMV2.y = data->currentQMV->y; |
136 |
|
|
137 |
|
data->iMinSAD[0] = sad; |
138 |
|
data->currentQMV[0].x = x; data->currentQMV[0].y = y; |
139 |
|
} else if (sad < data->iMinSAD2) { |
140 |
|
data->iMinSAD2 = sad; |
141 |
|
data->currentQMV2.x = x; data->currentQMV2.y = y; |
142 |
|
} |
143 |
|
} |
144 |
|
|
145 |
|
static void |
146 |
|
CheckCandidate8(const int x, const int y, SearchData * const data, const unsigned int Direction) |
147 |
|
{ |
148 |
|
int32_t sad; uint32_t t; |
149 |
|
const uint8_t * Reference; |
150 |
|
VECTOR * current; |
151 |
|
|
152 |
|
if ( (x > data->max_dx) || (x < data->min_dx) |
153 |
|
|| (y > data->max_dy) || (y < data->min_dy) ) return; |
154 |
|
|
155 |
|
if (!data->qpel_precision) { |
156 |
|
Reference = GetReference(x, y, data); |
157 |
|
current = data->currentMV; |
158 |
|
} else { /* x and y are in 1/4 precision */ |
159 |
|
Reference = xvid_me_interpolate8x8qpel(x, y, 0, 0, data); |
160 |
|
current = data->currentQMV; |
161 |
|
} |
162 |
|
|
163 |
|
sad = sad8(data->Cur, Reference, data->iEdgedWidth); |
164 |
|
t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); |
165 |
|
|
166 |
|
sad += (data->lambda8 * t * (sad+NEIGH_8X8_BIAS))>>10; |
167 |
|
|
168 |
|
if (sad < *(data->iMinSAD)) { |
169 |
|
*(data->iMinSAD) = sad; |
170 |
|
current->x = x; current->y = y; |
171 |
|
data->dir = Direction; |
172 |
|
} |
173 |
|
} |
174 |
|
|
175 |
|
static void |
176 |
|
CheckCandidate8_qpel(const int x, const int y, SearchData * const data, const unsigned int Direction) |
177 |
|
{ |
178 |
|
int32_t sad; uint32_t t; |
179 |
|
const uint8_t * Reference; |
180 |
|
VECTOR * current; |
181 |
|
|
182 |
|
if ( (x > data->max_dx) || (x < data->min_dx) |
183 |
|
|| (y > data->max_dy) || (y < data->min_dy) ) return; |
184 |
|
|
185 |
|
/* x and y are in 1/4 precision */ |
186 |
|
Reference = xvid_me_interpolate8x8qpel(x, y, 0, 0, data); |
187 |
|
current = data->currentQMV; |
188 |
|
|
189 |
|
sad = sad8(data->Cur, Reference, data->iEdgedWidth); |
190 |
|
t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); |
191 |
|
|
192 |
|
sad += (data->lambda8 * t * (sad+NEIGH_8X8_BIAS))>>10; |
193 |
|
|
194 |
|
if (sad < *(data->iMinSAD)) { |
195 |
|
data->iMinSAD2 = *(data->iMinSAD); |
196 |
|
data->currentQMV2.x = data->currentQMV->x; |
197 |
|
data->currentQMV2.y = data->currentQMV->y; |
198 |
|
|
199 |
|
*(data->iMinSAD) = sad; |
200 |
|
data->currentQMV->x = x; data->currentQMV->y = y; |
201 |
|
data->dir = Direction; |
202 |
|
} else if (sad < data->iMinSAD2) { |
203 |
|
data->iMinSAD2 = sad; |
204 |
|
data->currentQMV2.x = x; data->currentQMV2.y = y; |
205 |
|
} |
206 |
|
} |
207 |
|
|
208 |
|
static void |
209 |
|
CheckCandidate32(const int x, const int y, SearchData * const data, const unsigned int Direction) |
210 |
|
{ |
211 |
|
uint32_t t; |
212 |
|
const uint8_t * Reference; |
213 |
|
int sad; |
214 |
|
|
215 |
|
if ( (!(x&1) && x !=0) || (!(y&1) && y !=0) || /* non-zero even value */ |
216 |
|
(x > data->max_dx) || (x < data->min_dx) |
217 |
|
|| (y > data->max_dy) || (y < data->min_dy) ) return; |
218 |
|
|
219 |
|
Reference = GetReference(x, y, data); |
220 |
|
t = d_mv_bits(x, y, data->predMV, data->iFcode, 0, 1); |
221 |
|
|
222 |
|
sad = sad32v_c(data->Cur, Reference, data->iEdgedWidth, data->temp); |
223 |
|
|
224 |
|
sad += (data->lambda16 * t * sad) >> 10; |
225 |
|
data->temp[0] += (data->lambda8 * t * (data->temp[0] + NEIGH_8X8_BIAS))>>10; |
226 |
|
|
227 |
|
if (sad < data->iMinSAD[0]) { |
228 |
|
data->iMinSAD[0] = sad; |
229 |
|
data->currentMV[0].x = x; data->currentMV[0].y = y; |
230 |
|
data->dir = Direction; |
231 |
|
} |
232 |
|
|
233 |
|
if (data->temp[0] < data->iMinSAD[1]) { |
234 |
|
data->iMinSAD[1] = data->temp[0]; data->currentMV[1].x = x; data->currentMV[1].y = y; } |
235 |
|
if (data->temp[1] < data->iMinSAD[2]) { |
236 |
|
data->iMinSAD[2] = data->temp[1]; data->currentMV[2].x = x; data->currentMV[2].y = y; } |
237 |
|
if (data->temp[2] < data->iMinSAD[3]) { |
238 |
|
data->iMinSAD[3] = data->temp[2]; data->currentMV[3].x = x; data->currentMV[3].y = y; } |
239 |
|
if (data->temp[3] < data->iMinSAD[4]) { |
240 |
|
data->iMinSAD[4] = data->temp[3]; data->currentMV[4].x = x; data->currentMV[4].y = y; } |
241 |
|
} |
242 |
|
|
243 |
|
int |
244 |
|
xvid_me_SkipDecisionP(const IMAGE * current, const IMAGE * reference, |
245 |
|
const int x, const int y, |
246 |
|
const uint32_t stride, const uint32_t iQuant, int rrv) |
247 |
|
{ |
248 |
|
int offset = (x + y*stride)*8; |
249 |
|
if(!rrv) { |
250 |
|
uint32_t sadC = sad8(current->u + offset, |
251 |
|
reference->u + offset, stride); |
252 |
|
if (sadC > iQuant * MAX_CHROMA_SAD_FOR_SKIP) return 0; |
253 |
|
sadC += sad8(current->v + offset, |
254 |
|
reference->v + offset, stride); |
255 |
|
if (sadC > iQuant * MAX_CHROMA_SAD_FOR_SKIP) return 0; |
256 |
|
return 1; |
257 |
|
|
258 |
|
} else { |
259 |
|
uint32_t sadC = sad16(current->u + 2*offset, |
260 |
|
reference->u + 2*offset, stride, 256*4096); |
261 |
|
if (sadC > iQuant * MAX_CHROMA_SAD_FOR_SKIP*4) return 0; |
262 |
|
sadC += sad16(current->v + 2*offset, |
263 |
|
reference->v + 2*offset, stride, 256*4096); |
264 |
|
if (sadC > iQuant * MAX_CHROMA_SAD_FOR_SKIP*4) return 0; |
265 |
|
return 1; |
266 |
|
} |
267 |
|
} |
268 |
|
|
269 |
|
/* |
270 |
|
* pmv are filled with: |
271 |
|
* [0]: Median (or whatever is correct in a special case) |
272 |
|
* [1]: left neighbour |
273 |
|
* [2]: top neighbour |
274 |
|
* [3]: topright neighbour |
275 |
|
* psad are filled with: |
276 |
|
* [0]: minimum of [1] to [3] |
277 |
|
* [1]: left neighbour's SAD (NB:[1] to [3] are actually not needed) |
278 |
|
* [2]: top neighbour's SAD |
279 |
|
* [3]: topright neighbour's SAD |
280 |
|
*/ |
281 |
|
|
282 |
|
static __inline void |
283 |
|
get_pmvdata2(const MACROBLOCK * const mbs, |
284 |
|
const int mb_width, |
285 |
|
const int bound, |
286 |
|
const int x, |
287 |
|
const int y, |
288 |
|
VECTOR * const pmv, |
289 |
|
int32_t * const psad) |
290 |
|
{ |
291 |
|
int lx, ly, lz; /* left */ |
292 |
|
int tx, ty, tz; /* top */ |
293 |
|
int rx, ry, rz; /* top-right */ |
294 |
|
int lpos, tpos, rpos; |
295 |
|
int num_cand = 0, last_cand = 1; |
296 |
|
|
297 |
|
lx = x - 1; ly = y; lz = 1; |
298 |
|
tx = x; ty = y - 1; tz = 2; |
299 |
|
rx = x + 1; ry = y - 1; rz = 2; |
300 |
|
|
301 |
|
lpos = lx + ly * mb_width; |
302 |
|
rpos = rx + ry * mb_width; |
303 |
|
tpos = tx + ty * mb_width; |
304 |
|
|
305 |
|
if (lpos >= bound && lx >= 0) { |
306 |
|
num_cand++; |
307 |
|
last_cand = 1; |
308 |
|
pmv[1] = mbs[lpos].mvs[lz]; |
309 |
|
psad[1] = mbs[lpos].sad8[lz]; |
310 |
|
} else { |
311 |
|
pmv[1] = zeroMV; |
312 |
|
psad[1] = MV_MAX_ERROR; |
313 |
|
} |
314 |
|
|
315 |
|
if (tpos >= bound) { |
316 |
|
num_cand++; |
317 |
|
last_cand = 2; |
318 |
|
pmv[2]= mbs[tpos].mvs[tz]; |
319 |
|
psad[2] = mbs[tpos].sad8[tz]; |
320 |
|
} else { |
321 |
|
pmv[2] = zeroMV; |
322 |
|
psad[2] = MV_MAX_ERROR; |
323 |
|
} |
324 |
|
|
325 |
|
if (rpos >= bound && rx < mb_width) { |
326 |
|
num_cand++; |
327 |
|
last_cand = 3; |
328 |
|
pmv[3] = mbs[rpos].mvs[rz]; |
329 |
|
psad[3] = mbs[rpos].sad8[rz]; |
330 |
|
} else { |
331 |
|
pmv[3] = zeroMV; |
332 |
|
psad[3] = MV_MAX_ERROR; |
333 |
|
} |
334 |
|
|
335 |
|
/* original pmvdata() compatibility hack */ |
336 |
|
if (x == 0 && y == 0) { |
337 |
|
pmv[0] = pmv[1] = pmv[2] = pmv[3] = zeroMV; |
338 |
|
psad[0] = 0; |
339 |
|
psad[1] = psad[2] = psad[3] = MV_MAX_ERROR; |
340 |
|
return; |
341 |
|
} |
342 |
|
|
343 |
|
/* if only one valid candidate preictor, the invalid candiates are set to the canidate */ |
344 |
|
if (num_cand == 1) { |
345 |
|
pmv[0] = pmv[last_cand]; |
346 |
|
psad[0] = psad[last_cand]; |
347 |
|
return; |
348 |
|
} |
349 |
|
|
350 |
|
if ((MVequal(pmv[1], pmv[2])) && (MVequal(pmv[1], pmv[3]))) { |
351 |
|
pmv[0] = pmv[1]; |
352 |
|
psad[0] = MIN(MIN(psad[1], psad[2]), psad[3]); |
353 |
|
return; |
354 |
|
} |
355 |
|
|
356 |
|
/* set median, minimum */ |
357 |
|
|
358 |
|
pmv[0].x = |
359 |
|
MIN(MAX(pmv[1].x, pmv[2].x), |
360 |
|
MIN(MAX(pmv[2].x, pmv[3].x), MAX(pmv[1].x, pmv[3].x))); |
361 |
|
pmv[0].y = |
362 |
|
MIN(MAX(pmv[1].y, pmv[2].y), |
363 |
|
MIN(MAX(pmv[2].y, pmv[3].y), MAX(pmv[1].y, pmv[3].y))); |
364 |
|
|
365 |
|
psad[0] = MIN(MIN(psad[1], psad[2]), psad[3]); |
366 |
|
|
367 |
|
} |
368 |
|
|
369 |
|
|
370 |
|
static void |
371 |
|
ModeDecision_SAD(SearchData * const Data, |
372 |
|
MACROBLOCK * const pMB, |
373 |
|
const MACROBLOCK * const pMBs, |
374 |
|
const int x, const int y, |
375 |
|
const MBParam * const pParam, |
376 |
|
const uint32_t MotionFlags, |
377 |
|
const uint32_t VopFlags, |
378 |
|
const uint32_t VolFlags, |
379 |
|
const IMAGE * const pCurrent, |
380 |
|
const IMAGE * const pRef, |
381 |
|
const IMAGE * const vGMC, |
382 |
|
const int coding_type) |
383 |
|
{ |
384 |
|
int mode = MODE_INTER; |
385 |
|
int mcsel = 0; |
386 |
|
int inter4v = (VopFlags & XVID_VOP_INTER4V) && (pMB->dquant == 0); |
387 |
|
const uint32_t iQuant = pMB->quant; |
388 |
|
|
389 |
|
const int skip_possible = (coding_type == P_VOP) && (pMB->dquant == 0); |
390 |
|
|
391 |
|
int sad; |
392 |
|
int InterBias = MV16_INTER_BIAS; |
393 |
|
|
394 |
|
pMB->mcsel = 0; |
395 |
|
|
396 |
|
if (inter4v == 0 || Data->iMinSAD[0] < Data->iMinSAD[1] + Data->iMinSAD[2] + |
397 |
|
Data->iMinSAD[3] + Data->iMinSAD[4] + IMV16X16 * (int32_t)iQuant) { |
398 |
|
mode = MODE_INTER; |
399 |
|
sad = Data->iMinSAD[0]; |
400 |
|
} else { |
401 |
|
mode = MODE_INTER4V; |
402 |
|
sad = Data->iMinSAD[1] + Data->iMinSAD[2] + |
403 |
|
Data->iMinSAD[3] + Data->iMinSAD[4] + IMV16X16 * (int32_t)iQuant; |
404 |
|
Data->iMinSAD[0] = sad; |
405 |
|
} |
406 |
|
|
407 |
|
/* final skip decision, a.k.a. "the vector you found, really that good?" */ |
408 |
|
if (skip_possible && (pMB->sad16 < (int)iQuant * MAX_SAD00_FOR_SKIP)) |
409 |
|
if ( (100*sad)/(pMB->sad16+1) > FINAL_SKIP_THRESH) |
410 |
|
if (Data->chroma || xvid_me_SkipDecisionP(pCurrent, pRef, x, y, Data->iEdgedWidth/2, iQuant, Data->rrv)) { |
411 |
|
mode = MODE_NOT_CODED; |
412 |
|
sad = 0; |
413 |
|
} |
414 |
|
|
415 |
|
/* mcsel */ |
416 |
|
if (coding_type == S_VOP) { |
417 |
|
|
418 |
|
int32_t iSAD = sad16(Data->Cur, |
419 |
|
vGMC->y + 16*y*Data->iEdgedWidth + 16*x, Data->iEdgedWidth, 65536); |
420 |
|
|
421 |
|
if (Data->chroma) { |
422 |
|
iSAD += sad8(Data->CurU, vGMC->u + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2); |
423 |
|
iSAD += sad8(Data->CurV, vGMC->v + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2); |
424 |
|
} |
425 |
|
|
426 |
|
if (iSAD <= sad) { /* mode decision GMC */ |
427 |
|
mode = MODE_INTER; |
428 |
|
mcsel = 1; |
429 |
|
sad = iSAD; |
430 |
|
} |
431 |
|
} |
432 |
|
|
433 |
|
/* intra decision */ |
434 |
|
|
435 |
|
if (iQuant > 10) InterBias += 60 * (iQuant - 10); /* to make high quants work */ |
436 |
|
if (y != 0) |
437 |
|
if ((pMB - pParam->mb_width)->mode == MODE_INTRA ) InterBias -= 80; |
438 |
|
if (x != 0) |
439 |
|
if ((pMB - 1)->mode == MODE_INTRA ) InterBias -= 80; |
440 |
|
|
441 |
|
if (Data->chroma) InterBias += 50; /* dev8(chroma) ??? <-- yes, we need dev8 (no big difference though) */ |
442 |
|
if (Data->rrv) InterBias *= 4; |
443 |
|
|
444 |
|
if (InterBias < sad) { |
445 |
|
int32_t deviation; |
446 |
|
if (!Data->rrv) |
447 |
|
deviation = dev16(Data->Cur, Data->iEdgedWidth); |
448 |
|
else |
449 |
|
deviation = dev16(Data->Cur, Data->iEdgedWidth) + /* dev32() */ |
450 |
|
dev16(Data->Cur+16, Data->iEdgedWidth) + |
451 |
|
dev16(Data->Cur + 16*Data->iEdgedWidth, Data->iEdgedWidth) + |
452 |
|
dev16(Data->Cur+16+16*Data->iEdgedWidth, Data->iEdgedWidth); |
453 |
|
|
454 |
|
if (deviation < (sad - InterBias)) mode = MODE_INTRA; |
455 |
|
} |
456 |
|
|
457 |
|
pMB->cbp = 63; |
458 |
|
pMB->sad16 = pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = sad; |
459 |
|
|
460 |
|
if (Data->rrv) { |
461 |
|
Data->currentMV[0].x = RRV_MV_SCALEDOWN(Data->currentMV[0].x); |
462 |
|
Data->currentMV[0].y = RRV_MV_SCALEDOWN(Data->currentMV[0].y); |
463 |
|
} |
464 |
|
|
465 |
|
if (mode == MODE_INTER && mcsel == 0) { |
466 |
|
pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = Data->currentMV[0]; |
467 |
|
|
468 |
|
if(Data->qpel) { |
469 |
|
pMB->qmvs[0] = pMB->qmvs[1] |
470 |
|
= pMB->qmvs[2] = pMB->qmvs[3] = Data->currentQMV[0]; |
471 |
|
pMB->pmvs[0].x = Data->currentQMV[0].x - Data->predMV.x; |
472 |
|
pMB->pmvs[0].y = Data->currentQMV[0].y - Data->predMV.y; |
473 |
|
} else { |
474 |
|
pMB->pmvs[0].x = Data->currentMV[0].x - Data->predMV.x; |
475 |
|
pMB->pmvs[0].y = Data->currentMV[0].y - Data->predMV.y; |
476 |
|
} |
477 |
|
|
478 |
|
} else if (mode == MODE_INTER ) { /* but mcsel == 1 */ |
479 |
|
|
480 |
|
pMB->mcsel = 1; |
481 |
|
if (Data->qpel) { |
482 |
|
pMB->qmvs[0] = pMB->qmvs[1] = pMB->qmvs[2] = pMB->qmvs[3] = pMB->amv; |
483 |
|
pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = pMB->amv.x/2; |
484 |
|
pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = pMB->amv.y/2; |
485 |
|
} else |
486 |
|
pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = pMB->amv; |
487 |
|
|
488 |
|
} else |
489 |
|
if (mode == MODE_INTER4V) ; /* anything here? */ |
490 |
|
else /* INTRA, NOT_CODED */ |
491 |
|
ZeroMacroblockP(pMB, 0); |
492 |
|
|
493 |
|
pMB->mode = mode; |
494 |
|
} |
495 |
|
|
496 |
|
static __inline void |
497 |
|
PreparePredictionsP(VECTOR * const pmv, int x, int y, int iWcount, |
498 |
|
int iHcount, const MACROBLOCK * const prevMB, int rrv) |
499 |
|
{ |
500 |
|
/* this function depends on get_pmvdata which means that it sucks. It should get the predictions by itself */ |
501 |
|
if (rrv) { iWcount /= 2; iHcount /= 2; } |
502 |
|
|
503 |
|
if ( (y != 0) && (x < (iWcount-1)) ) { /* [5] top-right neighbour */ |
504 |
|
pmv[5].x = EVEN(pmv[3].x); |
505 |
|
pmv[5].y = EVEN(pmv[3].y); |
506 |
|
} else pmv[5].x = pmv[5].y = 0; |
507 |
|
|
508 |
|
if (x != 0) { pmv[3].x = EVEN(pmv[1].x); pmv[3].y = EVEN(pmv[1].y); }/* pmv[3] is left neighbour */ |
509 |
|
else pmv[3].x = pmv[3].y = 0; |
510 |
|
|
511 |
|
if (y != 0) { pmv[4].x = EVEN(pmv[2].x); pmv[4].y = EVEN(pmv[2].y); }/* [4] top neighbour */ |
512 |
|
else pmv[4].x = pmv[4].y = 0; |
513 |
|
|
514 |
|
/* [1] median prediction */ |
515 |
|
pmv[1].x = EVEN(pmv[0].x); pmv[1].y = EVEN(pmv[0].y); |
516 |
|
|
517 |
|
pmv[0].x = pmv[0].y = 0; /* [0] is zero; not used in the loop (checked before) but needed here for make_mask */ |
518 |
|
|
519 |
|
pmv[2].x = EVEN(prevMB->mvs[0].x); /* [2] is last frame */ |
520 |
|
pmv[2].y = EVEN(prevMB->mvs[0].y); |
521 |
|
|
522 |
|
if ((x < iWcount-1) && (y < iHcount-1)) { |
523 |
|
pmv[6].x = EVEN((prevMB+1+iWcount)->mvs[0].x); /* [6] right-down neighbour in last frame */ |
524 |
|
pmv[6].y = EVEN((prevMB+1+iWcount)->mvs[0].y); |
525 |
|
} else pmv[6].x = pmv[6].y = 0; |
526 |
|
|
527 |
|
if (rrv) { |
528 |
|
int i; |
529 |
|
for (i = 0; i < 7; i++) { |
530 |
|
pmv[i].x = RRV_MV_SCALEUP(pmv[i].x); |
531 |
|
pmv[i].y = RRV_MV_SCALEUP(pmv[i].y); |
532 |
|
} |
533 |
|
} |
534 |
|
} |
535 |
|
|
536 |
|
static void |
537 |
|
Search8(SearchData * const OldData, |
538 |
|
const int x, const int y, |
539 |
|
const uint32_t MotionFlags, |
540 |
|
const MBParam * const pParam, |
541 |
|
MACROBLOCK * const pMB, |
542 |
|
const MACROBLOCK * const pMBs, |
543 |
|
const int block, |
544 |
|
SearchData * const Data) |
545 |
|
{ |
546 |
|
int i = 0; |
547 |
|
CheckFunc * CheckCandidate; |
548 |
|
*Data->iMinSAD = *(OldData->iMinSAD + 1 + block); |
549 |
|
*Data->currentMV = *(OldData->currentMV + 1 + block); |
550 |
|
*Data->currentQMV = *(OldData->currentQMV + 1 + block); |
551 |
|
|
552 |
|
if(Data->qpel) { |
553 |
|
Data->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x/2, y/2, block); |
554 |
|
if (block != 0) i = d_mv_bits( Data->currentQMV->x, Data->currentQMV->y, |
555 |
|
Data->predMV, Data->iFcode, 0, 0); |
556 |
|
} else { |
557 |
|
Data->predMV = get_pmv2(pMBs, pParam->mb_width, 0, x/2, y/2, block); |
558 |
|
if (block != 0) i = d_mv_bits( Data->currentMV->x, Data->currentMV->y, |
559 |
|
Data->predMV, Data->iFcode, 0, Data->rrv); |
560 |
|
} |
561 |
|
|
562 |
|
*(Data->iMinSAD) += (Data->lambda8 * i * (*Data->iMinSAD + NEIGH_8X8_BIAS))>>10; |
563 |
|
|
564 |
|
if (MotionFlags & (XVID_ME_EXTSEARCH8|XVID_ME_HALFPELREFINE8|XVID_ME_QUARTERPELREFINE8)) { |
565 |
|
|
566 |
|
if (Data->rrv) i = 16; else i = 8; |
567 |
|
|
568 |
|
Data->RefP[0] = OldData->RefP[0] + i * ((block&1) + Data->iEdgedWidth*(block>>1)); |
569 |
|
Data->RefP[1] = OldData->RefP[1] + i * ((block&1) + Data->iEdgedWidth*(block>>1)); |
570 |
|
Data->RefP[2] = OldData->RefP[2] + i * ((block&1) + Data->iEdgedWidth*(block>>1)); |
571 |
|
Data->RefP[3] = OldData->RefP[3] + i * ((block&1) + Data->iEdgedWidth*(block>>1)); |
572 |
|
|
573 |
|
Data->Cur = OldData->Cur + i * ((block&1) + Data->iEdgedWidth*(block>>1)); |
574 |
|
Data->qpel_precision = 0; |
575 |
|
|
576 |
|
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 3, |
577 |
|
pParam->width, pParam->height, Data->iFcode - Data->qpel, 1, Data->rrv); |
578 |
|
|
579 |
|
if (!Data->rrv) CheckCandidate = CheckCandidate8; |
580 |
|
else CheckCandidate = CheckCandidate16no4v; |
581 |
|
|
582 |
|
if (MotionFlags & XVID_ME_EXTSEARCH8 && (!(MotionFlags & XVID_ME_EXTSEARCH_RD))) { |
583 |
|
int32_t temp_sad = *(Data->iMinSAD); /* store current MinSAD */ |
584 |
|
|
585 |
|
MainSearchFunc *MainSearchPtr; |
586 |
|
if (MotionFlags & XVID_ME_USESQUARES8) MainSearchPtr = xvid_me_SquareSearch; |
587 |
|
else if (MotionFlags & XVID_ME_ADVANCEDDIAMOND8) MainSearchPtr = xvid_me_AdvDiamondSearch; |
588 |
|
else MainSearchPtr = xvid_me_DiamondSearch; |
589 |
|
|
590 |
|
MainSearchPtr(Data->currentMV->x, Data->currentMV->y, Data, 255, CheckCandidate); |
591 |
|
|
592 |
|
if(*(Data->iMinSAD) < temp_sad) { |
593 |
|
Data->currentQMV->x = 2 * Data->currentMV->x; /* update our qpel vector */ |
594 |
|
Data->currentQMV->y = 2 * Data->currentMV->y; |
595 |
|
} |
596 |
|
} |
597 |
|
|
598 |
|
if (MotionFlags & XVID_ME_HALFPELREFINE8) { |
599 |
|
int32_t temp_sad = *(Data->iMinSAD); /* store current MinSAD */ |
600 |
|
|
601 |
|
xvid_me_SubpelRefine(Data, CheckCandidate); /* perform halfpel refine of current best vector */ |
602 |
|
|
603 |
|
if(*(Data->iMinSAD) < temp_sad) { /* we have found a better match */ |
604 |
|
Data->currentQMV->x = 2 * Data->currentMV->x; /* update our qpel vector */ |
605 |
|
Data->currentQMV->y = 2 * Data->currentMV->y; |
606 |
|
} |
607 |
|
} |
608 |
|
|
609 |
|
if (Data->qpel && (MotionFlags & XVID_ME_QUARTERPELREFINE8)) { |
610 |
|
Data->qpel_precision = 1; |
611 |
|
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 3, |
612 |
|
pParam->width, pParam->height, Data->iFcode, 2, 0); |
613 |
|
|
614 |
|
if((MotionFlags & XVID_ME_FASTREFINE8) && (!Data->rrv)) |
615 |
|
SubpelRefine_Fast(Data, CheckCandidate8_qpel); |
616 |
|
else |
617 |
|
xvid_me_SubpelRefine(Data, CheckCandidate); |
618 |
|
} |
619 |
|
} |
620 |
|
|
621 |
|
if (Data->rrv) { |
622 |
|
Data->currentMV->x = RRV_MV_SCALEDOWN(Data->currentMV->x); |
623 |
|
Data->currentMV->y = RRV_MV_SCALEDOWN(Data->currentMV->y); |
624 |
|
} |
625 |
|
|
626 |
|
if(Data->qpel) { |
627 |
|
pMB->pmvs[block].x = Data->currentQMV->x - Data->predMV.x; |
628 |
|
pMB->pmvs[block].y = Data->currentQMV->y - Data->predMV.y; |
629 |
|
pMB->qmvs[block] = *Data->currentQMV; |
630 |
|
} else { |
631 |
|
pMB->pmvs[block].x = Data->currentMV->x - Data->predMV.x; |
632 |
|
pMB->pmvs[block].y = Data->currentMV->y - Data->predMV.y; |
633 |
|
} |
634 |
|
|
635 |
|
*(OldData->iMinSAD + 1 + block) = *Data->iMinSAD; |
636 |
|
*(OldData->currentMV + 1 + block) = *Data->currentMV; |
637 |
|
*(OldData->currentQMV + 1 + block) = *Data->currentQMV; |
638 |
|
|
639 |
|
pMB->mvs[block] = *Data->currentMV; |
640 |
|
pMB->sad8[block] = 4 * *Data->iMinSAD; |
641 |
|
} |
642 |
|
|
643 |
|
|
644 |
|
|
645 |
|
static void |
646 |
|
SearchP(const IMAGE * const pRef, |
647 |
|
const uint8_t * const pRefH, |
648 |
|
const uint8_t * const pRefV, |
649 |
|
const uint8_t * const pRefHV, |
650 |
|
const IMAGE * const pCur, |
651 |
|
const int x, |
652 |
|
const int y, |
653 |
|
const uint32_t MotionFlags, |
654 |
|
const uint32_t VopFlags, |
655 |
|
SearchData * const Data, |
656 |
|
const MBParam * const pParam, |
657 |
|
const MACROBLOCK * const pMBs, |
658 |
|
const MACROBLOCK * const prevMBs, |
659 |
|
MACROBLOCK * const pMB) |
660 |
|
{ |
661 |
|
|
662 |
|
int i, threshA; |
663 |
|
VECTOR pmv[7]; |
664 |
|
int inter4v = (VopFlags & XVID_VOP_INTER4V) && (pMB->dquant == 0); |
665 |
|
CheckFunc * CheckCandidate; |
666 |
|
|
667 |
|
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, |
668 |
|
pParam->width, pParam->height, Data->iFcode - Data->qpel, 1, Data->rrv); |
669 |
|
|
670 |
|
get_pmvdata2(pMBs, pParam->mb_width, 0, x, y, pmv, Data->temp); |
671 |
|
|
672 |
|
Data->chromaX = Data->chromaY = 0; /* chroma-sad cache */ |
673 |
|
i = Data->rrv ? 2 : 1; |
674 |
|
Data->Cur = pCur->y + (x + y * Data->iEdgedWidth) * 16*i; |
675 |
|
Data->CurV = pCur->v + (x + y * (Data->iEdgedWidth/2)) * 8*i; |
676 |
|
Data->CurU = pCur->u + (x + y * (Data->iEdgedWidth/2)) * 8*i; |
677 |
|
|
678 |
|
Data->RefP[0] = pRef->y + (x + Data->iEdgedWidth*y) * 16*i; |
679 |
|
Data->RefP[2] = pRefH + (x + Data->iEdgedWidth*y) * 16*i; |
680 |
|
Data->RefP[1] = pRefV + (x + Data->iEdgedWidth*y) * 16*i; |
681 |
|
Data->RefP[3] = pRefHV + (x + Data->iEdgedWidth*y) * 16*i; |
682 |
|
Data->RefP[4] = pRef->u + (x + y * (Data->iEdgedWidth/2)) * 8*i; |
683 |
|
Data->RefP[5] = pRef->v + (x + y * (Data->iEdgedWidth/2)) * 8*i; |
684 |
|
|
685 |
|
Data->lambda16 = xvid_me_lambda_vec16[pMB->quant]; |
686 |
|
Data->lambda8 = xvid_me_lambda_vec8[pMB->quant]; |
687 |
|
Data->qpel_precision = 0; |
688 |
|
Data->dir = 0; |
689 |
|
|
690 |
|
memset(Data->currentMV, 0, 5*sizeof(VECTOR)); |
691 |
|
|
692 |
|
if (Data->qpel) Data->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x, y, 0); |
693 |
|
else Data->predMV = pmv[0]; |
694 |
|
|
695 |
|
i = d_mv_bits(0, 0, Data->predMV, Data->iFcode, 0, 0); |
696 |
|
Data->iMinSAD[0] = pMB->sad16 + ((Data->lambda16 * i * pMB->sad16)>>10); |
697 |
|
Data->iMinSAD[1] = pMB->sad8[0] + ((Data->lambda8 * i * (pMB->sad8[0]+NEIGH_8X8_BIAS)) >> 10); |
698 |
|
Data->iMinSAD[2] = pMB->sad8[1]; |
699 |
|
Data->iMinSAD[3] = pMB->sad8[2]; |
700 |
|
Data->iMinSAD[4] = pMB->sad8[3]; |
701 |
|
|
702 |
|
if ((!(VopFlags & XVID_VOP_MODEDECISION_RD)) && (x | y)) { |
703 |
|
threshA = Data->temp[0]; /* that's where we keep this SAD atm */ |
704 |
|
if (threshA < 512) threshA = 512; |
705 |
|
else if (threshA > 1024) threshA = 1024; |
706 |
|
} else |
707 |
|
threshA = 512; |
708 |
|
|
709 |
|
PreparePredictionsP(pmv, x, y, pParam->mb_width, pParam->mb_height, |
710 |
|
prevMBs + x + y * pParam->mb_width, Data->rrv); |
711 |
|
|
712 |
|
if (!Data->rrv) { |
713 |
|
if (inter4v) CheckCandidate = CheckCandidate16; |
714 |
|
else CheckCandidate = CheckCandidate16no4v; /* for extra speed */ |
715 |
|
} else CheckCandidate = CheckCandidate32; |
716 |
|
|
717 |
|
/* main loop. checking all predictions (but first, which is 0,0 and has been checked in MotionEstimation())*/ |
718 |
|
|
719 |
|
for (i = 1; i < 7; i++) |
720 |
|
if (!vector_repeats(pmv, i)) { |
721 |
|
CheckCandidate(pmv[i].x, pmv[i].y, Data, i); |
722 |
|
if (Data->iMinSAD[0] <= threshA) { i++; break; } |
723 |
|
} |
724 |
|
|
725 |
|
if ((Data->iMinSAD[0] <= threshA) || |
726 |
|
(MVequal(Data->currentMV[0], (prevMBs+x+y*pParam->mb_width)->mvs[0]) && |
727 |
|
(Data->iMinSAD[0] < (prevMBs+x+y*pParam->mb_width)->sad16))) |
728 |
|
inter4v = 0; |
729 |
|
else { |
730 |
|
|
731 |
|
MainSearchFunc * MainSearchPtr; |
732 |
|
int mask = make_mask(pmv, i, Data->dir); /* all vectors pmv[0..i-1] have been checked */ |
733 |
|
|
734 |
|
if (MotionFlags & XVID_ME_USESQUARES16) MainSearchPtr = xvid_me_SquareSearch; |
735 |
|
else if (MotionFlags & XVID_ME_ADVANCEDDIAMOND16) MainSearchPtr = xvid_me_AdvDiamondSearch; |
736 |
|
else MainSearchPtr = xvid_me_DiamondSearch; |
737 |
|
|
738 |
|
MainSearchPtr(Data->currentMV->x, Data->currentMV->y, Data, mask, CheckCandidate); |
739 |
|
|
740 |
|
/* extended search, diamond starting in 0,0 and in prediction. |
741 |
|
note that this search is/might be done in halfpel positions, |
742 |
|
which makes it more different than the diamond above */ |
743 |
|
|
744 |
|
if (MotionFlags & XVID_ME_EXTSEARCH16) { |
745 |
|
int32_t bSAD; |
746 |
|
VECTOR startMV = Data->predMV, backupMV = Data->currentMV[0]; |
747 |
|
if (Data->qpel) { |
748 |
|
startMV.x /= 2; |
749 |
|
startMV.y /= 2; |
750 |
|
} else if (Data->rrv) { |
751 |
|
startMV.x = RRV_MV_SCALEUP(startMV.x); |
752 |
|
startMV.y = RRV_MV_SCALEUP(startMV.y); |
753 |
|
} |
754 |
|
if (!(MVequal(startMV, backupMV))) { |
755 |
|
bSAD = Data->iMinSAD[0]; Data->iMinSAD[0] = MV_MAX_ERROR; |
756 |
|
|
757 |
|
CheckCandidate(startMV.x, startMV.y, Data, 255); |
758 |
|
xvid_me_DiamondSearch(startMV.x, startMV.y, Data, 255, CheckCandidate); |
759 |
|
if (bSAD < Data->iMinSAD[0]) { |
760 |
|
Data->currentMV[0] = backupMV; |
761 |
|
Data->iMinSAD[0] = bSAD; } |
762 |
|
} |
763 |
|
|
764 |
|
backupMV = Data->currentMV[0]; |
765 |
|
startMV.x = startMV.y = 1; |
766 |
|
if (!(MVequal(startMV, backupMV))) { |
767 |
|
bSAD = Data->iMinSAD[0]; Data->iMinSAD[0] = MV_MAX_ERROR; |
768 |
|
|
769 |
|
CheckCandidate(startMV.x, startMV.y, Data, 255); |
770 |
|
xvid_me_DiamondSearch(startMV.x, startMV.y, Data, 255, CheckCandidate); |
771 |
|
if (bSAD < Data->iMinSAD[0]) { |
772 |
|
Data->currentMV[0] = backupMV; |
773 |
|
Data->iMinSAD[0] = bSAD; |
774 |
|
} |
775 |
|
} |
776 |
|
} |
777 |
|
} |
778 |
|
|
779 |
|
if (MotionFlags & XVID_ME_HALFPELREFINE16) |
780 |
|
xvid_me_SubpelRefine(Data, CheckCandidate); |
781 |
|
|
782 |
|
for(i = 0; i < 5; i++) { |
783 |
|
Data->currentQMV[i].x = 2 * Data->currentMV[i].x; /* initialize qpel vectors */ |
784 |
|
Data->currentQMV[i].y = 2 * Data->currentMV[i].y; |
785 |
|
} |
786 |
|
|
787 |
|
if (Data->qpel) { |
788 |
|
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, |
789 |
|
pParam->width, pParam->height, Data->iFcode, 2, 0); |
790 |
|
Data->qpel_precision = 1; |
791 |
|
if (MotionFlags & XVID_ME_QUARTERPELREFINE16) { |
792 |
|
if(MotionFlags & XVID_ME_FASTREFINE16) |
793 |
|
SubpelRefine_Fast(Data, CheckCandidate16_qpel); |
794 |
|
else |
795 |
|
xvid_me_SubpelRefine(Data, CheckCandidate16_qpel); |
796 |
|
} |
797 |
|
} |
798 |
|
|
799 |
|
if (Data->iMinSAD[0] < (int32_t)pMB->quant * 30*((MotionFlags & XVID_ME_FASTREFINE16) ? 8 : 1)) |
800 |
|
inter4v = 0; |
801 |
|
|
802 |
|
if (inter4v) { |
803 |
|
SearchData Data8; |
804 |
|
memcpy(&Data8, Data, sizeof(SearchData)); /* quick copy of common data */ |
805 |
|
|
806 |
|
Search8(Data, 2*x, 2*y, MotionFlags, pParam, pMB, pMBs, 0, &Data8); |
807 |
|
Search8(Data, 2*x + 1, 2*y, MotionFlags, pParam, pMB, pMBs, 1, &Data8); |
808 |
|
Search8(Data, 2*x, 2*y + 1, MotionFlags, pParam, pMB, pMBs, 2, &Data8); |
809 |
|
Search8(Data, 2*x + 1, 2*y + 1, MotionFlags, pParam, pMB, pMBs, 3, &Data8); |
810 |
|
|
811 |
|
if ((Data->chroma) && (!(VopFlags & XVID_VOP_MODEDECISION_RD))) { |
812 |
|
/* chroma is only used for comparison to INTER. If the comparison will be done in the BITS domain, it will not be used */ |
813 |
|
int sumx = 0, sumy = 0; |
814 |
|
|
815 |
|
if (Data->qpel) |
816 |
|
for (i = 1; i < 5; i++) { |
817 |
|
sumx += Data->currentQMV[i].x/2; |
818 |
|
sumy += Data->currentQMV[i].y/2; |
819 |
|
} |
820 |
|
else |
821 |
|
for (i = 1; i < 5; i++) { |
822 |
|
sumx += Data->currentMV[i].x; |
823 |
|
sumy += Data->currentMV[i].y; |
824 |
|
} |
825 |
|
|
826 |
|
Data->iMinSAD[1] += xvid_me_ChromaSAD((sumx >> 3) + roundtab_76[sumx & 0xf], |
827 |
|
(sumy >> 3) + roundtab_76[sumy & 0xf], Data); |
828 |
|
} |
829 |
|
} else Data->iMinSAD[1] = 4096*256; |
830 |
|
} |
831 |
|
|
832 |
|
static __inline uint32_t |
833 |
|
MakeGoodMotionFlags(const uint32_t MotionFlags, const uint32_t VopFlags, const uint32_t VolFlags) |
834 |
|
{ |
835 |
|
uint32_t Flags = MotionFlags; |
836 |
|
|
837 |
|
if (!(VopFlags & XVID_VOP_MODEDECISION_RD)) |
838 |
|
Flags &= ~(XVID_ME_QUARTERPELREFINE16_RD+XVID_ME_QUARTERPELREFINE8_RD+XVID_ME_HALFPELREFINE16_RD+XVID_ME_HALFPELREFINE8_RD+XVID_ME_EXTSEARCH_RD); |
839 |
|
|
840 |
|
if (Flags & XVID_ME_EXTSEARCH_RD) |
841 |
|
Flags |= XVID_ME_HALFPELREFINE16_RD; |
842 |
|
|
843 |
|
if (Flags & XVID_ME_EXTSEARCH_RD && MotionFlags & XVID_ME_EXTSEARCH8) |
844 |
|
Flags |= XVID_ME_HALFPELREFINE8_RD; |
845 |
|
|
846 |
|
if (Flags & XVID_ME_HALFPELREFINE16_RD) |
847 |
|
Flags |= XVID_ME_QUARTERPELREFINE16_RD; |
848 |
|
|
849 |
|
if (Flags & XVID_ME_HALFPELREFINE8_RD) { |
850 |
|
Flags |= XVID_ME_QUARTERPELREFINE8_RD; |
851 |
|
Flags &= ~XVID_ME_HALFPELREFINE8; |
852 |
|
} |
853 |
|
|
854 |
|
if (Flags & XVID_ME_QUARTERPELREFINE8_RD) |
855 |
|
Flags &= ~XVID_ME_QUARTERPELREFINE8; |
856 |
|
|
857 |
|
if (!(VolFlags & XVID_VOL_QUARTERPEL)) |
858 |
|
Flags &= ~(XVID_ME_QUARTERPELREFINE16+XVID_ME_QUARTERPELREFINE8+XVID_ME_QUARTERPELREFINE16_RD+XVID_ME_QUARTERPELREFINE8_RD); |
859 |
|
|
860 |
|
if (!(VopFlags & XVID_VOP_HALFPEL)) |
861 |
|
Flags &= ~(XVID_ME_EXTSEARCH16+XVID_ME_HALFPELREFINE16+XVID_ME_HALFPELREFINE8+XVID_ME_HALFPELREFINE16_RD+XVID_ME_HALFPELREFINE8_RD); |
862 |
|
|
863 |
|
if ((VopFlags & XVID_VOP_GREYSCALE) || (VopFlags & XVID_VOP_REDUCED)) |
864 |
|
Flags &= ~(XVID_ME_CHROMA_PVOP + XVID_ME_CHROMA_BVOP); |
865 |
|
|
866 |
|
return Flags; |
867 |
|
} |
868 |
|
|
869 |
|
bool |
870 |
|
MotionEstimation(MBParam * const pParam, |
871 |
|
FRAMEINFO * const current, |
872 |
|
FRAMEINFO * const reference, |
873 |
|
const IMAGE * const pRefH, |
874 |
|
const IMAGE * const pRefV, |
875 |
|
const IMAGE * const pRefHV, |
876 |
|
const IMAGE * const pGMC, |
877 |
|
const uint32_t iLimit) |
878 |
|
{ |
879 |
|
MACROBLOCK *const pMBs = current->mbs; |
880 |
|
const IMAGE *const pCurrent = ¤t->image; |
881 |
|
const IMAGE *const pRef = &reference->image; |
882 |
|
|
883 |
|
uint32_t mb_width = pParam->mb_width; |
884 |
|
uint32_t mb_height = pParam->mb_height; |
885 |
|
const uint32_t iEdgedWidth = pParam->edged_width; |
886 |
|
const uint32_t MotionFlags = MakeGoodMotionFlags(current->motion_flags, current->vop_flags, current->vol_flags); |
887 |
|
int stat_thresh = 0; |
888 |
|
|
889 |
|
uint32_t x, y; |
890 |
|
uint32_t iIntra = 0; |
891 |
|
int32_t sad00; |
892 |
|
int skip_thresh = INITIAL_SKIP_THRESH * \ |
893 |
|
(current->vop_flags & XVID_VOP_REDUCED ? 4:1) * \ |
894 |
|
(current->vop_flags & XVID_VOP_MODEDECISION_RD ? 2:1); |
895 |
|
|
896 |
|
/* some pre-initialized thingies for SearchP */ |
897 |
|
DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); |
898 |
|
SearchData Data; |
899 |
|
memset(&Data, 0, sizeof(SearchData)); |
900 |
|
Data.iEdgedWidth = iEdgedWidth; |
901 |
|
Data.iFcode = current->fcode; |
902 |
|
Data.rounding = pParam->m_rounding_type; |
903 |
|
Data.qpel = (current->vol_flags & XVID_VOL_QUARTERPEL ? 1:0); |
904 |
|
Data.chroma = MotionFlags & XVID_ME_CHROMA_PVOP; |
905 |
|
Data.rrv = (current->vop_flags & XVID_VOP_REDUCED) ? 1:0; |
906 |
|
Data.dctSpace = dct_space; |
907 |
|
Data.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); |
908 |
|
Data.mpeg_quant_matrices = pParam->mpeg_quant_matrices; |
909 |
|
Data.iMinSAD2 = 0; |
910 |
|
|
911 |
|
if ((current->vop_flags & XVID_VOP_REDUCED)) { |
912 |
|
mb_width = (pParam->width + 31) / 32; |
913 |
|
mb_height = (pParam->height + 31) / 32; |
914 |
|
Data.qpel = 0; |
915 |
|
} |
916 |
|
|
917 |
|
Data.RefQ = pRefV->u; /* a good place, also used in MC (for similar purpose) */ |
918 |
|
if (sadInit) (*sadInit) (); |
919 |
|
|
920 |
|
for (y = 0; y < mb_height; y++) { |
921 |
|
for (x = 0; x < mb_width; x++) { |
922 |
|
MACROBLOCK *pMB = &pMBs[x + y * pParam->mb_width]; |
923 |
|
MACROBLOCK *prevMB = &reference->mbs[x + y * pParam->mb_width]; |
924 |
|
|
925 |
|
if (!Data.rrv) pMB->sad16 = |
926 |
|
sad16v(pCurrent->y + (x + y * iEdgedWidth) * 16, |
927 |
|
pRef->y + (x + y * iEdgedWidth) * 16, |
928 |
|
pParam->edged_width, pMB->sad8 ); |
929 |
|
|
930 |
|
else pMB->sad16 = |
931 |
|
sad32v_c(pCurrent->y + (x + y * iEdgedWidth) * 32, |
932 |
|
pRef->y + (x + y * iEdgedWidth) * 32, |
933 |
|
pParam->edged_width, pMB->sad8 ); |
934 |
|
|
935 |
|
if (Data.chroma) { |
936 |
|
Data.chromaSAD = sad8(pCurrent->u + x*8 + y*(iEdgedWidth/2)*8, |
937 |
|
pRef->u + x*8 + y*(iEdgedWidth/2)*8, iEdgedWidth/2) |
938 |
|
+ sad8(pCurrent->v + (x + y*(iEdgedWidth/2))*8, |
939 |
|
pRef->v + (x + y*(iEdgedWidth/2))*8, iEdgedWidth/2); |
940 |
|
pMB->sad16 += Data.chromaSAD; |
941 |
|
} |
942 |
|
|
943 |
|
sad00 = pMB->sad16; |
944 |
|
|
945 |
|
/* initial skip decision */ |
946 |
|
/* no early skip for GMC (global vector = skip vector is unknown!) */ |
947 |
|
if (current->coding_type != S_VOP) { /* no fast SKIP for S(GMC)-VOPs */ |
948 |
|
if (pMB->dquant == 0 && sad00 < pMB->quant * skip_thresh) |
949 |
|
if (Data.chroma || xvid_me_SkipDecisionP(pCurrent, pRef, x, y, iEdgedWidth/2, pMB->quant, Data.rrv)) { |
950 |
|
ZeroMacroblockP(pMB, sad00); |
951 |
|
pMB->mode = MODE_NOT_CODED; |
952 |
|
continue; |
953 |
|
} |
954 |
|
} |
955 |
|
|
956 |
|
if(MotionFlags & XVID_ME_DETECT_STATIC_MOTION) { |
957 |
|
if(x > 0 && y > 0 && x < pParam->mb_width) { |
958 |
|
if(MVequal((&pMBs[(x-1) + y * pParam->mb_width])->mvs[0], zeroMV) && |
959 |
|
MVequal((&pMBs[x + (y-1) * pParam->mb_width])->mvs[0], zeroMV) && |
960 |
|
MVequal((&pMBs[(x+1) + (y-1) * pParam->mb_width])->mvs[0], zeroMV) && |
961 |
|
MVequal(prevMB->mvs[0], zeroMV)) { |
962 |
|
stat_thresh = MAX((&pMBs[(x-1) + y * pParam->mb_width])->sad16, |
963 |
|
MAX((&pMBs[x + (y-1) * pParam->mb_width])->sad16, |
964 |
|
MAX((&pMBs[(x+1) + (y-1) * pParam->mb_width])->sad16, |
965 |
|
prevMB->sad16))); |
966 |
|
} else { |
967 |
|
stat_thresh = MIN((&pMBs[(x-1) + y * pParam->mb_width])->sad16, |
968 |
|
MIN((&pMBs[x + (y-1) * pParam->mb_width])->sad16, |
969 |
|
MIN((&pMBs[(x+1) + (y-1) * pParam->mb_width])->sad16, |
970 |
|
prevMB->sad16))); |
971 |
|
} |
972 |
|
} |
973 |
|
} |
974 |
|
|
975 |
|
/* favorize (0,0) vector for cartoons */ |
976 |
|
if ((current->vop_flags & XVID_VOP_CARTOON) && |
977 |
|
((sad00 < pMB->quant * 4 * skip_thresh) || (sad00 < stat_thresh))) { |
978 |
|
ZeroMacroblockP(pMB, sad00); |
979 |
|
continue; |
980 |
|
} |
981 |
|
|
982 |
|
SearchP(pRef, pRefH->y, pRefV->y, pRefHV->y, pCurrent, x, |
983 |
|
y, MotionFlags, current->vop_flags, |
984 |
|
&Data, pParam, pMBs, reference->mbs, pMB); |
985 |
|
|
986 |
|
if (current->vop_flags & XVID_VOP_MODEDECISION_RD) |
987 |
|
xvid_me_ModeDecision_RD(&Data, pMB, pMBs, x, y, pParam, |
988 |
|
MotionFlags, current->vop_flags, current->vol_flags, |
989 |
|
pCurrent, pRef, pGMC, current->coding_type); |
990 |
|
|
991 |
|
else if (current->vop_flags & XVID_VOP_FAST_MODEDECISION_RD) |
992 |
|
xvid_me_ModeDecision_Fast(&Data, pMB, pMBs, x, y, pParam, |
993 |
|
MotionFlags, current->vop_flags, current->vol_flags, |
994 |
|
pCurrent, pRef, pGMC, current->coding_type); |
995 |
|
else |
996 |
|
ModeDecision_SAD(&Data, pMB, pMBs, x, y, pParam, |
997 |
|
MotionFlags, current->vop_flags, current->vol_flags, |
998 |
|
pCurrent, pRef, pGMC, current->coding_type); |
999 |
|
|
1000 |
|
|
1001 |
|
if (pMB->mode == MODE_INTRA) |
1002 |
|
if (++iIntra > iLimit) return 1; |
1003 |
|
} |
1004 |
|
} |
1005 |
|
return 0; |
1006 |
|
} |
1007 |
|
|
1008 |
|
|