141 |
+ d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision); |
+ d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision); |
142 |
|
|
143 |
sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
144 |
sad += (data->lambda16 * t * sad)>>10; |
sad += (data->lambda16 * t); |
145 |
|
|
146 |
if (data->chroma && sad < *data->iMinSAD) |
if (data->chroma && sad < *data->iMinSAD) |
147 |
sad += ChromaSAD2((xcf >> 1) + roundtab_79[xcf & 0x3], |
sad += ChromaSAD2((xcf >> 1) + roundtab_79[xcf & 0x3], |
207 |
if (sad > *(data->iMinSAD)) return; |
if (sad > *(data->iMinSAD)) return; |
208 |
} |
} |
209 |
|
|
210 |
sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0) * sad)>>10; |
sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0)); |
211 |
|
|
212 |
if (data->chroma && sad < *data->iMinSAD) |
if (data->chroma && sad < *data->iMinSAD) |
213 |
sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], |
sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], |
265 |
|
|
266 |
done: |
done: |
267 |
sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
268 |
sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0) * sad)>>10; |
sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0)); |
269 |
|
|
270 |
if (data->chroma && sad < *data->iMinSAD) |
if (data->chroma && sad < *data->iMinSAD) |
271 |
sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], |
sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], |
304 |
data->qpel^data->qpel_precision); |
data->qpel^data->qpel_precision); |
305 |
|
|
306 |
sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096); |
sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096); |
307 |
sad += (data->lambda16 * t * sad)>>10; |
sad += (data->lambda16 * t); |
308 |
|
|
309 |
if (data->chroma && sad < *data->iMinSAD) |
if (data->chroma && sad < *data->iMinSAD) |
310 |
sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], |
sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], |
1081 |
|
|
1082 |
if (frame->vop_flags & XVID_VOP_RD_BVOP) |
if (frame->vop_flags & XVID_VOP_RD_BVOP) |
1083 |
ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, |
ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, |
1084 |
pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, pParam, i, j); |
pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, pParam, i, j, best_sad); |
1085 |
else |
else |
1086 |
ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV); |
ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV); |
1087 |
|
|
1093 |
frame->fcode = getMinFcode(MVmaxF); |
frame->fcode = getMinFcode(MVmaxF); |
1094 |
frame->bcode = getMinFcode(MVmaxB); |
frame->bcode = getMinFcode(MVmaxB); |
1095 |
} |
} |
1096 |
|
|
1097 |
|
|
1098 |
|
|
1099 |
|
void |
1100 |
|
SMPMotionEstimationBVOP(SMPmotionData * h) |
1101 |
|
{ |
1102 |
|
const MBParam * const pParam = h->pParam; |
1103 |
|
const FRAMEINFO * const frame = h->current; |
1104 |
|
const int32_t time_bp = h->time_bp; |
1105 |
|
const int32_t time_pp = h->time_pp; |
1106 |
|
/* forward (past) reference */ |
1107 |
|
const MACROBLOCK * const f_mbs = h->f_mbs; |
1108 |
|
const IMAGE * const f_ref = h->fRef; |
1109 |
|
const IMAGE * const f_refH = h->fRefH; |
1110 |
|
const IMAGE * const f_refV = h->fRefV; |
1111 |
|
const IMAGE * const f_refHV = h->fRefHV; |
1112 |
|
/* backward (future) reference */ |
1113 |
|
const FRAMEINFO * const b_reference = h->reference; |
1114 |
|
const IMAGE * const b_ref = h->pRef; |
1115 |
|
const IMAGE * const b_refH = h->pRefH; |
1116 |
|
const IMAGE * const b_refV = h->pRefV; |
1117 |
|
const IMAGE * const b_refHV = h->pRefHV; |
1118 |
|
|
1119 |
|
int y_step = h->y_step; |
1120 |
|
int start_y = h->start_y; |
1121 |
|
int * complete_count_self = h->complete_count_self; |
1122 |
|
const int * complete_count_above = h->complete_count_above; |
1123 |
|
int max_mbs; |
1124 |
|
int current_mb = 0; |
1125 |
|
|
1126 |
|
uint32_t i, j; |
1127 |
|
int32_t best_sad = 256*4096; |
1128 |
|
uint32_t skip_sad; |
1129 |
|
int fb_thresh; |
1130 |
|
const MACROBLOCK * const b_mbs = b_reference->mbs; |
1131 |
|
|
1132 |
|
VECTOR f_predMV, b_predMV; |
1133 |
|
|
1134 |
|
int MVmaxF = 0, MVmaxB = 0; |
1135 |
|
const int32_t TRB = time_pp - time_bp; |
1136 |
|
const int32_t TRD = time_pp; |
1137 |
|
DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); |
1138 |
|
|
1139 |
|
/* some pre-inintialized data for the rest of the search */ |
1140 |
|
SearchData Data_d, Data_f, Data_b, Data_i; |
1141 |
|
memset(&Data_d, 0, sizeof(SearchData)); |
1142 |
|
|
1143 |
|
Data_d.iEdgedWidth = pParam->edged_width; |
1144 |
|
Data_d.qpel = pParam->vol_flags & XVID_VOL_QUARTERPEL ? 1 : 0; |
1145 |
|
Data_d.rounding = 0; |
1146 |
|
Data_d.chroma = frame->motion_flags & XVID_ME_CHROMA_BVOP; |
1147 |
|
Data_d.iQuant = frame->quant; |
1148 |
|
Data_d.quant_sq = frame->quant*frame->quant; |
1149 |
|
Data_d.dctSpace = dct_space; |
1150 |
|
Data_d.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); |
1151 |
|
Data_d.mpeg_quant_matrices = pParam->mpeg_quant_matrices; |
1152 |
|
|
1153 |
|
Data_d.RefQ = h->RefQ; |
1154 |
|
|
1155 |
|
memcpy(&Data_f, &Data_d, sizeof(SearchData)); |
1156 |
|
memcpy(&Data_b, &Data_d, sizeof(SearchData)); |
1157 |
|
memcpy(&Data_i, &Data_d, sizeof(SearchData)); |
1158 |
|
|
1159 |
|
Data_f.iFcode = Data_i.iFcode = frame->fcode; |
1160 |
|
Data_b.iFcode = Data_i.bFcode = frame->bcode; |
1161 |
|
|
1162 |
|
max_mbs = 0; |
1163 |
|
|
1164 |
|
for (j = start_y; j < pParam->mb_height; j += y_step) { |
1165 |
|
if (j == 0) max_mbs = pParam->mb_width; /* we can process all blocks of the first row */ |
1166 |
|
|
1167 |
|
f_predMV = b_predMV = zeroMV; /* prediction is reset at left boundary */ |
1168 |
|
|
1169 |
|
for (i = 0; i < pParam->mb_width; i++) { |
1170 |
|
MACROBLOCK * const pMB = frame->mbs + i + j * pParam->mb_width; |
1171 |
|
const MACROBLOCK * const b_mb = b_mbs + i + j * pParam->mb_width; |
1172 |
|
pMB->mode = -1; |
1173 |
|
|
1174 |
|
initialize_searchData(&Data_d, &Data_f, &Data_b, &Data_i, |
1175 |
|
i, j, f_ref, f_refH->y, f_refV->y, f_refHV->y, |
1176 |
|
b_ref, b_refH->y, b_refV->y, b_refHV->y, |
1177 |
|
&frame->image, b_mb); |
1178 |
|
|
1179 |
|
if (current_mb >= max_mbs) { |
1180 |
|
/* we ME-ed all macroblocks we safely could. grab next portion */ |
1181 |
|
int above_count = *complete_count_above; /* sync point */ |
1182 |
|
if (above_count == pParam->mb_width) { |
1183 |
|
/* full line above is ready */ |
1184 |
|
above_count = pParam->mb_width+1; |
1185 |
|
if (j < pParam->mb_height-y_step) { |
1186 |
|
/* this is not last line, grab a portion of MBs from the next line too */ |
1187 |
|
above_count += MAX(0, complete_count_above[1] - 1); |
1188 |
|
} |
1189 |
|
} |
1190 |
|
|
1191 |
|
max_mbs = current_mb + above_count - i - 1; |
1192 |
|
|
1193 |
|
if (current_mb >= max_mbs) { |
1194 |
|
/* current workload is zero */ |
1195 |
|
i--; |
1196 |
|
sched_yield(); |
1197 |
|
continue; |
1198 |
|
} |
1199 |
|
} |
1200 |
|
|
1201 |
|
/* special case, if collocated block is SKIPed in P-VOP: encoding is forward (0,0), cpb=0 without further ado */ |
1202 |
|
if (b_reference->coding_type != S_VOP) |
1203 |
|
if (b_mb->mode == MODE_NOT_CODED) { |
1204 |
|
pMB->mode = MODE_NOT_CODED; |
1205 |
|
pMB->mvs[0] = pMB->b_mvs[0] = zeroMV; |
1206 |
|
pMB->sad16 = 0; |
1207 |
|
*complete_count_self = i+1; |
1208 |
|
current_mb++; |
1209 |
|
continue; |
1210 |
|
} |
1211 |
|
|
1212 |
|
/* direct search comes first, because it (1) checks for SKIP-mode |
1213 |
|
and (2) sets very good predictions for forward and backward search */ |
1214 |
|
skip_sad = SearchDirect_initial(i, j, frame->motion_flags, TRB, TRD, pParam, pMB, |
1215 |
|
b_mb, &best_sad, &Data_d); |
1216 |
|
|
1217 |
|
if (pMB->mode == MODE_DIRECT_NONE_MV) { |
1218 |
|
pMB->sad16 = best_sad; |
1219 |
|
pMB->cbp = 0; |
1220 |
|
*complete_count_self = i+1; |
1221 |
|
current_mb++; |
1222 |
|
continue; |
1223 |
|
} |
1224 |
|
|
1225 |
|
SearchBF_initial(i, j, frame->motion_flags, frame->fcode, pParam, pMB, |
1226 |
|
&f_predMV, &best_sad, MODE_FORWARD, &Data_f, Data_d.currentMV[1]); |
1227 |
|
|
1228 |
|
SearchBF_initial(i, j, frame->motion_flags, frame->bcode, pParam, pMB, |
1229 |
|
&b_predMV, &best_sad, MODE_BACKWARD, &Data_b, Data_d.currentMV[2]); |
1230 |
|
|
1231 |
|
if (frame->motion_flags&XVID_ME_BFRAME_EARLYSTOP) |
1232 |
|
fb_thresh = best_sad; |
1233 |
|
else |
1234 |
|
fb_thresh = best_sad + (best_sad>>1); |
1235 |
|
|
1236 |
|
if (Data_f.iMinSAD[0] <= fb_thresh) |
1237 |
|
SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_f); |
1238 |
|
|
1239 |
|
if (Data_b.iMinSAD[0] <= fb_thresh) |
1240 |
|
SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_b); |
1241 |
|
|
1242 |
|
SearchInterpolate_initial(i, j, frame->motion_flags, pParam, &f_predMV, &b_predMV, &best_sad, |
1243 |
|
&Data_i, Data_f.currentMV[0], Data_b.currentMV[0]); |
1244 |
|
|
1245 |
|
if (((Data_i.iMinSAD[0] < best_sad +(best_sad>>3)) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) |
1246 |
|
|| Data_i.iMinSAD[0] <= best_sad) |
1247 |
|
|
1248 |
|
SearchInterpolate_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_i); |
1249 |
|
|
1250 |
|
if (Data_d.iMinSAD[0] <= 2*best_sad) |
1251 |
|
if ((!(frame->motion_flags&XVID_ME_SKIP_DELTASEARCH) && (best_sad > 750)) |
1252 |
|
|| (best_sad > 1000)) |
1253 |
|
|
1254 |
|
SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); |
1255 |
|
|
1256 |
|
/* final skip decision */ |
1257 |
|
if ( (skip_sad < 2 * Data_d.iQuant * MAX_SAD00_FOR_SKIP ) |
1258 |
|
&& ((100*best_sad)/(skip_sad+1) > FINAL_SKIP_THRESH) ) { |
1259 |
|
|
1260 |
|
Data_d.chromaSAD = 0; /* green light for chroma check */ |
1261 |
|
|
1262 |
|
SkipDecisionB(pMB, &Data_d); |
1263 |
|
|
1264 |
|
if (pMB->mode == MODE_DIRECT_NONE_MV) { /* skipped? */ |
1265 |
|
pMB->sad16 = skip_sad; |
1266 |
|
pMB->cbp = 0; |
1267 |
|
*complete_count_self = i+1; |
1268 |
|
current_mb++; |
1269 |
|
continue; |
1270 |
|
} |
1271 |
|
} |
1272 |
|
|
1273 |
|
if (frame->vop_flags & XVID_VOP_RD_BVOP) |
1274 |
|
ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, |
1275 |
|
pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, pParam, i, j, best_sad); |
1276 |
|
else |
1277 |
|
ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV); |
1278 |
|
|
1279 |
|
*complete_count_self = i+1; |
1280 |
|
current_mb++; |
1281 |
|
} |
1282 |
|
|
1283 |
|
complete_count_self++; |
1284 |
|
complete_count_above++; |
1285 |
|
} |
1286 |
|
} |