--- estimation_bvop.c 2004/10/12 21:08:41 1.16 +++ estimation_bvop.c 2006/02/25 01:20:41 1.25 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: estimation_bvop.c,v 1.16 2004/10/12 21:08:41 edgomez Exp $ + * $Id: estimation_bvop.c,v 1.25 2006/02/25 01:20:41 syskin Exp $ * ****************************************************************************/ @@ -137,11 +137,11 @@ xcb = xb/2; ycb = yb/2; } - t = d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0) - + d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision, 0); + t = d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision) + + d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision); sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); - sad += (data->lambda16 * t * sad)>>10; + sad += (data->lambda16 * t); if (data->chroma && sad < *data->iMinSAD) sad += ChromaSAD2((xcf >> 1) + roundtab_79[xcf & 0x3], @@ -207,7 +207,7 @@ if (sad > *(data->iMinSAD)) return; } - sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0, 0) * sad)>>10; + sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0)); if (data->chroma && sad < *data->iMinSAD) sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], @@ -265,7 +265,7 @@ done: sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); - sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0, 0) * sad)>>10; + sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0)); if (data->chroma && sad < *data->iMinSAD) sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], @@ -291,8 +291,6 @@ if ( (x > data->max_dx) || ( x < data->min_dx) || (y > data->max_dy) || (y < data->min_dy) ) return; - if (data->rrv && (!(x&1) && x !=0) | (!(y&1) && y !=0) ) return; /* non-zero even value */ - if (data->qpel_precision) { /* x and y are in 1/4 precision */ Reference = 
xvid_me_interpolate16x16qpel(x, y, 0, data); current = data->currentQMV; @@ -303,10 +301,10 @@ xc = x; yc = y; } t = d_mv_bits(x, y, data->predMV, data->iFcode, - data->qpel^data->qpel_precision, data->rrv); + data->qpel^data->qpel_precision); sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096); - sad += (data->lambda16 * t * sad)>>10; + sad += (data->lambda16 * t); if (data->chroma && sad < *data->iMinSAD) sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], @@ -445,7 +443,7 @@ Data->predMV = *predMV; get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, - pParam->width, pParam->height, iFcode - Data->qpel, 1, 0); + pParam->width, pParam->height, iFcode - Data->qpel, 1); pmv[0] = Data->predMV; if (Data->qpel) { @@ -492,7 +490,7 @@ if(MotionFlags & XVID_ME_FASTREFINE16) { /* fast */ get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, - pParam->width, pParam->height, Data->iFcode, 2, 0); + pParam->width, pParam->height, Data->iFcode, 2); FullRefine_Fast(Data, CheckCandidate16no4v, 0); } else { @@ -507,7 +505,7 @@ Data->currentQMV->y = 2*Data->currentMV->y; } get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, - pParam->width, pParam->height, Data->iFcode, 2, 0); + pParam->width, pParam->height, Data->iFcode, 2); Data->qpel_precision = 1; xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidate16no4v, 0); /* qpel part */ } @@ -713,8 +711,8 @@ Data->currentMV[0] = startF; Data->currentMV[1] = startB; - get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1, 0); - get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1, 0); + get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1); + get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, 
Data->bFcode - Data->qpel, 1); if (Data->currentMV[0].x > f_range[1]) Data->currentMV[0].x = f_range[1]; if (Data->currentMV[0].x < f_range[0]) Data->currentMV[0].x = f_range[0]; @@ -743,8 +741,8 @@ int i, j; int b_range[4], f_range[4]; - get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1, 0); - get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1, 0); + get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1); + get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1); /* diamond */ do { @@ -774,7 +772,7 @@ if (Data->qpel) { Data->qpel_precision = 1; get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, - x, y, 4, pParam->width, pParam->height, Data->iFcode, 2, 0); + x, y, 4, pParam->width, pParam->height, Data->iFcode, 2); Data->currentQMV[0].x = 2 * Data->currentMV[0].x; Data->currentQMV[0].y = 2 * Data->currentMV[0].y; @@ -785,7 +783,7 @@ xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidateInt, 1); get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, - x, y, 4, pParam->width, pParam->height, Data->bFcode, 2, 0); + x, y, 4, pParam->width, pParam->height, Data->bFcode, 2); xvid_me_SubpelRefine(Data->currentQMV[1], Data, CheckCandidateInt, 2); } @@ -831,6 +829,7 @@ pMB->sad16 = best_sad; pMB->mode = mode; + pMB->cbp = 63; switch (mode) { @@ -920,6 +919,32 @@ } } +static __inline void +maxMotionBVOP(int * const MVmaxF, int * const MVmaxB, const MACROBLOCK * const pMB, const int qpel) +{ /* Raise the running maxima *MVmaxF / *MVmaxB so that they cover the motion vector stored in pMB. The forward vector is examined when pMB->mode is MODE_FORWARD or MODE_INTERPOLATE, the backward vector when it is MODE_BACKWARD or MODE_INTERPOLATE; for any other mode neither output is written. A non-zero qpel selects the qmvs / b_qmvs arrays instead of mvs / b_mvs. Only element [0] of each array is read. The callers visible in this patch seed both maxima with 0 and hand the final values to getMinFcode(). */ + if (pMB->mode == MODE_FORWARD || pMB->mode == MODE_INTERPOLATE) { + const VECTOR * const mv = qpel ? 
pMB->qmvs : pMB->mvs; /* forward vector array for the selected precision */ + int max = *MVmaxF; /* start from the forward maximum seen so far */ + if (mv[0].x > max) max = mv[0].x; + else if (-mv[0].x - 1 > max) max = -mv[0].x - 1; /* the negated component is compared as -v - 1; NOTE(review): presumably because the codable range is asymmetric - confirm against getMinFcode() */ + if (mv[0].y > max) max = mv[0].y; + else if (-mv[0].y - 1 > max) max = -mv[0].y - 1; /* same rule for the vertical component */ + + *MVmaxF = max; + } + + if (pMB->mode == MODE_BACKWARD || pMB->mode == MODE_INTERPOLATE) { + const VECTOR * const mv = qpel ? pMB->b_qmvs : pMB->b_mvs; /* backward vector array for the selected precision */ + int max = *MVmaxB; /* start from the backward maximum seen so far */ + if (mv[0].x > max) max = mv[0].x; + else if (-mv[0].x - 1 > max) max = -mv[0].x - 1; /* same -v - 1 rule as in the forward branch */ + if (mv[0].y > max) max = mv[0].y; + else if (-mv[0].y - 1 > max) max = -mv[0].y - 1; + *MVmaxB = max; + } +} + + void MotionEstimationBVOP(MBParam * const pParam, FRAMEINFO * const frame, @@ -940,13 +965,13 @@ { uint32_t i, j; int32_t best_sad = 256*4096; - int32_t sad2; uint32_t skip_sad; - + int fb_thresh; const MACROBLOCK * const b_mbs = b_reference->mbs; VECTOR f_predMV, b_predMV; + int MVmaxF = 0, MVmaxB = 0; const int32_t TRB = time_pp - time_bp; const int32_t TRD = time_pp; DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); @@ -960,6 +985,7 @@ Data_d.rounding = 0; Data_d.chroma = frame->motion_flags & XVID_ME_CHROMA_BVOP; Data_d.iQuant = frame->quant; + Data_d.quant_sq = frame->quant*frame->quant; Data_d.dctSpace = dct_space; Data_d.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); Data_d.mpeg_quant_matrices = pParam->mpeg_quant_matrices; @@ -970,11 +996,173 @@ memcpy(&Data_b, &Data_d, sizeof(SearchData)); memcpy(&Data_i, &Data_d, sizeof(SearchData)); + Data_f.iFcode = Data_i.iFcode = frame->fcode = b_reference->fcode; + Data_b.iFcode = Data_i.bFcode = frame->bcode = b_reference->fcode; + + for (j = 0; j < pParam->mb_height; j++) { + + f_predMV = b_predMV = zeroMV; /* prediction is reset at left boundary */ + + for (i = 0; i < pParam->mb_width; i++) { + MACROBLOCK * const pMB = frame->mbs + i + j * pParam->mb_width; + const MACROBLOCK * const b_mb = b_mbs + i + j * pParam->mb_width; + pMB->mode = -1; + + initialize_searchData(&Data_d, &Data_f, &Data_b, &Data_i, + i, 
j, f_ref, f_refH->y, f_refV->y, f_refHV->y, + b_ref, b_refH->y, b_refV->y, b_refHV->y, + &frame->image, b_mb); + +/* special case, if collocated block is SKIPed in P-VOP: encoding is forward (0,0), cpb=0 without further ado */ + if (b_reference->coding_type != S_VOP) + if (b_mb->mode == MODE_NOT_CODED) { + pMB->mode = MODE_NOT_CODED; + pMB->mvs[0] = pMB->b_mvs[0] = zeroMV; + pMB->sad16 = 0; + continue; + } + +/* direct search comes first, because it (1) checks for SKIP-mode + and (2) sets very good predictions for forward and backward search */ + skip_sad = SearchDirect_initial(i, j, frame->motion_flags, TRB, TRD, pParam, pMB, + b_mb, &best_sad, &Data_d); + + if (pMB->mode == MODE_DIRECT_NONE_MV) { + pMB->sad16 = best_sad; + pMB->cbp = 0; + continue; + } + + SearchBF_initial(i, j, frame->motion_flags, frame->fcode, pParam, pMB, + &f_predMV, &best_sad, MODE_FORWARD, &Data_f, Data_d.currentMV[1]); + + SearchBF_initial(i, j, frame->motion_flags, frame->bcode, pParam, pMB, + &b_predMV, &best_sad, MODE_BACKWARD, &Data_b, Data_d.currentMV[2]); + + if (frame->motion_flags&XVID_ME_BFRAME_EARLYSTOP) + fb_thresh = best_sad; + else + fb_thresh = best_sad + (best_sad>>1); + + if (Data_f.iMinSAD[0] <= fb_thresh) + SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_f); + + if (Data_b.iMinSAD[0] <= fb_thresh) + SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_b); + + SearchInterpolate_initial(i, j, frame->motion_flags, pParam, &f_predMV, &b_predMV, &best_sad, + &Data_i, Data_f.currentMV[0], Data_b.currentMV[0]); + + if (((Data_i.iMinSAD[0] < best_sad +(best_sad>>3)) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) + || Data_i.iMinSAD[0] <= best_sad) + + SearchInterpolate_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_i); + + if (Data_d.iMinSAD[0] <= 2*best_sad) + if ((!(frame->motion_flags&XVID_ME_SKIP_DELTASEARCH) && (best_sad > 750)) + || (best_sad > 1000)) + + SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); 
+ + /* final skip decision */ + if ( (skip_sad < 2 * Data_d.iQuant * MAX_SAD00_FOR_SKIP ) + && ((100*best_sad)/(skip_sad+1) > FINAL_SKIP_THRESH) ) { + + Data_d.chromaSAD = 0; /* green light for chroma check */ + + SkipDecisionB(pMB, &Data_d); + + if (pMB->mode == MODE_DIRECT_NONE_MV) { /* skipped? */ + pMB->sad16 = skip_sad; + pMB->cbp = 0; + continue; + } + } + + if (frame->vop_flags & XVID_VOP_RD_BVOP) + ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, + pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, pParam, i, j, best_sad); + else + ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV); + + maxMotionBVOP(&MVmaxF, &MVmaxB, pMB, Data_d.qpel); + + } + } + + frame->fcode = getMinFcode(MVmaxF); + frame->bcode = getMinFcode(MVmaxB); +} + + + +void +SMPMotionEstimationBVOP(SMPmotionData * h) +{ + const MBParam * const pParam = h->pParam; + const FRAMEINFO * const frame = h->current; + const int32_t time_bp = h->time_bp; + const int32_t time_pp = h->time_pp; + /* forward (past) reference */ + const MACROBLOCK * const f_mbs = h->f_mbs; + const IMAGE * const f_ref = h->fRef; + const IMAGE * const f_refH = h->fRefH; + const IMAGE * const f_refV = h->fRefV; + const IMAGE * const f_refHV = h->fRefHV; + /* backward (future) reference */ + const FRAMEINFO * const b_reference = h->reference; + const IMAGE * const b_ref = h->pRef; + const IMAGE * const b_refH = h->pRefH; + const IMAGE * const b_refV = h->pRefV; + const IMAGE * const b_refHV = h->pRefHV; + + int y_step = h->y_step; + int start_y = h->start_y; + int * complete_count_self = h->complete_count_self; + const int * complete_count_above = h->complete_count_above; + int max_mbs; + int current_mb = 0; + + int32_t i, j; + int32_t best_sad = 256*4096; + uint32_t skip_sad; + int fb_thresh; + const MACROBLOCK * const b_mbs = b_reference->mbs; + + VECTOR f_predMV, b_predMV; + + int MVmaxF = 0, MVmaxB = 0; + const int32_t TRB = time_pp - time_bp; + const int32_t TRD = time_pp; 
+ DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); + + /* some pre-inintialized data for the rest of the search */ + SearchData Data_d, Data_f, Data_b, Data_i; + memset(&Data_d, 0, sizeof(SearchData)); + + Data_d.iEdgedWidth = pParam->edged_width; + Data_d.qpel = pParam->vol_flags & XVID_VOL_QUARTERPEL ? 1 : 0; + Data_d.rounding = 0; + Data_d.chroma = frame->motion_flags & XVID_ME_CHROMA_BVOP; + Data_d.iQuant = frame->quant; + Data_d.quant_sq = frame->quant*frame->quant; + Data_d.dctSpace = dct_space; + Data_d.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); + Data_d.mpeg_quant_matrices = pParam->mpeg_quant_matrices; + + Data_d.RefQ = h->RefQ; + + memcpy(&Data_f, &Data_d, sizeof(SearchData)); + memcpy(&Data_b, &Data_d, sizeof(SearchData)); + memcpy(&Data_i, &Data_d, sizeof(SearchData)); + Data_f.iFcode = Data_i.iFcode = frame->fcode; Data_b.iFcode = Data_i.bFcode = frame->bcode; + max_mbs = 0; - for (j = 0; j < pParam->mb_height; j++) { + for (j = start_y; j < pParam->mb_height; j += y_step) { + if (j == 0) max_mbs = pParam->mb_width; /* we can process all blocks of the first row */ f_predMV = b_predMV = zeroMV; /* prediction is reset at left boundary */ @@ -988,12 +1176,36 @@ b_ref, b_refH->y, b_refV->y, b_refHV->y, &frame->image, b_mb); + if (current_mb >= max_mbs) { + /* we ME-ed all macroblocks we safely could. 
grab next portion */ + int above_count = *complete_count_above; /* sync point */ + if (above_count == pParam->mb_width) { + /* full line above is ready */ + above_count = pParam->mb_width+1; + if (j < pParam->mb_height-y_step) { + /* this is not last line, grab a portion of MBs from the next line too */ + above_count += MAX(0, complete_count_above[1] - 1); + } + } + + max_mbs = current_mb + above_count - i - 1; + + if (current_mb >= max_mbs) { + /* current workload is zero */ + i--; + sched_yield(); + continue; + } + } + /* special case, if collocated block is SKIPed in P-VOP: encoding is forward (0,0), cpb=0 without further ado */ if (b_reference->coding_type != S_VOP) if (b_mb->mode == MODE_NOT_CODED) { pMB->mode = MODE_NOT_CODED; pMB->mvs[0] = pMB->b_mvs[0] = zeroMV; pMB->sad16 = 0; + *complete_count_self = i+1; + current_mb++; continue; } @@ -1004,6 +1216,9 @@ if (pMB->mode == MODE_DIRECT_NONE_MV) { pMB->sad16 = best_sad; + pMB->cbp = 0; + *complete_count_self = i+1; + current_mb++; continue; } @@ -1013,24 +1228,30 @@ SearchBF_initial(i, j, frame->motion_flags, frame->bcode, pParam, pMB, &b_predMV, &best_sad, MODE_BACKWARD, &Data_b, Data_d.currentMV[2]); - sad2 = best_sad; + if (frame->motion_flags&XVID_ME_BFRAME_EARLYSTOP) + fb_thresh = best_sad; + else + fb_thresh = best_sad + (best_sad>>1); - if (Data_f.iMinSAD[0] < 2*sad2+2000) + if (Data_f.iMinSAD[0] <= fb_thresh) SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_f); - if (Data_b.iMinSAD[0] < 2*sad2+2000) + if (Data_b.iMinSAD[0] <= fb_thresh) SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_b); SearchInterpolate_initial(i, j, frame->motion_flags, pParam, &f_predMV, &b_predMV, &best_sad, &Data_i, Data_f.currentMV[0], Data_b.currentMV[0]); - if (((Data_i.iMinSAD[0] < 2*best_sad+2000) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) + if (((Data_i.iMinSAD[0] < best_sad +(best_sad>>3)) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) || Data_i.iMinSAD[0] <= 
best_sad) SearchInterpolate_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_i); - if ((Data_d.iMinSAD[0] <= 2*best_sad) && (!frame->motion_flags&XVID_ME_SKIP_DELTASEARCH)) - SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); + if (Data_d.iMinSAD[0] <= 2*best_sad) + if ((!(frame->motion_flags&XVID_ME_SKIP_DELTASEARCH) && (best_sad > 750)) + || (best_sad > 1000)) + + SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); /* final skip decision */ if ( (skip_sad < 2 * Data_d.iQuant * MAX_SAD00_FOR_SKIP ) @@ -1042,16 +1263,28 @@ if (pMB->mode == MODE_DIRECT_NONE_MV) { /* skipped? */ pMB->sad16 = skip_sad; + pMB->cbp = 0; + *complete_count_self = i+1; + current_mb++; continue; } } if (frame->vop_flags & XVID_VOP_RD_BVOP) ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, - pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, pParam, i, j); + pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, pParam, i, j, best_sad); else ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV); + *complete_count_self = i+1; + current_mb++; + maxMotionBVOP(&MVmaxF, &MVmaxB, pMB, Data_d.qpel); } + + complete_count_self++; + complete_count_above++; } + + h->minfcode = getMinFcode(MVmaxF); + h->minbcode = getMinFcode(MVmaxB); }