--- estimation_pvop.c 2003/09/10 22:18:59 1.1.2.1 +++ estimation_pvop.c 2004/07/08 07:12:54 1.8 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: estimation_pvop.c,v 1.1.2.1 2003/09/10 22:18:59 edgomez Exp $ + * $Id: estimation_pvop.c,v 1.8 2004/07/08 07:12:54 syskin Exp $ * ****************************************************************************/ @@ -42,7 +42,7 @@ static const int xvid_me_lambda_vec8[32] = { 0 ,(int)(1.00235 * NEIGH_TEND_8X8 + 0.5), - (int)(1.15582 + NEIGH_TEND_8X8 + 0.5), (int)(1.31976*NEIGH_TEND_8X8 + 0.5), + (int)(1.15582*NEIGH_TEND_8X8 + 0.5), (int)(1.31976*NEIGH_TEND_8X8 + 0.5), (int)(1.49591*NEIGH_TEND_8X8 + 0.5), (int)(1.68601*NEIGH_TEND_8X8 + 0.5), (int)(1.89187*NEIGH_TEND_8X8 + 0.5), (int)(2.11542*NEIGH_TEND_8X8 + 0.5), (int)(2.35878*NEIGH_TEND_8X8 + 0.5), (int)(2.62429*NEIGH_TEND_8X8 + 0.5), @@ -60,24 +60,23 @@ }; static void -CheckCandidate16(const int x, const int y, const SearchData * const data, const unsigned int Direction) +CheckCandidate16(const int x, const int y, SearchData * const data, const unsigned int Direction) { - int xc, yc; const uint8_t * Reference; + int32_t sad, xc, yc; uint32_t t; VECTOR * current; - int32_t sad; uint32_t t; if ( (x > data->max_dx) || (x < data->min_dx) || (y > data->max_dy) || (y < data->min_dy) ) return; - if (!data->qpel_precision) { + if (data->qpel_precision) { /* x and y are in 1/4 precision */ + Reference = xvid_me_interpolate16x16qpel(x, y, 0, data); + current = data->currentQMV; + xc = x/2; yc = y/2; + } else { Reference = GetReference(x, y, data); current = data->currentMV; xc = x; yc = y; - } else { /* x and y are in 1/4 precision */ - Reference = xvid_me_interpolate16x16qpel(x, y, 0, data); - xc = x/2; yc = y/2; /* for chroma sad */ - current = data->currentQMV; } sad = sad16v(data->Cur, Reference, data->iEdgedWidth, data->temp); @@ -86,59 +85,19 @@ sad += (data->lambda16 * t * sad)>>10; data->temp[0] += (data->lambda8 * t * (data->temp[0] + NEIGH_8X8_BIAS))>>10; - if (data->chroma && sad < data->iMinSAD[0]) + if (data->chroma) { + if (sad >= data->iMinSAD[0]) goto no16; sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], (yc >> 1) + roundtab_79[yc & 0x3], data); + } if (sad < data->iMinSAD[0]) { data->iMinSAD[0] = sad; current[0].x = x; current[0].y = y; - *data->dir = Direction; - } - - if (data->temp[0] < data->iMinSAD[1]) { - data->iMinSAD[1] = data->temp[0]; current[1].x = x; current[1].y = y; } - if (data->temp[1] < data->iMinSAD[2]) { - data->iMinSAD[2] = data->temp[1]; current[2].x = x; current[2].y = y; } - if (data->temp[2] < data->iMinSAD[3]) { - data->iMinSAD[3] = data->temp[2]; current[3].x = x; current[3].y = y; } - if (data->temp[3] < data->iMinSAD[4]) { - data->iMinSAD[4] = data->temp[3]; current[4].x = x; current[4].y = y; } -} - -static void -CheckCandidate16_subpel(const int x, const int y, const SearchData * const data, const unsigned int Direction) -{ - int xc, yc; - const uint8_t *Reference; - VECTOR *current, *current2; - int32_t sad; uint32_t t; - - if ( (x > data->max_dx) || (x < data->min_dx) - || (y > data->max_dy) || (y < data->min_dy) ) return; - - if (!data->qpel_precision) { - Reference = GetReference(x, y, data); - current = data->currentMV; - current2 = data->currentMV2; - xc = x; yc = y; - } else { /* x and y are in 1/4 precision */ - Reference = xvid_me_interpolate16x16qpel(x, y, 0, data); - xc = x/2; yc = y/2; /* for chroma sad */ - current = data->currentQMV; - current2 = data->currentQMV2; + data->dir = Direction; } - sad = sad16v(data->Cur, Reference, data->iEdgedWidth, data->temp); - t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); - - sad += (data->lambda16 * t * sad)>>10; - data->temp[0] += (data->lambda8 * t * (data->temp[0] + NEIGH_8X8_BIAS))>>10; - - if (data->chroma && sad < data->iMinSAD[0]) - sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], - (yc >> 1) + roundtab_79[yc & 0x3], data); - +no16: if (data->temp[0] < data->iMinSAD[1]) { data->iMinSAD[1] = data->temp[0]; current[1].x = x; current[1].y = y; } if (data->temp[1] < data->iMinSAD[2]) { @@ -147,26 +106,10 @@ data->iMinSAD[3] = data->temp[2]; current[3].x = x; current[3].y = y; } if (data->temp[3] < data->iMinSAD[4]) { data->iMinSAD[4] = data->temp[3]; current[4].x = x; current[4].y = y; } - - if (sad < data->iMinSAD[0]) { - *(data->iMinSAD2) = *(data->iMinSAD); - current2->x = current->x; current2->y = current->y; - - data->iMinSAD[0] = sad; - current[0].x = x; current[0].y = y; - *data->dir = Direction; - return; - } - - if (sad < *(data->iMinSAD2)) { - *(data->iMinSAD2) = sad; - current2->x = x; current2->y = y; - *data->dir = Direction; - } } static void -CheckCandidate8(const int x, const int y, const SearchData * const data, const unsigned int Direction) +CheckCandidate8(const int x, const int y, SearchData * const data, const unsigned int Direction) { int32_t sad; uint32_t t; const uint8_t * Reference; @@ -191,12 +134,12 @@ if (sad < *(data->iMinSAD)) { *(data->iMinSAD) = sad; current->x = x; current->y = y; - *data->dir = Direction; + data->dir = Direction; } } static void -CheckCandidate32(const int x, const int y, const SearchData * const data, const unsigned int Direction) +CheckCandidate32(const int x, const int y, SearchData * const data, const unsigned int Direction) { uint32_t t; const uint8_t * Reference; @@ -217,7 +160,7 @@ if (sad < data->iMinSAD[0]) { data->iMinSAD[0] = sad; data->currentMV[0].x = x; data->currentMV[0].y = y; - *data->dir = Direction; + data->dir = Direction; } if (data->temp[0] < data->iMinSAD[1]) { @@ -230,106 +173,6 @@ data->iMinSAD[4] = data->temp[3]; data->currentMV[4].x = x; data->currentMV[4].y = y; } } -static void -SubpelRefine_Fast(SearchData * data, CheckFunc * CheckCandidate) -{ -/* Do a half-pel or q-pel refinement */ - VECTOR centerMV; - VECTOR second_best; - int best_sad = *data->iMinSAD; - int xo, yo, xo2, yo2; - int size = 2; - CheckFunc *backupFunc = CheckCandidate; - - if(data->qpel_precision) - size = 1; - - centerMV = *data->currentMV; - *data->iMinSAD = 256 * 4096; - - CHECK_CANDIDATE(centerMV.x, centerMV.y - size, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y - size, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y + size, 0); - - CHECK_CANDIDATE(centerMV.x, centerMV.y + size, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y + size, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y - size, 0); - - second_best = *data->currentMV; - - if(data->qpel_precision) { - second_best.x *= 2; second_best.y *= 2; - } - - data->currentMV[0] = centerMV; - *data->iMinSAD = best_sad; - - centerMV = data->qpel_precision ? *data->currentQMV : *data->currentMV; - - xo = centerMV.x; - yo = centerMV.y; - xo2 = second_best.x; - yo2 = second_best.y; - - CheckCandidate = CheckCandidate16_subpel; - *data->iMinSAD2 = 256 * 4096; - - if (yo == yo2) - { - CHECK_CANDIDATE((xo+xo2)>>1, yo, 0); - CHECK_CANDIDATE(xo, yo-1, 0); - CHECK_CANDIDATE(xo, yo+1, 0); - - if(best_sad <= *data->iMinSAD2) - goto ende; - - if(data->currentQMV[0].x == data->currentQMV2[0].x) { - CHECK_CANDIDATE((xo+xo2)>>1, yo-1, 0); - CHECK_CANDIDATE((xo+xo2)>>1, yo+1, 0); - goto ende; - } - else { - CHECK_CANDIDATE((xo+xo2)>>1, - (data->currentQMV[0].x == xo) ? data->currentQMV[0].y : data->currentQMV2[0].y, - 0); - goto ende; - } - } - - if (xo == xo2) - { - CHECK_CANDIDATE(xo, (yo+yo2)>>1, 0); - CHECK_CANDIDATE(xo-1, yo, 0); - CHECK_CANDIDATE(xo+1, yo, 0); - - if(best_sad < *data->iMinSAD2) - goto ende; - - if(data->currentQMV[0].y == data->currentQMV2[0].y) { - CHECK_CANDIDATE(xo-1, (yo+yo2)>>1, 0); - CHECK_CANDIDATE(xo+1, (yo+yo2)>>1, 0); - goto ende; - } - else { - CHECK_CANDIDATE((data->currentQMV[0].y == yo) ? data->currentQMV[0].x : data->currentQMV2[0].x, (yo+yo2)>>1, 0); - goto ende; - } - } - - CHECK_CANDIDATE(xo, (yo+yo2)>>1, 0); - CHECK_CANDIDATE((xo+xo2)>>1, yo, 0); - - if(best_sad <= *data->iMinSAD2) - goto ende; - - CHECK_CANDIDATE((xo+xo2)>>1, (yo+yo2)>>1, 0); - -ende: - CheckCandidate = backupFunc; -} - int xvid_me_SkipDecisionP(const IMAGE * current, const IMAGE * reference, const int x, const int y, @@ -369,13 +212,12 @@ * [3]: topright neighbour's SAD */ -static __inline int +static __inline void get_pmvdata2(const MACROBLOCK * const mbs, const int mb_width, const int bound, const int x, const int y, - const int block, VECTOR * const pmv, int32_t * const psad) { @@ -385,27 +227,9 @@ int lpos, tpos, rpos; int num_cand = 0, last_cand = 1; - switch (block) { - case 0: - lx = x - 1; ly = y; lz = 1; - tx = x; ty = y - 1; tz = 2; - rx = x + 1; ry = y - 1; rz = 2; - break; - case 1: - lx = x; ly = y; lz = 0; - tx = x; ty = y - 1; tz = 3; - rx = x + 1; ry = y - 1; rz = 2; - break; - case 2: - lx = x - 1; ly = y; lz = 3; - tx = x; ty = y; tz = 0; - rx = x; ry = y; rz = 1; - break; - default: - lx = x; ly = y; lz = 2; - tx = x; ty = y; tz = 0; - rx = x; ry = y; rz = 1; - } + lx = x - 1; ly = y; lz = 1; + tx = x; ty = y - 1; tz = 2; + rx = x + 1; ry = y - 1; rz = 2; lpos = lx + ly * mb_width; rpos = rx + ry * mb_width; @@ -442,29 +266,24 @@ } /* original pmvdata() compatibility hack */ - if (x == 0 && y == 0 && block == 0) { + if (x == 0 && y == 0) { pmv[0] = pmv[1] = pmv[2] = pmv[3] = zeroMV; psad[0] = 0; psad[1] = psad[2] = psad[3] = MV_MAX_ERROR; - return 0; + return; } /* if only one valid candidate preictor, the invalid candiates are set to the canidate */ if (num_cand == 1) { pmv[0] = pmv[last_cand]; psad[0] = psad[last_cand]; -#if 0 - return MVequal(pmv[0], zeroMV); /* no point calculating median mv and minimum sad */ -#endif - - /* original pmvdata() compatibility hack */ - return y==0 && block <= 1 ? 0 : MVequal(pmv[0], zeroMV); + return; } if ((MVequal(pmv[1], pmv[2])) && (MVequal(pmv[1], pmv[3]))) { pmv[0] = pmv[1]; psad[0] = MIN(MIN(psad[1], psad[2]), psad[3]); - return 1; + return; } /* set median, minimum */ @@ -478,7 +297,6 @@ psad[0] = MIN(MIN(psad[1], psad[2]), psad[3]); - return 0; } @@ -494,7 +312,8 @@ const IMAGE * const pCurrent, const IMAGE * const pRef, const IMAGE * const vGMC, - const int coding_type) + const int coding_type, + const int skip_sad) { int mode = MODE_INTER; int mcsel = 0; @@ -520,8 +339,8 @@ } /* final skip decision, a.k.a. "the vector you found, really that good?" */ - if (skip_possible && (pMB->sad16 < (int)iQuant * MAX_SAD00_FOR_SKIP)) - if ( (100*sad)/(pMB->sad16+1) > FINAL_SKIP_THRESH) + if (skip_possible && (skip_sad < (int)iQuant * MAX_SAD00_FOR_SKIP)) + if ( (100*skip_sad)/(pMB->sad16+1) > FINAL_SKIP_THRESH) if (Data->chroma || xvid_me_SkipDecisionP(pCurrent, pRef, x, y, Data->iEdgedWidth/2, iQuant, Data->rrv)) { mode = MODE_NOT_CODED; sad = 0; @@ -547,7 +366,7 @@ /* intra decision */ - if (iQuant > 8) InterBias += 100 * (iQuant - 8); /* to make high quants work */ + if (iQuant > 10) InterBias += 60 * (iQuant - 10); /* to make high quants work */ if (y != 0) if ((pMB - pParam->mb_width)->mode == MODE_INTRA ) InterBias -= 80; if (x != 0) @@ -590,7 +409,7 @@ pMB->pmvs[0].y = Data->currentMV[0].y - Data->predMV.y; } - } else if (mode == MODE_INTER ) { // but mcsel == 1 + } else if (mode == MODE_INTER ) { /* but mcsel == 1 */ pMB->mcsel = 1; if (Data->qpel) { @@ -649,7 +468,7 @@ } static void -Search8(const SearchData * const OldData, +Search8(SearchData * const OldData, const int x, const int y, const uint32_t MotionFlags, const MBParam * const pParam, @@ -659,10 +478,11 @@ SearchData * const Data) { int i = 0; + VECTOR vbest_q; int32_t sbest_q; CheckFunc * CheckCandidate; - Data->iMinSAD = OldData->iMinSAD + 1 + block; - Data->currentMV = OldData->currentMV + 1 + block; - Data->currentQMV = OldData->currentQMV + 1 + block; + *Data->iMinSAD = *(OldData->iMinSAD + 1 + block); + *Data->currentMV = *(OldData->currentMV + 1 + block); + *Data->currentQMV = *(OldData->currentQMV + 1 + block); if(Data->qpel) { Data->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x/2, y/2, block); @@ -678,6 +498,9 @@ if (MotionFlags & (XVID_ME_EXTSEARCH8|XVID_ME_HALFPELREFINE8|XVID_ME_QUARTERPELREFINE8)) { + vbest_q = Data->currentQMV[0]; + sbest_q = Data->iMinSAD[0]; + if (Data->rrv) i = 16; else i = 8; Data->RefP[0] = OldData->RefP[0] + i * ((block&1) + Data->iEdgedWidth*(block>>1)); @@ -695,7 +518,6 @@ else CheckCandidate = CheckCandidate16no4v; if (MotionFlags & XVID_ME_EXTSEARCH8 && (!(MotionFlags & XVID_ME_EXTSEARCH_RD))) { - int32_t temp_sad = *(Data->iMinSAD); /* store current MinSAD */ MainSearchFunc *MainSearchPtr; if (MotionFlags & XVID_ME_USESQUARES8) MainSearchPtr = xvid_me_SquareSearch; @@ -703,38 +525,48 @@ else MainSearchPtr = xvid_me_DiamondSearch; MainSearchPtr(Data->currentMV->x, Data->currentMV->y, Data, 255, CheckCandidate); - - if(*(Data->iMinSAD) < temp_sad) { - Data->currentQMV->x = 2 * Data->currentMV->x; /* update our qpel vector */ - Data->currentQMV->y = 2 * Data->currentMV->y; - } } - if (MotionFlags & XVID_ME_HALFPELREFINE8) { - int32_t temp_sad = *(Data->iMinSAD); /* store current MinSAD */ + if(!Data->qpel) { + /* halfpel mode */ + if (MotionFlags & XVID_ME_HALFPELREFINE8) + /* perform halfpel refine of current best vector */ + xvid_me_SubpelRefine(Data->currentMV[0], Data, CheckCandidate, 0); + } else { + /* qpel mode */ + Data->currentQMV->x = 2*Data->currentMV->x; + Data->currentQMV->y = 2*Data->currentMV->y; + + if(MotionFlags & XVID_ME_FASTREFINE8) { + /* fast */ + get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 3, + pParam->width, pParam->height, Data->iFcode, 2, 0); + FullRefine_Fast(Data, CheckCandidate8, 0); + } else if(MotionFlags & XVID_ME_QUARTERPELREFINE8) { + /* full */ + if (MotionFlags & XVID_ME_HALFPELREFINE8) { + xvid_me_SubpelRefine(Data->currentMV[0], Data, CheckCandidate8, 0); /* hpel part */ + Data->currentQMV->x = 2*Data->currentMV->x; + Data->currentQMV->y = 2*Data->currentMV->y; + } - xvid_me_SubpelRefine(Data, CheckCandidate); /* perform halfpel refine of current best vector */ + get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 3, + pParam->width, pParam->height, Data->iFcode, 2, 0); + Data->qpel_precision = 1; - if(*(Data->iMinSAD) < temp_sad) { /* we have found a better match */ - Data->currentQMV->x = 2 * Data->currentMV->x; /* update our qpel vector */ - Data->currentQMV->y = 2 * Data->currentMV->y; + xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidate8, 0); /* qpel part */ } } - if (Data->qpel && MotionFlags & XVID_ME_QUARTERPELREFINE8) { - Data->qpel_precision = 1; - get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 3, - pParam->width, pParam->height, Data->iFcode, 2, 0); - xvid_me_SubpelRefine(Data, CheckCandidate); - } + if (sbest_q <= Data->iMinSAD[0]) /* we have not found a better match */ + Data->currentQMV[0] = vbest_q; + } if (Data->rrv) { Data->currentMV->x = RRV_MV_SCALEDOWN(Data->currentMV->x); Data->currentMV->y = RRV_MV_SCALEDOWN(Data->currentMV->y); - } - - if(Data->qpel) { + } else if(Data->qpel) { pMB->pmvs[block].x = Data->currentQMV->x - Data->predMV.x; pMB->pmvs[block].y = Data->currentQMV->y - Data->predMV.y; pMB->qmvs[block] = *Data->currentQMV; @@ -743,6 +575,10 @@ pMB->pmvs[block].y = Data->currentMV->y - Data->predMV.y; } + *(OldData->iMinSAD + 1 + block) = *Data->iMinSAD; + *(OldData->currentMV + 1 + block) = *Data->currentMV; + *(OldData->currentQMV + 1 + block) = *Data->currentQMV; + pMB->mvs[block] = *Data->currentMV; pMB->sad8[block] = 4 * *Data->iMinSAD; } @@ -774,9 +610,9 @@ get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1, Data->rrv); - get_pmvdata2(pMBs, pParam->mb_width, 0, x, y, 0, pmv, Data->temp); + get_pmvdata2(pMBs, pParam->mb_width, 0, x, y, pmv, Data->temp); - Data->temp[5] = Data->temp[6] = 0; /* chroma-sad cache */ + Data->chromaX = Data->chromaY = 0; /* chroma-sad cache */ i = Data->rrv ? 2 : 1; Data->Cur = pCur->y + (x + y * Data->iEdgedWidth) * 16*i; Data->CurV = pCur->v + (x + y * (Data->iEdgedWidth/2)) * 8*i; @@ -792,7 +628,7 @@ Data->lambda16 = xvid_me_lambda_vec16[pMB->quant]; Data->lambda8 = xvid_me_lambda_vec8[pMB->quant]; Data->qpel_precision = 0; - *Data->dir = 0; + Data->dir = 0; memset(Data->currentMV, 0, 5*sizeof(VECTOR)); @@ -836,7 +672,7 @@ else { MainSearchFunc * MainSearchPtr; - int mask = make_mask(pmv, i, *Data->dir); // all vectors pmv[0..i-1] have been checked + int mask = make_mask(pmv, i, Data->dir); /* all vectors pmv[0..i-1] have been checked */ if (MotionFlags & XVID_ME_USESQUARES16) MainSearchPtr = xvid_me_SquareSearch; else if (MotionFlags & XVID_ME_ADVANCEDDIAMOND16) MainSearchPtr = xvid_me_AdvDiamondSearch; @@ -851,7 +687,10 @@ if (MotionFlags & XVID_ME_EXTSEARCH16) { int32_t bSAD; VECTOR startMV = Data->predMV, backupMV = Data->currentMV[0]; - if (Data->rrv) { + if (Data->qpel) { + startMV.x /= 2; + startMV.y /= 2; + } else if (Data->rrv) { startMV.x = RRV_MV_SCALEUP(startMV.x); startMV.y = RRV_MV_SCALEUP(startMV.y); } @@ -859,7 +698,7 @@ bSAD = Data->iMinSAD[0]; Data->iMinSAD[0] = MV_MAX_ERROR; CheckCandidate(startMV.x, startMV.y, Data, 255); - MainSearchPtr(startMV.x, startMV.y, Data, 255, CheckCandidate); + xvid_me_DiamondSearch(startMV.x, startMV.y, Data, 255, CheckCandidate); if (bSAD < Data->iMinSAD[0]) { Data->currentMV[0] = backupMV; Data->iMinSAD[0] = bSAD; } @@ -871,7 +710,7 @@ bSAD = Data->iMinSAD[0]; Data->iMinSAD[0] = MV_MAX_ERROR; CheckCandidate(startMV.x, startMV.y, Data, 255); - MainSearchPtr(startMV.x, startMV.y, Data, 255, CheckCandidate); + xvid_me_DiamondSearch(startMV.x, startMV.y, Data, 255, CheckCandidate); if (bSAD < Data->iMinSAD[0]) { Data->currentMV[0] = backupMV; Data->iMinSAD[0] = bSAD; @@ -880,27 +719,43 @@ } } - if (MotionFlags & XVID_ME_HALFPELREFINE16) - xvid_me_SubpelRefine(Data, CheckCandidate); - - for(i = 0; i < 5; i++) { - Data->currentQMV[i].x = 2 * Data->currentMV[i].x; /* initialize qpel vectors */ - Data->currentQMV[i].y = 2 * Data->currentMV[i].y; - } - if (Data->qpel) { - get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, - pParam->width, pParam->height, Data->iFcode, 2, 0); - Data->qpel_precision = 1; - if (MotionFlags & XVID_ME_QUARTERPELREFINE16) { - if(MotionFlags & XVID_ME_FASTREFINE16) - SubpelRefine_Fast(Data, CheckCandidate); - else - xvid_me_SubpelRefine(Data, CheckCandidate); + if(!Data->qpel) { + /* halfpel mode */ + if (MotionFlags & XVID_ME_HALFPELREFINE16) + xvid_me_SubpelRefine(Data->currentMV[0], Data, CheckCandidate, 0); + } else { + /* qpel mode */ + + for(i = 0; i < 5; i++) { + Data->currentQMV[i].x = 2 * Data->currentMV[i].x; /* initialize qpel vectors */ + Data->currentQMV[i].y = 2 * Data->currentMV[i].y; + } + if(MotionFlags & XVID_ME_FASTREFINE16 && MotionFlags & XVID_ME_QUARTERPELREFINE16) { + /* fast */ + get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, + pParam->width, pParam->height, Data->iFcode, 2, 0); + FullRefine_Fast(Data, CheckCandidate, 0); + } else { + if(MotionFlags & (XVID_ME_QUARTERPELREFINE16 | XVID_ME_QUARTERPELREFINE16_RD)) { + /* full */ + if (MotionFlags & XVID_ME_HALFPELREFINE16) { + xvid_me_SubpelRefine(Data->currentMV[0], Data, CheckCandidate, 0); /* hpel part */ + for(i = 0; i < 5; i++) { + Data->currentQMV[i].x = 2 * Data->currentMV[i].x; + Data->currentQMV[i].y = 2 * Data->currentMV[i].y; + } + } + get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, + pParam->width, pParam->height, Data->iFcode, 2, 0); + Data->qpel_precision = 1; + if(MotionFlags & XVID_ME_QUARTERPELREFINE16) + xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidate, 0); /* qpel part */ + } } } - if (Data->iMinSAD[0] < (int32_t)pMB->quant * 30) + if (Data->iMinSAD[0] < (int32_t)pMB->quant * 30* ((MotionFlags & XVID_ME_FASTREFINE16) ? 8 : 1)) inter4v = 0; if (inter4v) { @@ -913,7 +768,7 @@ Search8(Data, 2*x + 1, 2*y + 1, MotionFlags, pParam, pMB, pMBs, 3, &Data8); if ((Data->chroma) && (!(VopFlags & XVID_VOP_MODEDECISION_RD))) { - /* chroma is only used for comparsion to INTER. if the comparsion will be done in BITS domain, it will not be used */ + /* chroma is only used for comparison to INTER. if the comparison will be done in RD domain, it will not be used */ int sumx = 0, sumy = 0; if (Data->qpel) @@ -958,6 +813,9 @@ if (Flags & XVID_ME_QUARTERPELREFINE8_RD) Flags &= ~XVID_ME_QUARTERPELREFINE8; + if (Flags & XVID_ME_QUARTERPELREFINE16_RD) + Flags &= ~XVID_ME_QUARTERPELREFINE16; + if (!(VolFlags & XVID_VOL_QUARTERPEL)) Flags &= ~(XVID_ME_QUARTERPELREFINE16+XVID_ME_QUARTERPELREFINE8+XVID_ME_QUARTERPELREFINE16_RD+XVID_ME_QUARTERPELREFINE8_RD); @@ -967,6 +825,12 @@ if ((VopFlags & XVID_VOP_GREYSCALE) || (VopFlags & XVID_VOP_REDUCED)) Flags &= ~(XVID_ME_CHROMA_PVOP + XVID_ME_CHROMA_BVOP); + if (Flags & XVID_ME_FASTREFINE8) + Flags &= ~XVID_ME_HALFPELREFINE8_RD; + + if (Flags & XVID_ME_FASTREFINE16) + Flags &= ~XVID_ME_HALFPELREFINE16_RD; + return Flags; } @@ -998,25 +862,10 @@ (current->vop_flags & XVID_VOP_MODEDECISION_RD ? 2:1); /* some pre-initialized thingies for SearchP */ - int32_t temp[8]; uint32_t dir; - VECTOR currentMV[5]; - VECTOR currentQMV[5]; - VECTOR currentMV2[5]; - VECTOR currentQMV2[5]; - int32_t iMinSAD[5]; - int32_t iMinSAD2[5]; DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); SearchData Data; memset(&Data, 0, sizeof(SearchData)); Data.iEdgedWidth = iEdgedWidth; - Data.currentMV = currentMV; - Data.currentQMV = currentQMV; - Data.currentMV2 = currentMV2; - Data.currentQMV2 = currentQMV2; - Data.iMinSAD = iMinSAD; - Data.iMinSAD2 = iMinSAD2; - Data.temp = temp; - Data.dir = &dir; Data.iFcode = current->fcode; Data.rounding = pParam->m_rounding_type; Data.qpel = (current->vol_flags & XVID_VOL_QUARTERPEL ? 1:0); @@ -1024,6 +873,7 @@ Data.rrv = (current->vop_flags & XVID_VOP_REDUCED) ? 1:0; Data.dctSpace = dct_space; Data.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); + Data.mpeg_quant_matrices = pParam->mpeg_quant_matrices; if ((current->vop_flags & XVID_VOP_REDUCED)) { mb_width = (pParam->width + 31) / 32; @@ -1048,17 +898,18 @@ sad32v_c(pCurrent->y + (x + y * iEdgedWidth) * 32, pRef->y + (x + y * iEdgedWidth) * 32, pParam->edged_width, pMB->sad8 ); + + sad00 = 4*MAX(MAX(pMB->sad8[0], pMB->sad8[1]), MAX(pMB->sad8[2], pMB->sad8[3])); if (Data.chroma) { - Data.temp[7] = sad8(pCurrent->u + x*8 + y*(iEdgedWidth/2)*8, + Data.chromaSAD = sad8(pCurrent->u + x*8 + y*(iEdgedWidth/2)*8, pRef->u + x*8 + y*(iEdgedWidth/2)*8, iEdgedWidth/2) + sad8(pCurrent->v + (x + y*(iEdgedWidth/2))*8, pRef->v + (x + y*(iEdgedWidth/2))*8, iEdgedWidth/2); - pMB->sad16 += Data.temp[7]; + pMB->sad16 += Data.chromaSAD; + sad00 += Data.chromaSAD; } - sad00 = pMB->sad16; - /* initial skip decision */ /* no early skip for GMC (global vector = skip vector is unknown!) */ if (current->coding_type != S_VOP) { /* no fast SKIP for S(GMC)-VOPs */ @@ -1080,12 +931,12 @@ MAX((&pMBs[x + (y-1) * pParam->mb_width])->sad16, MAX((&pMBs[(x+1) + (y-1) * pParam->mb_width])->sad16, prevMB->sad16))); - } - } else { - stat_thresh = MIN((&pMBs[(x-1) + y * pParam->mb_width])->sad16, + } else { + stat_thresh = MIN((&pMBs[(x-1) + y * pParam->mb_width])->sad16, MIN((&pMBs[x + (y-1) * pParam->mb_width])->sad16, MIN((&pMBs[(x+1) + (y-1) * pParam->mb_width])->sad16, prevMB->sad16))); + } } } @@ -1112,7 +963,7 @@ else ModeDecision_SAD(&Data, pMB, pMBs, x, y, pParam, MotionFlags, current->vop_flags, current->vol_flags, - pCurrent, pRef, pGMC, current->coding_type); + pCurrent, pRef, pGMC, current->coding_type, sad00); if (pMB->mode == MODE_INTRA)