--- motion_est.c 2003/05/03 23:25:22 1.68.2.1 +++ motion_est.c 2004/03/22 22:36:24 1.73 @@ -646,7 +646,7 @@ { int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64; - int32_t bits = 0, sum; + int32_t bits = 0; VECTOR * current; const uint8_t * ptr; int i, cbp = 0, t, xc, yc; @@ -667,55 +667,12 @@ for(i = 0; i < 4; i++) { int s = 8*((i&1) + (i>>1)*data->iEdgedWidth); transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth); - fdct(in); - if (data->lambda8 == 0) sum = quant_inter(coeff, in, data->lambda16); - else sum = quant4_inter(coeff, in, data->lambda16); - if (sum > 0) { - cbp |= 1 << (5 - i); - bits += data->temp[i] = CodeCoeffInter_CalcBits(coeff, scan_tables[0]); - } else data->temp[i] = 0; - } - - bits += t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); - - if (bits < data->iMinSAD[0]) { // there is still a chance, adding chroma - xc = (xc >> 1) + roundtab_79[xc & 0x3]; - yc = (yc >> 1) + roundtab_79[yc & 0x3]; - - //chroma U - ptr = interpolate8x8_switch2(data->RefQ + 64, data->RefP[4], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding); - transfer_8to16subro(in, ptr, data->CurU, data->iEdgedWidth/2); - fdct(in); - if (data->lambda8 == 0) sum = quant_inter(coeff, in, data->lambda16); - else sum = quant4_inter(coeff, in, data->lambda16); - if (sum > 0) { - cbp |= 1 << (5 - 4); - bits += CodeCoeffInter_CalcBits(coeff, scan_tables[0]); - } - - if (bits < data->iMinSAD[0]) { - //chroma V - ptr = interpolate8x8_switch2(data->RefQ + 64, data->RefP[5], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding); - transfer_8to16subro(in, ptr, data->CurV, data->iEdgedWidth/2); - fdct(in); - if (data->lambda8 == 0) sum = quant_inter(coeff, in, data->lambda16); - else sum = quant4_inter(coeff, in, data->lambda16); - if (sum > 0) { - cbp |= 1 << (5 - 5); - bits += CodeCoeffInter_CalcBits(coeff, scan_tables[0]); - } - } + bits += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, i); } - bits += xvid_cbpy_tab[15-(cbp>>2)].len; - bits += mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len; - - if (bits < data->iMinSAD[0]) { - data->iMinSAD[0] = bits; - current[0].x = x; current[0].y = y; - *dir = Direction; - } + bits += t = BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); + //8x8 blocks for inter4v mode if (data->temp[0] + t < data->iMinSAD[1]) { data->iMinSAD[1] = data->temp[0] + t; current[1].x = x; current[1].y = y; } if (data->temp[1] < data->iMinSAD[2]) { @@ -725,16 +682,42 @@ if (data->temp[3] < data->iMinSAD[4]) { data->iMinSAD[4] = data->temp[3]; current[4].x = x; current[4].y = y; } + bits += BITS_MULT*xvid_cbpy_tab[15-(cbp>>2)].len; + + if (bits >= data->iMinSAD[0]) return; + + //chroma + xc = (xc >> 1) + roundtab_79[xc & 0x3]; + yc = (yc >> 1) + roundtab_79[yc & 0x3]; + + //chroma U + ptr = interpolate8x8_switch2(data->RefQ + 64, data->RefP[4], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding); + transfer_8to16subro(in, ptr, data->CurU, data->iEdgedWidth/2); + bits += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 4); + if (bits >= data->iMinSAD[0]) return; + + //chroma V + ptr = interpolate8x8_switch2(data->RefQ + 64, data->RefP[5], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding); + transfer_8to16subro(in, ptr, data->CurV, data->iEdgedWidth/2); + bits += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5); + + bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len; + + if (bits < data->iMinSAD[0]) { + data->iMinSAD[0] = bits; + current[0].x = x; current[0].y = y; + *dir = Direction; + } } static void CheckCandidateBits8(const int x, const int y, const int Direction, int * const dir, const SearchData * const data) { int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64; - int32_t sum, bits; + int32_t bits; VECTOR * current; const uint8_t * ptr; - int cbp; + int cbp = 0; if ( (x > data->max_dx) || (x < data->min_dx) || (y > data->max_dy) || (y < data->min_dy) ) return; @@ -748,15 +731,8 @@ } transfer_8to16subro(in, data->Cur, ptr, data->iEdgedWidth); - fdct(in); - if (data->lambda8 == 0) sum = quant_inter(coeff, in, data->lambda16); - else sum = quant4_inter(coeff, in, data->lambda16); - if (sum > 0) { - bits = CodeCoeffInter_CalcBits(coeff, scan_tables[0]); - cbp = 1; - } else cbp = bits = 0; - - bits += sum = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); + bits = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5); + bits += BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); if (bits < data->iMinSAD[0]) { data->temp[0] = cbp; @@ -932,8 +908,8 @@ static __inline int SkipDecisionP(const IMAGE * current, const IMAGE * reference, - const int x, const int y, - const uint32_t stride, const uint32_t iQuant, int rrv) + const int x, const int y, + const uint32_t stride, const uint32_t iQuant, int rrv) { int offset = (x + y*stride)*8; @@ -967,12 +943,118 @@ } static __inline void -ZeroMacroblockP(MACROBLOCK *pMB, const int32_t sad) +ModeDecision(SearchData * const Data, + MACROBLOCK * const pMB, + const MACROBLOCK * const pMBs, + const int x, const int y, + const MBParam * const pParam, + const uint32_t MotionFlags, + const uint32_t GlobalFlags, + const IMAGE * const pCurrent, + const IMAGE * const pRef) { - pMB->mode = MODE_INTER; - pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = zeroMV; - pMB->qmvs[0] = pMB->qmvs[1] = pMB->qmvs[2] = pMB->qmvs[3] = zeroMV; - pMB->sad16 = pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = sad; + int mode = MODE_INTER; + int inter4v = (GlobalFlags & XVID_INTER4V) && (pMB->dquant == NO_CHANGE); + const uint32_t iQuant = pMB->quant; + + const int skip_possible = (!(GlobalFlags & XVID_GMC)) && (pMB->dquant == NO_CHANGE); + + if (!(GlobalFlags & XVID_MODEDECISION_BITS)) { //normal, fast, SAD-based mode decision + int sad; + int InterBias = MV16_INTER_BIAS; + if (inter4v == 0 || Data->iMinSAD[0] < Data->iMinSAD[1] + Data->iMinSAD[2] + + Data->iMinSAD[3] + Data->iMinSAD[4] + IMV16X16 * (int32_t)iQuant) { + mode = MODE_INTER; + sad = Data->iMinSAD[0]; + } else { + mode = MODE_INTER4V; + sad = Data->iMinSAD[1] + Data->iMinSAD[2] + + Data->iMinSAD[3] + Data->iMinSAD[4] + IMV16X16 * (int32_t)iQuant; + Data->iMinSAD[0] = sad; + } + + // final skip decision, a.k.a. "the vector you found, really that good?" + if (skip_possible && (pMB->sad16 < (int)iQuant * MAX_SAD00_FOR_SKIP)) + if ( (100*sad)/(pMB->sad16+1) > FINAL_SKIP_THRESH) + if (Data->chroma || SkipDecisionP(pCurrent, pRef, x, y, Data->iEdgedWidth/2, iQuant, Data->rrv)) { + mode = MODE_NOT_CODED; + sad = 0; + } + + // intra decision + + if (iQuant > 8) InterBias += 100 * (iQuant - 8); // to make high quants work + if (y != 0) + if ((pMB - pParam->mb_width)->mode == MODE_INTRA ) InterBias -= 80; + if (x != 0) + if ((pMB - 1)->mode == MODE_INTRA ) InterBias -= 80; + + if (Data->chroma) InterBias += 50; // dev8(chroma) ??? + if (Data->rrv) InterBias *= 4; + + if (InterBias < pMB->sad16) { + int32_t deviation; + if (!Data->rrv) deviation = dev16(Data->Cur, Data->iEdgedWidth); + else deviation = dev16(Data->Cur, Data->iEdgedWidth) + + dev16(Data->Cur+16, Data->iEdgedWidth) + + dev16(Data->Cur + 16*Data->iEdgedWidth, Data->iEdgedWidth) + + dev16(Data->Cur+16+16*Data->iEdgedWidth, Data->iEdgedWidth); + + if (deviation < (sad - InterBias)) mode = MODE_INTRA; + } + + } else { // BITS + + int bits, intra, i; + VECTOR backup[5], *v; + Data->iQuant = iQuant; + + v = Data->qpel ? Data->currentQMV : Data->currentMV; + for (i = 0; i < 5; i++) { + Data->iMinSAD[i] = 256*4096; + backup[i] = v[i]; + } + + bits = CountMBBitsInter(Data, pMBs, x, y, pParam, MotionFlags); + if (bits == 0) + mode = MODE_INTER; // quick stop + else { + if (inter4v) { + int bits_inter4v = CountMBBitsInter4v(Data, pMB, pMBs, x, y, pParam, MotionFlags, backup); + if (bits_inter4v < bits) { Data->iMinSAD[0] = bits = bits_inter4v; mode = MODE_INTER4V; } + } + + intra = CountMBBitsIntra(Data); + + if (intra < bits) { *Data->iMinSAD = bits = intra; mode = MODE_INTRA; } + } + } + + if (Data->rrv) { + Data->currentMV[0].x = RRV_MV_SCALEDOWN(Data->currentMV[0].x); + Data->currentMV[0].y = RRV_MV_SCALEDOWN(Data->currentMV[0].y); + } + + if (mode == MODE_INTER) { + pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = Data->currentMV[0]; + pMB->sad16 = pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = Data->iMinSAD[0]; + + if(Data->qpel) { + pMB->qmvs[0] = pMB->qmvs[1] + = pMB->qmvs[2] = pMB->qmvs[3] = Data->currentQMV[0]; + pMB->pmvs[0].x = Data->currentQMV[0].x - Data->predMV.x; + pMB->pmvs[0].y = Data->currentQMV[0].y - Data->predMV.y; + } else { + pMB->pmvs[0].x = Data->currentMV[0].x - Data->predMV.x; + pMB->pmvs[0].y = Data->currentMV[0].y - Data->predMV.y; + } + + } else if (mode == MODE_INTER4V) + pMB->sad16 = Data->iMinSAD[0]; + else // INTRA, NOT_CODED + SkipMacroblockP(pMB, 0); + + pMB->mode = mode; } bool @@ -1005,7 +1087,7 @@ VECTOR currentMV[5]; VECTOR currentQMV[5]; int32_t iMinSAD[5]; - DECLARE_ALIGNED_MATRIX(dct_space, 2, 64, int16_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); SearchData Data; memset(&Data, 0, sizeof(SearchData)); Data.iEdgedWidth = iEdgedWidth; @@ -1019,6 +1101,7 @@ Data.chroma = MotionFlags & PMV_CHROMA16; Data.rrv = current->global_flags & XVID_REDUCED; Data.dctSpace = dct_space; + Data.quant_type = pParam->m_quant_type; if ((current->global_flags & XVID_REDUCED)) { mb_width = (pParam->width + 31) / 32; @@ -1053,46 +1136,35 @@ sad00 = pMB->sad16; - if (!(current->global_flags & XVID_LUMIMASKING)) { + if (!(current->global_flags & XVID_LUMIMASKING)) pMB->dquant = NO_CHANGE; - } else { + else { if (pMB->dquant != NO_CHANGE) { quant += DQtab[pMB->dquant]; if (quant > 31) quant = 31; else if (quant < 1) quant = 1; } } - pMB->quant = current->quant; + pMB->quant = quant; //initial skip decision /* no early skip for GMC (global vector = skip vector is unknown!) */ if (!(current->global_flags & XVID_GMC)) { /* no fast SKIP for S(GMC)-VOPs */ - if (pMB->dquant == NO_CHANGE && sad00 < pMB->quant * skip_thresh) + if (pMB->dquant == NO_CHANGE && sad00 < quant * skip_thresh) if (Data.chroma || SkipDecisionP(pCurrent, pRef, x, y, iEdgedWidth/2, pMB->quant, Data.rrv)) { SkipMacroblockP(pMB, sad00); continue; } } - if ((current->global_flags & XVID_CARTOON_MODE) && - (sad00 < pMB->quant * 4 * skip_thresh)) { /* favorize (0,0) vector for cartoons */ - ZeroMacroblockP(pMB, sad00); - continue; - } - SearchP(pRef, pRefH->y, pRefV->y, pRefHV->y, pCurrent, x, - y, MotionFlags, current->global_flags, pMB->quant, - &Data, pParam, pMBs, reference->mbs, - current->global_flags & XVID_INTER4V, pMB); - -/* final skip decision, a.k.a. "the vector you found, really that good?" */ - if (!(current->global_flags & XVID_GMC || current->global_flags & XVID_MODEDECISION_BITS)) { - if ( pMB->dquant == NO_CHANGE && sad00 < pMB->quant * MAX_SAD00_FOR_SKIP) { - if ( (100*pMB->sad16)/(sad00+1) > FINAL_SKIP_THRESH * (Data.rrv ? 4:1) ) - if (Data.chroma || SkipDecisionP(pCurrent, pRef, x, y, iEdgedWidth/2, pMB->quant, Data.rrv)) - SkipMacroblockP(pMB, sad00); - } - } + y, MotionFlags, current->global_flags, + &Data, pParam, pMBs, reference->mbs, pMB); + + ModeDecision(&Data, pMB, pMBs, x, y, pParam, + MotionFlags, current->global_flags, + pCurrent, pRef); + if (pMB->mode == MODE_INTRA) if (++iIntra > iLimit) return 1; } @@ -1165,85 +1237,6 @@ } } -static int -ModeDecision(const uint32_t iQuant, SearchData * const Data, - int inter4v, - MACROBLOCK * const pMB, - const MACROBLOCK * const pMBs, - const int x, const int y, - const MBParam * const pParam, - const uint32_t MotionFlags, - const uint32_t GlobalFlags) -{ - - int mode = MODE_INTER; - - if (!(GlobalFlags & XVID_MODEDECISION_BITS)) { //normal, fast, SAD-based mode decision - int sad; - int InterBias = MV16_INTER_BIAS; - if (inter4v == 0 || Data->iMinSAD[0] < Data->iMinSAD[1] + Data->iMinSAD[2] + - Data->iMinSAD[3] + Data->iMinSAD[4] + IMV16X16 * (int32_t)iQuant) { - mode = MODE_INTER; - sad = Data->iMinSAD[0]; - } else { - mode = MODE_INTER4V; - sad = Data->iMinSAD[1] + Data->iMinSAD[2] + - Data->iMinSAD[3] + Data->iMinSAD[4] + IMV16X16 * (int32_t)iQuant; - Data->iMinSAD[0] = sad; - } - - /* intra decision */ - - if (iQuant > 8) InterBias += 100 * (iQuant - 8); // to make high quants work - if (y != 0) - if ((pMB - pParam->mb_width)->mode == MODE_INTRA ) InterBias -= 80; - if (x != 0) - if ((pMB - 1)->mode == MODE_INTRA ) InterBias -= 80; - - if (Data->chroma) InterBias += 50; // to compensate bigger SAD - if (Data->rrv) InterBias *= 4; - - if (InterBias < pMB->sad16) { - int32_t deviation; - if (!Data->rrv) deviation = dev16(Data->Cur, Data->iEdgedWidth); - else deviation = dev16(Data->Cur, Data->iEdgedWidth) + - dev16(Data->Cur+8, Data->iEdgedWidth) + - dev16(Data->Cur + 8*Data->iEdgedWidth, Data->iEdgedWidth) + - dev16(Data->Cur+8+8*Data->iEdgedWidth, Data->iEdgedWidth); - - if (deviation < (sad - InterBias)) return MODE_INTRA; - } - return mode; - - } else { - - int bits, intra, i; - VECTOR backup[5], *v; - Data->lambda16 = iQuant; - Data->lambda8 = pParam->m_quant_type; - - v = Data->qpel ? Data->currentQMV : Data->currentMV; - for (i = 0; i < 5; i++) { - Data->iMinSAD[i] = 256*4096; - backup[i] = v[i]; - } - - bits = CountMBBitsInter(Data, pMBs, x, y, pParam, MotionFlags); - if (bits == 0) return MODE_INTER; // quick stop - - if (inter4v) { - int bits_inter4v = CountMBBitsInter4v(Data, pMB, pMBs, x, y, pParam, MotionFlags, backup); - if (bits_inter4v < bits) { Data->iMinSAD[0] = bits = bits_inter4v; mode = MODE_INTER4V; } - } - - intra = CountMBBitsIntra(Data); - - if (intra < bits) { *Data->iMinSAD = bits = intra; return MODE_INTRA; } - - return mode; - } -} - static void SearchP(const IMAGE * const pRef, const uint8_t * const pRefH, @@ -1254,20 +1247,19 @@ const int y, const uint32_t MotionFlags, const uint32_t GlobalFlags, - const uint32_t iQuant, SearchData * const Data, const MBParam * const pParam, const MACROBLOCK * const pMBs, const MACROBLOCK * const prevMBs, - int inter4v, MACROBLOCK * const pMB) { int i, iDirection = 255, mask, threshA; VECTOR pmv[7]; + int inter4v = (GlobalFlags & XVID_INTER4V) && (pMB->dquant == NO_CHANGE); get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, - pParam->width, pParam->height, Data->iFcode - Data->qpel, 0, Data->rrv); + pParam->width, pParam->height, Data->iFcode - Data->qpel, 0, Data->rrv); get_pmvdata2(pMBs, pParam->mb_width, 0, x, y, 0, pmv, Data->temp); @@ -1284,12 +1276,10 @@ Data->RefP[4] = pRef->u + (x + y * (Data->iEdgedWidth/2)) * 8*i; Data->RefP[5] = pRef->v + (x + y * (Data->iEdgedWidth/2)) * 8*i; - Data->lambda16 = lambda_vec16[iQuant]; - Data->lambda8 = lambda_vec8[iQuant]; + Data->lambda16 = lambda_vec16[pMB->quant]; + Data->lambda8 = lambda_vec8[pMB->quant]; Data->qpel_precision = 0; - if (pMB->dquant != NO_CHANGE) inter4v = 0; - memset(Data->currentMV, 0, 5*sizeof(VECTOR)); if (Data->qpel) Data->predMV = get_qpmv2(pMBs, pParam->mb_width, 0, x, y, 0); @@ -1302,7 +1292,7 @@ Data->iMinSAD[3] = pMB->sad8[2]; Data->iMinSAD[4] = pMB->sad8[3]; - if ((!(GlobalFlags & XVID_MODEDECISION_BITS)) || (x | y)) { + if ((!(GlobalFlags & XVID_MODEDECISION_BITS)) && (x | y)) { threshA = Data->temp[0]; // that's where we keep this SAD atm if (threshA < 512) threshA = 512; else if (threshA > 1024) threshA = 1024; @@ -1327,8 +1317,8 @@ if ((Data->iMinSAD[0] <= threshA) || (MVequal(Data->currentMV[0], (prevMBs+x+y*pParam->mb_width)->mvs[0]) && - (Data->iMinSAD[0] < (prevMBs+x+y*pParam->mb_width)->sad16))) { - if (!(GlobalFlags & XVID_MODEDECISION_BITS)) inter4v = 0; } + (Data->iMinSAD[0] < (prevMBs+x+y*pParam->mb_width)->sad16))) + inter4v = 0; else { MainSearchFunc * MainSearchPtr; @@ -1381,15 +1371,16 @@ Data->currentQMV[i].y = 2 * Data->currentMV[i].y; } - if (MotionFlags & PMV_QUARTERPELREFINE16) { - + if (Data->qpel) { get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, pParam->width, pParam->height, Data->iFcode, 1, 0); Data->qpel_precision = 1; - SubpelRefine(Data); + if (MotionFlags & PMV_QUARTERPELREFINE16) + SubpelRefine(Data); } - if ((!(GlobalFlags & XVID_MODEDECISION_BITS)) && (Data->iMinSAD[0] < (int32_t)iQuant * 30)) inter4v = 0; + if (Data->iMinSAD[0] < (int32_t)pMB->quant * 30) + inter4v = 0; if (inter4v) { SearchData Data8; @@ -1401,51 +1392,24 @@ Search8(Data, 2*x + 1, 2*y + 1, MotionFlags, pParam, pMB, pMBs, 3, &Data8); if ((Data->chroma) && (!(GlobalFlags & XVID_MODEDECISION_BITS))) { - // chroma is only used for comparsion to INTER. if the comparsion will be done in BITS domain, there is no reason to compute it + // chroma is only used for comparsion to INTER. if the comparsion will be done in BITS domain, it will not be used int sumx = 0, sumy = 0; - const int div = Data->qpel ? 2 : 1; - const VECTOR * const mv = Data->qpel ? pMB->qmvs : pMB->mvs; - for (i = 0; i < 4; i++) { - sumx += mv[i].x / div; - sumy += mv[i].y / div; - } + if (Data->qpel) + for (i = 1; i < 5; i++) { + sumx += Data->currentQMV[i].x/2; + sumy += Data->currentQMV[i].y/2; + } + else + for (i = 1; i < 5; i++) { + sumx += Data->currentMV[i].x; + sumy += Data->currentMV[i].y; + } Data->iMinSAD[1] += ChromaSAD( (sumx >> 3) + roundtab_76[sumx & 0xf], (sumy >> 3) + roundtab_76[sumy & 0xf], Data); } - } - - inter4v = ModeDecision(iQuant, Data, inter4v, pMB, pMBs, x, y, pParam, MotionFlags, GlobalFlags); - - if (Data->rrv) { - Data->currentMV[0].x = RRV_MV_SCALEDOWN(Data->currentMV[0].x); - Data->currentMV[0].y = RRV_MV_SCALEDOWN(Data->currentMV[0].y); - } - - if (inter4v == MODE_INTER) { - pMB->mode = MODE_INTER; - pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = Data->currentMV[0]; - pMB->sad16 = pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = Data->iMinSAD[0]; - - if(Data->qpel) { - pMB->qmvs[0] = pMB->qmvs[1] - = pMB->qmvs[2] = pMB->qmvs[3] = Data->currentQMV[0]; - pMB->pmvs[0].x = Data->currentQMV[0].x - Data->predMV.x; - pMB->pmvs[0].y = Data->currentQMV[0].y - Data->predMV.y; - } else { - pMB->pmvs[0].x = Data->currentMV[0].x - Data->predMV.x; - pMB->pmvs[0].y = Data->currentMV[0].y - Data->predMV.y; - } - - } else if (inter4v == MODE_INTER4V) { - pMB->mode = MODE_INTER4V; - pMB->sad16 = Data->iMinSAD[0]; - } else { // INTRA mode - SkipMacroblockP(pMB, 0); // not skip, but similar enough - pMB->mode = MODE_INTRA; - } - + } else Data->iMinSAD[1] = 4096*256; } static void @@ -1811,7 +1775,7 @@ CheckCandidate(0, 0, 255, &k, Data); // initial (fast) skip decision - if (*Data->iMinSAD < pMB->quant * INITIAL_SKIP_THRESH * (2 + Data->chroma?1:0)) { + if (*Data->iMinSAD < pMB->quant * INITIAL_SKIP_THRESH * (Data->chroma?3:2)) { //possible skip if (Data->chroma) { pMB->mode = MODE_DIRECT_NONE_MV; @@ -2195,8 +2159,9 @@ } } -#define INTRA_THRESH 1800 -#define INTER_THRESH 1200 +#define INTRA_THRESH 2200 +#define INTER_THRESH 50 +#define INTRA_THRESH2 95 int MEanalysis( const IMAGE * const pRef, @@ -2211,8 +2176,9 @@ int sSAD = 0; MACROBLOCK * const pMBs = Current->mbs; const IMAGE * const pCurrent = &Current->image; - int IntraThresh = INTRA_THRESH, InterThresh = INTER_THRESH + 10*b_thresh; + int IntraThresh = INTRA_THRESH, InterThresh = INTER_THRESH + b_thresh; int s = 0, blocks = 0; + int complexity = 0; int32_t iMinSAD[5], temp[5]; VECTOR currentMV[5]; @@ -2224,21 +2190,24 @@ Data.temp = temp; CheckCandidate = CheckCandidate32I; - if (intraCount != 0 && intraCount < 10) // we're right after an I frame - IntraThresh += 15 * (intraCount - 10) * (intraCount - 10); - else - if ( 5*(maxIntra - intraCount) < maxIntra) // we're close to maximum. 2 sec when max is 10 sec - IntraThresh -= (IntraThresh * (maxIntra - 8*(maxIntra - intraCount)))/maxIntra; - InterThresh -= (350 - 8*b_thresh) * bCount; - if (InterThresh < 300 + 5*b_thresh) InterThresh = 300 + 5*b_thresh; + if (intraCount != 0) { + if (intraCount < 10) // we're right after an I frame + IntraThresh += 15* (intraCount - 10) * (intraCount - 10); + else + if ( 5*(maxIntra - intraCount) < maxIntra) // we're close to maximum. 2 sec when max is 10 sec + IntraThresh -= (IntraThresh * (maxIntra - 8*(maxIntra - intraCount)))/maxIntra; + } + + InterThresh -= 12 * bCount; + if (InterThresh < 15 + b_thresh) InterThresh = 15 + b_thresh; if (sadInit) (*sadInit) (); for (y = 1; y < pParam->mb_height-1; y += 2) { for (x = 1; x < pParam->mb_width-1; x += 2) { int i; - blocks += 4; + blocks += 10; if (bCount == 0) pMBs[x + y * pParam->mb_width].mvs[0] = zeroMV; else { //extrapolation of the vector found for last frame @@ -2253,26 +2222,28 @@ for (i = 0; i < 4; i++) { int dev; MACROBLOCK *pMB = &pMBs[x+(i&1) + (y+(i>>1)) * pParam->mb_width]; - if (pMB->sad16 > IntraThresh) { - dev = dev16(pCurrent->y + (x + (i&1) + (y + (i>>1)) * pParam->edged_width) * 16, - pParam->edged_width); - if (dev + IntraThresh < pMB->sad16) { - pMB->mode = MODE_INTRA; - if (++intra > ((pParam->mb_height-2)*(pParam->mb_width-2))/2) return I_VOP; - } + dev = dev16(pCurrent->y + (x + (i&1) + (y + (i>>1)) * pParam->edged_width) * 16, + pParam->edged_width); + + complexity += dev; + if (dev + IntraThresh < pMB->sad16) { + pMB->mode = MODE_INTRA; + if (++intra > ((pParam->mb_height-2)*(pParam->mb_width-2))/2) return I_VOP; } - if (pMB->mvs[0].x == 0 && pMB->mvs[0].y == 0) s++; + + if (pMB->mvs[0].x == 0 && pMB->mvs[0].y == 0) + if (dev > 500 && pMB->sad16 < 1000) + sSAD += 1000; sSAD += pMB->sad16; } } } + complexity >>= 7; - sSAD /= blocks; - s = (10*s) / blocks; - - if (s > 4) sSAD += (s - 2) * (60 - 2*b_thresh); //static block - looks bad when in bframe... + sSAD /= complexity + 4*blocks; + if (intraCount > 12 && sSAD > INTRA_THRESH2 ) return I_VOP; if (sSAD > InterThresh ) return P_VOP; emms(); return B_VOP; @@ -2482,10 +2453,6 @@ Data->qpel_precision = 1; CheckCandidateBits16(Data->currentQMV[0].x, Data->currentQMV[0].y, 255, &iDirection, Data); - //checking if this vector is perfect. if it is, we stop. - if (Data->temp[0] == 0 && Data->temp[1] == 0 && Data->temp[2] == 0 && Data->temp[3] == 0) - return 0; //quick stop - if (MotionFlags & (HALFPELREFINE16_BITS | EXTSEARCH_BITS)) { //we have to prepare for halfpixel-precision search for(i = 0; i < 5; i++) bsad[i] = Data->iMinSAD[i]; get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 16, @@ -2498,14 +2465,10 @@ } else { // not qpel CheckCandidateBits16(Data->currentMV[0].x, Data->currentMV[0].y, 255, &iDirection, Data); - //checking if this vector is perfect. if it is, we stop. - if (Data->temp[0] == 0 && Data->temp[1] == 0 && Data->temp[2] == 0 && Data->temp[3] == 0) { - return 0; //inter - } } if (MotionFlags&EXTSEARCH_BITS) SquareSearch(Data->currentMV->x, Data->currentMV->y, Data, iDirection); - + if (MotionFlags&HALFPELREFINE16_BITS) SubpelRefine(Data); if (Data->qpel) { @@ -2531,7 +2494,6 @@ return Data->iMinSAD[0]; } - static int CountMBBitsInter4v(const SearchData * const Data, MACROBLOCK * const pMB, const MACROBLOCK * const pMBs, @@ -2544,11 +2506,13 @@ SearchData Data2, *Data8 = &Data2; int sumx = 0, sumy = 0; int16_t *in = Data->dctSpace, *coeff = Data->dctSpace + 64; + uint8_t * ptr; memcpy(Data8, Data, sizeof(SearchData)); CheckCandidate = CheckCandidateBits8; - for (i = 0; i < 4; i++) { + for (i = 0; i < 4; i++) { //for all luma blocks + Data8->iMinSAD = Data->iMinSAD + i + 1; Data8->currentMV = Data->currentMV + i + 1; Data8->currentQMV = Data->currentQMV + i + 1; @@ -2571,13 +2535,13 @@ get_range(&Data8->min_dx, &Data8->max_dx, &Data8->min_dy, &Data8->max_dy, 2*x + (i&1), 2*y + (i>>1), 8, pParam->width, pParam->height, Data8->iFcode, Data8->qpel, 0); - *Data8->iMinSAD += t; + *Data8->iMinSAD += BITS_MULT*t; Data8->qpel_precision = Data8->qpel; // checking the vector which has been found by SAD-based 8x8 search (if it's different than the one found so far) { VECTOR *v = Data8->qpel ? Data8->currentQMV : Data8->currentMV; - if (!( (v->x == backup[i+1].x) && (v->y == backup[i+1].y) )) + if (!MVequal (*v, backup[i+1]) ) CheckCandidateBits8(backup[i+1].x, backup[i+1].y, 255, &iDirection, Data8); } @@ -2596,9 +2560,10 @@ if (MotionFlags & PMV_EXTSEARCH8 && MotionFlags & EXTSEARCH_BITS) SquareSearch(Data8->currentMV->x, Data8->currentMV->x, Data8, 255); - if (MotionFlags & HALFPELREFINE8_BITS) SubpelRefine(Data8); + if (MotionFlags & HALFPELREFINE8_BITS) + SubpelRefine(Data8); - if(s > *Data8->iMinSAD) { //we have found a better match + if (s > *Data8->iMinSAD) { //we have found a better match Data8->currentQMV->x = 2*Data8->currentMV->x; Data8->currentQMV->y = 2*Data8->currentMV->y; } @@ -2610,18 +2575,24 @@ } if (MotionFlags & QUARTERPELREFINE8_BITS) SubpelRefine(Data8); - } else // not qpel - if (MotionFlags & HALFPELREFINE8_BITS) SubpelRefine(Data8); //halfpel mode, halfpel refinement + } else { // not qpel + + if (MotionFlags & PMV_EXTSEARCH8 && MotionFlags & EXTSEARCH_BITS) //extsearch + SquareSearch(Data8->currentMV->x, Data8->currentMV->x, Data8, 255); + + if (MotionFlags & HALFPELREFINE8_BITS) + SubpelRefine(Data8); //halfpel refinement + } //checking vector equal to predicion if (i != 0 && MotionFlags & CHECKPREDICTION_BITS) { const VECTOR * v = Data->qpel ? Data8->currentQMV : Data8->currentMV; - if (!(Data8->predMV.x == v->x && Data8->predMV.y == v->y)) + if (!MVequal(*v, Data8->predMV)) CheckCandidateBits8(Data8->predMV.x, Data8->predMV.y, 255, &iDirection, Data8); } bits += *Data8->iMinSAD; - if (bits >= Data->iMinSAD[0]) break; // no chances for INTER4V + if (bits >= Data->iMinSAD[0]) return bits; // no chances for INTER4V // MB structures for INTER4V mode; we have to set them here, we don't have predictor anywhere else if(Data->qpel) { @@ -2639,95 +2610,60 @@ pMB->mvs[i] = *Data8->currentMV; pMB->sad8[i] = 4 * *Data8->iMinSAD; if (Data8->temp[0]) cbp |= 1 << (5 - i); - } - if (bits < *Data->iMinSAD) { // there is still a chance for inter4v mode. let's check chroma - const uint8_t * ptr; - sumx = (sumx >> 3) + roundtab_76[sumx & 0xf]; - sumy = (sumy >> 3) + roundtab_76[sumy & 0xf]; - - //chroma U - ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); - transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2); - fdct(in); - if (Data->lambda8 == 0) i = quant_inter(coeff, in, Data->lambda16); - else i = quant4_inter(coeff, in, Data->lambda16); - if (i > 0) { - bits += CodeCoeffInter_CalcBits(coeff, scan_tables[0]); - cbp |= 1 << (5 - 4); - } - - if (bits < *Data->iMinSAD) { // still possible - //chroma V - ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); - transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2); - fdct(in); - if (Data->lambda8 == 0) i = quant_inter(coeff, in, Data->lambda16); - else i = quant4_inter(coeff, in, Data->lambda16); - if (i > 0) { - bits += CodeCoeffInter_CalcBits(coeff, scan_tables[0]); - cbp |= 1 << (5 - 5); - } - bits += xvid_cbpy_tab[15-(cbp>>2)].len; - bits += mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len; - } - } + } // /for all luma blocks + + bits += BITS_MULT*xvid_cbpy_tab[15-(cbp>>2)].len; + + // let's check chroma + sumx = (sumx >> 3) + roundtab_76[sumx & 0xf]; + sumy = (sumy >> 3) + roundtab_76[sumy & 0xf]; + + //chroma U + ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); + transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4); + + if (bits >= *Data->iMinSAD) return bits; + + //chroma V + ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); + transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5); + + bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len; return bits; } - static int CountMBBitsIntra(const SearchData * const Data) { - int bits = 1; //this one is ac/dc prediction flag. always 1. - int cbp = 0, i, t, dc = 1024, b_dc; - const uint32_t iQuant = Data->lambda16; + int bits = BITS_MULT*1; //this one is ac/dc prediction flag bit + int cbp = 0, i, dc = 0; int16_t *in = Data->dctSpace, * coeff = Data->dctSpace + 64; - uint32_t iDcScaler = get_dc_scaler(iQuant, 1);; for(i = 0; i < 4; i++) { int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth); transfer_8to16copy(in, Data->Cur + s, Data->iEdgedWidth); - fdct(in); - b_dc = in[0]; - in[0] -= dc; - dc = b_dc; - if (Data->lambda8 == 0) quant_intra(coeff, in, iQuant, iDcScaler); - else quant4_intra(coeff, in, iQuant, iDcScaler); - - bits += t = CodeCoeffIntra_CalcBits(coeff, scan_tables[0]) + dcy_tab[coeff[0] + 255].len;; - Data->temp[i] = t; - if (t != 0) cbp |= 1 << (5 - i); - if (bits >= Data->iMinSAD[0]) break; - } - - if (bits < Data->iMinSAD[0]) { // INTRA still looks good, let's add chroma - iDcScaler = get_dc_scaler(iQuant, 0); - //chroma U - transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2); - fdct(in); - in[0] -= 1024; - if (Data->lambda8 == 0) quant_intra(coeff, in, iQuant, iDcScaler); - else quant4_intra(coeff, in, iQuant, iDcScaler); - - bits += t = CodeCoeffIntra_CalcBits(coeff, scan_tables[0]) + dcc_tab[coeff[0] + 255].len; - if (t != 0) cbp |= 1 << (5 - 4); - - if (bits < Data->iMinSAD[0]) { - //chroma V - transfer_8to16copy(in, Data->CurV, Data->iEdgedWidth/2); - fdct(in); - in[0] -= 1024; - if (Data->lambda8 == 0) quant_intra(coeff, in, iQuant, iDcScaler); - else quant4_intra(coeff, in, iQuant, iDcScaler); - - bits += t = CodeCoeffIntra_CalcBits(coeff, scan_tables[0]) + dcc_tab[coeff[0] + 255].len; - if (t != 0) cbp |= 1 << (5 - 5); + bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, &dc); - bits += xvid_cbpy_tab[cbp>>2].len; - bits += mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp & 3) << 3)].len; - } + if (bits >= Data->iMinSAD[0]) return bits; } + + bits += BITS_MULT*xvid_cbpy_tab[cbp>>2].len; + + //chroma U + transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2); + bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, &dc); + + if (bits >= Data->iMinSAD[0]) return bits; + + //chroma V + transfer_8to16copy(in, Data->CurV, Data->iEdgedWidth/2); + bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, &dc); + + bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp & 3) << 3)].len; + return bits; }