--- estimation_common.c 2004/11/24 22:14:11 1.2.2.4 +++ estimation_common.c 2005/12/09 04:39:49 1.13 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: estimation_common.c,v 1.2.2.4 2004/11/24 22:14:11 edgomez Exp $ + * $Id: estimation_common.c,v 1.13 2005/12/09 04:39:49 syskin Exp $ * ****************************************************************************/ @@ -55,22 +55,22 @@ { 0, 1, 0, 0 }; const int xvid_me_lambda_vec16[32] = - { 0 ,(int)(1.00235 * NEIGH_TEND_16X16 + 0.5), - (int)(1.15582*NEIGH_TEND_16X16 + 0.5), (int)(1.31976*NEIGH_TEND_16X16 + 0.5), - (int)(1.49591*NEIGH_TEND_16X16 + 0.5), (int)(1.68601*NEIGH_TEND_16X16 + 0.5), - (int)(1.89187*NEIGH_TEND_16X16 + 0.5), (int)(2.11542*NEIGH_TEND_16X16 + 0.5), - (int)(2.35878*NEIGH_TEND_16X16 + 0.5), (int)(2.62429*NEIGH_TEND_16X16 + 0.5), - (int)(2.91455*NEIGH_TEND_16X16 + 0.5), (int)(3.23253*NEIGH_TEND_16X16 + 0.5), - (int)(3.58158*NEIGH_TEND_16X16 + 0.5), (int)(3.96555*NEIGH_TEND_16X16 + 0.5), - (int)(4.38887*NEIGH_TEND_16X16 + 0.5), (int)(4.85673*NEIGH_TEND_16X16 + 0.5), - (int)(5.37519*NEIGH_TEND_16X16 + 0.5), (int)(5.95144*NEIGH_TEND_16X16 + 0.5), - (int)(6.59408*NEIGH_TEND_16X16 + 0.5), (int)(7.31349*NEIGH_TEND_16X16 + 0.5), - (int)(8.12242*NEIGH_TEND_16X16 + 0.5), (int)(9.03669*NEIGH_TEND_16X16 + 0.5), - (int)(10.0763*NEIGH_TEND_16X16 + 0.5), (int)(11.2669*NEIGH_TEND_16X16 + 0.5), - (int)(12.6426*NEIGH_TEND_16X16 + 0.5), (int)(14.2493*NEIGH_TEND_16X16 + 0.5), - (int)(16.1512*NEIGH_TEND_16X16 + 0.5), (int)(18.442*NEIGH_TEND_16X16 + 0.5), - (int)(21.2656*NEIGH_TEND_16X16 + 0.5), (int)(24.8580*NEIGH_TEND_16X16 + 0.5), - (int)(29.6436*NEIGH_TEND_16X16 + 0.5), (int)(36.4949*NEIGH_TEND_16X16 + 0.5) + { 0 ,(int)(1.0 * NEIGH_TEND_16X16 + 0.5), + (int)(2.0*NEIGH_TEND_16X16 + 0.5), (int)(3.0*NEIGH_TEND_16X16 + 0.5), + (int)(4.0*NEIGH_TEND_16X16 + 0.5), (int)(5.0*NEIGH_TEND_16X16 + 0.5), + (int)(6.0*NEIGH_TEND_16X16 + 0.5), (int)(7.0*NEIGH_TEND_16X16 + 0.5), + (int)(8.0*NEIGH_TEND_16X16 + 0.5), (int)(9.0*NEIGH_TEND_16X16 + 0.5), + (int)(10.0*NEIGH_TEND_16X16 + 0.5), (int)(11.0*NEIGH_TEND_16X16 + 0.5), + (int)(12.0*NEIGH_TEND_16X16 + 0.5), (int)(13.0*NEIGH_TEND_16X16 + 0.5), + (int)(14.0*NEIGH_TEND_16X16 + 0.5), (int)(15.0*NEIGH_TEND_16X16 + 0.5), + (int)(16.0*NEIGH_TEND_16X16 + 0.5), (int)(17.0*NEIGH_TEND_16X16 + 0.5), + (int)(18.0*NEIGH_TEND_16X16 + 0.5), (int)(19.0*NEIGH_TEND_16X16 + 0.5), + (int)(20.0*NEIGH_TEND_16X16 + 0.5), (int)(21.0*NEIGH_TEND_16X16 + 0.5), + (int)(22.0*NEIGH_TEND_16X16 + 0.5), (int)(23.0*NEIGH_TEND_16X16 + 0.5), + (int)(24.0*NEIGH_TEND_16X16 + 0.5), (int)(25.0*NEIGH_TEND_16X16 + 0.5), + (int)(26.0*NEIGH_TEND_16X16 + 0.5), (int)(27.0*NEIGH_TEND_16X16 + 0.5), + (int)(28.0*NEIGH_TEND_16X16 + 0.5), (int)(29.0*NEIGH_TEND_16X16 + 0.5), + (int)(30.0*NEIGH_TEND_16X16 + 0.5), (int)(31.0*NEIGH_TEND_16X16 + 0.5) }; /***************************************************************************** @@ -349,97 +349,158 @@ } void -xvid_me_SubpelRefine(SearchData * const data, CheckFunc * const CheckCandidate) +xvid_me_SubpelRefine(VECTOR centerMV, SearchData * const data, CheckFunc * const CheckCandidate, int dir) { /* Do a half-pel or q-pel refinement */ - const VECTOR centerMV = data->qpel_precision ? *data->currentQMV : *data->currentMV; - CHECK_CANDIDATE(centerMV.x, centerMV.y - 1, 0); - CHECK_CANDIDATE(centerMV.x + 1, centerMV.y - 1, 0); - CHECK_CANDIDATE(centerMV.x + 1, centerMV.y, 0); - CHECK_CANDIDATE(centerMV.x + 1, centerMV.y + 1, 0); - CHECK_CANDIDATE(centerMV.x, centerMV.y + 1, 0); - CHECK_CANDIDATE(centerMV.x - 1, centerMV.y + 1, 0); - CHECK_CANDIDATE(centerMV.x - 1, centerMV.y, 0); - CHECK_CANDIDATE(centerMV.x - 1, centerMV.y - 1, 0); + CHECK_CANDIDATE(centerMV.x, centerMV.y - 1, dir); + CHECK_CANDIDATE(centerMV.x + 1, centerMV.y - 1, dir); + CHECK_CANDIDATE(centerMV.x + 1, centerMV.y, dir); + CHECK_CANDIDATE(centerMV.x + 1, centerMV.y + 1, dir); + CHECK_CANDIDATE(centerMV.x, centerMV.y + 1, dir); + CHECK_CANDIDATE(centerMV.x - 1, centerMV.y + 1, dir); + CHECK_CANDIDATE(centerMV.x - 1, centerMV.y, dir); + CHECK_CANDIDATE(centerMV.x - 1, centerMV.y - 1, dir); +} + +#define CHECK_CANDIDATE_2ndBEST(X, Y, DIR) { \ + *data->iMinSAD = s_best2; \ + CheckCandidate((X),(Y), data, direction); \ + if (data->iMinSAD[0] < s_best) { \ + s_best2 = s_best; \ + s_best = data->iMinSAD[0]; \ + v_best2 = v_best; \ + v_best.x = X; v_best.y = Y; \ + dir = DIR; \ + } else if (data->iMinSAD[0] < s_best2) { \ + s_best2 = data->iMinSAD[0]; \ + v_best2.x = X; v_best2.y = Y; \ + } \ } void -SubpelRefine_Fast(SearchData * data, CheckFunc * CheckCandidate) +FullRefine_Fast(SearchData * data, CheckFunc * CheckCandidate, int direction) { -/* Do a fast q-pel refinement */ - VECTOR centerMV; - VECTOR second_best; - int best_sad = *data->iMinSAD; - int xo, yo, xo2, yo2; - int size = 2; - data->iMinSAD2 = 0; - - /* check all halfpixel positions near our best halfpel position */ - centerMV = *data->currentQMV; - *data->iMinSAD = 256 * 4096; - - CHECK_CANDIDATE(centerMV.x, centerMV.y - size, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y - size, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y, 0); - CHECK_CANDIDATE(centerMV.x + size, centerMV.y + size, 0); - - CHECK_CANDIDATE(centerMV.x, centerMV.y + size, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y + size, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y, 0); - CHECK_CANDIDATE(centerMV.x - size, centerMV.y - size, 0); - - second_best = *data->currentQMV; - - /* after second_best has been found, go back to the vector we began with */ - - data->currentQMV[0] = centerMV; - *data->iMinSAD = best_sad; - - xo = centerMV.x; - yo = centerMV.y; - xo2 = second_best.x; - yo2 = second_best.y; +/* Do a fast h-pel and then q-pel refinement */ + + int32_t s_best = data->iMinSAD[0], s_best2 = 256*4096; + VECTOR v_best, v_best2; + int dir = 0, xo2, yo2, best_halfpel, b_cbp; + + int xo = 2*data->currentMV[0].x, yo = 2*data->currentMV[0].y; + + data->currentQMV[0].x = v_best.x = v_best2.x = xo; + data->currentQMV[0].y = v_best.y = v_best2.y = yo; + + data->qpel_precision = 1; + + /* halfpel refinement: check 8 neighbours, but keep the second best SAD as well */ + CHECK_CANDIDATE_2ndBEST(xo - 2, yo, 1+16+64); + CHECK_CANDIDATE_2ndBEST(xo + 2, yo, 2+32+128); + CHECK_CANDIDATE_2ndBEST(xo, yo - 2, 4+16+32); + CHECK_CANDIDATE_2ndBEST(xo, yo + 2, 8+64+128); + CHECK_CANDIDATE_2ndBEST(xo - 2, yo - 2, 1+4+16+32+64); + CHECK_CANDIDATE_2ndBEST(xo + 2, yo - 2, 2+4+16+32+128); + CHECK_CANDIDATE_2ndBEST(xo - 2, yo + 2, 1+8+16+64+128); + CHECK_CANDIDATE_2ndBEST(xo + 2, yo + 2, 2+8+32+64+128); + + xo = v_best.x; yo = v_best.y, b_cbp = data->cbp[0]; + + /* we need all 8 neighbours *of best hpel position found above* checked for 2nd best + let's check the missing ones */ + + /* on rare occasions, 1st best and 2nd best are far away, and 2nd best is not 1st best's neighbour. + to simplify stuff, we'll forget that evil 2nd best and make a full search for a new 2nd best */ + /* todo. we should check the missing neighbours first, maybe they'll give us 2nd best which is even better + than the infamous one. in that case, we will not have to re-check the other neighbours */ + + if (abs(v_best.x - v_best2.x) > 2 || abs(v_best.y - v_best2.y) > 2) { /* v_best2 is useless */ + data->iMinSAD[0] = 256*4096; + dir = ~0; /* all */ + } else { + data->iMinSAD[0] = s_best2; + data->currentQMV[0] = v_best2; + } - data->iMinSAD2 = 256 * 4096; + if (dir & 1) CHECK_CANDIDATE( xo - 2, yo, direction); + if (dir & 2) CHECK_CANDIDATE( xo + 2, yo, direction); + if (dir & 4) CHECK_CANDIDATE( xo, yo - 2, direction); + if (dir & 8) CHECK_CANDIDATE( xo, yo + 2, direction); + if (dir & 16) CHECK_CANDIDATE( xo - 2, yo - 2, direction); + if (dir & 32) CHECK_CANDIDATE( xo + 2, yo - 2, direction); + if (dir & 64) CHECK_CANDIDATE( xo - 2, yo + 2, direction); + if (dir & 128) CHECK_CANDIDATE( xo + 2, yo + 2, direction); + + /* read the position of 2nd best */ + v_best2 = data->currentQMV[0]; + + /* after second_best has been found, go back to best vector */ + + data->currentQMV[0].x = xo; + data->currentQMV[0].y = yo; + data->cbp[0] = b_cbp; + + data->currentMV[0].x = xo/2; + data->currentMV[0].y = yo/2; + data->iMinSAD[0] = best_halfpel = s_best; + + xo2 = v_best2.x; + yo2 = v_best2.y; + s_best2 = 256*4096; if (yo == yo2) { - CHECK_CANDIDATE((xo+xo2)>>1, yo, 0); - CHECK_CANDIDATE(xo, yo-1, 0); - CHECK_CANDIDATE(xo, yo+1, 0); - - if(best_sad <= data->iMinSAD2) return; - - if(data->currentQMV[0].x == data->currentQMV2.x) { - CHECK_CANDIDATE((xo+xo2)>>1, yo-1, 0); - CHECK_CANDIDATE((xo+xo2)>>1, yo+1, 0); + CHECK_CANDIDATE_2ndBEST((xo+xo2)>>1, yo, 0); + CHECK_CANDIDATE_2ndBEST(xo, yo-1, 0); + CHECK_CANDIDATE_2ndBEST(xo, yo+1, 0); + data->currentQMV[0] = v_best; + data->iMinSAD[0] = s_best; + + if(best_halfpel <= s_best2) return; + + if(data->currentQMV[0].x == v_best2.x) { + CHECK_CANDIDATE((xo+xo2)>>1, yo-1, direction); + CHECK_CANDIDATE((xo+xo2)>>1, yo+1, direction); } else { CHECK_CANDIDATE((xo+xo2)>>1, - (data->currentQMV[0].x == xo) ? data->currentQMV[0].y : data->currentQMV2.y, 0); + (data->currentQMV[0].x == xo) ? data->currentQMV[0].y : v_best2.y, direction); } return; } if (xo == xo2) { - CHECK_CANDIDATE(xo, (yo+yo2)>>1, 0); - CHECK_CANDIDATE(xo-1, yo, 0); - CHECK_CANDIDATE(xo+1, yo, 0); - - if(best_sad <= data->iMinSAD2) return; - - if(data->currentQMV[0].y == data->currentQMV2.y) { - CHECK_CANDIDATE(xo-1, (yo+yo2)>>1, 0); - CHECK_CANDIDATE(xo+1, (yo+yo2)>>1, 0); + CHECK_CANDIDATE_2ndBEST(xo, (yo+yo2)>>1, 0); + CHECK_CANDIDATE_2ndBEST(xo-1, yo, 0); + CHECK_CANDIDATE_2ndBEST(xo+1, yo, 0); + data->currentQMV[0] = v_best; + data->iMinSAD[0] = s_best; + + if(best_halfpel <= s_best2) return; + + if(data->currentQMV[0].y == v_best2.y) { + CHECK_CANDIDATE(xo-1, (yo+yo2)>>1, direction); + CHECK_CANDIDATE(xo+1, (yo+yo2)>>1, direction); } else { - CHECK_CANDIDATE((data->currentQMV[0].y == yo) ? data->currentQMV[0].x : data->currentQMV2.x, (yo+yo2)>>1, 0); + CHECK_CANDIDATE((data->currentQMV[0].y == yo) ? data->currentQMV[0].x : v_best2.x, (yo+yo2)>>1, direction); } return; } - CHECK_CANDIDATE(xo, (yo+yo2)>>1, 0); - CHECK_CANDIDATE((xo+xo2)>>1, yo, 0); + CHECK_CANDIDATE_2ndBEST(xo, (yo+yo2)>>1, 0); + CHECK_CANDIDATE_2ndBEST((xo+xo2)>>1, yo, 0); + data->currentQMV[0] = v_best; + data->iMinSAD[0] = s_best; + + if(best_halfpel <= s_best2) return; - if(best_sad <= data->iMinSAD2) return; + CHECK_CANDIDATE((xo+xo2)>>1, (yo+yo2)>>1, direction); - CHECK_CANDIDATE((xo+xo2)>>1, (yo+yo2)>>1, 0); +} + +/* it's the positive max, so "32" needs fcode of 2, not 1 */ +unsigned int +getMinFcode(const int MVmax) +{ + unsigned int fcode; + for (fcode = 1; (16 << fcode) <= MVmax; fcode++); + return fcode; }