--- encoder.c 2006/07/10 15:05:30 1.120.2.1 +++ encoder.c 2011/04/06 14:30:14 1.141 @@ -3,9 +3,9 @@ * XVID MPEG-4 VIDEO CODEC * - Encoder main module - * - * Copyright(C) 2002 Michael Militzer - * 2002-2003 Peter Ross - * 2002 Daniel Smith + * Copyright(C) 2002-2010 Michael Militzer + * 2002-2003 Peter Ross + * 2002 Daniel Smith * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: encoder.c,v 1.120.2.1 2006/07/10 15:05:30 Isibaar Exp $ + * $Id: encoder.c,v 1.141 2011/04/06 14:30:14 Isibaar Exp $ * ****************************************************************************/ @@ -49,6 +49,9 @@ #include "quant/quant_matrix.h" #include "utils/mem_align.h" +# include "motion/motion_smp.h" + + /***************************************************************************** * Local function prototypes ****************************************************************************/ @@ -135,7 +138,7 @@ enc_create(xvid_enc_create_t * create) { Encoder *pEnc; - int n; + int n; if (XVID_VERSION_MAJOR(create->version) != 1) /* v1.x.x */ return XVID_ERR_VERSION; @@ -238,6 +241,14 @@ goto xvid_err_memory1a; } + /* temp lambdas */ + if (pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + pEnc->temp_lambda = (float *) xvid_malloc(pEnc->mbParam.mb_width * + pEnc->mbParam.mb_height * 6 * sizeof(float), CACHE_LINE); + if (pEnc->temp_lambda == NULL) + goto xvid_err_memory1a; + } + /* bframes */ pEnc->mbParam.max_bframes = MAX(create->max_bframes, 0); pEnc->mbParam.bquant_ratio = MAX(create->bquant_ratio, 0); @@ -424,7 +435,7 @@ /* timestamp stuff */ pEnc->mbParam.m_stamp = 0; - pEnc->m_framenum = 0; + pEnc->m_framenum = create->start_frame_num; pEnc->current->stamp = 0; pEnc->reference->stamp = 0; @@ -433,6 +444,73 @@ pEnc->iFrameNum = 0; pEnc->fMvPrevSigma = -1; + /* slices */ + pEnc->num_slices = MIN(MAX(1, create->num_slices), (int) pEnc->mbParam.mb_height); + + /* multithreaded stuff */ + if (create->num_threads > 0) { +#ifndef HAVE_PTHREAD + int t = MAX(1, create->num_threads); +#else + int t = MIN(create->num_threads, (int) (pEnc->mbParam.mb_height>>1)); /* at least two rows per thread */ +#endif + int threads_per_slice = MAX(1, (t / pEnc->num_slices)); + int rows_per_thread = (pEnc->mbParam.mb_height + threads_per_slice - 1) / threads_per_slice; + + pEnc->num_threads = t; + pEnc->smpData = xvid_malloc(t*sizeof(SMPData), CACHE_LINE); + if (!pEnc->smpData) + goto xvid_err_nosmp; + + /* tmp bitstream buffer for slice coding */ + pEnc->smpData[0].tmp_buffer = xvid_malloc(16*pEnc->mbParam.edged_width*pEnc->mbParam.mb_height*sizeof(uint8_t), CACHE_LINE); + if (! pEnc->smpData[0].tmp_buffer) goto xvid_err_nosmp; + + for (n = 0; n < t; n++) { + int s = MIN(pEnc->num_threads, pEnc->num_slices); + + pEnc->smpData[n].complete_count_self = + xvid_malloc(rows_per_thread * sizeof(int), CACHE_LINE); + + if (!pEnc->smpData[n].complete_count_self) + goto xvid_err_nosmp; + + if (n > 0 && n < s) { + pEnc->smpData[n].bs = (Bitstream *) xvid_malloc(sizeof(Bitstream), CACHE_LINE); + if (!pEnc->smpData[n].bs) + goto xvid_err_nosmp; + + pEnc->smpData[n].sStat = (Statistics *) xvid_malloc(sizeof(Statistics), CACHE_LINE); + if (!pEnc->smpData[n].sStat) + goto xvid_err_nosmp; + + pEnc->smpData[n].tmp_buffer = pEnc->smpData[0].tmp_buffer + 16*(((n-1)*pEnc->mbParam.edged_width*pEnc->mbParam.mb_height)/s); + BitstreamInit(pEnc->smpData[n].bs, pEnc->smpData[n].tmp_buffer, 0); + } + + if (n != 0) + pEnc->smpData[n].complete_count_above = + pEnc->smpData[n-1].complete_count_self; + } + pEnc->smpData[0].complete_count_above = + pEnc->smpData[t-1].complete_count_self - 1; + + } else { + xvid_err_nosmp: + /* no SMP */ + if (pEnc->smpData) { + if (pEnc->smpData[0].tmp_buffer) + xvid_free(pEnc->smpData[0].tmp_buffer); + } + else { + pEnc->smpData = xvid_malloc(1*sizeof(SMPData), CACHE_LINE); + if (pEnc->smpData == NULL) + goto xvid_err_memory5; + } + + create->num_threads = 0; + } + create->handle = (void *) pEnc; init_timer(); @@ -519,6 +597,10 @@ xvid_free(pEnc->temp_dquants); } + if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + xvid_free(pEnc->temp_lambda); + } + xvid_err_memory0: for (n=0; nnum_plugins;n++) { if (pEnc->plugins[n].func) { @@ -617,6 +699,9 @@ xvid_free(pEnc->temp_dquants); } + if ((pEnc->mbParam.plugin_flags & XVID_REQLAMBDA)) { + xvid_free(pEnc->temp_lambda); + } if (pEnc->num_plugins>0) { xvid_plg_destroy_t pdestroy; @@ -635,9 +720,21 @@ xvid_free(pEnc->mbParam.mpeg_quant_matrices); - if (pEnc->num_plugins>0) + if (pEnc->num_zones > 0) xvid_free(pEnc->zones); + if (pEnc->num_threads > 0) { + for (i = 1; i < MAX(1, MIN(pEnc->num_threads, pEnc->num_slices)); i++) { + xvid_free(pEnc->smpData[i].bs); + xvid_free(pEnc->smpData[i].sStat); + } + if (pEnc->smpData[0].tmp_buffer) xvid_free(pEnc->smpData[0].tmp_buffer); + + for (i = 0; i < pEnc->num_threads; i++) + xvid_free(pEnc->smpData[i].complete_count_self); + } + xvid_free(pEnc->smpData); + xvid_free(pEnc); return 0; /* ok */ @@ -651,7 +748,7 @@ static void call_plugins(Encoder * pEnc, FRAMEINFO * frame, IMAGE * original, int opt, int * type, int * quant, xvid_enc_stats_t * stats) { - unsigned int i, j; + unsigned int i, j, k; xvid_plg_data_t data; /* set data struct */ @@ -710,9 +807,19 @@ if ((pEnc->mbParam.plugin_flags & XVID_REQDQUANTS)) { data.dquant = pEnc->temp_dquants; data.dquant_stride = pEnc->mbParam.mb_width; - memset(data.dquant, 0, data.mb_width*data.mb_height); + memset(data.dquant, 0, data.mb_width*data.mb_height*sizeof(int)); } - + + if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + int block = 0; + emms(); + data.lambda = pEnc->temp_lambda; + for(i = 0;i < pEnc->mbParam.mb_height; i++) + for(j = 0;j < pEnc->mbParam.mb_width; j++) + for (k = 0; k < 6; k++) + data.lambda[block++] = 1.0f; + } + } else { /* XVID_PLG_AFTER */ if ((pEnc->mbParam.plugin_flags & XVID_REQORIGINAL)) { data.original.csp = XVID_CSP_PLANAR; @@ -816,6 +923,23 @@ frame->mbs[j*pEnc->mbParam.mb_width + i].dquant = 0; } } + + if (pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { + for (j = 0; j < pEnc->mbParam.mb_height; j++) + for (i = 0; i < pEnc->mbParam.mb_width; i++) + for (k = 0; k < 6; k++) { + frame->mbs[j*pEnc->mbParam.mb_width + i].lambda[k] = + (int) ((float)(1<mbParam.mb_height; j++) + for (i = 0; imbParam.mb_width; i++) + for (k = 0; k < 6; k++) { + frame->mbs[j*pEnc->mbParam.mb_width + i].lambda[k] = 1<mbs[0].quant = data.quant; /* FRAME will not affect the quant in stats */ } @@ -915,7 +1039,6 @@ return; } - /***************************************************************************** * IPB frame encoder entry point * @@ -1225,8 +1348,9 @@ if (pEnc->current->stamp > 0) { call_plugins(pEnc, pEnc->reference, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); } - else - stats->type = XVID_TYPE_NOTHING; + else if (stats) { + stats->type = XVID_TYPE_NOTHING; + } } /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1409,10 +1533,8 @@ static __inline void -CodeIntraMB(Encoder * pEnc, - MACROBLOCK * pMB) +CodeIntraMB(MACROBLOCK * pMB) { - pMB->mode = MODE_INTRA; /* zero mv statistics */ @@ -1426,20 +1548,113 @@ } } +static void +SliceCodeI(SMPData *data) +{ + Encoder *pEnc = (Encoder *) data->pEnc; + Bitstream *bs = (Bitstream *) data->bs; + + uint16_t x, y; + int mb_width = pEnc->mbParam.mb_width; + int mb_height = pEnc->mbParam.mb_height; + + int bound = 0, num_slices = pEnc->num_slices; + FRAMEINFO *const current = pEnc->current; + + DECLARE_ALIGNED_MATRIX(dct_codes, 6, 64, int16_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(qcoeff, 6, 64, int16_t, CACHE_LINE); + + if (data->start_y > 0) { /* write resync marker */ + bound = data->start_y*mb_width; + write_video_packet_header(bs, &pEnc->mbParam, current, bound); + } + + for (y = data->start_y; y < data->stop_y; y++) { + int new_bound = mb_width * ((((y*num_slices) / mb_height) * mb_height + (num_slices-1)) / num_slices); + + if (new_bound > bound) { + bound = new_bound; + BitstreamPadAlways(bs); + write_video_packet_header(bs, &pEnc->mbParam, current, bound); + } + + for (x = 0; x < mb_width; x++) { + MACROBLOCK *pMB = ¤t->mbs[x + y * mb_width]; + + CodeIntraMB(pMB); + + MBTransQuantIntra(&pEnc->mbParam, current, pMB, x, y, + dct_codes, qcoeff); + + start_timer(); + MBPrediction(current, x, y, mb_width, qcoeff, bound); + stop_prediction_timer(); + + start_timer(); + MBCoding(current, pMB, qcoeff, bs, data->sStat); + stop_coding_timer(); + + } + } + + emms(); + BitstreamPadAlways(bs); +} + +static __inline void +SerializeBitstreams(Encoder *pEnc, FRAMEINFO *current, Bitstream *bs, int num_threads) +{ + int k; + uint32_t pos = BitstreamLength(bs); + + for (k = 1; k < num_threads; k++) { + uint32_t len = BitstreamLength(pEnc->smpData[k].bs); + memcpy((void *)((ptr_t)bs->start + pos), + (void *)((ptr_t)pEnc->smpData[k].bs->start), len); + + current->length += len; + pos += len; + + /* collect stats */ + current->sStat.iTextBits += pEnc->smpData[k].sStat->iTextBits; + current->sStat.kblks += pEnc->smpData[k].sStat->kblks; + current->sStat.mblks += pEnc->smpData[k].sStat->mblks; + current->sStat.ublks += pEnc->smpData[k].sStat->ublks; + current->sStat.iMVBits += pEnc->smpData[k].sStat->iMVBits; + } + + if (num_threads > 1) { + uint32_t pos32 = pos>>2; + bs->tail = bs->start + pos32; + bs->pos = 8*(pos - (pos32<<2)); + bs->buf = 0; + + if (bs->pos > 0) { + uint32_t pos8 = bs->pos/8; + memset((void *)((ptr_t)bs->tail+pos8), 0, (4-pos8)); + pos = *bs->tail; +#ifndef ARCH_IS_BIG_ENDIAN + BSWAP(pos); +#endif + bs->buf = pos; + } + } +} static int FrameCodeI(Encoder * pEnc, Bitstream * bs) { int bits = BitstreamPos(bs); - int mb_width = pEnc->mbParam.mb_width; + int bound = 0, num_slices = pEnc->num_slices; + int num_threads = MAX(1, MIN(pEnc->num_threads, num_slices)); + int slices_per_thread = (num_slices*1024 / num_threads); int mb_height = pEnc->mbParam.mb_height; - - DECLARE_ALIGNED_MATRIX(dct_codes, 6, 64, int16_t, CACHE_LINE); - DECLARE_ALIGNED_MATRIX(qcoeff, 6, 64, int16_t, CACHE_LINE); - - uint16_t x, y; +#ifdef HAVE_PTHREAD + void * status = NULL; +#endif + uint16_t k; pEnc->mbParam.m_rounding_type = 1; pEnc->current->rounding_type = pEnc->mbParam.m_rounding_type; @@ -1449,7 +1664,7 @@ SetMacroblockQuants(&pEnc->mbParam, pEnc->current); - BitstreamWriteVolHeader(bs, &pEnc->mbParam, pEnc->current); + BitstreamWriteVolHeader(bs, &pEnc->mbParam, pEnc->current, num_slices); set_timecodes(pEnc->current,pEnc->reference,pEnc->mbParam.fbase); @@ -1458,33 +1673,52 @@ BitstreamWriteVopHeader(bs, &pEnc->mbParam, pEnc->current, 1, pEnc->current->mbs[0].quant); pEnc->current->sStat.iTextBits = 0; - pEnc->current->sStat.kblks = mb_width * mb_height; - pEnc->current->sStat.mblks = pEnc->current->sStat.ublks = 0; - for (y = 0; y < mb_height; y++) - for (x = 0; x < mb_width; x++) { - MACROBLOCK *pMB = - &pEnc->current->mbs[x + y * pEnc->mbParam.mb_width]; + /* multithreaded intra coding - dispatch threads */ + for (k = 0; k < num_threads; k++) { + int add = ((slices_per_thread + 512) >> 10); - CodeIntraMB(pEnc, pMB); + slices_per_thread += ((num_slices*1024 / num_threads) - add*1024); - MBTransQuantIntra(&pEnc->mbParam, pEnc->current, pMB, x, y, - dct_codes, qcoeff); + pEnc->smpData[k].pEnc = (void *) pEnc; + pEnc->smpData[k].stop_y = (((bound+add) * mb_height + (num_slices-1)) / num_slices); + pEnc->smpData[k].start_y = ((bound * mb_height + (num_slices-1)) / num_slices); - start_timer(); - MBPrediction(pEnc->current, x, y, pEnc->mbParam.mb_width, qcoeff); - stop_prediction_timer(); + bound += add; - start_timer(); - MBCoding(pEnc->current, pMB, qcoeff, bs, &pEnc->current->sStat); - stop_coding_timer(); + if (k > 0) { + BitstreamReset(pEnc->smpData[k].bs); + pEnc->smpData[k].sStat->iTextBits = 0; } + } + pEnc->smpData[0].bs = bs; + pEnc->smpData[0].sStat = &pEnc->current->sStat; - emms(); +#ifdef HAVE_PTHREAD + /* create threads */ + for (k = 1; k < num_threads; k++) { + pthread_create(&pEnc->smpData[k].handle, NULL, + (void*)SliceCodeI, (void*)&pEnc->smpData[k]); + } +#endif - BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ + SliceCodeI(&pEnc->smpData[0]); - pEnc->current->length = (BitstreamPos(bs) - bits) / 8; +#ifdef HAVE_PTHREAD + /* wait until all threads are finished */ + for (k = 1; k < num_threads; k++) { + pthread_join(pEnc->smpData[k].handle, &status); + } +#endif + + pEnc->current->length = BitstreamLength(bs) - (bits/8); + + /* reassemble the pieces together */ + SerializeBitstreams(pEnc, pEnc->current, bs, num_threads); + + pEnc->current->sStat.iMVBits = 0; + pEnc->current->sStat.mblks = pEnc->current->sStat.ublks = 0; + pEnc->current->sStat.kblks = pEnc->mbParam.mb_width * pEnc->mbParam.mb_height; pEnc->fMvPrevSigma = -1; pEnc->mbParam.m_fcode = 2; @@ -1521,17 +1755,160 @@ #define BFRAME_SKIP_THRESHHOLD 30 -/* FrameCodeP also handles S(GMC)-VOPs */ -static int -FrameCodeP(Encoder * pEnc, - Bitstream * bs) +static void +SliceCodeP(SMPData *data) { - int bits = BitstreamPos(bs); + Encoder *pEnc = (Encoder *) data->pEnc; + Bitstream *bs = (Bitstream *) data->bs; + + int x, y, k; + FRAMEINFO *const current = pEnc->current; + FRAMEINFO *const reference = pEnc->reference; + MBParam * const pParam = &pEnc->mbParam; + int mb_width = pParam->mb_width; + int mb_height = pParam->mb_height; DECLARE_ALIGNED_MATRIX(dct_codes, 6, 64, int16_t, CACHE_LINE); DECLARE_ALIGNED_MATRIX(qcoeff, 6, 64, int16_t, CACHE_LINE); - int x, y, k; + int bound = 0, num_slices = pEnc->num_slices; + + if (data->start_y > 0) { /* write resync marker */ + bound = data->start_y*mb_width; + write_video_packet_header(bs, pParam, current, bound); + } + + for (y = data->start_y; y < data->stop_y; y++) { + int new_bound = mb_width * ((((y*num_slices) / mb_height) * mb_height + (num_slices-1)) / num_slices); + + if (new_bound > bound) { + bound = new_bound; + BitstreamPadAlways(bs); + write_video_packet_header(bs, pParam, current, bound); + } + + for (x = 0; x < mb_width; x++) { + MACROBLOCK *pMB = ¤t->mbs[x + y * pParam->mb_width]; + int skip_possible; + + if (pMB->mode == MODE_INTRA || pMB->mode == MODE_INTRA_Q) { + CodeIntraMB(pMB); + MBTransQuantIntra(pParam, current, pMB, x, y, + dct_codes, qcoeff); + + start_timer(); + MBPrediction(current, x, y, pParam->mb_width, qcoeff, bound); + stop_prediction_timer(); + + data->sStat->kblks++; + + MBCoding(current, pMB, qcoeff, bs, data->sStat); + stop_coding_timer(); + continue; + } + + start_timer(); + MBMotionCompensation(pMB, x, y, &reference->image, + &pEnc->vInterH, &pEnc->vInterV, + &pEnc->vInterHV, &pEnc->vGMC, + ¤t->image, + dct_codes, pParam->width, + pParam->height, + pParam->edged_width, + (current->vol_flags & XVID_VOL_QUARTERPEL), + current->rounding_type, + data->RefQ); + + stop_comp_timer(); + + pMB->field_pred = 0; + + if (pMB->cbp != 0) { + pMB->cbp = MBTransQuantInter(pParam, current, pMB, x, y, + dct_codes, qcoeff); + } + + if (pMB->dquant != 0) + MBSetDquant(pMB, x, y, pParam); + + + if (pMB->cbp || pMB->mvs[0].x || pMB->mvs[0].y || + pMB->mvs[1].x || pMB->mvs[1].y || pMB->mvs[2].x || + pMB->mvs[2].y || pMB->mvs[3].x || pMB->mvs[3].y) { + data->sStat->mblks++; + } else { + data->sStat->ublks++; + } + + start_timer(); + + /* Finished processing the MB, now check if to CODE or SKIP */ + + skip_possible = (pMB->cbp == 0) && (pMB->mode == MODE_INTER); + + if (current->coding_type == S_VOP) + skip_possible &= (pMB->mcsel == 1); + else { /* PVOP */ + const VECTOR * const mv = (pParam->vol_flags & XVID_VOL_QUARTERPEL) ? + pMB->qmvs : pMB->mvs; + skip_possible &= ((mv->x|mv->y) == 0); + } + + if ((pMB->mode == MODE_NOT_CODED) || (skip_possible)) { + /* This is a candidate for SKIPping, but for P-VOPs check intermediate B-frames first */ + int bSkip = 1; + + if (current->coding_type == P_VOP) { /* special rule for P-VOP's SKIP */ + for (k = pEnc->bframenum_head; k < pEnc->bframenum_tail; k++) { + int iSAD; + iSAD = sad16(reference->image.y + 16*y*pParam->edged_width + 16*x, + pEnc->bframes[k]->image.y + 16*y*pParam->edged_width + 16*x, + pParam->edged_width, BFRAME_SKIP_THRESHHOLD * pMB->quant); + if (iSAD >= BFRAME_SKIP_THRESHHOLD * pMB->quant || ((bound > 1) && + ((y*mb_width+x == bound) || (y*mb_width+x == bound+1)))) { /* Some third-party decoders have problems with coloc skip MB before or after + resync marker in BVOP. We avoid any ambiguity and force no skip at slice boundary */ + bSkip = 0; /* could not SKIP */ + if (pParam->vol_flags & XVID_VOL_QUARTERPEL) { + VECTOR predMV = get_qpmv2(current->mbs, pParam->mb_width, bound, x, y, 0); + pMB->pmvs[0].x = - predMV.x; + pMB->pmvs[0].y = - predMV.y; + } else { + VECTOR predMV = get_pmv2(current->mbs, pParam->mb_width, bound, x, y, 0); + pMB->pmvs[0].x = - predMV.x; + pMB->pmvs[0].y = - predMV.y; + } + pMB->mode = MODE_INTER; + pMB->cbp = 0; + break; + } + } + } + + if (bSkip) { + /* do SKIP */ + pMB->mode = MODE_NOT_CODED; + MBSkip(bs); + stop_coding_timer(); + continue; /* next MB */ + } + } + + /* ordinary case: normal coded INTER/INTER4V block */ + MBCoding(current, pMB, qcoeff, bs, data->sStat); + stop_coding_timer(); + } + } + + BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ + emms(); +} + +/* FrameCodeP also handles S(GMC)-VOPs */ +static int +FrameCodeP(Encoder * pEnc, Bitstream * bs) +{ + int bits = BitstreamPos(bs); + FRAMEINFO *const current = pEnc->current; FRAMEINFO *const reference = pEnc->reference; MBParam * const pParam = &pEnc->mbParam; @@ -1539,12 +1916,20 @@ int mb_height = pParam->mb_height; int coded = 1; + int k = 0, bound = 0, num_slices = pEnc->num_slices; + int num_threads = MAX(1, MIN(pEnc->num_threads, num_slices)); +#ifdef HAVE_PTHREAD + void * status = NULL; + int threads_per_slice = (pEnc->num_threads*1024 / num_threads); +#endif + int slices_per_thread = (num_slices*1024 / num_threads); + IMAGE *pRef = &reference->image; if (!reference->is_edged) { start_timer(); image_setedges(pRef, pParam->edged_width, pParam->edged_height, - pParam->width, pParam->height, 0); + pParam->width, pParam->height, XVID_BS_VERSION); stop_edges_timer(); reference->is_edged = 1; } @@ -1556,8 +1941,8 @@ if ((current->vop_flags & XVID_VOP_HALFPEL)) { if (reference->is_interpolated != current->rounding_type) { start_timer(); - image_interpolate(pRef, &pEnc->vInterH, &pEnc->vInterV, - &pEnc->vInterHV, pParam->edged_width, + image_interpolate(pRef->y, pEnc->vInterH.y, pEnc->vInterV.y, + pEnc->vInterHV.y, pParam->edged_width, pParam->edged_height, (pParam->vol_flags & XVID_VOL_QUARTERPEL), current->rounding_type); @@ -1567,19 +1952,25 @@ } current->sStat.iTextBits = current->sStat.iMvSum = current->sStat.iMvCount = - current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = 0; + current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = + current->sStat.iMVBits = 0; current->coding_type = P_VOP; + if (current->vop_flags & XVID_VOP_RD_PSNRHVSM) { + image_block_variance(¤t->image, pParam->edged_width, current->mbs, + pParam->mb_width, pParam->mb_height); + } + call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); SetMacroblockQuants(&pEnc->mbParam, current); start_timer(); - if (current->vol_flags & XVID_VOL_GMC ) /* GMC only for S(GMC)-VOPs */ + if (current->vol_flags & XVID_VOL_GMC) /* GMC only for S(GMC)-VOPs */ { int gmcval; current->warp = GlobalMotionEst( current->mbs, pParam, current, reference, - &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV); + &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, num_slices); if (current->motion_flags & XVID_ME_GME_REFINE) { gmcval = GlobalMotionEstRefine(¤t->warp, @@ -1626,138 +2017,140 @@ } } - MotionEstimation(&pEnc->mbParam, current, reference, - &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, - &pEnc->vGMC, 256*4096); +#ifdef HAVE_PTHREAD + if (pEnc->num_threads > 0) { + /* multithreaded motion estimation - dispatch threads */ + while (k < pEnc->num_threads) { + int i, add_s = (slices_per_thread + 512) >> 10; + int add_t = (threads_per_slice + 512) >> 10; + + int start_y = (bound * mb_height + (num_slices-1)) / num_slices; + int stop_y = ((bound+add_s) * mb_height + (num_slices-1)) / num_slices; + int rows_per_thread = (stop_y - start_y + add_t - 1) / add_t; + + slices_per_thread += ((num_slices*1024 / num_threads) - add_s*1024); + threads_per_slice += ((pEnc->num_threads*1024 / num_threads) - add_t*1024); + + for (i = 0; i < add_t; i++) { + memset(pEnc->smpData[k+i].complete_count_self, 0, rows_per_thread * sizeof(int)); + + pEnc->smpData[k+i].pEnc = (void *) pEnc; + pEnc->smpData[k+i].y_row = i; + pEnc->smpData[k+i].y_step = add_t; + pEnc->smpData[k+i].stop_y = stop_y; + pEnc->smpData[k+i].start_y = start_y; + + /* todo: sort out temp space once and for all */ + pEnc->smpData[k+i].RefQ = (((k+i)&1) ? pEnc->vInterV.u : pEnc->vInterV.v) + + 16*((k+i)>>1)*pParam->edged_width; + } + + pEnc->smpData[k].complete_count_above = + pEnc->smpData[k+add_t-1].complete_count_self - 1; - stop_motion_timer(); + bound += add_s; + k += add_t; + } - set_timecodes(current,reference,pParam->fbase); + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->smpData[k].handle, NULL, + (void*)MotionEstimateSMP, (void*)&pEnc->smpData[k]); + } - BitstreamWriteVopHeader(bs, &pEnc->mbParam, current, 1, current->mbs[0].quant); + MotionEstimateSMP(&pEnc->smpData[0]); - for (y = 0; y < mb_height; y++) { - for (x = 0; x < mb_width; x++) { - MACROBLOCK *pMB = ¤t->mbs[x + y * pParam->mb_width]; - int skip_possible; + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->smpData[k].handle, &status); + } - if (pMB->mode == MODE_INTRA || pMB->mode == MODE_INTRA_Q) { - CodeIntraMB(pEnc, pMB); - MBTransQuantIntra(&pEnc->mbParam, current, pMB, x, y, - dct_codes, qcoeff); + current->fcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + current->sStat.iMvSum += pEnc->smpData[k].mvSum; + current->sStat.iMvCount += pEnc->smpData[k].mvCount; + if (pEnc->smpData[k].minfcode > current->fcode) + current->fcode = pEnc->smpData[k].minfcode; + } - start_timer(); - MBPrediction(current, x, y, pParam->mb_width, qcoeff); - stop_prediction_timer(); + } else +#endif + { - current->sStat.kblks++; + /* regular ME */ - MBCoding(current, pMB, qcoeff, bs, ¤t->sStat); - stop_coding_timer(); - continue; - } + MotionEstimation(&pEnc->mbParam, current, reference, + &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, + &pEnc->vGMC, 256*4096, num_slices); - start_timer(); - MBMotionCompensation(pMB, x, y, &reference->image, - &pEnc->vInterH, &pEnc->vInterV, - &pEnc->vInterHV, &pEnc->vGMC, - ¤t->image, - dct_codes, pParam->width, - pParam->height, - pParam->edged_width, - (current->vol_flags & XVID_VOL_QUARTERPEL), - current->rounding_type); + } - stop_comp_timer(); + stop_motion_timer(); - pMB->field_pred = 0; + set_timecodes(current,reference,pParam->fbase); - if (pMB->cbp != 0) { - pMB->cbp = MBTransQuantInter(&pEnc->mbParam, current, pMB, x, y, - dct_codes, qcoeff); - } + BitstreamWriteVopHeader(bs, &pEnc->mbParam, current, 1, current->mbs[0].quant); - if (pMB->dquant != 0) - MBSetDquant(pMB, x, y, &pEnc->mbParam); + /* multithreaded inter coding - dispatch threads */ + bound = 0; + slices_per_thread = (num_slices*1024 / num_threads); - if (pMB->cbp || pMB->mvs[0].x || pMB->mvs[0].y || - pMB->mvs[1].x || pMB->mvs[1].y || pMB->mvs[2].x || - pMB->mvs[2].y || pMB->mvs[3].x || pMB->mvs[3].y) { - current->sStat.mblks++; - } else { - current->sStat.ublks++; - } + for (k = 0; k < num_threads; k++) { + int add = ((slices_per_thread + 512) >> 10); - start_timer(); + slices_per_thread += ((num_slices*1024 / num_threads) - add*1024); - /* Finished processing the MB, now check if to CODE or SKIP */ + pEnc->smpData[k].pEnc = (void *) pEnc; + pEnc->smpData[k].stop_y = (((bound+add) * mb_height + (num_slices-1)) / num_slices); + pEnc->smpData[k].start_y = ((bound * mb_height + (num_slices-1)) / num_slices); + pEnc->smpData[k].RefQ = ((k&1) ? pEnc->vInterV.u : pEnc->vInterV.v) + 16*(k>>1)*pParam->edged_width; - skip_possible = (pMB->cbp == 0) && (pMB->mode == MODE_INTER); + bound += add; - if (current->coding_type == S_VOP) - skip_possible &= (pMB->mcsel == 1); - else { /* PVOP */ - const VECTOR * const mv = (pParam->vol_flags & XVID_VOL_QUARTERPEL) ? - pMB->qmvs : pMB->mvs; - skip_possible &= ((mv->x|mv->y) == 0); - } + if (k > 0) { + pEnc->smpData[k].sStat->iTextBits = pEnc->smpData[k].sStat->kblks = + pEnc->smpData[k].sStat->mblks = pEnc->smpData[k].sStat->ublks = + pEnc->smpData[k].sStat->iMVBits = 0; + + BitstreamReset(pEnc->smpData[k].bs); + } + } + pEnc->smpData[0].bs = bs; + pEnc->smpData[0].sStat = ¤t->sStat; - if ((pMB->mode == MODE_NOT_CODED) || (skip_possible)) { - /* This is a candidate for SKIPping, but for P-VOPs check intermediate B-frames first */ - int bSkip = 1; +#ifdef HAVE_PTHREAD + /* create threads */ + for (k = 1; k < num_threads; k++) { + pthread_create(&pEnc->smpData[k].handle, NULL, + (void*)SliceCodeP, (void*)&pEnc->smpData[k]); + } +#endif - if (current->coding_type == P_VOP) { /* special rule for P-VOP's SKIP */ + SliceCodeP(&pEnc->smpData[0]); - for (k = pEnc->bframenum_head; k < pEnc->bframenum_tail; k++) { - int iSAD; - iSAD = sad16(reference->image.y + 16*y*pParam->edged_width + 16*x, - pEnc->bframes[k]->image.y + 16*y*pParam->edged_width + 16*x, - pParam->edged_width, BFRAME_SKIP_THRESHHOLD * pMB->quant); - if (iSAD >= BFRAME_SKIP_THRESHHOLD * pMB->quant) { - bSkip = 0; /* could not SKIP */ - if (pParam->vol_flags & XVID_VOL_QUARTERPEL) { - VECTOR predMV = get_qpmv2(current->mbs, pParam->mb_width, 0, x, y, 0); - pMB->pmvs[0].x = - predMV.x; - pMB->pmvs[0].y = - predMV.y; - } else { - VECTOR predMV = get_pmv2(current->mbs, pParam->mb_width, 0, x, y, 0); - pMB->pmvs[0].x = - predMV.x; - pMB->pmvs[0].y = - predMV.y; - } - pMB->mode = MODE_INTER; - pMB->cbp = 0; - break; - } - } - } +#ifdef HAVE_PTHREAD + /* wait until all threads are finished */ + for (k = 1; k < num_threads; k++) { + pthread_join(pEnc->smpData[k].handle, &status); + } +#endif - if (bSkip) { - /* do SKIP */ - pMB->mode = MODE_NOT_CODED; - MBSkip(bs); - stop_coding_timer(); - continue; /* next MB */ - } - } + current->length = BitstreamLength(bs) - (bits/8); - /* ordinary case: normal coded INTER/INTER4V block */ - MBCoding(current, pMB, qcoeff, bs, &pEnc->current->sStat); - stop_coding_timer(); - } - } + /* reassemble the pieces together */ + SerializeBitstreams(pEnc, pEnc->current, bs, num_threads); - emms(); updateFcode(¤t->sStat, pEnc); /* frame drop code */ #if 0 DPRINTF(XVID_DEBUG_DEBUG, "kmu %i %i %i\n", current->sStat.kblks, current->sStat.mblks, current->sStat.ublks); #endif - if (current->sStat.kblks + current->sStat.mblks <= + + if (current->sStat.kblks + current->sStat.mblks < (pParam->frame_drop_ratio * mb_width * mb_height) / 100 && - ( (pEnc->bframenum_head >= pEnc->bframenum_tail) || !(pEnc->mbParam.global_flags & XVID_GLOBAL_CLOSED_GOP)) ) + ( (pEnc->bframenum_head >= pEnc->bframenum_tail) || !(pEnc->mbParam.global_flags & XVID_GLOBAL_CLOSED_GOP)) && + (current->coding_type == P_VOP) ) { current->sStat.kblks = current->sStat.mblks = current->sStat.iTextBits = 0; current->sStat.ublks = mb_width * mb_height; @@ -1778,6 +2171,10 @@ memcpy(current->mbs, reference->mbs, sizeof(MACROBLOCK) * mb_width * mb_height); coded = 0; + BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ + + current->length = (BitstreamPos(bs) - bits) / 8; + } else { pEnc->current->is_edged = 0; /* not edged */ @@ -1806,13 +2203,90 @@ } */ - BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ - - current->length = (BitstreamPos(bs) - bits) / 8; - return coded; } +static void +SliceCodeB(SMPData *data) +{ + Encoder *pEnc = (Encoder *) data->pEnc; + Bitstream *bs = (Bitstream *) data->bs; + + DECLARE_ALIGNED_MATRIX(dct_codes, 6, 64, int16_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(qcoeff, 6, 64, int16_t, CACHE_LINE); + + int x, y; + FRAMEINFO * const frame = (FRAMEINFO * const) data->current; + MBParam * const pParam = &pEnc->mbParam; + int mb_width = pParam->mb_width; + int mb_height = pParam->mb_height; + IMAGE *f_ref = &pEnc->reference->image; + IMAGE *b_ref = &pEnc->current->image; + + int bound = data->start_y*mb_width; + int num_slices = pEnc->num_slices; + + if (data->start_y > 0) { /* write resync marker */ + write_video_packet_header(bs, pParam, frame, bound+1); + } + + for (y = data->start_y; y < MIN(data->stop_y+1, mb_height); y++) { + int new_bound = mb_width * ((((y*num_slices) / mb_height) * mb_height + (num_slices-1)) / num_slices); + int stop_x = (y == data->stop_y) ? 1 : mb_width; + int start_x = (y == data->start_y && y > 0) ? 1 : 0; + + for (x = start_x; x < stop_x; x++) { + MACROBLOCK * const mb = &frame->mbs[x + y * pEnc->mbParam.mb_width]; + + /* decoder ignores mb when refence block is INTER(0,0), CBP=0 */ + if (mb->mode == MODE_NOT_CODED) { + if (pParam->plugin_flags & XVID_REQORIGINAL) { + MBMotionCompensation(mb, x, y, f_ref, NULL, f_ref, NULL, NULL, &frame->image, + NULL, 0, 0, pParam->edged_width, 0, 0, data->RefQ); + } + continue; + } + + if (new_bound > bound && x > 0) { + bound = new_bound; + BitstreamPadAlways(bs); + write_video_packet_header(bs, pParam, frame, y*mb_width+x); + } + + mb->quant = frame->quant; + + if (mb->cbp != 0 || pParam->plugin_flags & XVID_REQORIGINAL) { + /* we have to motion-compensate, transfer etc, + because there might be blocks to code */ + + MBMotionCompensationBVOP(pParam, mb, x, y, &frame->image, + f_ref, &pEnc->f_refh, &pEnc->f_refv, + &pEnc->f_refhv, b_ref, &pEnc->vInterH, + &pEnc->vInterV, &pEnc->vInterHV, dct_codes, + data->RefQ); + + mb->cbp = MBTransQuantInterBVOP(pParam, frame, mb, x, y, dct_codes, qcoeff); + } + + if (mb->mode == MODE_DIRECT_NO4V) + mb->mode = MODE_DIRECT; + + if (mb->mode == MODE_DIRECT && (mb->cbp | mb->pmvs[3].x | mb->pmvs[3].y) == 0) + mb->mode = MODE_DIRECT_NONE_MV; /* skipped */ + else + if (frame->vop_flags & XVID_VOP_GREYSCALE) + /* keep only bits 5-2 -- Chroma blocks will just be skipped by MBCodingBVOP */ + mb->cbp &= 0x3C; + + start_timer(); + MBCodingBVOP(frame, mb, qcoeff, frame->fcode, frame->bcode, bs, data->sStat); + stop_coding_timer(); + } + } + + BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ + emms(); +} static void FrameCodeB(Encoder * pEnc, @@ -1820,13 +2294,20 @@ Bitstream * bs) { int bits = BitstreamPos(bs); - DECLARE_ALIGNED_MATRIX(dct_codes, 6, 64, int16_t, CACHE_LINE); - DECLARE_ALIGNED_MATRIX(qcoeff, 6, 64, int16_t, CACHE_LINE); - uint32_t x, y; + int k = 0, bound = 0, num_slices = pEnc->num_slices; + int num_threads = MAX(1, MIN(pEnc->num_threads, num_slices)); +#ifdef HAVE_PTHREAD + void * status = NULL; + int threads_per_slice = (pEnc->num_threads*1024 / num_threads); +#endif + int slices_per_thread = (num_slices*1024 / num_threads); IMAGE *f_ref = &pEnc->reference->image; IMAGE *b_ref = &pEnc->current->image; + MBParam * const pParam = &pEnc->mbParam; + int mb_height = pParam->mb_height; + #ifdef BFRAMES_DEC_DEBUG FILE *fp; static char first=0; @@ -1843,13 +2324,13 @@ if (!pEnc->reference->is_edged) { image_setedges(f_ref, pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, pEnc->mbParam.width, - pEnc->mbParam.height, 0); - pEnc->current->is_edged = 1; + pEnc->mbParam.height, XVID_BS_VERSION); + pEnc->reference->is_edged = 1; } if (pEnc->reference->is_interpolated != 0) { start_timer(); - image_interpolate(f_ref, &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, + image_interpolate(f_ref->y, pEnc->f_refh.y, pEnc->f_refv.y, pEnc->f_refhv.y, pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, (pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); stop_inter_timer(); @@ -1860,13 +2341,13 @@ if (!pEnc->current->is_edged) { image_setedges(b_ref, pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, pEnc->mbParam.width, - pEnc->mbParam.height, 0); + pEnc->mbParam.height, XVID_BS_VERSION); pEnc->current->is_edged = 1; } if (pEnc->current->is_interpolated != 0) { start_timer(); - image_interpolate(b_ref, &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, + image_interpolate(b_ref->y, pEnc->vInterH.y, pEnc->vInterV.y, pEnc->vInterHV.y, pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, (pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); stop_inter_timer(); @@ -1874,77 +2355,147 @@ } frame->coding_type = B_VOP; - call_plugins(pEnc, pEnc->current, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); + + if ((frame->vop_flags & XVID_VOP_RD_PSNRHVSM) && (frame->vop_flags & XVID_VOP_RD_BVOP)) { + image_block_variance(&frame->image, pEnc->mbParam.edged_width, frame->mbs, + pEnc->mbParam.mb_width, pEnc->mbParam.mb_height); + } + + call_plugins(pEnc, frame, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); + + frame->fcode = frame->bcode = pEnc->current->fcode; start_timer(); - MotionEstimationBVOP(&pEnc->mbParam, frame, - ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ - ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ - pEnc->reference->mbs, f_ref, - &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, - pEnc->current, b_ref, &pEnc->vInterH, - &pEnc->vInterV, &pEnc->vInterHV); + +#ifdef HAVE_PTHREAD + if (pEnc->num_threads > 0) { + + /* multithreaded motion estimation - dispatch threads */ + while (k < pEnc->num_threads) { + int i, add_s = (slices_per_thread + 512) >> 10; + int add_t = (threads_per_slice + 512) >> 10; + + int start_y = (bound * mb_height + (num_slices-1)) / num_slices; + int stop_y = ((bound+add_s) * mb_height + (num_slices-1)) / num_slices; + int rows_per_thread = (stop_y - start_y + add_t - 1) / add_t; + + slices_per_thread += ((num_slices*1024 / num_threads) - add_s*1024); + threads_per_slice += ((pEnc->num_threads*1024 / num_threads) - add_t*1024); + + for (i = 0; i < add_t; i++) { + memset(pEnc->smpData[k+i].complete_count_self, 0, rows_per_thread * sizeof(int)); + + pEnc->smpData[k+i].pEnc = (void *) pEnc; + pEnc->smpData[k+i].current = frame; + + pEnc->smpData[k+i].y_row = i; + pEnc->smpData[k+i].y_step = add_t; + pEnc->smpData[k+i].stop_y = stop_y; + pEnc->smpData[k+i].start_y = start_y; + + /* todo: sort out temp space once and for all */ + pEnc->smpData[k+i].RefQ = (((k+i)&1) ? pEnc->vInterV.u : pEnc->vInterV.v) + + 16*((k+i)>>1)*pParam->edged_width; + } + + pEnc->smpData[k].complete_count_above = + pEnc->smpData[k+add_t-1].complete_count_self - 1; + + bound += add_s; + k += add_t; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->smpData[k].handle, NULL, + (void*)SMPMotionEstimationBVOP, (void*)&pEnc->smpData[k]); + } + + SMPMotionEstimationBVOP(&pEnc->smpData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->smpData[k].handle, &status); + } + + frame->fcode = frame->bcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + if (pEnc->smpData[k].minfcode > frame->fcode) + frame->fcode = pEnc->smpData[k].minfcode; + if (pEnc->smpData[k].minbcode > frame->bcode) + frame->bcode = pEnc->smpData[k].minbcode; + } + } else +#endif + { + + MotionEstimationBVOP(&pEnc->mbParam, frame, + ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ + ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ + pEnc->reference->mbs, f_ref, + &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, + pEnc->current, b_ref, &pEnc->vInterH, + &pEnc->vInterV, &pEnc->vInterHV, + pEnc->num_slices); + } stop_motion_timer(); set_timecodes(frame, pEnc->reference,pEnc->mbParam.fbase); BitstreamWriteVopHeader(bs, &pEnc->mbParam, frame, 1, frame->quant); + /* reset stats */ frame->sStat.iTextBits = 0; + frame->sStat.iMVBits = 0; frame->sStat.iMvSum = 0; frame->sStat.iMvCount = 0; frame->sStat.kblks = frame->sStat.mblks = frame->sStat.ublks = 0; frame->sStat.mblks = pEnc->mbParam.mb_width * pEnc->mbParam.mb_height; frame->sStat.kblks = frame->sStat.ublks = 0; + + /* multithreaded inter coding - dispatch threads */ + bound = 0; + slices_per_thread = (num_slices*1024 / num_threads); + + for (k = 0; k < num_threads; k++) { + int add = ((slices_per_thread + 512) >> 10); - for (y = 0; y < pEnc->mbParam.mb_height; y++) { - for (x = 0; x < pEnc->mbParam.mb_width; x++) { - MACROBLOCK * const mb = &frame->mbs[x + y * pEnc->mbParam.mb_width]; - - /* decoder ignores mb when refence block is INTER(0,0), CBP=0 */ - if (mb->mode == MODE_NOT_CODED) { - if (pEnc->mbParam.plugin_flags & XVID_REQORIGINAL) { - MBMotionCompensation(mb, x, y, f_ref, NULL, f_ref, NULL, NULL, &frame->image, - NULL, 0, 0, pEnc->mbParam.edged_width, 0, 0); - } - continue; - } - - mb->quant = frame->quant; + slices_per_thread += ((num_slices*1024 / num_threads) - add*1024); - if (mb->cbp != 0 || pEnc->mbParam.plugin_flags & XVID_REQORIGINAL) { - /* we have to motion-compensate, transfer etc, - because there might be blocks to code */ + pEnc->smpData[k].pEnc = (void *) pEnc; + pEnc->smpData[k].current = frame; + pEnc->smpData[k].stop_y = (((bound+add) * mb_height + (num_slices-1)) / num_slices); + pEnc->smpData[k].start_y = ((bound * mb_height + (num_slices-1)) / num_slices); + bound += add; - MBMotionCompensationBVOP(&pEnc->mbParam, mb, x, y, &frame->image, - f_ref, &pEnc->f_refh, &pEnc->f_refv, - &pEnc->f_refhv, b_ref, &pEnc->vInterH, - &pEnc->vInterV, &pEnc->vInterHV, - dct_codes); + /* todo: sort out temp space once and for all */ + pEnc->smpData[k].RefQ = ((k&1) ? pEnc->vInterV.u : pEnc->vInterV.v) + 16*(k>>1)*pParam->edged_width; - mb->cbp = MBTransQuantInterBVOP(&pEnc->mbParam, frame, mb, x, y, dct_codes, qcoeff); - } - - if (mb->mode == MODE_DIRECT_NO4V) - mb->mode = MODE_DIRECT; + if (k > 0) { + BitstreamReset(pEnc->smpData[k].bs); + pEnc->smpData[k].sStat->iTextBits = pEnc->smpData[k].sStat->kblks = + pEnc->smpData[k].sStat->mblks = pEnc->smpData[k].sStat->ublks = pEnc->smpData[k].sStat->iMVBits = 0; + } + } - if (mb->mode == MODE_DIRECT && (mb->cbp | mb->pmvs[3].x | mb->pmvs[3].y) == 0) - mb->mode = MODE_DIRECT_NONE_MV; /* skipped */ - else - if (frame->vop_flags & XVID_VOP_GREYSCALE) - /* keep only bits 5-2 -- Chroma blocks will just be skipped by MBCodingBVOP */ - mb->cbp &= 0x3C; +#ifdef HAVE_PTHREAD + for (k = 1; k < num_threads; k++) { + pthread_create(&pEnc->smpData[k].handle, NULL, + (void*)SliceCodeB, (void*)&pEnc->smpData[k]); + } +#endif - start_timer(); - MBCodingBVOP(frame, mb, qcoeff, frame->fcode, frame->bcode, bs, - &frame->sStat); - stop_coding_timer(); - } + pEnc->smpData[0].bs = bs; + pEnc->smpData[0].sStat = &frame->sStat; + SliceCodeB(&pEnc->smpData[0]); + +#ifdef HAVE_PTHREAD + for (k = 1; k < num_threads; k++) { + pthread_join(pEnc->smpData[k].handle, &status); } +#endif - emms(); + frame->length = BitstreamLength(bs) - (bits/8); - BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ - frame->length = (BitstreamPos(bs) - bits) / 8; + /* reassemble the pieces together */ + SerializeBitstreams(pEnc, frame, bs, num_threads); #ifdef BFRAMES_DEC_DEBUG if (!first){