49 |
#include "quant/quant_matrix.h" |
#include "quant/quant_matrix.h" |
50 |
#include "utils/mem_align.h" |
#include "utils/mem_align.h" |
51 |
|
|
52 |
|
# include "motion/motion_smp.h" |
53 |
|
|
54 |
|
|
55 |
/***************************************************************************** |
/***************************************************************************** |
56 |
* Local function prototypes |
* Local function prototypes |
57 |
****************************************************************************/ |
****************************************************************************/ |
444 |
pEnc->iFrameNum = 0; |
pEnc->iFrameNum = 0; |
445 |
pEnc->fMvPrevSigma = -1; |
pEnc->fMvPrevSigma = -1; |
446 |
|
|
447 |
|
/* multithreaded stuff */ |
448 |
|
if (create->num_threads > 0) { |
449 |
|
int t = create->num_threads; |
450 |
|
int rows_per_thread = (pEnc->mbParam.mb_height+t-1)/t; |
451 |
|
pEnc->num_threads = t; |
452 |
|
pEnc->motionData = xvid_malloc(t*sizeof(SMPmotionData), CACHE_LINE); |
453 |
|
if (!pEnc->motionData) |
454 |
|
goto xvid_err_nosmp; |
455 |
|
|
456 |
|
for (n = 0; n < t; n++) { |
457 |
|
pEnc->motionData[n].complete_count_self = |
458 |
|
xvid_malloc(rows_per_thread * sizeof(int), CACHE_LINE); |
459 |
|
|
460 |
|
if (!pEnc->motionData[n].complete_count_self) |
461 |
|
goto xvid_err_nosmp; |
462 |
|
|
463 |
|
if (n != 0) |
464 |
|
pEnc->motionData[n].complete_count_above = |
465 |
|
pEnc->motionData[n-1].complete_count_self; |
466 |
|
} |
467 |
|
pEnc->motionData[0].complete_count_above = |
468 |
|
pEnc->motionData[t-1].complete_count_self - 1; |
469 |
|
|
470 |
|
} else { |
471 |
|
xvid_err_nosmp: |
472 |
|
/* no SMP */ |
473 |
|
create->num_threads = 0; |
474 |
|
pEnc->motionData = NULL; |
475 |
|
} |
476 |
|
|
477 |
create->handle = (void *) pEnc; |
create->handle = (void *) pEnc; |
478 |
|
|
479 |
init_timer(); |
init_timer(); |
662 |
xvid_free(pEnc->temp_dquants); |
xvid_free(pEnc->temp_dquants); |
663 |
} |
} |
664 |
|
|
665 |
|
if ((pEnc->mbParam.plugin_flags & XVID_REQLAMBDA)) { |
666 |
|
xvid_free(pEnc->temp_lambda); |
667 |
|
} |
668 |
|
|
669 |
if (pEnc->num_plugins>0) { |
if (pEnc->num_plugins>0) { |
670 |
xvid_plg_destroy_t pdestroy; |
xvid_plg_destroy_t pdestroy; |
683 |
|
|
684 |
xvid_free(pEnc->mbParam.mpeg_quant_matrices); |
xvid_free(pEnc->mbParam.mpeg_quant_matrices); |
685 |
|
|
686 |
if (pEnc->num_plugins>0) |
if (pEnc->num_zones > 0) |
687 |
xvid_free(pEnc->zones); |
xvid_free(pEnc->zones); |
688 |
|
|
689 |
|
if (pEnc->num_threads > 0) { |
690 |
|
for (i = 0; i < pEnc->num_threads; i++) |
691 |
|
xvid_free(pEnc->motionData[i].complete_count_self); |
692 |
|
|
693 |
|
xvid_free(pEnc->motionData); |
694 |
|
} |
695 |
|
|
696 |
xvid_free(pEnc); |
xvid_free(pEnc); |
697 |
|
|
698 |
return 0; /* ok */ |
return 0; /* ok */ |
765 |
if ((pEnc->mbParam.plugin_flags & XVID_REQDQUANTS)) { |
if ((pEnc->mbParam.plugin_flags & XVID_REQDQUANTS)) { |
766 |
data.dquant = pEnc->temp_dquants; |
data.dquant = pEnc->temp_dquants; |
767 |
data.dquant_stride = pEnc->mbParam.mb_width; |
data.dquant_stride = pEnc->mbParam.mb_width; |
768 |
memset(data.dquant, 0, data.mb_width*data.mb_height); |
memset(data.dquant, 0, data.mb_width*data.mb_height*sizeof(int)); |
769 |
} |
} |
770 |
|
|
771 |
if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { |
if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { |
772 |
int block = 0; |
int block = 0; |
773 |
|
emms(); |
774 |
data.lambda = pEnc->temp_lambda; |
data.lambda = pEnc->temp_lambda; |
775 |
for(i = 0;i < pEnc->mbParam.mb_height; i++) |
for(i = 0;i < pEnc->mbParam.mb_height; i++) |
776 |
for(j = 0;j < pEnc->mbParam.mb_width; j++) |
for(j = 0;j < pEnc->mbParam.mb_width; j++) |
1307 |
if (pEnc->current->stamp > 0) { |
if (pEnc->current->stamp > 0) { |
1308 |
call_plugins(pEnc, pEnc->reference, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); |
call_plugins(pEnc, pEnc->reference, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); |
1309 |
} |
} |
1310 |
else |
else if (stats) { |
1311 |
stats->type = XVID_TYPE_NOTHING; |
stats->type = XVID_TYPE_NOTHING; |
1312 |
} |
} |
1313 |
|
} |
1314 |
|
|
1315 |
/* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
/* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
1316 |
* closed-gop |
* closed-gop |
1429 |
|
|
1430 |
if ( FrameCodeP(pEnc, &bs) == 0 ) { |
if ( FrameCodeP(pEnc, &bs) == 0 ) { |
1431 |
/* N-VOP, we mustn't code b-frames yet */ |
/* N-VOP, we mustn't code b-frames yet */ |
1432 |
|
if ((pEnc->mbParam.global_flags & XVID_GLOBAL_PACKED) || |
1433 |
|
pEnc->mbParam.max_bframes == 0) |
1434 |
call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); |
call_plugins(pEnc, pEnc->current, &pEnc->sOriginal, XVID_PLG_AFTER, NULL, NULL, stats); |
1435 |
goto done; |
goto done; |
1436 |
} |
} |
1541 |
BitstreamWriteVopHeader(bs, &pEnc->mbParam, pEnc->current, 1, pEnc->current->mbs[0].quant); |
BitstreamWriteVopHeader(bs, &pEnc->mbParam, pEnc->current, 1, pEnc->current->mbs[0].quant); |
1542 |
|
|
1543 |
pEnc->current->sStat.iTextBits = 0; |
pEnc->current->sStat.iTextBits = 0; |
1544 |
|
pEnc->current->sStat.iMVBits = 0; |
1545 |
pEnc->current->sStat.kblks = mb_width * mb_height; |
pEnc->current->sStat.kblks = mb_width * mb_height; |
1546 |
pEnc->current->sStat.mblks = pEnc->current->sStat.ublks = 0; |
pEnc->current->sStat.mblks = pEnc->current->sStat.ublks = 0; |
1547 |
|
|
1640 |
if ((current->vop_flags & XVID_VOP_HALFPEL)) { |
if ((current->vop_flags & XVID_VOP_HALFPEL)) { |
1641 |
if (reference->is_interpolated != current->rounding_type) { |
if (reference->is_interpolated != current->rounding_type) { |
1642 |
start_timer(); |
start_timer(); |
1643 |
image_interpolate(pRef, &pEnc->vInterH, &pEnc->vInterV, |
image_interpolate(pRef->y, pEnc->vInterH.y, pEnc->vInterV.y, |
1644 |
&pEnc->vInterHV, pParam->edged_width, |
pEnc->vInterHV.y, pParam->edged_width, |
1645 |
pParam->edged_height, |
pParam->edged_height, |
1646 |
(pParam->vol_flags & XVID_VOL_QUARTERPEL), |
(pParam->vol_flags & XVID_VOL_QUARTERPEL), |
1647 |
current->rounding_type); |
current->rounding_type); |
1651 |
} |
} |
1652 |
|
|
1653 |
current->sStat.iTextBits = current->sStat.iMvSum = current->sStat.iMvCount = |
current->sStat.iTextBits = current->sStat.iMvSum = current->sStat.iMvCount = |
1654 |
current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = 0; |
current->sStat.kblks = current->sStat.mblks = current->sStat.ublks = |
1655 |
|
current->sStat.iMVBits = 0; |
1656 |
|
|
1657 |
current->coding_type = P_VOP; |
current->coding_type = P_VOP; |
1658 |
|
|
1711 |
} |
} |
1712 |
} |
} |
1713 |
|
|
1714 |
|
|
1715 |
|
if (pEnc->num_threads > 0) { |
1716 |
|
/* multithreaded motion estimation - dispatch threads */ |
1717 |
|
|
1718 |
|
void * status; |
1719 |
|
int rows_per_thread = (pParam->mb_height + pEnc->num_threads - 1)/pEnc->num_threads; |
1720 |
|
|
1721 |
|
for (k = 0; k < pEnc->num_threads; k++) { |
1722 |
|
memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); |
1723 |
|
pEnc->motionData[k].pParam = &pEnc->mbParam; |
1724 |
|
pEnc->motionData[k].current = current; |
1725 |
|
pEnc->motionData[k].reference = reference; |
1726 |
|
pEnc->motionData[k].pRefH = &pEnc->vInterH; |
1727 |
|
pEnc->motionData[k].pRefV = &pEnc->vInterV; |
1728 |
|
pEnc->motionData[k].pRefHV = &pEnc->vInterHV; |
1729 |
|
pEnc->motionData[k].pGMC = &pEnc->vGMC; |
1730 |
|
pEnc->motionData[k].y_step = pEnc->num_threads; |
1731 |
|
pEnc->motionData[k].start_y = k; |
1732 |
|
/* todo: sort out temp space once and for all */ |
1733 |
|
pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pParam->edged_width; |
1734 |
|
} |
1735 |
|
|
1736 |
|
for (k = 1; k < pEnc->num_threads; k++) { |
1737 |
|
pthread_create(&pEnc->motionData[k].handle, NULL, |
1738 |
|
(void*)MotionEstimateSMP, (void*)&pEnc->motionData[k]); |
1739 |
|
} |
1740 |
|
|
1741 |
|
MotionEstimateSMP(&pEnc->motionData[0]); |
1742 |
|
|
1743 |
|
for (k = 1; k < pEnc->num_threads; k++) { |
1744 |
|
pthread_join(pEnc->motionData[k].handle, &status); |
1745 |
|
} |
1746 |
|
|
1747 |
|
current->fcode = 0; |
1748 |
|
for (k = 0; k < pEnc->num_threads; k++) { |
1749 |
|
current->sStat.iMvSum += pEnc->motionData[k].mvSum; |
1750 |
|
current->sStat.iMvCount += pEnc->motionData[k].mvCount; |
1751 |
|
if (pEnc->motionData[k].minfcode > current->fcode) |
1752 |
|
current->fcode = pEnc->motionData[k].minfcode; |
1753 |
|
} |
1754 |
|
|
1755 |
|
} else { |
1756 |
|
/* regular ME */ |
1757 |
|
|
1758 |
MotionEstimation(&pEnc->mbParam, current, reference, |
MotionEstimation(&pEnc->mbParam, current, reference, |
1759 |
&pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, |
&pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, |
1760 |
&pEnc->vGMC, 256*4096); |
&pEnc->vGMC, 256*4096); |
1761 |
|
} |
1762 |
|
|
1763 |
stop_motion_timer(); |
stop_motion_timer(); |
1764 |
|
|
1888 |
(pParam->frame_drop_ratio * mb_width * mb_height) / 100 && |
(pParam->frame_drop_ratio * mb_width * mb_height) / 100 && |
1889 |
( (pEnc->bframenum_head >= pEnc->bframenum_tail) || !(pEnc->mbParam.global_flags & XVID_GLOBAL_CLOSED_GOP)) ) |
( (pEnc->bframenum_head >= pEnc->bframenum_tail) || !(pEnc->mbParam.global_flags & XVID_GLOBAL_CLOSED_GOP)) ) |
1890 |
{ |
{ |
1891 |
current->sStat.kblks = current->sStat.mblks = 0; |
current->sStat.kblks = current->sStat.mblks = current->sStat.iTextBits = 0; |
1892 |
current->sStat.ublks = mb_width * mb_height; |
current->sStat.ublks = mb_width * mb_height; |
1893 |
|
|
1894 |
BitstreamReset(bs); |
BitstreamReset(bs); |
1978 |
|
|
1979 |
if (pEnc->reference->is_interpolated != 0) { |
if (pEnc->reference->is_interpolated != 0) { |
1980 |
start_timer(); |
start_timer(); |
1981 |
image_interpolate(f_ref, &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, |
image_interpolate(f_ref->y, pEnc->f_refh.y, pEnc->f_refv.y, pEnc->f_refhv.y, |
1982 |
pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, |
pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, |
1983 |
(pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); |
(pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); |
1984 |
stop_inter_timer(); |
stop_inter_timer(); |
1995 |
|
|
1996 |
if (pEnc->current->is_interpolated != 0) { |
if (pEnc->current->is_interpolated != 0) { |
1997 |
start_timer(); |
start_timer(); |
1998 |
image_interpolate(b_ref, &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, |
image_interpolate(b_ref->y, pEnc->vInterH.y, pEnc->vInterV.y, pEnc->vInterHV.y, |
1999 |
pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, |
pEnc->mbParam.edged_width, pEnc->mbParam.edged_height, |
2000 |
(pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); |
(pEnc->mbParam.vol_flags & XVID_VOL_QUARTERPEL), 0); |
2001 |
stop_inter_timer(); |
stop_inter_timer(); |
2005 |
frame->coding_type = B_VOP; |
frame->coding_type = B_VOP; |
2006 |
call_plugins(pEnc, frame, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); |
call_plugins(pEnc, frame, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); |
2007 |
|
|
2008 |
|
frame->fcode = frame->bcode = pEnc->current->fcode; |
2009 |
|
|
2010 |
start_timer(); |
start_timer(); |
2011 |
|
if (pEnc->num_threads > 0) { |
2012 |
|
void * status; |
2013 |
|
int k; |
2014 |
|
/* multithreaded motion estimation - dispatch threads */ |
2015 |
|
int rows_per_thread = (pEnc->mbParam.mb_height + pEnc->num_threads - 1)/pEnc->num_threads; |
2016 |
|
|
2017 |
|
for (k = 0; k < pEnc->num_threads; k++) { |
2018 |
|
memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); |
2019 |
|
pEnc->motionData[k].pParam = &pEnc->mbParam; |
2020 |
|
pEnc->motionData[k].current = frame; |
2021 |
|
pEnc->motionData[k].reference = pEnc->current; |
2022 |
|
pEnc->motionData[k].fRef = f_ref; |
2023 |
|
pEnc->motionData[k].fRefH = &pEnc->f_refh; |
2024 |
|
pEnc->motionData[k].fRefV = &pEnc->f_refv; |
2025 |
|
pEnc->motionData[k].fRefHV = &pEnc->f_refhv; |
2026 |
|
pEnc->motionData[k].pRef = b_ref; |
2027 |
|
pEnc->motionData[k].pRefH = &pEnc->vInterH; |
2028 |
|
pEnc->motionData[k].pRefV = &pEnc->vInterV; |
2029 |
|
pEnc->motionData[k].pRefHV = &pEnc->vInterHV; |
2030 |
|
pEnc->motionData[k].time_bp = (int32_t)(pEnc->current->stamp - frame->stamp); |
2031 |
|
pEnc->motionData[k].time_pp = (int32_t)(pEnc->current->stamp - pEnc->reference->stamp); |
2032 |
|
pEnc->motionData[k].y_step = pEnc->num_threads; |
2033 |
|
pEnc->motionData[k].start_y = k; |
2034 |
|
/* todo: sort out temp space once and for all */ |
2035 |
|
pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pEnc->mbParam.edged_width; |
2036 |
|
} |
2037 |
|
|
2038 |
|
for (k = 1; k < pEnc->num_threads; k++) { |
2039 |
|
pthread_create(&pEnc->motionData[k].handle, NULL, |
2040 |
|
(void*)SMPMotionEstimationBVOP, (void*)&pEnc->motionData[k]); |
2041 |
|
} |
2042 |
|
|
2043 |
|
SMPMotionEstimationBVOP(&pEnc->motionData[0]); |
2044 |
|
|
2045 |
|
for (k = 1; k < pEnc->num_threads; k++) { |
2046 |
|
pthread_join(pEnc->motionData[k].handle, &status); |
2047 |
|
} |
2048 |
|
|
2049 |
|
frame->fcode = frame->bcode = 0; |
2050 |
|
for (k = 0; k < pEnc->num_threads; k++) { |
2051 |
|
if (pEnc->motionData[k].minfcode > frame->fcode) |
2052 |
|
frame->fcode = pEnc->motionData[k].minfcode; |
2053 |
|
if (pEnc->motionData[k].minbcode > frame->bcode) |
2054 |
|
frame->bcode = pEnc->motionData[k].minbcode; |
2055 |
|
} |
2056 |
|
} else { |
2057 |
MotionEstimationBVOP(&pEnc->mbParam, frame, |
MotionEstimationBVOP(&pEnc->mbParam, frame, |
2058 |
((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ |
((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ |
2059 |
((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ |
((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ |
2061 |
&pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, |
&pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, |
2062 |
pEnc->current, b_ref, &pEnc->vInterH, |
pEnc->current, b_ref, &pEnc->vInterH, |
2063 |
&pEnc->vInterV, &pEnc->vInterHV); |
&pEnc->vInterV, &pEnc->vInterHV); |
2064 |
|
} |
2065 |
stop_motion_timer(); |
stop_motion_timer(); |
2066 |
|
|
2067 |
set_timecodes(frame, pEnc->reference,pEnc->mbParam.fbase); |
set_timecodes(frame, pEnc->reference,pEnc->mbParam.fbase); |
2068 |
BitstreamWriteVopHeader(bs, &pEnc->mbParam, frame, 1, frame->quant); |
BitstreamWriteVopHeader(bs, &pEnc->mbParam, frame, 1, frame->quant); |
2069 |
|
|
2070 |
frame->sStat.iTextBits = 0; |
frame->sStat.iTextBits = 0; |
2071 |
|
frame->sStat.iMVBits = 0; |
2072 |
frame->sStat.iMvSum = 0; |
frame->sStat.iMvSum = 0; |
2073 |
frame->sStat.iMvCount = 0; |
frame->sStat.iMvCount = 0; |
2074 |
frame->sStat.kblks = frame->sStat.mblks = frame->sStat.ublks = 0; |
frame->sStat.kblks = frame->sStat.mblks = frame->sStat.ublks = 0; |
2119 |
stop_coding_timer(); |
stop_coding_timer(); |
2120 |
} |
} |
2121 |
} |
} |
|
|
|
2122 |
emms(); |
emms(); |
2123 |
|
|
2124 |
BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ |
BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */ |