您的位置:首页 > 其它

x265探索与研究(九):compressFrame()函数

2016-04-18 21:22 821 查看

x265探索与研究(九):compressFrame()函数

 

        compressFrame()函数是一个功能繁杂且分析难度较大的函数,主要包括时间戳的初始化工作、access unit的设计、加权预测技术、运动参考帧的估计、当前Slice的QP值确定、熵编码相关信息配置、并行计算与否及其空间的申请、SEI相关配置、线程控制、CTU分析、Multi-pass
Encoding、滤波与去噪处理等等,其中最重要的就是调用了encodeSlice()函数。

 

        compressFrame()函数中调用的主要函数如下图所示:

 

 


 

下面给出compressFrame()函数的代码分析:

 /*=============================================================*/
/*
====== Analysed by: RuiDong Fang
====== Csdn Blog: http://blog.csdn.net/frd2009041510 ====== Date: 2016.04.18
====== Funtion: compressFrame()函数,编码一帧。
*/
/*=============================================================*/
void FrameEncoder::compressFrame()
{
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////1、初始化一些变量
ProfileScopeEvent(frameThread); //帧线程的档次范围

m_startCompressTime = x265_mdate(); //编码的开始时间戳(timestamp时间戳 when frame encoder is given a frame)
m_totalActiveWorkerCount = 0; //统计m_activeWorkerCount的和,即经过CTU压缩后统计,进入帧前初始化为0(sum of m_activeWorkerCount sampled at end of each CTU)
m_activeWorkerCountSamples = 0; //当前帧已经编码分析完毕的CTU个数(count of times m_activeWorkerCount was sampled (think vbv restarts))
m_totalWorkerElapsedTime = 0; //初始化所有CTU编码滤波占用的时间(total elapsed time spent by worker threads processing CTUs)
m_totalNoWorkerTime = 0; //初始化当前帧编码占用的时间(total elapsed time without any active worker threads)
m_countRowBlocks = 0; //正在运行的CTU行因为上一行没有完成完毕而强制退出的个数,在帧编码前初始化为0(count of workers forced to abandon a row because of top dependency)
m_allRowsAvailableTime = 0; //初始化当前帧所有CTU行准备好的时间点(timestamp when all reference dependencies are resolved)
m_stallStartTime = 0; //初始化正在进行编码的rows个数为0时的时间点(timestamp when worker count becomes 0)

m_completionCount = 0;
m_bAllRowsStop = false; //是否将所有CTU的编码停止,在每帧进入前初始化为false,在CTU编码决策中需要重新编码时将置为true
m_vbvResetTriggerRow = -1; //需要重新编码的CTU行号,每帧开始编码前初始化为-1

m_SSDY = m_SSDU = m_SSDV = 0;
m_ssim = 0;
m_ssimCnt = 0;
memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats)); //将当前帧的统计信息初始化为0

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////2、存取单元access unit
/* Emit access unit delimiter unless this is the first frame and the user is
* not repeating headers (since AUD is supposed to be the first NAL in the access
* unit) */
//当是第一帧时,需要给出一个定界符
Slice* slice = m_frame->m_encData->m_slice; //获取当前slice
if (m_param->bEnableAccessUnitDelimiters && (m_frame->m_poc || m_param->bRepeatHeaders))
{
m_bs.resetBits();
m_entropyCoder.setBitstream(&m_bs);
m_entropyCoder.codeAUD(*slice);
m_bs.writeByteAlignment();
m_nalList.serialize(NAL_UNIT_ACCESS_UNIT_DELIMITER, m_bs);
}
if (m_frame->m_lowres.bKeyframe && m_param->bRepeatHeaders)
m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs); //====================get stream headers

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////3、加权预测
// Weighted Prediction parameters estimation.
bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred; //P帧加权预测标志位
bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred; //B帧加权预测标志位

//加权预测的参数估计
if (bUseWeightP || bUseWeightB) //使能加权预测
{
#if DETAILED_CU_STATS
m_cuStats.countWeightAnalyze++;
ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
#endif
WeightAnalysis wa(*this); //用于多线程加权分析
if (m_pool && wa.tryBondPeers(*this, 1)) //从当前job中拥有核并且sleep状态的核可以触发多线程,如果没有可用核则在当前线程中完成进入else
/* use an idle worker for weight analysis */
wa.waitForExit(); //一直等待到任务全部完成,这里等待的是核释放,内核释放了任务也就完成了
else
weightAnalyse(*slice, *m_frame, *m_param); //====================加权预测分析(每个list的第一帧分析加权与否,其它不加权)
}
else
slice->disableWeights(); //不使能加权预测

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////4、运动参考,配置参考帧信息
// Generate motion references
int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0; //预测方向的数量,若为P帧则数量为1,若为B帧则数量为2,否则为0,即获取当前有几个list
for (int l = 0; l < numPredDir; l++) //根据预测方向的数量遍历list的个数
{
for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++) //遍历当前list的所有参考帧
{
WeightParam *w = NULL;
if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag) //如果是P帧或B帧,并且bPresentFlag为1
w = slice->m_weightPredTable[l][ref]; //获取加权参数
m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param); //获取参考帧信息,申请加权帧内存
}
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////5、根据码率控制确定QP的大小
/* Get the QP for this frame from rate control. This call may block until
* frames ahead of it in encode order have called rateControlEnd() */
int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, m_top); //====================rateControlStart()函数,即码率控制开始
m_rce.newQp = qp; //获取当前估计的量化参数

/* Clip slice QP to 0-51 spec range before encoding */
slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp); //编码前,根据Offset确定/修正当前Slice的具体QP,取值必须处在0~51内

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////6、熵编码前的一些准备
m_initSliceContext.resetEntropy(*slice); //====================重置熵编码相关信息

m_frameFilter.start(m_frame, m_initSliceContext, qp); //====================滤波,在后面的调用中若bEnableSAO使能,则SAO

/* ensure all rows are blocked prior to initializing row CTU counters */
WaveFront::clearEnabledRowMask(); //将当前WPPmap全部初始化为不可执行

/* reset entropy coders */
m_entropyCoder.load(m_initSliceContext);
for (uint32_t i = 0; i < m_numRows; i++)
m_rows[i].init(m_initSliceContext);

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////7、并行与否,申请空间
uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : 1; //一帧并行的流数(CTU行数)
//根据并行的流数申请相关空间
if (!m_outStreams)
{
m_outStreams = new Bitstream[numSubstreams]; //申请空间
m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams); //申请空间
if (!m_param->bEnableSAO) //若没有使能SAO
for (uint32_t i = 0; i < numSubstreams; i++)
m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
}
else
for (uint32_t i = 0; i < numSubstreams; i++)
m_outStreams[i].resetBits();

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////8、SEI(Supplemental Enhancement Information)相关配置
int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;

if (m_frame->m_lowres.bKeyframe)
{
if (m_param->bEmitHRDSEI)
{
SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;

// since the temporal layer HRD is not ready, we assumed it is fixed
bpSei->m_auCpbRemovalDelayDelta = 1;
bpSei->m_cpbDelayOffset = 0;
bpSei->m_dpbDelayOffset = 0;

// hrdFullness() calculates the initial CPB removal delay and offset
m_top->m_rateControl->hrdFullness(bpSei);

m_bs.resetBits();
bpSei->write(m_bs, *slice->m_sps);
m_bs.writeByteAlignment();

m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);

m_top->m_lastBPSEI = m_rce.encodeOrder;
}
}

if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
{
SEIPictureTiming *sei = m_rce.picTimingSEI;
const VUI *vui = &slice->m_sps->vuiParameters;
const HRDInfo *hrd = &vui->hrdParameters;
int poc = slice->m_poc;

if (vui->frameFieldInfoPresentFlag)
{
if (m_param->interlaceMode == 2)
sei->m_picStruct = (poc & 1) ? 1 /* top */ : 2 /* bottom */;
else if (m_param->interlaceMode == 1)
sei->m_picStruct = (poc & 1) ? 2 /* bottom */ : 1 /* top */;
else
sei->m_picStruct = 0;
sei->m_sourceScanType = 0;
sei->m_duplicateFlag = false;
}

if (vui->hrdParametersPresentFlag)
{
// The m_aucpbremoval delay specifies how many clock ticks the
// access unit associated with the picture timing SEI message has to
// wait after removal of the access unit with the most recent
// buffering period SEI message
sei->m_auCpbRemovalDelay = X265_MIN(X265_MAX(1, m_rce.encodeOrder - prevBPSEI), (1 << hrd->cpbRemovalDelayLength));
sei->m_picDpbOutputDelay = slice->m_sps->numReorderPics + poc - m_rce.encodeOrder;
}

m_bs.resetBits();
sei->write(m_bs, *slice->m_sps);
m_bs.writeByteAlignment();
m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////9、配合RC的线程控制
/* CQP and CRF (without capped VBV) doesn't use mid-frame statistics to
* tune RateControl parameters for other frames.
* Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
* RateControlEnd here, after the slicecontexts are initialized. For the rest - ABR
* and VBV, unlock only after rateControlUpdateStats of this frame is called */
//因为m_startEndOrder在rateControlUpdateStats中只对ABR或者VBV模式更新,在此更新为了配合RC的线程控制
if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)
{
m_top->m_rateControl->m_startEndOrder.incr(); //更新计数

if (m_rce.encodeOrder < m_param->frameNumThreads - 1) //刚启动时多更新一次
m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
}

//================================================================================================================大部分的计算处理都在此处之下
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////10、CTU分析(分两种情况:WPP使能、WPP不使能)
/* Analyze CTU rows, most of the hard work is done here. Frame is
* compressed in a wave-front pattern if WPP is enabled. Row based loop
* filters runs behind the CTU compression and reconstruction */

m_rows[0].active = true; //触发第一个CTU行
if (m_param->bEnableWavefront) //如果WPP使能
{
for (uint32_t row = 0; row < m_numRows; row++) //遍历所有CTU行
{
// block until all reference frames have reconstructed the rows we need
for (int l = 0; l < numPredDir; l++) //当前list的个数
{
for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++) //当前list 中ref的个数
{
Frame *refpic = slice->m_refPicList[l][ref]; //获取参考帧

uint32_t reconRowCount = refpic->m_reconRowCount.get();
while ((reconRowCount != m_numRows) && (reconRowCount < row + m_refLagRows))
reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);

if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows);
}
}

//当前外部参考块(如参考帧对应的参考块)准备完毕,将当前row对应位置的map置为1,标记可以执行
enableRowEncoder(row); //===========================/* clear external dependency for this row */
if (!row) //如果是第一行
{
m_row0WaitTime = x265_mdate(); //获取当前帧开始编码的的时间点,用于计算当前帧的编码时间

//当前内部参考块准备完毕,将当前row对应位置的map置为1,标记可以执行
enqueueRowEncoder(0); //===========================/* clear internal dependency, start wavefront */
}
tryWakeOne(); //CTU行准备好并触发wpp, 在findjob中运行
}

m_allRowsAvailableTime = x265_mdate(); //当前帧所有CTU行准备好的时间点
tryWakeOne(); //多触发一次/* ensure one thread is active or help-wanted flag is set prior to blocking */
static const int block_ms = 250; //超时时间
//每250ms触发一次,保证全部CTU行都能够执行(如果m_completionEvent在某一位置触发,则会造成不超时,循环退出)
while (m_completionEvent.timedWait(block_ms))
tryWakeOne(); //触发
}
else //如果WPP不使能
{
for (uint32_t i = 0; i < m_numRows + m_filterRowDelay; i++)
{
// compress
if (i < m_numRows)
{
// block until all reference frames have reconstructed the rows we need
for (int l = 0; l < numPredDir; l++)
{
int list = l;
for (int ref = 0; ref < slice->m_numRefIdx[list]; ref++)
{
Frame *refpic = slice->m_refPicList[list][ref]; //获取参考帧

uint32_t reconRowCount = refpic->m_reconRowCount.get();
while ((reconRowCount != m_numRows) && (reconRowCount < i + m_refLagRows))
reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);

if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
m_mref[list][ref].applyWeight(i + m_refLagRows, m_numRows);
}
}

if (!i)
m_row0WaitTime = x265_mdate();
else if (i == m_numRows - 1)
m_allRowsAvailableTime = x265_mdate();
processRowEncoder(i, m_tld[m_localTldIdx]); //===========================Called by worker threads
}

// filter
if (i >= m_filterRowDelay)
m_frameFilter.processRow(i - m_filterRowDelay); //===========================滤波每一行(Row based loop filters)
}
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////11、Multi-pass encoding
if (m_param->rc.bStatWrite) //Multi-pass encoding(Enable writing the stats in a multi-pass encode to the stat output file)
{
int totalI = 0, totalP = 0, totalSkip = 0;

// accumulate intra,inter,skip cu count per frame for 2 pass
for (uint32_t i = 0; i < m_numRows; i++)
{
m_frame->m_encData->m_frameStats.mvBits += m_rows[i].rowStats.mvBits;
m_frame->m_encData->m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
m_frame->m_encData->m_frameStats.miscBits += m_rows[i].rowStats.miscBits;
totalI += m_rows[i].rowStats.intra8x8Cnt;
totalP += m_rows[i].rowStats.inter8x8Cnt;
totalSkip += m_rows[i].rowStats.skip8x8Cnt;
}
int totalCuCount = totalI + totalP + totalSkip; //总CU数目
m_frame->m_encData->m_frameStats.percent8x8Intra = (double)totalI / totalCuCount;
m_frame->m_encData->m_frameStats.percent8x8Inter = (double)totalP / totalCuCount;
m_frame->m_encData->m_frameStats.percent8x8Skip = (double)totalSkip / totalCuCount;
}
for (uint32_t i = 0; i < m_numRows; i++)
{
m_frame->m_encData->m_frameStats.cntIntraNxN += m_rows[i].rowStats.cntIntraNxN;
m_frame->m_encData->m_frameStats.totalCu += m_rows[i].rowStats.totalCu;
m_frame->m_encData->m_frameStats.totalCtu += m_rows[i].rowStats.totalCtu;
m_frame->m_encData->m_frameStats.lumaDistortion += m_rows[i].rowStats.lumaDistortion;
m_frame->m_encData->m_frameStats.chromaDistortion += m_rows[i].rowStats.chromaDistortion;
m_frame->m_encData->m_frameStats.psyEnergy += m_rows[i].rowStats.psyEnergy;
m_frame->m_encData->m_frameStats.lumaLevel += m_rows[i].rowStats.lumaLevel;

if (m_rows[i].rowStats.maxLumaLevel > m_frame->m_encData->m_frameStats.maxLumaLevel)
m_frame->m_encData->m_frameStats.maxLumaLevel = m_rows[i].rowStats.maxLumaLevel;
for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
{
m_frame->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
m_frame->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
for (int m = 0; m < INTER_MODES; m++)
m_frame->m_encData->m_frameStats.cuInterDistribution[depth][m] += m_rows[i].rowStats.cuInterDistribution[depth][m];
for (int n = 0; n < INTRA_MODES; n++)
m_frame->m_encData->m_frameStats.cuIntraDistribution[depth]
+= m_rows[i].rowStats.cuIntraDistribution[depth]
;
}
}
m_frame->m_encData->m_frameStats.avgLumaDistortion = (double)(m_frame->m_encData->m_frameStats.lumaDistortion) / m_frame->m_encData->m_frameStats.totalCtu;
m_frame->m_encData->m_frameStats.avgChromaDistortion = (double)(m_frame->m_encData->m_frameStats.chromaDistortion) / m_frame->m_encData->m_frameStats.totalCtu;
m_frame->m_encData->m_frameStats.avgPsyEnergy = (double)(m_frame->m_encData->m_frameStats.psyEnergy) / m_frame->m_encData->m_frameStats.totalCtu;
m_frame->m_encData->m_frameStats.avgLumaLevel = m_frame->m_encData->m_frameStats.lumaLevel / m_frame->m_encData->m_frameStats.totalCtu;
m_frame->m_encData->m_frameStats.percentIntraNxN = (double)(m_frame->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame->m_encData->m_frameStats.totalCu;
for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
{
m_frame->m_encData->m_frameStats.percentSkipCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
m_frame->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
for (int n = 0; n < INTRA_MODES; n++)
m_frame->m_encData->m_frameStats.percentIntraDistribution[depth]
= (double)(m_frame->m_encData->m_frameStats.cuIntraDistribution[depth]
* 100) / m_frame->m_encData->m_frameStats.totalCu;
uint64_t cuInterRectCnt = 0; // sum of Nx2N, 2NxN counts
cuInterRectCnt += m_frame->m_encData->m_frameStats.cuInterDistribution[depth][1] + m_frame->m_encData->m_frameStats.cuInterDistribution[depth][2];
m_frame->m_encData->m_frameStats.percentInterDistribution[depth][0] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][0] * 100) / m_frame->m_encData->m_frameStats.totalCu;
m_frame->m_encData->m_frameStats.percentInterDistribution[depth][1] = (double)(cuInterRectCnt * 100) / m_frame->m_encData->m_frameStats.totalCu;
m_frame->m_encData->m_frameStats.percentInterDistribution[depth][2] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][3] * 100) / m_frame->m_encData->m_frameStats.totalCu;
}

m_bs.resetBits();
m_entropyCoder.load(m_initSliceContext);
m_entropyCoder.setBitstream(&m_bs);
m_entropyCoder.codeSliceHeader(*slice, *m_frame->m_encData); //===========================codeSliceHeader()函数

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////12、SAO,并调用encodeSlice()
// finish encode of each CTU row, only required when SAO is enabled
if (m_param->bEnableSAO)
encodeSlice(); //===========================调用encodeSlice()

// serialize each row, record final lengths in slice header
uint32_t maxStreamSize = m_nalList.serializeSubstreams(m_substreamSizes, numSubstreams, m_outStreams);

// complete the slice header by writing WPP row-starts
m_entropyCoder.setBitstream(&m_bs);
if (slice->m_pps->bEntropyCodingSyncEnabled)
m_entropyCoder.codeSliceHeaderWPPEntryPoints(*slice, m_substreamSizes, maxStreamSize);
m_bs.writeByteAlignment();

m_nalList.serialize(slice->m_nalUnitType, m_bs);

if (m_param->decodedPictureHashSEI)
{
if (m_param->decodedPictureHashSEI == 1)
{
m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::MD5;
for (int i = 0; i < 3; i++)
MD5Final(&m_state[i], m_seiReconPictureDigest.m_digest[i]);
}
else if (m_param->decodedPictureHashSEI == 2)
{
m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CRC;
for (int i = 0; i < 3; i++)
crcFinish(m_crc[i], m_seiReconPictureDigest.m_digest[i]);
}
else if (m_param->decodedPictureHashSEI == 3)
{
m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CHECKSUM;
for (int i = 0; i < 3; i++)
checksumFinish(m_checksum[i], m_seiReconPictureDigest.m_digest[i]);
}

m_bs.resetBits();
m_seiReconPictureDigest.write(m_bs, *slice->m_sps);
m_bs.writeByteAlignment();

m_nalList.serialize(NAL_UNIT_SUFFIX_SEI, m_bs);
}

uint64_t bytes = 0;
for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
{
int type = m_nalList.m_nal[i].type;

// exclude SEI
if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
{
bytes += m_nalList.m_nal[i].sizeBytes;
// and exclude start code prefix
bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
}
}
m_accessUnitBits = bytes << 3;

m_endCompressTime = x265_mdate(); //timestamp after all CTUs are compressed
// printf("编码时间(Compressinging Time): %d ms\n",m_endCompressTime-m_startCompressTime); //added by Fred

/* rateControlEnd may also block for earlier frames to call rateControlUpdateStats */
if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce) < 0)
m_top->m_aborted = true; //一般不进入,错误返回

/* Decrement referenced frame reference counts, allow them to be recycled */
for (int l = 0; l < numPredDir; l++)
{
for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++) //遍历每个List的参考帧
{
Frame *refpic = slice->m_refPicList[l][ref]; //获取参考帧地址
ATOMIC_DEC(&refpic->m_countRefEncoders); //被参考计数减一
}
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////13、去噪处理NR updates(Noise Reduction)
int numTLD; //TLD=Thread Local Data当前并行线程个数
if (m_pool)
numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders; //应用WPP,为当前机器的核数,否则为当前核数加上当前的线程个数
else
numTLD = 1; //不应用多线程,并行个数为1

if (m_nr) //如果noise reduction(更新去噪偏移值)
{
/* Accumulate NR statistics from all worker threads */
//加速所有线程的NR统计
for (int i = 0; i < numTLD; i++) //遍历当前帧编码应用所有线程:累加当前帧各个系数的统计数字
{
//获取当前线程对应当前帧的去噪类;i 确定当前帧的所有线程,如每个WPP行 m_jpId 确定当前帧
NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++) //遍历所有TU类别
{
for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
m_nr->residualSum[cat][coeff] += nr->residualSum[cat][coeff]; //累加当前帧所有TU对应位置的系数绝对值和

m_nr->count[cat] += nr->count[cat]; //累加当前帧的TU计数
}
}

noiseReductionUpdate(); //===========================DCT-domain noise reduction / adaptive deadzone from libavcodec

/* Copy updated NR coefficients back to all worker threads */
//拷贝NR系数至所有的工作线程
for (int i = 0; i < numTLD; i++) //遍历当前帧编码应用所有线程
{
//获取当前线程对应当前帧的去噪类;i 确定当前帧的所有线程,如每个WPP行 m_jpId 确定当前帧
NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
//将当前更新的去噪偏移值拷贝到各个线程中的去噪偏移中去
memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES); //初始为0
memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS); //初始为0
}
}

#if DETAILED_CU_STATS //统计数据
/* Accumulate CU statistics from each worker thread, we could report
* per-frame stats here, but currently we do not. */
for (int i = 0; i < numTLD; i++)
m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId]);
#endif

m_endFrameTime = x265_mdate(); //帧编码的结束时间(timestamp after RCEnd, NR updates, etc)

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息