标签:
HEVC解码器的CTU解码(CTU Decoder)部分在整个HEVC解码器中的位置如下图红框所示,在hls_coding_unit()之中。CTU解码(CTU Decoder)部分的函数调用关系如下图右边方框所示。(右键新窗口打开查看大图)
/*
 * Entry point for decoding one slice segment.
 *
 * Starting from the tile-scan address of the slice segment's first CTB,
 * CTBs are visited one by one. For each CTB the function prepares the
 * neighbour information, initialises CABAC, parses the SAO parameters,
 * records the slice's deblocking parameters, decodes the coding quadtree,
 * then saves the decoding states and invokes the per-CTB filters.
 *
 * avctxt:         codec context; its priv_data is used as the HEVCContext
 * isFilterThread: not referenced inside this function
 *
 * Returns the tile-scan address following the last CTB that was processed,
 * AVERROR_INVALIDDATA when a dependent slice segment has no usable
 * predecessor, or the negative value reported by hls_coding_quadtree().
 */
static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
{
    HEVCContext *s      = avctxt->priv_data;
    /* CTB edge length in samples */
    const int ctb_size  = 1 << s->sps->log2_ctb_size;
    int ctb_addr_ts     = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
    int x_ctb           = 0;
    int y_ctb           = 0;
    int more_data       = 1;

    if (s->sh.dependent_slice_segment_flag) {
        int prev_rs;

        /* A dependent segment cannot start at tile-scan address 0:
         * there would be no previous CTB to depend on. */
        if (ctb_addr_ts == 0) {
            av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
            return AVERROR_INVALIDDATA;
        }

        /* The CTB just before this segment must belong to the same slice. */
        prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
        if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
            av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
            return AVERROR_INVALIDDATA;
        }
    }

    while (more_data && ctb_addr_ts < s->sps->ctb_size) {
        const int rs_addr      = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
        /* picture width in CTBs, rounded up */
        const int ctbs_per_row = (s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size;

        /* Top-left sample position of this CTB */
        x_ctb = (rs_addr % ctbs_per_row) << s->sps->log2_ctb_size;
        y_ctb = (rs_addr / ctbs_per_row) << s->sps->log2_ctb_size;

        /* Set up information about the neighbouring CTBs */
        hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);

        /* Initialise CABAC for this CTB */
        ff_hevc_cabac_init(s, ctb_addr_ts);

        /* Sample adaptive offset parameters, addressed in CTB units */
        hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);

        /* Remember the slice-level filter settings for this CTB */
        s->deblock[rs_addr].beta_offset = s->sh.beta_offset;
        s->deblock[rs_addr].tc_offset   = s->sh.tc_offset;
        s->filter_slice_edges[rs_addr]  = s->sh.slice_loop_filter_across_slices_enabled_flag;

        /* Parse the coding quadtree of this CTB and decode it */
        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
        if (more_data < 0) {
            /* Mark the CTB as not belonging to any slice before bailing out */
            s->tab_slice_address[rs_addr] = -1;
            return more_data;
        }

        ctb_addr_ts++;

        /* Save the decoding states for later use */
        ff_hevc_save_states(s, ctb_addr_ts);

        /* Per-CTB filtering */
        ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
    }

    /* Extra filter call when the last CTB handled reaches both the right
     * and the bottom picture edge. */
    if (x_ctb + ctb_size >= s->sps->width &&
        y_ctb + ctb_size >= s->sps->height) {
        ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
    }

    return ctb_addr_ts;
}
从源代码可以看出,hls_decode_entry()调用了5个函数进行解码工作:
(1)调用hls_decode_neighbour()初始化CTU周围的参数信息。
(2)调用ff_hevc_cabac_init()进行CABAC初始化。
(3)调用hls_sao_param()初始化样点自适应补偿参数。
(4)调用hls_coding_quadtree()解码CTU。其中包含了PU和TU的解码。本文分析第四步的PU和TU解码过程。
(5)调用ff_hevc_hls_filters()进行滤波。其中包含了去块效应滤波和SAO滤波。
/*
 * Parse the coding quadtree and decode the coding units it contains.
 * The function calls itself recursively for each sub-block.
 *
 * s:            HEVCContext
 * x0:           x coordinate of the coding block
 * y0:           y coordinate of the coding block
 * log2_cb_size: log2 of the coding block size
 * cb_depth:     quadtree depth of this block
 *
 * Returns a negative value on error, 0 when no more data follows in the
 * slice, and a non-zero value otherwise.
 */
static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
                               int log2_cb_size, int cb_depth)
{
    HEVCLocalContext *lc     = s->HEVClc;
    /* Coding block edge length at this depth */
    const int cb_size        = 1 << log2_cb_size;
    const int qp_block_mask  = (1 << (s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
    /* Blocks already at the minimum size can never be split */
    const int above_min_size = log2_cb_size > s->sps->log2_min_cb_size;
    int split_cu;

    lc->ct_depth = cb_depth;

    /* The split flag is only read from the bitstream when the block lies
     * completely inside the picture and is larger than the minimum size;
     * otherwise the split decision is inferred from the size alone. */
    if (above_min_size &&
        x0 + cb_size <= s->sps->width &&
        y0 + cb_size <= s->sps->height)
        split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
    else
        split_cu = above_min_size;

    /* Reset the QP delta state for blocks at or above the QP-delta size */
    if (s->pps->cu_qp_delta_enabled_flag &&
        log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
        lc->tu.is_cu_qp_delta_coded = 0;
        lc->tu.cu_qp_delta          = 0;
    }

    /* Same for the chroma QP offset state */
    if (s->sh.cu_chroma_qp_offset_enabled_flag &&
        log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
        lc->tu.is_cu_chroma_qp_offset_coded = 0;
    }

    if (!split_cu) {
        /* Leaf of the quadtree: this is where a CU is actually decoded */
        int right, bottom, ctb_size;
        int x_closed, y_closed;
        int cu_ret = hls_coding_unit(s, x0, y0, log2_cb_size);

        if (cu_ret < 0)
            return cu_ret;

        ctb_size = 1 << s->sps->log2_ctb_size;
        right    = x0 + cb_size;
        bottom   = y0 + cb_size;

        /* A direction is "closed" when the CU ends on a CTB boundary or
         * reaches the picture edge. */
        x_closed = (right  % ctb_size) == 0 || right  >= s->sps->width;
        y_closed = (bottom % ctb_size) == 0 || bottom >= s->sps->height;

        /* end_of_slice_flag is only read once both directions are closed */
        if (!(x_closed && y_closed))
            return 1;

        return !ff_hevc_end_of_slice_flag_decode(s);
    } else {
        /* Split: each child has half the size and sits one level deeper.
         * Children starting outside the picture are not visited. */
        const int half        = cb_size >> 1;
        const int x1          = x0 + half;
        const int y1          = y0 + half;
        const int child_log2  = log2_cb_size - 1;
        const int child_depth = cb_depth + 1;
        int more_data;

        /* top-left */
        more_data = hls_coding_quadtree(s, x0, y0, child_log2, child_depth);
        if (more_data < 0)
            return more_data;

        /* top-right */
        if (more_data && x1 < s->sps->width) {
            more_data = hls_coding_quadtree(s, x1, y0, child_log2, child_depth);
            if (more_data < 0)
                return more_data;
        }

        /* bottom-left */
        if (more_data && y1 < s->sps->height) {
            more_data = hls_coding_quadtree(s, x0, y1, child_log2, child_depth);
            if (more_data < 0)
                return more_data;
        }

        /* bottom-right */
        if (more_data && x1 < s->sps->width &&
            y1 < s->sps->height) {
            more_data = hls_coding_quadtree(s, x1, y1, child_log2, child_depth);
            if (more_data < 0)
                return more_data;
        }

        /* Update the predicted luma QP when this block ends on a
         * qp_block_mask-aligned position in both directions. */
        if (((x0 + cb_size) & qp_block_mask) == 0 &&
            ((y0 + cb_size) & qp_block_mask) == 0)
            lc->qPy_pred = lc->qp_y;

        if (!more_data)
            return 0;

        return (x1 + half) < s->sps->width ||
               (y1 + half) < s->sps->height;
    }
}
从源代码可以看出,hls_coding_quadtree()首先调用ff_hevc_split_coding_unit_flag_decode()判断当前CU是否还需要划分。如果需要划分的话,就会递归调用4次hls_coding_quadtree()分别对4个子块继续进行四叉树解析;如果不需要划分,就会调用hls_coding_unit()对CU进行解码。
/*
 * Decode one coding unit (CU) - this is where the actual decoding happens.
 *
 * The CU attributes are first reset to defaults, then the CU-level syntax
 * elements are read in order (transquant bypass, skip flag, prediction mode,
 * partition mode, PCM flag), the prediction units are processed, and finally
 * the transform tree is decoded when a residual is present. At the end the
 * luma QP and the coding-tree depth are stored for the area covered by the CU.
 *
 * s:            HEVCContext
 * x0:           x coordinate of the top-left corner of the coding block
 * y0:           y coordinate of the top-left corner of the coding block
 * log2_cb_size: log2 of the coding block size
 *
 * Returns 0 on success, or the negative value returned by hls_pcm_sample()
 * or hls_transform_tree().
 *
 * NOTE(review): SAMPLE_CTB is a macro defined outside this excerpt; it may
 * rely on the local names used here, so do not rename locals without
 * checking its definition.
 */
static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
{
// Coding block size in samples
int cb_size = 1 << log2_cb_size;
HEVCLocalContext *lc = s->HEVClc;
int log2_min_cb_size = s->sps->log2_min_cb_size;
// Coding block size measured in minimum-size coding blocks
int length = cb_size >> log2_min_cb_size;
int min_cb_width = s->sps->min_cb_width;
// Position of the current CB (x and y) measured in units of the minimum CB size
int x_cb = x0 >> log2_min_cb_size;
int y_cb = y0 >> log2_min_cb_size;
// Passed as the last argument of hls_prediction_unit(); reduced by 1 or 2
// below for partitions whose width is halved or quartered
int idx = log2_cb_size - 2;
int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
int x, y, ret;
// Reset the CU attributes to their defaults: intra, 2Nx2N, no intra split
lc->cu.x = x0;
lc->cu.y = y0;
lc->cu.pred_mode = MODE_INTRA;
lc->cu.part_mode = PART_2Nx2N;
lc->cu.intra_split_flag = 0;
// Clear the skip flag at this CB position
SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
// Initialise all four intra prediction mode entries to 1
for (x = 0; x < 4; x++)
lc->pu.intra_pred_mode[x] = 1;
// cu_transquant_bypass_flag is only read when enabled in the PPS;
// a bypassed CU is also marked for deblocking bypass
if (s->pps->transquant_bypass_enable_flag) {
lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
if (lc->cu.cu_transquant_bypass_flag)
set_deblocking_bypass(s, x0, y0, log2_cb_size);
} else
lc->cu.cu_transquant_bypass_flag = 0;
if (s->sh.slice_type != I_SLICE) {
// Skip flag (only present outside I slices)
uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
// Store it in the skip_flag table: `length` rows of `length` entries,
// rows being min_cb_width entries apart
x = y_cb * min_cb_width + x_cb;
for (y = 0; y < length; y++) {
memset(&s->skip_flag[x], skip_flag, length);
x += min_cb_width;
}
lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
} else {
// I slice: no skip flag is coded, store 0 for the whole CU area
x = y_cb * min_cb_width + x_cb;
for (y = 0; y < length; y++) {
memset(&s->skip_flag[x], 0, length);
x += min_cb_width;
}
}
if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
// Skipped CU: one prediction unit covering the whole CB, and no
// further CU syntax is read in this branch
hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
if (!s->sh.disable_deblocking_filter_flag)
ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
} else {
int pcm_flag = 0;
// Read the prediction mode (non-I slices only)
if (s->sh.slice_type != I_SLICE)
lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
// The partition mode is read when the CU is not intra,
// or when the CB already has the minimum size
if (lc->cu.pred_mode != MODE_INTRA ||
log2_cb_size == s->sps->log2_min_cb_size) {
// Read the CU partition mode
lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
lc->cu.pred_mode == MODE_INTRA;
}
if (lc->cu.pred_mode == MODE_INTRA) {
// Intra prediction
// PCM coding (uncommon): the flag is only read for 2Nx2N CUs whose
// size lies within the PCM size range of the SPS
if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
pcm_flag = ff_hevc_pcm_flag_decode(s);
}
if (pcm_flag) {
intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
// The deblocking bypass is applied before the error check below
if (s->sps->pcm.loop_filter_disable_flag)
set_deblocking_bypass(s, x0, y0, log2_cb_size);
if (ret < 0)
return ret;
} else {
// Read the intra prediction modes
intra_prediction_unit(s, x0, y0, log2_cb_size);
}
} else {
// Inter prediction
intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
// Inter CUs have 8 possible partition modes
switch (lc->cu.part_mode) {
case PART_2Nx2N:
// Process the prediction unit - motion compensation
/*
 * hls_prediction_unit() parameters:
 * x0 : x coordinate of the PU's top-left corner
 * y0 : y coordinate of the PU's top-left corner
 * nPbW : PU width
 * nPbH : PU height
 * log2_cb_size : log2() of the CB size
 * partIdx : PU index - 0..3 when the CU is split into four parts, 0 and 1 when split into two
 */
hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
break;
case PART_2NxN:
// Two PUs stacked vertically, each half the CB height
hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
break;
case PART_Nx2N:
// Two PUs side by side, each half the CB width
hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
break;
case PART_2NxnU:
// Upper PU is a quarter of the CB height, lower PU three quarters
hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
break;
case PART_2NxnD:
// Upper PU is three quarters of the CB height, lower PU a quarter
hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
break;
case PART_nLx2N:
// Left PU is a quarter of the CB width, right PU three quarters
hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
break;
case PART_nRx2N:
// Left PU is three quarters of the CB width, right PU a quarter
hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
break;
case PART_NxN:
// Four square PUs, each half the CB width and height
hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
break;
}
}
if (!pcm_flag) {
// A residual is assumed present unless the flag read below says otherwise;
// the flag is only read for non-intra CUs that are not a 2Nx2N merge
int rqt_root_cbf = 1;
if (lc->cu.pred_mode != MODE_INTRA &&
!(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
}
if (rqt_root_cbf) {
// All-zero array passed for both trailing cbf arguments of hls_transform_tree()
const static int cbf[2] = { 0 };
// Maximum transform depth: the intra limit is raised by one when the CU is intra-split
lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
s->sps->max_transform_hierarchy_depth_inter;
// Process the TU quadtree
ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
log2_cb_size,
log2_cb_size, 0, 0, cbf, cbf);
if (ret < 0)
return ret;
} else {
// No residual: only the deblocking boundary strengths are computed
if (!s->sh.disable_deblocking_filter_flag)
ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
}
}
}
// If QP deltas are enabled but none was coded for this CU, set the luma QP here
if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
// Store the luma QP for every minimum-size CB covered by this CU
x = y_cb * min_cb_width + x_cb;
for (y = 0; y < length; y++) {
memset(&s->qp_y_tab[x], lc->qp_y, length);
x += min_cb_width;
}
// Update the predicted luma QP when the CU ends on a qp_block_mask-aligned
// position in both directions
if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
lc->qPy_pred = lc->qp_y;
}
// Record the coding-tree depth for the area covered by this CU
set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
return 0;
}
从源代码可以看出,hls_coding_unit()主要进行了两个方面的处理:
(1)调用hls_prediction_unit()处理PU。
(2)调用hls_transform_tree()处理TU树。
其中,hls_prediction_unit()完成了以下两步工作:
(1)解析码流得到运动矢量。HEVC中包含了Merge和AMVP两种运动矢量预测技术。对于使用Merge的码流,调用ff_hevc_luma_mv_merge_mode();对于使用AMVP的码流,调用hevc_luma_mv_mpv_mode()。
(2)根据运动矢量进行运动补偿。对于单向预测亮度运动补偿,调用luma_mc_uni(),对于单向预测色度运动补偿,调用chroma_mc_uni();对于双向预测亮度运动补偿,调用luma_mc_bi(),对于双向预测色度运动补偿,调用chroma_mc_bi()。
而hls_transform_tree()首先调用ff_hevc_split_transform_flag_decode()判断当前TU是否还需要划分。如果需要划分的话,就会递归调用4次hls_transform_tree()分别对4个子块继续进行四叉树解析;如果不需要划分,就会调用hls_transform_unit()对TU进行解码。和前面递归划分CTU至CU的思路是一致的。最终会对每一个TU逐一调用hls_transform_unit()进行解码。
标签:
原文地址:http://blog.csdn.net/i000zheng/article/details/51314835