1 |
/************************************************************************** |
/***************************************************************************** |
2 |
* |
* |
3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* 8x8 block-based halfpel interpolation |
* - 8x8 block-based halfpel interpolation - |
5 |
|
* |
6 |
|
* Copyright(C) 2001-2003 Peter Ross <pross@xvid.org> |
7 |
* |
* |
8 |
* This program is free software; you can redistribute it and/or modify |
* This program is free software; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
17 |
* |
* |
18 |
* You should have received a copy of the GNU General Public License |
* You should have received a copy of the GNU General Public License |
19 |
* along with this program; if not, write to the Free Software |
* along with this program; if not, write to the Free Software |
20 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
* |
|
|
*************************************************************************/ |
|
|
|
|
|
/************************************************************************** |
|
|
* |
|
|
* History: |
|
21 |
* |
* |
22 |
* 05.10.2002 new bilinear and qpel interpolation code - Isibaar |
* $Id$ |
|
* 27.12.2001 modified "compensate_halfpel" |
|
|
* 05.11.2001 initial version; (c)2001 peter ross <pross@cs.rmit.edu.au> |
|
23 |
* |
* |
24 |
*************************************************************************/ |
****************************************************************************/ |
|
|
|
25 |
|
|
26 |
#include "../portab.h" |
#include "../portab.h" |
27 |
#include "../global.h" |
#include "../global.h" |
28 |
#include "interpolate8x8.h" |
#include "interpolate8x8.h" |
29 |
|
|
30 |
// function pointers |
/* function pointers */ |
31 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h; |
32 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v; |
33 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv; |
93 |
} |
} |
94 |
} |
} |
95 |
|
|
96 |
// dst = interpolate(src) |
/* dst = interpolate(src) */ |
97 |
|
|
98 |
void |
void |
99 |
interpolate8x8_halfpel_h_c(uint8_t * const dst, |
interpolate8x8_halfpel_h_c(uint8_t * const dst, |
101 |
const uint32_t stride, |
const uint32_t stride, |
102 |
const uint32_t rounding) |
const uint32_t rounding) |
103 |
{ |
{ |
104 |
uint32_t i, j; |
uintptr_t j; |
|
|
|
|
for (j = 0; j < 8; j++) { |
|
|
for (i = 0; i < 8; i++) { |
|
|
|
|
|
int16_t tot = |
|
|
(int32_t) src[j * stride + i] + (int32_t) src[j * stride + i + |
|
|
1]; |
|
105 |
|
|
106 |
tot = (int32_t) ((tot + 1 - rounding) >> 1); |
if (rounding) |
107 |
dst[j * stride + i] = (uint8_t) tot; |
for (j = 0; j < 8*stride; j+=stride) |
108 |
} |
{ |
109 |
|
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1); |
110 |
|
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1); |
111 |
|
dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] )>>1); |
112 |
|
dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] )>>1); |
113 |
|
dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] )>>1); |
114 |
|
dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] )>>1); |
115 |
|
dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] )>>1); |
116 |
|
dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] )>>1); |
117 |
|
} |
118 |
|
else |
119 |
|
for (j = 0; j < 8*stride; j+=stride) /* forward or backwards? Who knows ... */ |
120 |
|
{ |
121 |
|
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] + 1)>>1); |
122 |
|
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] + 1)>>1); |
123 |
|
dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] + 1)>>1); |
124 |
|
dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] + 1)>>1); |
125 |
|
dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] + 1)>>1); |
126 |
|
dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] + 1)>>1); |
127 |
|
dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] + 1)>>1); |
128 |
|
dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] + 1)>>1); |
129 |
} |
} |
130 |
} |
} |
131 |
|
|
137 |
const uint32_t stride, |
const uint32_t stride, |
138 |
const uint32_t rounding) |
const uint32_t rounding) |
139 |
{ |
{ |
140 |
uint32_t i, j; |
uintptr_t j; |
141 |
|
|
|
for (j = 0; j < 8; j++) { |
|
|
for (i = 0; i < 8; i++) { |
|
|
int16_t tot = src[j * stride + i] + src[j * stride + i + stride]; |
|
142 |
|
|
143 |
tot = ((tot + 1 - rounding) >> 1); |
if (rounding) |
144 |
dst[j * stride + i] = (uint8_t) tot; |
for (j = 0; j < 8*stride; j+=stride) /* forward is better. Some automatic prefetch perhaps. */ |
145 |
} |
{ |
146 |
|
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] )>>1); |
147 |
|
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] )>>1); |
148 |
|
dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] )>>1); |
149 |
|
dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] )>>1); |
150 |
|
dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] )>>1); |
151 |
|
dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] )>>1); |
152 |
|
dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] )>>1); |
153 |
|
dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] )>>1); |
154 |
|
} |
155 |
|
else |
156 |
|
for (j = 0; j < 8*stride; j+=stride) |
157 |
|
{ |
158 |
|
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] + 1)>>1); |
159 |
|
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] + 1)>>1); |
160 |
|
dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] + 1)>>1); |
161 |
|
dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] + 1)>>1); |
162 |
|
dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] + 1)>>1); |
163 |
|
dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] + 1)>>1); |
164 |
|
dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] + 1)>>1); |
165 |
|
dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] + 1)>>1); |
166 |
} |
} |
167 |
} |
} |
168 |
|
|
173 |
const uint32_t stride, |
const uint32_t stride, |
174 |
const uint32_t rounding) |
const uint32_t rounding) |
175 |
{ |
{ |
176 |
uint32_t i, j; |
uintptr_t j; |
177 |
|
|
178 |
for (j = 0; j < 8; j++) { |
if (rounding) |
179 |
for (i = 0; i < 8; i++) { |
for (j = 0; j < 8*stride; j+=stride) |
180 |
int16_t tot = |
{ |
181 |
src[j * stride + i] + src[j * stride + i + 1] + |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2); |
182 |
src[j * stride + i + stride] + src[j * stride + i + stride + |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2); |
183 |
1]; |
dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +1)>>2); |
184 |
tot = ((tot + 2 - rounding) >> 2); |
dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +1)>>2); |
185 |
dst[j * stride + i] = (uint8_t) tot; |
dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +1)>>2); |
186 |
} |
dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +1)>>2); |
187 |
|
dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +1)>>2); |
188 |
|
dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2); |
189 |
|
} |
190 |
|
else |
191 |
|
for (j = 0; j < 8*stride; j+=stride) |
192 |
|
{ |
193 |
|
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2); |
194 |
|
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2); |
195 |
|
dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +2)>>2); |
196 |
|
dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +2)>>2); |
197 |
|
dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +2)>>2); |
198 |
|
dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +2)>>2); |
199 |
|
dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +2)>>2); |
200 |
|
dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +2)>>2); |
201 |
} |
} |
202 |
} |
} |
203 |
|
|