1 |
/***************************************************************************** |
/************************************************************************** |
2 |
* |
* |
3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* - 8x8 block-based halfpel interpolation - |
* 8x8 block-based halfpel interpolation |
|
* |
|
|
* Copyright(C) 2001-2003 Peter Ross <pross@xvid.org> |
|
5 |
* |
* |
6 |
* This program is free software ; you can redistribute it and/or modify |
* This program is free software ; you can redistribute it and/or modify |
7 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
15 |
* |
* |
16 |
* You should have received a copy of the GNU General Public License |
* You should have received a copy of the GNU General Public License |
17 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
18 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 |
|
* |
20 |
|
*************************************************************************/ |
21 |
|
|
22 |
|
/************************************************************************** |
23 |
* |
* |
24 |
* $Id$ |
* History: |
25 |
* |
* |
26 |
****************************************************************************/ |
* 05.10.2002 new bilinear and qpel interpolation code - Isibaar |
27 |
|
* 27.12.2001 modified "compensate_halfpel" |
28 |
|
* 05.11.2001 initial version; (c)2001 peter ross <pross@cs.rmit.edu.au> |
29 |
|
* |
30 |
|
*************************************************************************/ |
31 |
|
|
32 |
|
|
33 |
#include "../portab.h" |
#include "../portab.h" |
34 |
#include "../global.h" |
#include "../global.h" |
35 |
#include "interpolate8x8.h" |
#include "interpolate8x8.h" |
36 |
|
|
37 |
/* function pointers */ |
// function pointers |
38 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h; |
39 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v; |
40 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv; |
100 |
} |
} |
101 |
} |
} |
102 |
|
|
103 |
/* dst = interpolate(src) */ |
// dst = interpolate(src) |
104 |
|
|
105 |
void |
void |
106 |
interpolate8x8_halfpel_h_c(uint8_t * const dst, |
interpolate8x8_halfpel_h_c(uint8_t * const dst, |
108 |
const uint32_t stride, |
const uint32_t stride, |
109 |
const uint32_t rounding) |
const uint32_t rounding) |
110 |
{ |
{ |
111 |
uintptr_t j; |
intptr_t j; |
112 |
|
|
113 |
if (rounding) |
if (rounding) |
114 |
for (j = 0; j < 8*stride; j+=stride) |
for (j = 7*stride; j >= 0; j-=stride) |
115 |
{ |
{ |
116 |
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1); |
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1); |
117 |
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1); |
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1); |
144 |
const uint32_t stride, |
const uint32_t stride, |
145 |
const uint32_t rounding) |
const uint32_t rounding) |
146 |
{ |
{ |
147 |
uintptr_t j; |
intptr_t j; |
148 |
|
// const uint8_t * const src2 = src+stride; /* using a second pointer is _not_ faster here */ |
149 |
|
|
150 |
if (rounding) |
if (rounding) |
151 |
for (j = 0; j < 8*stride; j+=stride) /* forward is better. Some automatic prefetch perhaps. */ |
for (j = 0; j < 8*stride; j+=stride) /* forward is better. Some automatic prefetch perhaps. */ |
180 |
const uint32_t stride, |
const uint32_t stride, |
181 |
const uint32_t rounding) |
const uint32_t rounding) |
182 |
{ |
{ |
183 |
uintptr_t j; |
intptr_t j; |
184 |
|
|
185 |
if (rounding) |
if (rounding) |
186 |
for (j = 0; j < 8*stride; j+=stride) |
for (j = 7*stride; j >= 0; j-=stride) |
187 |
{ |
{ |
188 |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2); |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2); |
189 |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2); |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2); |
195 |
dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2); |
dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2); |
196 |
} |
} |
197 |
else |
else |
198 |
for (j = 0; j < 8*stride; j+=stride) |
for (j = 7*stride; j >= 0; j-=stride) |
199 |
{ |
{ |
200 |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2); |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2); |
201 |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2); |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2); |
208 |
} |
} |
209 |
} |
} |
210 |
|
|
211 |
|
|
212 |
|
|
213 |
|
|
214 |
/************************************************************* |
/************************************************************* |
215 |
* QPEL STUFF STARTS HERE * |
* QPEL STUFF STARTS HERE * |
216 |
*************************************************************/ |
*************************************************************/ |