1 |
/************************************************************************** |
/***************************************************************************** |
2 |
* |
* |
3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* - QPel interpolation - |
* - QPel interpolation - |
5 |
* |
* |
6 |
|
* Copyright(C) 2003 Pascal Massimino <skal@planet-d.net> |
7 |
|
* |
8 |
* This program is free software; you can redistribute it and/or modify |
* This program is free software; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
10 |
* the Free Software Foundation; either version 2 of the License, or |
* the Free Software Foundation; either version 2 of the License, or |
17 |
* |
* |
18 |
* You should have received a copy of the GNU General Public License |
* You should have received a copy of the GNU General Public License |
19 |
* along with this program; if not, write to the Free Software |
* along with this program; if not, write to the Free Software |
20 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
* |
|
|
*************************************************************************/ |
|
|
|
|
|
/************************************************************************** |
|
21 |
* |
* |
22 |
* History: |
* $Id$ |
23 |
* |
* |
24 |
* 22.10.2002 initial coding - Skal - |
****************************************************************************/ |
|
* |
|
|
*************************************************************************/ |
|
25 |
|
|
26 |
#ifndef XVID_AUTO_INCLUDE |
#ifndef XVID_AUTO_INCLUDE |
27 |
|
|
28 |
#include "../portab.h" |
#include "../portab.h" |
29 |
#include "./qpel.h" |
#include "qpel.h" |
30 |
|
|
31 |
////////////////////////////////////////////////////////// |
/* Quarterpel FIR definition |
32 |
|
****************************************************************************/ |
33 |
|
|
34 |
static const int32_t FIR_Tab_8[9][8] = { |
static const int32_t FIR_Tab_8[9][8] = { |
35 |
{ 14, -3, 2, -1, 0, 0, 0, 0 } |
{ 14, -3, 2, -1, 0, 0, 0, 0 }, |
36 |
, { 23, 19, -6, 3, -1, 0, 0, 0 } |
{ 23, 19, -6, 3, -1, 0, 0, 0 }, |
37 |
, { -7, 20, 20, -6, 3, -1, 0, 0 } |
{ -7, 20, 20, -6, 3, -1, 0, 0 }, |
38 |
, { 3, -6, 20, 20, -6, 3, -1, 0 } |
{ 3, -6, 20, 20, -6, 3, -1, 0 }, |
39 |
, { -1, 3, -6, 20, 20, -6, 3, -1 } |
{ -1, 3, -6, 20, 20, -6, 3, -1 }, |
40 |
, { 0, -1, 3, -6, 20, 20, -6, 3 } |
{ 0, -1, 3, -6, 20, 20, -6, 3 }, |
41 |
, { 0, 0, -1, 3, -6, 20, 20, -7 } |
{ 0, 0, -1, 3, -6, 20, 20, -7 }, |
42 |
, { 0, 0, 0, -1, 3, -6, 19, 23 } |
{ 0, 0, 0, -1, 3, -6, 19, 23 }, |
43 |
, { 0, 0, 0, 0, -1, 2, -3, 14 } |
{ 0, 0, 0, 0, -1, 2, -3, 14 } |
44 |
}; |
}; |
45 |
|
|
46 |
static const int32_t FIR_Tab_16[17][16] = { |
static const int32_t FIR_Tab_16[17][16] = { |
47 |
{ 14, -3, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ 14, -3, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
48 |
, { 23, 19, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ 23, 19, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
49 |
, { -7, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ -7, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
50 |
, { 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
51 |
, { -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0 } |
{ -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0 }, |
52 |
, { 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0 } |
{ 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0 }, |
53 |
, { 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0 } |
{ 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0 }, |
54 |
, { 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0 } |
{ 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0 }, |
55 |
, { 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0 } |
{ 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0 }, |
56 |
, { 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0 } |
{ 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0 }, |
57 |
, { 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0 } |
{ 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0 }, |
58 |
, { 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0 } |
{ 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0 }, |
59 |
, { 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1 }, |
60 |
, { 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3 }, |
61 |
, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -7 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -7 }, |
62 |
, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 19, 23 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 19, 23 }, |
63 |
, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 2, -3, 14 } |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 2, -3, 14 } |
64 |
}; |
}; |
65 |
|
|
66 |
////////////////////////////////////////////////////////// |
/* Implementation |
67 |
// Implementation |
****************************************************************************/ |
68 |
|
|
69 |
#define XVID_AUTO_INCLUDE |
#define XVID_AUTO_INCLUDE |
70 |
|
|
71 |
// 16x? filters |
/* 16x? filters */ |
72 |
|
|
73 |
#define SIZE 16 |
#define SIZE 16 |
74 |
#define TABLE FIR_Tab_16 |
#define TABLE FIR_Tab_16 |
83 |
|
|
84 |
#include __FILE__ /* self-include ourself */ |
#include __FILE__ /* self-include ourself */ |
85 |
|
|
86 |
// note: B-frame always uses Rnd=0... |
/* note: B-frame always uses Rnd=0... */ |
87 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
88 |
#define FUNC_H H_Pass_16_Add_C |
#define FUNC_H H_Pass_16_Add_C |
89 |
#define FUNC_V V_Pass_16_Add_C |
#define FUNC_V V_Pass_16_Add_C |
97 |
#undef SIZE |
#undef SIZE |
98 |
#undef TABLE |
#undef TABLE |
99 |
|
|
100 |
// 8x? filters |
/* 8x? filters */ |
101 |
|
|
102 |
#define SIZE 8 |
#define SIZE 8 |
103 |
#define TABLE FIR_Tab_8 |
#define TABLE FIR_Tab_8 |
112 |
|
|
113 |
#include __FILE__ /* self-include ourself */ |
#include __FILE__ /* self-include ourself */ |
114 |
|
|
115 |
// note: B-frame always uses Rnd=0... |
/* note: B-frame always uses Rnd=0... */ |
116 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1 |
117 |
#define FUNC_H H_Pass_8_Add_C |
#define FUNC_H H_Pass_8_Add_C |
118 |
#define FUNC_V V_Pass_8_Add_C |
#define FUNC_V V_Pass_8_Add_C |
128 |
|
|
129 |
#undef XVID_AUTO_INCLUDE |
#undef XVID_AUTO_INCLUDE |
130 |
|
|
131 |
////////////////////////////////////////////////////////// |
/* general-purpose hooks |
132 |
// general-purpose hooks |
* TODO: embed in enc/dec structure? |
133 |
// TODO: embed in enc/dec structure? |
****************************************************************************/ |
134 |
|
|
135 |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Funcs = 0; |
136 |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0; |
137 |
|
|
138 |
////////////////////////////////////////////////////////// |
/* plain C impl. declaration |
139 |
// plain C impl. declaration |
* TODO: should be declared elsewhere? |
140 |
// TODO: should be declared elsewhere? |
****************************************************************************/ |
141 |
|
|
142 |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
XVID_QP_FUNCS xvid_QP_Funcs_C = { |
143 |
H_Pass_16_C, H_Pass_Avrg_16_C, H_Pass_Avrg_Up_16_C |
H_Pass_16_C, H_Pass_Avrg_16_C, H_Pass_Avrg_Up_16_C, |
144 |
, V_Pass_16_C, V_Pass_Avrg_16_C, V_Pass_Avrg_Up_16_C |
V_Pass_16_C, V_Pass_Avrg_16_C, V_Pass_Avrg_Up_16_C, |
145 |
|
|
146 |
, H_Pass_8_C,H_Pass_Avrg_8_C,H_Pass_Avrg_Up_8_C |
H_Pass_8_C, H_Pass_Avrg_8_C, H_Pass_Avrg_Up_8_C, |
147 |
, V_Pass_8_C,V_Pass_Avrg_8_C,V_Pass_Avrg_Up_8_C |
V_Pass_8_C, V_Pass_Avrg_8_C, V_Pass_Avrg_Up_8_C |
148 |
}; |
}; |
149 |
|
|
150 |
XVID_QP_FUNCS xvid_QP_Add_Funcs_C = { |
XVID_QP_FUNCS xvid_QP_Add_Funcs_C = { |
151 |
H_Pass_16_Add_C, H_Pass_Avrg_16_Add_C, H_Pass_Avrg_Up_16_Add_C |
H_Pass_16_Add_C, H_Pass_Avrg_16_Add_C, H_Pass_Avrg_Up_16_Add_C, |
152 |
, V_Pass_16_Add_C, V_Pass_Avrg_16_Add_C, V_Pass_Avrg_Up_16_Add_C |
V_Pass_16_Add_C, V_Pass_Avrg_16_Add_C, V_Pass_Avrg_Up_16_Add_C, |
153 |
|
|
154 |
, H_Pass_8_Add_C,H_Pass_Avrg_8_Add_C,H_Pass_Avrg_Up_8_Add_C |
H_Pass_8_Add_C, H_Pass_Avrg_8_Add_C, H_Pass_Avrg_Up_8_Add_C, |
155 |
, V_Pass_8_Add_C,V_Pass_Avrg_8_Add_C,V_Pass_Avrg_Up_8_Add_C |
V_Pass_8_Add_C, V_Pass_Avrg_8_Add_C, V_Pass_Avrg_Up_8_Add_C |
156 |
}; |
}; |
157 |
|
|
158 |
////////////////////////////////////////////////////////// |
/* mmx impl. declaration (see qpel_mmx.asm) |
159 |
// mmx impl. declaration (cf. qpel_mmx.asm) |
* TODO: should be declared elsewhere? |
160 |
// TODO: should be declared elsewhere? |
****************************************************************************/ |
161 |
|
|
162 |
|
#ifdef ARCH_IS_IA32 |
163 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx); |
164 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx); |
165 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx); |
175 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx); |
176 |
|
|
177 |
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
XVID_QP_FUNCS xvid_QP_Funcs_mmx = { |
178 |
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx |
xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx, |
179 |
, xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx |
xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx, |
180 |
|
|
181 |
, xvid_H_Pass_8_mmx,xvid_H_Pass_Avrg_8_mmx,xvid_H_Pass_Avrg_Up_8_mmx |
xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx, |
182 |
, xvid_V_Pass_8_mmx,xvid_V_Pass_Avrg_8_mmx,xvid_V_Pass_Avrg_Up_8_mmx |
xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx |
183 |
}; |
}; |
184 |
|
|
185 |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx); |
197 |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx); |
198 |
|
|
199 |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = { |
200 |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx |
xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx, |
201 |
, xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx |
xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx, |
202 |
|
|
203 |
, xvid_H_Pass_8_Add_mmx,xvid_H_Pass_Avrg_8_Add_mmx,xvid_H_Pass_Avrg_Up_8_Add_mmx |
xvid_H_Pass_8_Add_mmx, xvid_H_Pass_Avrg_8_Add_mmx, xvid_H_Pass_Avrg_Up_8_Add_mmx, |
204 |
, xvid_V_Pass_8_Add_mmx,xvid_V_Pass_Avrg_8_Add_mmx,xvid_V_Pass_Avrg_Up_8_Add_mmx |
xvid_V_Pass_8_Add_mmx, xvid_V_Pass_Avrg_8_Add_mmx, xvid_V_Pass_Avrg_Up_8_Add_mmx, |
205 |
}; |
}; |
206 |
|
#endif /* ARCH_IS_IA32 */ |
207 |
|
|
208 |
////////////////////////////////////////////////////////// |
/* tables for ASM |
209 |
// tables for ASM |
****************************************************************************/ |
|
|
|
|
extern uint16_t xvid_Expand_mmx[256][4]; // 8b -> 64b expansion table |
|
210 |
|
|
211 |
// Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
#ifdef ARCH_IS_IA32 |
212 |
// |
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */ |
213 |
// 17 tables, 2K each => 34K |
#endif |
|
// Mirroring can be achieved by composing 11 basic tables |
|
|
// (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0)) |
|
|
// Using Symmetries (and bswap) could reduce further |
|
|
// the memory to 7 tables (->14K). |
|
|
|
|
|
extern int16_t xvid_FIR_1_0_0_0[256][4]; |
|
|
extern int16_t xvid_FIR_3_1_0_0[256][4]; |
|
|
extern int16_t xvid_FIR_6_3_1_0[256][4]; |
|
|
extern int16_t xvid_FIR_14_3_2_1[256][4]; |
|
|
extern int16_t xvid_FIR_20_6_3_1[256][4]; |
|
|
extern int16_t xvid_FIR_20_20_6_3[256][4]; |
|
|
extern int16_t xvid_FIR_23_19_6_3[256][4]; |
|
|
extern int16_t xvid_FIR_7_20_20_6[256][4]; |
|
|
extern int16_t xvid_FIR_6_20_20_6[256][4]; |
|
|
extern int16_t xvid_FIR_6_20_20_7[256][4]; |
|
|
extern int16_t xvid_FIR_3_6_20_20[256][4]; |
|
|
extern int16_t xvid_FIR_3_6_19_23[256][4]; |
|
|
extern int16_t xvid_FIR_1_3_6_20[256][4]; |
|
|
extern int16_t xvid_FIR_1_2_3_14[256][4]; |
|
|
extern int16_t xvid_FIR_0_1_3_6[256][4]; |
|
|
extern int16_t xvid_FIR_0_0_1_3[256][4]; |
|
|
extern int16_t xvid_FIR_0_0_0_1[256][4]; |
|
214 |
|
|
215 |
////////////////////////////////////////////////////////// |
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
216 |
|
* |
217 |
uint16_t xvid_Expand_mmx[256][4]; // 8b -> 64b expansion table |
* 17 tables, 2K each => 34K |
218 |
|
* Mirroring can be achieved by composing 11 basic tables |
219 |
|
* (for instance: (23,19,-6,3)=(20,20,-6,3)+(3,-1,0,0)) |
220 |
|
* Using Symmetries (and bswap) could reduce further |
221 |
|
* the memory to 7 tables (->14K). */ |
222 |
|
|
223 |
int16_t xvid_FIR_1_0_0_0[256][4]; |
int16_t xvid_FIR_1_0_0_0[256][4]; |
224 |
int16_t xvid_FIR_3_1_0_0[256][4]; |
int16_t xvid_FIR_3_1_0_0[256][4]; |
251 |
} |
} |
252 |
|
|
253 |
|
|
254 |
void xvid_Init_QP_mmx() |
void xvid_Init_QP() |
255 |
{ |
{ |
256 |
int i; |
int i; |
257 |
|
|
258 |
|
#ifdef ARCH_IS_IA32 |
259 |
for(i=0; i<256; ++i) { |
for(i=0; i<256; ++i) { |
260 |
xvid_Expand_mmx[i][0] = i; |
xvid_Expand_mmx[i][0] = i; |
261 |
xvid_Expand_mmx[i][1] = i; |
xvid_Expand_mmx[i][1] = i; |
262 |
xvid_Expand_mmx[i][2] = i; |
xvid_Expand_mmx[i][2] = i; |
263 |
xvid_Expand_mmx[i][3] = i; |
xvid_Expand_mmx[i][3] = i; |
264 |
} |
} |
265 |
|
#endif |
266 |
|
|
267 |
// Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) |
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */ |
268 |
|
|
269 |
Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0); |
Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0); |
270 |
Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0); |
Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0); |
288 |
|
|
289 |
#endif /* !XVID_AUTO_INCLUDE */ |
#endif /* !XVID_AUTO_INCLUDE */ |
290 |
|
|
291 |
////////////////////////////////////////////////////////// |
/***************************************************************************** |
292 |
// "reference" filters impl. in plain C |
* "reference" filters impl. in plain C |
293 |
////////////////////////////////////////////////////////// |
****************************************************************************/ |
294 |
|
|
295 |
#ifdef XVID_AUTO_INCLUDE |
#ifdef XVID_AUTO_INCLUDE |
296 |
|
|
446 |
#undef FUNC_VA_UP |
#undef FUNC_VA_UP |
447 |
|
|
448 |
#endif /* XVID_AUTO_INCLUDE */ |
#endif /* XVID_AUTO_INCLUDE */ |
|
|
|
|
////////////////////////////////////////////////////////// |
|