1 |
|
|
2 |
|
;------------------------------------------------------------------------------ |
3 |
|
; |
4 |
|
; MAKE_COLORSPACE(NAME,STACK, BYTES,PIXELS,VPIXELS, FUNC, ARG1, ARG2) |
5 |
|
; |
6 |
|
; This macro provides an assembler width/height scroll loop |
7 |
|
; NAME function name |
8 |
|
; STACK additional stack bytes required by FUNC |
9 |
|
; BYTES bytes-per-pixel for the given colorspace |
10 |
|
; PIXELS pixels (columns) operated on per FUNC call |
11 |
|
; VPIXELS vertical pixels (rows) operated on per FUNC call |
12 |
|
; FUNC conversion macro name; we expect to find FUNC_INIT and FUNC macros |
13 |
|
; ARG1 first argument passed to FUNC_INIT and FUNC |
; ARG2 second argument passed to FUNC_INIT and FUNC |
14 |
|
; |
15 |
|
; throughout the FUNC the registers mean: |
16 |
|
; eax y_stride |
17 |
|
; ebx u_ptr |
18 |
|
; ecx v_ptr |
19 |
|
; edx x_stride |
20 |
|
; esi y_ptr |
21 |
|
; edi x_ptr |
22 |
|
; ebp width |
23 |
|
; |
24 |
|
;------------------------------------------------------------------------------ |
25 |
|
%macro MAKE_COLORSPACE 8
    ;--------------------------------------------------------------------------
    ; Emits the function NAME: a row/column loop that expands the FUNC macro
    ; once per tile of PIXELS columns by VPIXELS rows.
    ;
    ; Macro parameters, in order:
    ;   NAME      symbol of the generated function
    ;   STACK     extra bytes of local stack reserved below the loop's own locals
    ;   BYTES     bytes per pixel of the x (packed) plane
    ;   PIXELS    columns consumed per FUNC expansion
    ;   VPIXELS   rows consumed per FUNC expansion
    ;   FUNC      tile macro name; FUNC_INIT is expanded once before the loops
    ;   ARG1,ARG2 forwarded unchanged to both FUNC_INIT and FUNC
    ;
    ; Stack arguments of the generated function (32-bit, lowest address first):
    ;   x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride,
    ;   width, height, vflip
    ;
    ; Registers while FUNC runs:
    ;   eax = y_stride   ebx = u_ptr   ecx = v_ptr   edx = x_stride
    ;   esi = y_ptr      edi = x_ptr   ebp = columns left in the current row
    ;
    ; ebx, esi, edi and ebp are saved and restored; eax, ecx and edx are not.
    ; The column loop runs over fixed_width (width rounded up to a multiple of
    ; 16), not over width itself.
    ; NOTE(review): the chroma row advance uses a repeat count of VPIXELS/2 - 1,
    ; which presumably assumes VPIXELS >= 2 -- confirm against the call sites.
    ;--------------------------------------------------------------------------
%define NAME        %1
%define STACK       %2
%define BYTES       %3
%define PIXELS      %4
%define VPIXELS     %5
%define FUNC        %6
%define ARG1        %7
%define ARG2        %8

    ; --- define function global/symbol ---
align 16
cglobal NAME
NAME:

    ; --- stack frame layout ---
    ; pushsize  = 4 saved registers * 4 bytes
    ; localsize = 5 dword locals (20 bytes) + STACK bytes below them
%define pushsize    16
%define localsize   20 + STACK

    ; incoming arguments, addressed relative to esp after the prologue
%define vflip       esp + localsize + pushsize + 40
%define height      esp + localsize + pushsize + 36
%define width       esp + localsize + pushsize + 32
%define uv_stride   esp + localsize + pushsize + 28
%define y_stride    esp + localsize + pushsize + 24
%define v_ptr       esp + localsize + pushsize + 20
%define u_ptr       esp + localsize + pushsize + 16
%define y_ptr       esp + localsize + pushsize + 12
%define x_stride    esp + localsize + pushsize + 8
%define x_ptr       esp + localsize + pushsize + 4
%define _ip         esp + localsize + pushsize + 0

    push    ebx                         ; saved at esp + localsize + 12
    push    esi                         ; saved at esp + localsize + 8
    push    edi                         ; saved at esp + localsize + 4
    push    ebp                         ; saved at esp + localsize + 0

    ; locals owned by the loop code (top 20 bytes of the local area)
%define x_dif       esp + localsize - 4
%define y_dif       esp + localsize - 8
%define uv_dif      esp + localsize - 12
%define fixed_width esp + localsize - 16
%define tmp_height  esp + localsize - 20

    sub     esp, localsize

    ; --- init variables ---

    mov     eax, [width]
    add     eax, 15
    and     eax, ~15
    mov     [fixed_width], eax          ; fixed_width = (width + 15) & ~15

    mov     ebx, [x_stride]
%rep BYTES
    sub     ebx, eax
%endrep
    mov     [x_dif], ebx                ; x_dif = x_stride - BYTES*fixed_width

    mov     ebx, [y_stride]
    sub     ebx, eax
    mov     [y_dif], ebx                ; y_dif = y_stride - fixed_width

    mov     ebx, [uv_stride]
    mov     ecx, eax
    shr     ecx, 1
    sub     ebx, ecx
    mov     [uv_dif], ebx               ; uv_dif = uv_stride - fixed_width/2

    mov     esi, [y_ptr]                ; $esi$ = y_ptr
    mov     edi, [x_ptr]                ; $edi$ = x_ptr
    mov     edx, [x_stride]             ; $edx$ = x_stride
    mov     ebp, [height]               ; $ebp$ = height

    mov     ebx, [vflip]
    test    ebx, ebx
    jz      .dont_flip

    ; --- do flipping: start at the last x row and walk upwards ---

    xor     ebx, ebx
%rep BYTES
    sub     ebx, eax
%endrep
    sub     ebx, edx
    mov     [x_dif], ebx                ; x_dif = -BYTES*fixed_width - x_stride

    lea     eax, [ebp - 1]
    imul    eax, edx                    ; low 32 bits of (height-1) * x_stride; edx untouched
    add     edi, eax                    ; $edi$ += (height-1) * x_stride

    neg     edx                         ; x_stride = -x_stride

.dont_flip:

    ; --- begin loop ---

    mov     eax, [y_stride]             ; $eax$ = y_stride
    mov     ebx, [u_ptr]                ; $ebx$ = u_ptr
    mov     ecx, [v_ptr]                ; $ecx$ = v_ptr

    FUNC %+ _INIT ARG1, ARG2            ; expand FUNC_INIT once

.y_loop:
    mov     [tmp_height], ebp           ; ebp is reused as the column counter
    mov     ebp, [fixed_width]

.x_loop:
    FUNC ARG1, ARG2                     ; expand FUNC for one tile

    add     edi, BYTES*PIXELS           ; x_ptr += BYTES*PIXELS
    add     esi, PIXELS                 ; y_ptr += PIXELS
    add     ebx, PIXELS/2               ; u_ptr += PIXELS/2
    add     ecx, PIXELS/2               ; v_ptr += PIXELS/2

    sub     ebp, PIXELS                 ; $ebp$ -= PIXELS
    jg      .x_loop                     ; if ($ebp$ > 0) goto .x_loop

    mov     ebp, [tmp_height]           ; restore the row counter

    add     edi, [x_dif]                ; x_ptr += x_dif + (VPIXELS-1)*x_stride
    add     esi, [y_dif]                ; y_ptr += y_dif + (VPIXELS-1)*y_stride
%rep VPIXELS-1
    add     edi, edx
    add     esi, eax
%endrep

    add     ebx, [uv_dif]               ; u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride
    add     ecx, [uv_dif]               ; v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride
%rep (VPIXELS/2)-1
    add     ebx, [uv_stride]
    add     ecx, [uv_stride]
%endrep

    sub     ebp, VPIXELS                ; $ebp$ -= VPIXELS
    jg      .y_loop                     ; if ($ebp$ > 0) goto .y_loop

    ; --- cleanup stack ---

    add     esp, localsize
    pop     ebp
    pop     edi
    pop     esi
    pop     ebx
    ret

    ; --- undef everything so nothing leaks out of this expansion ---

%undef vflip
%undef height
%undef width
%undef uv_stride
%undef y_stride
%undef v_ptr
%undef u_ptr
%undef y_ptr
%undef x_stride
%undef x_ptr
%undef _ip
%undef x_dif
%undef y_dif
%undef uv_dif
%undef fixed_width
%undef tmp_height
%undef pushsize
%undef localsize
%undef NAME
%undef STACK
%undef BYTES
%undef PIXELS
%undef VPIXELS
%undef FUNC
%undef ARG1
%undef ARG2
%endmacro
195 |
|
;------------------------------------------------------------------------------ |