/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Contents of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory Parent Directory | Revision Log Revision Log


Revision 255 - (show annotations)
Sat Feb 6 22:32:14 2010 UTC (9 years, 9 months ago) by thrust26
File size: 8741 byte(s)
optimized draw_mainly_vertical_8, linebench @ 534 now
1 ; History of timings...
2 ;649
3 ;614 (replacing the update of tmp0)
4 ;607
5 ;588
6 ;583 after alignment
7 ;579
8
; Zero-page scratch bytes used by the 8-bit line drawing routines below.
9 .zero
10
11 ; *= tmp1
12
; The declarations below are commented out in this file, yet dx, dy and the
; _CurrentPixel* / _OtherPixel* variables are still read and written by the
; code further down -- presumably they are defined elsewhere (TODO confirm).
13 ;e .dsb 2 ; Error decision factor (slope) 2 bytes in zero page
14 ;i .dsb 1 ; Number of pixels to draw (iteration counter) 1 byte in zp
15 ;dx .dsb 1 ; Width
16 ;dy .dsb 1 ; Height
17 ;_CurrentPixelX .dsb 1
18 ;_CurrentPixelY .dsb 1
19 ;_OtherPixelX .dsb 1
20 ;_OtherPixelY .dsb 1
21
22 save_a .dsb 1 ; Error accumulator, parked here while A is used for plotting
23 save_y .dsb 1 ; Not referenced by the code visible in this file
24 curBit .dsb 1 ; Current pixel mask (used by draw_mainly_vertical_8)
25
26 .text
27
; Pad up to the next 256-byte boundary (emits a full 256 bytes if the
; location counter is already aligned, since 256-(*&255) is then 256).
28 .dsb 256-(*&255)
29
; Opcode bytes of the instructions that the routines patch into themselves:
30 ; nop $ea
31 ; inx $e8 11101000
32 ; dex $ca 11001010
33 ; iny $c8 11001000
34 ; dey $88 10001000
35
; Named opcode constants. draw_mainly_vertical_8 stores _DEX/_ASL/_LSR/
; _INY/_DEY/_INC_ZP/_DEC_ZP into its own instruction stream; _NOP and _INX
; are not used by the code visible here.
36 #define _NOP $ea
37 #define _INX $e8
38 #define _DEX $ca
39 #define _INY $c8
40 #define _DEY $88
41 #define _ASL $0a
42 #define _LSR $4a
43 #define _INC_ZP $e6
44 #define _DEC_ZP $c6
45
46
47
; draw_totaly_vertical_8
; XORs one pixel into dy+1 consecutive screen rows at a fixed column.
; In:    _CurrentPixelX = pixel column
;        dy             = number of rows - 1
;        tmp0/tmp0+1    = zero-page pointer to the first row
; Out:   returns with rts; tmp0 has been advanced by 40 bytes per row drawn
; Uses:  A, X, Y, flags; patches the immediate operand at _mask_patch+1
; Entered by a branch from _DrawLine8 when the X difference is zero.
48 draw_totaly_vertical_8
49 .(
50 ldx _CurrentPixelX
51 ldy _TableDiv6,x ; Y = byte offset inside the row, looked up by pixel column
52 lda _TableBit6Reverse,x ; 4  A = pixel mask, looked up by pixel column
53 sta _mask_patch+1 ; Patch the mask into the "lda #" operand of the loop
54
55 ldx dy
56 inx ; X = dy+1 = number of rows to plot
57
58 clc ; 2  The adc #40 below needs carry clear on every pass
59 loop
60 _mask_patch
61 lda #0 ; 2  Operand is overwritten with the pixel mask (self-modified)
62 eor (tmp0),y ; 5  Toggle the pixel in the screen byte
63 sta (tmp0),y ; 6 => total = 13 cycles
64
65 ; Update screen address: add 40 bytes to the 16-bit pointer in tmp0
66 .(
67 lda tmp0+0 ; 3
68 adc #40 ; 2
69 sta tmp0+0 ; 3
70 bcc skip ; 2 (+1 if taken)
71 inc tmp0+1 ; 5  Propagate the carry into the high byte
72 clc ; 2  Restore the carry-clear invariant for the next adc
73 skip
74 .)
75 ; ------------------Min=13 Max=17
76
77 dex ; One row done
78 bne loop
79 rts
80 .)
81
82
83
84 ;
85 ; Expects the following variables to be set when called:
86 ; _CurrentPixelX
87 ; _CurrentPixelY
88 ; _OtherPixelX
89 ; _OtherPixelY
90 ;
; _DrawLine8: entry point of the line routine.
; Steps performed here:
;  1. dy = |_CurrentPixelY - _OtherPixelY|; the two endpoints are swapped
;     when _CurrentPixelY is the larger one, so Current always has the
;     smaller Y.
;  2. tmp0 = row address fetched from _HiresAddrLow/_HiresAddrHigh,
;     indexed by _CurrentPixelY.
;  3. dx = |_CurrentPixelX - _OtherPixelX|; A = opcode of the X step
;     ($ca = dex when Current X is larger, $e8 = inx when it is smaller).
;  4. Branches to draw_totaly_vertical_8 when the X difference is zero,
;     otherwise jumps to alignIt with A = step opcode.
; Modifies A, X, Y, flags, dx, dy, tmp0 and (on swap) the four endpoint
; variables.
91 _DrawLine8
92 ;
93 ; Compute deltas and signs
94 ;
95
96 ; Test Y value
97 .(
98 sec
99 lda _CurrentPixelY
100 sbc _OtherPixelY ; A = y1-y2, carry clear if y1<y2
101 beq end ; Same row: dy = 0, no swap needed
102 bcc cur_smaller
103
104 cur_bigger ; y1>y2
105 ; Swap X and Y
106 ; So we always draw from top to bottom
107 ldy _CurrentPixelY
108 ldx _OtherPixelY
109 sty _OtherPixelY
110 stx _CurrentPixelY
111
112 ldy _CurrentPixelX
113 ldx _OtherPixelX
114 sty _OtherPixelX
115 stx _CurrentPixelX
116
117 jmp end ; A still holds y1-y2, which is already positive
118
119 cur_smaller ; y1<y2
120 ; Absolute value (two's complement negate; carry is clear here)
121 eor #$ff
122 adc #1
123 end
124 sta dy
125 .)
126
127 ;
128 ; Initialise screen pointer
129 ;
130 ldy _CurrentPixelY
131 lda _HiresAddrLow,y ; 4
132 sta tmp0+0 ; 3
133 lda _HiresAddrHigh,y ; 4
134 sta tmp0+1 ; 3 => Total 14 cycles
135
136 ; Test X value
137 .(
138 sec
139 lda _CurrentPixelX
140 sbc _OtherPixelX ; A = x1-x2, carry clear if x1<x2
141 sta dx ; Correct as stored when x1>=x2; rewritten below when x1<x2
142 beq draw_totaly_vertical_8 ; No X movement: dedicated vertical routine
143 bcc cur_smaller
144
145 cur_bigger ; x1>x2
146 lda #$ca ; dex
147 bne end ; Always taken (A is non-zero)
148
149 cur_smaller ; x1<x2
150 ; Absolute value (two's complement negate; carry is clear here)
151 eor #$ff
152 adc #1
153 sta dx
154
155 lda #$e8 ; inx
156 end
157 .)
158
159 jmp alignIt ; A = step opcode for the specialised routines
160
; Pad up to the next 256-byte boundary so that alignIt starts on a page.
161 .dsb 256-(*&255)
162
; alignIt: dispatch on the slope.
; In:  A = X-step opcode (dex/inx), dx and dy already computed.
;      dy == 0  -> draw_totaly_horizontal_8
;      dy >= dx -> draw_mainly_vertical_8
;      dy <  dx -> falls through into the code that follows
; A is left untouched and Y holds dy for the routine that is reached.
163 alignIt
164 ; Compute slope and call the specialized code for mostly horizontal or vertical lines
165 ldy dy
166 beq draw_totaly_horizontal_8
167 cpy dx ; Carry set when dy >= dx
168 bcs draw_mainly_vertical_8
169
; draw_mainly_horizontal_8
; Bresenham loop for 0 < dy < dx: steps one pixel in X on every pass and
; moves the pointer down one row (40 bytes) whenever the error sum overflows.
; In:   A = X-step opcode ($ca dex / $e8 inx), Y = dy,
;       dx, _CurrentPixelX, _OtherPixelX set, tmp0 = row pointer.
; The step opcode, DY, DX and the end X are patched into the loop body
; (self-modifying code). The pixel at _OtherPixelX is plotted before exit.
; Uses A, X, Y, flags, save_a, tmp0.
170 draw_mainly_horizontal_8
171 .(
172 ; here we have DY in Y, and the OPCODE in A
173 sta __auto_stepx ; Write a (dex / nop / inx) instruction
174 cmp #$ca ; dex?
175 bne skipDex
; When stepping left, X >= end value at the cpx below, so cpx leaves the
; carry set and the following adc adds one extra; DY-1 compensates.
; When stepping right, X < end value and the carry is clear.
176 dey ; adjust for carry being set in loop
177 skipDex
178 sty __auto_ady+1 ; Patch DY (or DY-1) into "adc #"
179
180 lda dx
181 sta __auto_dx+1 ; Patch DX into "sbc #"
182
183 lda _OtherPixelX
184 sta __auto_cpx+1 ; Patch the end X into "cpx #"
185
186 ldx _CurrentPixelX ;Plotting coordinates
; Y is reloaded from _TableDiv6 at the top of the loop, so the value
; loaded on the next line is not used by the code visible here.
187 ldy _CurrentPixelY ;in X and Y
188
; Initial error sum = ~(DX/2) = -(DX/2)-1
189 lda dx
190 lsr
191 eor #$ff
192 ; clc
193
194 loopX
195 sta save_a ; 3 = 3  Park the error sum; A is needed for plotting
196 loopY
197 ; Draw the pixel
198 __auto_div6
199 ldy _TableDiv6,x ; 4  Byte offset in the row for pixel column X
200 __auto_bit6
201 lda _TableBit6Reverse,x ; 4  Pixel mask for pixel column X
202 eor (tmp0),y ; 5*
203 sta (tmp0),y ; 6*= 19
204
205 __auto_cpx
206 cpx #00 ; 2 At the endpoint yet? (operand patched with _OtherPixelX)
207 beq exitLoop ; 2/3
208 __auto_stepx
209 inx ; 2 Step in x (opcode patched to dex or inx)
210 lda save_a ; 3
211 __auto_ady
212 adc #00 ; 2 +DY (operand patched; carry comes from the cpx above)
213 bcc loopX ; 2/3=13/14
214 ; Time to step in y
215 __auto_dx
216 sbc #00 ; 2 -DX (operand patched; carry is set here because bcc fell through)
217 sta save_a ; 3 = 5
218
219 ; Set the new screen address (no clc: the code relies on the carry
220 ; being clear after the sbc above)
220 lda tmp0+0 ; 3
221 adc #40 ; 2
222 sta tmp0+0 ; 3
223 bcc loopY ; 2/3=10/11 ~84 taken
224 inc tmp0+1 ; 5
225 bcs loopY ; 3 = 8  Always taken: carry is still set from the adc
226 ; average: 12.12
227
228 exitLoop
229 rts
230 ; Timings:
231 ; x++/y : 36
232 ; x++/y++: 49.12
233 ; average: 42.56
234 .)
235
; draw_totaly_horizontal_8
; XORs a run of pixels on a single screen row (dy == 0).
; In:   A = X-step opcode ($ca dex / $e8 inx), patched into _outer_patch
;       _CurrentPixelX = first pixel column
;       _OtherPixelX   = column at which the loop stops
;       tmp0           = zero-page pointer to the row
; Out:  returns with rts
; Uses: A, X, Y, flags; patches _outer_patch and the operand of __auto_cpx
; NOTE(review): the loop exits as soon as X reaches _OtherPixelX, so the
; pixel at _OtherPixelX itself is not plotted, whereas the other line
; routines in this file plot their last pixel -- confirm this is intended.
236 draw_totaly_horizontal_8
237 .(
238 ; here we have DY in Y, and the OPCODE in A
239 sta _outer_patch ; Write a (dex / nop / inx) instruction
240
241 ldx _OtherPixelX
; The end column lives in X, not A (A holds the step opcode), so it must
; be stored with stx; storing A here would compare against $ca/$e8.
242 stx __auto_cpx+1 ; Patch the end X into "cpx #"
243
244 ldx _CurrentPixelX
245
246 ;
247 ; Draw loop
248 ;
249 outer_loop
250 ldy _TableDiv6,x ; Byte offset in the row for pixel column X
251 lda _TableBit6Reverse,x ; 4  Pixel mask for pixel column X
252 eor (tmp0),y ; 5
253 sta (tmp0),y ; 6
254
255 _outer_patch
256 inx ; Opcode patched to dex or inx
257
258 __auto_cpx
259 cpx #00 ; At the endpoint yet? (operand patched with _OtherPixelX)
260 bne outer_loop
261 rts
262 .)
263
264
265 ;
266 ; This code is used when the things are moving faster
267 ; vertically than horizontally
268 ;
269 ; dy>=dx (alignIt branches here with bcs after cpy dx)
270 ;
; In:   A = X-step opcode ($ca dex / $e8 inx), Y = dy,
;       dx, dy, _CurrentPixelX set, tmp0 = row pointer.
; Plots dy+1 pixels, one per row, shifting the pixel mask sideways
; whenever the error sum overflows.
; Inside the loop: X = rows left, Y = low byte of the screen address
; (tmp0 low byte is forced to 0), curBit = pixel mask, save_a = error sum.
; DX, DY and all direction-dependent instructions/operands are patched
; into the loop (self-modifying code).
; Uses A, X, Y, flags, save_a, curBit, tmp0.
271 draw_mainly_vertical_8
272 ; here we have DY in Y, and the OPCODE in A
273 .(
274 ; setup bresenham values:
275 sty __auto_dy+1 ; Patch DY into "sbc #"
276 ldx dx
277 stx __auto_dx+1 ; Patch DX into "adc #"
278
279 ; setup direction:
280 ; sta __auto_stepx ; Write a (dex / nop / inx) instruction
281 cmp #_DEX ; which direction
282 bne doInx
283 ; dex, moving left:
284 lda #%00100000
285 sta __auto_cpBit+1 ; Mask value that triggers a move to the next byte
286 lda #_ASL ;
287 sta __auto_shBit ; Mask shift instruction
288 lda #%00000001
289 sta __auto_ldBit+1 ; Mask to start with in the next byte
290 lda #_DEY
291 sta __auto_yLo ; Address low byte step
292 lda #$ff
293 sta __auto_cpY+1 ; Low byte value that signals a wrap
294 lda #_DEC_ZP
295 sta __auto_yHi ; Address high byte step on wrap
296 bne endX ; Always taken (A = _DEC_ZP is non-zero)
297
298 doInx
299 ; inx, moving right
300 lda #%00000001
301 sta __auto_cpBit+1 ; Mask value that triggers a move to the next byte
302 lda #_LSR
303 sta __auto_shBit ; Mask shift instruction
304 lda #%00100000
305 sta __auto_ldBit+1 ; Mask to start with in the next byte
306 lda #_INY
307 sta __auto_yLo ; Address low byte step
308 lda #$00
309 sta __auto_cpY+1 ; Low byte value that signals a wrap
310 lda #_INC_ZP
311 sta __auto_yHi ; Address high byte step on wrap
312 endX
313 ; setup X
314 tya ; y = dY
315 tax
316 inx ; x = dY+1
317 ; setup current bit:
318 ldy _CurrentPixelX
319 lda _TableBit6Reverse,y ; 4
320 sta curBit
321 ; setup pointer and Y:
322 ; TODO: self-modyfing code?
; Y = (tmp0 low byte + byte offset of the column); tmp0 low byte becomes 0
; and a carry out of the addition is added to tmp0 high byte.
323 lda _TableDiv6,y
324 clc
325 adc tmp0
326 tay
327 lda #0
328 sta tmp0
329 bcc skipTmp0 ; Carry is still the one produced by the adc above
330 inc tmp0+1
331 skipTmp0
332 ; calculate initial bresenham sum:
333 lda dy
334 lsr
335 eor #$ff ; ~(DY/2) = -(DY/2)-1
336 clc ; 2
337 bcc loopY ; 3  Always taken
338 ; a = sum, y = tmp0, x = dY+1, tmp0 = 0
339
; Out-of-line handler: the low address byte in Y overflowed while moving
; down one row, so bump the high byte and resume with carry clear.
340 incHiPtr ; 9
341 inc tmp0+1 ; 5
342 clc ; 2
343 bcc contHiPtr ; 3  Always taken
344
345 loopY
346 sta save_a ; 3 = 3
347 ; Draw the pixel
348 lda curBit ; 3
349 eor (tmp0),y ; 5*
350 sta (tmp0),y ; 6*= 14**
351
352 dex ; 2 At the endpoint yet?
353 beq exitLoop ; 2/3= 4/5
354 loopX
355 ; Update screen address: Y += 40 (carry is clear on both paths here)
356 tya ; 2
357 adc #40 ; 2
358 tay ; 2
359 bcs incHiPtr ; 2/13 ~16% taken
360 contHiPtr ; = 9.76 average
361
362 lda save_a ; 3
363 __auto_dx
364 adc #00 ; 2 +DX (operand patched)
365 bcc loopY ; 2/3= 7/8 ~50% taken
366
367 ; Time to step in x
368 __auto_dy
369 sbc #00 ; 2 -DY (operand patched; carry is set here because bcc fell through)
370 sta save_a ; 3 = 5
371
372 lda curBit ; 3
373 __auto_cpBit
374 cmp #%00100000 ; 2 %00100000/%00000001 (operand patched)
375 beq nextColumn ; 2/14.07 ~17% taken
376 __auto_shBit
377 asl ; 2 asl/lsr, clears carry (opcode patched)
378 contNextColumn
379 sta curBit ; 3 =~13.71
380
381 ; Draw the pixel
382 eor (tmp0),y ; 5*
383 sta (tmp0),y ; 6*= 11**
384 dex ; 2 At the endpoint yet?
385 bne loopX ; 2/3= 4/5
386 exitLoop
387 rts
388
; Out-of-line handler: the mask reached the edge of the byte, so restart
; the mask at the opposite edge and move Y to the neighbouring byte,
; adjusting tmp0 high byte if Y wrapped. Returns with carry clear.
389 nextColumn
390 __auto_ldBit
391 lda #%00000001 ; 2 %00000001/%00100000 (operand patched)
392 __auto_yLo
393 dey ; 2  (opcode patched to dey or iny)
394 __auto_cpY
395 cpy #$ff ; 2  (operand patched to $ff or $00)
396 clc ; 2 TODO: optimize
397 bne contNextColumn ; 2/3 ~99% taken
398 __auto_yHi
399 dec tmp0+1 ; 5  (opcode patched to dec or inc zero page)
400 bcc contNextColumn ; 3  Always taken (carry cleared above)
401
402 ; x ,y++: 38.76** (50%)
403 ; x++,y++: 51.47** (50%)
404 ; average: 45.11**
405
406
407 ; x ,y++: 54.12**
408 ; x++,y++: 57.12**
409 ; average: 55.62**
410 .)

  ViewVC Help
Powered by ViewVC 1.1.26