/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Contents of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory | Revision Log


Revision 258 - (show annotations)
Sun Feb 7 14:05:11 2010 UTC (9 years, 11 months ago) by thrust26
File size: 10855 byte(s)
final optimization for mainly_horizontal, 517
1 ; History of timings...
2 ;649
3 ;614 (replacing the update of tmp0)
4 ;607
5 ;588
6 ;583 after alignment
7 ;579
8 ;534 redid mainly_vertical
9 ;529 removed page penalty
10 ;517 final optimization at mainly_horizontal
11
12 .zero
; Zero-page segment: scratch variables used by the line routines in this file.
13
14 ; *= tmp1
15
; The declarations below are commented out in this revision. dx, dy and the
; _CurrentPixel*/_OtherPixel* variables are still referenced by the code in
; this file, so they are presumably declared elsewhere - TODO confirm.
16 ;e .dsb 2 ; Error decision factor (slope) 2 bytes in zero page
17 ;i .dsb 1 ; Number of pixels to draw (iteration counter) 1 byte in zp
18 ;dx .dsb 1 ; Width
19 ;dy .dsb 1 ; Height
20 ;_CurrentPixelX .dsb 1
21 ;_CurrentPixelY .dsb 1
22 ;_OtherPixelX .dsb 1
23 ;_OtherPixelY .dsb 1
24
; save_a : holds the Bresenham error sum between loop iterations
;          (written with "sta save_a" at the top of the draw loops).
; save_y : not referenced anywhere in the visible code.
; curBit : current pixel bit mask, used by draw_mainly_vertical_8.
25 save_a .dsb 1
26 save_y .dsb 1
27 curBit .dsb 1
28
; X_SIZE   : horizontal size in pixels, used as X_SIZE-1 when patching table
;            offsets in draw_mainly_horizontal_8_new.
; ROW_SIZE : value added to the screen pointer to move down one row.
;            The macro body is not parenthesised; it is only used as a plain
;            immediate operand (adc #ROW_SIZE) in this file.
; Do not append comments to the #define lines themselves: the text could end
; up inside the macro expansion.
29 #define X_SIZE 240
30 #define ROW_SIZE X_SIZE/6
31
; 6502 opcode bytes. The routines below store these into their own code
; (self-modifying code) to select the stepping direction at run time.
; _NOP is only mentioned in comments in the visible code.
; _ASL/_LSR are the accumulator forms, _INC_ZP/_DEC_ZP the zero-page forms.
32 #define _NOP $ea
33 #define _INX $e8
34 #define _DEX $ca
35 #define _INY $c8
36 #define _DEY $88
37 #define _ASL $0a
38 #define _LSR $4a
39 #define _INC_ZP $e6
40 #define _DEC_ZP $c6
41
42
43 .text
44
; Pad up to the next 256-byte boundary so that the code below starts on a page
; boundary (the timing history mentions "after alignment" and "removed page
; penalty"). When * is already on a boundary the expression evaluates to 256,
; i.e. a full page of padding is emitted.
45 .dsb 256-(*&255)
46
;
; draw_totaly_vertical_8
;
; XORs a vertical run of dy+1 pixels into the screen, one per row.
; Reached from _DrawLine8 (beq) when dx == 0; returns with rts.
;
; In:  _CurrentPixelX = column of the line
;      dy             = row count minus one
;      tmp0/tmp0+1    = pointer to the first row (set up by _DrawLine8)
; Out: tmp0 advanced by ROW_SIZE per plotted row; A, X, Y clobbered.
;
; The pixel mask is written into the operand of the "lda #0" at _mask_patch
; (self-modifying code), so the loop reloads it as an immediate.
;
47 draw_totaly_vertical_8
48 .(
49 ldx _CurrentPixelX
50 ldy _TableDiv6,x ; Y = byte offset used with (tmp0),y for this column
51 lda _TableBit6Reverse,x ; 4
52 sta _mask_patch+1 ; patch the immediate operand of the lda below
53
54 ldx dy
55 inx ; X = dy+1 = number of rows to plot
56
57 clc ; 2
; Carry is kept clear across the loop: the adc below only sets it on a page
; crossing, and that path clears it again; dex/bne do not touch carry.
58 loop
59 _mask_patch
60 lda #0 ; 2
61 eor (tmp0),y ; 5
62 sta (tmp0),y ; 6 => total = 13 cycles
63
64 ; Update screen address
65 .(
66 lda tmp0+0 ; 3
67 adc #ROW_SIZE ; 2
68 sta tmp0+0 ; 3
69 bcc skip ; 2 (+1 if taken)
70 inc tmp0+1 ; 5
71 clc ; 2
72 skip
73 .)
74 ; ------------------Min=13 Max=17
; NOTE(review): the per-instruction counts above sum to 11 on the
; branch-taken path (3+2+3+3) - confirm the Min figure.
75
76 dex
77 bne loop
78 rts
79 .)
80
81
82
83 ;
; _DrawLine8
;
; Entry point: prepares deltas, direction and screen pointer for a line
; between (_CurrentPixelX,_CurrentPixelY) and (_OtherPixelX,_OtherPixelY),
; then dispatches to one of the specialised drawing routines.
;
; Side effects visible in this code:
;  - the two endpoints are exchanged when _CurrentPixelY > _OtherPixelY
;  - dy = |_CurrentPixelY - _OtherPixelY|, dx = |_CurrentPixelX - _OtherPixelX|
;  - tmp0/tmp0+1 = _HiresAddrLow/_HiresAddrHigh entry for _CurrentPixelY
;  - A, X, Y are clobbered
;
; At dispatch time (alignIt) A holds the opcode _DEX or _INX for the
; horizontal direction and Y holds dy.
;
84 ; Expects the following variables to be set when called:
85 ; _CurrentPixelX
86 ; _CurrentPixelY
87 ; _OtherPixelX
88 ; _OtherPixelY
89 ;
90 _DrawLine8
91 ;
92 ; Compute deltas and signs
93 ;
94
95 ; Test Y value
96 .(
97 sec
98 lda _CurrentPixelY
99 sbc _OtherPixelY
100 beq end ; same row: A = 0 is stored as dy
101 bcc cur_smaller
102
103 cur_bigger ; y1>y2
104 ; Swap the two endpoints (both X and Y coordinates)
105 ; So we always draw from top to bottom
; Only X and Y are used for the swap, so A still holds the (positive)
; difference when it is stored into dy at "end".
106 ldy _CurrentPixelY
107 ldx _OtherPixelY
108 sty _OtherPixelY
109 stx _CurrentPixelY
110
111 ldy _CurrentPixelX
112 ldx _OtherPixelX
113 sty _OtherPixelX
114 stx _CurrentPixelX
115
116 jmp end
117
118 cur_smaller ; y1<y2
119 ; Absolute value
; Carry is clear here (bcc was taken), so eor #$ff / adc #1 negates A.
120 eor #$ff
121 adc #1
122 end
123 sta dy
124 .)
125
126 ;
127 ; Initialise screen pointer
128 ;
129 ldy _CurrentPixelY
130 lda _HiresAddrLow,y ; 4
131 sta tmp0+0 ; 3
132 lda _HiresAddrHigh,y ; 4
133 sta tmp0+1 ; 3 => Total 14 cycles
134
135 ; Test X value
136 .(
137 sec
138 lda _CurrentPixelX
139 sbc _OtherPixelX
140 sta dx ; sta leaves the flags from sbc intact for the branches below
141 beq draw_totaly_vertical_8 ; dx == 0 (also covers a single point, dy == 0)
142 bcc cur_smaller
143
144 cur_bigger ; x1>x2
145 lda #_DEX
146 bne end ; always taken: the opcode byte just loaded is non-zero
147
148 cur_smaller ; x1<x2
149 ; Absolute value
; Carry is clear here (bcc was taken), so eor #$ff / adc #1 negates A.
150 eor #$ff
151 adc #1
152 sta dx
153
154 lda #_INX
155 end
156 .)
157
; Jump over the alignment padding below.
158 jmp alignIt
159
; Pad to the next 256-byte boundary (a full 256 bytes if already aligned).
160 .dsb 256-(*&255)
161
162 alignIt
163 ; Compute slope and call the specialized code for mostly horizontal or vertical lines
; In: A = _DEX/_INX opcode, dx and dy set. dx is never 0 here.
;  dy == 0  -> draw_totaly_horizontal_8
;  dy <  dx -> draw_mainly_horizontal_8_new
;  dy >= dx -> draw_mainly_vertical_8
; draw_mainly_horizontal_8 (without _new) is not a target of this dispatch.
164 ldy dy
165 beq draw_totaly_horizontal_8
166 cpy dx
167 bcc draw_mainly_horizontal_8_new
168 jmp draw_mainly_vertical_8
169
170 draw_totaly_horizontal_8
;
; XORs a horizontal run of pixels on the row tmp0 points to.
; Reached from alignIt when dy == 0 (dx is non-zero on that path).
;
; In:  A              = opcode to step X (_INX or _DEX), patched into the loop
;      Y              = dy (0 here, not used)
;      _CurrentPixelX = first column plotted
;      _OtherPixelX   = column at which the loop stops
;      tmp0/tmp0+1    = pointer to the row
; Out: A, X, Y clobbered; returns with rts.
;
; Fix: the end column used to be written with "sta", which stored the opcode
; byte still held in A ($e8/$ca) as the cpx operand instead of _OtherPixelX
; (which had just been loaded into X). It is now stored with "stx"; same
; instruction size and cycle count, so code alignment is unchanged.
;
; NOTE(review): the loop steps X before comparing, so the pixel at
; _OtherPixelX itself is not plotted, whereas the other routines in this file
; plot delta+1 pixels. Left unchanged - confirm whether this is intended.
;
171 .(
172 ; here we have DY in Y, and the OPCODE in A
173 sta _outer_patch ; Write a (dex / nop / inx) instruction
174
175 ldx _OtherPixelX
176 stx __auto_cpx+1 ; patch the end column into the cpx operand below
177
178 ldx _CurrentPixelX
179
180 ;
181 ; Draw loop
182 ;
183 outer_loop
184 ldy _TableDiv6,x ; Y = byte offset used with (tmp0),y for column X
185 lda _TableBit6Reverse,x ; 4
186 eor (tmp0),y ; 5
187 sta (tmp0),y ; 6
188
189 _outer_patch
190 inx ; placeholder: overwritten with the opcode passed in A
191
192 __auto_cpx
193 cpx #00 ; At the endpoint yet? (operand patched above)
194 bne outer_loop
195 rts
196 .)
197
198 draw_mainly_horizontal_8
;
; Older variant of the "mostly horizontal" line loop (dx > dy).
; NOTE(review): nothing in the visible code branches here; alignIt dispatches
; to draw_mainly_horizontal_8_new instead.
;
; In:  A = opcode to step X (_INX or _DEX), Y = dy, dx set,
;      _CurrentPixelX / _OtherPixelX = start / end column,
;      tmp0/tmp0+1 = pointer to the first row.
; Out: A, X, Y clobbered; tmp0 advanced by ROW_SIZE for each row step.
;
; The loop plots a pixel, exits if X equals the end column, otherwise steps X
; and adds dy to the error sum kept in save_a; on carry it subtracts dx and
; moves the screen pointer down one row. Step opcode, dy, dx and the end
; column are all patched into the code as immediate operands.
;
199 .(
200 ; here we have DY in Y, and the OPCODE in A
201 sta __auto_stepx ; Write a (dex / nop / inx) instruction
202 cmp #_DEX ; dex?
203 bne skipDex
; When stepping with dex, X is above the end column, so the "cpx" in the loop
; leaves carry set and the following adc adds one extra; dy is reduced by one
; to compensate. With inx, X is below the end column and cpx clears carry.
204 dey ; adjust for carry being set in loop
205 skipDex
206 sty __auto_ady+1
207
208 lda _OtherPixelX
209 sta __auto_cpx+1
210
211 ldx _CurrentPixelX ;Plotting coordinates
212 ; ldy _CurrentPixelY ;in X and Y
213
214 lda dx
215 sta __auto_dx+1
216 lsr
217 eor #$ff ; initial error sum = one's complement of dx/2
218 ; clc
219
220 loopX
221 sta save_a ; 3 = 3
222 loopY
223 ; Draw the pixel
224 __auto_div6
225 ldy _TableDiv6,x ; 4
226 __auto_bit6
227 lda _TableBit6Reverse,x ; 4
228 eor (tmp0),y ; 5*
229 sta (tmp0),y ; 6*= 19
230
231 __auto_cpx
232 cpx #00 ; 2 At the endpoint yet?
233 beq exitLoop ; 2/3
234 __auto_stepx
235 inx ; 2 Step in x
236 lda save_a ; 3
237 __auto_ady
238 adc #00 ; 2 +DY TODO: bugfix carry
239 bcc loopX ; 2/3=13/14 ~50% taken
240 ; Time to step in y
; Carry is set here (bcc fell through), so sbc subtracts exactly dx.
241 __auto_dx
242 sbc #00 ; 2 -DX
243 sta save_a ; 3 = 5
244
245 ; Set the new screen address
246 lda tmp0+0 ; 3
247 adc #ROW_SIZE ; 2
248 sta tmp0+0 ; 3
249 bcc loopY ; 2/3=10/11 ~84% taken
250 inc tmp0+1 ; 5
; Always taken: carry is still set from the adc above (inc leaves it alone).
251 bcs loopY ; 3 = 8
252 ; average: 12.12
253
254 exitLoop
255 rts
256 ; Timings:
257 ; x++/y : 36
258 ; x++/y++: 49.12
259 ; average: 42.56
260 .)
261
262
263 draw_mainly_horizontal_8_new
;
; "Mostly horizontal" line loop (dispatched from alignIt when dy < dx).
;
; In:  A = _INX or _DEX (horizontal direction), Y = dy, dx set,
;      _OtherPixelX = end column, tmp0/tmp0+1 = pointer to the first row.
; Out: A, X, Y clobbered; tmp0 advanced by ROW_SIZE for each row step.
;
; Instead of comparing X against the end column, X is used as a down-counter
; from dx+1 to 0 and the two table base addresses in the loop are patched so
; that "table,x" still picks the right entry:
;  - stepping left  (_DEX): bases _TableDiv6-1 / _TableBit6Reverse-1 plus
;    _OtherPixelX, so X = dx+1 selects the start column and X = 1 the end
;    column.
;  - stepping right (_INX): low byte X_SIZE-1-_OtherPixelX in the pages of
;    _TableDiv6Rev / _TableBit6. Those tables are defined elsewhere;
;    presumably they hold the same data in mirrored order - TODO confirm.
; Only the low byte is computed (8-bit add/sub); the high byte is taken
; directly from the table address.
;
264 .(
265 ; here we have DY in Y, and the OPCODE (inx, dex) in A
266 sty __auto_dy+1
267
268 cmp #_INX
269 beq doInx
270
; _DEX case: carry is clear after the cmp above because _DEX < _INX.
; The "== 0" notes state the author's expectation for the low byte of the
; table address minus one - not verifiable from this file.
271 lda #<_TableDiv6-1 ; == 0
272 ; clc ; _DEX < _INX
273 adc _OtherPixelX
274 sta __auto_div6+1
275 lda #<_TableBit6Reverse-1 ; == 0
276 ; clc
277 adc _OtherPixelX
278
279 ldx #>_TableDiv6
280 ldy #>_TableBit6Reverse ;
; Used as an unconditional branch: relies on the high byte just loaded into Y
; being non-zero.
281 bne endPatch
282
283 doInx
; _INX case: carry is set after the cmp above (values equal), so sbc
; subtracts exactly. The second sbc relies on carry still being set, i.e. on
; _OtherPixelX <= X_SIZE-1 - assumption, not checked here.
284 lda #X_SIZE-1
285 ; sec
286 sbc _OtherPixelX
287 sta __auto_div6+1
288 lda #X_SIZE-1
289 ; sec
290 sbc _OtherPixelX
291
292 ldx #>_TableDiv6Rev
293 ldy #>_TableBit6 ;
294 endPatch
295 sta __auto_bit6+1
296 stx __auto_div6+2
297 sty __auto_bit6+2
298
299 lda dx
300 tax
301 inx
302 sta __auto_dx+1
303 lsr
304 eor #$ff ; initial error sum = one's complement of dx/2
305 clc
306 ; a = sum, x = dX+1
; Carry is clear whenever loopX/loopY is entered: clc above, "bcc loopX",
; "bcc loopY", and clc + bcc after the page increment.
307
308 loopX
309 sta save_a ; 3 = 3
310 loopY
311 ; Draw the pixel
312 __auto_div6
313 ldy _TableDiv6-1,x ; 4
314 __auto_bit6
315 lda _TableBit6Reverse-1,x;4
316 eor (tmp0),y ; 5*
317 sta (tmp0),y ; 6*= 19
318
319 dex ; 2 Step in x
320 beq exitLoop ; 2/3 At the endpoint yet?
321 lda save_a ; 3
322 __auto_dy
323 adc #00 ; 2 +DY TODO: bugfix carry
324 bcc loopX ; 2/3=11/12 ~50% taken
325 ; Time to step in y
; Carry is set here (bcc fell through), so sbc subtracts exactly dx.
326 __auto_dx
327 sbc #00 ; 2 -DX
328 sta save_a ; 3 = 5
329
330 ; Set the new screen address
331 lda tmp0+0 ; 3
332 adc #ROW_SIZE ; 2
333 sta tmp0+0 ; 3
334 bcc loopY ; 2/3=10/11 ~84% taken
335 inc tmp0+1 ; 5
336 clc ; 2
337 bcc loopY ; 3 = 10
338 ; average: 12.44
339
340 exitLoop
341 rts
342 ; Timings:
343 ; x++/y : 34
344 ; x++/y++: 47.12
345 ; average: 40.56
346 .)
347
348
349 .dsb 256-(*&255)
; Pads to the next 256-byte boundary (a full 256 bytes if already aligned).
350
351 ;
352 ; This code is used when the things are moving faster
353 ; vertically than horizontally
354 ;
355 ; dy>dx
; (alignIt also dispatches here when dy == dx)
356 ;
; In:  A = _INX or _DEX (horizontal direction), Y = dy, dx set,
;      _CurrentPixelX = start column, tmp0/tmp0+1 = pointer to the first row.
; Out: A, X, Y clobbered; tmp0 (low byte forced to 0), curBit and save_a
;      modified.
;
; X counts the dy+1 pixels to plot. The low byte of the screen address lives
; in Y (tmp0 low is set to 0), so moving down a row is "Y += ROW_SIZE" with
; an out-of-line fix-up of tmp0+1 on carry. The pixel mask is kept in curBit
; and shifted one bit per horizontal step.
;
; Values patched into the loop per direction:
;                        moving left (_DEX)   moving right (_INX)
;   __auto_cpBit operand   %00100000            %00000001
;   __auto_shBit opcode    asl                  lsr
;   __auto_ldBit operand   %00000001            %00100000
;   __auto_yLo   opcode    dey                  iny
;   __auto_cpY   operand   $ff                  $00
;   __auto_yHi   opcode    dec zp               inc zp
;
357 draw_mainly_vertical_8
358 ; here we have DY in Y, and the OPCODE in A
359 .(
360 ; setup bresenham values:
361 sty __auto_dy+1
362 ldx dx
363 stx __auto_dx+1
364
365 ; TODO: two separate branches depending on x-direction
366 ; setup direction:
367 cmp #_DEX ; which direction?
368 bne doInx
369 ; dex -> moving left:
370 lda #%00100000
371 sta __auto_cpBit+1
372 lda #_ASL ;
373 sta __auto_shBit
374 lda #%00000001
375 sta __auto_ldBit+1
376 lda #_DEY
377 sta __auto_yLo
378 ldx #$ff
379 lda #_DEC_ZP
380 bne endPatch ; always taken: the opcode byte in A is non-zero
381
382 doInx
383 ; inx -> moving right:
384 lda #%00000001
385 sta __auto_cpBit+1
386 lda #_LSR
387 sta __auto_shBit
388 lda #%00100000
389 sta __auto_ldBit+1
390 lda #_INY
391 sta __auto_yLo
392 ldx #$00
393 lda #_INC_ZP
394 endPatch
395 stx __auto_cpY+1
396 sta __auto_yHi
397 ; setup X
398 tya ; y = dY
399 tax
400 inx ; x = dY+1
401 ; setup current bit:
402 ldy _CurrentPixelX
403 lda _TableBit6Reverse,y ; 4
404 sta curBit
405 ; setup pointer and Y:
406 ; TODO: self-modifying code?
; Y = tmp0 low + column byte offset; tmp0 low is then zeroed and a carry from
; the addition is propagated into tmp0+1.
407 lda _TableDiv6,y
408 clc
409 adc tmp0
410 tay
411 lda #0
412 sta tmp0
413 bcc skipTmp0
414 inc tmp0+1
415 skipTmp0
416 ; calculate initial bresenham sum:
417 lda dy
418 lsr
419 eor #$ff ; -DY/2
420 clc ; 2
421 bcc loopY ; 3
422 ; a = sum, y = tmp0, x = dY+1, tmp0 = 0
423
; Out-of-line page-crossing fix-up for the row step at loopX; it re-enters at
; contHiPtr with carry clear.
424 incHiPtr ; 9
425 inc tmp0+1 ; 5
426 clc ; 2
427 bcc contHiPtr ; 3
428 ;----------------------------------------------------------
429 loopY
430 sta save_a ; 3 = 3
431 ; Draw the pixel
432 lda curBit ; 3
433 eor (tmp0),y ; 5
434 sta (tmp0),y ; 6 = 14
435
436 dex ; 2 At the endpoint yet?
437 beq exitLoop ; 2/3= 4/5
438 loopX
439 ; Update screen address
; Carry is clear on entry to loopX (from "bcc loopY" or after the asl/lsr
; path below), so the adc adds exactly ROW_SIZE.
440 tya ; 2
441 adc #ROW_SIZE ; 2
442 tay ; 2
443 bcs incHiPtr ; 2/13 ~16% taken
444 contHiPtr ; = 9.76 average
445
446 lda save_a ; 3
447 __auto_dx
448 adc #00 ; 2 +DX
449 bcc loopY ; 2/3= 7/8 ~50% taken
450
451 ; Time to step in x
; Carry is set here (bcc fell through), so sbc subtracts exactly dy.
452 __auto_dy
453 sbc #00 ; 2 -DY
454 sta save_a ; 3 = 5
455
456 lda curBit ; 3
457 __auto_cpBit ; TODO: optimize
458 cmp #%00100000 ; 2 %00100000/%00000001
459 beq nextColumn ; 2/14.07 ~17% taken
; Not at the edge bit of the byte: shift the mask one position. The author
; notes that this shift leaves carry clear; that holds if curBit has a single
; bit set in bits 0-5 (table contents not visible here - TODO confirm).
460 __auto_shBit
461 asl ; 2 asl/lsr, clears carry
462 contNextColumn
463 sta curBit ; 3 =~13.71
464
465 ; Draw the pixel
466 eor (tmp0),y ; 5
467 sta (tmp0),y ; 6 = 11
468 dex ; 2 At the endpoint yet?
469 bne loopX ; 2/3= 4/5
470 exitLoop
471 rts
472 ;----------------------------------------------------------
; Edge of the byte reached: restart the mask at the opposite edge bit and move
; Y to the neighbouring byte; if Y wrapped (compare value patched to $ff/$00)
; tmp0+1 is adjusted as well. clc re-establishes the carry-clear invariant
; after the cmp/cpy above.
473 nextColumn
474 __auto_ldBit
475 lda #%00000001 ; 2 %00000001/%00100000
476 __auto_yLo
477 dey ; 2 dey/iny
478 __auto_cpY
479 cpy #$ff ; 2 $ff/$00
480 clc ; 2 TODO: optimize
481 bne contNextColumn ; 2/3 ~99% taken
482 __auto_yHi
483 dec tmp0+1 ; 5 dec/inc
484 bcc contNextColumn ; 3
485
486 ; x ,y++: 38.76 (50%)
487 ; x++,y++: 51.47 (50%)
488 ; average: 45.11
489 .)
490
491
492
493

  ViewVC Help
Powered by ViewVC 1.1.26