/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Annotation of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory Parent Directory | Revision Log Revision Log


Revision 261 - (hide annotations)
Mon Feb 8 14:01:57 2010 UTC (9 years, 8 months ago) by thrust26
File size: 14347 byte(s)
optimized chunking, 482
1 thrust26 261 ; History of linebench timings...
2 dbug 249 ;649
3     ;614 (replacing the update of tmp0)
4     ;607
5 thrust26 253 ;588
6     ;583 after alignment
7     ;579
8 thrust26 258 ;534 redid mainly_vertical
9 thrust26 257 ;529 removed page penalty
10 thrust26 258 ;517 final optimization at mainly_horizontal
11 thrust26 259 ;501 chunking, initial version
12 thrust26 261 ;482 optimized chunking (avg: 38.91 cycles)
13 dbug 218
14 thrust26 261
15     ; TODOs:
16     ; + chunking
17     ; - two separate branches instead of patching?
18     ; - countdown minor
19     ; - mainly horizontal
20     ; - mainly vertical
21    
22 thrust26 253 .zero
23 dbug 218
24 thrust26 253 ; *= tmp1
25 dbug 218
26 thrust26 253 ;e .dsb 2 ; Error decision factor (slope) 2 bytes in zero page
27     ;i .dsb 1 ; Number of pixels to draw (iteration counter) 1 byte in zp
28     ;dx .dsb 1 ; Width
29     ;dy .dsb 1 ; Height
30     ;_CurrentPixelX .dsb 1
31     ;_CurrentPixelY .dsb 1
32     ;_OtherPixelX .dsb 1
33     ;_OtherPixelY .dsb 1
34    
35     save_a .dsb 1 ; Bresenham sum parked here while A is needed for plotting
36 thrust26 255 curBit .dsb 1 ; bit mask of the current pixel (draw_mainly_vertical_8)
37 thrust26 259 chunk .dsb 1 ; pixel mask collected for the current screen byte (draw_very_horizontal_8)
38 thrust26 261 lastSum .dsb 1 ; dx/2, added to the sum before the last segment (draw_very_horizontal_8)
39 thrust26 253 ; Screen geometry: ROW_SIZE (= X_SIZE/BYTE_PIXEL) is the value added to the
40 thrust26 261 ; screen pointer by the routines below to move down one row.
41 thrust26 259 #define BYTE_PIXEL 6
42 thrust26 258 #define X_SIZE 240
43 thrust26 259 #define ROW_SIZE X_SIZE/BYTE_PIXEL
44 thrust26 257 ; Opcode bytes that the routines below store into their own code (self-modifying patches):
45     #define _NOP $ea
46     #define _INX $e8
47     #define _DEX $ca
48     #define _INY $c8
49     #define _DEY $88
50     #define _ASL $0a
51     #define _LSR $4a
52     #define _INC_ZP $e6
53     #define _DEC_ZP $c6
54    
55 thrust26 258
56 thrust26 253 .text
57    
58     .dsb 256-(*&255) ; pad up to the next 256-byte page boundary
59    
60 dbug 222 draw_totaly_vertical_8 ; dx == 0: EOR-plots dy+1 pixels straight down, starting in the row tmp0 points to
61     .(
62 thrust26 253 ldx _CurrentPixelX
63     ldy _TableDiv6,x ; Y = byte offset inside the row for this x (table lookup)
64     lda _TableBit6Reverse,x ; 4 A = bit mask of the pixel inside that byte (table lookup)
65     sta _mask_patch+1 ; self-modify: the mask becomes the operand of the "lda #" below
66    
67     ldx dy
68     inx ; X = dy+1 = number of pixels to plot
69    
70     clc ; 2 the adc in the loop relies on carry being clear
71 dbug 222 loop
72     _mask_patch
73 thrust26 253 lda #0 ; 2 operand is patched with the pixel mask
74     eor (tmp0),y ; 5
75     sta (tmp0),y ; 6 => total = 13 cycles
76 dbug 220
77 thrust26 259 ; update the screen address:
78 thrust26 253 .(
79     lda tmp0+0 ; 3
80 thrust26 257 adc #ROW_SIZE ; 2
81 thrust26 253 sta tmp0+0 ; 3
82     bcc skip ; 2 (+1 if taken)
83     inc tmp0+1 ; 5
84     clc ; 2
85 dbug 222 skip
86 thrust26 253 .)
87     ; ------------------Min=13 Max=17
88 dbug 222
89 thrust26 253 dex ; one pixel less to go
90     bne loop
91     rts
92 dbug 222 .)
93 dbug 218
94 thrust26 253
95    
96 dbug 219 ; _DrawLine8: line drawing entry point; sets up dx, dy and tmp0, then branches to a specialised routine.
97     ; Expects the following variables to be set when called:
98     ; _CurrentPixelX
99     ; _CurrentPixelY
100     ; _OtherPixelX
101     ; _OtherPixelY
102 thrust26 253 ; Side effects: the two endpoints may be swapped in place; dx, dy and tmp0 are overwritten.
103 dbug 219 _DrawLine8
104 thrust26 253 ;
105     ; Compute deltas and signs
106     ;
107    
108     ; Test Y value
109 dbug 219 .(
110 thrust26 253 sec
111     lda _CurrentPixelY
112     sbc _OtherPixelY ; A = y1-y2
113     beq end ; same row: dy = 0, nothing to swap
114     bcc cur_smaller ; borrow => y1<y2
115 dbug 218
116 thrust26 253 cur_bigger ; y1>y2
117     ; Swap X and Y
118     ; So we always draw from top to bottom
119     ldy _CurrentPixelY
120     ldx _OtherPixelY
121     sty _OtherPixelY
122     stx _CurrentPixelY
123 dbug 218
124 thrust26 253 ldy _CurrentPixelX
125     ldx _OtherPixelX
126     sty _OtherPixelX
127     stx _CurrentPixelX
128    
129     jmp end ; A still holds y1-y2 (positive), the swap only used X and Y
130    
131     cur_smaller ; y1<y2
132     ; Absolute value
133     eor #$ff
134     adc #1 ; carry is clear on this path, so A = y2-y1
135 dbug 222 end
136 thrust26 253 sta dy
137 dbug 219 .)
138 dbug 218
139 thrust26 253 ;
140     ; Initialise screen pointer
141     ;
142     ldy _CurrentPixelY
143     lda _HiresAddrLow,y ; 4
144     sta tmp0+0 ; 3
145     lda _HiresAddrHigh,y ; 4
146     sta tmp0+1 ; 3 => Total 14 cycles
147    
148     ; Test X value
149 dbug 219 .(
150 thrust26 253 sec
151     lda _CurrentPixelX
152     sbc _OtherPixelX
153     sta dx ; final value when x1>=x2, replaced below when x1<x2
154     beq draw_totaly_vertical_8 ; same column
155     bcc cur_smaller ; borrow => x1<x2
156 dbug 218
157 thrust26 253 cur_bigger ; x1>x2
158 thrust26 258 lda #_DEX ; A = opcode for stepping x to the left
159 thrust26 253 bne end ; always taken, _DEX is non-zero
160 dbug 218
161 thrust26 253 cur_smaller ; x1<x2
162     ; Absolute value
163     eor #$ff
164     adc #1 ; carry is clear on this path, so A = x2-x1
165     sta dx
166    
167 thrust26 258 lda #_INX ; A = opcode for stepping x to the right
168 dbug 219 end
169     .)
170 dbug 218
171 thrust26 253 jmp alignIt ; jump over the alignment padding
172 dbug 218
173 thrust26 253 .dsb 256-(*&255) ; pad up to the next 256-byte page boundary
174    
175     alignIt
176     ; Compute slope and call the specialized code for mostly horizontal or vertical lines
177     ldy dy ; Y = dy for the routines below, A still holds the step opcode
178     beq draw_totaly_horizontal_8 ; dy == 0
179     cpy dx
180 thrust26 259 bcc draw_mainly_horizontal_8 ; dy < dx
181 thrust26 257 jmp draw_mainly_vertical_8 ; dy >= dx
182 thrust26 253
183 thrust26 258 draw_totaly_horizontal_8 ; dy == 0: EOR-plots pixels of the row at tmp0 from _CurrentPixelX towards _OtherPixelX
184 thrust26 259 .(
185 thrust26 258 ; here we have DY in Y, and the OPCODE in A
186     sta _outer_patch ; Write a (dex / nop / inx) instruction
187    
188     ldx _OtherPixelX
189     stx __auto_cpx+1 ; patch the end x (X, not the opcode in A) into the "cpx #" operand
190    
191     ldx _CurrentPixelX ; X = current x position
192    
193     ;
194     ; Draw loop
195     ;
196     outer_loop
197     ldy _TableDiv6,x ; Y = byte offset inside the row for this x (table lookup)
198     lda _TableBit6Reverse,x ; 4
199     eor (tmp0),y ; 5
200     sta (tmp0),y ; 6
201    
202     _outer_patch
203     inx ; replaced at entry by the step opcode passed in A
204    
205     __auto_cpx
206     cpx #00 ; At the endpoint yet?
207     bne outer_loop ; note: the loop ends before _OtherPixelX itself is plotted
208     rts
209     .)
210    
211 thrust26 255 draw_mainly_horizontal_8 ; dy < dx: entered with DY in Y and the step opcode in A
212 thrust26 259 .(
213     tax ; keep the step opcode in X
214 thrust26 255 lda dx
215 thrust26 253 lsr
216 thrust26 259 cmp dy
217     bcs draw_very_horizontal_8 ; dx/2 >= dy: use the chunking variant instead
218 thrust26 253
219 thrust26 258 ; here we have DY in Y, and the OPCODE (inx, dex) in X
220     sty __auto_dy+1 ; patch DY into the "adc #" operand
221 thrust26 253
222 thrust26 259 ; all this stress to be able to use dex, beq :)
223     cpx #_INX
224 thrust26 258 beq doInx
225 thrust26 253
226 thrust26 258 lda #<_TableDiv6-1 ; == 0
227     ; clc ; _DEX < _INX
228     adc _OtherPixelX ; table offset of the end x; the loop index X counts down to 1
229     sta __auto_div6+1 ; low byte of the div table address
230     lda #<_TableBit6Reverse-1 ; == 0
231     ; clc
232     adc _OtherPixelX ; NOTE(review): relies on the first adc not having produced a carry
233 dbug 219
234 thrust26 258 ldx #>_TableDiv6
235     ldy #>_TableBit6Reverse ;
236     bne endPatch ; taken as long as that high byte is non-zero
237    
238     doInx
239     lda #X_SIZE-1
240     ; sec
241     sbc _OtherPixelX ; carry is set by the equal cpx above, so A = X_SIZE-1 - end x
242     sta __auto_div6+1 ; low byte of the div table address
243     lda #X_SIZE-1
244     ; sec
245     sbc _OtherPixelX
246    
247     ldx #>_TableDiv6Rev
248     ldy #>_TableBit6 ;
249     endPatch
250     sta __auto_bit6+1 ; low byte of the bit table address
251     stx __auto_div6+2 ; high byte of the div table address
252     sty __auto_bit6+2 ; high byte of the bit table address
253    
254     lda dx
255     tax
256 thrust26 259 inx ; 2 +1 since we count to 0
257 thrust26 258 sta __auto_dx+1 ; patch DX into the "sbc #" operand
258     lsr
259     eor #$ff ; initial sum = dx/2 with all bits inverted
260     clc
261     ; a = sum, x = dX+1
262 dbug 219
263 thrust26 258 loopX
264     sta save_a ; 3 = 3
265     loopY
266     ; Draw the pixel
267     __auto_div6
268     ldy _TableDiv6-1,x ; 4
269     __auto_bit6
270     lda _TableBit6Reverse-1,x;4
271     eor (tmp0),y ; 5*
272     sta (tmp0),y ; 6*= 19
273    
274     dex ; 2 Step in x
275     beq exitLoop ; 2/3 At the endpoint yet?
276     lda save_a ; 3
277     __auto_dy
278 thrust26 259 adc #00 ; 2 +DY
279 thrust26 258 bcc loopX ; 2/3=11/12 ~50% taken
280     ; Time to step in y
281     __auto_dx
282     sbc #00 ; 2 -DX
283     sta save_a ; 3 = 5
284    
285 thrust26 259 ; update the screen address:
286 thrust26 258 lda tmp0+0 ; 3
287     adc #ROW_SIZE ; 2
288     sta tmp0+0 ; 3
289     bcc loopY ; 2/3=10/11 ~84% taken
290     inc tmp0+1 ; 5
291     clc ; 2
292     bcc loopY ; 3 = 10
293     ; average: 12.44
294    
295     exitLoop
296 thrust26 253 rts
297 thrust26 258 ; Timings:
298     ; x++/y : 34
299 thrust26 259 ; x++/y++: 47.44
300     ; average: 40.72
301     .)
302 thrust26 253
303 thrust26 259 draw_very_horizontal_8 ; collects the pixels of one screen byte in "chunk" and writes them in one go
304     .(
305     ; dX/2 >= dY (tested by the caller), here we use "chunking"
306     ; here we have DY in Y, and the OPCODE (inx, dex) in X
307     sty __auto_dy+1 ; patch DY into both "adc #" operands
308 thrust26 261 sty __auto_dy2+1
309 thrust26 259 cpx #_INX
310 thrust26 261 php ; keep the result of the direction test across the setup below
311 thrust26 258
312 thrust26 259 ldx _CurrentPixelX
313 thrust26 261 lda _TableDiv6,x
314     clc
315     adc tmp0
316     tay ; Y = low byte of the address of the first screen byte
317     bcc skipHi
318     inc tmp0+1
319     skipHi
320     lda #0
321     sta tmp0 ; pointer low byte = 0, Y carries the low byte from now on
322    
323     plp
324     beq doInx ; equal in the cpx above => moving right
325     ; negative x-direction
326 thrust26 259 lda _TableMod6,x
327     tax ; X = pixel counter inside the byte (presumably x mod 6 - confirm against the table)
328    
329     lda #_DEY
330     sta __auto_stepx ; column step becomes dey in both loops
331 thrust26 261 sta __auto_stepx2
332 thrust26 259 lda #$ff
333 thrust26 261 sta __auto_cpy+1 ; Y == $ff after dey means a page was crossed
334     sta __auto_cpy2+1
335 thrust26 259 lda #_DEC_ZP
336     sta __auto_yHi ; ... in which case the pointer high byte is decremented
337 thrust26 261 sta __auto_yHi2
338 thrust26 259 lda Pot2NTbl,x
339     sta chunk ; initial mask for the first byte
340 thrust26 261 lda #<Pot2NTbl
341 thrust26 259 bne endPatch ; NOTE(review): falls into doInx if the low byte of Pot2NTbl were 0
342    
343     doInx
344 thrust26 261 ; positive x-direction
345 thrust26 259 lda #BYTE_PIXEL-1
346 thrust26 261 ; sec
347 thrust26 259 sbc _TableMod6,x
348     tax ; X = BYTE_PIXEL-1 minus the table value = pixel counter inside the byte
349    
350     lda #_INY
351     sta __auto_stepx ; column step becomes iny in both loops
352 thrust26 261 sta __auto_stepx2
353 thrust26 259 lda #$00
354 thrust26 261 sta __auto_cpy+1 ; Y == $00 after iny means a page was crossed
355     sta __auto_cpy2+1
356 thrust26 259 lda #_INC_ZP
357     sta __auto_yHi ; ... in which case the pointer high byte is incremented
358 thrust26 261 sta __auto_yHi2
359 thrust26 259 lda Pot2PTbl,x
360     sta chunk ; initial mask for the first byte
361 thrust26 261 lda #<Pot2PTbl
362 thrust26 259 endPatch
363 thrust26 261 sta __auto_pot1+1 ; low byte of the selected mask table, for all three table reads
364     sta __auto_pot2+1
365     sta __auto_pot3+1
366 thrust26 259
367     lda dx
368     sta __auto_dx+1 ; patch DX into the "sbc #" operand
369 thrust26 261 ; calculate initial bresenham sum
370 thrust26 259 lsr
371 thrust26 261 sta lastSum ; 3 this is used for the last line segment
372     eor #$ff ; = -dx/2
373 thrust26 259 clc
374 thrust26 261 jmp loopX
375     ; a = sum, x = pixel counter inside the current byte, y = ptr-offset
376 thrust26 259
377 thrust26 257 .dsb 256-(*&255)
378 thrust26 253
379 thrust26 259 nextColumn ;
380     tax ; 2 park the sum in X
381     lda chunk ; 3
382     eor (tmp0),y ; 5
383     sta (tmp0),y ; 6
384     lda #%00111111 ; 2
385     sta chunk ; 3
386     txa ; 2 sum back in A
387     ldx #BYTE_PIXEL-1 ; 2
388     __auto_stepx
389     iny ; 2 next column
390 thrust26 261 __auto_cpy
391 thrust26 259 cpy #00 ; 2
392     clc ; 2
393     bne contColumn ; 2/3=33/34 99% taken
394     __auto_yHi
395     inc tmp0+1 ; 5 dec/inc
396     bcc contColumn ; 3 = 8
397    
398 thrust26 261 loopY
399     dec dy ; 5 all but one vertical segments drawn?
400     beq exitLoop ; 2/3= 7/8 yes, exit loop
401 thrust26 259 loopX
402     dex ; 2
403 thrust26 261 bmi nextColumn ; 2/37.03 ~16.7% taken
404     contColumn ; = 9.85
405 thrust26 259 __auto_dy
406     adc #00 ; 2 +DY
407 thrust26 261 bcc loopX ; 2/3= 4/5 ~50% taken
408 thrust26 259 ; Time to step in y
409     __auto_dx
410     sbc #00 ; 2 -DX
411     sta save_a ; 3 = 5
412    
413     ; plot the last bits of current row:
414 thrust26 261 __auto_pot1
415     lda Pot2PTbl,x ; 4
416 thrust26 259 eor chunk ; 3
417     eor (tmp0),y ; 5
418     sta (tmp0),y ; 6
419 thrust26 261 __auto_pot2
420     lda Pot2PTbl,x ; 4
421 thrust26 259 sta chunk ; 3 = 25
422    
423     ; update the screen address:
424     tya ; 2
425     adc #ROW_SIZE ; 2
426     tay ; 2
427     lda save_a ; 3
428 thrust26 261 bcc loopY ; 2/3=11/12 ~84.4% taken
429 thrust26 259 inc tmp0+1 ; 5
430     clc ; 2
431     bcc loopY ; 3 = 10
432 thrust26 261 ; average: 13.40
433 thrust26 259
434 thrust26 261 ; Timings:
435     ; x++/y : 14.85 (75%)
436     ; x++/y++: 64.25 (25%)
437     ; average: 27.20
438    
439 thrust26 259 exitLoop
440 thrust26 261 ; draw the last horizontal line segment:
441     adc lastSum ; 3
442     loopXEnd
443     dex ; 2
444     bmi nextColumnEnd ; 2/37.03 ~16.7% taken
445     contColumnEnd ; = 9.85
446     __auto_dy2
447     adc #00 ; 2 +DY
448     bcc loopXEnd ; 2/3=11/12 ~50% taken
449    
450     ; plot last chunk:
451     __auto_pot3
452     lda Pot2PTbl,x ; 4
453 thrust26 259 eor chunk ; 3
454     eor (tmp0),y ; 5
455     sta (tmp0),y ; 6 = 18
456     rts
457    
458 thrust26 261 nextColumnEnd ;
459     tax ; 2 park the sum in X
460     lda chunk ; 3
461     eor (tmp0),y ; 5
462     sta (tmp0),y ; 6
463     lda #%00111111 ; 2
464     sta chunk ; 3
465     txa ; 2 sum back in A
466     ldx #BYTE_PIXEL-1 ; 2
467     __auto_stepx2
468     iny ; 2 next column
469     __auto_cpy2
470     cpy #00 ; 2
471     clc ; 2
472     bne contColumnEnd ; 2/3=33/34 99% taken
473     __auto_yHi2
474     inc tmp0+1 ; 5 dec/inc
475     bcc contColumnEnd ; 3 = 8
476    
477 thrust26 259 Pot2PTbl ; masks with the lowest 1..6 bits set
478     .byte %00000001, %00000011, %00000111, %00001111
479     .byte %00011111, %00111111
480     Pot2NTbl ; masks growing downwards from bit 5
481     .byte %00100000, %00110000
482     .byte %00111000, %00111100, %00111110, %00111111
483     .)
484    
485    
486     .dsb 256-(*&255) ; pad up to the next 256-byte page boundary
487 dbug 219 ;
488     ; This code is used when the things are moving faster
489     ; vertically than horizontally
490     ;
491     ; dy>=dx (the dispatcher only branches elsewhere when dy<dx or dy==0)
492     ;
493 thrust26 255 draw_mainly_vertical_8
494     ; here we have DY in Y, and the OPCODE in A
495 thrust26 259 .(
496 thrust26 255 ; setup bresenham values:
497 thrust26 253 sty __auto_dy+1 ; patch DY into the "sbc #" operand
498 thrust26 255 ldx dx
499     stx __auto_dx+1 ; patch DX into the "adc #" operand
500 dbug 219
501 thrust26 255 ; setup direction:
502 thrust26 257 cmp #_DEX ; which direction?
503 thrust26 255 bne doInx
504 thrust26 257 ; dex -> moving left:
505 thrust26 255 lda #%00100000
506     sta __auto_cpBit+1 ; bit value at which the next byte must be entered
507     lda #_ASL ;
508     sta __auto_shBit
509     lda #%00000001
510     sta __auto_ldBit+1 ; first bit value inside the next byte
511     lda #_DEY
512     sta __auto_yLo
513 thrust26 257 ldx #$ff
514 thrust26 255 lda #_DEC_ZP
515 thrust26 258 bne endPatch ; always taken, _DEC_ZP is non-zero
516 thrust26 253
517 thrust26 255 doInx
518 thrust26 257 ; inx -> moving right:
519 thrust26 255 lda #%00000001
520     sta __auto_cpBit+1 ; bit value at which the next byte must be entered
521     lda #_LSR
522     sta __auto_shBit
523     lda #%00100000
524     sta __auto_ldBit+1 ; first bit value inside the next byte
525     lda #_INY
526     sta __auto_yLo
527 thrust26 257 ldx #$00
528 thrust26 255 lda #_INC_ZP
529 thrust26 258 endPatch
530 thrust26 257 stx __auto_cpY+1 ; Y value that signals a page crossing after dey/iny
531 thrust26 255 sta __auto_yHi ; dec/inc opcode for the pointer high byte
532     ; setup X
533 thrust26 257 tya ; y = dY
534 thrust26 255 tax
535 thrust26 257 inx ; x = dY+1
536 thrust26 255 ; setup current bit:
537     ldy _CurrentPixelX
538     lda _TableBit6Reverse,y ; 4
539     sta curBit
540     ; setup pointer and Y:
541     ; TODO: self-modifying code?
542     lda _TableDiv6,y
543     clc
544     adc tmp0
545     tay ; Y = low byte of the address of the first screen byte
546     lda #0
547     sta tmp0 ; pointer low byte = 0, Y carries the low byte from now on
548     bcc skipTmp0 ; carry still comes from the adc above
549     inc tmp0+1
550     skipTmp0
551     ; calculate initial bresenham sum:
552     lda dy
553 thrust26 253 lsr
554 thrust26 257 eor #$ff ; -DY/2
555 thrust26 255 clc ; 2
556     bcc loopY ; 3
557     ; a = sum, y = tmp0, x = dY+1, tmp0 = 0
558 thrust26 253
559 thrust26 255 incHiPtr ; 9
560     inc tmp0+1 ; 5
561     clc ; 2
562     bcc contHiPtr ; 3
563 thrust26 257 ;----------------------------------------------------------
564 thrust26 255 loopY
565     sta save_a ; 3 = 3
566 thrust26 253 ; Draw the pixel
567 thrust26 255 lda curBit ; 3
568 thrust26 257 eor (tmp0),y ; 5
569     sta (tmp0),y ; 6 = 14
570 thrust26 253
571 thrust26 255 dex ; 2 At the endpoint yet?
572     beq exitLoop ; 2/3= 4/5
573     loopX
574 thrust26 259 ; update the screen address:
575 thrust26 255 tya ; 2
576 thrust26 257 adc #ROW_SIZE ; 2
577 thrust26 255 tay ; 2
578     bcs incHiPtr ; 2/13 ~16% taken
579     contHiPtr ; = 9.76 average
580 thrust26 253
581 thrust26 255 lda save_a ; 3
582     __auto_dx
583 thrust26 253 adc #00 ; 2 +DX
584 thrust26 255 bcc loopY ; 2/3= 7/8 ~50% taken
585    
586 thrust26 253 ; Time to step in x
587     __auto_dy
588 thrust26 255 sbc #00 ; 2 -DY
589     sta save_a ; 3 = 5
590 dbug 219
591 thrust26 255 lda curBit ; 3
592 thrust26 257 __auto_cpBit ; TODO: optimize
593 thrust26 255 cmp #%00100000 ; 2 %00100000/%00000001
594     beq nextColumn ; 2/14.07 ~17% taken
595     __auto_shBit
596     asl ; 2 asl/lsr, clears carry
597     contNextColumn
598     sta curBit ; 3 =~13.71
599 thrust26 253
600 thrust26 255 ; Draw the pixel
601 thrust26 257 eor (tmp0),y ; 5
602     sta (tmp0),y ; 6 = 11
603 thrust26 255 dex ; 2 At the endpoint yet?
604     bne loopX ; 2/3= 4/5
605 thrust26 253 exitLoop
606     rts
607 thrust26 257 ;----------------------------------------------------------
608 thrust26 255 nextColumn
609     __auto_ldBit
610     lda #%00000001 ; 2 %00000001/%00100000
611     __auto_yLo
612 thrust26 257 dey ; 2 dey/iny
613 thrust26 255 __auto_cpY
614 thrust26 257 cpy #$ff ; 2 $ff/$00
615 thrust26 255 clc ; 2 TODO: optimize
616     bne contNextColumn ; 2/3 ~99% taken
617     __auto_yHi
618 thrust26 257 dec tmp0+1 ; 5 dec/inc
619 thrust26 255 bcc contNextColumn ; 3
620    
621 thrust26 257 ; x ,y++: 38.76 (50%)
622     ; x++,y++: 51.47 (50%)
623     ; average: 45.11
624 thrust26 259 .)
625 thrust26 258
626 thrust26 261 ; *** total timings: ***
627     ; draw_very_horizontal_8 (29.6%): 27.20
628     ; draw_mainly_horizontal_8 (20.4%): 40.72
629     ; draw_mainly_vertical_8 (50.0%): 45.11
630     ;----------------------------------------
631     ; total average (100.0%): 38.91
632 thrust26 258
633    
634    

  ViewVC Help
Powered by ViewVC 1.1.26