/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Contents of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory Parent Directory | Revision Log Revision Log


Revision 264 - (show annotations)
Mon Feb 8 16:59:22 2010 UTC (9 years, 8 months ago) by thrust26
File size: 15808 byte(s)
horizontal lines bug fix and some comments 
1 ; History of linebench timings...
2 ;649
3 ;614 (replacing the update of tmp0)
4 ;607
5 ;588
6 ;583 after alignment
7 ;579
8 ;534 redid mainly_vertical
9 ;529 removed page penalty
10 ;517 final optimization at mainly_horizontal
11 ;501 chunking, initial version
12 ;482 optimized chunking (avg: 38.91 cycles)
13 ;473 final optimization for mainly_vertical (37.89)
14
15 ; TODOs:
16 ; + chunking (-35)
17 ; - two separate branches instead of patching?
18 ; - countdown minor
19 ; - mainly_horizontal
20 ; + mainly_vertical (-9)
21
22 .zero
23
24 ; *= tmp1
25
; The declarations below are commented out in this file. The code further down
; still uses dx, dy, tmp0 and the four pixel coordinates, so they are presumably
; declared elsewhere - TODO confirm.
26 ;e .dsb 2 ; Error decision factor (slope) 2 bytes in zero page
27 ;i .dsb 1 ; Number of pixels to draw (iteration counter) 1 byte in zp
28 ;dx .dsb 1 ; Width
29 ;dy .dsb 1 ; Height
30 ;_CurrentPixelX .dsb 1
31 ;_CurrentPixelY .dsb 1
32 ;_OtherPixelX .dsb 1
33 ;_OtherPixelY .dsb 1
34
; Scratch bytes used by the line routines in this file:
35 save_a .dsb 1 ; Bresenham sum, parked here while A is needed for plotting or pointer updates
36 curBit .dsb 1 ; bit mask of the current pixel column (draw_mainly_vertical_8)
37 chunk .dsb 1 ; pixel bits pending for the current screen byte (draw_very_horizontal_8)
38 lastSum .dsb 1 ; dx/2 or dy/2, added to the sum before the last line segment is drawn
39
40
; Screen geometry. ROW_SIZE is the value the routines add to the screen address
; to move down by one row.
; NOTE(review): BYTE_PIXEL presumably is the number of pixels stored per screen
; byte - confirm against the table generator.
41 #define BYTE_PIXEL 6
42 #define X_SIZE 240
43 #define ROW_SIZE X_SIZE/BYTE_PIXEL
44
; 6502 opcode bytes. The setup code of the routines stores these into the
; drawing loops (self-modifying code) to select the step direction.
; _NOP is not referenced by the code visible in this file.
45 #define _NOP $ea
46 #define _INX $e8
47 #define _DEX $ca
48 #define _INY $c8
49 #define _DEY $88
50 #define _ASL $0a
51 #define _LSR $4a
52 #define _INC_ZP $e6
53 #define _DEC_ZP $c6
54
55
56 .text
57
; Pad with filler bytes up to the next 256-byte page boundary, so the code
; below starts at the beginning of a page.
58 .dsb 256-(*&255)
59
60 ;**********************************************************
;
; draw_totaly_vertical_8
;
; XORs one pixel into each of dy+1 consecutive screen rows (a vertical line).
; Reached from _DrawLine8 when dx == 0.
;
; In:       _CurrentPixelX  column of the line
;           dy              number of row steps (dy+1 pixels are plotted)
;           tmp0            screen address of the first row
; Clobbers: A, X, Y, flags, tmp0 (ends up dy+1 rows further down)
; Patches:  the immediate operand at _mask_patch
;
61 draw_totaly_vertical_8
62 .(
63 ldx _CurrentPixelX
64 ldy _TableDiv6,x ; Y = offset of the screen byte within the row (table lookup by x)
65 lda _TableBit6Reverse,x ; 4 bit mask of the pixel (table lookup by x)
66 sta _mask_patch+1 ; becomes the operand of the lda at _mask_patch
67
68 ldx dy
69 inx ; X = dy+1 = number of pixels to plot
70
71 clc ; 2 carry is kept clear at the top of every loop pass
72 loop
73 _mask_patch
74 lda #0 ; 2 operand is patched with the pixel mask above
75 eor (tmp0),y ; 5
76 sta (tmp0),y ; 6 => total = 13 cycles
77
78 ; update the screen address:
79 .(
80 lda tmp0+0 ; 3
81 adc #ROW_SIZE ; 2
82 sta tmp0+0 ; 3
83 bcc skip ; 2 (+1 if taken)
84 inc tmp0+1 ; 5
85 clc ; 2 restore the clear carry for the next adc
86 skip
87 .)
88 ; ------------------Min=13 Max=17
89
90 dex
91 bne loop
92 rts
93 .)
94
95
96 ;**********************************************************
97 ;
; _DrawLine8
;
; Entry point. Orders the two end points so that drawing goes from top to
; bottom, computes dx and dy as absolute values, points tmp0 at the screen
; row of the start point and then continues in one of the specialized
; routines:
;   dx == 0   draw_totaly_vertical_8
;   dy == 0   draw_totaly_horizontal_8
;   dy <  dx  draw_mainly_horizontal_8
;   otherwise draw_mainly_vertical_8
; The specialized routine ends with rts, which returns to the caller of
; _DrawLine8.
;
98 ; Expects the following variables to be set when called:
99 ; _CurrentPixelX
100 ; _CurrentPixelY
101 ; _OtherPixelX
102 ; _OtherPixelY
103 ;
; Side effects: the Current and Other coordinates are exchanged in place when
; the current point is below the other one. dx, dy and tmp0 are written.
; A, X, Y and flags are clobbered.
;
104 _DrawLine8
105 ;
106 ; Compute deltas and signs
107 ;
108
109 ; Test Y value
110 .(
111 sec
112 lda _CurrentPixelY
113 sbc _OtherPixelY
114 beq end ; same row: A = 0 is stored as dy
115 bcc cur_smaller
116
117 cur_bigger ; y1>y2
118 ; Swap the two end points (both X and Y)
119 ; So we always draw from top to bottom
120 ldy _CurrentPixelY
121 ldx _OtherPixelY
122 sty _OtherPixelY
123 stx _CurrentPixelY
124
125 ldy _CurrentPixelX
126 ldx _OtherPixelX
127 sty _OtherPixelX
128 stx _CurrentPixelX
129
130 jmp end ; A still holds y1-y2, the swap only used X and Y
131
132 cur_smaller ; y1<y2
133 ; Absolute value
134 eor #$ff ; carry is clear here (bcc was taken), so
135 adc #1 ; invert and add 1 negates A
136 end
137 sta dy
138 .)
139
140 ;
141 ; Initialise screen pointer
142 ;
143 ldy _CurrentPixelY
144 lda _HiresAddrLow,y ; 4
145 sta tmp0+0 ; 3
146 lda _HiresAddrHigh,y ; 4
147 sta tmp0+1 ; 3 => Total 14 cycles
148
149 ; Test X value
150 .(
151 sec
152 lda _CurrentPixelX
153 sbc _OtherPixelX
154 sta dx
155 beq draw_totaly_vertical_8 ; dx == 0 (this also covers two identical points)
156 bcc cur_smaller
157
158 cur_bigger ; x1>x2
159 lda #_DEX ; dx already holds x1-x2; A = opcode for stepping left
160 bne end ; always taken, the opcode byte is not zero
161
162 cur_smaller ; x1<x2
163 ; Absolute value
164 eor #$ff ; carry is clear here (bcc was taken)
165 adc #1
166 sta dx
167
168 lda #_INX ; A = opcode for stepping right
169 end
170 .)
171
172 ; jmp alignIt
173 ;
174 ; .dsb 256-(*&255)
175 ;
176 ;alignIt
177 ; Compute slope and call the specialized code for mostly horizontal or vertical lines
; At this point A = step opcode (_INX or _DEX) and dx is not zero.
178 ldy dy ; Y = dy is passed on to the specialized routine
179 beq draw_totaly_horizontal_8
180 cpy dx
181 bcc draw_mainly_horizontal_8 ; dy < dx
182 jmp draw_mainly_vertical_8 ; dy >= dx
183
184 ;**********************************************************
;
; draw_totaly_horizontal_8
;
; XORs the pixels of a horizontal line, one pixel per loop pass, walking X
; from _CurrentPixelX towards _OtherPixelX.
; Reached from _DrawLine8 when dy == 0 and dx != 0.
;
; In:       A               step opcode (_INX or _DEX)
;           tmp0            screen address of the row
;           _CurrentPixelX, _OtherPixelX
; Clobbers: A, X, Y, flags
; Patches:  the opcode at _outer_patch and the operand at __auto_cpx
;
; NOTE(review): the loop leaves as soon as X equals _OtherPixelX, before that
; pixel is plotted, so dx pixels are drawn. draw_totaly_vertical_8 plots dy+1
; pixels and draw_mainly_horizontal_8 plots dx+1. Confirm whether leaving out
; the end point here is intended.
;
185 draw_totaly_horizontal_8
186 .(
187 ; here we have DY in Y, and the OPCODE in A
188 sta _outer_patch ; Write a (dex / inx) instruction
189
190 ldx _OtherPixelX
191 stx __auto_cpx+1 ; end value for the cpx below
192
193 ldx _CurrentPixelX
194
195 ;
196 ; Draw loop
197 ;
198 outer_loop
199 ldy _TableDiv6,x ; Y = offset of the screen byte within the row
200 lda _TableBit6Reverse,x ; 4 bit mask of the pixel
201 eor (tmp0),y ; 5
202 sta (tmp0),y ; 6
203
204 _outer_patch
205 inx ; patched: inx or dex
206
207 __auto_cpx
208 cpx #00 ; At the endpoint yet? (operand patched with _OtherPixelX)
209 bne outer_loop
210 rts
211 .)
212
213 ;**********************************************************
;
; draw_mainly_horizontal_8
;
; Line with dy < dx. If dx/2 >= dy the work is handed to
; draw_very_horizontal_8. Otherwise the line is drawn pixel by pixel:
; X counts from dx+1 down to 1 and is used directly as index into the pixel
; tables, whose addresses are patched so that the index matches the pixel
; column. When moving right the tables _TableDiv6Rev and _TableBit6 are used
; instead (presumably mirrored copies, so that a falling index walks to the
; right - TODO confirm).
;
; In:       A               step opcode (_INX or _DEX)
;           Y               dy
;           dx, dy, tmp0, _OtherPixelX
; Clobbers: A, X, Y, flags, save_a, tmp0
; Patches:  __auto_dy, __auto_dx, __auto_div6, __auto_bit6
;
214 draw_mainly_horizontal_8
215 .(
216 tax ; keep the step opcode in X, A is needed for the test below
217 lda dx
218 lsr
219 cmp dy
220 bcs draw_very_horizontal_8 ; dx/2 >= dy
221
222 ; here we have DY in Y, and the OPCODE (inx, dex) in X
223 sty __auto_dy+1
224
225 ; all this stress to be able to use dex, beq :)
226 cpx #_INX ; Z = moving right. Also leaves carry set for INX, clear for DEX
227 beq doInx
228
; moving left: table address = table - 1 + _OtherPixelX
229 lda #<_TableDiv6-1 ; == 0
230 ; clc ; _DEX < _INX
231 adc _OtherPixelX
232 sta __auto_div6+1
233 lda #<_TableBit6Reverse-1 ; == 0
234 ; clc
; NOTE(review): no clc here, this relies on the adc above not producing a carry.
235 adc _OtherPixelX
236
237 ldx #>_TableDiv6
238 ldy #>_TableBit6Reverse ;
239 bne endPatch ; taken as long as the high byte just loaded is not zero
240
241 doInx
; moving right: table offset = X_SIZE-1 - _OtherPixelX
242 lda #X_SIZE-1
243 ; sec
244 sbc _OtherPixelX
245 sta __auto_div6+1
246 lda #X_SIZE-1
247 ; sec
248 sbc _OtherPixelX
249
250 ldx #>_TableDiv6Rev
251 ldy #>_TableBit6 ;
252 endPatch
; A = low byte for the bit table, X / Y = high bytes of the two tables
253 sta __auto_bit6+1
254 stx __auto_div6+2
255 sty __auto_bit6+2
256
257 lda dx
258 tax
259 inx ; 2 +1 since we count to 0
260 sta __auto_dx+1
261 lsr
262 eor #$ff ; initial sum = dx/2 with all bits inverted
263 clc
264 ; a = sum, x = dX+1
265 ;----------------------------------------------------------
266 loopX
267 sta save_a ; 3 = 3
268 loopY
269 ; Draw the pixel
270 __auto_div6
271 ldy _TableDiv6-1,x ; 4
272 __auto_bit6
273 lda _TableBit6Reverse-1,x;4
274 eor (tmp0),y ; 5*
275 sta (tmp0),y ; 6*= 19
276
277 dex ; 2 Step in x
278 beq exitLoop ; 2/3 At the endpoint yet?
279 lda save_a ; 3
280 __auto_dy
281 adc #00 ; 2 +DY
282 bcc loopX ; 2/3=11/12 ~50% taken
283 ; Time to step in y
284 __auto_dx
285 sbc #00 ; 2 -DX
286 sta save_a ; 3 = 5
287
288 ; update the screen address:
; carry is expected to be clear here: the wrapped sum is below dy, which is
; below dx, so the sbc above borrowed
289 lda tmp0+0 ; 3
290 adc #ROW_SIZE ; 2
291 sta tmp0+0 ; 3
292 bcc loopY ; 2/3=10/11 ~84.4% taken
293 inc tmp0+1 ; 5
294 clc ; 2
295 bcc loopY ; 3 = 10
296 ; average: 12.40
297
298 exitLoop
299 rts
300 ; Timings:
301 ; x++/y : 34
302 ; x++/y++: 47.40
303 ; average: 40.70
304 .)
305
; Pad up to the next 256-byte page boundary.
306 .dsb 256-(*&255)
307 ;**********************************************************
;
; draw_very_horizontal_8
;
; Line with dx/2 >= dy, branched to from draw_mainly_horizontal_8.
; The pixels of one row are not plotted one by one. Their bits are collected
; per screen byte in chunk and XORed into the screen in one go, either when
; the line moves on to the next screen byte (nextColumn) or when it steps
; down one row.
;
; In:       X               step opcode (_INX or _DEX)
;           Y               dy
;           dx, dy, tmp0, _CurrentPixelX
; Clobbers: A, X, Y, flags, save_a, chunk, lastSum, tmp0
;           dy is counted down in place by the loop
; Patches:  __auto_dy, __auto_dy2, __auto_dx, __auto_stepx(2),
;           __auto_cpy(2), __auto_yHi(2), __auto_pot1..3
;
308 draw_very_horizontal_8
309 .(
310 ; dX > 2*dY, here we use "chunking"
311 ; here we have DY in Y, and the OPCODE (inx, dex) in X
312 sty __auto_dy+1
313 sty __auto_dy2+1
314 cpx #_INX ; Z = moving right, carry set for INX / clear for DEX
315 php ; keep these flags across the pointer setup
316 ; setup pointer and Y:
; the low byte of the screen address moves into Y and tmp0 becomes 0, so that
; a step to the next screen byte is a plain iny / dey
317 ldx _CurrentPixelX
318 lda _TableDiv6,x
319 clc
320 adc tmp0
321 tay
322 bcc skipHi
323 inc tmp0+1
324 skipHi
325 lda #0
326 sta tmp0
327 ; patch the code:
328 plp
329 beq doInx
330 ; negative x-direction
331 lda _TableMod6,x
332 tax ; X = position counter within the current screen byte
333
334 lda #_DEY
335 sta __auto_stepx
336 sta __auto_stepx2
337 lda #$ff ; value of Y after dey wrapped below 0
338 sta __auto_cpy+1
339 sta __auto_cpy2+1
340 lda #_DEC_ZP
341 sta __auto_yHi
342 sta __auto_yHi2
343 lda Pot2NTbl,x
344 sta chunk
345 lda #<Pot2NTbl
; NOTE(review): this branch is only taken if the low byte of Pot2NTbl is not
; zero, otherwise execution falls through into doInx - confirm the layout.
346 bne endPatch
347
348 doInx
349 ; positive x-direction
350 lda #BYTE_PIXEL-1
351 ; sec
; carry is set here, restored by plp from the cpx that compared equal
352 sbc _TableMod6,x
353 tax ; X = position counter within the current screen byte
354
355 lda #_INY
356 sta __auto_stepx
357 sta __auto_stepx2
358 lda #$00 ; value of Y after iny wrapped past $ff
359 sta __auto_cpy+1
360 sta __auto_cpy2+1
361 lda #_INC_ZP
362 sta __auto_yHi
363 sta __auto_yHi2
364 lda Pot2PTbl,x
365 sta chunk
366 lda #<Pot2PTbl
367 endPatch
; only the low byte of the table address is patched.
; NOTE(review): this assumes Pot2PTbl and Pot2NTbl start in the same page.
368 sta __auto_pot1+1
369 sta __auto_pot2+1
370 sta __auto_pot3+1
371 lda dx
372 sta __auto_dx+1
373 ; calculate initial bresenham sum
374 lsr
375 sta lastSum ; 3 this is used for the last line segment
376 eor #$ff ; = -dx/2
377 clc
378 jmp loopX
379 ; a = sum, x = position counter within the current byte, y = ptr-offset
380
381 ;----------------------------------------------------------
; The current screen byte is used up: plot the pending bits, start a full
; new chunk and move Y to the neighbouring screen byte.
382 nextColumn ;
383 tax ; 2 park the sum in X
384 lda chunk ; 3
385 eor (tmp0),y ; 5
386 sta (tmp0),y ; 6
387 lda #%00111111 ; 2
388 sta chunk ; 3
389 txa ; 2 sum back into A
390 ldx #BYTE_PIXEL-1 ; 2
391 __auto_stepx
392 iny ; 2 next column (patched: iny or dey)
393 __auto_cpy
394 cpy #00 ; 2 patched: $00 or $ff, did Y wrap?
395 clc ; 2
396 bne contColumn ; 2/3=33/34 99% taken
397 __auto_yHi
398 inc tmp0+1 ; 5 dec/inc
399 bcc contColumn ; 3 = 8
400 ;----------------------------------------------------------
401 loopY
402 dec dy ; 5 all but one vertical segments drawn?
403 beq exitLoop ; 2/3= 7/8 yes, exit loop
404 loopX
405 dex ; 2
406 bmi nextColumn ; 2/37.03 ~16.7% taken
407 contColumn ; = 9.85
408 __auto_dy
409 adc #00 ; 2 +DY
410 bcc loopX ; 2/3= 4/5 ~50% taken
411 ; Time to step in y
412 __auto_dx
413 sbc #00 ; 2 -DX
414 sta save_a ; 3 = 5
415
416 ; plot the last bits of current row:
417 __auto_pot1
418 lda Pot2PTbl,x ; 4
419 eor chunk ; 3
420 eor (tmp0),y ; 5
421 sta (tmp0),y ; 6
422 __auto_pot2
423 lda Pot2PTbl,x ; 4 the bits not plotted above start the chunk of the next row
424 sta chunk ; 3 = 25
425
426 ; update the screen address:
; carry is expected to be clear here, the sbc above borrowed and nothing in
; between changes the carry
427 tya ; 2
428 adc #ROW_SIZE ; 2
429 tay ; 2
430 lda save_a ; 3
431 bcc loopY ; 2/3=11/12 ~84.4% taken
432 inc tmp0+1 ; 5
433 clc ; 2
434 bcc loopY ; 3 = 10
435 ; average: 13.40
436
437 ; Timings:
438 ; x++/y : 14.85 (75%)
439 ; x++/y++: 64.25 (25%)
440 ; average: 27.20
441 ;----------------------------------------------------------
442 exitLoop
443 ; draw the last horizontal line segment:
; same loop as above, but it ends at the first overflow of the sum
444 adc lastSum ; 3
445 loopXEnd
446 dex ; 2
447 bmi nextColumnEnd ; 2/37.03 ~16.7% taken
448 contColumnEnd ; = 9.85
449 __auto_dy2
450 adc #00 ; 2 +DY
451 bcc loopXEnd ; 2/3= 4/5 ~25% taken
452
453 ; plot last chunk:
454 __auto_pot3
455 lda Pot2PTbl,x ; 4
456 eor chunk ; 3
457 eor (tmp0),y ; 5
458 sta (tmp0),y ; 6 = 18
459 rts
460 ;----------------------------------------------------------
; copy of nextColumn for the last segment, continues at contColumnEnd
461 nextColumnEnd ;
462 tax ; 2
463 lda chunk ; 3
464 eor (tmp0),y ; 5
465 sta (tmp0),y ; 6
466 lda #%00111111 ; 2
467 sta chunk ; 3
468 txa ; 2
469 ldx #BYTE_PIXEL-1 ; 2
470 __auto_stepx2
471 iny ; 2 next column
472 __auto_cpy2
473 cpy #00 ; 2
474 clc ; 2
475 bne contColumnEnd ; 2/3=33/34 99% taken
476 __auto_yHi2
477 inc tmp0+1 ; 5 dec/inc
478 bcc contColumnEnd ; 3 = 8
479
; bit masks indexed by the position counter in X, selected for the positive
; direction by the setup code above
480 Pot2PTbl
481 .byte %00000001, %00000011, %00000111, %00001111
482 .byte %00011111, %00111111
; same for the negative direction
483 Pot2NTbl
484 .byte %00100000, %00110000
485 .byte %00111000, %00111100, %00111110, %00111111
486 .)
487
; Pad up to the next 256-byte page boundary.
488 .dsb 256-(*&255)
489 ;**********************************************************
490 ;
491 ; This code is used when the things are moving faster
492 ; vertically than horizontally
493 ;
494 ; dy>dx
495 ;
; (_DrawLine8 also jumps here when dy == dx)
;
; One pixel is XORed per row. X counts the remaining steps in x. After the
; last x step the rest of the rows is drawn by loopYEnd, which stops at the
; first overflow of the sum.
;
; In:       A               step opcode (_INX or _DEX)
;           Y               dy
;           dx, dy, tmp0, _CurrentPixelX
; Clobbers: A, X, Y, flags, save_a, curBit, lastSum, tmp0
; Patches:  __auto_dy, __auto_dx1, __auto_dx2, __auto_cpBit, __auto_shBit,
;           __auto_ldBit, __auto_yLo, __auto_cpY, __auto_yHi
;
496 draw_mainly_vertical_8
497 ; here we have DY in Y, and the OPCODE in A
498 .(
499 ; setup bresenham values:
500 sty __auto_dy+1
501
502 ; setup direction:
503 cmp #_DEX ; which direction?
504 bne doInx
505 ; dex -> moving left:
506 lda #%00100000 ; last bit of a screen byte when moving left
507 sta __auto_cpBit+1
508 lda #_ASL ;
509 sta __auto_shBit
510 lda #%00000001 ; first bit of the next screen byte
511 sta __auto_ldBit+1
512 lda #_DEY
513 sta __auto_yLo
514 ldx #$ff ; value of Y after dey wrapped below 0
515 lda #_DEC_ZP
516 bne endPatch ; always taken, the opcode byte is not zero
517
518 doInx
519 ; inx -> moving right:
520 lda #%00000001 ; last bit of a screen byte when moving right
521 sta __auto_cpBit+1
522 lda #_LSR
523 sta __auto_shBit
524 lda #%00100000 ; first bit of the next screen byte
525 sta __auto_ldBit+1
526 lda #_INY
527 sta __auto_yLo
528 ldx #$00 ; value of Y after iny wrapped past $ff
529 lda #_INC_ZP
530 endPatch
531 stx __auto_cpY+1
532 sta __auto_yHi
533 ; setup X
534 ldx dx ; X = dx
535 stx __auto_dx1+1
536 stx __auto_dx2+1
537 ; setup current bit:
538 ldy _CurrentPixelX
539 lda _TableBit6Reverse,y ; 4
540 sta curBit
541 ; setup pointer and Y:
542 ; TODO: self-modifying code for the ptrs?
; the low byte of the screen address moves into Y and tmp0 becomes 0
543 lda _TableDiv6,y
544 clc
545 adc tmp0
546 tay
547 lda #0 ; lda / sta leave the carry of the adc untouched
548 sta tmp0
549 bcc skipTmp0
550 inc tmp0+1
551 skipTmp0
552 ; calculate initial bresenham sum:
553 lda dy
554 lsr
555 sta lastSum
556 eor #$ff ; -DY/2
557 clc ; 2
558 bcc loopY ; 3
559 ; a = sum, y = tmp0, x = dX, tmp0 = 0
560 ;----------------------------------------------------------
561 incHiPtr ;
562 inc tmp0+1 ; 5
563 clc ; 2
564 bcc contHiPtr ; 3
565 ;----------------------------------------------------------
566 loopY
567 sta save_a ; 3
568 lda curBit ; 3 = 6
569 loopX
570 ; Draw the pixel
571 eor (tmp0),y ; 5
572 sta (tmp0),y ; 6 = 11
573 ; update the screen address:
574 tya ; 2
575 adc #ROW_SIZE ; 2
576 tay ; 2
577 bcs incHiPtr ; 2/13 ~15.6% taken
578 contHiPtr ; = 9.72 average
579 lda save_a ; 3
580 __auto_dx1
581 adc #00 ; 2 +DX
582 bcc loopY ; 2/3= 7/8 ~50% taken
583 ; Time to step in x
584 __auto_dy
585 sbc #00 ; 2 -DY
586 sta save_a ; 3 = 5
587
588 lda curBit ; 3
589 __auto_cpBit ; TODO: optimize
590 cmp #%00100000 ; 2 %00100000/%00000001
591 beq nextColumn ; 2/14.07 ~16.7% taken
592 __auto_shBit
593 asl ; 2 asl/lsr, clears carry
594 contNextColumn
595 sta curBit ; 3 =~13.68
596 ; step in x:
597 dex ; 2 At the endpoint yet?
598 bne loopX ; 2/3= 4/5 A = curBit and carry clear, as loopX expects
599
600 ; x ,y++: 34.72 (50%)
601 ; x++,y++: 51.40 (50%)
602 ; average: 43.06
603
604 ; draw the last vertical line segment:
605 lda save_a ; 3
606 adc lastSum ; 3
607 loopYEnd
608 tax ; 2 = 2 X is free now and holds the sum
609 ; Draw the pixel
610 lda curBit ; 3
611 eor (tmp0),y ; 5
612 sta (tmp0),y ; 6 = 14
613 ; update the screen address:
614 tya ; 2
615 adc #ROW_SIZE ; 2
616 tay ; 2
617 bcs incHiPtrEnd ; 2/13 ~15.6% taken
618 contHiPtrEnd ; = 9.72 average
619 txa ; 2
620 __auto_dx2
621 adc #00 ; 2 +DX
622 bcc loopYEnd ; 2/3= 6/7 ~25% taken
623 rts
624 ;----------------------------------------------------------
; The bit left the current screen byte: load the first bit of the
; neighbouring byte and move Y (and the page in tmp0+1 when Y wraps).
625 nextColumn
626 __auto_ldBit
627 lda #%00000001 ; 2 %00000001/%00100000
628 __auto_yLo
629 dey ; 2 dey/iny
630 __auto_cpY
631 cpy #$ff ; 2 $ff/$00
632 clc ; 2 TODO: optimize
633 bne contNextColumn ; 2/3 ~99% taken
634 __auto_yHi
635 dec tmp0+1 ; 5 dec/inc
636 bcc contNextColumn ; 3
637
638 incHiPtrEnd ; 9
639 inc tmp0+1 ; 5
640 clc ; 2
641 bcc contHiPtrEnd ; 3
642 ;----------------------------------------------------------
643 .)
644
645 ; *** total timings: ***
646 ; draw_very_horizontal_8 (29.6%): 27.20
647 ; draw_mainly_horizontal_8 (20.4%): 40.70
648 ; draw_mainly_vertical_8 (50.0%): 43.06
649 ;----------------------------------------
650 ; total average (100.0%): 37.89
651
652
653

  ViewVC Help
Powered by ViewVC 1.1.26