/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Contents of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory | Revision Log


Revision 281 - (show annotations)
Sun Feb 14 17:24:27 2010 UTC (9 years, 9 months ago) by thrust26
File size: 17646 byte(s)
fixed vertical lines
1 ; History of linebench timings...
2 ;649
3 ;614 (replacing the update of tmp0)
4 ;607
5 ;588
6 ;583 after alignment
7 ;579
8 ;534 redid mainly_vertical
9 ;529 removed page penalty
10 ;517 final optimization at mainly_horizontal
11 ;501 chunking, initial version
12 ;482 optimized chunking (avg: 38.91 cycles)
13 ;473 final optimization for mainly_vertical (37.89 -> 38.34 corrected)
14 ;468 a weird stunt on mainly_horizontal (38.07)
15 ;467 minor very_horizontal optimization (37.88 -> 38.56 corrected)
16 ;463 self modifying pointer in mainly_horizontal (38.35)
17 ;459 self modifying pointer in mainly_vertical (37.99)
18 ;459 a little tweak to very_horizontal (37.94)
19 ;451 refactored to make x-direction always positive (37.07)
20
21 ; TODOs:
22 ; + chunking (-35)
23 ; - two separate branches instead of patching?
24 ; + countdown minor
25 ; x mainly_horizontal (won't work)
26 ; + mainly_vertical (-9)
27 ; o optimizing for space (-2 tables and one alignment page)
28 ; + optimize horizontal (merge with very_horizontal)
29 ; o optimize vertical
30 ; + correct branch taken percentages
31 ; + always draw left to right and patch y-direction (-8)
32 ; + switch between XOR and OR
33
34 .zero
35
36 ; *= tmp1
37
38 ;e .dsb 2 ; Error decision factor (slope) 2 bytes in zero page
39 ;i .dsb 1 ; Number of pixels to draw (iteration counter) 1 byte in zp
40 ;dx .dsb 1 ; Width
41 ;dy .dsb 1 ; Height
42 ;_CurrentPixelX .dsb 1
43 ;_CurrentPixelY .dsb 1
44 ;_OtherPixelX .dsb 1
45 ;_OtherPixelY .dsb 1
46
47 save_a .dsb 1 ; Bresenham sum, parked here while A is needed for pixel data
48 curBit .dsb 1 ; pixel mask of the current column (used by draw_mainly_vertical_8)
49 chunk .dsb 1 ; pixel mask collected for the current screen byte (used by draw_very_horizontal_8)
50 lastSum .dsb 1 ; half of the major delta, added back to the sum before the last line segment
51
52
; OPP is the instruction that merges a pixel mask with screen memory (EOR here, ORA as the alternative)
53 ;#define OPP ORA
54 #define OPP EOR
55
; Screen geometry used by the routines, 240 pixels per row at 6 pixels per byte gives ROW_SIZE = 40
56 #define BYTE_PIXEL 6
57 #define X_SIZE 240
58 #define ROW_SIZE X_SIZE/BYTE_PIXEL
59
; 6502 opcode bytes, stored into the drawing loops to patch their direction-dependent instructions
60 #define _INY $c8
61 #define _DEY $88
62 #define _INC_ZP $e6
63 #define _DEC_ZP $c6
64 #define _INC_ABS $ee
65 #define _DEC_ABS $ce
66 #define _ADC_IMM $69
67 #define _SBC_IMM $e9
68 #define _BCC $90
69 #define _BCS $b0
70 #define _CLC $18
71 #define _SEC $38
72
73
74 .text
75
76 ; .dsb 256-(*&255)
77
78 ;**********************************************************
79 ; _DrawLine8 - draws a line between (_CurrentPixelX,_CurrentPixelY) and (_OtherPixelX,_OtherPixelY)
80 ; Expects the following variables to be set when called:
81 ; _CurrentPixelX
82 ; _CurrentPixelY
83 ; _OtherPixelX
84 ; _OtherPixelY
85 ; The two endpoints are swapped in place when x1 > x2; dx, dy and tmp0 are overwritten
86 _DrawLine8
87 ;
88 ; compute deltas and signs
89 ;
90 .(
91 ; test X value (A = x1 - x2)
92 sec
93 lda _CurrentPixelX
94 sbc _OtherPixelX
95 bcc cur_smaller ; x1 < x2, the line already runs left to right
96 beq end ; x1 = x2, A is already the dx of 0
97
98 ldy _CurrentPixelX ; x1 > x2, swap both endpoints so that x only increases while drawing
99 ldx _OtherPixelX
100 sty _OtherPixelX
101 stx _CurrentPixelX
102
103 ldy _CurrentPixelY
104 ldx _OtherPixelY
105 sty _OtherPixelY
106 stx _CurrentPixelY
107
108 bcs end ; always taken (C is still set from the sbc), A still holds the positive difference
109
110 cur_smaller ; x1<x2
111 ; absolute value (C is clear here, so eor #$ff / adc #1 negates A)
112 eor #$ff
113 adc #1
114 end
115 sta dx
116 .)
117 ;
118 ; initialise screen pointer, tmp0 = _HiresAddrLow/_HiresAddrHigh entry for _CurrentPixelY
119 ;
120 ldy _CurrentPixelY
121 lda _HiresAddrLow,y ; 4
122 sta tmp0+0 ; 3
123 lda _HiresAddrHigh,y ; 4
124 sta tmp0+1 ; 3 => Total 14 cycles
125 .(
126 ; test Y value (A = y1 - y2) and select the opcode that encodes the y direction
127 sec
128 lda _CurrentPixelY
129 sbc _OtherPixelY
130 ; beq horizontal
131 ldx #_DEY ; X = DEY opcode as direction marker, ldx leaves C untouched
132 bcs cur_bigger
133
134 cur_smaller ; y1<y2
135 ; absolute value (C is clear here, so eor #$ff / adc #1 negates A)
136 eor #$ff
137 adc #1
138
139 ldx #_INY ; X = INY opcode as direction marker
140 cur_bigger ; y1>=y2
141 sta dy
142 .)
143 tay ; Y = dy
144 jmp alignIt ; jump over the page alignment padding below
145
146 ;horizontal
147 ; jmp draw_totally_horizontal_8
148
149 .dsb 256-(*&255)
150
151 alignIt
152 ; Compute slope and call the specialized code for mostly horizontal or vertical lines (A = Y = dy, X = direction opcode)
153 cmp dx
154 bcc draw_mainly_horizontal_8 ; dy < dx
155 lda dx
156 beq draw_totaly_vertical_8 ; dx = 0
157 jmp draw_mainly_vertical_8 ; dy >= dx > 0, entered with A = dx
158
159 ;**********************************************************
160 draw_totaly_vertical_8 ; dx = 0, plots dy+1 pixels in one byte column. In, X = direction opcode, tmp0 = row pointer of the current point
161 .(
162 cpx #_INY
163 bne doDey
164 ; iny -> moving up: (tmp0 is already set for the starting row)
165 clc ; C has to be clear for the bcc below and for the first adc in the loop
166 ldx _CurrentPixelX
167 bcc endPatch ; always taken
168
169 ; dey -> moving down: restart from the other endpoint so that the loop can always add ROW_SIZE
170 doDey ; _DEY < _INY -> C==0! (left clear by the cpx above)
171 ldy _OtherPixelY
172 lda _HiresAddrLow,y ; 4
173 sta tmp0+0 ; 3
174 lda _HiresAddrHigh,y ; 4
175 sta tmp0+1 ; 3 => Total 14 cycles
176 ldx _OtherPixelX
177
178 endPatch
179 ldy _TableDiv6,x ; Y = _TableDiv6 entry for x, used as byte offset into the row
180 lda _TableBit6Reverse,x ; 4 pixel mask for x
181 sta _mask_patch+1 ; store the mask as immediate operand of the lda in the loop
182 ldx dy
183 inx ; dy+1 pixels to plot
184
185 loop
186 _mask_patch
187 lda #0 ; 2 operand is patched with the pixel mask
188 OPP (tmp0),y ; 5*
189 sta (tmp0),y ; 6*= 13**
190
191 ; update the screen address:
192 tya ; 2
193 adc #ROW_SIZE ; 2 C is clear here
194 tay ; 2
195 bcc skip ; 2/3 84.4% taken
196 inc tmp0+1 ; 5 offset wrapped, move the pointer to the next page
197 clc ; 2 keep C clear for the next adc
198 skip ; = 9.94
199 dex ; 2 dex does not change C
200 bne loop ; 2/3=4/5
201 rts
202 ; average: 27.94
203 .)
204
205 ;**********************************************************
206 draw_mainly_horizontal_8 ; dy < dx. Lines with dx/2 >= dy are passed on to draw_very_horizontal_8
207 .(
208 ; A = DY (reloaded with DX right below), Y = DY, X = opcode
209 lda dx
210 lsr
211 cmp dy
212 bcc contMainly ; dx/2 < dy, handled here
213 jmp draw_very_horizontal_8 ; dx/2 >= dy
214
215 contMainly
216
217 ; all this stress to be able to use dex, beq :)
218 cpx #_INY
219 beq doIny
220
221 ; dey -> moving down:
222 dey ; patch dy-1, in this variant the adc at _patch_dy is reached with C set (sec at loopX, SEC/BCS patches)
223 sty _patch_dy+1
224
225 lda #<(loopX-_patch_loop-2) ; branch offset that targets loopX itself (the sec)
226 sta _patch_loop+1
227
228 lda #_SBC_IMM
229 sta _patch_adc
230 lda #ROW_SIZE-1
231 sta _patch_adc+1
232 lda #_DEC_ABS
233 ldx #_BCS
234 ldy #_SEC
235 bne endPatch ; always taken (the loaded opcode is non-zero)
236
237 doIny
238 sty _patch_dy+1
239
240 lda #<(loopX-_patch_loop-1) ; branch offset that targets loopX+1, skipping the sec
241 sta _patch_loop+1
242
243 lda #_ADC_IMM
244 sta _patch_adc
245 lda #ROW_SIZE
246 sta _patch_adc+1
247 lda #_INC_ABS
248 ldx #_BCC
249 ldy #_CLC
250 endPatch
251 sta _patch_inc1 ; A = inc/dec opcode for the pointer high bytes
252 sta _patch_inc2
253 stx _patch_bcc ; X = bcc/bcs opcode after the row update
254 sty _patch_clc1 ; Y = clc/sec opcode
255 sty _patch_clc2
256
257 lda #X_SIZE-1
258 sec
259 sbc _OtherPixelX
260 sta _patch_bit1+1 ; low operand byte of both table reads becomes X_SIZE-1-_OtherPixelX
261 sta _patch_bit2+1
262
263 ldx _CurrentPixelX
264 lda _TableDiv6,x
265 clc
266 adc tmp0
267 tay ; Y = low byte of the first screen address
268 lda tmp0+1
269 adc #0
270 sta _patch_ptr0+2 ; the high byte goes into the absolute operands of the plot instructions
271 sta _patch_ptr1+2
272
273 lda dx
274 tax
275 inx ; 2 +1 since we count to 0
276 sta _patch_dx+1
277 lsr
278 eor #$ff ; A = one's complement of dx/2, the initial sum
279 _patch_clc1
280 clc
281
282 sta save_a ; 3 = 3
283 _patch_bit1
284 lda _TableBit6-1,x;4
285 and #$7f ; remove signal bit
286 bne contColumn ; join the loop at the plot (falls into loopX only if the mask is 0)
287
288 ; a = sum, x = dX+1
289 ;----------------------------------------------------------
290 loopX
291 sec ; 1 50% executed (y--)
292 sta save_a ; 3 = 4
293 loopY
294 _patch_bit2
295 lda _TableBit6-1,x ; 4
296 bmi nextColumn ; 2/10.05 16.7% taken
297 contColumn
298 _patch_ptr0
299 OPP $a000,y ; 4
300 _patch_ptr1
301 sta $a000,y ; 5 = 16.34
302
303 dex ; 2 Step in x
304 beq exitLoop ; 2/3 At the endpoint yet?
305 lda save_a ; 3
306 _patch_dy
307 adc #00 ; 2 +DY
308 _patch_loop
309 bcc loopX ; 2/3=11/12 ~28.0% taken (not 50% due to special code for very horizontal lines)
310 ; Time to step in y
311 _patch_dx
312 sbc #00 ; 2 -DX
313 sta save_a ; 3 = 5
314
315 ; update the screen address:
316 tya ; 2
317 _patch_adc
318 adc #ROW_SIZE ; 2
319 tay ; 2
320 _patch_bcc
321 bcc loopY ; 2/3= 8/9 ~84.4% taken
322 _patch_inc1
323 inc _patch_ptr0+2 ; 6
324 _patch_inc2
325 inc _patch_ptr1+2 ; 6
326 _patch_clc2
327 clc ; 2
328 bne loopY ; 3 = 17
329 ; average: 11.50
330
331 exitLoop
332 rts
333
334 nextColumn
335 and #$7f ; 2 remove signal bit
336 iny ; 2
337 bne contColumn ; 2/3= 6/7 99% taken
338 inc _patch_ptr0+2 ; 6
339 inc _patch_ptr1+2 ; 6
340 bne contColumn ; 3 = 15
341
342 ; Timings:
343 ; x++/y : 32.34 (28.0%)
344 ; x++/y++: 43.84 (72.0%)
345 ; average: 40.62
346 .)
347
348 ; .dsb 256-(*&255)
349 ;**********************************************************
350 draw_very_horizontal_8
351 .(
352 ; dX > 2*dY, here we use "chunking" (the pixels of one screen byte are collected in chunk and written with one read-modify-write)
353 ; here we have DY in Y, and the OPCODE (iny, dey) in X
354 sty _patch_dy0+1 ; patch dy into the three adc #00 of the loops
355 sty _patch_dy1+1
356 sty _patch_dy2+1
357 cpx #_INY
358 php ; keep the result of the direction test for the plp/beq below
359 ; setup pointer and Y:
360 ldx _CurrentPixelX
361 lda _TableDiv6,x
362 clc
363 adc tmp0
364 tay
365 lda #0
366 sta tmp0 ; the low byte of the address now lives in Y
367 bcc skipHi ; C still comes from the adc above
368 inc tmp0+1
369 skipHi
370 lda _TableDiv6,x
371 asl
372 adc _TableDiv6,x
373 asl
374 ; clc
375 adc #BYTE_PIXEL;-1
376 ; sec
377 sbc _CurrentPixelX
378 tax ; X = 6*_TableDiv6[x]+5-x (the sbc runs with C clear, so it subtracts x+1)
379 lda Pot2PTbl,x ; mask with the low X+1 bits set
380 sta chunk
381
382 ; patch the code:
383 plp ; Z from the cpx #_INY above
384 beq doIny
385 ; no y-direction?
386 lda dy
387 beq draw_totally_horizontal_8
388 ; negative y-direction
389 dec _patch_dy0+1 ; this one operand becomes dy-1
390
391 lda #_SBC_IMM
392 sta _patch_adc
393 lda #ROW_SIZE-1
394 sta _patch_adc+1
395 lda #_BCS
396 sta _patch_bcc
397 lda #_DEC_ZP
398 sta _patch_inc
399 lda #_SEC
400 bne endPatch ; always taken (the loaded opcode is non-zero)
401
402 doIny
403 ; positive y-direction
404 lda #_ADC_IMM
405 sta _patch_adc
406 lda #ROW_SIZE
407 sta _patch_adc+1
408 lda #_BCC
409 sta _patch_bcc
410 lda #_INC_ZP
411 sta _patch_inc
412 lda #_CLC
413 endPatch
414 sta _patch_clc
415
416 lda dx
417 sta _patch_dx+1
418 ; calculate initial bresenham sum
419 lsr
420 sta lastSum ; 3 this is used for the last line segment
421 eor #$ff ; = -dx/2
422 clc
423 bcc loopX ; always taken
424 ; a = sum, x = _CurrentPixelX % 6, y = ptr-offset
425
426 ;----------------------------------------------------------
427 nextColumnC ;
428 clc ; 2 = 2
429 nextColumn ;
430 tax ; 2 park the sum in X
431 lda chunk ; 3
432 OPP (tmp0),y ; 5
433 sta (tmp0),y ; 6
434 lda #%00111111 ; 2 all six pixels of the next byte
435 sta chunk ; 3
436 txa ; 2 restore the sum
437 ldx #BYTE_PIXEL-1 ; 2 restart the pixel counter
438 iny ; 2 next column
439 bne contColumn ; 2/3=29/30 99% taken
440 inc tmp0+1 ; 5 dec/inc
441 bne contColumn ; 3 = 8
442 ; average: 30.03
443 ;----------------------------------------------------------
444 draw_totally_horizontal_8 ; dy = 0, the whole line is drawn by the last-segment loop
445 lda #1
446 sta _patch_dy2+1 ; the sum gains 1 per pixel
447 lda dx
448 eor #$ff ; = -dx
449 clc
450 bcc loopXEnd ; always taken
451 ;----------------------------------------------------------
452 loopY
453 lda save_a ; 3
454 dec dy ; 5 all but one vertical segments drawn?
455 beq exitLoop ; 2/3=10/11 yes, exit loop
456 dex ; 2
457 bmi nextColumnC ; 2/38.03 ~16.7% taken (this will continue below)
458 _patch_dy0
459 adc #00 ; 2 = 12.01 +DY, no check necessary here!
460 loopX
461 dex ; 2
462 bmi nextColumn ; 2/33.03 ~16.7% taken
463 contColumn ; = 9.17
464 _patch_dy1
465 adc #00 ; 2 +DY
466 bcc loopX ; 2/3= 4/5 ~76.4% taken
467 ; Time to step in y
468 _patch_dx
469 sbc #00 ; 2 -DX
470 sta save_a ; 3 = 5
471
472 ; plot the last bits of current segment:
473 lda Pot2PTbl,x ; 4
474 eor chunk ; 3
475 OPP (tmp0),y ; 5
476 sta (tmp0),y ; 6
477 lda Pot2PTbl,x ; 4
478 sta chunk ; 3 = 25
479
480 ; update the screen address:
481 tya ; 2
482 _patch_adc
483 adc #ROW_SIZE ; 2
484 tay ; 2
485 _patch_bcc
486 bcc loopY ; 2/3= 8/9 ~84.4% taken
487 _patch_inc
488 inc tmp0+1 ; 5
489 _patch_clc
490 clc ; 2
491 bne loopY ; 3 = 10
492 ; average: 10.40
493
494 ; Timings:
495 ; x++/y : 14.17 (76.4%)
496 ; x++/y++: 62.41 (23.6%)
497 ; average: 25.55
498 ;----------------------------------------------------------
499 exitLoop
500 ; draw the last horizontal line segment:
501 clc
502 adc lastSum ; 3
503 loopXEnd
504 dex ; 2
505 bmi nextColumnEnd ; 2/37.03 ~16.7% taken
506 contColumnEnd ; = 9.85
507 _patch_dy2
508 adc #00 ; 2 +DY
509 bcc loopXEnd ; 2/3= 4/5 ~38.2% taken
510
511 ; plot last chunk:
512 lda Pot2PTbl,x ; 4
513 eor chunk ; 3
514 OPP (tmp0),y ; 5
515 sta (tmp0),y ; 6 = 18
516 rts
517 ;----------------------------------------------------------
518 nextColumnEnd ;
519 tax ; 2 park the sum in X
520 lda chunk ; 3
521 OPP (tmp0),y ; 5
522 sta (tmp0),y ; 6
523 lda #%00111111 ; 2 all six pixels of the next byte
524 sta chunk ; 3
525 txa ; 2 restore the sum
526 ldx #BYTE_PIXEL-1 ; 2 restart the pixel counter
527 iny ; 2 next column
528 bne contColumnEnd ; 2/3=29/30 99% taken
529 inc tmp0+1 ; 5 dec/inc
530 bne contColumnEnd ; 3 = 8
531
532 Pot2PTbl ; entry n has the low n+1 bits set
533 .byte %00000001, %00000011, %00000111, %00001111
534 .byte %00011111, %00111111
535 .)
536
537 .dsb 256-(*&255)
538
539 ;**********************************************************
540 ;
541 ; This code is used when the things are moving faster
542 ; vertically than horizontally
543 ;
544 ; dy>dx
545 ;
546 draw_mainly_vertical_8
547 ; A = DX, Y = DY, X = opcode
548 .(
549 ; setup bresenham values:
550 sty _patch_dy+1
551
552 ; setup direction:
553 cpx #_DEY ; which direction?
554 bne doIny
555 ; dey -> moving down:
556 lda #_SBC_IMM
557 sta _patch_adc1
558 sta _patch_adc2
559 lda #ROW_SIZE-1
560 sta _patch_adc1+1
561 sta _patch_adc2+1
562 lda #_BCC
563 sta _patch_bcs1
564 sta _patch_bcs2
565 lda #_DEC_ZP
566 sta _patch_inc2
567 ldy #_SEC
568 ldx dx
569 dex ; this variant patches dx-1 into the two +DX operands
570 lda #_DEC_ABS
571 bne endPatch ; always taken (the loaded opcode is non-zero)
572
573 doIny
574 ; iny -> moving up:
575 lda #_ADC_IMM
576 sta _patch_adc1
577 sta _patch_adc2
578 lda #ROW_SIZE
579 sta _patch_adc1+1
580 sta _patch_adc2+1
581 lda #_BCS
582 sta _patch_bcs1
583 sta _patch_bcs2
584 lda #_INC_ZP
585 sta _patch_inc2
586 ldy #_CLC
587 ldx dx
588 lda #_INC_ABS
589 endPatch
590 sta _patch_inc0 ; A = inc/dec opcode for the patched pointer high bytes
591 sta _patch_inc1
592 stx _patch_dx1+1 ; X = value added to the sum per row
593 stx _patch_dx2+1
594 sty _patch_clc1 ; Y = clc/sec opcode
595 sty _patch_clc2
596
597 ; setup X
598 ldx dx ; X = dx
599 ; setup current bit:
600 ldy _CurrentPixelX
601 lda _TableBit6Reverse,y ; 4
602 sta curBit
603 ; setup pointer and Y:
604 lda _TableDiv6,y
605 clc
606 adc tmp0
607 tay ; Y = low byte of the first screen address
608 lda #0
609 sta tmp0 ; tmp0 low byte is cleared, the offset is carried in Y
610 lda tmp0+1
611 adc #0 ; add the carry of the low byte addition (C survives the tay/lda/sta)
612 sta _patch_ptr0+2
613 sta _patch_ptr1+2
614 ; calculate initial bresenham sum:
615 lda dy
616 lsr
617 sta lastSum
618 eor #$ff ; -DY/2
619 clc ; 2
620 bcc loopY ; 3 always taken
621 ; a = sum, y = tmp0, x = dX, tmp0 = 0
622 ;----------------------------------------------------------
623 incHiPtr ;
624 _patch_inc0
625 inc _patch_ptr0+2 ; 6
626 _patch_inc1
627 inc _patch_ptr1+2 ; 6
628 _patch_clc1
629 clc ; 2
630 bne contHiPtr ; 3 = 17
631 ;----------------------------------------------------------
632 loopY
633 sta save_a ; 3
634 lda curBit ; 3 = 6
635 loopX
636 ; Draw the pixel
637 _patch_ptr0
638 OPP $a000,y ; 4
639 _patch_ptr1
640 sta $a000,y ; 5 = 9
641 ; update the screen address:
642 tya ; 2
643 _patch_adc1
644 adc #ROW_SIZE ; 2
645 tay ; 2
646 _patch_bcs1
647 bcs incHiPtr ; 2/20 ~15.6% taken
648 contHiPtr ; = 10.81 average
649 lda save_a ; 3
650 _patch_dx1
651 adc #00 ; 2 +DX
652 bcc loopY ; 2/3= 7/8 ~41.4% taken
653 ; Time to step in x
654 _patch_dy
655 sbc #00 ; 2 -DY
656 sta save_a ; 3 = 5
657
658 lda curBit ; 3
659 lsr ; 2
660 beq nextColumn ; 2/12.05 ~16.7% taken
661 contNextColumn
662 sta curBit ; 3 =~11.68
663 ; step in x:
664 dex ; 2 At the endpoint yet?
665 bne loopX ; 2/3= 4/5
666
667 ; x ,y++: 33.81 (41.4%)
668 ; x++,y++: 48.49 (58.6%)
669 ; average: 42.41
670
671 ; draw the last vertical line segment:
672 ldx _patch_ptr0+2 ; 4 the last segment plots through (tmp0),y, so copy the patched high byte back
673 stx tmp0+1 ; 3
674 lda save_a ; 3
675 adc lastSum ; 3
676 loopYEnd
677 tax ; 2 = 2
678 ; Draw the pixel
679 lda curBit ; 3
680 OPP (tmp0),y ; 5
681 sta (tmp0),y ; 6 = 14
682 ; update the screen address:
683 tya ; 2
684 _patch_adc2
685 adc #ROW_SIZE ; 2
686 tay ; 2
687 _patch_bcs2
688 bcs incHiPtrEnd ; 2/13 ~15.6% taken
689 contHiPtrEnd ; = 9.72 average
690 txa ; 2
691 _patch_dx2
692 adc #00 ; 2 +DX
693 bcc loopYEnd ; 2/3= 6/7 ~20.7% taken
694 rts ; 6
695 ;----------------------------------------------------------
696 nextColumn
697 clc ; 2
698 lda #%00100000 ; 2 restart with bit 5 as the pixel mask
699 iny ; 2
700 bne contNextColumn ; 2/3= 8/9 ~99% taken
701 inc _patch_ptr0+2 ; 6
702 inc _patch_ptr1+2 ; 6
703 bcc contNextColumn ; 3 = 15
704
705 incHiPtrEnd ; 9
706 _patch_inc2
707 inc tmp0+1 ; 5
708 _patch_clc2
709 clc ; 2
710 bne contHiPtrEnd ; 3
711 ;----------------------------------------------------------
712 .)
713
714 ; *** total timings: ***
715 ; draw_very_horizontal_8 (29.5%): 25.55 (was 25.73)
716 ; draw_mainly_horizontal_8 (20.5%): 40.62 (was 41.30)
717 ; draw_mainly_vertical_8 (50.0%): 42.41 (was 43.77)
718 ;----------------------------------------
719 ; total average (100.0%): 37.07 (was 37.94)

  ViewVC Help
Powered by ViewVC 1.1.26