/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Contents of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory Parent Directory | Revision Log Revision Log


Revision 259 - (show annotations)
Sun Feb 7 23:13:49 2010 UTC (9 years, 8 months ago) by thrust26
File size: 12785 byte(s)
chunking, initial version, 501
1 ; History of timings...
2 ;649
3 ;614 (replacing the update of tmp0)
4 ;607
5 ;588
6 ;583 after alignment
7 ;579
8 ;534 redid mainly_vertical
9 ;529 removed page penalty
10 ;517 final optimization at mainly_horizontal
11 ;501 chunking, initial version
12
; Zero-page scratch variables private to this file.
13 .zero
14
15 ; *= tmp1
16
; The declarations below are commented out in this file, yet dx, dy and the
; _CurrentPixel*/_OtherPixel* variables are used by the code further down;
; presumably they are declared in another source file - TODO confirm.
17 ;e .dsb 2 ; Error decision factor (slope) 2 bytes in zero page
18 ;i .dsb 1 ; Number of pixels to draw (iteration counter) 1 byte in zp
19 ;dx .dsb 1 ; Width
20 ;dy .dsb 1 ; Height
21 ;_CurrentPixelX .dsb 1
22 ;_CurrentPixelY .dsb 1
23 ;_OtherPixelX .dsb 1
24 ;_OtherPixelY .dsb 1
25
26 save_a .dsb 1 ; Bresenham error sum, parked here while A is needed for plotting
27 save_y .dsb 1 ; not referenced in the visible part of this file
28 curBit .dsb 1 ; current pixel bit mask (used by draw_mainly_vertical_8)
29 chunk .dsb 1 ; pixel bits collected for the current screen byte (used by draw_very_horizontal_8)
30
; Screen geometry: 6 pixels per screen byte and 240 pixels per row, so
; ROW_SIZE expands textually to 240/6 = 40 bytes per row.
31 #define BYTE_PIXEL 6
32 #define X_SIZE 240
33 #define ROW_SIZE X_SIZE/BYTE_PIXEL
34
; 6502 opcode bytes. The routines below store them into their own code to
; select the stepping direction at run time (self-modifying code).
35 #define _NOP $ea
36 #define _INX $e8
37 #define _DEX $ca
38 #define _INY $c8
39 #define _DEY $88
40 #define _ASL $0a
41 #define _LSR $4a
42 #define _INC_ZP $e6
43 #define _DEC_ZP $c6
44
45
46 .text
47
; Pad with (256 - offset inside the current page) bytes so the next routine
; starts on a 256-byte page boundary. Note that a full 256 bytes are emitted
; when the location counter is already aligned.
48 .dsb 256-(*&255)
49
; draw_totaly_vertical_8
; Reached by a branch from _DrawLine8 when dx == 0.
; XORs one pixel mask into dy+1 successive screen rows.
; In:  _CurrentPixelX, dy, tmp0 = screen pointer set up by _DrawLine8
; Out: tmp0 has been advanced by ROW_SIZE once per plotted row;
;      A, X, Y and the flags are changed.
50 draw_totaly_vertical_8
51 .(
52 ldx _CurrentPixelX
53 ldy _TableDiv6,x ; Y = byte offset inside the row (presumably x/6; table not shown here)
54 lda _TableBit6Reverse,x ; 4 A = bit mask of pixel x (table not shown here)
55 sta _mask_patch+1 ; becomes the immediate operand of the lda at _mask_patch
56
57 ldx dy
58 inx ; X = dy+1 = number of rows to plot
59
60 clc ; 2 the adc in the loop needs carry clear
61 loop
62 _mask_patch
63 lda #0 ; 2 operand is overwritten with the pixel mask above
64 eor (tmp0),y ; 5
65 sta (tmp0),y ; 6 => total = 13 cycles
66
67 ; update the screen address:
68 .(
69 lda tmp0+0 ; 3
70 adc #ROW_SIZE ; 2 carry is clear here on every iteration
71 sta tmp0+0 ; 3
72 bcc skip ; 2 (+1 if taken)
73 inc tmp0+1 ; 5 low byte wrapped: bump the high byte
74 clc ; 2 restore carry clear for the next adc
75 skip
76 .)
77 ; ------------------Min=13 Max=17
78
79 dex ; dex/bne leave the carry untouched
80 bne loop
81 rts
82 .)
83
84
85
86 ;
; _DrawLine8: entry point of the line routine.
; Computes dx and dy as absolute values, orders the end points so that
; _CurrentPixelY <= _OtherPixelY, loads the screen pointer tmp0 for the
; starting row and then dispatches to a specialised drawing routine.
;
87 ; Expects the following variables to be set when called:
88 ; _CurrentPixelX
89 ; _CurrentPixelY
90 ; _OtherPixelX
91 ; _OtherPixelY
92 ;
; Side effects: the two end points may be exchanged; dx, dy and tmp0 are
; written; A, X, Y and the flags are changed.
93 _DrawLine8
94 ;
95 ; Compute deltas and signs
96 ;
97
98 ; Test Y value
99 .(
100 sec
101 lda _CurrentPixelY
102 sbc _OtherPixelY ; A = y1-y2, carry clear if y1<y2
103 beq end ; same row: dy = 0
104 bcc cur_smaller
105
106 cur_bigger ; y1>y2
107 ; Swap the two end points (both their X and their Y coordinates)
108 ; So we always draw from top to bottom
; Only X and Y are used for the exchange, so A keeps y1-y2 (already
; positive) and is stored as dy at 'end'.
109 ldy _CurrentPixelY
110 ldx _OtherPixelY
111 sty _OtherPixelY
112 stx _CurrentPixelY
113
114 ldy _CurrentPixelX
115 ldx _OtherPixelX
116 sty _OtherPixelX
117 stx _CurrentPixelX
118
119 jmp end
120
121 cur_smaller ; y1<y2
122 ; Absolute value
123 eor #$ff
124 adc #1 ; carry is clear on this path, so this completes the two's complement
125 end
126 sta dy
127 .)
128
129 ;
130 ; Initialise screen pointer
131 ;
; tmp0 = table entry for the (possibly swapped) starting row; presumably the
; address of the first byte of that screen row - tables are not shown here.
132 ldy _CurrentPixelY
133 lda _HiresAddrLow,y ; 4
134 sta tmp0+0 ; 3
135 lda _HiresAddrHigh,y ; 4
136 sta tmp0+1 ; 3 => Total 14 cycles
137
138 ; Test X value
139 .(
140 sec
141 lda _CurrentPixelX
142 sbc _OtherPixelX
143 sta dx ; sta leaves the flags of the sbc intact for the two branches below
144 beq draw_totaly_vertical_8 ; dx = 0: vertical line (or a single pixel when dy = 0 too)
145 bcc cur_smaller
146
147 cur_bigger ; x1>x2
148 lda #_DEX ; x has to decrease; dx already holds the positive difference
149 bne end ; always taken, the opcode byte is non-zero
150
151 cur_smaller ; x1<x2
152 ; Absolute value
153 eor #$ff
154 adc #1 ; carry is clear on this path
155 sta dx
156
157 lda #_INX ; x has to increase
158 end
159 .)
160
; From here on A holds the opcode (_INX or _DEX) for the x direction.
; The jump skips the alignment padding that follows.
161 jmp alignIt
162
; Pad up to the next 256-byte page boundary (a full 256 bytes when already aligned).
163 .dsb 256-(*&255)
164
; alignIt: dispatcher, entered by the jmp at the end of _DrawLine8.
; In: A = x direction opcode (_INX/_DEX), dx and dy hold the absolute deltas,
;     dx is non-zero (the dx == 0 case has already branched away).
; A is not modified here, and Y = dy is passed on to the selected routine.
165 alignIt
166 ; Compute slope and call the specialized code for mostly horizontal or vertical lines
167 ldy dy
168 beq draw_totaly_horizontal_8 ; dy == 0
169 cpy dx
170 bcc draw_mainly_horizontal_8 ; dy < dx
171 jmp draw_mainly_vertical_8 ; dy >= dx
172
; draw_totaly_horizontal_8
; Reached from alignIt when dy == 0 (and dx != 0).
; XORs pixels along one screen row, starting at _CurrentPixelX and stepping
; towards _OtherPixelX.
; In:  A = opcode for the x step (_INX or _DEX), Y = dy (= 0, unused),
;      _CurrentPixelX, _OtherPixelX, tmp0 = screen pointer of the row
; Out: A, X, Y and the flags are changed; two bytes of this routine are patched.
; NOTE(review): the loop ends as soon as X reaches _OtherPixelX, i.e. before
; that pixel is plotted, whereas the other routines plot dx+1 / dy+1 pixels.
; Confirm whether leaving out the end point is intended here.
173 draw_totaly_horizontal_8
174 .(
175 ; here we have DY in Y, and the OPCODE in A
176 sta _outer_patch ; Write a (dex / inx) instruction
177
178 ldx _OtherPixelX
179 stx __auto_cpx+1 ; the end X becomes the operand of the cpx below (X, not the opcode in A)
180
181 ldx _CurrentPixelX
182
183 ;
184 ; Draw loop
185 ;
186 outer_loop
187 ldy _TableDiv6,x ; Y = byte offset of pixel X inside the row (table not shown here)
188 lda _TableBit6Reverse,x ; 4 A = bit mask of pixel X (table not shown here)
189 eor (tmp0),y ; 5
190 sta (tmp0),y ; 6
191
192 _outer_patch
193 inx ; overwritten with inx or dex on entry
194
195 __auto_cpx
196 cpx #00 ; At the endpoint yet? (operand patched on entry)
197 bne outer_loop
198 rts
199 .)
200
; draw_mainly_horizontal_8
; Reached from alignIt when 0 < dy < dx.
; In:  A = x direction opcode (_INX/_DEX), Y = dy, dx, _OtherPixelX,
;      tmp0 = screen pointer of the starting row
; Lines with dx/2 >= dy are handed over to draw_very_horizontal_8; the rest
; is drawn here with a Bresenham loop that plots dx+1 pixels, one per x step.
; X counts down from dx+1 to 1 and doubles as the table index, so the two
; table base addresses are patched according to the direction.
201 draw_mainly_horizontal_8
202 .(
203 tax ; keep the direction opcode in X
204 lda dx
205 lsr
206 cmp dy
207 bcs draw_very_horizontal_8 ; dx/2 >= dy (X = opcode, Y = dy on arrival)
208
209 ; here we have DY in Y, and the OPCODE (inx, dex) in X (moved there by the tax above)
210 sty __auto_dy+1
211
212 ; all this stress to be able to use dex, beq :)
; cpx leaves carry set when X == _INX and clear when X == _DEX ($ca < $e8);
; that is why the clc/sec below can stay commented out.
213 cpx #_INX
214 beq doInx
215
; Moving left: table base = (table-1) + _OtherPixelX, so that X = 1 indexes
; the end point and X = dx+1 the starting point.
; NOTE(review): the "== 0" remarks say the low byte of table-1 is zero; that
; depends on where the tables are placed, which is not visible here. Both
; adc rely on it, since otherwise the first one would leave the carry set.
216 lda #<_TableDiv6-1 ; == 0
217 ; clc ; _DEX < _INX
218 adc _OtherPixelX
219 sta __auto_div6+1
220 lda #<_TableBit6Reverse-1 ; == 0
221 ; clc
222 adc _OtherPixelX
223
224 ldx #>_TableDiv6
225 ldy #>_TableBit6Reverse ;
226 bne endPatch ; taken unless the table sits in page zero
227
228 doInx
; Moving right: the index runs backwards, so other tables are used
; (presumably mirrored copies - not shown here) with (X_SIZE-1)-_OtherPixelX
; as low byte. Carry is set here and stays set as long as no borrow occurs.
229 lda #X_SIZE-1
230 ; sec
231 sbc _OtherPixelX
232 sta __auto_div6+1
233 lda #X_SIZE-1
234 ; sec
235 sbc _OtherPixelX
236
237 ldx #>_TableDiv6Rev
238 ldy #>_TableBit6 ;
239 endPatch
240 sta __auto_bit6+1 ; low byte of the bit mask table address
241 stx __auto_div6+2 ; high byte of the byte offset table address
242 sty __auto_bit6+2 ; high byte of the bit mask table address
243
244 lda dx
245 tax
246 inx ; 2 +1 since we count to 0
247 sta __auto_dx+1
248 lsr
249 eor #$ff ; initial error sum = one's complement of dx/2
250 clc
251 ; a = sum, x = dX+1
252
253 loopX
254 sta save_a ; 3 = 3
255 loopY
256 ; Draw the pixel
257 __auto_div6
258 ldy _TableDiv6-1,x ; 4 address patched above
259 __auto_bit6
260 lda _TableBit6Reverse-1,x;4 address patched above
261 eor (tmp0),y ; 5*
262 sta (tmp0),y ; 6*= 19
263
264 dex ; 2 Step in x
265 beq exitLoop ; 2/3 At the endpoint yet?
266 lda save_a ; 3
267 __auto_dy
268 adc #00 ; 2 +DY (operand patched; carry is clear here)
269 bcc loopX ; 2/3=11/12 ~50% taken
270 ; Time to step in y
271 __auto_dx
272 sbc #00 ; 2 -DX (operand patched; carry is set here)
273 sta save_a ; 3 = 5
; The wrapped sum was below dy and dy < dx, so the sbc borrowed and the
; carry is clear again for the adc below.
274
275 ; update the screen address:
276 lda tmp0+0 ; 3
277 adc #ROW_SIZE ; 2
278 sta tmp0+0 ; 3
279 bcc loopY ; 2/3=10/11 ~84% taken
280 inc tmp0+1 ; 5
281 clc ; 2
282 bcc loopY ; 3 = 10
283 ; average: 12.44
284
285 exitLoop
286 rts
287 ; Timings:
288 ; x++/y : 34
289 ; x++/y++: 47.44
290 ; average: 40.72
291 .)
292
; draw_very_horizontal_8
; Reached from draw_mainly_horizontal_8 when dx/2 >= dy.
; In:  X = x direction opcode (_INX/_DEX), Y = dy, dx, _CurrentPixelX,
;      tmp0 = screen pointer of the starting row
; Instead of one read-modify-write per pixel, the bits of all pixels that
; fall into the same screen byte on the same row are collected in 'chunk'
; and XORed into the screen in one go ("chunking").
; Inside the loop: A = Bresenham sum, X = pixels left in the current byte
; (5..0, negative = byte exhausted), Y = byte offset from tmp0, and the
; zero-page dx counts the remaining pixels (the original dx is destroyed).
293 draw_very_horizontal_8
294 .(
295 ; dX > 2*dY, here we use "chunking"
296 ; here we have DY in Y, and the OPCODE (inx, dex) in X
297 sty __auto_dy+1
298 cpx #_INX
299 beq doInx
300
; Moving left.
; The byte column is looked up with the pixel x itself, like every other
; start-up lookup in this file; a -1 bias is only right for loops whose index
; counts down to 1 and would pick the previous byte when x is a multiple of 6.
301 ldx _CurrentPixelX
302 ldy _TableDiv6,x
303 lda _TableMod6,x ; presumably x mod 6, 0 = leftmost pixel of the byte (table not shown here)
304 tax ; X = pixels left in this byte when moving left
305
306 lda #_DEY
307 sta __auto_stepx ; next column = previous byte
308 lda #$ff
309 sta __auto_cpY+1 ; Y wrapped below 0 when it reads $ff
310 lda #_DEC_ZP
311 sta __auto_yHi ; then decrement the pointer high byte
312 lda Pot2NTbl,x
313 sta chunk ; bits of the current pixel and of everything left of it
314 lda #<Pot2NCTbl
315 bne endPatch ; taken unless the table starts exactly on a page boundary
316
317 doInx
; Moving right (same unbiased byte column lookup as above).
318 ldx _CurrentPixelX
319 ldy _TableDiv6,x
320 lda #BYTE_PIXEL-1
321 sec
322 sbc _TableMod6,x
323 tax ; X = pixels left in this byte when moving right
324
325 lda #_INY
326 sta __auto_stepx ; next column = next byte
327 lda #$00
328 sta __auto_cpY+1 ; Y wrapped above $ff when it reads 0
329 lda #_INC_ZP
330 sta __auto_yHi ; then increment the pointer high byte
331 lda Pot2PTbl,x
332 sta chunk ; bits of the current pixel and of everything right of it
333 lda #<Pot2PCTbl
334 endPatch
; Only the low byte of the table address is patched, so both tables have to
; share the same high byte.
335 sta __auto_pot_1+1
336 sta __auto_pot_2+1
337 sta __auto_pot_3+1
338
339 lda dx
340 sta __auto_dx+1
341 lsr
342 eor #$ff ; initial error sum = one's complement of dx/2
343 clc
344 inc dx ; 5 +1 since we count to 0
345 bcc loopX ; always taken (inc leaves the carry clear); skips padding and nextColumn
346 ; a = sum, x = pixels left in the current byte, dx = dX+1
347
; Pad up to the next 256-byte page boundary.
348 .dsb 256-(*&255)
349
; The current byte is used up: flush the collected bits, start a full chunk
; and move to the neighbouring byte. A (the sum) is kept in X meanwhile.
350 nextColumn ;
351 tax ; 2
352 lda chunk ; 3
353 eor (tmp0),y ; 5
354 sta (tmp0),y ; 6
355 lda #%00111111 ; 2 all six pixels of the new byte are still ahead
356 sta chunk ; 3
357 txa ; 2
358 ldx #BYTE_PIXEL-1 ; 2
359 __auto_stepx
360 iny ; 2 next column (patched: iny/dey)
361 __auto_cpY
362 cpy #00 ; 2 (patched: $00/$ff)
363 clc ; 2 cpy changed the carry; the adc below needs it clear
364 bne contColumn ; 2/3=33/34 99% taken
365 __auto_yHi
366 inc tmp0+1 ; 5 dec/inc
367 bcc contColumn ; 3 = 8
368
369 loopX
370 loopY
371 dex ; 2 one pixel less in this byte
372 bmi nextColumn ; 2/37.03 ~17% taken
373 contColumn ; = 9.84
374 dec dx ; 5 Step in x TODO: move into loopY
375 beq exitLoop ; 2/3 At the endpoint yet?
376 __auto_dy
377 adc #00 ; 2 +DY (operand patched)
378 bcc loopX ; 2/3=11/12 ~50% taken
379 ; Time to step in y
380 __auto_dx
381 sbc #00 ; 2 -DX (operand patched; carry is set here)
382 sta save_a ; 3 = 5
383
384 ; plot the last bits of current row:
; table[X] holds the bits still ahead in this byte; XORing them out of chunk
; leaves exactly the pixels that belong to the row being left.
385 __auto_pot_1
386 lda Pot2PCTbl,x ; 4
387 eor chunk ; 3
388 eor (tmp0),y ; 5
389 sta (tmp0),y ; 6
390 __auto_pot_2
391 lda Pot2PCTbl,x ; 4
392 sta chunk ; 3 = 25 the bits ahead form the chunk of the next row
393
394 ; update the screen address:
; Carry is clear here: the wrapped sum was below dy <= dx, so the sbc borrowed.
395 tya ; 2
396 adc #ROW_SIZE ; 2
397 tay ; 2
398 lda save_a ; 3
399 bcc loopY ; 2/3=11/12 ~84% taken
400 inc tmp0+1 ; 5
401 clc ; 2
402 bcc loopY ; 3 = 10
403 ; average: 13.44
404
405 exitLoop
406 ; plot last byte:
407 __auto_pot_3
408 lda Pot2PCTbl,x ; 4
409 eor chunk ; 3
410 eor (tmp0),y ; 5
411 sta (tmp0),y ; 6 = 18
412 rts
413 ; Timings:
414 ; x++/y : 21.84 (75%)
415 ; x++/y++: 64.28 (25%)
416 ; average: 32.45
417
; Bit patterns indexed by the number of pixels left in the byte.
; Pot2PTbl and Pot2PCTbl label the same data (right-aligned runs, used when
; moving right), as do Pot2NTbl and Pot2NCTbl (left-aligned runs, moving left).
418 Pot2PTbl
419 ; .byte %00000001, %00000011, %00000111, %00001111
420 ; .byte %00011111, %00111111
421 Pot2PCTbl
422 .byte %00000001, %00000011, %00000111, %00001111
423 .byte %00011111, %00111111
424 Pot2NTbl
425 Pot2NCTbl
426 .byte %00100000, %00110000
427 .byte %00111000, %00111100, %00111110, %00111111
428 .)
429
430
; Pad up to the next 256-byte page boundary (a full 256 bytes when already aligned).
431 .dsb 256-(*&255)
432 ;
433 ; This code is used when the things are moving faster
434 ; vertically than horizontally
435 ;
436 ; dy>dx
437 ;
; Reached from alignIt when dy >= dx (dx != 0, dy != 0).
; In:  A = x direction opcode (_INX/_DEX), Y = dy, dx, dy, _CurrentPixelX,
;      tmp0 = screen pointer of the starting row
; Plots dy+1 pixels, one per row, with a Bresenham loop. The pixel is tracked
; as a bit mask in curBit that is shifted on every x step; the code is patched
; for the direction first.
; Inside the loop: A/save_a = sum, X = rows left, Y = byte offset from tmp0.
438 draw_mainly_vertical_8
439 ; here we have DY in Y, and the OPCODE in A
440 .(
441 ; setup bresenham values:
442 sty __auto_dy+1
443 ldx dx
444 stx __auto_dx+1
445
446 ; TODO: two separate branches depending on x-direction
447 ; setup direction:
448 cmp #_DEX ; which direction?
449 bne doInx
450 ; dex -> moving left:
451 lda #%00100000
452 sta __auto_cpBit+1 ; leftmost bit reached = leave this byte
453 lda #_ASL ;
454 sta __auto_shBit ; shifting left moves one pixel to the left
455 lda #%00000001
456 sta __auto_ldBit+1 ; continue with the rightmost bit of the previous byte
457 lda #_DEY
458 sta __auto_yLo
459 ldx #$ff ; Y wrapped below 0 when it reads $ff
460 lda #_DEC_ZP
461 bne endPatch ; always taken, the opcode byte is non-zero
462
463 doInx
464 ; inx -> moving right:
465 lda #%00000001
466 sta __auto_cpBit+1 ; rightmost bit reached = leave this byte
467 lda #_LSR
468 sta __auto_shBit ; shifting right moves one pixel to the right
469 lda #%00100000
470 sta __auto_ldBit+1 ; continue with the leftmost bit of the next byte
471 lda #_INY
472 sta __auto_yLo
473 ldx #$00 ; Y wrapped above $ff when it reads 0
474 lda #_INC_ZP
475 endPatch
476 stx __auto_cpY+1
477 sta __auto_yHi
478 ; setup X
479 tya ; y = dY
480 tax
481 inx ; x = dY+1
482 ; setup current bit:
483 ldy _CurrentPixelX
484 lda _TableBit6Reverse,y ; 4 bit mask of the starting pixel (table not shown here)
485 sta curBit
486 ; setup pointer and Y:
487 ; TODO: self-modyfing code?
; The low byte of tmp0 is folded into Y and then cleared, so (tmp0),y still
; addresses the same byte while the row stepping only has to touch Y.
488 lda _TableDiv6,y
489 clc
490 adc tmp0
491 tay
492 lda #0
493 sta tmp0
494 bcc skipTmp0
495 inc tmp0+1
496 skipTmp0
497 ; calculate initial bresenham sum:
498 lda dy
499 lsr
500 eor #$ff ; -DY/2
501 clc ; 2
502 bcc loopY ; 3
503 ; a = sum, y = low byte of tmp0 + byte column, x = dY+1, tmp0 low byte = 0
504
; Out-of-line part of the row step: Y wrapped, so bump the pointer high byte.
505 incHiPtr ; 9
506 inc tmp0+1 ; 5
507 clc ; 2
508 bcc contHiPtr ; 3
509 ;----------------------------------------------------------
510 loopY
511 sta save_a ; 3 = 3
512 ; Draw the pixel
513 lda curBit ; 3
514 eor (tmp0),y ; 5
515 sta (tmp0),y ; 6 = 14
516
517 dex ; 2 At the endpoint yet?
518 beq exitLoop ; 2/3= 4/5
519 loopX
520 ; update the screen address:
521 tya ; 2
522 adc #ROW_SIZE ; 2 carry is clear on every path that gets here
523 tay ; 2
524 bcs incHiPtr ; 2/13 ~16% taken
525 contHiPtr ; = 9.76 average
526
527 lda save_a ; 3
528 __auto_dx
529 adc #00 ; 2 +DX (operand patched)
530 bcc loopY ; 2/3= 7/8 ~50% taken
531
532 ; Time to step in x
533 __auto_dy
534 sbc #00 ; 2 -DY (operand patched; carry is set here)
535 sta save_a ; 3 = 5
536
537 lda curBit ; 3
538 __auto_cpBit ; TODO: optimize
539 cmp #%00100000 ; 2 %00100000/%00000001
540 beq nextColumn ; 2/14.07 ~17% taken
541 __auto_shBit
542 asl ; 2 asl/lsr, clears carry (the bit shifted out is always 0 here)
543 contNextColumn
544 sta curBit ; 3 =~13.71
545
546 ; Draw the pixel
547 eor (tmp0),y ; 5
548 sta (tmp0),y ; 6 = 11
549 dex ; 2 At the endpoint yet?
550 bne loopX ; 2/3= 4/5 the sum is already in save_a
551 exitLoop
552 rts
553 ;----------------------------------------------------------
; The bit left the current byte: load the first bit of the neighbouring byte
; and move Y, and the pointer high byte if Y wrapped.
554 nextColumn
555 __auto_ldBit
556 lda #%00000001 ; 2 %00000001/%00100000
557 __auto_yLo
558 dey ; 2 dey/iny
559 __auto_cpY
560 cpy #$ff ; 2 $ff/$00
561 clc ; 2 TODO: optimize
562 bne contNextColumn ; 2/3 ~99% taken
563 __auto_yHi
564 dec tmp0+1 ; 5 dec/inc
565 bcc contNextColumn ; 3 always taken, carry was cleared above
566
567 ; x ,y++: 38.76 (50%)
568 ; x++,y++: 51.47 (50%)
569 ; average: 45.11
570 .)
571
572
573
574

  ViewVC Help
Powered by ViewVC 1.1.26