/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Annotation of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory | Revision Log


Revision 255 - (hide annotations)
Sat Feb 6 22:32:14 2010 UTC (9 years, 10 months ago) by thrust26
File size: 8741 byte(s)
optimized draw_mainly_vertical_8, linebench @ 534 now
1 dbug 249 ; History of timings...
2     ;649
3     ;614 (replacing the update of tmp0)
4     ;607
5 thrust26 253 ;588
6     ;583 after alignment
7     ;579
8 dbug 218
; ---------------------------------------------------------------------------
; Zero page scratch bytes owned by the line routines.
; dx, dy, tmp0 and the _CurrentPixel / _OtherPixel coordinates are used by
; the code below but are not declared here (their former declarations in
; this section were commented out) - presumably another file owns them now,
; TODO confirm.
; ---------------------------------------------------------------------------
	.zero

save_a	.dsb 1			; error sum parked here while A is busy plotting
save_y	.dsb 1			; no reader or writer in the code visible here
curBit	.dsb 1			; pixel mask used by draw_mainly_vertical_8

	.text

; Pad up to the next 256 byte page boundary. The expression yields a full
; 256 bytes when the location counter is already page aligned.
	.dsb 256-(*&255)

; ---------------------------------------------------------------------------
; 6502 opcode bytes that the setup code pokes into the drawing loops.
;   dex $ca 11001010     inx $e8 11101000
;   dey $88 10001000     iny $c8 11001000
;   nop $ea
; No trailing comments on the define lines so nothing but the value can end
; up in the macro body.
; ---------------------------------------------------------------------------

; single byte register steps
#define _DEX    $ca
#define _INX    $e8
#define _DEY    $88
#define _INY    $c8

; accumulator shifts
#define _LSR    $4a
#define _ASL    $0a

; zero page memory steps (opcode byte only, the operand byte stays in place)
#define _DEC_ZP $c6
#define _INC_ZP $e6

; filler
#define _NOP    $ea
45 dbug 222
46    
47 thrust26 255
; ---------------------------------------------------------------------------
; draw_totaly_vertical_8
; Plots dy+1 pixels in one column by EOR-ing a single pixel mask into
; successive screen rows.
; In    tmp0            address of the first row (written by _DrawLine8)
;       _CurrentPixelX  column of the line
;       dy              row count minus one
; Out   tmp0 has advanced 40 bytes per plotted pixel
; Uses  A, X, Y, flags. Patches the operand of its own lda instruction.
; ---------------------------------------------------------------------------
draw_totaly_vertical_8
.(
	ldx _CurrentPixelX
	lda _TableBit6Reverse,x	; pixel mask for this column ...
	sta plot_mask+1		; ... becomes the immediate operand below
	ldy _TableDiv6,x	; Y = byte offset of the column inside the row

	ldx dy
	inx			; X = dy+1 pixels left to plot
	clc			; the pointer add below expects carry clear

next_row
plot_mask
	lda #0			; 2  operand patched with the pixel mask
	eor (tmp0),y		; 5
	sta (tmp0),y		; 6  = 13 cycles per plot

	; advance the screen pointer by 40 bytes to reach the next row
	lda tmp0+0		; 3
	adc #40			; 2
	sta tmp0+0		; 3
	bcc same_page		; 2 (3 when taken)
	inc tmp0+1		; 5  low byte wrapped, carry into the high byte
	clc			; 2  restore the carry clear invariant
same_page

	dex
	bne next_row
	rts
.)
81 dbug 218
82 thrust26 253
83    
; ---------------------------------------------------------------------------
; _DrawLine8
; Entry point of the 8 bit line drawer. Prepares the deltas and the screen
; pointer, then dispatches to one of four specialised plotting routines.
;
; In    _CurrentPixelX, _CurrentPixelY   first end point
;       _OtherPixelX,   _OtherPixelY     second end point
; Out   dx, dy   absolute deltas
;       tmp0     address of the screen row of the upper end point
;       The two end points are exchanged in place when needed so that
;       Current is never below Other.
; At dispatch time A holds the opcode that steps X toward the other end
; (_INX or _DEX) and Y holds dy.
; ---------------------------------------------------------------------------
_DrawLine8

	; ---- vertical delta, and make Current the upper end point ----
.(
	sec
	lda _CurrentPixelY
	sbc _OtherPixelY
	beq store_dy		; same row, dy = 0
	bcc negate_dy		; Current is already above Other

	; Current is below Other. Exchange both end points so that drawing
	; always runs from top to bottom. A keeps the positive difference,
	; only X and Y are used as temporaries.
	ldy _CurrentPixelX
	ldx _OtherPixelX
	sty _OtherPixelX
	stx _CurrentPixelX

	ldy _CurrentPixelY
	ldx _OtherPixelY
	sty _OtherPixelY
	stx _CurrentPixelY

	jmp store_dy

negate_dy
	eor #$ff		; negate the difference, carry is clear on this path
	adc #1
store_dy
	sta dy
.)

	; ---- screen pointer of the starting row ----
	ldy _CurrentPixelY
	lda _HiresAddrLow,y	; 4
	sta tmp0+0		; 3
	lda _HiresAddrHigh,y	; 4
	sta tmp0+1		; 3  = 14 cycles

	; ---- horizontal delta and stepping direction ----
.(
	sec
	lda _CurrentPixelX
	sbc _OtherPixelX
	sta dx			; provisional, overwritten below when negative
	beq draw_totaly_vertical_8
	bcc step_right

	lda #_DEX		; Current is right of Other, so X walks left
	bne opcode_ready	; always taken, the opcode byte is not zero

step_right
	eor #$ff		; negate the difference, carry is clear on this path
	adc #1
	sta dx
	lda #_INX		; Current is left of Other, so X walks right
opcode_ready
.)

	; Hop over the padding so that the dispatch code and the loops that
	; follow it start on a fresh 256 byte page.
	jmp alignIt

	.dsb 256-(*&255)

alignIt
	; Choose the specialised routine from the slope
	ldy dy
	beq draw_totaly_horizontal_8	; dy = 0
	cpy dx
	bcs draw_mainly_vertical_8	; dy >= dx
	; dy < dx continues straight into draw_mainly_horizontal_8
169 thrust26 253
; ---------------------------------------------------------------------------
; draw_mainly_horizontal_8
; Line plotter for 0 < dy < dx. X moves one column per pixel and the error
; sum decides when to move down one row. Both end points are plotted.
;
; In    A = opcode stepping X toward the end point (_INX or _DEX)
;       Y = dy
;       dx, tmp0, _CurrentPixelX, _OtherPixelX as prepared by _DrawLine8
; Uses  A, X, Y, flags, save_a, tmp0. Patches its own loop (the __auto
;       labels mark the patched instructions).
; ---------------------------------------------------------------------------
draw_mainly_horizontal_8
.(
	sta __auto_stepx	; the opcode replaces the inx inside the loop
	cmp #_DEX
	bne dy_ready
	dey			; walking left the cpx in the loop leaves carry set
				; before the adc, so the patched addend is dy-1
dy_ready
	sty __auto_ady+1

	lda _OtherPixelX
	sta __auto_cpx+1	; end column

	lda dx
	sta __auto_dx+1

	ldx _CurrentPixelX	; X = current column
	ldy _CurrentPixelY	; NOTE(review) Y is reloaded from _TableDiv6
				; before its first use, so this load looks
				; dead. Kept so the code size is unchanged.

	lda dx
	lsr
	eor #$ff		; initial error sum = 255 - dx/2

keep_row
	sta save_a		; 3
plot
	ldy _TableDiv6,x	; 4
	lda _TableBit6Reverse,x	; 4
	eor (tmp0),y		; 5
	sta (tmp0),y		; 6  = 19

__auto_cpx
	cpx #00			; 2  operand patched with _OtherPixelX
	beq done		; 2/3
__auto_stepx
	inx			; 2  patched with inx or dex
	lda save_a		; 3
__auto_ady
	adc #00			; 2  operand patched with dy (dy-1 when going left)
	bcc keep_row		; 2/3  no overflow, stay on this row

	; the error sum overflowed, take dx back off and move down one row
__auto_dx
	sbc #00			; 2  operand patched with dx, carry is set here
	sta save_a		; 3
				; the wrapped sum is below dy and dy < dx in this
				; routine, so the sbc borrows and leaves carry
				; clear for the pointer add

	lda tmp0+0		; 3
	adc #40			; 2  next row is 40 bytes further
	sta tmp0+0		; 3
	bcc plot		; 2/3
	inc tmp0+1		; 5
	bcs plot		; 3  always taken, carry was set by the add

done
	rts

	; Cycle totals recorded by the original author
	;   x step only      36
	;   x and y step     49.12
	;   average          42.56
.)
235    
; ---------------------------------------------------------------------------
; draw_totaly_horizontal_8
; Plots a run of pixels on a single screen row (dy = 0) by EOR-ing each
; pixel mask into the row.
;
; In    A = opcode stepping X toward the end point (_INX or _DEX)
;       Y = dy, which is zero here and is not used
;       tmp0 = address of the row
;       _CurrentPixelX = first column, _OtherPixelX = end column
; Uses  A, X, Y, flags. Patches two instructions of its own loop.
;
; Fix   the end column is now written with stx. The previous code loaded
;       _OtherPixelX into X but then stored A, so the step opcode ($e8 or
;       $ca) became the cpx operand and the run ended at column 232 or 202
;       instead of at _OtherPixelX. stx has the same size as sta, so the
;       layout of the following code does not move.
;
; NOTE(review) the loop leaves before plotting the pixel at _OtherPixelX,
;       while the other three routines plot both end points. Left as is,
;       confirm which behaviour callers expect.
; ---------------------------------------------------------------------------
draw_totaly_horizontal_8
.(
	sta _outer_patch	; the opcode replaces the inx inside the loop

	ldx _OtherPixelX
	stx __auto_cpx+1	; end column becomes the cpx operand

	ldx _CurrentPixelX	; X = current column

outer_loop
	ldy _TableDiv6,x	; 4  byte offset of the column inside the row
	lda _TableBit6Reverse,x	; 4  pixel mask
	eor (tmp0),y		; 5
	sta (tmp0),y		; 6

_outer_patch
	inx			; patched with inx or dex

__auto_cpx
	cpx #00			; operand patched with _OtherPixelX
	bne outer_loop
	rts
.)
263    
264    
; ---------------------------------------------------------------------------
; draw_mainly_vertical_8
; Line plotter for dy >= dx with dx > 0. The pointer moves down one row per
; pixel and the error sum decides when to move one pixel sideways. Plots
; dy+1 pixels.
;
; In    A = direction opcode chosen by _DrawLine8 (_DEX means leftwards,
;           anything else is treated as rightwards)
;       Y = dy
;       dx, dy, tmp0, _CurrentPixelX as prepared by _DrawLine8
; Uses  A, X, Y, flags, save_a, curBit, tmp0. Patches its own loop (the
;       __auto labels mark the patched instructions).
;
; Inside the loop
;       X       pixels still to plot
;       Y       low byte of the screen address (tmp0+0 is forced to zero)
;       tmp0+1  high byte of the screen address
;       curBit  mask of the pixel inside the current screen byte
;       save_a  error sum
; ---------------------------------------------------------------------------
draw_mainly_vertical_8
.(
	; error terms
	sty __auto_dy+1
	ldx dx
	stx __auto_dx+1

	; direction dependent patches
	cmp #_DEX
	bne go_right

	; leftwards, the mask shifts left until it reaches bit 5, then the
	; address moves one byte down and the mask restarts at bit 0
	lda #%00100000
	sta __auto_cpBit+1
	lda #_ASL
	sta __auto_shBit
	lda #%00000001
	sta __auto_ldBit+1
	lda #_DEY
	sta __auto_yLo
	lda #$ff		; Y wrapped below zero
	sta __auto_cpY+1
	lda #_DEC_ZP
	sta __auto_yHi
	bne dir_done		; always taken, A is not zero

go_right
	; rightwards, the mask shifts right until it reaches bit 0, then the
	; address moves one byte up and the mask restarts at bit 5
	lda #%00000001
	sta __auto_cpBit+1
	lda #_LSR
	sta __auto_shBit
	lda #%00100000
	sta __auto_ldBit+1
	lda #_INY
	sta __auto_yLo
	lda #$00		; Y wrapped past 255
	sta __auto_cpY+1
	lda #_INC_ZP
	sta __auto_yHi
dir_done

	; pixel counter
	tya			; Y still holds dy
	tax
	inx			; X = dy+1

	; mask of the first pixel
	ldy _CurrentPixelX
	lda _TableBit6Reverse,y	; 4
	sta curBit

	; Fold the column byte offset and the low pointer byte into Y and
	; zero the low pointer byte, so that a row step only touches Y.
	lda _TableDiv6,y
	clc
	adc tmp0
	tay
	lda #0
	sta tmp0
	bcc ptr_ready
	inc tmp0+1
ptr_ready

	; initial error sum = 255 - dy/2
	lda dy
	lsr
	eor #$ff
	clc			; 2
	bcc plot_same_col	; 3  always taken

	; out of line part of the row step, the low address byte wrapped
next_page
	inc tmp0+1		; 5
	clc			; 2
	bcc row_done		; 3  always taken

plot_same_col
	sta save_a		; 3
	lda curBit		; 3
	eor (tmp0),y		; 5
	sta (tmp0),y		; 6

	dex			; 2  all pixels plotted yet
	beq done		; 2/3
next_row
	; carry is clear on every path that reaches this point
	tya			; 2
	adc #40			; 2  next row is 40 bytes further
	tay			; 2
	bcs next_page		; 2  rarely taken (about 16% per the author)
row_done

	lda save_a		; 3
__auto_dx
	adc #00			; 2  operand patched with dx
	bcc plot_same_col	; 2/3  no overflow, stay in this column

	; the error sum overflowed, take dy back off and move one pixel sideways
__auto_dy
	sbc #00			; 2  operand patched with dy, carry is set here
	sta save_a		; 3

	lda curBit		; 3
__auto_cpBit
	cmp #%00100000		; 2  operand patched with the last mask of a byte
	beq next_byte		; 2  taken for one pixel step in six
__auto_shBit
	asl			; 2  patched with asl or lsr, shifts out a zero
				;    so carry ends up clear
bit_ready
	sta curBit		; 3

	eor (tmp0),y		; 5
	sta (tmp0),y		; 6
	dex			; 2  all pixels plotted yet
	bne next_row		; 2/3
done
	rts

	; the mask left the current screen byte, move to the neighbouring byte
next_byte
__auto_ldBit
	lda #%00000001		; 2  operand patched with the first mask of a byte
__auto_yLo
	dey			; 2  patched with dey or iny
__auto_cpY
	cpy #$ff		; 2  operand patched with the wrap value of Y
	clc			; 2  next_row expects carry clear
	bne bit_ready		; 2/3  taken unless Y wrapped
__auto_yHi
	dec tmp0+1		; 5  patched with the dec or inc opcode. Only the
				;    opcode byte is replaced, so this assumes
				;    tmp0 sits in zero page - TODO confirm
	bcc bit_ready		; 3  always taken

	; Two cycle tables were left here by the original author. Which of
	; them matches the current code is not stated.
	;   y step only      38.76  (50%)
	;   x and y step     51.47  (50%)
	;   average          45.11
	;
	;   y step only      54.12
	;   x and y step     57.12
	;   average          55.62
.)

  ViewVC Help
Powered by ViewVC 1.1.26