/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Contents of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory | Revision Log


Revision 257 - (show annotations)
Sat Feb 6 23:15:29 2010 UTC (9 years, 10 months ago) by thrust26
File size: 8940 byte(s)
some minor fixes, 529
1 ; History of timings...
2 ;649
3 ;614 (replacing the update of tmp0)
4 ;607
5 ;588
6 ;583 after alignment
7 ;579
8 ;534 redid mainly vertical
9 ;529 removed page penalty
10
11 .zero
12
13 ; *= tmp1
14
15 ;e .dsb 2 ; Error decision factor (slope) 2 bytes in zero page
16 ;i .dsb 1 ; Number of pixels to draw (iteration counter) 1 byte in zp
17 ;dx .dsb 1 ; Width
18 ;dy .dsb 1 ; Height
19 ;_CurrentPixelX .dsb 1
20 ;_CurrentPixelY .dsb 1
21 ;_OtherPixelX .dsb 1
22 ;_OtherPixelY .dsb 1
23
; Zero-page scratch variables used by the line routines in this file.
24 save_a .dsb 1 ; Bresenham error accumulator, parked here while A is used for plotting
25 save_y .dsb 1 ; Not referenced by any routine visible in this file
26 curBit .dsb 1 ; Bit mask of the pixel being plotted (used by draw_mainly_vertical_8)
27
; ROW_SIZE is the value added to the screen pointer to move down one row.
28 #define ROW_SIZE 40
29
; 6502 opcode bytes. The routines store these into their own instruction
; stream to select the step direction at run time (self-modifying code).
; _ASL/_LSR are the accumulator forms; _INC_ZP/_DEC_ZP the zero-page forms.
30 #define _NOP $ea
31 #define _INX $e8
32 #define _DEX $ca
33 #define _INY $c8
34 #define _DEY $88
35 #define _ASL $0a
36 #define _LSR $4a
37 #define _INC_ZP $e6
38 #define _DEC_ZP $c6
39
40 .text
41
42 .dsb 256-(*&255)
43
; draw_totaly_vertical_8: XOR-plots a vertical run of dy+1 pixels.
; In:  _CurrentPixelX = pixel column, dy = height - 1,
;      tmp0 = address of the first screen row to plot.
; Out: tmp0 has been advanced by ROW_SIZE once per plotted pixel.
; Clobbers A, X, Y and the carry flag.
44 draw_totaly_vertical_8
45 .(
46 ldx _CurrentPixelX ; X = pixel column, index into both lookup tables
47 ldy _TableDiv6,x ; Y = byte offset of that column within the row
48 lda _TableBit6Reverse,x ; 4 A = bit mask of the pixel inside that byte
49 sta _mask_patch+1 ; Patch the mask into the operand of the 'lda #' below
50
51 ldx dy ; X = loop counter...
52 inx ; ...dy+1 pixels, so both endpoints are plotted
53
54 clc ; 2 Carry must be clear for the 'adc #ROW_SIZE' in the loop
55 loop
56 _mask_patch
57 lda #0 ; 2 Operand is overwritten with the pixel mask (see above)
58 eor (tmp0),y ; 5 Toggle the pixel in the screen byte
59 sta (tmp0),y ; 6 => total = 13 cycles
60
61 ; Update screen address: tmp0 += ROW_SIZE (16 bit)
62 .(
63 lda tmp0+0 ; 3
64 adc #ROW_SIZE ; 2
65 sta tmp0+0 ; 3
66 bcc skip ; 2 (+1 if taken)
67 inc tmp0+1 ; 5 Low byte overflowed: bump the high byte
68 clc ; 2 Restore carry clear for the next iteration
69 skip
70 .)
71 ; ------------------Min=11 Max=17
72
73 dex ; One pixel done
74 bne loop
75 rts
76 .)
77
78
79
80 ; _DrawLine8: entry point of the line drawing routine.
81 ; Expects the following variables to be set when called:
82 ; _CurrentPixelX
83 ; _CurrentPixelY
84 ; _OtherPixelX
85 ; _OtherPixelY
86 ; The two endpoints are swapped in place when _CurrentPixelY > _OtherPixelY.
; Computes dx = |x1-x2| and dy = |y1-y2|, points tmp0 at the screen row of
; the (possibly swapped) _CurrentPixelY, then either branches straight to
; draw_totaly_vertical_8 (dx == 0) or jumps to alignIt with A holding the
; opcode (_DEX or _INX) that steps X towards _OtherPixelX.
87 _DrawLine8
88 ;
89 ; Compute deltas and signs
90 ;
91
92 ; Test Y value
93 .(
94 sec
95 lda _CurrentPixelY
96 sbc _OtherPixelY ; A = y1 - y2, carry clear if y1 < y2
97 beq end ; Same row: dy = 0
98 bcc cur_smaller
99
100 cur_bigger ; y1>y2
101 ; Swap the two endpoints (both X and Y)
102 ; So we always draw from top to bottom
103 ldy _CurrentPixelY
104 ldx _OtherPixelY
105 sty _OtherPixelY
106 stx _CurrentPixelY
107
108 ldy _CurrentPixelX
109 ldx _OtherPixelX
110 sty _OtherPixelX
111 stx _CurrentPixelX
112
113 jmp end ; A still holds y1 - y2, which is positive here
114
115 cur_smaller ; y1<y2
116 ; Absolute value: two's complement negate (carry is clear here, so adc #1 adds exactly 1)
117 eor #$ff
118 adc #1
119 end
120 sta dy ; dy = |y1 - y2|
121 .)
122
123 ;
124 ; Initialise screen pointer: tmp0 = address of row _CurrentPixelY
125 ;
126 ldy _CurrentPixelY
127 lda _HiresAddrLow,y ; 4
128 sta tmp0+0 ; 3
129 lda _HiresAddrHigh,y ; 4
130 sta tmp0+1 ; 3 => Total 14 cycles
131
132 ; Test X value
133 .(
134 sec
135 lda _CurrentPixelX
136 sbc _OtherPixelX ; A = x1 - x2, carry clear if x1 < x2
137 sta dx ; Correct as is when x1 >= x2; overwritten below otherwise
138 beq draw_totaly_vertical_8 ; dx == 0: vertical line (also covers a single point)
139 bcc cur_smaller
140
141 cur_bigger ; x1>x2
142 lda #_DEX ; dex
143 bne end ; Always taken: the opcode value is non-zero
144
145 cur_smaller ; x1<x2
146 ; Absolute value: two's complement negate (carry is clear here)
147 eor #$ff
148 adc #1
149 sta dx ; dx = |x1 - x2|
150
151 lda #_INX ; inx
152 end
153 .)
154
155 jmp alignIt ; Continue in the page-aligned dispatch code, opcode still in A
156
; Pad up to the next 256-byte page boundary so the code below starts on a
; page. Note: when * is already page aligned this expression emits a full
; 256 bytes of padding.
157 .dsb 256-(*&255)
158
; alignIt: selects the specialised routine from dx and dy.
; In: A = x-step opcode (_DEX / _INX), dx and dy set, tmp0 = row address.
; A is left untouched here; Y = dy on exit to the selected routine.
159 alignIt
160 ; Compute slope and call the specialized code for mostly horizontal or vertical lines
161 ldy dy
162 beq draw_totaly_horizontal_8 ; dy == 0
163 cpy dx
164 bcc draw_mainly_horizontal_8 ; dy < dx
165 jmp draw_mainly_vertical_8 ; dy >= dx (diagonals included)
166
; draw_mainly_horizontal_8: Bresenham line for dy < dx, XOR-plotted.
; Steps X by one every iteration and moves down one row each time the
; error accumulator overflows. Plots both endpoints.
; In:  A = x-step opcode (_DEX / _INX), Y = dy, dx set,
;      tmp0 = address of the row of _CurrentPixelY.
; The step opcode, dy, dx and the end X are patched into the loop below.
; Clobbers A, X, Y, flags, save_a and tmp0.
167 draw_mainly_horizontal_8
168 .(
169 ; here we have DY in Y, and the OPCODE in A
170 sta __auto_stepx ; Write a (dex / nop / inx) instruction
171 cmp #_DEX ; dex?
172 bne skipDex
173 dey ; adjust for carry being set in loop: when stepping left X > end X, so the 'cpx' leaves carry set and 'adc' adds one extra
174 skipDex
175 sty __auto_ady+1 ; Patch dy (or dy-1) into the 'adc #' operand
176
177 lda _OtherPixelX
178 sta __auto_cpx+1 ; Patch the end X into the 'cpx #' operand
179
180 ldx _CurrentPixelX ;Plotting coordinates
181 ldy _CurrentPixelY ;in X and Y (Y is overwritten by the table lookup at loopY)
182
183 lda dx
184 sta __auto_dx+1 ; Patch dx into the 'sbc #' operand
185 lsr
186 eor #$ff ; Initial error = one's complement of dx/2
187 ; clc (not needed: the 'cpx' in the loop sets the carry before the 'adc')
188
189 loopX
190 sta save_a ; 3 = 3
191 loopY
192 ; Draw the pixel
193 __auto_div6
194 ldy _TableDiv6,x ; 4
195 __auto_bit6
196 lda _TableBit6Reverse,x ; 4
197 eor (tmp0),y ; 5*
198 sta (tmp0),y ; 6*= 19
199
200 __auto_cpx
201 cpx #00 ; 2 At the endpoint yet? (operand patched with _OtherPixelX)
202 beq exitLoop ; 2/3
203 __auto_stepx
204 inx ; 2 Step in x (opcode patched to dex when moving left)
205 lda save_a ; 3
206 __auto_ady
207 adc #00 ; 2 +DY TODO: bugfix carry
208 bcc loopX ; 2/3=13/14 ~50% taken
209 ; Time to step in y
210 __auto_dx
211 sbc #00 ; 2 -DX (carry is set here, so this is an exact subtract)
212 sta save_a ; 3 = 5
213
214 ; Set the new screen address: tmp0 += ROW_SIZE (16 bit)
215 lda tmp0+0 ; 3
216 adc #ROW_SIZE ; 2
217 sta tmp0+0 ; 3
218 bcc loopY ; 2/3=10/11 ~84% taken
219 inc tmp0+1 ; 5
220 bcs loopY ; 3 = 8 (always taken: carry is still set from the adc)
221 ; average: 12.12
222
223 exitLoop
224 rts
225 ; Timings:
226 ; x++/y : 36
227 ; x++/y++: 49.12
228 ; average: 42.56
229 .)
230
; draw_totaly_horizontal_8: XOR-plots a horizontal run (dy == 0).
; In:  A = x-step opcode (_DEX / _INX), tmp0 = address of the screen row,
;      _CurrentPixelX = start X, _OtherPixelX = end X (different from start).
; Clobbers A, X, Y and flags.
; NOTE(review): the loop stops as soon as X reaches _OtherPixelX, so the end
; pixel itself is not plotted, unlike draw_mainly_horizontal_8 which plots
; before testing. Confirm whether this is intended.
231 draw_totaly_horizontal_8
232 .(
233 ; here we have DY in Y (zero on this path), and the OPCODE in A
234 sta _outer_patch ; Write a (dex / nop / inx) instruction
235
236 ldx _OtherPixelX
237 stx __auto_cpx+1 ; Patch the end X into the 'cpx #' operand (must be stx: A holds the opcode)
238
239 ldx _CurrentPixelX ; X = current pixel column
240
241 ;
242 ; Draw loop
243 ;
244 outer_loop
245 ldy _TableDiv6,x ; Byte offset of the column within the row
246 lda _TableBit6Reverse,x ; 4 Bit mask of the pixel inside that byte
247 eor (tmp0),y ; 5
248 sta (tmp0),y ; 6
249
250 _outer_patch
251 inx ; Step in x (opcode patched to dex when moving left)
252
253 __auto_cpx
254 cpx #00 ; At the endpoint yet? (operand patched with _OtherPixelX)
255 bne outer_loop
256 rts
257 .)
258
; Pad up to the next 256-byte page boundary (emits a full 256 bytes when *
; is already page aligned).
259 .dsb 256-(*&255)
260
261 ;
262 ; This code is used when the things are moving at least as fast
263 ; vertically as horizontally
264 ;
265 ; dy>=dx (alignIt sends every case with dy >= dx here, diagonals included)
266 ;
; draw_mainly_vertical_8: Bresenham line, XOR-plotted, dy+1 pixels.
; Moves down one row every iteration and shifts the pixel mask by one
; position each time the error accumulator overflows.
; In:  A = x-step opcode (_DEX / _INX), Y = dy, dx and dy set,
;      tmp0 = address of the row of _CurrentPixelY.
; Clobbers A, X, Y, flags, save_a, curBit and tmp0.
; The patch of __auto_yHi writes only the opcode byte, so it relies on
; 'dec tmp0+1' being assembled in its zero-page form - TODO confirm tmp0
; lives in zero page.
267 draw_mainly_vertical_8
268 ; here we have DY in Y, and the OPCODE in A
269 .(
270 ; setup bresenham values:
271 sty __auto_dy+1 ; Patch dy into the 'sbc #' operand
272 ldx dx
273 stx __auto_dx+1 ; Patch dx into the 'adc #' operand
274
275 ; TODO: two separate branches depending on x-direction
276 ; setup direction:
277 cmp #_DEX ; which direction?
278 bne doInx
279 ; dex -> moving left:
280 lda #%00100000 ; Last mask before leaving the byte on the left
281 sta __auto_cpBit+1
282 lda #_ASL ; Shift the mask towards higher bits
283 sta __auto_shBit
284 lda #%00000001 ; First mask in the byte entered from the right
285 sta __auto_ldBit+1
286 lda #_DEY ; Previous screen byte
287 sta __auto_yLo
288 ldx #$ff ; Y == $ff after dey means the low byte wrapped
289 lda #_DEC_ZP ; ...and the pointer high byte must be decremented
290 bne endX ; Always taken: the opcode value is non-zero
291
292 doInx
293 ; inx -> moving right:
294 lda #%00000001 ; Last mask before leaving the byte on the right
295 sta __auto_cpBit+1
296 lda #_LSR ; Shift the mask towards lower bits
297 sta __auto_shBit
298 lda #%00100000 ; First mask in the byte entered from the left
299 sta __auto_ldBit+1
300 lda #_INY ; Next screen byte
301 sta __auto_yLo
302 ldx #$00 ; Y == $00 after iny means the low byte wrapped
303 lda #_INC_ZP ; ...and the pointer high byte must be incremented
304 endX
305 stx __auto_cpY+1 ; Patch the wrap value into the 'cpy #' operand
306 sta __auto_yHi ; Patch the inc/dec opcode for the pointer high byte
307 ; setup X
308 tya ; y = dY
309 tax
310 inx ; x = dY+1 (number of pixels to plot)
311 ; setup current bit:
312 ldy _CurrentPixelX
313 lda _TableBit6Reverse,y ; 4
314 sta curBit
315 ; setup pointer and Y: Y takes over the low byte of the screen address,
316 ; TODO: self-modifying code?
317 lda _TableDiv6,y
318 clc
319 adc tmp0 ; A = low byte of row address + byte offset of the column
320 tay
321 lda #0
322 sta tmp0 ; tmp0 low byte is kept at zero from here on
323 bcc skipTmp0
324 inc tmp0+1 ; Carry from the low byte addition
325 skipTmp0
326 ; calculate initial bresenham sum:
327 lda dy
328 lsr
329 eor #$ff ; -DY/2 (one's complement)
330 clc ; 2
331 bcc loopY ; 3 Always taken
332 ; a = sum, y = low byte of screen address, x = dY+1, tmp0 low byte = 0
333
334 incHiPtr ; 9
335 inc tmp0+1 ; 5 Y overflowed past the page: bump the pointer high byte
336 clc ; 2 Carry must be clear for the 'adc' at contHiPtr
337 bcc contHiPtr ; 3 Always taken
338 ;----------------------------------------------------------
339 loopY
340 sta save_a ; 3 = 3
341 ; Draw the pixel
342 lda curBit ; 3
343 eor (tmp0),y ; 5
344 sta (tmp0),y ; 6 = 14
345
346 dex ; 2 At the endpoint yet?
347 beq exitLoop ; 2/3= 4/5
348 loopX
349 ; Update screen address: Y += ROW_SIZE (carry is clear on every path to here)
350 tya ; 2
351 adc #ROW_SIZE ; 2
352 tay ; 2
353 bcs incHiPtr ; 2/13 ~16% taken
354 contHiPtr ; = 9.76 average
355
356 lda save_a ; 3
357 __auto_dx
358 adc #00 ; 2 +DX
359 bcc loopY ; 2/3= 7/8 ~50% taken
360
361 ; Time to step in x
362 __auto_dy
363 sbc #00 ; 2 -DY (carry is set here, so this is an exact subtract)
364 sta save_a ; 3 = 5
365
366 lda curBit ; 3
367 __auto_cpBit ; TODO: optimize
368 cmp #%00100000 ; 2 %00100000/%00000001
369 beq nextColumn ; 2/14.07 ~17% taken
370 __auto_shBit
371 asl ; 2 asl/lsr, clears carry
372 contNextColumn
373 sta curBit ; 3 =~13.71
374
375 ; Draw the pixel
376 eor (tmp0),y ; 5
377 sta (tmp0),y ; 6 = 11
378 dex ; 2 At the endpoint yet?
379 bne loopX ; 2/3= 4/5
380 exitLoop
381 rts
382 ;----------------------------------------------------------
383 nextColumn
384 __auto_ldBit
385 lda #%00000001 ; 2 %00000001/%00100000
386 __auto_yLo
387 dey ; 2 dey/iny
388 __auto_cpY
389 cpy #$ff ; 2 $ff/$00
390 clc ; 2 TODO: optimize
391 bne contNextColumn ; 2/3 ~99% taken
392 __auto_yHi
393 dec tmp0+1 ; 5 dec/inc
394 bcc contNextColumn ; 3 Always taken: carry was cleared above
395
396 ; x ,y++: 38.76 (50%)
397 ; x++,y++: 51.47 (50%)
398 ; average: 45.11
399 .)

  ViewVC Help
Powered by ViewVC 1.1.26