/[projet1]/public/oric/routines/rasterization/linebench/line8.s
Defence Force logotype

Contents of /public/oric/routines/rasterization/linebench/line8.s

Parent Directory | Revision Log


Revision 256 - (show annotations)
Sat Feb 6 22:34:33 2010 UTC (9 years, 10 months ago) by thrust26
File size: 8771 byte(s)
added bench value
1 ; History of timings...
2 ;649
3 ;614 (replacing the update of tmp0)
4 ;607
5 ;588
6 ;583 after alignment
7 ;579
8 ;534 redid mainly vertical
9
10
; ---------------------------------------------------------------------------
; Zero page workspace
; ---------------------------------------------------------------------------
        .zero

; The declarations below are disabled in this file. The routines further down
; still read and write dx, dy and the four pixel coordinates, so those have to
; be supplied by another source file.
;
;       *= tmp1
;
;       e               .dsb 2      ; Error decision factor (slope), 2 bytes
;       i               .dsb 1      ; Number of pixels to draw (iteration counter)
;       dx              .dsb 1      ; Width
;       dy              .dsb 1      ; Height
;       _CurrentPixelX  .dsb 1
;       _CurrentPixelY  .dsb 1
;       _OtherPixelX    .dsb 1
;       _OtherPixelY    .dsb 1

save_a  .dsb 1                      ; error term, parked here while A is busy plotting
save_y  .dsb 1                      ; not referenced by the routines shown in this file
curBit  .dsb 1                      ; current pixel mask of draw_mainly_vertical_8
27
; ---------------------------------------------------------------------------
; Code segment
; ---------------------------------------------------------------------------
        .text

; Pad up to the next 256 byte page boundary. When the location counter is
; already on a boundary this emits a full page of padding.
        .dsb 256-(*&255)

; Opcode bytes that the setup code pokes into the drawing loops at run time.
;
;       mnemonic    hex     binary
;       nop         $ea
;       inx         $e8     11101000
;       dex         $ca     11001010
;       iny         $c8     11001000
;       dey         $88     10001000

#define _NOP    $ea
#define _INX    $e8
#define _DEX    $ca
#define _INY    $c8
#define _DEY    $88
#define _ASL    $0a
#define _LSR    $4a
#define _INC_ZP $e6
#define _DEC_ZP $c6
47
48
49
;
; draw_totaly_vertical_8
; Plots a straight vertical run by XORing one pixel mask into dy+1
; consecutive screen rows, 40 bytes apart.
;
; In        _CurrentPixelX = pixel column
;           dy             = number of rows minus one
;           tmp0           = pointer to the first row
; Out       tmp0 has moved down by one row per plotted pixel
; Clobbers  A, X, Y and the flags
;
draw_totaly_vertical_8
.(
        ldx _CurrentPixelX
        ldy _TableDiv6,x            ; byte offset in the row (presumably x/6, going by the table name)
        lda _TableBit6Reverse,x     ; 4   pixel mask for this column
        sta _mask_patch+1           ; becomes the immediate operand of the lda in the loop

        ldx dy
        inx                         ; X = dy+1 rows to plot

        clc                         ; 2   the adc below relies on carry being clear
nextRow
_mask_patch
        lda #0                      ; 2   operand patched above
        eor (tmp0),y                ; 5
        sta (tmp0),y                ; 6 => total = 13 cycles

        ; tmp0 += 40, which moves the pointer to the next screen row
        lda tmp0                    ; 3
        adc #40                     ; 2
        sta tmp0                    ; 3
        bcc samePage                ; 2 (+1 if taken)
        inc tmp0+1                  ; 5
        clc                         ; 2   restore the carry clear invariant
samePage
        ; ------------------Min=13 Max=17

        dex
        bne nextRow
        rts
.)
83
84
85
;
; _DrawLine8
; Entry point of the line routine. Orders the two end points from top to
; bottom, computes the absolute deltas, points tmp0 at the first screen row
; and hands over to one of the four specialised drawing routines.
;
; In        _CurrentPixelX, _CurrentPixelY = first end point
;           _OtherPixelX,   _OtherPixelY   = second end point
; Out       dx, dy = absolute deltas
;           the two end points are exchanged when Current was below Other
; Clobbers  A, X, Y, flags, tmp0
;
; The specialised routines are entered with the x step opcode in A
; (_DEX or _INX) and dy in Y, except draw_totaly_vertical_8 which is
; entered directly from the dx = 0 test.
;
_DrawLine8
        ; ---- vertical delta, and make Current the upper end point ----
.(
        sec
        lda _CurrentPixelY
        sbc _OtherPixelY
        beq storeDy                 ; same row, A = 0
        bcc negateDy                ; borrow, Current is already above Other

        ; Current is below Other. Exchange the end points so that drawing
        ; always runs from top to bottom. Only X and Y are used for the
        ; exchange, A still holds the positive delta.
        ldy _CurrentPixelY
        ldx _OtherPixelY
        sty _OtherPixelY
        stx _CurrentPixelY

        ldy _CurrentPixelX
        ldx _OtherPixelX
        sty _OtherPixelX
        stx _CurrentPixelX

        jmp storeDy

negateDy
        ; Carry is clear on this path, so eor then adc 1 negates A
        eor #$ff
        adc #1
storeDy
        sta dy
.)

        ; ---- screen pointer for the first row ----
        ldy _CurrentPixelY
        lda _HiresAddrLow,y         ; 4
        sta tmp0+0                  ; 3
        lda _HiresAddrHigh,y        ; 4
        sta tmp0+1                  ; 3 => Total 14 cycles

        ; ---- horizontal delta and step direction ----
.(
        sec
        lda _CurrentPixelX
        sbc _OtherPixelX
        sta dx
        beq draw_totaly_vertical_8  ; same column
        bcc goingRight              ; borrow, Current is left of Other

        ; Current is right of Other, so x has to count down
        lda #_DEX
        bne stepChosen              ; always taken, the opcode byte is not zero

goingRight
        ; Carry is clear on this path, so eor then adc 1 negates A
        eor #$ff
        adc #1
        sta dx

        lda #_INX
stepChosen
.)

        jmp alignIt

        ; Padding so that the dispatch code and the loops after it start
        ; on a page boundary
        .dsb 256-(*&255)

alignIt
        ; Pick the specialised routine from the slope
        ldy dy
        beq draw_totaly_horizontal_8
        cpy dx
        bcs draw_mainly_vertical_8  ; dy >= dx
        ; dy < dx, execution continues into the routine placed right after
        ; this point (draw_mainly_horizontal_8 in the original layout)
171
;
; draw_mainly_horizontal_8
; Bresenham loop for lines that are wider than they are tall (dx > dy, dy not
; zero). One pixel is plotted per x step, both end points included. The error
; term decides when to move down one screen row.
;
; In        A  = x step opcode, _DEX or _INX
;           Y  = dy
;           dx, _CurrentPixelX, _OtherPixelX, tmp0 as prepared by _DrawLine8
; Clobbers  A, X, Y, flags, save_a, tmp0
;
draw_mainly_horizontal_8
.(
        sta __auto_stepx            ; patch the x step instruction (dex or inx)
        cmp #_DEX
        bne dyReady
        ; When counting down, X is still above the end column at the cpx in
        ; the loop, so carry is set when the adc runs and one extra gets
        ; added. Compensate by patching in dy-1.
        dey
dyReady
        sty __auto_ady+1

        lda dx
        sta __auto_dx+1

        lda _OtherPixelX
        sta __auto_cpx+1            ; end column

        ldx _CurrentPixelX          ; X = current column
        ldy _CurrentPixelY          ; kept from the original, Y is reloaded
                                    ; by the first table lookup in the loop

        lda dx
        lsr
        eor #$ff                    ; A = NOT(dx/2), the starting error term
        ; no clc needed, the cpx in the loop defines carry before the adc

errorKept
        sta save_a                  ; 3 = 3
rowStepped
        ; Plot the pixel
__auto_div6
        ldy _TableDiv6,x            ; 4
__auto_bit6
        lda _TableBit6Reverse,x     ; 4
        eor (tmp0),y                ; 5*
        sta (tmp0),y                ; 6*= 19

__auto_cpx
        cpx #00                     ; 2   end column reached, operand patched
        beq lineDone                ; 2/3
__auto_stepx
        inx                         ; 2   patched to dex or inx
        lda save_a                  ; 3
__auto_ady
        adc #00                     ; 2   +DY, operand patched
        bcc errorKept               ; 2/3=13/14

        ; The error term overflowed, step one row down
__auto_dx
        sbc #00                     ; 2   -DX, operand patched, carry is set here
        sta save_a                  ; 3 = 5

        ; tmp0 += 40. The sbc above borrows because dy < dx, which leaves
        ; carry clear for this adc.
        lda tmp0+0                  ; 3
        adc #40                     ; 2
        sta tmp0+0                  ; 3
        bcc rowStepped              ; 2/3=10/11 ~84 taken
        inc tmp0+1                  ; 5
        bcs rowStepped              ; 3 = 8   always taken, inc leaves carry set
        ; average = 12.12

lineDone
        rts
        ; Timings
        ; x++/y    = 36
        ; x++/y++  = 49.12
        ; average  = 42.56
.)
237
;
; draw_totaly_horizontal_8
; Plots a horizontal run (dy = 0) on the screen row addressed by tmp0.
;
; In        A  = x step opcode, _DEX or _INX
;           Y  = dy (not used, Y is reloaded by the table lookup)
;           _CurrentPixelX = first column
;           _OtherPixelX   = column at which the loop stops
;           tmp0           = pointer to the screen row
; Clobbers  A, X, Y, flags
;
; NOTE(review) the loop steps x before comparing, so the pixel at
; _OtherPixelX itself is not plotted, whereas the other three routines in
; this file plot both end points. Confirm whether that is intended.
;
draw_totaly_horizontal_8
.(
        sta _outer_patch            ; patch the x step instruction (dex or inx)

        ; The end column has to come from X. The previous code used sta here,
        ; which stored the step opcode from A as the end column.
        ldx _OtherPixelX
        stx __auto_cpx+1

        ldx _CurrentPixelX

        ;
        ; Draw loop
        ;
outer_loop
        ldy _TableDiv6,x
        lda _TableBit6Reverse,x     ; 4
        eor (tmp0),y                ; 5
        sta (tmp0),y                ; 6

_outer_patch
        inx                         ; patched to dex or inx

__auto_cpx
        cpx #00                     ; end column reached, operand patched
        bne outer_loop
        rts
.)
265
266
;
; draw_mainly_vertical_8
; Bresenham loop for lines that are at least as tall as they are wide
; (dy >= dx, dy not zero). One pixel is plotted per screen row, both end
; points included. The error term decides when to move one pixel sideways.
;
; In        A  = x step opcode chosen by _DrawLine8 (_DEX means right to left,
;                anything else is treated as left to right)
;           Y  = dy
;           dx, dy, _CurrentPixelX, tmp0 as prepared by _DrawLine8
; Clobbers  A, X, Y, flags, save_a, curBit, tmp0
;
; Inside the loop the low byte of the screen address lives in Y and tmp0 is
; kept page aligned, X counts the remaining pixels and curBit holds the
; pixel mask.
;
draw_mainly_vertical_8
.(
        ; Bresenham constants go straight into the loop operands
        sty __auto_dy+1
        ldx dx
        stx __auto_dx+1

        ; Patch the sideways step for the requested direction
        ; (disabled in the original)  sta __auto_stepx
        cmp #_DEX
        bne rightward

        ; Right to left
        lda #%00100000
        sta __auto_cpBit+1          ; last mask position before the byte changes
        lda #_ASL
        sta __auto_shBit            ; the mask moves with asl
        lda #%00000001
        sta __auto_ldBit+1          ; first mask position in the new byte
        lda #_DEY
        sta __auto_yLo              ; new byte is one address lower
        lda #$ff
        sta __auto_cpY+1            ; Y value that signals a wrap below zero
        lda #_DEC_ZP
        sta __auto_yHi              ; wrap borrows from the pointer high byte
        bne patched                 ; always taken, A holds a non zero opcode

rightward
        ; Left to right
        lda #%00000001
        sta __auto_cpBit+1          ; last mask position before the byte changes
        lda #_LSR
        sta __auto_shBit            ; the mask moves with lsr
        lda #%00100000
        sta __auto_ldBit+1          ; first mask position in the new byte
        lda #_INY
        sta __auto_yLo              ; new byte is one address higher
        lda #$00
        sta __auto_cpY+1            ; Y value that signals a wrap past 255
        lda #_INC_ZP
        sta __auto_yHi              ; wrap carries into the pointer high byte
patched
        ; Pixel counter
        tya                         ; Y still holds dy
        tax
        inx                         ; X = dy+1 pixels to plot

        ; Mask of the first pixel
        ldy _CurrentPixelX
        lda _TableBit6Reverse,y     ; 4
        sta curBit

        ; Fold the low byte of tmp0 and the column offset into Y and leave
        ; tmp0 page aligned, so that stepping a row is a plain add on Y.
        ; TODO from the original author, self modifying code here?
        lda _TableDiv6,y
        clc
        adc tmp0
        tay
        lda #0
        sta tmp0
        bcc pointerReady            ; lda and sta leave the carry of the adc intact
        inc tmp0+1
pointerReady
        ; Starting error term
        lda dy
        lsr
        eor #$ff                    ; A = NOT(dy/2)
        clc                         ; 2
        bcc plotSameColumn          ; 3   always taken
        ; here A = error term, Y = low address byte, X = dy+1, tmp0 low byte = 0

carryRow                            ; 9
        inc tmp0+1                  ; 5
        clc                         ; 2
        bcc rowReady                ; 3   always taken

plotSameColumn
        sta save_a                  ; 3 = 3
        ; Plot the pixel
        lda curBit                  ; 3
        eor (tmp0),y                ; 5*
        sta (tmp0),y                ; 6*= 14**

        dex                         ; 2   all pixels done?
        beq lineDone                ; 2/3= 4/5
stepRow
        ; Move the screen address one row down (Y += 40)
        tya                         ; 2
        adc #40                     ; 2
        tay                         ; 2
        bcs carryRow                ; 2/13 ~16% taken
rowReady                            ; = 9.76 average

        lda save_a                  ; 3
__auto_dx
        adc #00                     ; 2   +DX, operand patched
        bcc plotSameColumn          ; 2/3= 7/8 ~50% taken

        ; The error term overflowed, step one pixel sideways
__auto_dy
        sbc #00                     ; 2   -DY, operand patched
        sta save_a                  ; 3 = 5

        lda curBit                  ; 3
__auto_cpBit
        cmp #%00100000              ; 2   patched to %00100000 or %00000001
        beq crossByte               ; 2/14.07 ~17% taken
__auto_shBit
        asl                         ; 2   patched to asl or lsr, clears carry
maskReady
        sta curBit                  ; 3 =~13.71

        ; Plot the pixel
        eor (tmp0),y                ; 5*
        sta (tmp0),y                ; 6*= 11**
        dex                         ; 2   all pixels done?
        bne stepRow                 ; 2/3= 4/5
lineDone
        rts

crossByte
        ; The mask left its byte, restart it in the neighbouring byte
__auto_ldBit
        lda #%00000001              ; 2   patched to %00000001 or %00100000
__auto_yLo
        dey                         ; 2   patched to dey or iny
__auto_cpY
        cpy #$ff                    ; 2   patched to $ff or $00
        clc                         ; 2   TODO from the original author, optimize
        bne maskReady               ; 2/3 ~99% taken
__auto_yHi
        dec tmp0+1                  ; 5   patched to dec or inc
        bcc maskReady               ; 3   always taken, carry was cleared above

        ; Timings
        ; x  ,y++  = 38.76** (50%)
        ; x++,y++  = 51.47** (50%)
        ; average  = 45.11**

        ; Second set of figures kept from the original source
        ; (presumably an earlier measurement)
        ; x  ,y++  = 54.12**
        ; x++,y++  = 57.12**
        ; average  = 55.62**
.)

  ViewVC Help
Powered by ViewVC 1.1.26