; Tags:
;-------------------------------------------------------------------------------
; Module ARM_MEMORY
;
; Hand-written ARM-mode (CODE32) memory helpers: copy, fill and XOR-in-place.
; Syntax:  IAR assembler directives (MODULE / PUBLIC / SECTION / END).
; Calling: arguments arrive in R0..R2; every routine returns with "bx LR".
;          R4 and above are saved and restored on the stack where they are used.
;
; Flag trick used by the "trailing" code in several routines:
;   movs Rx, R2, LSL #28  ->  C = bit 4 of R2, N = bit 3 of R2
;   movs Rx, R2, LSL #30  ->  C = bit 2 of R2, N = bit 1, Z = (bits 1..0 == 0)
;   movs Rx, R2, LSL #31  ->  C = bit 1 of R2, N = bit 0 of R2
; Only the low bits of the remaining count matter there, so the count may
; already have been driven negative by a preceding "subs".
;-------------------------------------------------------------------------------
        MODULE  ARM_MEMORY

        PUBLIC  ARM_MEMCPY
        PUBLIC  ARM_MEMSET
        PUBLIC  ARM_MEMSET8
        PUBLIC  ARM_MEMSET16
        PUBLIC  ARM_MEMSET32
        ; The XOR routines are documented below with C prototypes, so they are
        ; exported as well (they were unreachable from other modules before).
        PUBLIC  arm_memxor
        PUBLIC  arm_memxor8
        PUBLIC  arm_memxor16
        PUBLIC  arm_memxor32

        SECTION .text:CODE:NOROOT(2)
        CODE32

;-------------------------------------------------------------------------------
; void ARM_MEMCPY(void* pDest, void* pSrc, U32 NumBytes)
;
; Function description
;   Copy NumBytes bytes from pSrc to pDest. Destination is word aligned first;
;   a mis-aligned source is then handled by loading aligned words and shifting.
;
; Register usage:
;   R0    pDest   (advanced while copying)
;   R1    pSrc    (advanced while copying)
;   R2    NumBytes remaining
;   R3    data transfer / source mis-alignment
;   R4    data transfer (saved on stack in the bulk path)
;   R12   data transfer / destination mis-alignment
;   R14   data transfer in the bulk path (LR is saved on stack there)
;-------------------------------------------------------------------------------
ARM_MEMCPY:
        cmp     R2, #+3                         ; Fewer than 4 bytes in total ?
        bls     ARM_MEMCPY_HandleTrailingBytes  ; Yes -> byte transfers only

        ands    R12, R0, #+3                    ; R12 = pDest & 3
        beq     ARM_MEMCPY_DestIsDWordAligned   ; Destination already word aligned

;-------------------------------------------------------------------------------
; Copy 1..3 bytes so that the destination becomes word aligned.
; R12 = 1 -> 3 bytes, R12 = 2 -> 2 bytes, R12 = 3 -> 1 byte.
; NumBytes is adjusted by (R12 - 4), i.e. minus the number of bytes copied.
;
        ldrb    R3, [R1], #+1                   ; At least one byte is always needed
        cmp     R12, #+2                        ; LS: R12 <= 2, CC: R12 < 2
        add     R2, R2, R12                     ; NumBytes += mis-alignment (flags untouched)
        ldrbls  R12, [R1], #+1                  ; LS -> second byte
        strb    R3, [R0], #+1
        ldrbcc  R3, [R1], #+1                   ; CC -> third byte
        strbls  R12, [R0], #+1
        sub     R2, R2, #+4                     ; NumBytes -= 4
        strbcc  R3, [R0], #+1                   ; Destination is word aligned from here on

;-------------------------------------------------------------------------------
; Choose the transfer method depending on the source alignment
;
ARM_MEMCPY_DestIsDWordAligned:
        ands    R3, R1, #+3                     ; R3 = pSrc & 3
        beq     ARM_MEMCPY_HandleBulkWordData   ; Both aligned -> bulk word transfer

        subs    R2, R2, #+4
        bcc     ARM_MEMCPY_HandleTrailingBytes  ; Less than one word left -> bytes

        ldr     R12, [R1, -R3]!                 ; Word align pSrc and read the word that
                                                ; contains the first source byte(s)
        cmp     R3, #+2
        beq     ARM_MEMCPY_Loop16BitShift       ; Source offset 2

        bhi     ARM_MEMCPY_Loop24BitShift       ; Source offset 3

;-------------------------------------------------------------------------------
; Word-wise copy from a mis-aligned source.
;
; Each iteration shifts the left-over part of the previous aligned word into
; place, loads the next aligned word and merges its low part on top.
; On loop exit R1 still points at the last aligned word read, so it is moved
; forward by the source offset before the trailing bytes are copied.
;
ARM_MEMCPY_Loop8BitShift:                       ; Source offset 1
        mov     R3, R12, LSR #+8                ; Keep upper 3 bytes of previous word
        ldr     R12, [R1, #+4]!                 ; Load next aligned source word
        subs    R2, R2, #+4                     ; Decrement NumBytes
        orr     R3, R3, R12, LSL #+24           ; Merge in 1 byte of the new word
        str     R3, [R0], #+4                   ; Store complete word
        bcs     ARM_MEMCPY_Loop8BitShift

        add     R1, R1, #+1                     ; Restore byte-exact source address
        b       ARM_MEMCPY_HandleTrailingBytes

ARM_MEMCPY_Loop16BitShift:                      ; Source offset 2
        mov     R3, R12, LSR #+16               ; Keep upper 2 bytes of previous word
        ldr     R12, [R1, #+4]!                 ; Load next aligned source word
        subs    R2, R2, #+4                     ; Decrement NumBytes
        orr     R3, R3, R12, LSL #+16           ; Merge in 2 bytes of the new word
        str     R3, [R0], #+4                   ; Store complete word
        bcs     ARM_MEMCPY_Loop16BitShift

        add     R1, R1, #+2                     ; Restore byte-exact source address
        b       ARM_MEMCPY_HandleTrailingBytes

ARM_MEMCPY_Loop24BitShift:                      ; Source offset 3
        mov     R3, R12, LSR #+24               ; Keep upper byte of previous word
        ldr     R12, [R1, #+4]!                 ; Load next aligned source word
        subs    R2, R2, #+4                     ; Decrement NumBytes
        orr     R3, R3, R12, LSL #+8            ; Merge in 3 bytes of the new word
        str     R3, [R0], #+4                   ; Store complete word
        bcs     ARM_MEMCPY_Loop24BitShift

        add     R1, R1, #+3                     ; Restore byte-exact source address
        b       ARM_MEMCPY_HandleTrailingBytes

;-------------------------------------------------------------------------------
; Source and destination word aligned: blocks of 8 words (32 bytes)
;
ARM_MEMCPY_HandleBulkWordData:
        subs    R2, R2, #+0x20
        stmdb   SP!, {R4, LR}                   ; R4 and LR are used as data registers
        bcc     ARM_MEMCPY_HandleTrailingWords  ; Fewer than 32 bytes

ARM_MEMCPY_LoopHandleBulkWord:
        ldm     R1!, {R3, R4, R12, LR}          ; Transfer 16 bytes at once
        stm     R0!, {R3, R4, R12, LR}
        ldm     R1!, {R3, R4, R12, LR}          ; Transfer 16 bytes at once
        stm     R0!, {R3, R4, R12, LR}
        subs    R2, R2, #+0x20
        bcs     ARM_MEMCPY_LoopHandleBulkWord

;-------------------------------------------------------------------------------
; Up to 7 trailing words
;
ARM_MEMCPY_HandleTrailingWords:
        movs    R12, R2, LSL #28                ; C = bit 4, N = bit 3 of NumBytes

        ldmcs   R1!, {R3, R4, R12, LR}          ; Bit 4 set -> 16 bytes
        stmcs   R0!, {R3, R4, R12, LR}
        ldmmi   R1!, {R3, R4}                   ; Bit 3 set -> 8 bytes
        stmmi   R0!, {R3, R4}

        movs    R12, R2, LSL #+30               ; C = bit 2, Z = no trailing bytes

        ldmia   SP!, {R4, LR}                   ; Restore (does not change flags)
        ldrcs   R3, [R1], #+4                   ; Bit 2 set -> 4 bytes
        strcs   R3, [R0], #+4
        bxeq    LR                              ; Bits 1..0 clear -> done

;-------------------------------------------------------------------------------
; Up to 3 trailing bytes
;   NumBytes & 1 -> N flag -> one byte
;   NumBytes & 2 -> C flag -> two bytes
;
ARM_MEMCPY_HandleTrailingBytes:
        movs    R2, R2, LSL #+31                ; C = bit 1, N = bit 0 of NumBytes

        ldrbmi  R2, [R1], #+1                   ; Bit 0 set -> 1 byte
        ldrbcs  R3, [R1], #+1                   ; Bit 1 set -> 2 bytes
        ldrbcs  R12, [R1], #+1
        strbmi  R2, [R0], #+1
        strbcs  R3, [R0], #+1
        strbcs  R12, [R0], #+1
        bx      LR


;-------------------------------------------------------------------------------
; void ARM_MEMSET(void* pDest, U32 c, U32 NumBytes)
;
; Function description
;   Fill NumBytes bytes at pDest with the byte value c.
;   c is replicated with "orr" into all four byte lanes, so any bits set above
;   bit 7 of c are merged into the fill pattern as well.
;
; Register usage:
;   R0    pDest   (advanced while filling)
;   R1    c, replicated to 32 bits
;   R2    NumBytes remaining
;   R3    mis-alignment, then copy of the fill word
;   R4,R5 copies of the fill word (saved on stack)
;   R6    scratch for flag generation (saved on stack)
;-------------------------------------------------------------------------------
ARM_MEMSET:
        orr     R1, R1, R1, LSL #+8             ; Replicate c to 16 bits
        orr     R1, R1, R1, LSL #+16            ; Replicate c to 32 bits

        cmp     R2, #+3                         ; Fewer than 4 bytes in total ?
        bls     ARM_MEMSET_HandleTrailingBytes  ; Yes -> byte transfers only

        ands    R3, R0, #+3                     ; R3 = pDest & 3
        beq     ARM_MEMSET_DestIsAligned        ; Destination already word aligned

; Write 1..3 bytes so that the destination becomes word aligned

        strb    R1, [R0], #+1                   ; At least one byte is always needed
        cmp     R3, #+2                         ; LS: R3 <= 2, CC: R3 < 2
        add     R2, R2, R3                      ; NumBytes += mis-alignment (flags untouched)
        strbls  R1, [R0], #+1                   ; LS -> second byte
        sub     R2, R2, #+4                     ; NumBytes -= 4
        strbcc  R1, [R0], #+1                   ; CC -> third byte

; Destination is word aligned: blocks of 8 words (32 bytes)

ARM_MEMSET_DestIsAligned:
ARM_MEMSET_HandleBulkWordData:
        stmdb   SP!, {R4, R5, R6}

        mov     R3, R1, LSL #+0                 ; Four registers hold the fill word
        mov     R4, R1, LSL #+0
        mov     R5, R1, LSL #+0

        subs    R2, R2, #+0x20                  ; 32 bytes = 8 words
        bcc     ARM_MEMSET_HandleTrailingWords

ARM_MEMSET_LoopHandleBulkWord:
        stm     R0!, {R1, R3, R4, R5}           ; 16 bytes
        stm     R0!, {R1, R3, R4, R5}           ; 16 bytes
        subs    R2, R2, #+0x20
        bcs     ARM_MEMSET_LoopHandleBulkWord

; Up to 7 trailing words

ARM_MEMSET_HandleTrailingWords:
        movs    R6, R2, LSL #28                 ; C = bit 4, N = bit 3 of NumBytes
        stmcs   R0!, {R1, R3, R4, R5}           ; Bit 4 set -> 16 bytes
        stmmi   R0!, {R1, R3}                   ; Bit 3 set -> 8 bytes

        movs    R6, R2, LSL #+30                ; C = bit 2, Z = no trailing bytes
        strcs   R1, [R0], #+4                   ; Bit 2 set -> 4 bytes

        ldmia   SP!, {R4, R5, R6}               ; Restore (does not change flags)
        bxeq    LR                              ; Bits 1..0 clear -> done

; Up to 3 trailing bytes

ARM_MEMSET_HandleTrailingBytes:
        movs    R2, R2, LSL #+31                ; C = bit 1, N = bit 0 of NumBytes
        strbmi  R1, [R0], #+1                   ; Bit 0 set -> 1 byte
        strbcs  R1, [R0], #+1                   ; Bit 1 set -> 2 bytes
        strbcs  R1, [R0], #+1
        bx      LR


;-------------------------------------------------------------------------------
; int ARM_MEMSET8(void* pDest, U32 c, U32 NumBytes);
;
; Function description
;   Fill NumBytes bytes at pDest with the byte value c. No alignment required.
;   No return value is set explicitly; R0 leaves as the advanced destination
;   pointer. The count comparisons are signed (blt/bge).
;
; Register usage:
;   R0    pDest (advanced)      R1  c (widened step by step to 32 bits)
;   R2    NumBytes remaining    R3, R4, R5  copies of the fill word
;-------------------------------------------------------------------------------
ARM_MEMSET8:
        stmdb   SP!, {R4, R5}
        cmp     R2, #4
        blt     ARM_MEMSET8_loop3               ; 0..3 bytes -> byte stores only

        ; Alignment is unknown: reach 16-bit alignment
        tst     R0, #1
        strneb  R1, [R0], #1
        subne   R2, R2, #1

        ; Now 16-bit aligned (widen 'c' to 16 bits)
        orr     R1, R1, R1, LSL #8
        tst     R0, #2
        strneh  R1, [R0], #2
        subne   R2, R2, #2

        ; Now 32-bit aligned (widen 'c' to 32 bits)
        orr     R1, R1, R1, LSL #16
        mov     R3, R1
        cmp     R2, #16
        blt     ARM_MEMSET8_loop2               ; Too short for the 16-byte loop
        tst     R0, #4
        strne   R1, [R0], #4
        subne   R2, R2, #4
        tst     R0, #8
        stmneia R0!, {R1, R3}
        subne   R2, R2, #8

        ; Now 128-bit aligned
        mov     R4, R1
        mov     R5, R1
ARM_MEMSET8_loop1:
        ; Store 4 words per iteration
        subs    R2, R2, #16
        stmgeia R0!, {R1, R3, R4, R5}
        bge     ARM_MEMSET8_loop1
        add     R2, R2, #16                     ; Undo the last (failed) subtraction

ARM_MEMSET8_loop2:
        ; Store up to 3 remaining words
        tst     R2, #8
        stmneia R0!, {R1, R3}
        tst     R2, #4
        strne   R1, [R0], #4
        and     R2, R2, #3

ARM_MEMSET8_loop3:
        ; Store up to 3 remaining bytes
        subs    R2, R2, #1
        strgeb  R1, [R0], #1
        subs    R2, R2, #1
        strgeb  R1, [R0], #1
        subs    R2, R2, #1
        strgeb  R1, [R0], #1
        ldmia   SP!, {R4, R5}
        bx      LR


;-------------------------------------------------------------------------------
; int ARM_MEMSET16(void* pDest, U32 c, U32 NumHalfWords);
;
; Function description
;   Fill NumHalfWords 16-bit items at pDest with c. The code uses strh/str/stm
;   on pDest without a byte step, i.e. it relies on pDest being 16-bit aligned.
;   c is replicated with "orr", so bits above bit 15 are merged into the pattern.
;
; Register usage:
;   R0    pDest (advanced)          R1  c (widened to 32 bits)
;   R2    NumHalfWords remaining    R3, R4, R5  copies of the fill word
;-------------------------------------------------------------------------------
ARM_MEMSET16:
        stmdb   SP!, {R4, R5}

        cmp     R2, #2
        blt     ARM_MEMSET16_HandleTrailingHalfWord ; 1 or 0 items

        ; Alignment is known to be at least 16-bit: reach 32-bit alignment
        tst     R0, #2
        strneh  R1, [R0], #2                    ; xxxx-xx10 --->
        subne   R2, R2, #1                      ; xxxx-xx00

        ; Now 32-bit aligned (widen 'c' to 32 bits)
        orr     R1, R1, R1, LSL #16
        mov     R4, R1

        cmp     R2, #8
        blt     ARM_MEMSET16_HandleTrailingWords ; 7, 6, ... 0 items

        tst     R0, #4
        strne   R1, [R0], #4                    ; xxxx-x100 --->
        subne   R2, R2, #2                      ; xxxx-x000

        ; Now 64-bit aligned
        tst     R0, #8
        stmneia R0!, {R1, R4}                   ; xxxx-1000 --->
        subne   R2, R2, #4                      ; xxxx-0000

ARM_MEMSET16_HandleBulkWordData:
        ; Now 128-bit aligned
        mov     R5, R1
        mov     R3, R1

ARM_MEMSET16_LoopHandleBulkWord:
        ; Store 4 words (8 items) per iteration
        subs    R2, R2, #8
        stmgeia R0!, {R1, R3, R4, R5}
        bge     ARM_MEMSET16_LoopHandleBulkWord
        add     R2, R2, #8                      ; Undo the last (failed) subtraction

ARM_MEMSET16_HandleTrailingWords:
        ; Store up to 3 remaining words
        tst     R2, #4
        stmneia R0!, {R1, R4}

        tst     R2, #2
        strne   R1, [R0], #4

        and     R2, R2, #1

ARM_MEMSET16_HandleTrailingHalfWord:
        ; Store up to 1 remaining 16-bit item
        subs    R2, R2, #1
        strgeh  R1, [R0], #2

        ldmia   SP!, {R4, R5}
        bx      LR


;-------------------------------------------------------------------------------
; int ARM_MEMSET32(void* pDest, U32 c, U32 NumWords);
;
; Function description
;   Fill NumWords 32-bit items at pDest with c. Only word stores are used,
;   i.e. the code relies on pDest being 32-bit aligned.
;
; Register usage:
;   R0    pDest (advanced)      R1  c
;   R2    NumWords remaining    R3, R4, R5  copies of c
;-------------------------------------------------------------------------------
ARM_MEMSET32:
        stmdb   SP!, {R4, R5}

        cmp     R2, #4
        blt     ARM_MEMSET32_loop2              ; 0..3 words

        ; Alignment is known to be at least 32-bit
        mov     R3, R1

        tst     R0, #4
        strne   R1, [R0], #4
        subne   R2, R2, #1

        ; Now 64-bit aligned
        tst     R0, #8
        stmneia R0!, {R1, R3}
        subne   R2, R2, #2

        ; Now 128-bit aligned
        mov     R4, R1
        mov     R5, R1
ARM_MEMSET32_loop1:
        ; Store 4 words per iteration
        subs    R2, R2, #4
        stmgeia R0!, {R1, R3, R4, R5}
        bge     ARM_MEMSET32_loop1
        add     R2, R2, #4                      ; Undo the last (failed) subtraction

ARM_MEMSET32_loop2:
        ; Store up to 3 remaining words
        subs    R2, R2, #1
        strge   R1, [R0], #4
        subs    R2, R2, #1
        strge   R1, [R0], #4
        subs    R2, R2, #1
        strge   R1, [R0], #4

        ldmia   SP!, {R4, R5}
        bx      LR


;-------------------------------------------------------------------------------
; __arm void arm_memxor(void* pDest, U32 c, U32 NumBytes);
;                             r0         r1     r2
;
; Function description
;   XOR every one of the NumBytes bytes at pDest with the byte value c, in place.
;   c is replicated with "orr" into all four byte lanes, so any bits set above
;   bit 7 of c are merged into the XOR pattern as well.
;
; Register usage:
;   R0    pDest (advanced)      R1  c, replicated to 32 bits
;   R2    NumBytes remaining    R3, R12  data / mis-alignment
;   R4-R6 data (saved on stack) R7  scratch for flag generation (saved on stack)
;-------------------------------------------------------------------------------
arm_memxor:
        orr     R1, R1, R1, LSL #+8             ; Replicate c to 16 bits
        orr     R1, R1, R1, LSL #+16            ; Replicate c to 32 bits

        cmp     R2, #+3                         ; Fewer than 4 bytes in total ?
        bls     arm_memxor_HandleTrailingBytes  ; Yes -> byte transfers only

        ands    R3, R0, #+3                     ; R3 = pDest & 3
        beq     arm_memxor_DestIsAligned        ; Destination already word aligned

;-
; XOR 1..3 bytes so that the destination becomes word aligned
;-
        ldrb    R12, [R0], #+0                  ; At least one byte is always needed
        eor     R12, R12, R1
        strb    R12, [R0], #+1

        cmp     R3, #+2                         ; LS: R3 <= 2, CC: R3 < 2
        add     R2, R2, R3                      ; NumBytes += mis-alignment (flags untouched)

        ldrbls  R3, [R0], #+0                   ; LS -> second byte (R3 is free now)
        eorls   R3, R3, R1
        strbls  R3, [R0], #+1

        sub     R2, R2, #+4                     ; NumBytes -= 4

        ldrbcc  R3, [R0], #+0                   ; CC -> third byte
        eorcc   R3, R3, R1
        strbcc  R3, [R0], #+1

;-
; Destination is word aligned: blocks of 8 words (32 bytes)
;-
arm_memxor_DestIsAligned:
arm_memxor_HandleBulkWordData:
        stmdb   SP!, {R4, R5, R6, R7}

        subs    R2, R2, #+0x20                  ; 32 bytes = 8 words
        bcc     arm_memxor_HandleTrailingWords

arm_memxor_LoopHandleBulkWord:
        ldm     R0, {R3, R4, R5, R6}            ; First 16 bytes
        eor     R3, R3, R1
        eor     R4, R4, R1
        eor     R5, R5, R1
        eor     R6, R6, R1
        stm     R0!, {R3, R4, R5, R6}

        ldm     R0, {R3, R4, R5, R6}            ; Second 16 bytes
        eor     R3, R3, R1
        eor     R4, R4, R1
        eor     R5, R5, R1
        eor     R6, R6, R1
        stm     R0!, {R3, R4, R5, R6}

        subs    R2, R2, #+0x20
        bcs     arm_memxor_LoopHandleBulkWord

;-
; Up to 7 trailing words
;-
arm_memxor_HandleTrailingWords:
        movs    R7, R2, LSL #28                 ; C = bit 4, N = bit 3 of NumBytes

        ldmcs   R0, {R3, R4, R5, R6}            ; Bit 4 set -> 16 bytes
        eorcs   R3, R3, R1
        eorcs   R4, R4, R1
        eorcs   R5, R5, R1
        eorcs   R6, R6, R1
        stmcs   R0!, {R3, R4, R5, R6}

        ldmmi   R0, {R3, R4}                    ; Bit 3 set -> 8 bytes
        eormi   R3, R3, R1
        eormi   R4, R4, R1
        stmmi   R0!, {R3, R4}

        movs    R7, R2, LSL #+30                ; C = bit 2, Z = no trailing bytes

        ldrcs   R3, [R0]                        ; Bit 2 set -> 4 bytes
        eorcs   R3, R3, R1
        strcs   R3, [R0], #+4

        ldmia   SP!, {R4, R5, R6, R7}           ; Restore (does not change flags)
        bxeq    LR                              ; Bits 1..0 clear -> done

;-
; Up to 3 trailing bytes.
; Byte loads are used here: a word load at this point would be unaligned for
; odd counts and would read past the end of the buffer.
;-
arm_memxor_HandleTrailingBytes:
        movs    R2, R2, LSL #+31                ; C = bit 1, N = bit 0 of NumBytes

        ldrbmi  R3, [R0]                        ; Bit 0 set -> 1 byte
        eormi   R3, R3, R1
        strbmi  R3, [R0], #+1

        ldrbcs  R3, [R0]                        ; Bit 1 set -> 2 bytes
        eorcs   R3, R3, R1
        strbcs  R3, [R0], #+1

        ldrbcs  R3, [R0]
        eorcs   R3, R3, R1
        strbcs  R3, [R0], #+1

        bx      LR


;-------------------------------------------------------------------------------
; __arm int arm_memxor8(void* pDest, U32 c, U32 NumBytes);
;                            r0         r1     r2
;
; Function description
;   XOR every one of the NumBytes bytes at pDest with the byte value c, in place.
;   No alignment required. No return value is set explicitly; R0 leaves as the
;   advanced destination pointer. The count comparisons are signed (blt/bge).
;
; Register usage:
;   R0    pDest (advanced)      R1  c, replicated to 32 bits
;   R2    NumBytes remaining    R3-R6  data (R4-R6 saved on stack)
;-------------------------------------------------------------------------------
arm_memxor8:
        stmdb   SP!, {R4, R5, R6}

        orr     R1, R1, R1, LSL #+8             ; Replicate c to 16 bits
        orr     R1, R1, R1, LSL #+16            ; Replicate c to 32 bits

        cmp     R2, #4
        blt     arm_memxor8_loop3               ; 0..3 bytes -> byte accesses only

        ; Alignment is unknown: reach 16-bit alignment
        tst     R0, #1

        ldrneb  R6, [R0]
        eorne   R6, R6, R1
        strneb  R6, [R0], #1

        subne   R2, R2, #1

        ; Now 16-bit aligned: reach 32-bit alignment
        tst     R0, #2

        ldrneh  R6, [R0]
        eorne   R6, R6, R1
        strneh  R6, [R0], #2

        subne   R2, R2, #2

        ; Now 32-bit aligned
        cmp     R2, #16
        blt     arm_memxor8_loop2               ; Too short for the 16-byte loop
        tst     R0, #4

        ldrne   R6, [R0]
        eorne   R6, R6, R1
        strne   R6, [R0], #4

        subne   R2, R2, #4

        ; Now 64-bit aligned
        tst     R0, #8

        ldmneia R0, {R3, R6}
        eorne   R3, R3, R1
        eorne   R6, R6, R1
        stmneia R0!, {R3, R6}

        subne   R2, R2, #8

        ; Now 128-bit aligned
arm_memxor8_loop1:
        ; XOR 4 words per iteration
        subs    R2, R2, #16

        ldmgeia R0, {R3, R4, R5, R6}
        eorge   R3, R3, R1
        eorge   R4, R4, R1
        eorge   R5, R5, R1
        eorge   R6, R6, R1
        stmgeia R0!, {R3, R4, R5, R6}

        bge     arm_memxor8_loop1
        add     R2, R2, #16                     ; Undo the last (failed) subtraction

arm_memxor8_loop2:
        ; XOR up to 3 remaining words
        tst     R2, #8

        ldmneia R0, {R3, R4}
        eorne   R3, R3, R1
        eorne   R4, R4, R1
        stmneia R0!, {R3, R4}

        tst     R2, #4

        ldrne   R3, [R0]
        eorne   R3, R3, R1
        strne   R3, [R0], #4

        and     R2, R2, #3

arm_memxor8_loop3:
        ; XOR up to 3 remaining bytes.
        ; Each store writes the XOR result (R3), not the pattern (R1).
        subs    R2, R2, #1

        ldrgeb  R3, [R0]
        eorge   R3, R3, R1
        strgeb  R3, [R0], #1

        subs    R2, R2, #1

        ldrgeb  R3, [R0]
        eorge   R3, R3, R1
        strgeb  R3, [R0], #1

        subs    R2, R2, #1

        ldrgeb  R3, [R0]
        eorge   R3, R3, R1
        strgeb  R3, [R0], #1

        ldmia   SP!, {R4, R5, R6}
        bx      LR


;-------------------------------------------------------------------------------
; __arm int arm_memxor16(void* pDest, U32 c, U32 NumHalfWords);
;                             r0         r1     r2
;
; Function description
;   XOR every one of the NumHalfWords 16-bit items at pDest with c, in place.
;   The code uses ldrh/ldr/ldm on pDest without a byte step, i.e. it relies on
;   pDest being 16-bit aligned. c is replicated with "orr", so bits above
;   bit 15 are merged into the pattern.
;
; Register usage:
;   R0    pDest (advanced)          R1  c, replicated to 32 bits
;   R2    NumHalfWords remaining    R3-R6  data (R4-R6 saved on stack)
;-------------------------------------------------------------------------------
arm_memxor16:
        stmdb   SP!, {R4, R5, R6}
        orr     R1, R1, R1, LSL #+16            ; Replicate c to 32 bits

        cmp     R2, #2
        blt     arm_memxor16_HandleTrailingHalfWord ; 1 or 0 items

        ; Alignment is known to be at least 16-bit: reach 32-bit alignment
        tst     R0, #2

        ldrneh  R6, [R0]
        eorne   R6, R6, R1
        strneh  R6, [R0], #2                    ; xxxx-xx10 --->

        subne   R2, R2, #1                      ; xxxx-xx00

        ; Now 32-bit aligned
        cmp     R2, #8
        blt     arm_memxor16_HandleTrailingWords ; 7, 6, ... 0 items

        tst     R0, #4

        ldrne   R3, [R0]
        eorne   R3, R3, R1
        strne   R3, [R0], #4                    ; xxxx-x100 --->

        subne   R2, R2, #2                      ; xxxx-x000

        ; Now 64-bit aligned
        tst     R0, #8

        ldmneia R0, {R3, R4}
        eorne   R3, R3, R1
        eorne   R4, R4, R1
        stmneia R0!, {R3, R4}                   ; xxxx-1000 --->

        subne   R2, R2, #4                      ; xxxx-0000

arm_memxor16_HandleBulkWordData:
        ; Now 128-bit aligned

arm_memxor16_LoopHandleBulkWord:
        ; XOR 4 words (8 items) per iteration
        subs    R2, R2, #8

        ldmgeia R0, {R3, R4, R5, R6}
        eorge   R3, R3, R1
        eorge   R4, R4, R1
        eorge   R5, R5, R1
        eorge   R6, R6, R1
        stmgeia R0!, {R3, R4, R5, R6}

        bge     arm_memxor16_LoopHandleBulkWord
        add     R2, R2, #8                      ; Undo the last (failed) subtraction

arm_memxor16_HandleTrailingWords:
        ; XOR up to 3 remaining words
        tst     R2, #4

        ldmneia R0, {R3, R4}
        eorne   R3, R3, R1
        eorne   R4, R4, R1
        stmneia R0!, {R3, R4}

        tst     R2, #2

        ldrne   R3, [R0]
        eorne   R3, R3, R1
        strne   R3, [R0], #4

        and     R2, R2, #1

arm_memxor16_HandleTrailingHalfWord:
        ; XOR up to 1 remaining 16-bit item
        subs    R2, R2, #1

        ldrgeh  R3, [R0]
        eorge   R3, R3, R1
        strgeh  R3, [R0], #2

        ldmia   SP!, {R4, R5, R6}
        bx      LR


;-------------------------------------------------------------------------------
; __arm int arm_memxor32(void* pDest, U32 c, U32 NumWords);
;                             r0         r1     r2
;
; Function description
;   XOR every one of the NumWords 32-bit items at pDest with c, in place.
;   Only word accesses are used, i.e. the code relies on pDest being 32-bit
;   aligned.
;
; Register usage:
;   R0    pDest (advanced)      R1  c
;   R2    NumWords remaining    R3-R6  data (R4-R6 saved on stack)
;-------------------------------------------------------------------------------
arm_memxor32:
        stmdb   SP!, {R4, R5, R6}

        cmp     R2, #4
        blt     arm_memxor32_loop2              ; 0..3 words

        ; Alignment is known to be at least 32-bit, is it 64-bit aligned ?
        tst     R0, #4
        ; No, it is only 32-bit aligned
        ldrne   R3, [R0]
        eorne   R3, R3, R1
        strne   R3, [R0], #4
        subne   R2, R2, #1

        ; Now 64-bit aligned, is it 128-bit aligned ?
        tst     R0, #8
        ; No, it is only 64-bit aligned
        ldmneia R0, {R3, R4}
        eorne   R3, R3, R1
        eorne   R4, R4, R1
        stmneia R0!, {R3, R4}                   ; xxxx-1000 --->
        subne   R2, R2, #2

        ; Now 128-bit aligned
arm_memxor32_loop1:
        ; XOR 4 words per iteration
        subs    R2, R2, #4

        ldmgeia R0, {R3, R4, R5, R6}
        eorge   R3, R3, R1
        eorge   R4, R4, R1
        eorge   R5, R5, R1
        eorge   R6, R6, R1
        stmgeia R0!, {R3, R4, R5, R6}

        bge     arm_memxor32_loop1
        add     R2, R2, #4                      ; Undo the last (failed) subtraction

arm_memxor32_loop2:
        ; XOR up to 3 remaining words

        subs    R2, R2, #1
        ldrge   R3, [R0]
        eorge   R3, R3, R1
        strge   R3, [R0], #4

        subs    R2, R2, #1
        ldrge   R3, [R0]
        eorge   R3, R3, R1
        strge   R3, [R0], #4

        subs    R2, R2, #1
        ldrge   R3, [R0]
        eorge   R3, R3, R1
        strge   R3, [R0], #4

        ldmia   SP!, {R4, R5, R6}
        bx      LR


        END
; Tags:
; Original article: http://www.cnblogs.com/shangdawei/p/4651343.html