Loading...
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
/*
 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 * 
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 */ 																							

/* Emulate64.s
 *
 * Software emulation of instructions not handled in hw, on 64-bit machines.
 */
 
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <ppc/proc_reg.h>
#include <ppc/exception.h>
#include <mach/machine/vm_param.h>
#include <ppc/cpu_capabilities.h>
#include <assym.s>

// CR bit set if the instruction is an "update" form (LFDU, STWU, etc):
#define	kUpdate	25

// CR bit set if interrupt occurred in trace mode (ie, MSR_SE_BIT):
#define kTrace	8

// CR bit set if notification on alignment interrupts is requested (notifyUnalignbit in spcFlags):
#define	kNotify	9

// CR bit distinguishes between alignment and program exceptions:
#define	kAlignment	10



// *************************************
// * P R O G R A M   I N T E R R U P T *
// *************************************
//
// These are floating pt exceptions, illegal instructions, privileged mode violations,
// and traps.  All we're interested in at this low level is illegal instructions.
// The ones we "emulate" are:
//		DCBA,  which is not implemented in the IBM 970.  The emulation is to ignore it,
//			   as it is just a hint.
//		MCRXR, which is not implemented on the IBM 970, but is in the PPC ISA.
//
// Additionally, to facilitate debugging the alignment handler, we recognize a special
// diagnostic mode that is used to simulate alignment exceptions.  When in this mode,
// if the instruction has opcode==0 and the extended opcode is one of the X-form
// instructions that can take an alignment interrupt, then we change the opcode to
// 31 and pretend it got an alignment interrupt.  This exercises paths that
// are hard to drive or perhaps never driven on this particular CPU.

// Emulate64 -- entry point for program interrupts (illegal instruction etc).
// Clears the kAlignment CR flag so the common path knows this was NOT an
// alignment interrupt, then joins the alignment-assist code, which fetches
// and decodes the faulting instruction before branching back to
// a64HandleProgramInt.
        .text
        .globl	EXT(Emulate64)
        .align	5
LEXT(Emulate64)
        crclr	kAlignment						// not an alignment exception
        b		a64AlignAssistJoin				// join alignment handler
        
        
// Return from alignment handler with all the regs loaded for opcode emulation.
// On entry here: r20 = faulting instruction image, r29 = SRR1 (MSR at fault),
// r13 = save-area pointer.  We emulate only DCBA (ignored -- it is a hint) and
// MCRXR; everything else is passed along.
        
a64HandleProgramInt:
        rlwinm.	r0,r29,0,SRR1_PRG_ILL_INS_BIT,SRR1_PRG_ILL_INS_BIT	// illegal opcode?
        beq		a64PassAlong					// No, must have been trap or priv violation etc
        rlwinm	r3,r20,6,26,31					// right justify opcode field (bits 0-5)
        rlwinm	r4,r20,31,22,31					// right justify extended opcode field (bits 21-30)
        cmpwi	cr0,r3,31						// X-form?
        cmpwi	cr1,r4,758						// DCBA?
        cmpwi	cr4,r4,512						// MCRXR?
        crand	cr1_eq,cr0_eq,cr1_eq			// merge the two tests for DCBA
        crand	cr4_eq,cr0_eq,cr4_eq			// and for MCRXR
        beq++	cr1_eq,a64ExitEm				// was DCBA, so ignore
        bne--	cr4_eq,a64NotEmulated			// skip if not MCRXR
        
// Was MCRXR, so emulate: copy XER[32-35] into the target CR field, then
// clear those XER bits, operating on the saved context in the save area.

        ld		r3,savexer(r13)					// get the XER
        lwz		r4,savecr(r13)					// and the CR
        rlwinm	r5,r20,11,27,29					// get (CR# * 4) from instruction
        rlwinm	r6,r3,0,4,31					// zero XER[32-35] (also XER[0-31])
        sld		r4,r4,r5						// move target CR field to bits 32-35
        rlwimi	r4,r3,0,0,3						// move XER[32-35] into CR field
        stw		r6,savexer+4(r13)				// update XER
        srd		r4,r4,r5						// re-position CR
        stw		r4,savecr(r13)					// update CR
        b		a64ExitEm						// done

// Not an opcode we normally emulate.  If in special diagnostic mode and opcode=0,
// emulate as an alignment exception.  This special case is for test software.
// On entry: r3 = primary opcode, r4 = extended opcode (from a64HandleProgramInt.)

a64NotEmulated:
        lwz		r30,dgFlags(0)					// Get the flags
        rlwinm.	r0,r30,0,enaDiagEMb,enaDiagEMb	// Do we want to try to emulate something?
        beq++	a64PassAlong					// No emulation allowed
        cmpwi	r3,0							// opcode==0 ?
        bne		a64PassAlong					// not the special case
        oris	r20,r20,0x7C00					// change opcode to 31
        crset	kAlignment						// say we took alignment exception
        rlwinm	r5,r4,0,26+1,26-1				// mask Update bit (32) out of extended opcode
        rlwinm	r5,r5,0,0,31					// Clean out leftover junk from rlwinm

// Accept only extended opcodes for X-form instructions that can legitimately
// take an alignment interrupt.  r4 is the exact extended opcode; r5 has the
// update bit masked off so one compare covers both base and update forms.
        cmpwi	r4,1014							// dcbz/dcbz128 ?
        crmove	cr1_eq,cr0_eq
        cmpwi	r5,21							// ldx/ldux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,599							// lfdx/lfdux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,535							// lfsx/lfsux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,343							// lhax/lhaux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,790							// lhbrx ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,279							// lhzx/lhzux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,597							// lswi ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,533							// lswx ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,341							// lwax/lwaux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,534							// lwbrx ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,23							// lwz/lwzx ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,149							// stdx/stdux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,727							// stfdx/stfdux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,983							// stfiwx ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,663							// stfsx/stfsux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,918							// sthbrx ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,407							// sthx/sthux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,725							// stswi ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,661							// stswx ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r4,662							// stwbrx ?
        cror	cr1_eq,cr0_eq,cr1_eq
        cmpwi	r5,151							// stwx/stwux ?
        cror	cr1_eq,cr0_eq,cr1_eq
        
        beq++	cr1,a64GotInstruction			// it was one of the X-forms we handle
        crclr	kAlignment						// revert to program interrupt
        b		a64PassAlong					// not recognized extended opcode
        

// *****************************************
// * A L I G N M E N T   I N T E R R U P T *
// *****************************************
//
// We get here in exception context, ie with interrupts disabled, translation off, and
// in 64-bit mode, with:
//		r13 = save-area pointer, with general context already saved in it
//		cr6 = feature flags
// We preserve r13 and cr6.  Other GPRs and CRs, the LR and CTR are used.
//
// Current 64-bit processors (GPUL) handle almost all misaligned operations in hardware,
// so this routine usually isn't called very often.  Only floating pt ops that cross a page
// boundary and are not word aligned, and LMW/STMW can take exceptions to cacheable memory.
// However, in contrast to G3 and G4, any misaligned load/store will get an alignment
// interrupt on uncached memory.
//
// We always emulate scalar ops with a series of byte load/stores.  Doing so is no slower
// than LWZ/STW in cases where a scalar op gets an alignment exception.
//
// This routine supports all legal permutations of alignment interrupts occurring in user or
// supervisor mode, 32 or 64-bit addressing, and translation on or off.  We do not emulate
// instructions that go past the end of an address space, such as "LHZ -1(0)"; we just pass
// along the alignment exception rather than wrap around to byte 0.
//
// First, check for a few special cases such as virtual machines, etc.

        .globl	EXT(AlignAssist64)
        .align	5
LEXT(AlignAssist64)
        crset	kAlignment								// mark as alignment interrupt

a64AlignAssistJoin:										// join here from program interrupt handler
      	li		r0,0									// Get a 0
        mfsprg	r31,0									// get the per_proc data ptr
        mcrf	cr3,cr6									// save feature flags here...
        lwz		r21,spcFlags(r31)						// grab the special flags
        ld		r29,savesrr1(r13)						// get the MSR etc at the fault
        ld		r28,savesrr0(r13)						// get the EA of faulting instruction
       	stw		r0,savemisc3(r13)						// Assume we will handle this ok
        mfmsr	r26										// save MSR at entry
        rlwinm.	r0,r21,0,runningVMbit,runningVMbit		// Are we running a VM?
        lwz		r19,dgFlags(0)							// Get the diagnostics flags
        bne--	a64PassAlong							// yes, let the virtual machine monitor handle


// Set up the MSR shadow regs.  We turn on FP in this routine, and usually set DR and RI
// when accessing user space (the SLB is still set up with all the user space translations.)
// However, if the interrupt occurred in the kernel with DR off, we keep it off while
// accessing the "target" address space.  If we set DR to access the target space, we also
// set RI.  The RI bit tells the exception handlers to clear cr0 beq and return if we get an
// exception accessing the user address space.  We are careful to test cr0 beq after every such
// access.  We keep the following "shadows" of the MSR in global regs across this code:
//		r25 = MSR at entry, plus FP and probably DR and RI (used to access target space)
//		r26 = MSR at entry
//		r27 = free
//		r29 = SRR1 (ie, MSR at interrupt)
// Note that EE and IR are always off, and SF is always on in this code.

		rlwinm	r3,r29,0,MSR_DR_BIT,MSR_DR_BIT			// was translation on at fault?
        rlwimi	r3,r3,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT	// if DR was set, set RI too
        or		r25,r26,r3								// assemble MSR to use accessing target space
        

// Because the DSISR and DAR are either not set or are not to be trusted on some 64-bit
// processors on an alignment interrupt, we must fetch the faulting instruction ourselves,
// then decode/hash the opcode and reconstruct the EA manually.

        mtmsr	r25					// turn on FP and (if it was on at fault) DR and RI
        isync						// wait for it to happen
		cmpw	r0,r0				// turn on beq so we can check for DSIs
        lwz		r20,0(r28)			// fetch faulting instruction, probably with DR on
        bne--	a64RedriveAsISI		// got a DSI trying to fetch it, pretend it was an ISI
        mtmsr	r26					// turn DR back off
        isync						// wait for it to happen


// Set a few flags while we wait for the faulting instruction to arrive from cache.

        rlwinm.	r0,r29,0,MSR_SE_BIT,MSR_SE_BIT				// Were we single stepping?
		stw		r20,savemisc2(r13)	// Save the instruction image in case we notify
        crnot	kTrace,cr0_eq
        rlwinm.	r0,r19,0,enaNotifyEMb,enaNotifyEMb			// Should we notify?
        crnot	kNotify,cr0_eq        


// Hash the instruction into a 5-bit value "AAAAB" used to index the branch table, and a
// 1-bit kUpdate flag, as follows:
//  - for X-form instructions (with primary opcode 31):
//       the "AAAA" bits are bits 21-24 of the instruction
//       the "B" bit is the XOR of bits 29 and 30
//       the update bit is instruction bit 25
//	- for D and DS-form instructions (actually, any primary opcode except 31):
//       the "AAAA" bits are bits 1-4 of the instruction
//       the "B" bit is 0
//       the update bit is instruction bit 5
//
// Just for fun (and perhaps a little speed on deep-pipe machines), we compute the hash,
// update flag, and EA without branches and with ipc >= 2.
//
// When we "bctr" to the opcode-specific routine, the following are all set up:
//		MSR = EE and IR off, SF and FP on
//		r12 = full 64-bit EA (r17 is clamped EA)
//		r13 = save-area pointer (physical)
//		r14 = ptr to saver0 in save-area (ie, to base of GPRs)
//		r15 = 0x00000000FFFFFFFF if 32-bit mode fault, 0xFFFFFFFFFFFFFFFF if 64
//		r16 = RA * 8 (ie, reg# not reg value)
//		r17 = EA, clamped to 32 bits if 32-bit mode fault (see also r12)
//		r18 = (RA|0) (reg value)
//		r19 = -1 if X-form, 0 if D-form
//		r20 = faulting instruction
//		r21 = RT * 8 (ie, reg# not reg value)
//		r22 = addr(aaFPopTable)+(RT*32), ie ptr to floating pt table for target register
//		r25 = MSR at entrance, probably with DR and RI set (for access to target space)
//		r26 = MSR at entrance
//		r27 = free
//		r28 = SRR0 (ie, EA of faulting instruction)
//		r29 = SRR1 (ie, MSR at fault)
//		r30 = scratch, usually user data
//		r31 = per-proc pointer
//		cr2 = kTrace, kNotify, and kAlignment flags
//      cr3 = saved copy of feature flags used in lowmem vector code
//		cr6 = bits 24-27 of CR are bits 24-27 of opcode if X-form, or bits 4-5 and 00 if D-form
//			  bit 25 is the kUpdate flag, set for update form instructions
//		cr7 = bits 28-31 of CR are bits 28-31 of opcode if X-form, or 0 if D-form

a64GotInstruction:					// here from program interrupt with instruction in r20
        rlwinm	r21,r20,6+6,20,25	// move the primary opcode (bits 0-6) to bits 20-25
        la		r14,saver0(r13)		// r14 <- base address of GPR registers
        xori	r19,r21,0x07C0		// iff primary opcode is 31, set r19 to 0
        rlwinm	r16,r20,16+3,24,28	// r16 <- RA*8
        subi	r19,r19,1			// set bit 0 iff X-form (ie, if primary opcode is 31)
        rlwinm	r17,r20,21+3,24,28	// r17 <- RB*8 (if X-form)
        sradi	r19,r19,63			// r19 <- -1 if X-form, 0 if D-form
        extsh	r22,r20				// r22 <- displacement (if D-form)

        ldx		r23,r14,r17			// get (RB), if any
        and		r15,r20,r19			// instruction if X, 0 if D
        andc	r17,r21,r19			// primary opcode in bits 20-25 if D, 0 if X
        ldx		r18,r14,r16			// get (RA)
        subi	r24,r16,1			// set bit 0 iff RA==0
        or		r21,r15,r17			// r21 <- instruction if X, or bits 0-5 in bits 20-25 if D
        sradi	r24,r24,63			// r24 <- -1 if RA==0, 0 otherwise
        rlwinm	r17,r21,32-4,25,28	// shift opcode bits 21-24 to 25-28 (hash "AAAA" bits)
        lis		r10,ha16(a64BranchTable)	// start to build up branch table address
        rlwimi	r17,r21,0,29,29		// move opcode bit 29 into hash as start of "B" bit
        rlwinm	r30,r21,1,29,29		// position opcode bit 30 in position 29
        and		r12,r23,r19			// RB if X-form, 0 if D-form
        andc	r11,r22,r19			// 0 if X-form, sign extended displacement if D-form
        xor		r17,r17,r30			// bit 29 ("B") of hash is xor(bit29,bit30)
        addi	r10,r10,lo16(a64BranchTable)
        or		r12,r12,r11			// r12 <- (RB) or displacement, as appropriate
        lwzx	r30,r10,r17			// get address from branch table
        mtcrf	0x01,r21			// move opcode bits 28-31 to CR7
        sradi	r15,r29,32			// propagate SF bit from SRR1 (MSR_SF, which is bit 0)
        andc	r18,r18,r24			// r18 <- (RA|0)
        mtcrf	0x02,r21			// move opcode bits 24-27 to CR6 (kUpdate is bit 25)
        add		r12,r18,r12			// r12 <- 64-bit EA
        mtctr	r30					// set up branch address
        
        oris	r15,r15,0xFFFF		// start to fill low word of r15 with 1s
        rlwinm	r21,r20,11+3,24,28	// r21 <- RT * 8
        lis		r22,ha16(EXT(aaFPopTable))	// start to compute address of floating pt table
        ori		r15,r15,0xFFFF		// now bits 32-63 of r15 are 1s
        addi	r22,r22,lo16(EXT(aaFPopTable))
        and		r17,r12,r15			// clamp EA to 32 bits if fault occurred in 32-bit mode
        rlwimi	r22,r21,2,22,26		// move RT into aaFPopTable address (which is 1KB aligned)
        
        bf--	kAlignment,a64HandleProgramInt	// return to Program Interrupt handler
        bctr						// if alignment interrupt, jump to opcode-specific routine
        
        
// Floating-pt load single (lfs[u], lfsx[u])
// All FP handlers bounce through an aaFPopTable stub (r22 = entry for RT) to
// move data between emfp0 in the per-proc area and the target FP register.

a64LfsLfsx:
        bl		a64Load4Bytes		// get data in r30
        mtctr	r22					// set up address of "lfs fRT,emfp0(r31)"
        stw		r30,emfp0(r31)		// put word here for aaFPopTable routine
        bctrl						// do the lfs
        b		a64UpdateCheck		// update RA if necessary and exit
        
        
// Floating-pt store single (stfs[u], stfsx[u])

a64StfsStfsx:
        ori		r22,r22,8			// set dir==1 (ie, single store) in aaFPopTable
        mtctr	r22					// set up address of "stfs fRT,emfp0(r31)"
        bctrl						// execute the store into emfp0
        lwz		r30,emfp0(r31)		// get the word
        bl		a64Store4Bytes		// store r30 into user space
        b		a64UpdateCheck		// update RA if necessary and exit
        

// Floating-pt store as integer word (stfiwx)

a64Stfiwx:
        ori		r22,r22,16+8		// set size=1, dir==1 (ie, double store) in aaFPopTable
        mtctr	r22					// set up FP register table address
        bctrl						// double precision store into emfp0
        lwz		r30,emfp0+4(r31)	// get the low-order word
        bl		a64Store4Bytes		// store r30 into user space
        b		a64Exit				// successfully emulated
        

// Floating-pt load double (lfd[u], lfdx[u])

a64LfdLfdx:
        ori		r22,r22,16			// set Double bit in aaFPopTable address
        bl		a64Load8Bytes		// get data in r30
        mtctr	r22					// set up address of "lfd fRT,emfp0(r31)"
        std		r30,emfp0(r31)		// put doubleword here for aaFPopTable routine
        bctrl						// execute the load
        b		a64UpdateCheck		// update RA if necessary and exit


// Floating-pt store double (stfd[u], stfdx[u])

a64StfdStfdx:
        ori		r22,r22,16+8		// set size=1, dir==1 (ie, double store) in aaFPopTable address
        mtctr	r22					// address of routine to stfd RT
        bctrl						// store into emfp0
        ld		r30,emfp0(r31)		// get the doubleword
        bl		a64Store8Bytes		// store r30 into user space
        b		a64UpdateCheck		// update RA if necessary and exit


// Load halfword w 0-fill (lhz[u], lhzx[u])
// Integer handlers move data between user space (via the byte load/store
// subroutines) and the RT slot of the register file at r14+r21.

a64LhzLhzx:
        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
        stdx	r30,r14,r21			// store into RT slot in register file
        b		a64UpdateCheck		// update RA if necessary and exit


// Load halfword w sign fill (lha[u], lhax[u])

a64LhaLhax:
        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
        extsh	r30,r30				// sign-extend
        stdx	r30,r14,r21			// store into RT slot in register file
        b		a64UpdateCheck		// update RA if necessary and exit


// Load halfword byte reversed (lhbrx)

a64Lhbrx:
        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
        rlwinm	r3,r30,8,16,23		// reverse bytes into r3
        rlwimi	r3,r30,24,24,31
        stdx	r3,r14,r21			// store into RT slot in register file
        b		a64Exit				// successfully emulated


// Store halfword (sth[u], sthx[u])

a64SthSthx:
        ldx		r30,r14,r21			// get RT
        bl		a64Store2Bytes		// store r30 into user space
        b		a64UpdateCheck		// update RA if necessary and exit


// Store halfword byte reversed (sthbrx)

a64Sthbrx:
        addi	r21,r21,6			// point to low two bytes of RT
        lhbrx	r30,r14,r21			// load and reverse
        bl		a64Store2Bytes		// store r30 into user space
        b		a64Exit				// successfully emulated


// Load word w 0-fill (lwz[u], lwzx[u]), also lwarx.

a64LwzLwzxLwarx:
        andc	r3,r19,r20			// light bit 30 of r3 iff lwarx
        andi.	r0,r3,2				// is it lwarx?
        bne--	a64PassAlong		// yes, never try to emulate a lwarx
        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
        stdx	r30,r14,r21			// update register file
        b		a64UpdateCheck		// update RA if necessary and exit
        
        
// Load word w sign fill (lwa, lwax[u])

a64Lwa:
        crclr	kUpdate				// no update form of lwa (it's a reserved encoding)
a64Lwax:
        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
        extsw	r30,r30				// sign extend
        stdx	r30,r14,r21			// update register file
        b		a64UpdateCheck		// update RA if necessary and exit


// Load word byte reversed (lwbrx)

a64Lwbrx:
        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
        rlwinm	r3,r30,24,0,31		// flip bytes 1234 to 4123
        rlwimi	r3,r30,8,8,15		// r3 is now 4323
        rlwimi	r3,r30,8,24,31		// r3 is now 4321
        stdx	r3,r14,r21			// update register file
        b		a64Exit				// successfully emulated

        
// Store word (stw[u], stwx[u])

a64StwStwx:
        ldx		r30,r14,r21			// get RT
        bl		a64Store4Bytes		// store r30 into user space
        b		a64UpdateCheck		// update RA if necessary and exit


// Store word byte reversed (stwbrx)

a64Stwbrx:
        addi	r21,r21,4			// point to low word of RT
        lwbrx	r30,r14,r21			// load and reverse
        bl		a64Store4Bytes		// store r30 into user space
        b		a64Exit				// successfully emulated


// Load doubleword (ld[u], ldx[u]), also lwa.
// DS-form: the low two instruction bits select the variant, and must be
// subtracted back out of the EA since they were decoded as displacement bits.

a64LdLwa:							// these are DS form: ld=0, ldu=1, and lwa=2
        mtcrf	0x01,r20			// move DS field to cr7
        rlwinm	r3,r20,0,30,31		// must adjust EA by subtracting DS field
        sub		r12,r12,r3			// subtract from full 64-bit EA
        and		r17,r12,r15			// then re-clamp to 32 bits if necessary
        bt		30,a64Lwa			// handle lwa
        crmove	kUpdate,31			// if opcode bit 31 is set, it is ldu so set update flag
a64Ldx:
        bl		a64Load8Bytes		// load 8 bytes from user space into r30
        stdx	r30,r14,r21			// update register file
        b		a64UpdateCheck		// update RA if necessary and exit


// Store doubleword (stdx[u], std[u], stwcx)

a64StdxStwcx:
        bf--	30,a64PassAlong		// stwcx, so pass along alignment exception
        b		a64Stdx				// was stdx
a64StdStfiwx:						// if DS form: 0=std, 1=stdu, 2-3=undefined
        bt		30,a64Stfiwx		// handle stfiwx
        rlwinm	r3,r20,0,30,31		// must adjust EA by subtracting DS field
        mtcrf	0x01,r20			// move DS field to cr7
        sub		r12,r12,r3			// subtract from full 64-bit EA
        and		r17,r12,r15			// then re-clamp to 32 bits if necessary
        crmove	kUpdate,31			// if DS==1, then it is update form
a64Stdx:
        ldx		r30,r14,r21			// get RT
        bl		a64Store8Bytes		// store RT into user space
        b		a64UpdateCheck		// update RA if necessary and exit


// Dcbz and Dcbz128 (bit 10 distinguishes the two forms)
// Zeroes a 32- or 128-byte block with a loop of 8-byte stores.  If the fault
// came from the commpage in user mode, the user's cr7 is cleared as a flag
// for the commpage code.

a64DcbzDcbz128:
        andis.	r0,r20,0x0020		// bit 10 set?
        li		r3,0				// get a 0 to store
        li		r0,4				// assume 32-bit version, store 8 bytes 4x
        rldicr	r17,r17,0,63-5		// 32-byte align EA
		li		r4,_COMM_PAGE_BASE_ADDRESS
        beq		a64DcbzSetup		// it was the 32-byte version
        rldicr	r17,r17,0,63-7		// zero low 7 bits of EA
        li		r0,16				// store 8 bytes 16x
a64DcbzSetup:
		sub		r4,r28,r4			// get instruction offset from start of commpage
        and		r4,r4,r15			// mask off high-order bits if 32-bit mode
		cmpldi  r4,_COMM_PAGE_AREA_USED // did fault occur in commpage area?
        bge		a64NotCommpage		// not in commpage
        rlwinm.	r4,r29,0,MSR_PR_BIT,MSR_PR_BIT	// did fault occur in user mode?
        beq--	a64NotCommpage		// do not zero cr7 if kernel got alignment exception
        lwz		r4,savecr(r13)		// if we take a dcbz{128} in the commpage...
        rlwinm	r4,r4,0,0,27		// ...clear user's cr7...
        stw		r4,savecr(r13)		// ...as a flag for commpage code
a64NotCommpage:
        mtctr	r0
        cmpw	r0,r0				// turn cr0 beq on so we can check for DSIs
        mtmsr	r25					// turn on DR and RI so we can address user space
        isync						// wait for it to happen
a64DcbzLoop:
        std		r3,0(r17)			// store into user space
        bne--	a64RedriveAsDSI
        addi	r17,r17,8
        bdnz	a64DcbzLoop
        
        mtmsr	r26					// restore MSR
        isync						// wait for it to happen
        b		a64Exit


// Load and store multiple (lmw, stmw), distinguished by bit 25
// These entry stubs all compute a byte count in r22 and then join either
// a64LoadMultiple or a64StoreMultiple.

a64LmwStmw:
        subfic	r22,r21,32*8		// how many regs to load or store?
        srwi	r22,r22,1			// get bytes to load/store
        bf		25,a64LoadMultiple	// handle lmw
        b		a64StoreMultiple	// it was stmw
        
        
// Load string word immediate (lswi)

a64Lswi:
        rlwinm	r22,r20,21,27,31	// get #bytes in r22
        and		r17,r18,r15			// recompute EA as (RA|0), and clamp
        subi	r3,r22,1			// r22==0?
        rlwimi	r22,r3,6,26,26		// map count of 0 to 32
        b		a64LoadMultiple
        
        
// Store string word immediate (stswi)

a64Stswi:
        rlwinm	r22,r20,21,27,31	// get #bytes in r22
        and		r17,r18,r15			// recompute EA as (RA|0), and clamp
        subi	r3,r22,1			// r22==0?
        rlwimi	r22,r3,6,26,26		// map count of 0 to 32
        b		a64StoreMultiple
        
        
// Load string word indexed (lswx), also lwbrx

a64LswxLwbrx:
        bf		30,a64Lwbrx			// was lwbrx
        ld		r22,savexer(r13)	// get the xer
        rlwinm	r22,r22,0,25,31		// isolate the byte count
        b		a64LoadMultiple		// join common code
        
        
// Store string word indexed (stswx), also stwbrx

a64StswxStwbrx:
        bf		30,a64Stwbrx		// was stwbrx
        ld		r22,savexer(r13)	// get the xer
        rlwinm	r22,r22,0,25,31		// isolate the byte count
        b		a64StoreMultiple	// join common code


// Load multiple words.  This handles lmw, lswi, and lswx.
// Bytes are fetched 4 at a time with DR on, then packed into the register
// file with DR off; leftover (1-3) bytes are left-justified in the last word.

a64LoadMultiple:					// r22 = byte count, may be 0
        subic.	r3,r22,1			// get (#bytes-1)
        blt		a64Exit				// done if 0
        add		r4,r17,r3			// get EA of last operand byte
        and		r4,r4,r15			// clamp
        cmpld	r4,r17				// address space wrap?
        blt--	a64PassAlong		// pass along exception if so
        srwi.	r4,r22,2			// get # full words to load
        rlwinm	r22,r22,0,30,31		// r22 <- leftover byte count
        cmpwi	cr1,r22,0			// leftover bytes?
        beq		a64Lm3				// no words
        mtctr	r4					// set up word count
        cmpw	r0,r0				// set beq for DSI test
a64Lm2:
        mtmsr	r25					// turn on DR and RI
        isync						// wait for it to happen
        lbz		r3,0(r17)
        bne--	a64RedriveAsDSI		// got a DSI
        lbz		r4,1(r17)
        bne--	a64RedriveAsDSI		// got a DSI
        lbz		r5,2(r17)
        bne--	a64RedriveAsDSI		// got a DSI
        lbz		r6,3(r17)
        bne--	a64RedriveAsDSI		// got a DSI
        rlwinm	r30,r3,24,0,7		// pack bytes into r30
        rldimi	r30,r4,16,40
        rldimi	r30,r5,8,48
        rldimi	r30,r6,0,56
        mtmsr	r26					// turn DR back off so we can store into register file
        isync
        addi	r17,r17,4			// bump EA
        stdx	r30,r14,r21			// pack into register file
        addi	r21,r21,8			// bump register file offset
        rlwinm	r21,r21,0,24,28		// wrap around to 0
        bdnz	a64Lm2
a64Lm3:								// cr1/r22 = leftover bytes (0-3), cr0 beq set
        beq		cr1,a64Exit			// no leftover bytes
        mtctr	r22
        mtmsr	r25					// turn on DR so we can access user space
        isync
        lbz		r3,0(r17)			// get 1st leftover byte
        bne--	a64RedriveAsDSI		// got a DSI
        rlwinm	r30,r3,24,0,7		// position in byte 4 of r30 (and clear rest of r30)
        bdz		a64Lm4				// only 1 byte leftover
        lbz		r3,1(r17)			// get 2nd byte
        bne--	a64RedriveAsDSI		// got a DSI
        rldimi	r30,r3,16,40		// insert into byte 5 of r30
        bdz		a64Lm4				// only 2 bytes leftover
        lbz		r3,2(r17)			// get 3rd byte
        bne--	a64RedriveAsDSI		// got a DSI
        rldimi	r30,r3,8,48			// insert into byte 6
a64Lm4:
        mtmsr	r26					// turn DR back off so we can store into register file
        isync
        stdx	r30,r14,r21			// pack partially-filled word into register file
        b		a64Exit


// Store multiple words.  This handles stmw, stswi, and stswx.
// Entry:  r17 = user-space EA,  r22 = byte count (may be 0),
//         r14/r21 = register file base/offset,  r15 = address-space clamp mask,
//         r25/r26 = MSR images with DR on/off.
// Bytes are stored one at a time with DR on; cr0 ne after a store means the
// access took a DSI and we redrive.  An operand that wraps around the end of
// the (clamped) address space is passed along unhandled.

a64StoreMultiple:					// r22 = byte count, may be 0
        subic.	r3,r22,1			// get (#bytes-1)
        blt		a64Exit				// done if 0
        add		r4,r17,r3			// get EA of last operand byte
        and		r4,r4,r15			// clamp to address space size
        cmpld	r4,r17				// address space wrap?
        blt--	a64PassAlong		// pass along exception if so
        srwi.	r4,r22,2			// get # full words to store
        rlwinm	r22,r22,0,30,31		// r22 <- leftover byte count (0-3)
        cmpwi	cr1,r22,0			// leftover bytes?
        beq		a64Sm3				// no full words
        mtctr	r4					// set up word count
        cmpw	r0,r0				// turn on beq so we can check for DSIs
a64Sm2:								// loop once per full word
        ldx		r30,r14,r21			// get next register from register file
        addi	r21,r21,8			// bump register file offset (8 bytes per GPR)
        rlwinm	r21,r21,0,24,28		// wrap offset around to 0 (r31 -> r0)
        srwi	r3,r30,24			// shift the four bytes into position
        srwi	r4,r30,16
        srwi	r5,r30,8
        mtmsr	r25					// turn on DR so we can access user space
        isync						// wait for it to happen
        stb		r3,0(r17)
        bne--	a64RedriveAsDSI		// got a DSI
        stb		r4,1(r17)
        bne--	a64RedriveAsDSI		// got a DSI
        stb		r5,2(r17)
        bne--	a64RedriveAsDSI		// got a DSI
        stb		r30,3(r17)
        bne--	a64RedriveAsDSI		// got a DSI
        mtmsr	r26					// turn DR back off
        isync
        addi	r17,r17,4			// bump EA
        bdnz	a64Sm2
a64Sm3:								// r22 = 0-3, cr1 set on r22, cr0 beq set
        beq		cr1,a64Exit			// no leftover bytes
        ldx		r30,r14,r21			// get last register
        mtctr	r22
        mtmsr	r25					// turn on DR so we can access user space
        isync						// wait for it to happen
a64Sm4:								// store the 1-3 leftover bytes, high-order first
        rlwinm	r30,r30,8,0,31		// rotate next byte into low-order position
        stb		r30,0(r17)			// pack into user space
        addi	r17,r17,1			// bump user space ptr (addi does not disturb cr0)
        bne--	a64RedriveAsDSI		// got a DSI on the stb
        bdnz	a64Sm4
        mtmsr	r26					// turn DR back off
        isync
        b		a64Exit


// Subroutines to load bytes from user space.
// Entry:  r17 = user-space EA,  r15 = address-space clamp mask,
//         r25/r26 = MSR images with DR on/off.
// Exit:   r30 = value, right-justified and zero-filled;  DR off.
// Clobbers r3-r7.  Data is loaded byte-at-a-time so unaligned operands
// never fault; cr0 ne after a load means the access took a DSI.
// a64Load8Bytes falls through into a64Load4, which falls into a64Load2,
// all addressing relative to r7 = EA of the LAST operand byte.

a64Load2Bytes:						// load 2 bytes right-justified into r30
        addi	r7,r17,1			// get EA of last byte
        and		r7,r7,r15			// clamp to address space size
        cmpld	r7,r17				// address wrap?
        blt--	a64PassAlong		// yes -- pass exception along unhandled
        mtmsr	r25					// turn on DR so we can access user space
        isync						// wait for it to happen
        sub.	r30,r30,r30			// 0-fill dest and set beq for the DSI test
        b		a64Load2			// jump into routine
a64Load4Bytes:						// load 4 bytes right-justified into r30 (ie, low order word)
        addi	r7,r17,3			// get EA of last byte
        and		r7,r7,r15			// clamp
        cmpld	r7,r17				// address wrap?
        blt--	a64PassAlong		// yes
        mtmsr	r25					// turn on DR so we can access user space
        isync						// wait for it to happen
        sub.	r30,r30,r30			// 0-fill dest and set beq
        b		a64Load4			// jump into routine
a64Load8Bytes:						// load 8 bytes into r30
        addi	r7,r17,7			// get EA of last byte
        and		r7,r7,r15			// clamp
        cmpld	r7,r17				// address wrap?
        blt--	a64PassAlong		// yes
        mtmsr	r25					// turn on DR so we can access user space
        isync						// wait for it to happen
        sub.	r30,r30,r30			// 0-fill dest and set beq
        lbz		r3,-7(r7)			// get byte 0 (most significant)
        bne--	a64RedriveAsDSI		// got a DSI
        lbz		r4,-6(r7)			// and byte 1, etc
        bne--	a64RedriveAsDSI		// got a DSI
        lbz		r5,-5(r7)
        bne--	a64RedriveAsDSI		// got a DSI
        lbz		r6,-4(r7)
        bne--	a64RedriveAsDSI		// got a DSI
        rldimi	r30,r3,56,0			// position bytes 0-3 in upper word of r30
        rldimi	r30,r4,48,8
        rldimi	r30,r5,40,16
        rldimi	r30,r6,32,24
a64Load4:							// fall-through/entry: bytes 4-5 (r7 = EA of last byte)
        lbz		r3,-3(r7)
        bne--	a64RedriveAsDSI		// got a DSI
        lbz		r4,-2(r7)
        bne--	a64RedriveAsDSI		// got a DSI
        rldimi	r30,r3,24,32		// insert bytes 4 and 5 into r30
        rldimi	r30,r4,16,40
a64Load2:							// fall-through/entry: bytes 6-7 (r7 = EA of last byte)
        lbz		r3,-1(r7)
        bne--	a64RedriveAsDSI		// got a DSI
        lbz		r4,0(r7)
        bne--	a64RedriveAsDSI		// got a DSI
        mtmsr	r26					// turn DR back off
        isync
        rldimi	r30,r3,8,48			// insert bytes 6 and 7 into r30
        rldimi	r30,r4,0,56
        blr
        
        
// Subroutines to store bytes into user space.
// Entry:  r30 = value (right-justified),  r17 = user-space EA,
//         r15 = address-space clamp mask,  r25/r26 = MSR images with DR on/off.
// Exit:   DR off.  Clobbers r3-r7.
// Bytes are stored one at a time so unaligned operands never fault; cr0 ne
// after a store means the access took a DSI.  a64Store8Bytes falls through
// into a64Store4, which falls into a64Store2, all addressing relative to
// r7 = EA of the LAST operand byte.

a64Store2Bytes:						// store bytes 6 and 7 of r30
        addi	r7,r17,1			// get EA of last byte
        and		r7,r7,r15			// clamp to address space size
        cmpld	r7,r17				// address wrap?
        blt--	a64PassAlong		// yes -- pass exception along unhandled
        mtmsr	r25					// turn on DR so we can access user space
        isync						// wait for it to happen
        cmpw	r0,r0				// set beq so we can check for DSI
        b		a64Store2			// jump into routine
a64Store4Bytes:						// store bytes 4-7 of r30 (ie, low order word)
        addi	r7,r17,3			// get EA of last byte
        and		r7,r7,r15			// clamp
        cmpld	r7,r17				// address wrap?
        blt--	a64PassAlong		// yes
        mtmsr	r25					// turn on DR so we can access user space
        isync						// wait for it to happen
        cmpw	r0,r0				// set beq so we can check for DSI
        b		a64Store4			// jump into routine
a64Store8Bytes:						// r30 = bytes
        addi	r7,r17,7			// get EA of last byte
        and		r7,r7,r15			// clamp
        cmpld	r7,r17				// address wrap?
        blt--	a64PassAlong		// yes
        mtmsr	r25					// turn on DR so we can access user space
        isync						// wait for it to happen
        cmpw	r0,r0				// set beq so we can check for DSI
        rotldi	r3,r30,8			// rotate byte 0 into low-order position
        rotldi	r4,r30,16			// and byte 1
        rotldi	r5,r30,24			// and byte 2
        rotldi	r6,r30,32			// and byte 3
        stb		r3,-7(r7)			// store byte 0 (most significant)
        bne--	a64RedriveAsDSI		// got a DSI
        stb		r4,-6(r7)			// and byte 1 etc...
        bne--	a64RedriveAsDSI		// got a DSI
        stb		r5,-5(r7)
        bne--	a64RedriveAsDSI		// got a DSI
        stb		r6,-4(r7)
        bne--	a64RedriveAsDSI		// got a DSI
a64Store4:							// fall-through/entry: bytes 4-5 (r7 = EA of last byte)
        rotldi	r3,r30,40			// rotate byte 4 into position
        rotldi	r4,r30,48			// and byte 5
        stb		r3,-3(r7)
        bne--	a64RedriveAsDSI		// got a DSI
        stb		r4,-2(r7)
        bne--	a64RedriveAsDSI		// got a DSI
a64Store2:							// fall-through/entry: bytes 6-7 (r7 = EA of last byte)
        rotldi	r3,r30,56			// rotate byte 6 into position
        stb		r3,-1(r7)			// store byte 6
        bne--	a64RedriveAsDSI		// got a DSI
        stb		r30,0(r7)			// store byte 7, which is already positioned
        bne--	a64RedriveAsDSI		// got a DSI
        mtmsr	r26					// turn off DR
        isync
        blr
        
                
// Exit routines.
//   a64ExitEm       - rewrite the exception as T_EMULATE and take normal exit
//   a64PassAlong    - instruction not emulated; deliver the original
//                     alignment/program exception to the upper layers
//   a64UpdateCheck  - emulation succeeded; store the EA back into RA first
//                     if this was an update form (lwzu, stwux, etc)
//   a64Exit         - emulation succeeded; advance SRR0 past the instruction
//                     and eat the interrupt (T_IN_VAIN)
// All paths funnel through a64Exit2, which returns the exception code to
// EmulExit in r11.  r13 = save area;  r12 = 64-bit EA;  r16 = RA offset in
// register file;  r28 = SRR0;  cr3 holds saved feature flags.

a64ExitEm:
		li		r30,T_EMULATE			// Change exception code to emulate
		stw		r30,saveexception(r13)	// Save it
		b		a64Exit					// Join standard exit routine...

a64PassAlong:							// unhandled exception, just pass it along
        li		r0,1					// Set that the alignment/program exception was not emulated
        crset	kNotify					// return T_ALIGNMENT or T_PROGRAM
		stw		r0,savemisc3(r13)		// Set that emulation was not done
        crclr	kTrace					// not a trace interrupt
        b		a64Exit1
a64UpdateCheck:							// successfully emulated, may be update form
        bf		kUpdate,a64Exit			// update form?
        stdx	r12,r14,r16				// yes, store 64-bit EA into RA's register file slot
a64Exit:								// instruction successfully emulated
        addi	r28,r28,4				// bump SRR0 past the emulated instruction
        li		r30,T_IN_VAIN			// eat the interrupt since we emulated it
        and		r28,r28,r15				// clamp to address space size (32 vs 64)
        std		r28,savesrr0(r13)		// save, so we return to next instruction
a64Exit1:
        bt--	kTrace,a64Trace			// were we in single-step at fault?
        bt--	kNotify,a64Notify		// should we say T_ALIGNMENT anyway?
a64Exit2:
        mcrf	cr6,cr3					// restore feature flags into cr6
        mr		r11,r30					// pass back exception code (T_IN_VAIN etc) in r11
        b		EXT(EmulExit)			// return to exception processing


// Notification requested: pass exception upstairs even though it might have been emulated.
// Chooses T_ALIGNMENT or T_PROGRAM based on the kAlignment flag, then exits
// without setting the redrive bit.

a64Notify:
        li		r30,T_ALIGNMENT			// somebody wants to know about it (but don't redrive)
        bt		kAlignment,a64Exit2		// was an alignment exception
        li		r30,T_PROGRAM			// was an emulated instruction
        b		a64Exit2


// Emulate a trace interrupt after handling alignment interrupt.
// The emulated instruction completed while single-stepping, so rewrite the
// exception as T_TRACE and set SAVredrive so it is redriven on exit.

a64Trace:
        lwz		r9,SAVflags(r13)		// get the save-area flags
        li		r30,T_TRACE
        oris	r9,r9,hi16(SAVredrive)	// Set the redrive bit
        stw		r30,saveexception(r13)	// Set the exception code
        stw		r9,SAVflags(r13)		// Set the flags
        b		a64Exit2				// Exit and do trace interrupt...


// Got a DSI accessing user space.  Redrive.  One way this can happen is if another
// processor removes a mapping while we are emulating.
//   a64RedriveAsISI - the DSI happened while fetching the opcode itself, so
//                     re-present it as an instruction access (ISI)
//   a64RedriveAsDSI - the DSI happened on an emulated data access
// Both turn DR back off, rewrite the save area (exception code, DAR/DSISR or
// SRR1), set SAVredrive, and exit without tracing or notifying.

a64RedriveAsISI:						// this DSI happened fetching the opcode (r1==DSISR  r4==DAR)
        mtmsr	r26						// turn DR back off
        isync							// wait for it to happen
        li		r30,T_INSTRUCTION_ACCESS
        rlwimi	r29,r1,0,0,4			// insert the fault type from DSI's DSISR into SRR1 image
        std		r29,savesrr1(r13)		// update SRR1 to look like an ISI
        b		a64Redrive

a64RedriveAsDSI:						// r0==DAR  r1==DSISR
        mtmsr	r26						// turn DR back off
        isync							// wait for it to happen
        stw		r1,savedsisr(r13)		// Set the DSISR of failed access
        std		r0,savedar(r13)			// Set the address of the failed access
        li		r30,T_DATA_ACCESS		// Set failing data access code
a64Redrive:								// common tail: request redrive of exception code r30
        lwz		r9,SAVflags(r13)		// Pick up the flags
        stw		r30,saveexception(r13)	// Set the replacement code
        oris	r9,r9,hi16(SAVredrive)	// Set the redrive bit
        stw		r9,SAVflags(r13)		// Set redrive request
        crclr	kTrace					// don't take a trace interrupt
        crclr	kNotify					// don't pass alignment exception
        b		a64Exit2				// done
        

// This is the branch table, indexed by the "AAAAB" opcode hash.
// Exactly 32 entries (hash values 00000-11111); each is the 32-bit address
// of an emulation routine.  Instructions we never emulate (reservation forms
// like ldarx/stdcx, and unused hash values) dispatch to a64PassAlong.

a64BranchTable:
        .long	a64LwzLwzxLwarx		// 00000  lwz[u], lwzx[u], lwarx
        .long	a64Ldx				// 00001  ldx[u]
        .long	a64PassAlong		// 00010  ldarx 	(never emulate these)
        .long	a64PassAlong		// 00011
        .long	a64StwStwx			// 00100  stw[u], stwx[u]
        .long	a64StdxStwcx		// 00101  stdx[u], stwcx
        .long	a64PassAlong		// 00110
        .long	a64PassAlong		// 00111  stdcx		(never emulate these)
        .long	a64LhzLhzx			// 01000  lhz[u], lhzx[u]
        .long	a64PassAlong		// 01001
        .long	a64LhaLhax			// 01010  lha[u], lhax[u]
        .long	a64Lwax				// 01011  lwax[u]
        .long	a64SthSthx			// 01100  sth[u], sthx[u]
        .long	a64PassAlong		// 01101
        .long	a64LmwStmw			// 01110  lmw, stmw
        .long	a64PassAlong		// 01111
        .long	a64LfsLfsx			// 10000  lfs[u], lfsx[u]
        .long	a64LswxLwbrx		// 10001  lswx, lwbrx
        .long	a64LfdLfdx			// 10010  lfd[u], lfdx[u]
        .long	a64Lswi				// 10011  lswi
        .long	a64StfsStfsx		// 10100  stfs[u], stfsx[u]
        .long	a64StswxStwbrx		// 10101  stswx, stwbrx
        .long	a64StfdStfdx		// 10110  stfd[u], stfdx[u]
        .long	a64Stswi			// 10111  stswi
        .long	a64PassAlong		// 11000
        .long	a64Lhbrx			// 11001  lhbrx
        .long	a64LdLwa			// 11010  ld[u], lwa
        .long	a64PassAlong		// 11011
        .long	a64PassAlong		// 11100
        .long	a64Sthbrx			// 11101  sthbrx
        .long	a64StdStfiwx		// 11110  std[u], stfiwx
        .long	a64DcbzDcbz128		// 11111  dcbz, dcbz128