/* **********************************************************
 * Copyright (C) 1998-2000 VMware, Inc.
 * All Rights Reserved
 * **********************************************************/
#ifdef VMX86_DEVEL
char rcsId_task[] = "$Id: task.c,v 1.5 2003/09/13 11:01:55 jdolecek Exp $";
#else
#define FILECODE "F(302)"
#endif 



/*
 * task.c --
 *
 *      Task initialization and switching routines between the host
 *      and the monitor. 
 *     
 *      A task switch: 
 *          -saves the EFLAGS,CR0,CR2,CR4, and IDT
 *          -jumps to code on the shared page
 *              which saves the registers, GDT and CR3
 *              which then restores the registers, GDT and CR3
 *          -restores the IDT,CR0,CR2,CR4 and EFLAGS
 *                
 *      This file is pretty much independent of the host OS.
 *      
 */

/*
 * Must come before any linux header files, with the exception of
 * "linux/version.h", which is safe --hpreg
 */
#include "hostif.h"

#ifdef linux
#include "driver-config.h"

#ifdef USE_PERFCOUNTERS
#include "perfctr.h"
#endif

#ifdef KERNEL_2_1
#define EXPORT_SYMTAB
#endif
#endif /* linux */

#include "include/vm_types.h"
#include "include/vm_assert.h"
#include "include/modulecall.h"
#include "include/vm_assert.h"
#include "vmx86.h"
#include "task.h"
#include "include/taskswitch.h"
#include "include/vtrace.h"
#include "cpuid.h"

/*
 *----------------------------------------------------------------------
 *
 * Task_InitCrosspage --
 *
 *      Set up the crosspage that is used to switch to the monitor task.
 *      The user-supplied crosspage pointer from initParams is translated
 *      to a driver pointer and stored in vm->crosspage; the page is then
 *      seeded with the host's CR3, IDT register and task register, with
 *      its own host virtual/linear addresses, and with the two IRQ
 *      relocation offsets.
 *
 * Results:
 *      0 on success.
 *      1 if the crosspage pointer could not be translated (NULL).
 *
 * Side effects:
 *      vm->crosspage is overwritten (with NULL on failure) and the
 *      crosspage contents are modified.
 *
 *----------------------------------------------------------------------
 */

/*
 * MSR number that Task_Switch must save/zero/restore around the world
 * switch, or 0 when there is none. Set by Task_ProbeSysenterMSR.
 */
static int sysenterMSR=0;
int
Task_InitCrosspage(VMDriver *vm,
                   InitBlock *initParams) /* Initial params from the VM */
{
   LA kernelBaseLA = HOST_KERNEL_VA_2_LA(0);
   VMCrossPage *cp;
   uint32 hostCR3;
   DTR hostIDTR;
   uint16 hostTR;

   ASSERT(sizeof(VMCrossPage) < PAGE_SIZE);
   ASSERT(MODULECALL_CROSS_PAGE_LEN==1);

   cp = (VMCrossPage *) HostIF_UserToDriverPtr(vm, initParams->crosspage);
   vm->crosspage = cp;
   if (cp == NULL) {
      return 1;
   }

   /* Record the host's page table base. */
   GET_CR3(hostCR3);
   cp->hostContext.task.cr3 = hostCR3;

   /* Where the crosspage and its host context live, as seen by the host. */
   cp->hostVA = cp;
   cp->hostContextVA = (uint32) &cp->hostContext;
   cp->crosspageLinearAddr = kernelBaseLA + (uint32) cp;

   /* The monitor IDTR is seeded with the host's value. */
   GET_IDT(hostIDTR);
   cp->monContext.idtr.dtr = hostIDTR; /* irrelevant */

   /* Remember the host task register. */
   GET_TR(hostTR);
   cp->hostContext.tr = hostTR;

   cp->irqRelocateOffset[0] = IRQ_HOST_INTR1_BASE;
   cp->irqRelocateOffset[1] = IRQ_HOST_INTR2_BASE;

#if defined(WINNT_DDK) && defined(VMX86_DEBUG)
   cp->vmPtr = vm;
#endif
   return 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Task_Switch --
 *
 *      Switches from the host context into the monitor
 *      context. Think of it as a coroutine switch that changes
 *      not only the registers, but also the address space
 *      and all the hardware state. 
 *
 * Results:
 *      
 *      Next module call (or user call for that matter) is
 *      returned. 
 *      
 *      
 *
 * Side effects:
 *      Jump to the other side. Has no direct effect on the
 *      host-visible state except that it might generate an interrupt. 
 *
 *----------------------------------------------------------------------
 */
/*
 * DisableNMIDelivery --
 *
 *      Read the APIC LVT register at regPtr once. If its delivery mode is
 *      NMI and it is not already masked, set APIC_LVT_MASK in it.
 *
 * Results:
 *      TRUE if the register was masked by this call (so the caller must
 *      unmask it later), FALSE if it was left untouched.
 */
static Bool DisableNMIDelivery(volatile uint32* regPtr) {
   uint32 lvt = *regPtr;

   if (APIC_LVT_DELVMODE(lvt) != APIC_LVT_DELVMODE_NMI) {
      return FALSE;
   }
   if (APIC_LVT_ISMASKED(lvt)) {
      return FALSE;
   }

   *regPtr = lvt | APIC_LVT_MASK;
   return TRUE;
}

/*
 * DisableNMI --
 *
 *      Mask NMI delivery on the host APIC's LINT0, LINT1 and PC registers.
 *      Each out-parameter is set to TRUE when the matching register was
 *      masked here. When vm->hostAPIC is not set, nothing is touched and
 *      the out-parameters keep the values the caller gave them.
 */
static void DisableNMI(VMDriver *vm, Bool *lint0NMI,
                       Bool *lint1NMI, Bool *pcNMI)
{
   if (!vm->hostAPIC) {
      return;
   }

   *lint0NMI = DisableNMIDelivery(&APIC_LINT0_REG(vm->hostAPIC));
   *lint1NMI = DisableNMIDelivery(&APIC_LINT1_REG(vm->hostAPIC));
   *pcNMI = DisableNMIDelivery(&APIC_PC_REG(vm->hostAPIC));
}

/*
 * RestoreNMI --
 *
 *      For every flag that is set, clear APIC_LVT_MASK in the matching
 *      host APIC register (LINT0, LINT1, PC). vm->hostAPIC is only
 *      accessed for registers whose flag is set.
 */
static void RestoreNMI(VMDriver *vm, Bool lint0NMI, Bool lint1NMI, Bool pcNMI)
{
   volatile uint32 *lvt;
   uint32 value;

   if (lint0NMI) {
      lvt = &APIC_LINT0_REG(vm->hostAPIC);
      value = *lvt;
      *lvt = value & ~APIC_LVT_MASK;
   }

   if (lint1NMI) {
      lvt = &APIC_LINT1_REG(vm->hostAPIC);
      value = *lvt;
      *lvt = value & ~APIC_LVT_MASK;
   }

   if (pcNMI) {
      lvt = &APIC_PC_REG(vm->hostAPIC);
      value = *lvt;
      *lvt = value & ~APIC_LVT_MASK;
   }
}

/*
 * Task_Switch: save the host state that the crosspage code does not
 * handle, call SwitchToMonitor() through the crosspage, restore the saved
 * state on return and, if the monitor came back with MODULECALL_INTR,
 * re-raise the interrupt vector found in crosspage->args[0] on the host.
 * The order of the save/restore steps below is significant.
 */
void 
Task_Switch(VMDriver *vm)
{

   uint32      monContextVA;     
   DTR         gdtreg;
   Descriptor *desc;
   uint32      flags,cr0reg, cr2reg,cr4reg;
   uint32      dr0reg, dr1reg, dr2reg, dr3reg, dr6reg, dr7reg;
   uint64      savedMSR;
   Selector    gs, fs;
   VA          vAddr;
   uint16 trReg;   
   uint16 ldtreg;
   /*
    * Set by DisableNMI() for each APIC register it masked, so that
    * RestoreNMI() unmasks only those.
    */
   Bool lint0NMI = FALSE;
   Bool lint1NMI = FALSE;
   Bool pcNMI = FALSE;
   VMCrossPage *crosspage = vm->crosspage;

  
   /*
    * monContextVA points to the same location as
    * vm->crosspage->monContext, only refers to it
    * in the monitor's virtual address space
    */
   monContextVA = (uint32)
      (VPN_2_VA(MODULECALL_CROSS_PAGE_START)+ offsetof(VMCrossPage,monContext)); 

#if defined(linux) && defined(USE_PERFCOUNTERS)
   PerfCtr_SnapshotFromHost();
#endif
   /*
    * Mask NMI delivery on the host APIC (if any), then save the flags
    * and turn interrupts off for the duration of the switch.
    */
   DisableNMI(vm, &lint0NMI, &lint1NMI, &pcNMI);
   SAVE_FLAGS(flags); 
   CLEAR_INTERRUPTS();


   VTRACE_SET(0,0x41000001);
   /*
    * Save CR state (not CR3!)
    */
   GET_CR0(cr0reg);
   GET_CR2(cr2reg);
   GET_CR4(cr4reg);

   /* Save the debug registers. */
   GET_DR0(dr0reg);
   GET_DR1(dr1reg);
   GET_DR2(dr2reg);
   GET_DR3(dr3reg);
   GET_DR6(dr6reg);
   GET_DR7(dr7reg);

   /*
    * The GDT register is read here only to locate the task descriptor
    * below; the LDT selector is saved so it can be reloaded on return.
    */
   GET_GDT(gdtreg); 
   GET_LDT(ldtreg); 

   /*
    * GS and FS are saved outside of the SwitchToMonitor() code to 1)
    * minimize the amount of code handled there and 2) prevent us from
    * faulting if they happen to be in the LDT (since the LDT is saved and
    * restored here too) and 3) make sure that if we do fault (because the
    * uncached descriptor for GS or FS's Selector has become invalid) we
    * fault in the host's context rather than the monitor or, worse,
    * never-never land. --Jeremy.
    */
   gs = GET_GS();
   fs = GET_FS();

#ifdef KERNEL_2_1
   { 
     /*
      * Clear cr4 so only the 4MB pages are supported. Be sure that
      * the CR4_PGE (global bit enable) is cleared.
      */
     uint32 new_cr4 = CR4_PSE;
     SET_CR4(new_cr4);
   }
#endif
#ifdef WINNT_DDK
   { 
     /*
      * Clear cr4 so only the 4MB pages are supported. Be sure that
      * the CR4_PGE (global bit enable) is cleared.
      */
     uint32 new_cr4 = CR4_PSE;
     SET_CR4(new_cr4);
   }
#endif
#if defined(__FreeBSD__) || defined(__NetBSD__)
   { 
     /*
      * Clear cr4 so only the 4MB pages are supported. Be sure that
      * the CR4_PGE (global bit enable) is cleared.
      */
     uint32 new_cr4 = CR4_PSE;
     SET_CR4(new_cr4);

     /*
      * Clear GS (and FS), because GS has an invalid value (0x1F ? ?).
      * The values saved above are reloaded after the switch.
      */
     SET_GS(0);
     SET_FS(0);
   }
#endif
   /* Unnecessary initialization, but it pacifies the compiler warnings
    * about possible uninitialized use.
    */
   savedMSR = 0;
   /*
    * If Task_ProbeSysenterMSR() selected an MSR, remember its current
    * value and zero it while we are away; it is written back on return.
    */
   if (sysenterMSR) {
      savedMSR = HostIF_RDMSR(sysenterMSR);
      HostIF_WRMSR(sysenterMSR, 0);
   }

   /*
    * Mark our task descriptor as unused, or we won't be
    * able to come back to it (SET_TR sets the busy bit)
    * 
    */
   
   GET_TR(trReg);
   crosspage->hostContext.tr = trReg;
   if (trReg) {
     /* The descriptor sits trReg bytes into the host GDT. */
     vAddr = HOST_KERNEL_LA_2_VA(gdtreg.offset) + trReg;
     desc = (Descriptor *)(vAddr);
     if (Desc_Type(desc) == TASK_DESC_BUSY) {
       Desc_SetType(desc, TASK_DESC);
     }
   }


   /*
    * Save the CS on this side
    */

   /*
    * The host CS is stored as a 16-bit value at byte offset 4 of
    * hostContext.jump (NOTE(review): presumably the selector of a far
    * jump used to return to the host -- confirm in taskswitch.h) and in
    * the host task state.
    */
   *(uint16*) (&crosspage->hostContext.jump[4]) = GET_CS();
   crosspage->hostContext.task.cs = GET_CS();
   
   /*
    * see comment in taskswitch.h 
    */

   SwitchToMonitor(&crosspage->contextSwitchCode[0],
                  &crosspage->hostContext,
                  &crosspage->monContext,
                  monContextVA);

   /*
    * Restore state
    */
   if (sysenterMSR) {
      HostIF_WRMSR(sysenterMSR, savedMSR);
   }
   SET_CR0(cr0reg);
   SET_CR2(cr2reg);
   SET_CR4(cr4reg);
   
   SET_DR0(dr0reg);
   SET_DR1(dr1reg);
   SET_DR2(dr2reg);
   SET_DR3(dr3reg);
   SET_DR6(dr6reg);
   SET_DR7(dr7reg);

   /* The LDT is reloaded before FS/GS, which may refer to it. */
   SET_LDT(ldtreg);  
   
   SET_FS(fs);
   SET_GS(gs);
   
   ASSERT_NO_INTERRUPTS();

   if (crosspage->moduleCallType == MODULECALL_INTR) {
      VTRACE_SET(0,0x42000001|((crosspage->args[0]&0xff)<<4));
      /*
       * Note we must do the RAISE_INTERRUPT before ever enabling 
       * interrupts or bad things will happen (might want to know exactly
       * what bad things btw). 
       * Note2: RAISE_INTERRUPT() only takes a constant, hence the switch
       * statement. 
       */
      #define IRQ_INT(_x) case _x: RAISE_INTERRUPT(_x); break 
      switch (crosspage->args[0]) {
	 /* These are the general IO interrupts */
	 /* It would be nice to generate this dynamically, but see Note2 above. */
	 IRQ_INT(0x20); IRQ_INT(0x21); IRQ_INT(0x22); IRQ_INT(0x23); 
	 IRQ_INT(0x24); IRQ_INT(0x25); IRQ_INT(0x26); IRQ_INT(0x27); 
	 IRQ_INT(0x28); IRQ_INT(0x29); IRQ_INT(0x2a); IRQ_INT(0x2b); 
	 IRQ_INT(0x2c); IRQ_INT(0x2d); IRQ_INT(0x2e); IRQ_INT(0x2f); 
  						        
	 IRQ_INT(0x30); IRQ_INT(0x31); IRQ_INT(0x32); IRQ_INT(0x33); 
	 IRQ_INT(0x34); IRQ_INT(0x35); IRQ_INT(0x36); IRQ_INT(0x37); 
	 IRQ_INT(0x38); IRQ_INT(0x39); IRQ_INT(0x3a); IRQ_INT(0x3b); 
	 IRQ_INT(0x3c); IRQ_INT(0x3d); IRQ_INT(0x3e); IRQ_INT(0x3f); 
  						        
	 IRQ_INT(0x40); IRQ_INT(0x41); IRQ_INT(0x42); IRQ_INT(0x43); 
	 IRQ_INT(0x44); IRQ_INT(0x45); IRQ_INT(0x46); IRQ_INT(0x47); 
	 IRQ_INT(0x48); IRQ_INT(0x49); IRQ_INT(0x4a); IRQ_INT(0x4b); 
	 IRQ_INT(0x4c); IRQ_INT(0x4d); IRQ_INT(0x4e); IRQ_INT(0x4f); 
  
	 IRQ_INT(0x50); IRQ_INT(0x51); IRQ_INT(0x52); IRQ_INT(0x53); 
	 IRQ_INT(0x54); IRQ_INT(0x55); IRQ_INT(0x56); IRQ_INT(0x57); 
	 IRQ_INT(0x58); IRQ_INT(0x59); IRQ_INT(0x5a); IRQ_INT(0x5b); 
	 IRQ_INT(0x5c); IRQ_INT(0x5d); IRQ_INT(0x5e); IRQ_INT(0x5f); 
     						        
	 IRQ_INT(0x60); IRQ_INT(0x61); IRQ_INT(0x62); IRQ_INT(0x63); 
	 IRQ_INT(0x64); IRQ_INT(0x65); IRQ_INT(0x66); IRQ_INT(0x67); 
	 IRQ_INT(0x68); IRQ_INT(0x69); IRQ_INT(0x6a); IRQ_INT(0x6b); 
	 IRQ_INT(0x6c); IRQ_INT(0x6d); IRQ_INT(0x6e); IRQ_INT(0x6f); 
  						        
	 IRQ_INT(0x70); IRQ_INT(0x71); IRQ_INT(0x72); IRQ_INT(0x73); 
	 IRQ_INT(0x74); IRQ_INT(0x75); IRQ_INT(0x76); IRQ_INT(0x77); 
	 IRQ_INT(0x78); IRQ_INT(0x79); IRQ_INT(0x7a); IRQ_INT(0x7b); 
	 IRQ_INT(0x7c); IRQ_INT(0x7d); IRQ_INT(0x7e); IRQ_INT(0x7f); 
  
	 IRQ_INT(0x80); IRQ_INT(0x81); IRQ_INT(0x82); IRQ_INT(0x83); 
	 IRQ_INT(0x84); IRQ_INT(0x85); IRQ_INT(0x86); IRQ_INT(0x87); 
	 IRQ_INT(0x88); IRQ_INT(0x89); IRQ_INT(0x8a); IRQ_INT(0x8b); 
	 IRQ_INT(0x8c); IRQ_INT(0x8d); IRQ_INT(0x8e); IRQ_INT(0x8f); 
  						        
	 IRQ_INT(0x90); IRQ_INT(0x91); IRQ_INT(0x92); IRQ_INT(0x93); 
	 IRQ_INT(0x94); IRQ_INT(0x95); IRQ_INT(0x96); IRQ_INT(0x97); 
	 IRQ_INT(0x98); IRQ_INT(0x99); IRQ_INT(0x9a); IRQ_INT(0x9b); 
	 IRQ_INT(0x9c); IRQ_INT(0x9d); IRQ_INT(0x9e); IRQ_INT(0x9f); 
  						        
	 IRQ_INT(0xa0); IRQ_INT(0xa1); IRQ_INT(0xa2); IRQ_INT(0xa3); 
	 IRQ_INT(0xa4); IRQ_INT(0xa5); IRQ_INT(0xa6); IRQ_INT(0xa7); 
	 IRQ_INT(0xa8); IRQ_INT(0xa9); IRQ_INT(0xaa); IRQ_INT(0xab); 
	 IRQ_INT(0xac); IRQ_INT(0xad); IRQ_INT(0xae); IRQ_INT(0xaf); 
  						        
	 IRQ_INT(0xb0); IRQ_INT(0xb1); IRQ_INT(0xb2); IRQ_INT(0xb3); 
	 IRQ_INT(0xb4); IRQ_INT(0xb5); IRQ_INT(0xb6); IRQ_INT(0xb7); 
	 IRQ_INT(0xb8); IRQ_INT(0xb9); IRQ_INT(0xba); IRQ_INT(0xbb); 
	 IRQ_INT(0xbc); IRQ_INT(0xbd); IRQ_INT(0xbe); IRQ_INT(0xbf); 
  						        
	 IRQ_INT(0xc0); IRQ_INT(0xc1); IRQ_INT(0xc2); IRQ_INT(0xc3); 
	 IRQ_INT(0xc4); IRQ_INT(0xc5); IRQ_INT(0xc6); IRQ_INT(0xc7); 
	 IRQ_INT(0xc8); IRQ_INT(0xc9); IRQ_INT(0xca); IRQ_INT(0xcb); 
	 IRQ_INT(0xcc); IRQ_INT(0xcd); IRQ_INT(0xce); IRQ_INT(0xcf); 
  						        
	 IRQ_INT(0xd0); IRQ_INT(0xd1); IRQ_INT(0xd2); IRQ_INT(0xd3); 
	 IRQ_INT(0xd4); IRQ_INT(0xd5); IRQ_INT(0xd6); IRQ_INT(0xd7); 
	 IRQ_INT(0xd8); IRQ_INT(0xd9); IRQ_INT(0xda); IRQ_INT(0xdb); 
	 IRQ_INT(0xdc); IRQ_INT(0xdd); IRQ_INT(0xde); IRQ_INT(0xdf); 
  
	 IRQ_INT(0xe0); IRQ_INT(0xe1); IRQ_INT(0xe2); IRQ_INT(0xe3); 
	 IRQ_INT(0xe4); IRQ_INT(0xe5); IRQ_INT(0xe6); IRQ_INT(0xe7); 
	 IRQ_INT(0xe8); IRQ_INT(0xe9); IRQ_INT(0xea); IRQ_INT(0xeb); 
	 IRQ_INT(0xec); IRQ_INT(0xed); IRQ_INT(0xee); IRQ_INT(0xef); 
  						        
	 IRQ_INT(0xf0); IRQ_INT(0xf1); IRQ_INT(0xf2); IRQ_INT(0xf3); 
	 IRQ_INT(0xf4); IRQ_INT(0xf5); IRQ_INT(0xf6); IRQ_INT(0xf7); 
	 IRQ_INT(0xf8); IRQ_INT(0xf9); IRQ_INT(0xfa); IRQ_INT(0xfb); 
	 IRQ_INT(0xfc); IRQ_INT(0xfd); IRQ_INT(0xfe); IRQ_INT(0xff); 
      default: 
	 VTRACE_SET(0,0x4f000001);
	 /*
	  * XXXX nt
	  * running on a 2 processor machine we hit this Panic with int 0xD1 0x61 ...
	  */
	 Warning("Received Unexpected Interrupt: 0x%X in Task_Switch()\n", crosspage->args[0]);
	 Panic("Received Unexpected Interrupt: 0x%X\n", crosspage->args[0]);
      }

      VTRACE_SET(0,0x43000001);

   } else {
     VTRACE_SET(0,0x44000001);
   }
   
   /*
    * Put the saved flags back, then unmask whatever NMI sources
    * DisableNMI() masked on entry.
    */
   RESTORE_FLAGS(flags);
   RestoreNMI(vm, lint0NMI, lint1NMI, pcNMI);
#if defined(linux) && defined(USE_PERFCOUNTERS)
   PerfCtr_SnapshotFromMonitor();
#endif
      
   return;
}

/*
 *----------------------------------------------------------------------
 * Task_ProbeSysenterMSR --
 *
 *    This function computes sysenterMSR. Note that we assume that if a
 *    host is ever going to use the sysenter MSRs, they will have been
 *    initialized here; if some future host lazily sets up the sysenter
 *    MSRs, this code will cause trouble.
 *
 *----------------------------------------------------------------------
 */
void
Task_ProbeSysenterMSR(void)
{
   CPUOem cpuOem = CPUID_GetOEM();
   uint32 features = CPUID_GetFeatures();
   uint32 cpuversion = CPUID_GetVersion();

   if (cpuOem == CPUID_OEM_INTEL && 
       CPUID_FAMILY(cpuversion) == 0x6 && CPUID_MODEL(cpuversion) == 1) {
      /* The Intel manual warns that the PPro erroneously sets the SEP
       * CPUID bit.
       */
      Warning("VMMON: ignoring dubious sysenter bit on PPro.\n");
      sysenterMSR = 0;
   } else if (features & CPUID_FEATURE_COMMON_SEP) {
      uint64 tmpMSR;
      /* The CPU appears to support sysenter. Does the host actually
       * use it?
       */
      tmpMSR = HostIF_RDMSR(MSR_SYSENTER_CS);
      sysenterMSR = tmpMSR ? MSR_SYSENTER_CS : 0;
   }
   /* XXX: check for AMD style syscall/sysreturn */
}
