; This program is made by Daniel Horchner.
; email: dbjh@gmx.net

        .386p
        locals

code32  segment para public use32
        assume cs:code32, ds:code32, ss:code32

include raw32.inc

;32-bit data
; Two Task State Segments (TSS structure comes from the include file). Each is
;  followed directly by a 2000h byte I/O permission bitmap, which is why the
;  TSS descriptors below use a limit of 2067h (68h + 2000h - 1).
TSS1            TSS     <>
                db      2000h dup(0)    ; 1 bit for each port; 64K / 8 = 8K
TSS2            TSS     <>
                db      2000h dup(0)

; Descriptor templates: main fills in the base fields (and the LDT limit) and
;  passes each 8 byte template to setdsc. The *sel words receive the selectors
;  returned by getdsc.
; NOTE: main executes 'call fword ptr TSS2sel-4', i.e. it uses the 4 bytes in
;  front of TSS2sel as a (discarded) offset and TSS2sel itself as the
;  selector, so TSS2sel must keep at least 4 bytes of data in front of it.
LDTdsc          seg_descriptor  <,,, 82h, 0,>           ; dsc type 2=LDT
LDTsel          dw      0
TSS1dsc         seg_descriptor  <2067h,,, 89h, 0,>      ; dsc type 9=TSS
TSS2dsc         seg_descriptor  <2067h,,, 89h, 0,>      ; 68h bytes for task
TSS1sel         dw      0                               ;  state + 2000h
TSS2sel         dw      0                               ;  bytes for I/O map

; 8 byte gate descriptor; used below for the call gate template (callgatedsc).
gate_dsc        struc
  offset0_15    dw      0               ; Low word of the entry point offset
  selector      dw      0               ; Code segment selector of the target
  count         db      0               ; Parameter count
  access        db      0               ; Access byte (type, DPL, present)
  offset16_31   dw      0               ; High word of the entry point offset
gate_dsc        ends

; Call gate template; main sets the selector/offset, raises the DPL to 3 and
;  installs it with setdsc under the selector stored in returnsel.
; NOTE: 'procedure:' executes 'call fword ptr returnsel-4': the 4 bytes in
;  front of returnsel act as the (discarded) offset, returnsel as the selector.
callgatedsc     gate_dsc        <,, 0, 8ch, 0>          ; dsc type ch=call
returnsel       dw      0                               ;  gate
org_tr          dw      0               ; original Task Register contents
gdtr            dw      0,0,0           ; 16-bit limit, 32-bit base
LDT             dd      0               ; pointer to mem allocated for LDT

; msg0 is '$' terminated and printed with dosprint; msg1..msg3 are zero
;  terminated and printed with putstr.
msg0            db      'Not running at Privilege Level 0.','$'
msg1            db      'Press a key to continue.',0
msg2            db      'Message from procedure!',0
msg3            db      'Message from task2!',0

;32-bit code
; main - demo code. Steps:
;  1. Check that cs has DPL 0; if not, print msg0 with dosprint and exit.
;  2. Copy the GDT into allocated memory, give every copied descriptor DPL 3
;     and install the copy as LDT (also patched into the current TSS).
;  3. Create two TSS descriptors, make TSS1 the current task and CALL TSS2;
;     its code ('task2:') runs with PL 3 selectors and returns with IRETD.
;  4. Enter 'procedure:' at PL 3 with an interlevel IRETD and return to PL 0
;     through a call gate whose target is 'return_to_PL0:'.
;  5. Restore the original Task Register, wait for a key, exit.
; getlomem, gethimem, getdsc, setdsc, putstr, dosprint, exit, the @rlp macro
;  and zerosel/data32sel/code32a/v86r_ah/RMCALL_VECT are not defined in this
;  file (presumably they come from raw32.inc).
; Any failure after 'cli'/'ltr TSS1sel' leaves through @cleanup so that the
;  original Task Register and the interrupt flag are restored before exiting.
main:
                                        ; First, check if running at PL 0
        mov ebx,cs
        lar ecx,ebx                     ; ecx=access rights of cs descriptor
        and ecx,6000h                   ; Only the DPL bits are needed
        jz short @@PL0
        mov edx,offset msg0
        call dosprint
        jmp @exit
@@PL0:
;
                                        ; Make a copy of the GDT to create a
        sgdt fword ptr gdtr             ;  LDT; Make all descriptors PL 3
        movzx ecx,gdtr
        mov LDTdsc.limit0_15,cx         ; ecx=GDT limit in bytes
        inc ecx                         ; ecx=GDT size in bytes
        mov eax,ecx
        call getlomem                   ; LOmem -> Handy when using TD ;)
        jc @exit                        ; Assumes CF=1 on failure, like the
                                        ;  gethimem check below - TODO confirm
        mov LDT,eax                     ; Save ptr to mem allocated for LDT
        mov edi,eax
        push ds
        pop es                          ; es:edi=destination (LDT memory)
        mov esi,dword ptr gdtr[2]       ; esi=GDT base taken from sgdt
        mov ds,zerosel                  ; ds:esi=source (GDT), read via zerosel
        rep movsb                       ; ecx is still the GDT size here
        mov ds,cs:data32sel             ; Restore ds
                                        ; Change the DPL field of the
        movzx ecx,gdtr                  ;  descriptors to PL 3
        inc ecx
        shr ecx,3                       ; ecx=number of selectors in GDT/LDT
        mov edi,LDT
next_descriptor:
        mov ebx,ecx
        dec ebx                         ; ebx=index of descriptor (ecx-1 .. 0)
        or byte ptr [edi+ebx*8+5],3 shl 5
        loop next_descriptor            ; PL 3; DPL bits are bits 5 and 6

        mov cx,1                        ; Allocate 1 descriptor for LDT
        call getdsc
        jc @exit
        mov LDTsel,ax

        mov eax,code32a                 ; LDT pointer + code32a = base that
        add eax,LDT                     ;  goes into the LDT descriptor
        mov LDTdsc.base0_15,ax
        shr eax,16
        mov LDTdsc.base16_23,al
        mov LDTdsc.base24_31,ah

        mov edi,offset LDTdsc           ; es:edi=pointer to 8 byte dsc buffer
        mov bx,LDTsel                   ; bx=selector
        call setdsc
        jc @exit
                                        ; Store new LDTR in current TSS ->
        xor ebx,ebx                     ;  avoids problems with 386SWAT
        str bx
        and ebx,not 3                   ; Drop RPL bits (as done for org_tr
                                        ;  when TR is restored)
        add ebx,dword ptr gdtr[2]
        sub ebx,code32a                 ; ebx=address of TSS descriptor

        mov dh,[ebx+7]                  ; base 24..31
        mov dl,[ebx+4]                  ; base 16..23
        shl edx,16
        mov dx,[ebx+2]                  ; base 0..15
        sub edx,code32a                 ; edx=address of TSS

        mov ax,LDTsel
        mov [edx+60h],ax                ; LDTR at TSS[60h]

        lldt ax
;
; Switch to a task running at a Privilege Level > 0
        mov cx,2                        ; Allocate 2 descriptors for TSS's
        call getdsc
        jc @exit
        mov TSS1sel,ax
        add ax,8                        ; Second descriptor = next selector
        mov TSS2sel,ax

        mov eax,code32a
        add eax,offset TSS1
        mov TSS1dsc.base0_15,ax
        shr eax,16
        mov TSS1dsc.base16_23,al
        mov TSS1dsc.base24_31,ah
        mov eax,code32a
        add eax,offset TSS2
        mov TSS2dsc.base0_15,ax
        shr eax,16
        mov TSS2dsc.base16_23,al
        mov TSS2dsc.base24_31,ah

        mov edi,offset TSS1dsc
        mov bx,TSS1sel                  ; bx=selector
        call setdsc
        jc @exit
        mov edi,offset TSS2dsc
        mov bx,TSS2sel
        call setdsc
        jc @exit

        str org_tr                      ; Save original Task Register
        cli                             ; From here on errors must leave
                                        ;  through @cleanup, not @exit

        mov eax,cr3                     ; CR3 and LDTR are loaded but not
        mov TSS1.@cr3,eax               ;  stored on a task switch
        mov TSS2.@cr3,eax
        sldt TSS1.ldtr
        sldt TSS2.ldtr
        ltr TSS1sel                     ; TSS1sel = current task

        pushfd
        pop eax
        mov TSS2.eflags,eax             ; task2 starts with the current flags
        mov eax,ds
        or eax,7                        ; Set TI bit; Set RPL to 3
        mov TSS2.@ds,eax
        mov TSS2.@es,eax
        mov TSS2.@fs,eax
        mov TSS2.@gs,eax
        mov TSS2.@ss,eax
        mov TSS2.ss0,ss                 ; Save PL 0 ss
        mov TSS2.esp0,esp               ; Save PL 0 esp
        mov eax,200h                    ; Allocate a 200h stack
        call gethimem
        jc @cleanup                     ; TR/IF already changed -> undo them
        add eax,200h                    ; Stack grows down...
        mov TSS2.@esp,eax
        mov eax,cs
        or eax,7                        ; Set TI bit; Set RPL to 3
        mov TSS2.@cs,eax
        mov TSS2.eip,offset task2
        call fword ptr TSS2sel-4        ; Switch to other task ('task2:')
;
; Call a procedure at a Privilege Level > 0 and return to PL 0 using a call
;  gate
        mov cx,1                        ; Allocate 1 descriptor for call gate
        call getdsc
        jc @cleanup
        mov returnsel,ax

        mov callgatedsc.selector,cs
        mov eax,offset return_to_PL0    ; Store the full 32-bit entry offset,
        mov callgatedsc.offset0_15,ax   ;  so the gate stays correct if the
        shr eax,16                      ;  target lies above 64K
        mov callgatedsc.offset16_31,ax
        or callgatedsc.access,3 shl 5   ; PL 3 and higher PL code must be
                                        ;  able to call it
        mov edi,offset callgatedsc
        mov bx,returnsel                ; bx=selector
        call setdsc
        jc @cleanup

        push ds es fs gs ss             ; Popped again at return_to_PL0
                                        ; The PL 0 ss:esp will be fetched
        mov TSS1.ss0,ss                 ;  from the current TSS after the
        mov TSS1.esp0,esp               ;  interlevel CALL

        mov eax,ss                      ; Since cs on stack will be a sel in
        or eax,7                        ;  the LDT with PL > 0 IRETD will be
        push eax                        ;  an interlevel one -> ss:esp are
        push esp                        ;  also popped from the stack; ss's
        pushfd                          ;  RPL must be equal to cs's RPL
        mov eax,cs
        or eax,7                        ; Set TI bit; Set RPL to 3
        push eax
        push offset procedure
                                        ; Change all segment registers for
        mov eax,ds                      ;  their PL 3 equivalent in the LDT
        or eax,7                        ;  If they are not made PL 3 all
        mov ds,eax                      ;  these regs will be set to zero
        mov eax,es
        or eax,7                        ; Set Table Indicator bit; Set RPL to
        mov es,eax                      ;  3
        mov eax,fs
        or eax,7
        mov fs,eax
        mov eax,gs
        or eax,7
        mov gs,eax

        iretd                           ; Jump to 'procedure:'
return_to_PL0:
        add esp,16                      ; Remove ss:esp and cs:eip from stack
        pop ss gs fs es ds              ;  (result of call to call gate)
;
        call restore_org_tr             ; Original TR back, interrupts on

        mov esi,offset msg1
        @rlp edi,0b8000h+3*160
        mov bl,1fh
        call putstr
        mov v86r_ah,0                   ; ah=0 -> Wait for key and read char
        mov al,16h
        int RMCALL_VECT

@exit:
        jmp exit                        ; Return to real/V86 mode

; Error exit for failures that happen after 'cli' and 'ltr TSS1sel'. The stack
;  is at the same level as on entry to main at every point that jumps here.
@cleanup:
        call restore_org_tr
        jmp exit                        ; Return to real/V86 mode

; restore_org_tr - private helper of main: reloads the Task Register saved in
;  org_tr and re-enables interrupts.
; In:    org_tr, gdtr filled in by main; gs as it was on entry to main (the
;        GDT is addressed through gs with its base taken from gdtr)
; Clobb: eax, ebx, flags
restore_org_tr:
        mov eax,dword ptr gdtr[2]
        movzx ebx,org_tr
        and ebx,not 3                   ; ebx=offset of TSS dsc in GDT
        and byte ptr gs:[eax+ebx+5],not 2 ; Clear Busy bit
        ltr org_tr                      ; Restore original Task Register
        sti
        retn

;
; procedure - entered from main by the interlevel IRETD, i.e. running with the
;  PL 3 selectors main prepared. Prints msg2 with putstr and then goes back to
;  PL 0 through the call gate whose selector is in returnsel. The far call
;  does not come back here: the gate's target ('return_to_PL0:') removes the
;  return frame from the stack.
procedure:
        @rlp edi,0b8000h+160            ; edi=where to print (text screen)
        lea esi,msg2                    ; esi=string to print
        mov bl,1Fh                      ; bl=value passed along with the text
        call putstr                     ;  (presumably the colour attribute)
;        mov eax,cr0                     ; Exception 13 if this is PL > 0 code
        call fword ptr returnsel-4      ; Only the selector matters; the
                                        ;  offset part of the far pointer is
                                        ;  discarded for a call gate
;
; task2 - entry point of the task described by TSS2 (main stores this offset
;  in TSS2.eip and switches to it with a far CALL through TSS2sel). Prints
;  msg3 with putstr and returns to the calling task with IRETD.
; After the IRETD task switch the eip saved for this task points behind the
;  IRETD, so a later switch to this task resumes there; the jump restarts the
;  task body instead of running off the end of the segment.
task2:
        mov esi,offset msg3
        @rlp edi,0b8000h+2*160
        mov bl,1fh
        call putstr
;        mov eax,cr0                     ; Exception 13 if this is PL > 0 code
        iretd                           ; Back to the task that called us
        jmp task2                       ; Re-entry point on the next dispatch

code32  ends
        end
