diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab6a474..fb00460 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,7 +9,7 @@ set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
 set(CMAKE_EXPORT_COMPILE_COMMANDS True)
 
-set(BASE_COMPILE_FLAGS "-ffreestanding -fno-rtti -fno-exceptions -mgeneral-regs-only")
+set(BASE_COMPILE_FLAGS "-ffreestanding -fno-rtti -fno-exceptions")
 set(BASE_LINK_FLAGS "-nostdlib")
 
 add_subdirectory(zion)
diff --git a/lib/glacier/CMakeLists.txt b/lib/glacier/CMakeLists.txt
index 5139546..259b811 100644
--- a/lib/glacier/CMakeLists.txt
+++ b/lib/glacier/CMakeLists.txt
@@ -26,4 +26,4 @@ target_include_directories(glacier_kernel
   PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
 
 set_target_properties(glacier_kernel PROPERTIES
-  COMPILE_FLAGS "${CMAKE_CXX_FLAGS} ${BASE_COMPILE_FLAGS} -mcmodel=kernel")
+  COMPILE_FLAGS "${CMAKE_CXX_FLAGS} ${BASE_COMPILE_FLAGS} -mcmodel=kernel -mgeneral-regs-only")
diff --git a/zion/common/cpu.cpp b/zion/common/cpu.cpp
index 16b4768..cba35ba 100644
--- a/zion/common/cpu.cpp
+++ b/zion/common/cpu.cpp
@@ -17,6 +17,11 @@ void ProbeCpuAndEnableFeatures() {
     panic("SSE3, SSSE3 not available.");
   }
 
+  // Bits 19 and 20 are SSE4.1 and SSE4.2 (assuming ecx is CPUID leaf 1 ECX).
+  if ((ecx & (0x3 << 19)) != (0x3 << 19)) {
+    dbgln("SSE4 not available.");
+  }
+
   dbgln("Setting SSE");
   asm volatile(
       "mov %%cr0, %%rax;"
diff --git a/zion/object/thread.cpp b/zion/object/thread.cpp
index d938103..f524d55 100644
--- a/zion/object/thread.cpp
+++ b/zion/object/thread.cpp
@@ -30,7 +30,8 @@ glcr::RefPtr<Thread> Thread::Create(Process& proc, uint64_t tid) {
   return glcr::MakeRefCounted<Thread>(proc, tid);
 }
 
-Thread::Thread(Process& proc, uint64_t tid) : process_(proc), id_(tid) {
+Thread::Thread(Process& proc, uint64_t tid)
+    : process_(proc), id_(tid), fx_data_(new uint8_t[520]) {
   uint64_t* stack_ptr =
       reinterpret_cast<uint64_t*>(proc.vmas()->AllocateKernelStack());
   // 0: rip
@@ -43,6 +44,13 @@ Thread::Thread(Process& proc, uint64_t tid) : process_(proc), id_(tid) {
   *(stack_ptr - 16) = proc.vmas()->cr3();
   rsp0_ = reinterpret_cast<uint64_t>(stack_ptr - 16);
   rsp0_start_ = reinterpret_cast<uint64_t>(stack_ptr);
+
+  // FXSAVE/FXRSTOR need a 16-byte aligned area. Hacky: the 520-byte buffer has
+  // 8 bytes of slack, so bump by 8 if only 8-byte aligned (assumes new[] gives
+  // at least 8-byte alignment).
+  if (reinterpret_cast<uint64_t>(fx_data_) & 0x8) {
+    fx_data_ += 8;
+  }
 }
 
 uint64_t Thread::pid() const { return process_.id(); }
diff --git a/zion/object/thread.h b/zion/object/thread.h
index 20c4c56..c678d99 100644
--- a/zion/object/thread.h
+++ b/zion/object/thread.h
@@ -43,6 +43,8 @@ class Thread : public KernelObject, public glcr::IntrusiveListNode<Thread> {
   uint64_t* Rsp0Ptr() { return &rsp0_; }
   uint64_t Rsp0Start() { return rsp0_start_; }
 
+  uint8_t* FxData() { return fx_data_; }
+
   // Switches the thread's state to runnable and enqueues it.
   void Start(uint64_t entry, uint64_t arg1, uint64_t arg2);
 
@@ -86,5 +88,9 @@ class Thread : public KernelObject, public glcr::IntrusiveListNode<Thread> {
   // I don't think me mind clobbering the stack here.
   uint64_t rsp0_start_;
 
+  // Pointer to a 512 byte region for FXSAVE and FXRSTOR. May point 8 bytes
+  // past the start of the allocation, so it must not be passed to delete[].
+  uint8_t* fx_data_ = nullptr;
+
   glcr::IntrusiveList<Thread> blocked_threads_;
 };
diff --git a/zion/scheduler/context_switch.s b/zion/scheduler/context_switch.s
index b9503f6..8d45a1b 100644
--- a/zion/scheduler/context_switch.s
+++ b/zion/scheduler/context_switch.s
@@ -1,5 +1,10 @@
 .global context_switch
 context_switch:
+  # %rdi -> Prev Task RSP address
+  # %rsi -> New Task RSP Address
+  # %rdx -> Prev Task FXSAVE Address
+  # %rcx -> New Task FXRSTOR Address
+
   push %rax
   push %rcx
   push %rdx
@@ -18,9 +23,25 @@ context_switch:
   mov %cr3, %rax
   push %rax
 
+  # For the sleep thread rdx will
+  # be a nullptr.
+  test %rdx, %rdx
+  jz switch
+
+  fxsave (%rdx)
+
+switch:
   mov %rsp, (%rdi)  # Save rsp to the prev task.
   mov (%rsi), %rsp  # Load the next task's rsp.
 
+  # For the sleep thread rcx will
+  # be a nullptr.
+  test %rcx, %rcx
+  jz restore
+
+  fxrstor (%rcx)
+
+restore:
   pop %rax
   mov %rax, %cr3
   pop %r15
diff --git a/zion/scheduler/scheduler.cpp b/zion/scheduler/scheduler.cpp
index 89aa36e..5d745c8 100644
--- a/zion/scheduler/scheduler.cpp
+++ b/zion/scheduler/scheduler.cpp
@@ -6,7 +6,8 @@
 
 namespace {
 
-extern "C" void context_switch(uint64_t* current_esp, uint64_t* next_esp);
+extern "C" void context_switch(uint64_t* current_esp, uint64_t* next_esp,
+                               uint8_t* current_fx_data, uint8_t* next_fx_data);
 
 }  // namespace
 
@@ -26,7 +27,8 @@ void Scheduler::SwapToCurrent(Thread& prev) {
   current_thread_->SetState(Thread::RUNNING);
 
   SetRsp0(current_thread_->Rsp0Start());
-  context_switch(prev.Rsp0Ptr(), current_thread_->Rsp0Ptr());
+  context_switch(prev.Rsp0Ptr(), current_thread_->Rsp0Ptr(), prev.FxData(),
+                 current_thread_->FxData());
 
   asm volatile("sti");
 }