AHCI Post complete.
|
@ -83,4 +83,6 @@ figcaption {
|
||||||
font-style: italic;
|
font-style: italic;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.chroma {
|
||||||
|
padding: 10px;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
/* Background */ .bg { color: #d0d0d0; background-color: #202020; }
|
||||||
|
/* PreWrapper */ .chroma { color: #d0d0d0; background-color: #202020; }
|
||||||
|
/* Other */ .chroma .x { }
|
||||||
|
/* Error */ .chroma .err { color: #a61717; background-color: #e3d2d2 }
|
||||||
|
/* CodeLine */ .chroma .cl { }
|
||||||
|
/* LineLink */ .chroma .lnlinks { outline: none; text-decoration: none; color: inherit }
|
||||||
|
/* LineTableTD */ .chroma .lntd { vertical-align: top; padding: 0; margin: 0; border: 0; }
|
||||||
|
/* LineTable */ .chroma .lntable { border-spacing: 0; padding: 0; margin: 0; border: 0; }
|
||||||
|
/* LineHighlight */ .chroma .hl { background-color: #363636 }
|
||||||
|
/* LineNumbersTable */ .chroma .lnt { white-space: pre; -webkit-user-select: none; user-select: none; margin-right: 0.4em; padding: 0 0.4em 0 0.4em;color: #686868 }
|
||||||
|
/* LineNumbers */ .chroma .ln { white-space: pre; -webkit-user-select: none; user-select: none; margin-right: 0.4em; padding: 0 0.4em 0 0.4em;color: #686868 }
|
||||||
|
/* Line */ .chroma .line { display: flex; }
|
||||||
|
/* Keyword */ .chroma .k { color: #6ab825; font-weight: bold }
|
||||||
|
/* KeywordConstant */ .chroma .kc { color: #6ab825; font-weight: bold }
|
||||||
|
/* KeywordDeclaration */ .chroma .kd { color: #6ab825; font-weight: bold }
|
||||||
|
/* KeywordNamespace */ .chroma .kn { color: #6ab825; font-weight: bold }
|
||||||
|
/* KeywordPseudo */ .chroma .kp { color: #6ab825 }
|
||||||
|
/* KeywordReserved */ .chroma .kr { color: #6ab825; font-weight: bold }
|
||||||
|
/* KeywordType */ .chroma .kt { color: #6ab825; font-weight: bold }
|
||||||
|
/* Name */ .chroma .n { }
|
||||||
|
/* NameAttribute */ .chroma .na { color: #bbbbbb }
|
||||||
|
/* NameBuiltin */ .chroma .nb { color: #24909d }
|
||||||
|
/* NameBuiltinPseudo */ .chroma .bp { }
|
||||||
|
/* NameClass */ .chroma .nc { color: #447fcf; text-decoration: underline }
|
||||||
|
/* NameConstant */ .chroma .no { color: #40ffff }
|
||||||
|
/* NameDecorator */ .chroma .nd { color: #ffa500 }
|
||||||
|
/* NameEntity */ .chroma .ni { }
|
||||||
|
/* NameException */ .chroma .ne { color: #bbbbbb }
|
||||||
|
/* NameFunction */ .chroma .nf { color: #447fcf }
|
||||||
|
/* NameFunctionMagic */ .chroma .fm { }
|
||||||
|
/* NameLabel */ .chroma .nl { }
|
||||||
|
/* NameNamespace */ .chroma .nn { color: #447fcf; text-decoration: underline }
|
||||||
|
/* NameOther */ .chroma .nx { }
|
||||||
|
/* NameProperty */ .chroma .py { }
|
||||||
|
/* NameTag */ .chroma .nt { color: #6ab825; font-weight: bold }
|
||||||
|
/* NameVariable */ .chroma .nv { color: #40ffff }
|
||||||
|
/* NameVariableClass */ .chroma .vc { }
|
||||||
|
/* NameVariableGlobal */ .chroma .vg { }
|
||||||
|
/* NameVariableInstance */ .chroma .vi { }
|
||||||
|
/* NameVariableMagic */ .chroma .vm { }
|
||||||
|
/* Literal */ .chroma .l { }
|
||||||
|
/* LiteralDate */ .chroma .ld { }
|
||||||
|
/* LiteralString */ .chroma .s { color: #ed9d13 }
|
||||||
|
/* LiteralStringAffix */ .chroma .sa { color: #ed9d13 }
|
||||||
|
/* LiteralStringBacktick */ .chroma .sb { color: #ed9d13 }
|
||||||
|
/* LiteralStringChar */ .chroma .sc { color: #ed9d13 }
|
||||||
|
/* LiteralStringDelimiter */ .chroma .dl { color: #ed9d13 }
|
||||||
|
/* LiteralStringDoc */ .chroma .sd { color: #ed9d13 }
|
||||||
|
/* LiteralStringDouble */ .chroma .s2 { color: #ed9d13 }
|
||||||
|
/* LiteralStringEscape */ .chroma .se { color: #ed9d13 }
|
||||||
|
/* LiteralStringHeredoc */ .chroma .sh { color: #ed9d13 }
|
||||||
|
/* LiteralStringInterpol */ .chroma .si { color: #ed9d13 }
|
||||||
|
/* LiteralStringOther */ .chroma .sx { color: #ffa500 }
|
||||||
|
/* LiteralStringRegex */ .chroma .sr { color: #ed9d13 }
|
||||||
|
/* LiteralStringSingle */ .chroma .s1 { color: #ed9d13 }
|
||||||
|
/* LiteralStringSymbol */ .chroma .ss { color: #ed9d13 }
|
||||||
|
/* LiteralNumber */ .chroma .m { color: #3677a9 }
|
||||||
|
/* LiteralNumberBin */ .chroma .mb { color: #3677a9 }
|
||||||
|
/* LiteralNumberFloat */ .chroma .mf { color: #3677a9 }
|
||||||
|
/* LiteralNumberHex */ .chroma .mh { color: #3677a9 }
|
||||||
|
/* LiteralNumberInteger */ .chroma .mi { color: #3677a9 }
|
||||||
|
/* LiteralNumberIntegerLong */ .chroma .il { color: #3677a9 }
|
||||||
|
/* LiteralNumberOct */ .chroma .mo { color: #3677a9 }
|
||||||
|
/* Operator */ .chroma .o { }
|
||||||
|
/* OperatorWord */ .chroma .ow { color: #6ab825; font-weight: bold }
|
||||||
|
/* Punctuation */ .chroma .p { }
|
||||||
|
/* Comment */ .chroma .c { color: #999999; font-style: italic }
|
||||||
|
/* CommentHashbang */ .chroma .ch { color: #999999; font-style: italic }
|
||||||
|
/* CommentMultiline */ .chroma .cm { color: #999999; font-style: italic }
|
||||||
|
/* CommentSingle */ .chroma .c1 { color: #999999; font-style: italic }
|
||||||
|
/* CommentSpecial */ .chroma .cs { color: #e50808; background-color: #520000; font-weight: bold }
|
||||||
|
/* CommentPreproc */ .chroma .cp { color: #cd2828; font-weight: bold }
|
||||||
|
/* CommentPreprocFile */ .chroma .cpf { color: #cd2828; font-weight: bold }
|
||||||
|
/* Generic */ .chroma .g { }
|
||||||
|
/* GenericDeleted */ .chroma .gd { color: #d22323 }
|
||||||
|
/* GenericEmph */ .chroma .ge { font-style: italic }
|
||||||
|
/* GenericError */ .chroma .gr { color: #d22323 }
|
||||||
|
/* GenericHeading */ .chroma .gh { color: #ffffff; font-weight: bold }
|
||||||
|
/* GenericInserted */ .chroma .gi { color: #589819 }
|
||||||
|
/* GenericOutput */ .chroma .go { color: #cccccc }
|
||||||
|
/* GenericPrompt */ .chroma .gp { color: #aaaaaa }
|
||||||
|
/* GenericStrong */ .chroma .gs { font-weight: bold }
|
||||||
|
/* GenericSubheading */ .chroma .gu { color: #ffffff; text-decoration: underline }
|
||||||
|
/* GenericTraceback */ .chroma .gt { color: #d22323 }
|
||||||
|
/* GenericUnderline */ .chroma .gl { text-decoration: underline }
|
||||||
|
/* TextWhitespace */ .chroma .w { color: #666666 }
|
After Width: | Height: | Size: 135 KiB |
After Width: | Height: | Size: 127 KiB |
After Width: | Height: | Size: 93 KiB |
After Width: | Height: | Size: 74 KiB |
After Width: | Height: | Size: 106 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 56 KiB |
|
@ -0,0 +1,617 @@
|
||||||
|
---
|
||||||
|
title: "Writing an AHCI Driver"
|
||||||
|
date: 2024-01-08
|
||||||
|
draft: true
|
||||||
|
tags: ['osdev']
|
||||||
|
---
|
||||||
|
|
||||||
|
Now that I've wrapped up the [0.1.0 Release](/blog/2023/12/acadia-0.1.0) of
|
||||||
|
AcadiaOS I'm looking to clean up some of the "just get it working" hacks that
|
||||||
|
exist in the codebase. First up on that list is the AHCI Driver.
|
||||||
|
|
||||||
|
## What is AHCI
|
||||||
|
|
||||||
|
AHCI stands for Advanced Host Controller Interface and if you like acronyms boy
|
||||||
|
are you in for a treat. AHCI is a way to interface with SATA (which replaced
|
||||||
|
PATA (a.k.a. IDE)) via its HBA. AHCI has since been superseded by NVMe but is
|
||||||
|
simpler to implement (or so I've been told) so I've started here.
|
||||||
|
|
||||||
|
To try to explain it without acronym soup, AHCI allows you to access disk
|
||||||
|
drives and optical drives (SATA devices) by writing relevant ATA commands to
|
||||||
|
memory addresses that are backed by hardware firmware. There are a wide variety
|
||||||
|
of commands available but best I can tell the main ones used these days are to
|
||||||
|
identify the device and read/write via direct memory access (DMA).
|
||||||
|
|
||||||
|
Essentially you give the device an offset to read from as well as a physical
|
||||||
|
memory address to write to. The device firmware copies the amount of data you
|
||||||
|
requested to the physical address then triggers an interrupt to indicate that
|
||||||
|
the operation is complete. Likewise writing via DMA is the same but in reverse.
|
||||||
|
|
||||||
|
Disclaimer, all of the above is basically just summarizing Wikipedia and the
|
||||||
|
OSDev wiki and I don't really know what I'm talking about.
|
||||||
|
|
||||||
|
## Current State
|
||||||
|
|
||||||
|
The current AHCI implementation in Denali is straightforward but very brittle.
|
||||||
|
It relies on everything following the happy path and is cobbled together more
|
||||||
|
based on trial and error of what worked rather than following the specification
|
||||||
|
closely.
|
||||||
|
|
||||||
|
As a part of this article we're going to dive into the related specs and look at
|
||||||
|
how they relate to each other. The trickiest part of writing the driver is the
|
||||||
|
fact that the necessary information is spread across several different specs
|
||||||
|
rather than contained in one place. The specs I reference in this post are:
|
||||||
|
|
||||||
|
- [AHCI 1.3.1](https://www.intel.com/content/www/us/en/io/serial-ata/serial-ata-ahci-spec-rev1-3-1.html)
|
||||||
|
- SATA 3.2
|
||||||
|
- ATA/ATAPI Command Set 3 (ACS-3)
|
||||||
|
|
||||||
|
The SATA and ACS specs cost money so I can't link them directly but it isn't
|
||||||
|
hard to find drafts of them available online.
|
||||||
|
|
||||||
|
## How AHCI Works
|
||||||
|
|
||||||
|
AHCI allows you to control SATA devices by writing commands to memory. The
|
||||||
|
layout of these structures is nicely shown in the AHCI Spec Figure 4:
|
||||||
|
|
||||||
|
![AHCI Memory](images/HBA_Memory_Annotated.png)
|
||||||
|
|
||||||
|
There are several pieces here that I've annotated:
|
||||||
|
|
||||||
|
1. The Generic Host Control (GHC) is a set of registers that allow you to manage the
|
||||||
|
whole controller and get its status. These registers are referred to in the
|
||||||
|
spec using GHC.RegisterName so the interrupt status register for instance is
|
||||||
|
"GHC.IS" for short.
|
||||||
|
2. Each device (hard disk or disc drive) that is attached to the controller is
|
||||||
|
exposed as a "port" with a set of registers to control it individually.
|
||||||
|
These registers are referred to as PxRegisterName so for instance the command
|
||||||
|
issue register is PxCI.
|
||||||
|
3. For each port it has a separately allocated piece of memory that can accept
|
||||||
|
up to 32 "commands" to execute.
|
||||||
|
4. When the controller is finished executing a command for a device it will
|
||||||
|
write a Frame Information Structure (FIS) and raise an interrupt. (The GHC
|
||||||
|
has a register to show which devices have a pending interrupt GHC.IS).
|
||||||
|
|
||||||
|
Each device can support up to 32 pending commands (but only receive one FIS at a
|
||||||
|
time). The memory structure is as follows:
|
||||||
|
|
||||||
|
![Port Memory](images/HBA_Port_Memory.png)
|
||||||
|
|
||||||
|
To issue a command we can:
|
||||||
|
|
||||||
|
1. Select a command slot that isn't currently in use.
|
||||||
|
2. Write the command to that command table (the specifics of this are explained
|
||||||
|
later) and then set that command as active in the commands issued register
|
||||||
|
for that port (PxCI).
|
||||||
|
3. Wait for an interrupt indicating that this command has finished and read the
|
||||||
|
resulting FIS. We mostly just care about the status byte. The controller
|
||||||
|
will unset the bit for the completed command in the PxCI register when it
|
||||||
|
raises the interrupt so we can disambiguate which command has finished.
|
||||||
|
|
||||||
|
Although this sounds straightforward there are a few moving parts here that we
|
||||||
|
need to get up and running.
|
||||||
|
|
||||||
|
- We need to find where the HBA structure is in memory.
|
||||||
|
- We need to allocate some space for the command tables and received FIS
|
||||||
|
structures.
|
||||||
|
- We need to set up interrupt handling for each port.
|
||||||
|
- Before this we will issue a hardware reset command to the controller to get
|
||||||
|
everything in a clean state.
|
||||||
|
|
||||||
|
## Finding the HBA structure
|
||||||
|
|
||||||
|
We find the HBA structure by iterating through the PCI configuration space and
|
||||||
|
finding the AHCI device. I'm not going to delve too deeply into this because PCI
|
||||||
|
could be a whole separate post and it is a little trickier to explain because
|
||||||
|
it is harder to access the specifications (the good people at the PCI "Special
|
||||||
|
Interest Group" are happy to let you download the PDF for the low low price of
|
||||||
|
$4500 if you are not a member).
|
||||||
|
|
||||||
|
The short story is that we are looking for the device with the right [class
|
||||||
|
code](https://wiki.osdev.org/PCI#Class_Codes) - Class Code 0x1 (Storage Device),
|
||||||
|
Subclass 0x6 (SATA Controller), Subtype 0x1 (AHCI).
|
||||||
|
|
||||||
|
Once we have the correct configuration space we can read the address at offset
|
||||||
|
0x24 (called the ABAR for AHCI Base Address) which points to the start of the
|
||||||
|
GHC registers.
|
||||||
|
|
||||||
|
We can mostly ignore the other information in the configuration space for now as
|
||||||
|
we aren't dealing with Message Signaled Interrupts yet.
|
||||||
|
|
||||||
|
## Hardware Reset
|
||||||
|
|
||||||
|
Now that we have found the Generic Host Control registers we're going to
|
||||||
|
initiate a hardware reset of the AHCI Controller. The advantage of this is we
|
||||||
|
will know the exact state that the controller and its ports are in. Other than
|
||||||
|
that it ensures that we aren't dependent on the specific way limine uses the
|
||||||
|
AHCI controller.
|
||||||
|
|
||||||
|
I suspect that this is **not** how most production operating systems handle
|
||||||
|
things but this should give us a clean slate for now.
|
||||||
|
|
||||||
|
From the AHCI spec:
|
||||||
|
|
||||||
|
> 10.4.3 HBA Reset
|
||||||
|
>
|
||||||
|
> If the HBA becomes unusable for multiple ports, and a software reset or port
|
||||||
|
> reset does not correct the problem, software may reset the entire HBA by
|
||||||
|
> setting GHC.HR to ‘1’. When software sets the GHC.HR bit to ‘1’, the HBA
|
||||||
|
> shall perform an internal reset action. The bit shall be cleared to ‘0’ by
|
||||||
|
> the HBA when the reset is complete. A software write of ‘0’ to GHC.HR shall
|
||||||
|
> have no effect. To perform the HBA reset, software sets GHC.HR to ‘1’ and may
|
||||||
|
> poll until this bit is read to be ‘0’, at which point software knows that the
|
||||||
|
> HBA reset has completed.
|
||||||
|
>
|
||||||
|
> If the HBA has not cleared GHC.HR to ‘0’ within 1 second of software setting
|
||||||
|
> GHC.HR to ‘1’, the HBA is in a hung or locked state.
|
||||||
|
>
|
||||||
|
> When GHC.HR is set to ‘1’, GHC.AE, GHC.IE, the IS register, and all port
|
||||||
|
> register fields (except PxFB/PxFBU/PxCLB/PxCLBU) that are not HwInit in the
|
||||||
|
> HBA’s register memory space are reset. The HBA’s configuration space and all
|
||||||
|
> other global registers/bits are not affected by setting GHC.HR to ‘1’. Any
|
||||||
|
> HwInit bits in the port specific registers are not affected by setting GHC.HR
|
||||||
|
> to ‘1’. The port specific registers PxFB, PxFBU, PxCLB, and PxCLBU are not
|
||||||
|
> affected by setting GHC.HR to ‘1’. If the HBA supports staggered spin-up, the
|
||||||
|
> PxCMD.SUD bit will be reset to ‘0’; software is responsible for setting the
|
||||||
|
> PxCMD.SUD and PxSCTL.DET fields appropriately such that communication can be
|
||||||
|
> established on the Serial ATA link. If the HBA does not support staggered
|
||||||
|
> spin-up, the HBA reset shall cause a COMRESET to be sent on the port.
|
||||||
|
|
||||||
|
Despite the long text, this process is fairly straightforward. We set the
|
||||||
|
Hardware Reset bit and then poll for it to be set to 0. We then set the AHCI
|
||||||
|
enable bit. For now we can leave interrupts disabled until we have reset the
|
||||||
|
ports. Once this is done we sleep for a few milliseconds to allow the ports time
|
||||||
|
to spin up. For now we are just using 50ms because that is the smallest
|
||||||
|
resolution we support sleeping for (1 scheduling tick) but I think theoretically
|
||||||
|
we could sleep for only a millisecond or two.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
ahci_hba_->global_host_control |= kGlobalHostControl_HW_Reset;
|
||||||
|
|
||||||
|
while (ahci_hba_->global_host_control & kGlobalHostControl_HW_Reset) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
ahci_hba_->global_host_control |= kGlobalHostControl_AHCI_Enable;
|
||||||
|
|
||||||
|
return static_cast<glcr::ErrorCode>(ZThreadSleep(50));
|
||||||
|
```
|
||||||
|
|
||||||
|
## Port Initialization
|
||||||
|
|
||||||
|
Now we can initialize each port that is implemented. There are two cases we
|
||||||
|
need to handle. Either the port has received a COMRESET and is running, or
|
||||||
|
staggered spin up is supported and we need to enable the port. As our VM doesn't
|
||||||
|
require staggered spin up, we will skip it for now and come back to it in the
|
||||||
|
future.
|
||||||
|
|
||||||
|
Before initializing each port we need to check if it has a device attached. We
|
||||||
|
can do that by checking the PxSSTS register described in the AHCI spec section
|
||||||
|
3.3.10.
|
||||||
|
|
||||||
|
![AHCI 1.3.1 Section 3.3.10](images/PxSSTS.png)
|
||||||
|
|
||||||
|
We are looking for a value 0x103, 0x100 indicating that the device is active
|
||||||
|
and 0x3 indicating that communication is established. For each port where we
|
||||||
|
detect this value we continue the initialization process.
|
||||||
|
|
||||||
|
### Memory Structures
|
||||||
|
|
||||||
|
We need to initialize the memory structures for each active port as shown in the
|
||||||
|
image before (under How AHCI works).
|
||||||
|
|
||||||
|
We need a command list structure of length 0x400 (technically it need not be
|
||||||
|
that long if fewer than 32 commands are supported but it doesn't use much
|
||||||
|
additional memory). Additionally a spot is needed for received FIS structure of
|
||||||
|
length 0x100. Finally each of the 32 commands in the command list must point to
|
||||||
|
a command table. Technically these can be quite large because each can hold up
|
||||||
|
to 2^16 physical region descriptors (using ~1 MiB of memory). I've opted
|
||||||
|
to limit it to just 8 16-byte descriptors so each command table would be length
|
||||||
|
0x100 as well. For now we don't support scatter gather buffers and just allocate
|
||||||
|
one contiguous memory section for each read.
|
||||||
|
|
||||||
|
In total all of these memory structures take 0x2500 bytes (3 pages of RAM). We
|
||||||
|
allocate them all in one block and manually set up the pointers to their
|
||||||
|
physical addresses in the HBA port control.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
// 0x0-0x400 -> Command List
|
||||||
|
// 0x400-0x500 -> Received FIS
|
||||||
|
// 0x500-0x2500 -> Command Tables (0x100 each) (Max PRDT Length is 8 for now)
|
||||||
|
uint64_t paddr;
|
||||||
|
command_structures_ =
|
||||||
|
mmth::OwnedMemoryRegion::ContiguousPhysical(0x2500, &paddr);
|
||||||
|
command_list_ = reinterpret_cast<CommandList*>(command_structures_.vaddr());
|
||||||
|
port_struct_->command_list_base = paddr;
|
||||||
|
|
||||||
|
received_fis_ =
|
||||||
|
reinterpret_cast<ReceivedFis*>(command_structures_.vaddr() + 0x400);
|
||||||
|
port_struct_->fis_base = paddr + 0x400;
|
||||||
|
port_struct_->command |= kCommand_FIS_Receive_Enable;
|
||||||
|
|
||||||
|
command_tables_ = glcr::ArrayView(
|
||||||
|
reinterpret_cast<CommandTable*>(command_structures_.vaddr() + 0x500), 32);
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < 32; i++) {
|
||||||
|
// This leaves space for 8 prdt entries.
|
||||||
|
command_list_->command_headers[i].command_table_base_addr =
|
||||||
|
(paddr + 0x500) + (0x100 * i);
|
||||||
|
commands_[i] = nullptr;
|
||||||
|
}
|
||||||
|
port_struct_->interrupt_enable =
|
||||||
|
kInterrupt_D2H_FIS | kInterrupt_PIO_FIS | kInterrupt_DMA_FIS |
|
||||||
|
kInterrupt_DeviceBits_FIS | kInterrupt_Unknown_FIS;
|
||||||
|
port_struct_->sata_error = -1;
|
||||||
|
port_struct_->command |= kCommand_Start;
|
||||||
|
```
|
||||||
|
|
||||||
|
There are a few other things going on here. Once we allocate the space to
|
||||||
|
receive FIS structures we let the port know that it can send FISes using the
|
||||||
|
PxCMD register.
|
||||||
|
|
||||||
|
Additionally at the end we enable interrupts, clear the error register, and
|
||||||
|
tell the port it can start processing commands.
|
||||||
|
|
||||||
|
## Interrupt Handling
|
||||||
|
|
||||||
|
Now that the device is initialized we can actually begin to send it commands.
|
||||||
|
To do so we need to register an interrupt handler with the correct PCI
|
||||||
|
interrupt line (for now we will use the direct interrupt line rather than
|
||||||
|
Message Signaled Interrupts). Registering interrupt handlers is a whole other
|
||||||
|
beast so for this post we will just focus on their implementation.
|
||||||
|
|
||||||
|
The first step is to de-multiplex the interrupt in the controller by checking
|
||||||
|
the interrupt status register. Each port that has an interrupt pending will
|
||||||
|
raise its corresponding bit in the Interrupt Status register. We can delegate
|
||||||
|
to each port the handling of an interrupt, then clear the interrupt bit once it
|
||||||
|
is done. The relevant code in this case looks like this:
|
||||||
|
|
||||||
|
```c++
|
||||||
|
for (uint64_t i = 0; i < num_ports_; i++) {
|
||||||
|
if (!ports_[i].empty() && (ahci_hba_->interrupt_status & (1 << i))) {
|
||||||
|
ports_[i]->HandleIrq();
|
||||||
|
ahci_hba_->interrupt_status &= ~(1 << i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Then on the port side we can handle the interrupt. This requires determining
|
||||||
|
what kind of interrupt was generated using the port's Interrupt Status register
|
||||||
|
(PxIS). Each of the 17 defined bits in this register corresponds to a different
|
||||||
|
interrupt type and can be individually enabled and disabled using the port's
|
||||||
|
Interrupt Enable register (PxIE). For now as we registered when setting up the
|
||||||
|
port we will only handle the interrupts related to receiving FISes from the
|
||||||
|
device.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
void AhciDevice::HandleIrq() {
|
||||||
|
uint32_t int_status = port_struct_->interrupt_status;
|
||||||
|
port_struct_->interrupt_status = int_status;
|
||||||
|
|
||||||
|
bool has_error = false;
|
||||||
|
if (int_status & kInterrupt_D2H_FIS) {
|
||||||
|
dbgln("D2H Received");
|
||||||
|
// Device to host.
|
||||||
|
volatile DeviceToHostRegisterFis& fis =
|
||||||
|
received_fis_->device_to_host_register_fis;
|
||||||
|
if (!CheckFisType(FIS_TYPE_REG_D2H, fis.fis_type)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (fis.error) {
|
||||||
|
dbgln("D2H err: {x}", fis.error);
|
||||||
|
dbgln("status: {x}", fis.status);
|
||||||
|
has_error = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (int_status & kInterrupt_PIO_FIS) {
|
||||||
|
// Like above ...
|
||||||
|
}
|
||||||
|
if (int_status & kInterrupt_DMA_FIS) {
|
||||||
|
// Like above ...
|
||||||
|
}
|
||||||
|
// ...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
To handle the interrupt we read the raised interrupts from the PxIS register and
|
||||||
|
write the values back to it to clear them. Then we can specify how to handle
|
||||||
|
each type of interrupt that we receive. For now we will just debug print the
|
||||||
|
type and any errors from the interrupt since we aren't sending any commands.
|
||||||
|
|
||||||
|
Something I'm not sure about is that as soon as we enable interrupts we seem to
|
||||||
|
receive a FIS from the device with an error bit set. Both the hard drive and the
|
||||||
|
optical drive on qemu send a FIS with error bit 0x1 set. Additionally the status
|
||||||
|
field is set to 0x30 for the hard drive and 0x70 for the optical drive.
|
||||||
|
|
||||||
|
I was able to find an [OSDev Forum
|
||||||
|
post](https://forum.osdev.org/viewtopic.php?f=1&t=56462&start=15#p342163)
|
||||||
|
referencing that this behavior is caused by the reset sending an EXECUTE DEVICE
|
||||||
|
DIAGNOSTIC command (0x90) to the device. It notes that this is largely
|
||||||
|
undocumented behavior but at least this information offers some clarity on the
|
||||||
|
outputs. Reading the ATA Command Set section 7.9.4 we can see that the command
|
||||||
|
outputs code 0x01 to the error bits when `Device 0 passed, Device 1 passed or not
|
||||||
|
present`. According to a footnote we can "See the appropriate transport standard
|
||||||
|
for the definition of device 0 and device 1." I really thought I was already
|
||||||
|
looking at the "appropriate transport standard" but alas. All that to say we'll
|
||||||
|
just ignore this interrupt for now.
|
||||||
|
|
||||||
|
## Sending a Command
|
||||||
|
|
||||||
|
Now that the AHCI ports are initialized and can handle an interrupt, we can send
|
||||||
|
commands to them. To start with let's send the IDENTIFY DEVICE command to each
|
||||||
|
device. This command asks the device to send 512 bytes of information about
|
||||||
|
itself back to us. These bytes contain 40 years of certified-crufty backwards
|
||||||
|
compatibility. I mean just feast your eyes on the number of retired and obsolete
|
||||||
|
fields in just the first page of the spec.
|
||||||
|
|
||||||
|
![IDENTIFY DEVICE Response](images/IDENTIFY_DEVICE.png)
|
||||||
|
|
||||||
|
We'll ignore almost all of this information and just try to get the sector size
|
||||||
|
and sector count from the drive. To do so we need to figure out how to send a
|
||||||
|
command to the device. To be honest I feel like the specs fall down here in
|
||||||
|
actually explaining this. The trick is to send a Register Host to Device FIS in one
|
||||||
|
of the command slots. This FIS type has a field for the command as well as some
|
||||||
|
common parameters such as lba and count. In retrospect it is fairly clear once
|
||||||
|
you are aware of it, but if you are just reading the SATA spec and looking at
|
||||||
|
the possible commands, making the logical jump to the Register Host To Device
|
||||||
|
FIS feels damn near impossible.
|
||||||
|
|
||||||
|
First up we choose an empty command slot to use:
|
||||||
|
|
||||||
|
```c++
|
||||||
|
uint64_t slot;
|
||||||
|
for (slot = 0; slot < 32; slot++) {
|
||||||
|
if (!(commands_issued_ & (1 << slot))) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (slot == 32) {
|
||||||
|
dbgln("All slots full");
|
||||||
|
return glcr::INTERNAL;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `commands_issued_` variable is just for our own accounting of which slots
|
||||||
|
are currently in use by another command.
|
||||||
|
|
||||||
|
Next we can populate the FIS for that slot. The spec for the Register Host to
|
||||||
|
Device FIS is as follows:
|
||||||
|
|
||||||
|
![Register Host to Device FIS Layout](images/RegisterHostToDeviceFIS.png)
|
||||||
|
|
||||||
|
We don't need to initialize most of the fields here because the IDENTIFY_DEVICE
|
||||||
|
call doesn't rely on an lba or sector count. One of the keys is setting the high
|
||||||
|
bit "C" in the byte that contains PM Port which indicates to the HBA that this
|
||||||
|
FIS contains a new command (I spent a while trying to figure out why this wasn't
|
||||||
|
working without that). The code for this is relatively straightforward.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
auto* fis = reinterpret_cast<HostToDeviceRegisterFis*>(
|
||||||
|
command_tables_[slot].command_fis);
|
||||||
|
*fis = HostToDeviceRegisterFis{
|
||||||
|
.fis_type = FIS_TYPE_REG_H2D,
|
||||||
|
.pmp_and_c = 0x80,
|
||||||
|
.command = kIdentifyDevice, // 0xEC
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
We also need to let the HBA know where it can put the result in memory. For this
|
||||||
|
we use the physical region descriptor table corresponding to this command slot.
|
||||||
|
As described before, for simplicity now we are only using a single entry to do
|
||||||
|
this. We allocate a 512 byte memory region and set its physical address and
|
||||||
|
size in the first slot of the command slot's PRDT.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
uint64_t paddr;
|
||||||
|
auto region =
|
||||||
|
mmth::OwnedMemoryRegion::ContiguousPhysical(0x200, &paddr);
|
||||||
|
command_tables_[slot].prdt[0].region_address = command.paddr;
|
||||||
|
command_tables_[slot].prdt[0].byte_count = 0x200; // 512 bytes
|
||||||
|
command_list_->command_headers[slot].prd_table_length = 1;
|
||||||
|
```
|
||||||
|
|
||||||
|
All that is left to do is to issue the command! We set the size of the command
|
||||||
|
FIS (in double words for some reason?) as well as let the HBA know it can
|
||||||
|
prefetch the data from memory. Then we set the bit for this command slot in the
|
||||||
|
PxCI register which will cause the device to start processing it.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
// Set the command FIS length (in double words).
|
||||||
|
command_list_->command_headers[slot].command =
|
||||||
|
(sizeof(HostToDeviceRegisterFis) / 4) & 0x1F;
|
||||||
|
|
||||||
|
// Set prefetch bit.
|
||||||
|
command_list_->command_headers[slot].command |= (1 << 7);
|
||||||
|
|
||||||
|
// TODO: Synchronization-wise we need to ensure this is set in the same
|
||||||
|
// critical section as where we select a slot.
|
||||||
|
commands_issued_ |= (1 << slot);
|
||||||
|
port_struct_->command_issue |= (1 << slot);
|
||||||
|
```
|
||||||
|
|
||||||
|
But wait! How will we know when this command has completed? We somehow need to
|
||||||
|
wait until we receive an interrupt for this command to process the data it
|
||||||
|
sent. To handle this we can add a semaphore for each port command slot to allow
|
||||||
|
signalling when we receive a completion interrupt for that command. I think it
|
||||||
|
might make sense to have some sort of callback instead so we can pass errors
|
||||||
|
back to the caller instead of just a completion signal. However I'm not sure
|
||||||
|
what type of errors exist that are resolvable by the caller so for now this
|
||||||
|
works.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
void IdentifyDevice() {
|
||||||
|
...
|
||||||
|
// Issue command.
|
||||||
|
commands_issued_ |= (1 << slot);
|
||||||
|
port_struct_->command_issue |= (1 << slot);
|
||||||
|
|
||||||
|
command_signals_[slot].Wait();
|
||||||
|
|
||||||
|
// Continue processing.
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
void AhciPort::HandleIrq() {
|
||||||
|
uint32_t int_status = port_struct_->interrupt_status;
|
||||||
|
port_struct_->interrupt_status = int_status;
|
||||||
|
|
||||||
|
...
|
||||||
|
// Parse received FIS.
|
||||||
|
...
|
||||||
|
|
||||||
|
uint32_t commands_finished = commands_issued_ & ~port_struct_->command_issue;
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < 32; i++) {
|
||||||
|
if (commands_finished & (1 << i)) {
|
||||||
|
command_signals_[i].Signal();
|
||||||
|
commands_issued_ &= ~(1 << i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Ok now that we have retrieved the information from the drive we can parse it.
|
||||||
|
For the sector size, the default is 512 bytes which we will use unless the
|
||||||
|
`LOGICAL SECTOR SIZE SUPPORTED` bit is set in 16-bit word 106, bit 12. If that
|
||||||
|
is set we can check the 16-bit words at 117 and 118 to get the 32 bit sector
|
||||||
|
size value. For the sector count, we need to check if the device supports 48 bit
|
||||||
|
addressing using 16-bit word 83 bit 10. If it is supported we can get the number of
|
||||||
|
sectors from the 4 16-bit words starting at 100. Otherwise we read the number of
|
||||||
|
sectors from the 2 16-bit words starting at index 60.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
uint16_t* ident = reinterpret_cast<uint16_t*>(region.vaddr());
|
||||||
|
if (ident[106] & (1 << 12)) {
|
||||||
|
sector_size_ = *reinterpret_cast<uint32_t*>(ident + 117);
|
||||||
|
} else {
|
||||||
|
sector_size_ = 512;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ident[83] & (1 << 10)) {
|
||||||
|
lba_count_ = *reinterpret_cast<uint64_t*>(ident + 100);
|
||||||
|
} else {
|
||||||
|
lba_count_ = *reinterpret_cast<uint32_t*>(ident + 60);
|
||||||
|
}
|
||||||
|
dbgln("Sector size: {x}", sector_size_);
|
||||||
|
dbgln("LBA Count: {x}", lba_count_);
|
||||||
|
is_init_ = true;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
You might be rightfully thinking that it would be less brittle to make a struct
|
||||||
|
definition that we could point at this address which would implicitly contain
|
||||||
|
these offsets - and you would be correct. But to be honest, I can't be bothered
|
||||||
|
to create a 256 entry struct definition just to get these values. Maybe in the
|
||||||
|
future.
|
||||||
|
|
||||||
|
## Reading Data
|
||||||
|
|
||||||
|
Now that we have the ability to read the IDENTIFY DEVICE data we are only a
|
||||||
|
short hop, skip, and jump away from reading data from the drive. The main
|
||||||
|
differences when reading data are (a) the command number, (b) we must specify
|
||||||
|
the Logical Block Address (LBA) we want to read from and the number of sectors
|
||||||
|
to read, and (c) we need to dynamically size the entry in the Physical Region
|
||||||
|
Descriptor Table (we will still use only one entry for now).
|
||||||
|
|
||||||
|
Because much of this is similar we can fairly easily create a shared struct with
|
||||||
|
the necessary information and construct the requests in parallel.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
struct CommandInfo {
|
||||||
|
uint8_t command;
|
||||||
|
uint64_t lba;
|
||||||
|
uint32_t sectors;
|
||||||
|
uint64_t paddr;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
Then from that we can create an IssueCommand function that constructs the
|
||||||
|
Register Host to Device FIS in a similar way for both. Before that I'd like to
|
||||||
|
take this opportunity to point out how the LBA in this FIS is stored in a way
|
||||||
|
that truly only a mother could love:
|
||||||
|
|
||||||
|
![Register Host to Device Layout LBA](images/RegisterHostToDeviceFISLBA.png)
|
||||||
|
|
||||||
|
That aside, we simply update the FIS construction to set the command, LBA, and
|
||||||
|
sector count. Following that we set the PRDT values (although we still only use
|
||||||
|
one slot).
|
||||||
|
|
||||||
|
```c++
|
||||||
|
auto* fis = reinterpret_cast<HostToDeviceRegisterFis*>(
|
||||||
|
command_tables_[slot].command_fis);
|
||||||
|
*fis = HostToDeviceRegisterFis{
|
||||||
|
.fis_type = FIS_TYPE_REG_H2D,
|
||||||
|
.pmp_and_c = 0x80,
|
||||||
|
.command = command.command,
|
||||||
|
|
||||||
|
.lba0 = static_cast<uint8_t>(command.lba & 0xFF),
|
||||||
|
.lba1 = static_cast<uint8_t>((command.lba >> 8) & 0xFF),
|
||||||
|
.lba2 = static_cast<uint8_t>((command.lba >> 16) & 0xFF),
|
||||||
|
.device = (1 << 6), // ATA LBA Mode
|
||||||
|
|
||||||
|
.lba3 = static_cast<uint8_t>((command.lba >> 24) & 0xFF),
|
||||||
|
.lba4 = static_cast<uint8_t>((command.lba >> 32) & 0xFF),
|
||||||
|
.lba5 = static_cast<uint8_t>((command.lba >> 40) & 0xFF),
|
||||||
|
|
||||||
|
.count = command.sectors,
|
||||||
|
};
|
||||||
|
|
||||||
|
command_tables_[slot].prdt[0].region_address = command.paddr;
|
||||||
|
command_tables_[slot].prdt[0].byte_count = 512 * command.sectors;
|
||||||
|
```
|
||||||
|
|
||||||
|
Then issuing either the identify device command or the read command is
|
||||||
|
relatively straightforward:
|
||||||
|
|
||||||
|
```c++
|
||||||
|
// IDENTIFY DEVICE
|
||||||
|
CommandInfo identify{
|
||||||
|
.command = kIdentifyDevice,
|
||||||
|
.lba = 0,
|
||||||
|
.sectors = 1,
|
||||||
|
.paddr = 0,
|
||||||
|
};
|
||||||
|
auto region =
|
||||||
|
mmth::OwnedMemoryRegion::ContiguousPhysical(0x200, &identify.paddr);
|
||||||
|
ASSIGN_OR_RETURN(auto* sem, IssueCommand(identify));
|
||||||
|
sem->Wait();
|
||||||
|
|
||||||
|
// DMA READ
|
||||||
|
CommandInfo dma_read{
|
||||||
|
.command = kDmaReadExt,
|
||||||
|
.lba = lba,
|
||||||
|
.sectors = sector_cnt,
|
||||||
|
.paddr = 0,
|
||||||
|
};
|
||||||
|
auto region =
|
||||||
|
mmth::OwnedMemoryRegion::ContiguousPhysical(0x200 * sector_cnt, &dma_read.paddr);
|
||||||
|
ASSIGN_OR_RETURN(auto* sem, IssueCommand(dma_read));
|
||||||
|
sem->Wait();
|
||||||
|
```
|
||||||
|
|
||||||
|
From here the world is our oyster and we can read any arbitrary data from the
|
||||||
|
disk. The bulk of this code isn't actually all that long (~200 LOC in the [AHCI
|
||||||
|
Port implementation](https://gitea.tiramisu.one/drew/acadia/src/commit/21265e76edf4fa93b8ec1795da4bdd2fc70b79d9/sys/denali/ahci/ahci_port.cpp)
|
||||||
|
). However I probably added and deleted several times that trying to get
|
||||||
|
everything working and refactored down to a nice interface.
|
||||||
|
|
||||||
|
## Coming next
|
||||||
|
|
||||||
|
This is nowhere near a full implementation. Among the things we
|
||||||
|
skipped that I plan to come back to at some point are:
|
||||||
|
|
||||||
|
- **Staggered spin up:** In controllers that support this, each device is
|
||||||
|
powered down after RESET and must be started individually.
|
||||||
|
- **Message Signaled Interrupts:** The hot new way to handle PCI device
|
||||||
|
interrupts. Has only been available since 1998 so support may vary.
|
||||||
|
- **Port Multiplier Support:** Something that gets mentioned all over the specs
|
||||||
|
but I've avoided even looking into until this moment. But it looks like it
|
||||||
|
allows several devices behind a single port.
|
||||||
|
- **Scatter Gather buffers:** For big files we may not always be able to find a
|
||||||
|
sufficient contiguous chunk of physical memory. This means we may have to use
|
||||||
|
more than one entry in the PRDT!
|
||||||
|
- **Error Handling & Retry:** Even though QEMU may succeed in executing commands
|
||||||
|
100% of the time, real hardware may not and we should probably handle that.
|
||||||
|
- **Fewer than 32 commands supported:** We kinda always assume that the device
|
||||||
|
can handle 32 commands even though it may not (how many it does is exposed in
|
||||||
|
the GHC registers).
|
|
@ -12,3 +12,7 @@ title = "Drew's Site"
|
||||||
url = 'https://www.linkedin.com/in/drew-galbraith/'
|
url = 'https://www.linkedin.com/in/drew-galbraith/'
|
||||||
weight = 30
|
weight = 30
|
||||||
|
|
||||||
|
[markup]
|
||||||
|
[markup.highlight]
|
||||||
|
noClasses = false
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,16 @@
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
|
{{- with resources.Get "css/syntax.css" }}
|
||||||
|
{{- if eq hugo.Environment "development" }}
|
||||||
|
<link rel="stylesheet" href="{{ .RelPermalink }}">
|
||||||
|
{{- else }}
|
||||||
|
{{- with . | minify | fingerprint }}
|
||||||
|
<link rel="stylesheet" href="{{ .RelPermalink }}" integrity="{{ .Data.Integrity }}" crossorigin="anonymous">
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
{{- range .Resources.Match "css/*.css" }}
|
{{- range .Resources.Match "css/*.css" }}
|
||||||
{{- if eq hugo.Environment "development" }}
|
{{- if eq hugo.Environment "development" }}
|
||||||
<link rel="stylesheet" href="{{ .RelPermalink }}">
|
<link rel="stylesheet" href="{{ .RelPermalink }}">
|
||||||
|
|