diff --git a/assets/css/styles.css b/assets/css/styles.css index 23a1d16..24a1f0e 100644 --- a/assets/css/styles.css +++ b/assets/css/styles.css @@ -83,4 +83,6 @@ figcaption { font-style: italic; } - +.chroma { + padding: 10px; +} diff --git a/assets/css/syntax.css b/assets/css/syntax.css new file mode 100644 index 0000000..eac32ab --- /dev/null +++ b/assets/css/syntax.css @@ -0,0 +1,86 @@ +/* Background */ .bg { color: #d0d0d0; background-color: #202020; } +/* PreWrapper */ .chroma { color: #d0d0d0; background-color: #202020; } +/* Other */ .chroma .x { } +/* Error */ .chroma .err { color: #a61717; background-color: #e3d2d2 } +/* CodeLine */ .chroma .cl { } +/* LineLink */ .chroma .lnlinks { outline: none; text-decoration: none; color: inherit } +/* LineTableTD */ .chroma .lntd { vertical-align: top; padding: 0; margin: 0; border: 0; } +/* LineTable */ .chroma .lntable { border-spacing: 0; padding: 0; margin: 0; border: 0; } +/* LineHighlight */ .chroma .hl { background-color: #363636 } +/* LineNumbersTable */ .chroma .lnt { white-space: pre; -webkit-user-select: none; user-select: none; margin-right: 0.4em; padding: 0 0.4em 0 0.4em;color: #686868 } +/* LineNumbers */ .chroma .ln { white-space: pre; -webkit-user-select: none; user-select: none; margin-right: 0.4em; padding: 0 0.4em 0 0.4em;color: #686868 } +/* Line */ .chroma .line { display: flex; } +/* Keyword */ .chroma .k { color: #6ab825; font-weight: bold } +/* KeywordConstant */ .chroma .kc { color: #6ab825; font-weight: bold } +/* KeywordDeclaration */ .chroma .kd { color: #6ab825; font-weight: bold } +/* KeywordNamespace */ .chroma .kn { color: #6ab825; font-weight: bold } +/* KeywordPseudo */ .chroma .kp { color: #6ab825 } +/* KeywordReserved */ .chroma .kr { color: #6ab825; font-weight: bold } +/* KeywordType */ .chroma .kt { color: #6ab825; font-weight: bold } +/* Name */ .chroma .n { } +/* NameAttribute */ .chroma .na { color: #bbbbbb } +/* NameBuiltin */ .chroma .nb { color: #24909d } +/* 
NameBuiltinPseudo */ .chroma .bp { } +/* NameClass */ .chroma .nc { color: #447fcf; text-decoration: underline } +/* NameConstant */ .chroma .no { color: #40ffff } +/* NameDecorator */ .chroma .nd { color: #ffa500 } +/* NameEntity */ .chroma .ni { } +/* NameException */ .chroma .ne { color: #bbbbbb } +/* NameFunction */ .chroma .nf { color: #447fcf } +/* NameFunctionMagic */ .chroma .fm { } +/* NameLabel */ .chroma .nl { } +/* NameNamespace */ .chroma .nn { color: #447fcf; text-decoration: underline } +/* NameOther */ .chroma .nx { } +/* NameProperty */ .chroma .py { } +/* NameTag */ .chroma .nt { color: #6ab825; font-weight: bold } +/* NameVariable */ .chroma .nv { color: #40ffff } +/* NameVariableClass */ .chroma .vc { } +/* NameVariableGlobal */ .chroma .vg { } +/* NameVariableInstance */ .chroma .vi { } +/* NameVariableMagic */ .chroma .vm { } +/* Literal */ .chroma .l { } +/* LiteralDate */ .chroma .ld { } +/* LiteralString */ .chroma .s { color: #ed9d13 } +/* LiteralStringAffix */ .chroma .sa { color: #ed9d13 } +/* LiteralStringBacktick */ .chroma .sb { color: #ed9d13 } +/* LiteralStringChar */ .chroma .sc { color: #ed9d13 } +/* LiteralStringDelimiter */ .chroma .dl { color: #ed9d13 } +/* LiteralStringDoc */ .chroma .sd { color: #ed9d13 } +/* LiteralStringDouble */ .chroma .s2 { color: #ed9d13 } +/* LiteralStringEscape */ .chroma .se { color: #ed9d13 } +/* LiteralStringHeredoc */ .chroma .sh { color: #ed9d13 } +/* LiteralStringInterpol */ .chroma .si { color: #ed9d13 } +/* LiteralStringOther */ .chroma .sx { color: #ffa500 } +/* LiteralStringRegex */ .chroma .sr { color: #ed9d13 } +/* LiteralStringSingle */ .chroma .s1 { color: #ed9d13 } +/* LiteralStringSymbol */ .chroma .ss { color: #ed9d13 } +/* LiteralNumber */ .chroma .m { color: #3677a9 } +/* LiteralNumberBin */ .chroma .mb { color: #3677a9 } +/* LiteralNumberFloat */ .chroma .mf { color: #3677a9 } +/* LiteralNumberHex */ .chroma .mh { color: #3677a9 } +/* LiteralNumberInteger */ .chroma .mi { color: 
#3677a9 } +/* LiteralNumberIntegerLong */ .chroma .il { color: #3677a9 } +/* LiteralNumberOct */ .chroma .mo { color: #3677a9 } +/* Operator */ .chroma .o { } +/* OperatorWord */ .chroma .ow { color: #6ab825; font-weight: bold } +/* Punctuation */ .chroma .p { } +/* Comment */ .chroma .c { color: #999999; font-style: italic } +/* CommentHashbang */ .chroma .ch { color: #999999; font-style: italic } +/* CommentMultiline */ .chroma .cm { color: #999999; font-style: italic } +/* CommentSingle */ .chroma .c1 { color: #999999; font-style: italic } +/* CommentSpecial */ .chroma .cs { color: #e50808; background-color: #520000; font-weight: bold } +/* CommentPreproc */ .chroma .cp { color: #cd2828; font-weight: bold } +/* CommentPreprocFile */ .chroma .cpf { color: #cd2828; font-weight: bold } +/* Generic */ .chroma .g { } +/* GenericDeleted */ .chroma .gd { color: #d22323 } +/* GenericEmph */ .chroma .ge { font-style: italic } +/* GenericError */ .chroma .gr { color: #d22323 } +/* GenericHeading */ .chroma .gh { color: #ffffff; font-weight: bold } +/* GenericInserted */ .chroma .gi { color: #589819 } +/* GenericOutput */ .chroma .go { color: #cccccc } +/* GenericPrompt */ .chroma .gp { color: #aaaaaa } +/* GenericStrong */ .chroma .gs { font-weight: bold } +/* GenericSubheading */ .chroma .gu { color: #ffffff; text-decoration: underline } +/* GenericTraceback */ .chroma .gt { color: #d22323 } +/* GenericUnderline */ .chroma .gl { text-decoration: underline } +/* TextWhitespace */ .chroma .w { color: #666666 } diff --git a/content/blog/2024/01/ahci-driver/images/HBA_Memory_Annotated.png b/content/blog/2024/01/ahci-driver/images/HBA_Memory_Annotated.png new file mode 100644 index 0000000..5f19a3b Binary files /dev/null and b/content/blog/2024/01/ahci-driver/images/HBA_Memory_Annotated.png differ diff --git a/content/blog/2024/01/ahci-driver/images/HBA_Memory_Space.png b/content/blog/2024/01/ahci-driver/images/HBA_Memory_Space.png new file mode 100644 index 0000000..90a3011 
Binary files /dev/null and b/content/blog/2024/01/ahci-driver/images/HBA_Memory_Space.png differ diff --git a/content/blog/2024/01/ahci-driver/images/HBA_Port_Memory.png b/content/blog/2024/01/ahci-driver/images/HBA_Port_Memory.png new file mode 100644 index 0000000..0a2bbe5 Binary files /dev/null and b/content/blog/2024/01/ahci-driver/images/HBA_Port_Memory.png differ diff --git a/content/blog/2024/01/ahci-driver/images/IDENTIFY_DEVICE.png b/content/blog/2024/01/ahci-driver/images/IDENTIFY_DEVICE.png new file mode 100644 index 0000000..153a484 Binary files /dev/null and b/content/blog/2024/01/ahci-driver/images/IDENTIFY_DEVICE.png differ diff --git a/content/blog/2024/01/ahci-driver/images/PxSSTS.png b/content/blog/2024/01/ahci-driver/images/PxSSTS.png new file mode 100644 index 0000000..48b6ae9 Binary files /dev/null and b/content/blog/2024/01/ahci-driver/images/PxSSTS.png differ diff --git a/content/blog/2024/01/ahci-driver/images/RegisterHostToDeviceFIS.png b/content/blog/2024/01/ahci-driver/images/RegisterHostToDeviceFIS.png new file mode 100644 index 0000000..facfb6f Binary files /dev/null and b/content/blog/2024/01/ahci-driver/images/RegisterHostToDeviceFIS.png differ diff --git a/content/blog/2024/01/ahci-driver/images/RegisterHostToDeviceFISLBA.png b/content/blog/2024/01/ahci-driver/images/RegisterHostToDeviceFISLBA.png new file mode 100644 index 0000000..3e801b6 Binary files /dev/null and b/content/blog/2024/01/ahci-driver/images/RegisterHostToDeviceFISLBA.png differ diff --git a/content/blog/2024/01/ahci-driver/index.md b/content/blog/2024/01/ahci-driver/index.md new file mode 100644 index 0000000..60a374b --- /dev/null +++ b/content/blog/2024/01/ahci-driver/index.md @@ -0,0 +1,617 @@ +--- +title: "Writing an AHCI Driver" +date: 2024-01-08 +draft: true +tags: ['osdev'] +--- + +Now that I've wrapped up the [0.1.0 Release](/blog/2023/12/acadia-0.1.0) of +AcadiaOS I'm looking to cleanup some of the "just get it working" hacks that +exist in the codebase. 
First up on that list is the AHCI Driver. + +## What is AHCI + +AHCI stands for Advanced Host Controller Interface and if you like acronyms boy +are you in for a treat. AHCI is a way to interface with SATA (which replaced +PATA (a.k.a. IDE)) via its HBA. AHCI has since been superseded by NVMe but is +simpler to implement (or so I've been told) so I've started here. + +To try to explain it without acronym soup, AHCI allows you to access disk +drives and optical drives (SATA devices) by writing relevant ATA commands to +memory addresses that are backed by hardware firmware. There are a wide variety +of commands available but best I can tell the main ones used these days are to +identify the device and read/write via direct memory access (DMA). + +Essentially you give the device an offset to read from as well as a physical +memory address to write to. The device firmware copies the amount of data you +requested to the physical address then triggers an interrupt to indicate that +the operation is complete. Likewise writing via DMA is the same but in reverse. + +Disclaimer, all of the above is basically just summarizing Wikipedia and the +OSDev wiki and I don't really know what I'm talking about. + +## Current State + +The current AHCI implementation in Denali is straightforward but very brittle. +It relies on everything following the happy path and is cobbled together more +based on trial and error of what worked rather than following the specification +closely. + +As a part of this article we're going to dive into the related specs and look at +how they relate to each other. The trickiest part of writing the driver is the +fact that the necessary information is spread across several different specs +rather than contained in one place. 
The specs I reference in this post are: + +- [AHCI 1.3.1](https://www.intel.com/content/www/us/en/io/serial-ata/serial-ata-ahci-spec-rev1-3-1.html) +- SATA 3.2 +- ATA/ATAPI Command Set 3 (ACS-3) + +The SATA and ACS specs cost money so I can't link them directly but it isn't +hard to find drafts of them available online. + +## How AHCI Works + +AHCI allows you to control SATA devices by writing commands to memory. The +layout of these structures is nicely shown in the AHCI Spec Figure 4: + +![AHCI Memory](images/HBA_Memory_Annotated.png) + +There are several pieces here that I've annotated: + +1. The Generic Host Control (GHC) is a set of registers that allow you to manage the + whole controller and get its status. These registers are referred to in the + spec using GHC.RegisterName so the interrupt status register for instance is + "GHC.IS" for short. +2. Each device (hard disk or disc drive) that is attached to the controller is + exposed as a "port" with a set of registers to control it individually. + These registers are referred to as PxRegisterName so for instance the command + issue register is PxCI. +3. For each port it has a separately allocated piece of memory that can accept + up to 32 "commands" to execute. +4. When the controller is finished executing a command for a device it will + write a Frame Information Structure (FIS) and raise an interrupt. (The GHC + has a register to show which devices have a pending interrupt GHC.IS). + +Each device can support up to 32 pending commands (but only receive one FIS at a +time). The memory structure is as follows: + +![Port Memory](images/HBA_Port_Memory.png) + +To issue a command we can: + +1. Select a command slot that isn't currently in use. +2. Write the command to that command table (the specifics of this are explained + later) and then set that command as active in the commands issued register + for that port (PxCI). +3. 
Wait for an interrupt indicating that this command has finished and read the + resulting FIS. We mostly just care about the status byte. The controller + will unset the bit for the completed command in the PxCI register when it + raises the interrupt so we can disambiguate which command has finished. + +Although this sounds straightforward there are a few moving parts here that we +need to get up and running. + +- We need to find where the HBA structure is in memory. +- We need to allocate some space for the command tables and received FIS + structures. +- We need to set up interrupt handling for each port. +- Before this we will issue a hardware reset command to the controller to get + everything in a clean state. + +## Finding the HBA structure + +We find the HBA structure by iterating through the PCI configuration space and +finding the AHCI device. I'm not going to delve too deeply into this because PCI +could be a whole separate post and it is a little trickier to explain because +it is harder to access the specifications (the good people at the PCI "Special +Interest Group" are happy to let you download the PDF for the low low price of +$4500 if you are not a member). + +The short story is that we are looking for the device with the right [class +code](https://wiki.osdev.org/PCI#Class_Codes) - Class Code 0x1 (Storage Device), +Subclass 0x6 (SATA Controller), Subtype 0x1 (AHCI). + +Once we have the correct configuration space we can read the address at offset +0x24 (called the ABAR for AHCI Base Address) which points to the start of the +GHC registers. + +We can mostly ignore the other information in the configuration space for now as +we aren't dealing with Message Signaled Interrupts yet. + +## Hardware Reset + +Now that we have found the Generic Host Control registers we're going to +initiate a hardware reset of the AHCI Controller. The advantage of this is we +will know the exact state that the controller and its ports are in. 
Other than +that it ensures that we aren't dependent on the specific way limine uses the +AHCI controller. + +I suspect that this is **not** how most production operating systems handle +things but this should give us a clean slate for now. + +From the AHCI spec: + +> 10.4.3 HBA Reset +> +> If the HBA becomes unusable for multiple ports, and a software reset or port +> reset does not correct the problem, software may reset the entire HBA by +> setting GHC.HR to ‘1’. When software sets the GHC.HR bit to ‘1’, the HBA +> shall perform an internal reset action. The bit shall be cleared to ‘0’ by +> the HBA when the reset is complete. A software write of ‘0’ to GHC.HR shall +> have no effect. To perform the HBA reset, software sets GHC.HR to ‘1’ and may +> poll until this bit is read to be ‘0’, at which point software knows that the +> HBA reset has completed. +> +> If the HBA has not cleared GHC.HR to ‘0’ within 1 second of software setting +> GHC.HR to ‘1’, the HBA is in a hung or locked state. +> +> When GHC.HR is set to ‘1’, GHC.AE, GHC.IE, the IS register, and all port +> register fields (except PxFB/PxFBU/PxCLB/PxCLBU) that are not HwInit in the +> HBA’s register memory space are reset. The HBA’s configuration space and all +> other global registers/bits are not affected by setting GHC.HR to ‘1’. Any +> HwInit bits in the port specific registers are not affected by setting GHC.HR +> to ‘1’. The port specific registers PxFB, PxFBU, PxCLB, and PxCLBU are not +> affected by setting GHC.HR to ‘1’. If the HBA supports staggered spin-up, the +> PxCMD.SUD bit will be reset to ‘0’; software is responsible for setting the +> PxCMD.SUD and PxSCTL.DET fields appropriately such that communication can be +> established on the Serial ATA link. If the HBA does not support staggered +> spin-up, the HBA reset shall cause a COMRESET to be sent on the port. + +Despite the long text, this process is fairly straightforward. 
We set the +Hardware Reset bit and then poll for it to be set to 0. We then set the AHCI +enable bit. For now we can leave interrupts disabled until we have reset the +ports. Once this is done we sleep for a few milliseconds to allow the ports time +to spin up. For now we are just using 50ms because that is the smallest +resolution we support sleeping for (1 scheduling tick) but I think theoretically +we could sleep for only a millisecond or two. + +```c++ +ahci_hba_->global_host_control |= kGlobalHostControl_HW_Reset; + +while (ahci_hba_->global_host_control & kGlobalHostControl_HW_Reset) { + continue; +} + +ahci_hba_->global_host_control |= kGlobalHostControl_AHCI_Enable; + +return static_cast(ZThreadSleep(50)); +``` + +## Port Initialization + +Now we can initialize each port that is implemented. There are two cases we +need to handle. Either the port has received a COMRESET and is running, or +staggered spin up is supported and we need to enable the port. As our VM doesn't +require staggered spin up, we will skip it for now and come back to it in the +future. + +Before initializing each port we need to check if it has a device attached. We +can do that by checking the PxSSTS register described in the AHCI spec section +3.3.10. + +![AHCI 1.3.1 Section 3.3.10](images/PxSSTS.png) + +We are looking for a value 0x103, 0x100 indicating that the device is active +and 0x3 indicating that communication is established. For each port where we +detect this value we continue the initialization process. + +### Memory Structures + +We need to initialize the memory structures for each active port as shown in the +image before (under How AHCI works). + +We need a command list structure of length 0x400 (technically it need not be +that long if fewer than 32 commands are supported but it doesn't use much +additional memory). Additionally a spot is needed for received FIS structure of +length 0x100. 
Finally each of the 32 commands in the command list must point to +a command table. Technically these can be quite large because each can hold up +to 2^16 physical region descriptors (using ~1 MiB of memory). I've opted +to limit it to just 8 16-byte descriptors so each command table would be length +0x100 as well. For now we don't support scatter gather buffers and just allocate +one contiguous memory section for each read. + +In total all of these memory structures take 0x2500 bytes (3 pages of RAM). We +allocate them all in one block and manually set up the pointers to their +physical addresses in the HBA port control. + +```c++ +// 0x0-0x400 -> Command List +// 0x400-0x500 -> Received FIS +// 0x500-0x2500 -> Command Tables (0x100 each) (Max PRDT Length is 8 for now) +uint64_t paddr; +command_structures_ = + mmth::OwnedMemoryRegion::ContiguousPhysical(0x2500, &paddr); +command_list_ = reinterpret_cast(command_structures_.vaddr()); +port_struct_->command_list_base = paddr; + +received_fis_ = + reinterpret_cast(command_structures_.vaddr() + 0x400); +port_struct_->fis_base = paddr + 0x400; +port_struct_->command |= kCommand_FIS_Receive_Enable; + +command_tables_ = glcr::ArrayView( + reinterpret_cast(command_structures_.vaddr() + 0x500), 32); + +for (uint64_t i = 0; i < 32; i++) { + // This leaves space for 8 prdt entries. + command_list_->command_headers[i].command_table_base_addr = + (paddr + 0x500) + (0x100 * i); + commands_[i] = nullptr; +} +port_struct_->interrupt_enable = + kInterrupt_D2H_FIS | kInterrupt_PIO_FIS | kInterrupt_DMA_FIS | + kInterrupt_DeviceBits_FIS | kInterrupt_Unknown_FIS; +port_struct_->sata_error = -1; +port_struct_->command |= kCommand_Start; +``` + +There are a few other things going on here. Once we allocate the space to +receive FIS structures we let the port know that it can send FISes using the +PxCMD register. 
+ +Additionally at the end we enable interrupts, clear the error register, and +tell the port it can start processing commands. + +## Interrupt Handling + +Now that the device is initialized we can actually begin to send it commands. +To do so we need to register an interrupt handler with the correct PCI +interrupt line (for now we will use the direct interrupt line rather than +Message Signaled Interrupts). Registering interrupt handlers is a whole other +beast so for this post we will just focus on their implementation. + +The first step is to de-multiplex the interrupt in the controller by checking +the interrupt status register. Each port that has an interrupt pending will +raise its corresponding bit in the Interrupt Status register. We can delegate +to each port the handling of an interrupt, then clear the interrupt bit once it +is done. The relevant code in this case looks like this: + +```c++ +for (uint64_t i = 0; i < num_ports_; i++) { + if (!ports_[i].empty() && (ahci_hba_->interrupt_status & (1 << i))) { + ports_[i]->HandleIrq(); + ahci_hba_->interrupt_status &= ~(1 << i); + } +} +``` + +Then on the port side we can handle the interrupt. This requires determining +what kind of interrupt was generated using the port's Interrupt Status register +(PxIS). Each of the 17 defined bits in this register corresponds to a different +interrupt type and can be individually enabled and disabled using the port's +Interrupt Enable register (PxIE). For now as we registered when setting up the +port we will only handle the interrupts related to receiving FISes from the +device. + +```c++ +void AhciDevice::HandleIrq() { + uint32_t int_status = port_struct_->interrupt_status; + port_struct_->interrupt_status = int_status; + + bool has_error = false; + if (int_status & kInterrupt_D2H_FIS) { + dbgln("D2H Received"); + // Device to host. 
+ volatile DeviceToHostRegisterFis& fis = + received_fis_->device_to_host_register_fis; + if (!CheckFisType(FIS_TYPE_REG_D2H, fis.fis_type)) { + return; + } + if (fis.error) { + dbgln("D2H err: {x}", fis.error); + dbgln("status: {x}", fis.status); + has_error = true; + } + } + if (int_status & kInterrupt_PIO_FIS) { + // Like above ... + } + if (int_status & kInterrupt_DMA_FIS) { + // Like above ... + } + // ... +} +``` + +To handle the interrupt we read the raised interrupts from the PxIS register and +write the values back to it to clear them. Then we can specify how to handle +each type of interrupt that we receive. For now we will just debug print the +type and any errors from the interrupt since we aren't sending any commands. + +Something I'm not sure about is that as soon as we enable interrupts we seem to +receive a FIS from the device with an error bit set. Both the hard drive and the +optical drive on qemu send a FIS with error bit 0x1 set. Additionally the status +field is set to 0x30 for the hard drive and 0x70 for the optical drive. + +I was able to find an [OSDev Forum +post](https://forum.osdev.org/viewtopic.php?f=1&t=56462&start=15#p342163) +referencing that this behavior is caused by the reset sending an EXECUTE DEVICE +DIAGNOSTIC command (0x90) to the device. It notes that this is largely +undocumented behavior but at least this information offers some clarity on the +outputs. Reading the ATA Command Set section 7.9.4 we can see that the command +outputs code 0x01 to the error bits when `Device 0 passed, Device 1 passed or not +present`. According to a footnote we can "See the appropriate transport standard +for the definition of device 0 and device 1." I really thought I was already +looking at the "appropriate transport standard" but alas. All that to say we'll +just ignore this interrupt for now. + +## Sending a Command + +Now that the AHCI ports are initialized and can handle an interrupt, we can send +commands to them. 
To start with let's send the IDENTIFY DEVICE command to each +device. This command asks the device to send 512 bytes of information about +itself back to us. These bytes contain 40 years of certified-crufty backwards +compatibility. I mean just feast your eyes on the number of retired and obsolete +fields in just the first page of the spec. + +![IDENTIFY DEVICE Response](images/IDENTIFY_DEVICE.png) + +We'll ignore almost all of this information and just try to get the sector size +and sector count from the drive. To do so we need to figure out how to send a +command to the device. To be honest I feel like the specs fall down here in +actually explaining this. The trick is to send a Register Host to Device FIS in one +of the command slots. This FIS type has a field for the command as well as some +common parameters such as lba and count. In retrospect it is fairly clear once +you are aware of it, but if you are just reading the SATA spec and looking at +the possible commands, making the logical jump to the Register Host To Device +FIS feels damn near impossible. + +First up we choose an empty command slot to use: + +```c++ +uint64_t slot; +for (slot = 0; slot < 32; slot++) { + if (!(commands_issued_ & (1 << slot))) { + break; + } +} +if (slot == 32) { + dbgln("All slots full"); + return glcr::INTERNAL; +} +``` + +The `commands_issued_` variable is just for our own accounting of which slots +are currently in use by another command. + +Next we can populate the FIS for that slot. The spec for the Register Host to +Device FIS is as follows: + +![Register Host to Device FIS Layout](images/RegisterHostToDeviceFIS.png) + +We don't need to initialize most of the fields here because the IDENTIFY_DEVICE +call doesn't rely on an lba or sector count. One of the keys is setting the high +bit "C" in the byte that contains PM Port which indicates to the HBA that this +FIS contains a new command (I spent a while trying to figure out why this wasn't +working without that). 
The code for this is relatively straightforward. + +```c++ +auto* fis = reinterpret_cast( + command_tables_[slot].command_fis); +*fis = HostToDeviceRegisterFis{ + .fis_type = FIS_TYPE_REG_H2D, + .pmp_and_c = 0x80, + .command = kIdentifyDevice, // 0xEC +}; +``` + +We also need to let the HBA know where it can put the result in memory. For this +we use the physical region descriptor table corresponding to this command slot. +As described before, for simplicity now we are only using a single entry to do +this. We allocate a 512 byte memory region and set its physical address and +size in the first slot of the command slot's PRDT. + +```c++ +uint64_t paddr; +auto region = + mmth::OwnedMemoryRegion::ContiguousPhysical(0x200, &paddr); +command_tables_[slot].prdt[0].region_address = command.paddr; +command_tables_[slot].prdt[0].byte_count = 0x200; // 512 bytes +command_list_->command_headers[slot].prd_table_length = 1; +``` + +All that is left to do is to issue the command! We set the size of the command +FIS (in double words for some reason?) as well as let the HBA know it can +prefetch the data from memory. Then we set the bit for this command slot in the +PxCI register which will cause the device to start processing it. + +```c++ +// Set the command FIS length (in double words). +command_list_->command_headers[slot].command = + (sizeof(HostToDeviceRegisterFis) / 4) & 0x1F; + +// Set prefetch bit. +command_list_->command_headers[slot].command |= (1 << 7); + +// TODO: Synchronization-wise we need to ensure this is set in the same +// critical section as where we select a slot. +commands_issued_ |= (1 << slot); +port_struct_->command_issue |= (1 << slot); +``` + +But wait! How will we know when this command has completed? We somehow need to +wait until we receive an interrupt for this command to process the data it +sent. To handle this we can add a semaphore for each port command slot to allow +signalling when we receive a completion interrupt for that command. 
I think it +might make sense to have some sort of callback instead so we can pass errors +back to the caller instead of just a completion signal. However I'm not sure +what type of errors exist that are resolvable by the caller so for now this +works. + +```c++ +void IdentifyDevice() { +... + // Issue command. + commands_issued_ |= (1 << slot); + port_struct_->command_issue |= (1 << slot); + + command_signals_[slot].Wait(); + + // Continue processing. +... +} + +void AhciPort::HandleIrq() { + uint32_t int_status = port_struct_->interrupt_status; + port_struct_->interrupt_status = int_status; + +... + // Parse received FIS. +... + + uint32_t commands_finished = commands_issued_ & ~port_struct_->command_issue; + + for (uint64_t i = 0; i < 32; i++) { + if (commands_finished & (1 << i)) { + command_signals_[i].Signal(); + commands_issued_ &= ~(1 << i); + } + } +} +``` + +Ok now that we have retrieved the information from the drive we can parse it. +For the sector size, the default is 512 bytes which we will use unless the +`LOGICAL SECTOR SIZE SUPPORTED` bit is set in word 106, bit 12. If that +is set we can check the words at 117 and 118 to get the 32 bit sector +size value. For the sector count, we need to check if the device supports 48 bit +addressing using word 83 bit 10. If it is used we can get the number of +sectors from the 4 words starting at 100. Otherwise we read the number of +sectors from the 2 words starting at index 60. 
+ +```c++ + uint16_t* ident = reinterpret_cast(region.vaddr()); + if (ident[106] & (1 << 12)) { + sector_size_ = *reinterpret_cast(ident + 117); + } else { + sector_size_ = 512; + } + + if (ident[83] & (1 << 10)) { + lba_count_ = *reinterpret_cast(ident + 100); + } else { + lba_count_ = *reinterpret_cast(ident + 60); + } + dbgln("Sector size: {x}", sector_size_); + dbgln("LBA Count: {x}", lba_count_); + is_init_ = true; +} +``` + +You might be rightfully thinking that it would be less brittle to make a struct +definition that we could point at this address which would implicitly contain +these offsets - and you would be correct. But to be honest, I can't be bothered +to create a 256 entry struct definition just to get these values. Maybe in the +future. + +## Reading Data + +Now that we have the ability to read the IDENTIFY DEVICE data we are only a +short hop, skip, and jump away from reading data from the drive. The main +differences when reading data are (a) the command number, (b) we must specify +the Logical Block Address (LBA) we want to read from and the number of sectors +to read, and (c) we need to dynamically size the entry in the Physical Region +Descriptor Table (we will still use only one entry for now). + +Because much of this is similar we can fairly easily create a shared struct with +the necessary information and construct the requests in parallel. + +```c++ +struct Command { + uint8_t command; + uint64_t lba; + uint32_t sectors; + uint64_t paddr; +}; +``` + +Then from that we can create an IssueCommand function that constructs the +Register Host to Device FIS in a similar way for both. Before that I'd like to +take this opportunity to point out how the LBA in this FIS is stored in a way +that truly only a mother could love: + +![Register Host to Device Layout LBA](images/RegisterHostToDeviceFISLBA.png) + +That aside, we simply update the FIS construction to set the command, LBA, and +sector count. 
Following that we set the PRDT values (although we still only use +one slot). + +```c++ +auto* fis = reinterpret_cast( + command_tables_[slot].command_fis); +*fis = HostToDeviceRegisterFis{ + .fis_type = FIS_TYPE_REG_H2D, + .pmp_and_c = 0x80, + .command = command.command, + + .lba0 = static_cast(command.lba & 0xFF), + .lba1 = static_cast((command.lba >> 8) & 0xFF), + .lba2 = static_cast((command.lba >> 16) & 0xFF), + .device = (1 << 6), // ATA LBA Mode + + .lba3 = static_cast((command.lba >> 24) & 0xFF), + .lba4 = static_cast((command.lba >> 32) & 0xFF), + .lba5 = static_cast((command.lba >> 40) & 0xFF), + + .count = command.sectors, +}; + +command_tables_[slot].prdt[0].region_address = command.paddr; +command_tables_[slot].prdt[0].byte_count = 512 * command.sectors; +``` + +Then issuing either the identify device command or the read command is +relatively straightforward: + +```c++ +// IDENTIFY DEVICE +CommandInfo identify{ + .command = kIdentifyDevice, + .lba = 0, + .sectors = 1, + .paddr = 0, +}; +auto region = + mmth::OwnedMemoryRegion::ContiguousPhysical(0x200, &identify.paddr); +ASSIGN_OR_RETURN(auto* sem, IssueCommand(identify)); +sem->Wait(); + +// DMA READ +CommandInfo dma_read{ + .command = kDmaReadExt, + .lba = lba, + .sectors = sector_cnt, + .paddr = 0, +}; +auto region = + mmth::OwnedMemoryRegion::ContiguousPhysical(0x200 * sector_cnt, &read.paddr); +ASSIGN_OR_RETURN(auto* sem, IssueCommand(dma_read)); +sem->Wait(); +``` + +From here the world is our oyster and we can read any arbitrary data from the +disk. The bulk of this code isn't actually all that long (~200 LOC in the [AHCI +Port implementation](https://gitea.tiramisu.one/drew/acadia/src/commit/21265e76edf4fa93b8ec1795da4bdd2fc70b79d9/sys/denali/ahci/ahci_port.cpp) +). However I probably added and deleted several times that trying to get +everything working and refactored down to a nice interface. + +## Coming next + +This is nowhere near a full implementation. 
Among the things we +skipped that I plan to come back to at some point are: + +- **Staggered spin up:** In controllers that support this, each device is + powered down after RESET and must be started individually. +- **Message Signaled Interrupts:** The hot new way to handle PCI device + interrupts. Has only been available since 1998 so support may vary. +- **Port Multiplier Support:** Something that gets mentioned all over the specs + but I've avoided even looking into until this moment. But it looks like it + allows several devices behind a single port. +- **Scatter Gather buffers:** For big files we may not always be able to find a + sufficient contiguous chunk of physical memory. This means we may have to use + more than one entry in the PRDT! +- **Error Handling & Retry:** Even though QEMU may succeed in executing commands + 100% of the time, real hardware may not and we should probably handle that. +- **Fewer than 32 commands supported:** We kinda always assume that the device + can handle 32 commands even though it may not (how many it does is exposed in + the GHC registers). diff --git a/hugo.toml b/hugo.toml index 112aee7..308a0aa 100644 --- a/hugo.toml +++ b/hugo.toml @@ -12,3 +12,7 @@ title = "Drew's Site" url = 'https://www.linkedin.com/in/drew-galbraith/' weight = 30 +[markup] + [markup.highlight] + noClasses = false + diff --git a/layouts/partials/head/css.html b/layouts/partials/head/css.html index 312858f..9a4e757 100644 --- a/layouts/partials/head/css.html +++ b/layouts/partials/head/css.html @@ -8,6 +8,16 @@ {{- end }} {{- end }} +{{- with resources.Get "css/syntax.css" }} + {{- if eq hugo.Environment "development" }} + + {{- else }} + {{- with . | minify | fingerprint }} + + {{- end }} + {{- end }} +{{- end }} + {{- range .Resources.Match "css/*.css" }} {{- if eq hugo.Environment "development" }}