MonkOS  v0.1
A simple 64-bit operating system (x86_64)
paging.c
Go to the documentation of this file.
1 //============================================================================
2 /// @file paging.c
3 /// @brief Paged memory management.
4 //
5 // Copyright 2016 Brett Vickers.
6 // Use of this source code is governed by a BSD-style license that can
7 // be found in the MonkOS LICENSE file.
8 //============================================================================
9 
#include <core.h>
#include <libc/stdlib.h>
#include <libc/string.h>
#include <kernel/x86/cpu.h>
#include <kernel/interrupt.h>
#include <kernel/mem/kmem.h>
#include <kernel/mem/pmap.h>
#include <kernel/mem/paging.h>
18 
19 // add_pte addflags
20 #define CONTAINS_TABLE (1 << 0)
21 
22 // Page shift constants
23 #define PAGE_SHIFT 12 // 1<<12 = 4KiB
24 #define PAGE_SHIFT_LARGE 21 // 1<<21 = 2MiB
25 
26 // Page frame number constants
27 #define PFN_INVALID ((uint32_t)-1)
28 
29 // Helper macros
30 #define PADDR_TO_PF(a) ((pf_t *)(pfdb.pf + ((a) >> PAGE_SHIFT)))
31 #define PF_TO_PADDR(pf) ((uint64_t)((pf) - pfdb.pf) << PAGE_SHIFT)
32 #define PFN_TO_PF(pfn) ((pf_t *)((pfn) + pfdb.pf))
33 #define PF_TO_PFN(pf) ((uint32_t)((pf) - pfdb.pf))
34 #define PTE_TO_PADDR(pte) ((pte) & ~PGMASK_OFFSET)
35 
36 // Page frame types
// Page frame types
// NOTE(review): constant names reconstructed from their uses below
// (PFTYPE_AVAILABLE, PFTYPE_ALLOCATED); RESERVED is the zero value so a
// memzero'd page frame record defaults to "reserved".
enum
{
    PFTYPE_RESERVED  = 0,   ///< Frame is not managed by the available list
    PFTYPE_AVAILABLE = 1,   ///< Frame is on the available list
    PFTYPE_ALLOCATED = 2,   ///< Frame is allocated and in use
};
43 
/// The pf structure represents a record in the page frame database.
/// Available frames are chained into a doubly-linked list through the
/// prev/next page-frame-number indices.
typedef struct pf
{
    uint32_t prev;       ///< Index of prev pfn on available list
    uint32_t next;       ///< Index of next pfn on available list
    uint16_t refcount;   ///< Number of references to this page
    uint16_t sharecount; ///< Number of processes sharing page
    uint16_t flags;      ///< Page frame flags (none used in this file yet)
    uint8_t  type;       ///< PFTYPE of page frame
    uint8_t  reserved0;  ///< Padding; keeps the record at 32 bytes
    uint64_t reserved1;  ///< Reserved for future use
    uint64_t reserved2;  ///< Reserved for future use
} pf_t;
57 
58 STATIC_ASSERT(sizeof(pf_t) == 32, "Unexpected page frame size");
59 
/// The pfdb describes the state of the page frame database: one pf_t record
/// per physical page frame, plus a doubly-linked list of available frames
/// threaded through the records by page frame number.
struct pfdb
{
    pf_t    *pf;    ///< Pointer to array of page frames
    uint32_t count; ///< Total number of frames in the pfdb
    uint32_t avail; ///< Available number of frames in the pfdb
    uint32_t head;  ///< Index of available frame list head
    uint32_t tail;  ///< Index of available frame list tail
};
69 
70 static struct pfdb pfdb; // Global page frame database
71 static pagetable_t kpt; // Kernel page table (all physical memory)
72 static pagetable_t *active_pt; // Currently active page table
73 
74 // TODO: Modify to support multi-core
75 
76 /// Reserve an aligned region of memory managed by the memory table module.
77 static void *
78 reserve_region(const pmap_t *map, uint64_t size, uint32_t alignshift)
79 {
80  const pmapregion_t *r = map->region;
81  const pmapregion_t *t = map->region + map->count;
82  for (; r < t; r++) {
83 
84  // Skip reserved regions and regions that are too small.
85  if (r->type != PMEMTYPE_USABLE)
86  continue;
87  if (r->size < size)
88  continue;
89 
90  // Find address of first properly aligned byte in the region.
91  uint64_t paddr = r->addr + (1 << alignshift) - 1;
92  paddr >>= alignshift;
93  paddr <<= alignshift;
94 
95  // If the region is too small after alignment, skip it.
96  if (paddr + size > r->addr + r->size)
97  continue;
98 
99  // Reserve the aligned memory region and return its address.
100  pmap_add(paddr, size, PMEMTYPE_RESERVED);
101  return (void *)paddr;
102  }
103  return NULL;
104 }
105 
106 void
108 {
109  // Retrieve the physical memory map.
110  const pmap_t *map = pmap();
111  if (map->last_usable == 0)
112  fatal();
113 
114  // Calculate the size of the page frame database. It needs to describe
115  // each page up to and including the last physical address.
116  pfdb.count = map->last_usable / PAGE_SIZE;
117  uint64_t pfdbsize = pfdb.count * sizeof(pf_t);
118 
119  // Round the database size up to the nearest 2MiB since we'll be using
120  // large pages in the kernel page table to describe the database.
121  pfdbsize += PAGE_SIZE_LARGE - 1;
122  pfdbsize >>= PAGE_SHIFT_LARGE;
123  pfdbsize <<= PAGE_SHIFT_LARGE;
124 
125  // Find a contiguous, 2MiB-aligned region of memory large enough to hold
126  // the entire pagedb.
127  pfdb.pf = (pf_t *)reserve_region(map, pfdbsize, PAGE_SHIFT_LARGE);
128  if (pfdb.pf == NULL)
129  fatal();
130 
131  // Initialize the kernel's page table.
132  kmem_init(&kpt);
133  set_pagetable(kpt.proot);
134  active_pt = &kpt;
135 
136  // Create the page frame database in the newly mapped virtual memory.
137  memzero(pfdb.pf, pfdbsize);
138 
139  // Initialize available page frame list.
140  pfdb.avail = 0;
143 
144  // Traverse the memory table, adding page frame database entries for each
145  // region in the table.
146  for (uint64_t r = 0; r < map->count; r++) {
147  const pmapregion_t *region = &map->region[r];
148 
149  // Ignore non-usable memory regions.
150  if (region->type != PMEMTYPE_USABLE)
151  continue;
152 
153  // Create a chain of available page frames.
154  uint64_t pfn0 = region->addr >> PAGE_SHIFT;
155  uint64_t pfnN = (region->addr + region->size) >> PAGE_SHIFT;
156  for (uint64_t pfn = pfn0; pfn < pfnN; pfn++) {
157  pf_t *pf = PFN_TO_PF(pfn);
158  pf->prev = pfn - 1;
159  pf->next = pfn + 1;
160  pf->type = PFTYPE_AVAILABLE;
161  }
162 
163  // Link the chain to the list of available frames.
164  if (pfdb.tail == PFN_INVALID)
165  pfdb.head = pfn0;
166  else
167  pfdb.pf[pfdb.tail].next = pfn0;
168  pfdb.pf[pfn0].prev = pfdb.tail;
169  pfdb.pf[pfnN - 1].next = PFN_INVALID;
170  pfdb.tail = pfnN - 1;
171 
172  // Update the total number of available frames.
173  pfdb.avail += (uint32_t)(pfnN - pfn0);
174  }
175 
176  // TODO: Install page fault handler
177 }
178 
179 static pf_t *
181 {
182  // For now, fatal out. Later, we'll add swapping.
183  if (pfdb.avail == 0)
184  fatal();
185 
186  // Grab the first available page frame from the database.
187  pf_t *pf = PFN_TO_PF(pfdb.head);
188 
189  // Update the available frame list.
191  if (pfdb.head != PFN_INVALID)
193 
194  // Initialize and return the page frame.
195  memzero(pf, sizeof(pf_t));
196  pf->refcount = 1;
197  pf->type = PFTYPE_ALLOCATED;
198  return pf;
199 }
200 
201 static void
203 {
204  if (pf->type != PFTYPE_ALLOCATED)
205  fatal();
206 
207  // Re-initialize the page frame record.
208  memzero(pf, sizeof(pf_t));
209  pf->prev = PFN_INVALID;
210  pf->next = pfdb.head;
211  pf->type = PFTYPE_AVAILABLE;
212 
213  // Update the database's available frame list.
214  uint32_t pfn = PF_TO_PFN(pf);
215  pfdb.pf[pfdb.head].prev = pfn;
216  pfdb.head = pfn;
217 
218  pfdb.avail++;
219 }
220 
221 static uint64_t
223 {
224  // Allocate a page frame from the db and compute its physical address.
225  pf_t *pf = pfalloc();
226  uint64_t paddr = PF_TO_PADDR(pf);
227 
228  // Always zero the contents of newly allocated pages.
229  memzero((void *)paddr, PAGE_SIZE);
230 
231  // Return the page's physical address.
232  return paddr;
233 }
234 
235 static void
236 pgfree(uint64_t paddr)
237 {
238  pf_t *pf = PADDR_TO_PF(paddr);
239  if (--pf->refcount == 0)
240  pffree(pf);
241 }
242 
/// Recursively walk a page table page, releasing to the page frame database
/// every allocated leaf page reachable from it. Entries flagged PF_SYSTEM
/// are never traversed or freed.
///
/// @param page   Table page to walk.
/// @param level  Hierarchy level of 'page' (4 = PML4, 1 = PT).
static void
pgfree_recurse(page_t *page, int level)
{
    // If we're at the PT level, return the leaf pages to the page frame
    // database.
    if (level == 1) {
        for (uint64_t e = 0; e < 512; e++) {
            uint64_t paddr = PTE_TO_PADDR(page->entry[e]);
            if (paddr == 0)
                continue;
            // Only frames the pfdb tracks as allocated are returned; frames
            // of other types (e.g. reserved) are left untouched.
            pf_t *pf = PADDR_TO_PF(paddr);
            if (pf->type == PFTYPE_ALLOCATED)
                pgfree(paddr);
        }
    }

    // If we're at the PD level or above, recursively traverse child tables
    // until we reach the PT level.
    else {
        for (uint64_t e = 0; e < 512; e++) {
            if (page->entry[e] & PF_SYSTEM) // Never free system tables
                continue;
            page_t *child = PGPTR(page->entry[e]);
            if (child == NULL)
                continue;
            pgfree_recurse(child, level - 1);
        }
    }
}
272 
/// Add to the page table an entry mapping a virtual address to a physical
/// address.
///
/// @param pt        Page table to modify.
/// @param vaddr     Virtual address to map.
/// @param paddr     Physical address stored in the new entry.
/// @param pflags    Page flags (e.g. PF_PRESENT | PF_RW) OR'd into the PTE.
/// @param addflags  CONTAINS_TABLE when the page being mapped holds one of
///                  the table's own pages; 0 for ordinary leaf mappings.
static void
add_pte(pagetable_t *pt, uint64_t vaddr, uint64_t paddr, uint32_t pflags,
        uint32_t addflags)
{
    // Fatal out if virtual address space is exhausted. Only table-page
    // mappings are bounded by [vroot, vterm).
    if ((addflags & CONTAINS_TABLE) && (vaddr >= pt->vterm))
        fatal();

    // Keep track of the pages we add to the page table as we add this new
    // page table entry. At most three new table pages (PDPT, PD, PT) can be
    // created for a single mapping.
    uint64_t added[3];
    int count = 0;

    // Decompose the virtual address into its hierarchical table components.
    uint32_t pml4e = PML4E(vaddr);
    uint32_t pdpte = PDPTE(vaddr);
    uint32_t pde = PDE(vaddr);
    uint32_t pte = PTE(vaddr);

    // Follow the page table hierarchy down to the lowest level, creating
    // new pages for tables as needed.

    page_t *pml4t = (page_t *)pt->proot;
    if (pml4t->entry[pml4e] == 0) {
        uint64_t pgaddr = pgalloc();
        added[count++] = pgaddr;
        pml4t->entry[pml4e] = pgaddr | PF_PRESENT | PF_RW;
    }
    else if (pml4t->entry[pml4e] & PF_SYSTEM) {
        // A system page table should never be modified. This check is
        // performed only within the root PML4 table because checks on
        // lower level tables would be redundant.
        fatal();
    }

    page_t *pdpt = PGPTR(pml4t->entry[pml4e]);
    if (pdpt->entry[pdpte] == 0) {
        uint64_t pgaddr = pgalloc();
        added[count++] = pgaddr;
        pdpt->entry[pdpte] = pgaddr | PF_PRESENT | PF_RW;
    }

    page_t *pdt = PGPTR(pdpt->entry[pdpte]);
    if (pdt->entry[pde] == 0) {
        uint64_t pgaddr = pgalloc();
        added[count++] = pgaddr;
        pdt->entry[pde] = pgaddr | PF_PRESENT | PF_RW;
    }

    // Add the page table entry.
    page_t *ptt = PGPTR(pdt->entry[pde]);
    ptt->entry[pte] = paddr | pflags;

    // If adding the new entry required the page table to grow, make sure to
    // add the page table's new pages as well: each is mapped at the table's
    // next self-map address (pt->vnext), recursing with CONTAINS_TABLE so
    // exhaustion of the table's own address range is detected above.
    for (int i = 0; i < count; i++) {
        add_pte(pt, pt->vnext, added[i], PF_PRESENT | PF_RW, CONTAINS_TABLE);
        pt->vnext += PAGE_SIZE;
    }
}
335 
/// Remove the page table entry mapping a virtual address, invalidating its
/// TLB entry when the table is currently active.
///
/// @param pt     Page table to modify.
/// @param vaddr  Virtual address whose mapping is removed.
/// @returns the physical address that was mapped at vaddr.
///
/// NOTE(review): this walk assumes all intermediate tables for vaddr exist;
/// a missing PDPT/PD/PT would dereference a NULL PGPTR. Confirm callers
/// only remove addresses they previously mapped.
static uint64_t
remove_pte(pagetable_t *pt, uint64_t vaddr)
{
    // Decompose the virtual address into its hierarchical table components.
    uint32_t pml4e = PML4E(vaddr);
    uint32_t pdpte = PDPTE(vaddr);
    uint32_t pde = PDE(vaddr);
    uint32_t pte = PTE(vaddr);

    // Traverse the hierarchy, looking for the page.
    page_t *pml4t = (page_t *)pt->proot;
    page_t *pdpt = PGPTR(pml4t->entry[pml4e]);
    page_t *pdt = PGPTR(pdpt->entry[pdpte]);
    page_t *ptt = PGPTR(pdt->entry[pde]);
    page_t *pg = PGPTR(ptt->entry[pte]);

    // Clear the page table entry for the virtual address.
    ptt->entry[pte] = 0;

    // Invalidate the TLB entry for the page that was just removed.
    if (pt == active_pt)
        invalidate_page((void *)vaddr);

    // Return the physical address of the page table entry that was removed.
    return (uint64_t)pg;
}
362 
363 void
364 pagetable_create(pagetable_t *pt, void *vaddr, uint64_t size)
365 {
366  if (size % PAGE_SIZE != 0)
367  fatal();
368 
369  // Allocate a page from the top level of the page table hierarchy.
370  pt->proot = pgalloc();
371  pt->vroot = (uint64_t)vaddr;
372  pt->vnext = (uint64_t)vaddr + PAGE_SIZE;
373  pt->vterm = (uint64_t)vaddr + size;
374 
375  // Install the kernel's page table into the created page table.
376  page_t *src = (page_t *)kpt.proot;
377  page_t *dst = (page_t *)pt->proot;
378  for (int i = 0; i < 512; i++)
379  dst->entry[i] = src->entry[i];
380 }
381 
382 void
384 {
385  if (pt->proot == 0)
386  fatal();
387 
388  // Recursively destroy all pages starting from the PML4 table.
389  pgfree_recurse((page_t *)pt->proot, 4);
390 
391  // Invalidate TLB entries of all table pages.
392  if (pt == active_pt) {
393  for (uint64_t vaddr = pt->vroot; vaddr < pt->vterm;
394  vaddr += PAGE_SIZE) {
395  invalidate_page((void *)vaddr);
396  }
397  }
398 
399  memzero(pt, sizeof(pagetable_t));
400 }
401 
402 void
404 {
405  if (pt == NULL)
406  pt = &kpt;
407  if (pt->proot == 0)
408  fatal();
409 
410  set_pagetable(pt->proot);
411  active_pt = pt;
412 }
413 
414 void *
415 page_alloc(pagetable_t *pt, void *vaddr_in, int count)
416 {
417  for (uint64_t vaddr = (uint64_t)vaddr_in; count--; vaddr += PAGE_SIZE) {
418  uint64_t paddr = pgalloc();
419  add_pte(pt, vaddr, paddr, PF_PRESENT | PF_RW, 0);
420  }
421  return vaddr_in;
422 }
423 
424 void
425 page_free(pagetable_t *pt, void *vaddr_in, int count)
426 {
427  for (uint64_t vaddr = (uint64_t)vaddr_in; count--; vaddr += PAGE_SIZE) {
428  uint64_t paddr = remove_pte(pt, vaddr);
429  pgfree(paddr);
430  }
431 }
uint32_t head
Index of available frame list head.
Definition: paging.c:66
const pmap_t * pmap()
Return a pointer to the current physical memory map.
Definition: pmap.c:316
uint32_t tail
Index of available frame list tail.
Definition: paging.c:67
uint32_t avail
Available number of frames in the pfdb.
Definition: paging.c:65
Physical memory map describing usable and reserved regions of physical memory.
#define PF_TO_PADDR(pf)
Definition: paging.c:31
#define PAGE_SHIFT
Definition: paging.c:23
#define PF_TO_PFN(pf)
Definition: paging.c:33
String and memory operations.
#define PF_PRESENT
Definition: paging.h:20
void pagetable_create(pagetable_t *pt, void *vaddr, uint64_t size)
Create a new page table that can be used to associate virtual addresses with physical addresses...
Definition: paging.c:364
static void add_pte(pagetable_t *pt, uint64_t vaddr, uint64_t paddr, uint32_t pflags, uint32_t addflags)
Add to the page table an entry mapping a virtual address to a physical address.
Definition: paging.c:276
__forceinline void set_pagetable(uint64_t paddr)
Definition: cpu_inl.h:113
void page_init()
Initialize the page frame database.
Definition: paging.c:107
#define PTE(a)
Definition: paging.h:43
Core include file.
static void * reserve_region(const pmap_t *map, uint64_t size, uint32_t alignshift)
Reserve an aligned region of memory managed by the memory table module.
Definition: paging.c:78
Kernel physical (and virtual) memory map.
void pagetable_destroy(pagetable_t *pt)
Destroy a page table.
Definition: paging.c:383
#define PFN_TO_PF(pfn)
Definition: paging.c:32
uint32_t prev
Index of prev pfn on available list.
Definition: paging.c:47
void kmem_init(pagetable_t *pt)
Using the contents of the physical memory map, identity map all physical memory into the kernel's page table.
Definition: kmem.c:189
STATIC_ASSERT(sizeof(pf_t)==32,"Unexpected page frame size")
Reported usable by the BIOS.
Definition: pmap.h:23
#define PML4E(a)
Definition: paging.h:40
void * page_alloc(pagetable_t *pt, void *vaddr_in, int count)
Allocate one or more pages contiguous in virtual memory.
Definition: paging.c:415
#define PAGE_SIZE_LARGE
Definition: paging.h:16
#define PADDR_TO_PF(a)
Definition: paging.c:30
static void pgfree_recurse(page_t *page, int level)
Definition: paging.c:244
void pagetable_activate(pagetable_t *pt)
Activate a page table on the CPU, so all virtual memory operations are performed relative to the page...
Definition: paging.c:403
__forceinline void fatal()
Definition: cpu_inl.h:158
uint64_t reserved2
Definition: paging.c:55
A pagetable structure.
Definition: paging.h:66
#define PF_SYSTEM
Definition: paging.h:29
uint8_t reserved0
Definition: paging.c:53
static uint64_t pgalloc()
Definition: paging.c:222
The pfdb describes the state of the page frame database.
Definition: paging.c:61
static void pffree(pf_t *pf)
Definition: paging.c:202
static pmap_t * map
Definition: pmap.c:21
A pagetable page record.
Definition: paging.h:54
Paged memory management.
uint64_t reserved1
Definition: paging.c:54
#define PAGE_SIZE
Definition: paging.h:15
void pmap_add(uint64_t addr, uint64_t size, enum pmemtype type)
Add a region of memory to the physical memory map.
Definition: pmap.c:322
uint64_t entry[PAGE_SIZE/sizeof(uint64_t)]
Definition: paging.h:56
uint64_t proot
Physical address of root page table (PML4T) entry.
Definition: paging.h:68
uint16_t flags
Definition: paging.c:51
The pf structure represents a record in the page frame database.
Definition: paging.c:45
#define CONTAINS_TABLE
Definition: paging.c:20
Interrupt handling operations.
x86 CPU-specific function implementations.
#define PFN_INVALID
Definition: paging.c:27
#define PF_RW
Definition: paging.h:21
__forceinline void invalidate_page(void *vaddr)
Definition: cpu_inl.h:124
#define PDPTE(a)
Definition: paging.h:41
uint16_t sharecount
Number of processes sharing page.
Definition: paging.c:50
Reported (or inferred) to be reserved.
Definition: pmap.h:24
pf_t * pf
Pointer to array of page frames.
Definition: paging.c:63
static pagetable_t * active_pt
Definition: paging.c:72
uint64_t vnext
Virtual address to use for table&#39;s next page.
Definition: paging.h:70
void * memzero(void *dst, size_t num)
Fill a region of memory with zeroes.
#define PTE_TO_PADDR(pte)
Definition: paging.c:34
General utilities library.
uint64_t vterm
Boundary of pages used to store the table.
Definition: paging.h:71
void page_free(pagetable_t *pt, void *vaddr_in, int count)
Free one or more contiguous pages from virtual memory.
Definition: paging.c:425
static pagetable_t kpt
Definition: paging.c:71
uint64_t vroot
Virtual address of root page table (PML4T) entry.
Definition: paging.h:69
uint16_t refcount
Number of references to this page.
Definition: paging.c:49
static pf_t * pfalloc()
Definition: paging.c:180
static void pgfree(uint64_t paddr)
Definition: paging.c:236
uint32_t next
Index of next pfn on available list.
Definition: paging.c:48
uint32_t count
Total number of frames in the pfdb.
Definition: paging.c:64
#define PAGE_SHIFT_LARGE
Definition: paging.c:24
static uint64_t remove_pte(pagetable_t *pt, uint64_t vaddr)
Definition: paging.c:337
#define PDE(a)
Definition: paging.h:42
#define PGPTR(pte)
Definition: paging.h:46
uint8_t type
PFTYPE of page frame.
Definition: paging.c:52