《ULK》(《深入理解Linux内核》)第八章的部分内容讲解了伙伴系统分配内存的方法。作者根据书上的内容简单地对照了一下内核源码,在此记录笔记:
page_alloc.c部分内容
/* * function for dealing with page's order in buddy system. * zone->lock is already acquired when we use these. * So, we don't need atomic page->flags operations here. */ /* 返回,设置,清除某块buffer对应的order号码, 这里根据ULK P314介绍,该order是放到private字段中 */ static inline unsigned long page_order(struct page *page) { return page->private; } static inline void set_page_order(struct page *page, int order) { page->private = order; __SetPagePrivate(page); } static inline void rmv_page_order(struct page *page) { __ClearPagePrivate(page); page->private = 0; } /* * This function checks whether a page is free && is the buddy * we can do coalesce a page and its buddy if * (a) the buddy is free && * (b) the buddy is on the buddy system && * (c) a page and its buddy have the same order. * for recording page's order, we use page->private and PG_private. * */ /* 检测一个空闲的buffer是否可以组成伙伴, page对应的是buffer的头部,这里主要是检测 该buffer是否是空闲的。 */ static inline int page_is_buddy(struct page *page, int order) { if (PagePrivate(page) && (page_order(page) == order) && !PageReserved(page) && page_count(page) == 0) return 1; return 0; } /* * Freeing function for a buddy system allocator. * * The concept of a buddy system is to maintain direct-mapped table * (containing bit values) for memory blocks of various "orders". * The bottom level table contains the map for the smallest allocatable * units of memory (here, pages), and each level above it describes * pairs of units from the levels below, hence, "buddies". * At a high level, all that happens here is marking the table entry * at the bottom level available, and propagating the changes upward * as necessary, plus some accounting needed to play nicely with other * parts of the VM system. * At each level, we keep a list of pages, which are heads of continuous * free pages of length of (1 << order) and marked with PG_Private.Page's * order is recorded in page->private field. * So when we are allocating or freeing one, we can derive the state of the * other. 
That is, if we allocate a small block, and both were * free, the remainder of the region must be split into blocks. * If a block is freed, and its buddy is also free, then this * triggers coalescing into a block of larger size. * * -- wli */ /* 释放一块buffer */ static inline void __free_pages_bulk (struct page *page, struct page *base, struct zone *zone, unsigned int order) { unsigned long page_idx; struct page *coalesced; int order_size = 1 << order; if (unlikely(order)) destroy_compound_page(page, order); /* base表示整个伙伴系统的头,通过相减可以得到当前buffer的 相对位置 */ page_idx = page - base; BUG_ON(page_idx & (order_size - 1)); BUG_ON(bad_range(zone, page)); zone->free_pages += order_size; while (order < MAX_ORDER-1) { struct free_area *area; struct page *buddy; int buddy_idx; /* 通过当前的order(可以由此根据order计算出buffer的大小)和相对位置可以计算出该buffer对应的伙伴系统的 相对位置 伙伴系统的两个buffer是相邻的,而且大小都是 (1 << order),因此可以根据buffer的 相对位置异或上buffer的大小获取到该buffer的相邻的buffer的相对位置。。(晕了) */ buddy_idx = (page_idx ^ (1 << order)); /* 然后根据相对位置可以计算出buffer的绝对地址 */ buddy = base + buddy_idx; if (bad_range(zone, buddy)) break; if (!page_is_buddy(buddy, order))/* 如果该buffer是空闲的,那么就可以整合成一个 */ break; /* Move the buddy up one level. */ list_del(&buddy->lru); area = zone->free_area + order; area->nr_free--; rmv_page_order(buddy); page_idx &= buddy_idx; order++; } coalesced = base + page_idx; set_page_order(coalesced, order); list_add(&coalesced->lru, &zone->free_area[order].free_list); zone->free_area[order].nr_free++; } static inline void free_pages_check(const char *function, struct page *page) { if ( page_mapped(page) || page->mapping != NULL || page_count(page) != 0 || (page->flags & ( 1 << PG_lru | 1 << PG_private | 1 << PG_locked | 1 << PG_active | 1 << PG_reclaim | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_writeback ))) bad_page(function, page); if (PageDirty(page)) ClearPageDirty(page); } /* * Frees a list of pages. * Assumes all pages on list are in same zone, and of same order. 
* count is the number of pages to free, or 0 for all on the list. * * If the zone was previously in an "all pages pinned" state then look to * see if this freeing clears that state. * * And clear the zone's pages_scanned counter, to hold off the "all pages are * pinned" detection logic. */ /* 删除一组buffer,buffer使用链表链起来,大小是相同的。*/ static int free_pages_bulk(struct zone *zone, int count, struct list_head *list, unsigned int order) { unsigned long flags; struct page *base, *page = NULL; int ret = 0; base = zone->zone_mem_map; spin_lock_irqsave(&zone->lock, flags); zone->all_unreclaimable = 0; zone->pages_scanned = 0; while (!list_empty(list) && count--) { page = list_entry(list->prev, struct page, lru); /* have to delete it as __free_pages_bulk list manipulates */ list_del(&page->lru); /* 遍历链表,通过内部函数一个一个删除 */ __free_pages_bulk(page, base, zone, order); ret++; } spin_unlock_irqrestore(&zone->lock, flags); return ret; } void __free_pages_ok(struct page *page, unsigned int order) { LIST_HEAD(list); int i; arch_free_page(page, order); mod_page_state(pgfree, 1 << order); #ifndef CONFIG_MMU if (order > 0) for (i = 1 ; i < (1 << order) ; ++i) __put_page(page + i); #endif for (i = 0 ; i < (1 << order) ; ++i) free_pages_check(__FUNCTION__, page + i); list_add(&page->lru, &list); kernel_map_pages(page, 1<<order, 0); free_pages_bulk(page_zone(page), 1, &list, order); } /* * The order of subdivision here is critical for the IO subsystem. * Please do not alter this order without good reasons and regression * testing. Specifically, as large blocks of memory are subdivided, * the order in which smaller blocks are delivered depends on the order * they're subdivided in this function. This is the primary factor * influencing the order in which pages are delivered to the IO * subsystem according to empirical testing, and this is also justified * by considering the behavior of a buddy system containing a single * large block of memory acted on by a series of small allocations. 
* This behavior is a critical factor in sglist merging's success. * * -- wli */ /* zone:指定的管理区 page:空闲buffer的头部的page指针 low:希望的order号码 high:该空闲buffer对应的order号码 area:该空闲buffer对应的管理结构体指针 */ static inline struct page * expand(struct zone *zone, struct page *page, int low, int high, struct free_area *area) { unsigned long size = 1 << high; while (high > low) { area--; high--; size >>= 1; BUG_ON(bad_range(zone, &page[size])); list_add(&page[size].lru, &area->free_list); area->nr_free++; set_page_order(&page[size], high); } /* 这里可以看到,返回的空闲地址是buffer的头部 */ return page; } void set_page_refs(struct page *page, int order) { #ifdef CONFIG_MMU set_page_count(page, 1); #else int i; /* * We need to reference all the pages for this order, otherwise if * anyone accesses one of the pages with (get/put) it will be freed. * - eg: access_process_vm() */ for (i = 0; i < (1 << order); i++) set_page_count(page + i, 1); #endif /* CONFIG_MMU */ } /* * This page is about to be returned from the page allocator */ static void prep_new_page(struct page *page, int order) { if (page->mapping || page_mapped(page) || (page->flags & ( 1 << PG_private | 1 << PG_locked | 1 << PG_lru | 1 << PG_active | 1 << PG_dirty | 1 << PG_reclaim | 1 << PG_swapcache | 1 << PG_writeback ))) bad_page(__FUNCTION__, page); page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_checked | 1 << PG_mappedtodisk); page->private = 0; set_page_refs(page, order); kernel_map_pages(page, 1 << order, 1); } /* * Do the hard work of removing an element from the buddy allocator. * Call me with the zone->lock already held. 
*/ /* 伙伴系统分配实现 */ static struct page *__rmqueue(struct zone *zone, unsigned int order) { struct free_area * area; unsigned int current_order; struct page *page; /* 首先从指定的order开始找,如果找不到,就从更高的order开始找, 直到找到一个空闲的buffer,然后把前面的page返回,后面剩下的 根据大小放到指定的队列中 */ for (current_order = order; current_order < MAX_ORDER; ++current_order) { area = zone->free_area + current_order;/* 根据order获取对应的管理结构体指针 */ if (list_empty(&area->free_list))/* 如果当前大小下没有空闲buffer,就向上提一级 */ continue; /* 根据ULK P313 ,lru保存的是指向该空闲链表的指针, 因此可以根据lru获取到对应空闲buffer的首地址对应的指针*/ page = list_entry(area->free_list.next, struct page, lru); list_del(&page->lru);/* 将该buffer从链表中删掉 */ rmv_page_order(page);/* 该buffer的order需要重新计算,这里先设置为0 */ area->nr_free--;/* 空闲数目减少 */ zone->free_pages -= 1UL << order;/* zone中的空闲大小减少 */ return expand(zone, page, order, current_order, area); } return NULL; } /* * Obtain a specified number of elements from the buddy allocator, all under * a single hold of the lock, for efficiency. Add them to the supplied list. * Returns the number of new pages which were placed at *list. */ /* 工具函数,从伙伴系统中申请若干个大小相同的buffer, 然后用链表串起来 */ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list) { unsigned long flags; int i; int allocated = 0; struct page *page; spin_lock_irqsave(&zone->lock, flags); for (i = 0; i < count; ++i) { page = __rmqueue(zone, order); if (page == NULL) break; allocated++; list_add_tail(&page->lru, list); } spin_unlock_irqrestore(&zone->lock, flags); return allocated; }
发表评论