slab源码分析--销毁函数

时间：2017-01-16 17:04:40 阅读：179 评论：0 收藏：0 [点我收藏+]

标签：time release rip java 开始模块 numa pen 链表

这次来谈一下slab机制中的所有销毁函数。

释放对象

注意释放对象仅仅是回收回slab，并不会将内存还给伙伴系统。

回收对象时有以下原则：

本地高速缓存的空间还可以容纳空闲对象，则直接将对象放回本地高速缓存。
本地高速缓存的空间已满，则按batchcount的值将对象从本地高速缓存转移到本地共享缓存shared中；如果没有设置本地共享缓存（或者本地共享缓存也已满），那么就转移到slab三链中。转移是基于先进先出原则的，也就是转移entry数组最前面的batchcount个空闲对象，因为这些对象在数组中存在的时间相对较长，为“冷数据”，不大可能仍然驻留在CPU高速缓存中。

释放对象比如调用：

    //底层调用__cache_free()函数
    __cache_free(cachep, objp);

那么释放对象就正式开始了，首先是__cache_free()函数，在该函数中首先确定ac有没有超过上限，如果没有超过上限，那就把该对象放入ac即可。否则，就按照batchcount转移出ac一大批对象，为这一个对象腾地方（一大批为一个，这样效率高一些，免得以后再转移）。

/*
 * __cache_free - give one object back to the cache it was allocated from.
 * @cachep: cache that owns the object
 * @objp: object being released
 *
 * An object that has a constructed state must already be in that state when
 * it is handed in. Must be called with interrupts disabled.
 *
 * The object always ends up in the array cache of the current CPU; when that
 * array is already at its limit, cache_flusharray() is called first so that
 * a slot becomes available.
 */
static inline void __cache_free(struct kmem_cache *cachep, void *objp)
{
    /* Array cache belonging to the CPU we are running on. */
    struct array_cache *ac = cpu_cache_get(cachep);

    check_irq_off();
    objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));

    /*
     * NUMA hook: a non-zero return ends the free right here. The original
     * annotation notes that this is an empty stub in the kernel version
     * being analysed.
     */
    if (cache_free_alien(cachep, objp))
        return;

    if (likely(ac->avail < ac->limit)) {
        /* Fast path: the local array still has a free slot. */
        STATS_INC_FREEHIT(cachep);
    } else {
        /* Local array is at its limit: move a batch out before storing. */
        STATS_INC_FREEMISS(cachep);
        cache_flusharray(cachep, ac);
    }

    /* In both cases the object is pushed onto the local array. */
    ac->entry[ac->avail++] = objp;
}

再看cache_flusharray()函数：

/*
 * cache_flusharray - move a batch of objects out of a per-CPU array cache.
 * @cachep: cache the array belongs to
 * @ac: per-CPU array cache that has reached its limit
 *
 * Up to ac->batchcount of the oldest entries (the front of ac->entry) are
 * moved either into the node's shared array cache, if one exists and has
 * room, or back to the slab lists through free_block(). The remaining
 * entries are then shifted to the front of ac->entry.
 * Expects interrupts to be disabled (see check_irq_off()).
 */
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
    int batchcount;
    struct kmem_list3 *l3;
    int node = numa_node_id();

    /* How many objects to move out in one go; configured on the array cache. */
    batchcount = ac->batchcount;
#if DEBUG
    BUG_ON(!batchcount || batchcount > ac->avail);
#endif
    check_irq_off();
    /* Slab lists of the current node for this cache. */
    l3 = cachep->nodelists[node];
    spin_lock(&l3->list_lock);
    /* Is a shared array cache configured for this node? */
    if (l3->shared) {
        struct array_cache *shared_array = l3->shared;
        /* Number of free slots left in the shared array. */
        int max = shared_array->limit - shared_array->avail;
        if (max) {
            /* Never move more than the shared array can still take. */
            if (batchcount > max)
                batchcount = max;
            /* Copy the entries at the front of the local array: they were freed earliest. */
            memcpy(&(shared_array->entry[shared_array->avail]),
                   ac->entry, sizeof(void *) * batchcount);
            /* Account for the entries just added to the shared array. */
            shared_array->avail += batchcount;
            goto free_done;
        }
    }

    /*
     * No shared array, or no room left in it: release the objects to the slab lists.
     * On this path batchcount is still the configured value, so the full batch leaves
     * the local array. On the shared-array path above the batch may have been clamped
     * to the remaining room, and the slab lists are then not used for the rest.
     */
    free_block(cachep, ac->entry, batchcount, node);
free_done:
#if STATS
    /* statistics bookkeeping elided in this excerpt */
#endif
    spin_unlock(&l3->list_lock);
    /* The local array now holds batchcount fewer entries. */
    ac->avail -= batchcount;
    /* Slide the remaining entries down to the front of the array. */
    memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}

如果没有本地共享缓存，或者本地共享缓存达到limit了，那就把该对象回收到三链中：

/*
 * free_block - put a batch of objects back into their slabs on one node.
 * @cachep: cache the objects belong to
 * @objpp: array of object pointers to release
 * @nr_objects: number of entries of @objpp to process
 * @node: index into cachep->nodelists selecting the slab lists to use
 *
 * The caller must already hold the list_lock of the kmem_list3 for @node.
 *
 * Each object's slab is unlinked, the object is returned to it, and the slab
 * is then re-linked according to its new state: to the tail of the partial
 * list while it still has objects in use, to the free list when it is
 * completely unused, or destroyed outright when the node already holds more
 * free objects than its free_limit.
 */
static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
               int node)
{
    struct kmem_list3 *l3;
    int i;

    for (i = 0; i < nr_objects; i++) {
        void *objp = objpp[i];
        /* Slab that owns this object. */
        struct slab *slabp = virt_to_slab(objp);

        l3 = cachep->nodelists[node];

        /* Take the slab off whichever list it is on; it is re-linked below. */
        list_del(&slabp->list);
        check_spinlock_acquired_node(cachep, node);
        check_slabp(cachep, slabp);

        /* Return the object to its slab and count it as free on this node. */
        slab_put_obj(cachep, slabp, objp, node);
        STATS_DEC_ACTIVE(cachep);
        l3->free_objects++;
        check_slabp(cachep, slabp);

        if (slabp->inuse != 0) {
            /*
             * Still partly in use: always queue at the tail of the partial
             * list, which leaves the most time for the slab's other
             * objects to be freed as well.
             */
            list_add_tail(&slabp->list, &l3->slabs_partial);
            continue;
        }

        if (l3->free_objects <= l3->free_limit) {
            /* Completely free and below the limit: keep it on the free list. */
            list_add(&slabp->list, &l3->slabs_free);
            continue;
        }

        /*
         * Completely free and over the limit: drop the whole slab, so its
         * cachep->num objects no longer count as free.
         * No previously held lock needs to be dropped here; an off-slab
         * descriptor is guaranteed to come from a different cache (see the
         * comments before alloc_slabmgmt).
         */
        l3->free_objects -= cachep->num;
        slab_destroy(cachep, slabp);
    }
}

整个流程就是这样，回收优先级：本地缓存>本地共享缓存>三链。

销毁slab

销毁slab就是释放slab管理区和对象占用的空间，还给伙伴系统。

**
 * slab_destroy - destroy and release all objects in a slab
 * @cachep: cache pointer being destroyed   
 * @slabp: slab pointer being destroyed      
 *
 * Destroy all the objs in a slab, and release the mem back to the system.
 * Before calling the slab must have been unlinked from the cache.  The
 * cache-lock is not held/needed. 
 */
 //销毁slab，需要释放管理对象和slab对象
static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
{
    //获得slab页面的首地址，是用第一个对象的地址colouroff(对于内置式slab,colouroff已经将slab管理者包括在内了)
    void *addr = slabp->s_mem - slabp->colouroff;
    //debug用
    slab_destroy_objs(cachep, slabp);

    //使用SLAB_DESTROY_BY_RCU来创建的高速缓存
    if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
        //rcu方式释放,暂时不做分析，主要是做并行优化
        struct slab_rcu *slab_rcu;

        slab_rcu = (struct slab_rcu *)slabp;
        slab_rcu->cachep = cachep;
        slab_rcu->addr = addr;
        //注册一个回调来延期释放slab
        call_rcu(&slab_rcu->head, kmem_rcu_free);
    } else {
        //释放slab占用的页面到伙伴系统中
        //如果是内置式，slab管理者和slab对象在一起，可以同时释放
        kmem_freepages(cachep, addr);
        if (OFF_SLAB(cachep))
            //外置式，还需释放slab管理对象
            kmem_cache_free(cachep->slabp_cache, slabp);
    }
}

与伙伴系统交互的函数暂不在本文讨论范围之内。

销毁缓存器

销毁缓存器首先要保证的一点就是当前缓存器中所有的对象都是空闲的，也就是之前分配出去的对象都已经释放回来了，其主要的步骤如下：

将缓存器 kmem_cache 从 cache_chain 链表中删除。
将本地高速缓存、alien高速缓存和本地共享缓存中的对象都回收到slab三链，并释放free链表中的所有slab，然后判断full链表以及partial链表是否都为空。如果有一个不为空，说明存在非空闲slab，也就是说还有对象未释放，此时无法销毁缓存器，必须重新将缓存器添加到 cache_chain 链表中。
确定所有的对象都为空闲状态后，将缓存器涉及到的所有描述符都释放（这些描述符都是保存在通用缓存器中的，如slab管理者）。

负责销毁缓存器的函数为kmem_cache_destroy()：

/**
 * kmem_cache_destroy - delete a cache
 * @cachep: the cache to destroy
 *
 * Remove a &struct kmem_cache object from the slab cache.
 *
 * It is expected this function will be called by a module when it is
 * unloaded.  This will remove the cache completely, and avoid a duplicate
 * cache being allocated each time a module is loaded and unloaded, if the
 * module doesn't have persistent in-kernel storage across loads and unloads.
 *
 * The cache must be empty before calling this function. If it is not, an
 * error is reported through slab_error(), the cache is put back on
 * cache_chain and the function returns without destroying anything.
 *
 * The caller must guarantee that no one will allocate memory from the cache
 * during the kmem_cache_destroy().
 */
void kmem_cache_destroy(struct kmem_cache *cachep)
{
    BUG_ON(!cachep || in_interrupt());

    /* Find the cache in the chain of caches. */
    mutex_lock(&cache_chain_mutex);
    /*
     * the chain is never empty, cache_cache is never destroyed
     */
    /* Unlink the cache from cache_chain before trying to empty it. */
    list_del(&cachep->next);

    /*
     * __cache_shrink() releases the slabs on the free lists and returns
     * non-zero when a full or partial list still holds slabs, i.e. when
     * some objects have not been freed yet.
     */
    if (__cache_shrink(cachep)) {
        slab_error(cachep, "Can't free all objects");
        /* Not empty, so it cannot be destroyed: link it back into cache_chain. */
        list_add(&cachep->next, &cache_chain);

        mutex_unlock(&cache_chain_mutex);
        return;
    }

    /* Caches created with SLAB_DESTROY_BY_RCU wait on RCU before teardown. */
    if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
        synchronize_rcu();

    /* The actual release of the cache's descriptors happens here. */
    __kmem_cache_destroy(cachep);
    mutex_unlock(&cache_chain_mutex);
}

释放空链表的slab由下面这个函数负责：

/*
 * __cache_shrink - release the free slabs of a cache on every online node.
 * @cachep: cache to shrink
 *
 * Called with cache_chain_mutex held to protect against cpu hotplug.
 *
 * Returns 1 if, after draining, any node still has slabs on its full or
 * partial list, and 0 otherwise.
 */
static int __cache_shrink(struct kmem_cache *cachep)
{
    struct kmem_list3 *l3;
    int node = 0;
    int busy = 0;

    /* First push everything held in the array caches back to the slab lists. */
    drain_cpu_caches(cachep);

    check_irq_on();
    for_each_online_node(node) {
        l3 = cachep->nodelists[node];
        if (l3) {
            /*
             * Ask for up to l3->free_objects to be released; per the
             * original annotation this frees the slabs on the free list.
             */
            drain_freelist(cachep, l3, l3->free_objects);

            /* Anything left on the full or partial list means live objects. */
            if (!list_empty(&l3->slabs_full) ||
                !list_empty(&l3->slabs_partial))
                busy = 1;
        }
    }
    return busy;
}

其中用到了这几个函数：

/*
 * drain_cpu_caches - empty every array cache that belongs to a cache.
 * @cachep: cache whose array caches are drained
 *
 * Runs in three passes: the per-CPU array caches (through do_drain() on each
 * CPU), then the alien caches of every online node, then the shared array
 * cache of every online node.
 */
static void drain_cpu_caches(struct kmem_cache *cachep)
{
    struct kmem_list3 *l3;
    int nid;

    /*
     * Run do_drain() on every CPU. The original annotation points out that
     * the helper's name carries no "online" qualifier.
     * NOTE(review): the trailing 1, 1 are presumably the retry/wait flags of
     * this kernel version's on_each_cpu() - confirm.
     */
    on_each_cpu(do_drain, cachep, 1, 1);
    check_irq_on();

    /*
     * Pass over the nodes that have an alien cache. Per the original
     * annotation, drain_alien_cache() is an empty function in the analysed
     * version.
     */
    for_each_online_node(nid) {
        l3 = cachep->nodelists[nid];
        if (!l3 || !l3->alien)
            continue;
        drain_alien_cache(cachep, l3->alien);
    }

    /* Finally force-drain each node's shared array cache (force = 1). */
    for_each_online_node(nid) {
        l3 = cachep->nodelists[nid];
        if (!l3)
            continue;
        drain_array(cachep, l3, l3->shared, 1, nid);
    }
}

/*
 * do_drain - release every object held in the current CPU's array cache.
 * @arg: the struct kmem_cache to drain, passed as an opaque pointer
 *
 * Passed to on_each_cpu() by drain_cpu_caches(). Expects interrupts to be
 * disabled (see check_irq_off()).
 */
static void do_drain(void *arg)
{
    struct kmem_cache *cache = arg;
    struct array_cache *cpu_ac;
    int nid = numa_node_id();

    check_irq_off();

    /* Array cache of the CPU this is running on. */
    cpu_ac = cpu_cache_get(cache);

    /* free_block() requires the node's list_lock to be held by the caller. */
    spin_lock(&cache->nodelists[nid]->list_lock);
    free_block(cache, cpu_ac->entry, cpu_ac->avail, nid);
    spin_unlock(&cache->nodelists[nid]->list_lock);

    /* Every entry has been handed back, so the array is now empty. */
    cpu_ac->avail = 0;
}

/*
 * drain_array - release objects from an array cache back to the slab lists.
 * @cachep: cache the array belongs to
 * @l3: slab lists whose list_lock guards the release
 * @ac: array cache to drain; may be NULL
 * @force: non-zero to release everything regardless of recent use
 * @node: node index passed through to free_block()
 *
 * The l3 lock is only taken when there is something to release. That lock
 * also protects the array_cache itself when this is used on a shared array.
 *
 * Without @force, an array marked as touched merely has the mark cleared,
 * and an untouched one gives up about a fifth of its limit (or about half
 * of its content when it holds less than that).
 */
void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
             struct array_cache *ac, int force, int node)
{
    int tofree;

    /* Nothing to do for a missing or empty array. */
    if (!ac || !ac->avail)
        return;

    /* Recently used and not forced: clear the mark and leave it alone. */
    if (ac->touched && !force) {
        ac->touched = 0;
        return;
    }

    spin_lock_irq(&l3->list_lock);
    /* Re-check under the lock before releasing anything. */
    if (ac->avail) {
        /*
         * Forced drains (as issued by drain_cpu_caches()) release every
         * entry; otherwise only part of the array is released.
         */
        if (force)
            tofree = ac->avail;
        else
            tofree = (ac->limit + 4) / 5;
        if (tofree > ac->avail)
            tofree = (ac->avail + 1) / 2;

        /* Release from the front of the array ... */
        free_block(cachep, ac->entry, tofree, node);
        ac->avail -= tofree;
        /* ... and slide the remaining entries down to the front. */
        memmove(ac->entry, &(ac->entry[tofree]),
            sizeof(void *) * ac->avail);
    }
    spin_unlock_irq(&l3->list_lock);
}

drain_array()函数是把本地缓存和本地共享缓存释放到三链中，所以会用到：

/*
 * free_block - put a batch of objects back into their slabs on one node.
 * @cachep: cache the objects belong to
 * @objpp: array of object pointers to release
 * @nr_objects: number of entries of @objpp to process
 * @node: index into cachep->nodelists selecting the slab lists to use
 *
 * The caller must already hold the list_lock of the kmem_list3 for @node.
 *
 * Each object's slab is unlinked, the object is returned to it, and the slab
 * is then re-linked according to its new state: to the tail of the partial
 * list while it still has objects in use, to the free list when it is
 * completely unused, or destroyed outright when the node already holds more
 * free objects than its free_limit.
 */
static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
               int node)
{
    struct kmem_list3 *l3;
    int i;

    for (i = 0; i < nr_objects; i++) {
        void *objp = objpp[i];
        /* Slab that owns this object. */
        struct slab *slabp = virt_to_slab(objp);

        l3 = cachep->nodelists[node];

        /* Take the slab off whichever list it is on; it is re-linked below. */
        list_del(&slabp->list);
        check_spinlock_acquired_node(cachep, node);
        check_slabp(cachep, slabp);

        /* Return the object to its slab and count it as free on this node. */
        slab_put_obj(cachep, slabp, objp, node);
        STATS_DEC_ACTIVE(cachep);
        l3->free_objects++;
        check_slabp(cachep, slabp);

        if (slabp->inuse != 0) {
            /*
             * Still partly in use: always queue at the tail of the partial
             * list, which leaves the most time for the slab's other
             * objects to be freed as well.
             */
            list_add_tail(&slabp->list, &l3->slabs_partial);
            continue;
        }

        if (l3->free_objects <= l3->free_limit) {
            /* Completely free and below the limit: keep it on the free list. */
            list_add(&slabp->list, &l3->slabs_free);
            continue;
        }

        /*
         * Completely free and over the limit: drop the whole slab, so its
         * cachep->num objects no longer count as free.
         * No previously held lock needs to be dropped here; an off-slab
         * descriptor is guaranteed to come from a different cache (see the
         * comments before alloc_slabmgmt).
         */
        l3->free_objects -= cachep->num;
        slab_destroy(cachep, slabp);
    }
}

不管怎么样，每次销毁缓存器最终都会释放其空链表中的slab，然后检测满链表和部分满链表是否还有slab，如果还有，那么不能销毁缓存器。如果没有，那么底层调用下面的函数：

/缓存器的销毁很简单，依次检查和释放本地CPU缓存，本地共享，三链，以及缓存器本身。
//该函数通常只发生在卸载module(模块)的时候
static void __kmem_cache_destroy(struct kmem_cache *cachep)
{
    int i;
    struct kmem_list3 *l3;

    //释放每个CPU本地缓存，注意此时CPU是 online 在线状态，如果是down状态，并没有释放。( 对离线无法释放感到无语:) )
    for_each_online_cpu(i)   // online
        kfree(cachep->array[i]);

    /* NUMA: free the list3 structures */
    for_each_online_node(i) {  //对每个在线的节点
        l3 = cachep->nodelists[i];
        if (l3) {
            //释放本地共享缓存使用的array_cache对象
            kfree(l3->shared);
            free_alien_cache(l3->alien);
            kfree(l3);  //释放三链
        }
    }
    //释放缓存器，因为缓存器是属于 cache_cache 的对象，所以调用对象释放函数，该函数释放slab之前申请过的某个对象
    kmem_cache_free(&cache_cache, cachep);
}

这样所有的销毁工作都已经全部展现，日后我打算分析伙伴系统的内存管理机制。因为我觉得我已经分析了slab机制了，如果不往下深入，我可能不会满足。

嗯，大概花了整整一星期时间。

参考：http://blog.csdn.net/vanbreaker/article/details/7674601

slab源码分析--销毁函数

标签：time release rip java 开始模块 numa pen 链表

原文地址：http://blog.csdn.net/freeelinux/article/details/54574720

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行