OVS流表查询过程分析

时间：2014-07-01 11:09:12 阅读：638 评论：0 收藏：0 [点我收藏+]

标签：open vswitch flow table sw_flow jhash2 hlist

OVS中流表操作的理解关键在于这里哈希表的实现，引入的 flex_array方便了内存的管理，通过 hash&（桶数-1）可以随机的将一个元素定位到某一个桶中。

接下来是代码细节。

一. 核心数据结构

//流表

struct flow_table{

struct flex_array * buckets; //具体的流表项

unsigned int count , n_buckets ; //

struct rcu_head rcu;

int node_ver;

//node_ver的存在使得我们可以控制sw_flow的哪个hlist_node 链入到这个bucket中

//但是仍然没有体会这样做的巧妙之处？

u32 hash_seed ; //哈希算法需要的种子

bool keep_flows ; //是否保留该流表项

};

//流表项

struct sw_flow{

struct rcu_head rcu;

struct hlist_node hash_node [2]; //通过其一链入所在桶的链表

u32 hash ; //存hash值

struct sw_flow_key key; //

struct sw_flow_actions __rcu *sf_acts;

spinlock_t lock ; /* Lock for values below. */

unsigned long used ;/* Last used time (in jiffies). */

u64 packet_count ; /* Number of packets matched. */

u64 byte_count ; /* Number of bytes matched. */

u8 tcp_flags ; /* Union of seen TCP flags. */

};

//弹性数组？？用于大量需要数组类型的时候

struct flex_array{

union {

struct {

int element_size ;//每个元素尺寸

int total_nr_elements ; //元素个数

int elems_per_part ; //每个部分元素的个数

u32 reciprocal_elems;

struct flex_array_part *parts []; //可以有多个部分

};

//这个的组织可以使得flex_array 的大小刚好是一个物理页PAGE_SIZE

char padding [FLEX_ARRAY_BASE_SIZE ];

};

//flex_array 中的每个块也是物理页大小的空间

struct flex_array_part{

char elements[ FLEX_ARRAY_PART_SIZE];

};

二. 代码分析

1.先看看flow_table的初始化，可以看出相应数据结构的作用。

//构建一个flow_table，有 new_size个buckets

struct flow_table *ovs_flow_tbl_alloc( int new_size)

{

struct flow_table *table = kmalloc( sizeof (*table), GFP_KERNEL);

if (!table)

return NULL;

table-> buckets = alloc_buckets(new_size);

if (!table-> buckets) {

kfree(table);

return NULL;

}

table-> n_buckets = new_size;

table-> count = 0;

table-> node_ver = 0;

table-> keep_flows = false;

get_random_bytes(&table-> hash_seed , sizeof (u32));

return table;

}

2.接下来看datapath中比较核心的查询流表操作的细节。

//根据key来进行流表查询

struct sw_flow *ovs_flow_tbl_lookup( struct flow_table *table,

struct sw_flow_key *key, int key_len)

{

struct sw_flow *flow;

struct hlist_node *n;

struct hlist_head *head;

u8 *_key;

int key_start;

u32 hash;

//因为sw_flow_key中开始有tunnel相关字段，所以需要知道key的开始地址

key_start = flow_key_start(key);

//jhash2算法

hash = ovs_flow_hash(key, key_start, key_len);

_key = ( u8 *) key + key_start;

//根据hash值找到相应的桶，

head = find_bucket(table, hash);

hlist_for_each_entry_rcu(flow, n, head, hash_node[table-> node_ver]) {

if (flow->hash == hash &&

!memcmp(( u8 *)&flow-> key + key_start, _key, key_len - key_start)) {

return flow;

}

return NULL;

}

//通过hash值找到所在bucket的头指针

static struct hlist_head *find_bucket (struct flow_table *table, u32 hash)

{

//因为hash本来就是一个字（u32）

hash = jhash_1word(hash, table-> hash_seed);

return flex_array_get(table-> buckets,(hash & (table-> n_buckets - 1)));

}

//返回弹性数组中 element_nr号元素指针

void *flex_array_get( struct flex_array *fa, unsigned int element_nr)

{

int part_nr = 0;

struct flex_array_part *part;

if (!fa-> element_size)

return NULL;

if (element_nr >= fa-> total_nr_elements)

return NULL;

//如果当前页够容纳这些元素的话，内存区域仍然就使用parts[0]

if (elements_fit_in_base(fa))

part = ( struct flex_array_part *)&fa->parts[0];

else {

part_nr = fa_element_to_part_nr(fa, element_nr);

part = fa-> parts[part_nr];

if (!part)

return NULL;

}

return &part-> elements[index_inside_part(fa, element_nr, part_nr)];

}

//在基本数据结构flex_array中除去元数据之后，所剩的空间

//也就是当前page 除去前面几个基本字段，看剩余的空间是否够存储当前元素

static inline int elements_fit_in_base (struct flex_array *fa)

{

int data_size = fa-> element_size * fa->total_nr_elements;

if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT)

return 1;

return 0;

}

#define FLEX_ARRAY_BASE_BYTES_LEFT \

(FLEX_ARRAY_BASE_SIZE - offsetof( struct flex_array, parts))

//这里的倒数除只是定义了一种机制，能随机从元素号映射到flex_array的part number；

static int fa_element_to_part_nr (struct flex_array *fa,

unsigned int element_nr)

{

return reciprocal_divide(element_nr, fa->reciprocal_elems);

}

static inline u32 reciprocal_divide(u32 A, u32 R)

{

return ( u32)(((u64)A * R) >> 32);

}

//求得这个元素在这个part中的索引号

static unsigned int index_inside_part (struct flex_array *fa,

unsigned int element_nr,

unsigned int part_nr)

{

unsigned int part_offset;

part_offset = element_nr - part_nr * fa->elems_per_part;

return part_offset * fa-> element_size;

}

find_bucket执行完成后，最终返回该hash值对应的元素在flex_array中的地址，当然在流表中元素指的就是sw_flow。

三. 相关知识点

1.使用的是linux内核中的 jhash2 哈希算法;

110 /* jhash2 - hash an array of u32‘s

111 * @k: the key which must be an array of u32‘s

112 * @length: the number of u32‘s in the key

113 * @initval: the previous hash, or an arbitray value

114 *

115 * Returns the hash value of the key.

116 */

117 static inline u32 jhash2(const u32 *k, u32 length, u32 initval)

118 {

119 u32 a, b, c;

120

121 /* Set up the internal state */

122 a = b = c = JHASH_INITVAL + (length<<2) + initval;

123

124 /* Handle most of the key */

125 while (length > 3) {

126 a += k[0];

127 b += k[1];

128 c += k[2];

129 __jhash_mix(a, b, c);

130 length -= 3;

131 k += 3;

132 }

133

134 /* Handle the last 3 u32‘s: all the case statements fall through */

135 switch (length) {

136 case 3: c += k[2];

137 case 2: b += k[1];

138 case 1: a += k[0];

139 __jhash_final(a, b, c);

140 case 0: /* Nothing left to add */

141 break;

142 }

143

144 return c;

145 }

148 /* jhash_3words - hash exactly 3, 2 or 1 word(s) */

149 static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)

150 {

151 a += JHASH_INITVAL;

152 b += JHASH_INITVAL;

153 c += initval;

154

155 __jhash_final(a, b, c);

156

157 return c;

158 }

159

160 static inline u32 jhash_2words(u32 a, u32 b, u32 initval)

161 {

162 return jhash_3words(a, b, 0, initval);

163 }

164

165 static inline u32 jhash_1word(u32 a, u32 initval)

166 {

167 return jhash_3words(a, 0, 0, initval);

168 }

2. 巩固内核中hlist遍历原理。

转载注明出处：http://blog.csdn.net/vonzhoufz/article/details/36005667。

OVS流表查询过程分析,布布扣,bubuko.com

OVS流表查询过程分析

标签：open vswitch flow table sw_flow jhash2 hlist

原文地址：http://blog.csdn.net/vonzhoufz/article/details/36005667

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行