X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_gro%2Fgro_tcp4.h;h=bb875a5ef061133e45f9e2552f88e5058ea1ab7a;hb=986f2134c336a086054980903e819308dcfd43ce;hp=d12952393ce90b2499c0c60808165068d9cbda0e;hpb=369991d997e4abdee355e19ffbb41a4d246cafa2;p=dpdk.git diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h index d12952393c..bb875a5ef0 100644 --- a/lib/librte_gro/gro_tcp4.h +++ b/lib/librte_gro/gro_tcp4.h @@ -5,19 +5,28 @@ #ifndef _GRO_TCP4_H_ #define _GRO_TCP4_H_ +#include +#include +#include + #define INVALID_ARRAY_INDEX 0xffffffffUL #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) /* - * the max L3 length of a TCP/IPv4 packet. The L3 length - * is the sum of ipv4 header, tcp header and L4 payload. + * The max length of a IPv4 packet, which includes the length of the L3 + * header, the L4 header and the data payload. */ -#define TCP4_MAX_L3_LENGTH UINT16_MAX +#define MAX_IPV4_PKT_LENGTH UINT16_MAX + +/* The maximum TCP header length */ +#define MAX_TCP_HLEN 60 +#define INVALID_TCP_HDRLEN(len) \ + (((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN)) -/* criteria of mergeing packets */ -struct tcp4_key { - struct ether_addr eth_saddr; - struct ether_addr eth_daddr; +/* Header fields representing a TCP/IPv4 flow */ +struct tcp4_flow_key { + struct rte_ether_addr eth_saddr; + struct rte_ether_addr eth_daddr; uint32_t ip_src_addr; uint32_t ip_dst_addr; @@ -26,44 +35,43 @@ struct tcp4_key { uint16_t dst_port; }; -struct gro_tcp4_key { - struct tcp4_key key; +struct gro_tcp4_flow { + struct tcp4_flow_key key; /* - * the index of the first packet in the item group. - * If the value is INVALID_ARRAY_INDEX, it means - * the key is empty. + * The index of the first packet in the flow. + * INVALID_ARRAY_INDEX indicates an empty flow. */ uint32_t start_index; }; struct gro_tcp4_item { /* - * first segment of the packet. If the value + * The first MBUF segment of the packet. If the value * is NULL, it means the item is empty. */ struct rte_mbuf *firstseg; - /* last segment of the packet */ + /* The last MBUF segment of the packet */ struct rte_mbuf *lastseg; /* - * the time when the first packet is inserted - * into the table. If a packet in the table is - * merged with an incoming packet, this value - * won't be updated. We set this value only - * when the first packet is inserted into the - * table. + * The time when the first packet is inserted into the table. + * This value won't be updated, even if the packet is merged + * with other packets. */ uint64_t start_time; /* - * we use next_pkt_idx to chain the packets that - * have same key value but can't be merged together. + * next_pkt_idx is used to chain the packets that + * are in the same flow but can't be merged together + * (e.g. caused by packet reordering). */ uint32_t next_pkt_idx; - /* the sequence number of the packet */ + /* TCP sequence number of the packet */ uint32_t sent_seq; - /* the IP ID of the packet */ + /* IPv4 ID of the packet */ uint16_t ip_id; /* the number of merged packets */ uint16_t nb_merged; + /* Indicate if IPv4 ID can be ignored */ + uint8_t is_atomic; }; /* @@ -72,31 +80,31 @@ struct gro_tcp4_item { struct gro_tcp4_tbl { /* item array */ struct gro_tcp4_item *items; - /* key array */ - struct gro_tcp4_key *keys; + /* flow array */ + struct gro_tcp4_flow *flows; /* current item number */ uint32_t item_num; - /* current key num */ - uint32_t key_num; + /* current flow num */ + uint32_t flow_num; /* item array size */ uint32_t max_item_num; - /* key array size */ - uint32_t max_key_num; + /* flow array size */ + uint32_t max_flow_num; }; /** * This function creates a TCP/IPv4 reassembly table. * * @param socket_id - * socket index for allocating TCP/IPv4 reassemble table + * Socket index for allocating the TCP/IPv4 reassemble table * @param max_flow_num - * the maximum number of flows in the TCP/IPv4 GRO table + * The maximum number of flows in the TCP/IPv4 GRO table * @param max_item_per_flow - * the maximum packet number per flow. + * The maximum number of packets per flow * * @return - * if create successfully, return a pointer which points to the - * created TCP/IPv4 GRO table. Otherwise, return NULL. + * - Return the table pointer on success. + * - Return NULL on failure. */ void *gro_tcp4_tbl_create(uint16_t socket_id, uint16_t max_flow_num, @@ -106,62 +114,56 @@ void *gro_tcp4_tbl_create(uint16_t socket_id, * This function destroys a TCP/IPv4 reassembly table. * * @param tbl - * a pointer points to the TCP/IPv4 reassembly table. + * Pointer pointing to the TCP/IPv4 reassembly table. */ void gro_tcp4_tbl_destroy(void *tbl); /** - * This function searches for a packet in the TCP/IPv4 reassembly table - * to merge with the inputted one. To merge two packets is to chain them - * together and update packet headers. Packets, whose SYN, FIN, RST, PSH - * CWR, ECE or URG bit is set, are returned immediately. Packets which - * only have packet headers (i.e. without data) are also returned - * immediately. Otherwise, the packet is either merged, or inserted into - * the table. Besides, if there is no available space to insert the - * packet, this function returns immediately too. + * This function merges a TCP/IPv4 packet. It doesn't process the packet, + * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have + * payload. * - * This function assumes the inputted packet is with correct IPv4 and - * TCP checksums. And if two packets are merged, it won't re-calculate - * IPv4 and TCP checksums. Besides, if the inputted packet is IP - * fragmented, it assumes the packet is complete (with TCP header). + * This function doesn't check if the packet has correct checksums and + * doesn't re-calculate checksums for the merged packet. Additionally, + * it assumes the packets are complete (i.e., MF==0 && frag_off==0), + * when IP fragmentation is possible (i.e., DF==0). It returns the + * packet, if the packet has invalid parameters (e.g. SYN bit is set) + * or there is no available space in the table. * * @param pkt - * packet to reassemble. + * Packet to reassemble * @param tbl - * a pointer that points to a TCP/IPv4 reassembly table. + * Pointer pointing to the TCP/IPv4 reassembly table * @start_time - * the start time that the packet is inserted into the table + * The time when the packet is inserted into the table * * @return - * if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE - * or URG bit is set, or there is no available space in the table to - * insert a new item or a new key, return a negative value. If the - * packet is merged successfully, return an positive value. If the - * packet is inserted into the table, return 0. + * - Return a positive value if the packet is merged. + * - Return zero if the packet isn't merged but stored in the table. + * - Return a negative value for invalid parameters or no available + * space in the table. */ int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt, struct gro_tcp4_tbl *tbl, uint64_t start_time); /** - * This function flushes timeout packets in a TCP/IPv4 reassembly table - * to applications, and without updating checksums for merged packets. - * The max number of flushed timeout packets is the element number of - * the array which is used to keep flushed packets. + * This function flushes timeout packets in a TCP/IPv4 reassembly table, + * and without updating checksums. * * @param tbl - * a pointer that points to a TCP GRO table. + * TCP/IPv4 reassembly table pointer * @param flush_timestamp - * this function flushes packets which are inserted into the table - * before or at the flush_timestamp. + * Flush packets which are inserted into the table before or at the + * flush_timestamp. * @param out - * pointer array which is used to keep flushed packets. + * Pointer array used to keep flushed packets * @param nb_out - * the element number of out. It's also the max number of timeout + * The element number in 'out'. It also determines the maximum number of * packets that can be flushed finally. * * @return - * the number of packets that are returned. + * The number of flushed packets */ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, uint64_t flush_timestamp, @@ -173,10 +175,133 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, * reassembly table. * * @param tbl - * pointer points to a TCP/IPv4 reassembly table. + * TCP/IPv4 reassembly table pointer * * @return - * the number of packets in the table + * The number of packets in the table */ uint32_t gro_tcp4_tbl_pkt_count(void *tbl); + +/* + * Check if two TCP/IPv4 packets belong to the same flow. + */ +static inline int +is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2) +{ + return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) && + rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) && + (k1.ip_src_addr == k2.ip_src_addr) && + (k1.ip_dst_addr == k2.ip_dst_addr) && + (k1.recv_ack == k2.recv_ack) && + (k1.src_port == k2.src_port) && + (k1.dst_port == k2.dst_port)); +} + +/* + * Merge two TCP/IPv4 packets without updating checksums. + * If cmp is larger than 0, append the new packet to the + * original packet. Otherwise, pre-pend the new packet to + * the original packet. + */ +static inline int +merge_two_tcp4_packets(struct gro_tcp4_item *item, + struct rte_mbuf *pkt, + int cmp, + uint32_t sent_seq, + uint16_t ip_id, + uint16_t l2_offset) +{ + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; + uint16_t hdr_len, l2_len; + + if (cmp > 0) { + pkt_head = item->firstseg; + pkt_tail = pkt; + } else { + pkt_head = pkt; + pkt_tail = item->firstseg; + } + + /* check if the IPv4 packet length is greater than the max value */ + hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len + + pkt_head->l4_len; + l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len; + if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - + hdr_len > MAX_IPV4_PKT_LENGTH)) + return 0; + + /* remove the packet header for the tail packet */ + rte_pktmbuf_adj(pkt_tail, hdr_len); + + /* chain two packets together */ + if (cmp > 0) { + item->lastseg->next = pkt; + item->lastseg = rte_pktmbuf_lastseg(pkt); + /* update IP ID to the larger value */ + item->ip_id = ip_id; + } else { + lastseg = rte_pktmbuf_lastseg(pkt); + lastseg->next = item->firstseg; + item->firstseg = pkt; + /* update sent_seq to the smaller value */ + item->sent_seq = sent_seq; + item->ip_id = ip_id; + } + item->nb_merged++; + + /* update MBUF metadata for the merged packet */ + pkt_head->nb_segs += pkt_tail->nb_segs; + pkt_head->pkt_len += pkt_tail->pkt_len; + + return 1; +} + +/* + * Check if two TCP/IPv4 packets are neighbors. + */ +static inline int +check_seq_option(struct gro_tcp4_item *item, + struct rte_tcp_hdr *tcph, + uint32_t sent_seq, + uint16_t ip_id, + uint16_t tcp_hl, + uint16_t tcp_dl, + uint16_t l2_offset, + uint8_t is_atomic) +{ + struct rte_mbuf *pkt_orig = item->firstseg; + struct rte_ipv4_hdr *iph_orig; + struct rte_tcp_hdr *tcph_orig; + uint16_t len, tcp_hl_orig; + + iph_orig = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) + + l2_offset + pkt_orig->l2_len); + tcph_orig = (struct rte_tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len); + tcp_hl_orig = pkt_orig->l4_len; + + /* Check if TCP option fields equal */ + len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr); + if ((tcp_hl != tcp_hl_orig) || ((len > 0) && + (memcmp(tcph + 1, tcph_orig + 1, + len) != 0))) + return 0; + + /* Don't merge packets whose DF bits are different */ + if (unlikely(item->is_atomic ^ is_atomic)) + return 0; + + /* check if the two packets are neighbors */ + len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - + pkt_orig->l3_len - tcp_hl_orig; + if ((sent_seq == item->sent_seq + len) && (is_atomic || + (ip_id == item->ip_id + 1))) + /* append the new packet */ + return 1; + else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic || + (ip_id + item->nb_merged == item->ip_id))) + /* pre-pend the new packet */ + return -1; + + return 0; +} #endif