|
Prev: [PATCH 2/3 2.6.27] open-iscsi - add support for pdu digest offload and payload direct-placement
Next: [PATCH 0/3 2.6.27] cxgb3i: Add iSCSI driver
From: Karen Xie on 19 Jun 2008 01:50 From: Karen Xie <kxie(a)chelsio.com> New cxgb3i iscsi driver. The driver interfaces with cxgb3 driver to access the hardware. Signed-off-by: Karen Xie <kxie(a)chelsio.com> --- drivers/scsi/Kconfig | 2 drivers/scsi/Makefile | 1 drivers/scsi/cxgb3i/Kconfig | 6 drivers/scsi/cxgb3i/Makefile | 5 drivers/scsi/cxgb3i/cxgb3i.h | 190 ++ drivers/scsi/cxgb3i/cxgb3i_init.c | 107 + drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 797 ++++++++++ drivers/scsi/cxgb3i/cxgb3i_offload.c | 2808 ++++++++++++++++++++++++++++++++++ drivers/scsi/cxgb3i/cxgb3i_offload.h | 259 +++ drivers/scsi/cxgb3i/cxgb3i_ulp2.c | 722 +++++++++ drivers/scsi/cxgb3i/cxgb3i_ulp2.h | 102 + security/security.c | 1 12 files changed, 5000 insertions(+), 0 deletions(-) create mode 100644 drivers/scsi/cxgb3i/Kconfig create mode 100644 drivers/scsi/cxgb3i/Makefile create mode 100644 drivers/scsi/cxgb3i/cxgb3i.h create mode 100644 drivers/scsi/cxgb3i/cxgb3i_init.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_iscsi.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_offload.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_offload.h create mode 100644 drivers/scsi/cxgb3i/cxgb3i_ulp2.c create mode 100644 drivers/scsi/cxgb3i/cxgb3i_ulp2.h diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 22070e9..5ae06a8 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1759,6 +1759,8 @@ config ZFCP source "drivers/scsi/bnx2i/Kconfig" +source "drivers/scsi/cxgb3i/Kconfig" + config SCSI_SRP tristate "SCSI RDMA Protocol helper library" depends on SCSI && PCI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index a3f6866..b830af3 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -123,6 +123,7 @@ obj-$(CONFIG_SCSI_STEX) += stex.o obj-$(CONFIG_SCSI_MVSAS) += mvsas.o obj-$(CONFIG_PS3_ROM) += ps3rom.o obj-$(CONFIG_SCSI_BNX2_ISCSI) += bnx2i/ +obj-$(CONFIG_SCSI_CXGB3_ISCSI) += cxgb3i/ obj-$(CONFIG_ARM) += arm/ diff --git a/drivers/scsi/cxgb3i/Kconfig 
b/drivers/scsi/cxgb3i/Kconfig new file mode 100644 index 0000000..2762814 --- /dev/null +++ b/drivers/scsi/cxgb3i/Kconfig @@ -0,0 +1,6 @@ +config SCSI_CXGB3_ISCSI + tristate "Chelsio S3xx iSCSI support" + select CHELSIO_T3 + select SCSI_ISCSI_ATTRS + ---help--- + This driver supports iSCSI offload for the Chelsio S3 series devices. diff --git a/drivers/scsi/cxgb3i/Makefile b/drivers/scsi/cxgb3i/Makefile new file mode 100644 index 0000000..8c8a894 --- /dev/null +++ b/drivers/scsi/cxgb3i/Makefile @@ -0,0 +1,5 @@ +EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 + +cxgb3i-y := cxgb3i_init.o cxgb3i_iscsi.o cxgb3i_ulp2.o cxgb3i_offload.o + +obj-$(CONFIG_SCSI_CXGB3_ISCSI) += cxgb3i.o diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h new file mode 100644 index 0000000..39a3b94 --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i.h @@ -0,0 +1,190 @@ +/* + * cxgb3i.h: Chelsio S3xx iSCSI driver. + * + * Copyright (c) 2008 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
+ * + * Written by: Karen Xie (kxie(a)chelsio.com) + */ + +#ifndef __CXGB3I_H__ +#define __CXGB3I_H__ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/in.h> +#include <linux/kfifo.h> +#include <linux/netdevice.h> +#include <linux/completion.h> +#include <linux/scatterlist.h> + +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_device.h> +#include <scsi/scsi_eh.h> +#include <scsi/scsi_host.h> +#include <scsi/scsi.h> +#include <scsi/iscsi_proto.h> +#include <scsi/libiscsi.h> +#include <scsi/scsi_transport_iscsi.h> +#include <linux/crypto.h> +#include "../iscsi_tcp.h" + +/* from cxgb3 LLD */ +#include "common.h" +#include "t3_cpl.h" +#include "t3cdev.h" +#include "cxgb3_ctl_defs.h" +#include "cxgb3_offload.h" +#include "firmware_exports.h" +#include "cxgb3i_offload.h" + +/** + * Logging helpers: each one wraps printk() at a fixed level and prefixes the + * message with the driver name. cxgb3i_log_debug() also prints the calling + * function name and expands to nothing unless __DEBUG_CXGB3I__ is defined; + * note that when it is enabled it logs at KERN_ERR level. + */ +#define cxgb3i_log_error(fmt...) printk(KERN_ERR "cxgb3i: ERR! " fmt) +#define cxgb3i_log_warn(fmt...) printk(KERN_WARNING "cxgb3i: WARN! " fmt) +#define cxgb3i_log_info(fmt...) printk(KERN_INFO "cxgb3i: " fmt) + +#ifdef __DEBUG_CXGB3I__ +#define cxgb3i_log_debug(fmt, args...) \ + printk(KERN_ERR "cxgb3i: %s - " fmt, __func__ , ## args) +#else +#define cxgb3i_log_debug(fmt...) 
+#endif + +#define CXGB3I_SCSI_QDEPTH_DFLT 128 + +struct cxgb3i_adapter; +struct cxgb3i_hba; +struct cxgb3i_endpoint; + +/** + * struct cxgb3i_tag_format - cxgb3i ulp tag for steering pdu payload + * + * @idx_bits: # of low-order bits of the s/w tag that hold the task index; + * the session age is stored above them + * @age_bits: presumably the # of bits used for the session age (not + * referenced in this header -- TODO confirm) + * @rsvd_bits: # of bits used by h/w + * @rsvd_shift: bit position of the h/w reserved field within the tag + * @rsvd_mask: bit mask applied to the reserved field after shifting it down + * + */ +struct cxgb3i_tag_format { + unsigned char idx_bits; + unsigned char age_bits; + unsigned char rsvd_bits; + unsigned char rsvd_shift; + u32 rsvd_mask; +}; + +/** + * struct cxgb3i_ddp_info - cxgb3i direct data placement for pdu payload + * + * @llimit: lower bound of the page pod memory + * @ulimit: upper bound of the page pod memory + * @nppods: # of page pod entries + * @idx_last: page pod entry last used + * @map_lock: lock to synchronize access to the page pod map + * @map: page pod map + */ +struct cxgb3i_ddp_info { + unsigned int llimit; + unsigned int ulimit; + unsigned int nppods; + unsigned int idx_last; + spinlock_t map_lock; + u8 *map; +}; + +/* + * Per-port iSCSI host: ties a net device to its scsi host and to the owning + * adapter. cconn_list holds the connections of this host and is protected by + * cconn_rwlock. + */ +struct cxgb3i_hba { + struct cxgb3i_adapter *snic; + struct net_device *ndev; + struct Scsi_Host *shost; + + rwlock_t cconn_rwlock; + struct list_head cconn_list; +}; + +/* + * Per-t3cdev adapter state: one hba slot per port plus the tag format and + * ddp state. list_head is the link used to keep adapters on a list. + */ +struct cxgb3i_adapter { + struct list_head list_head; + spinlock_t lock; + struct t3cdev *tdev; + struct pci_dev *pdev; + unsigned char hba_cnt; + struct cxgb3i_hba *hba[MAX_NPORTS]; + + unsigned int tx_max_size; + unsigned int rx_max_size; + + struct cxgb3i_tag_format tag_format; + struct cxgb3i_ddp_info ddp; +}; + +/* + * Driver-private connection state: links an iscsi_conn to its endpoint and + * hba. list_head is the link on the hba connection list. + */ +struct cxgb3i_conn { + struct list_head list_head; + + struct cxgb3i_endpoint *cep; + struct iscsi_conn *conn; + struct cxgb3i_hba *hba; +}; + +/* + * Driver-private endpoint state: the socket of the TCP connection plus + * pointers to the hba and to the connection using the endpoint. + */ +struct cxgb3i_endpoint { + struct socket *sock; + struct cxgb3i_hba *hba; + struct cxgb3i_conn *cconn; +}; + +int cxgb3i_iscsi_init(void); +void cxgb3i_iscsi_cleanup(void); + +struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *); +struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *); +void cxgb3i_adapter_remove(struct cxgb3i_adapter *); +int cxgb3i_adapter_ulp_init(struct 
cxgb3i_adapter *); +void cxgb3i_adapter_ulp_cleanup(struct cxgb3i_adapter *); + +struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *); +struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *, + struct net_device *); +void cxgb3i_hba_host_remove(struct cxgb3i_hba *); + +void cxgb3i_hba_conn_add(struct cxgb3i_conn *, struct cxgb3i_hba *); +void cxgb3i_hba_conn_remove(struct cxgb3i_conn *); + +int cxgb3i_ulp2_init(void); +void cxgb3i_ulp2_cleanup(void); +int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *, int, int); + +void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32, + struct scatterlist *, unsigned int); +u32 cxgb3i_ddp_tag_reserve(struct cxgb3i_adapter *, unsigned int, + u32, unsigned int, struct scatterlist *, + unsigned int); +/** + * cxgb3i_parse_tag - split a tag into its h/w reserved field and s/w bits + * @format: tag layout description + * @tag: tag value to decode + * @rsvd_bits: if non-NULL, receives the reserved field, i.e. the tag shifted + * right by rsvd_shift and masked with rsvd_mask + * @sw_bits: if non-NULL, receives the s/w bits: the bits above the reserved + * field moved down to sit directly above the low rsvd_shift bits, + * which are kept in place + */ +static inline void cxgb3i_parse_tag(struct cxgb3i_tag_format *format, + u32 tag, u32 *rsvd_bits, u32 *sw_bits) +{ + if (rsvd_bits) + *rsvd_bits = (tag >> format->rsvd_shift) & format->rsvd_mask; + if (sw_bits) { + *sw_bits = (tag >> (format->rsvd_shift + format->rsvd_bits)) + << format->rsvd_shift; + *sw_bits |= tag & ((1 << format->rsvd_shift) - 1); + } +} + +void cxgb3i_sk_set_callbacks(struct sock *, struct iscsi_conn *); +void cxgb3i_sk_restore_callbacks(struct sock *, struct iscsi_conn *); + +int cxgb3i_conn_ulp2_xmit(struct iscsi_conn *); + +void cxgb3i_display_byte_string(char *, unsigned char *, int, int); + +#endif diff --git a/drivers/scsi/cxgb3i/cxgb3i_init.c b/drivers/scsi/cxgb3i/cxgb3i_init.c new file mode 100644 index 0000000..b848e4c --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_init.c @@ -0,0 +1,107 @@ +/* cxgb3i_init.c: Chelsio S3xx iSCSI driver. + * + * Copyright (c) 2008 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
+ * + * Written by: Karen Xie (kxie(a)chelsio.com) + */ + +#include "cxgb3i.h" + +#define DRV_MODULE_NAME "cxgb3i" +#define DRV_MODULE_VERSION "1.0.0" +#define DRV_MODULE_RELDATE "May 1, 2008" + +static char version[] __devinitdata = + "Chelsio S3xx iSCSI Driver " DRV_MODULE_NAME + " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + +MODULE_AUTHOR("Karen Xie <kxie(a)chelsio.com>"); +MODULE_DESCRIPTION("Chelsio S3xx iSCSI Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +static void open_s3_dev(struct t3cdev *); +static void close_s3_dev(struct t3cdev *); +cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS]; +struct cxgb3_client t3c_client = { + .name = "iscsi_cxgb3", + .handlers = cxgb3i_cpl_handlers, + .add = open_s3_dev, + .remove = close_s3_dev, +}; + +/** + * open_s3_dev - register with cxgb3 LLD + * @t3dev cxgb3 adapter instance + */ +static void open_s3_dev(struct t3cdev *t3dev) +{ + static int vers_printed; + + if (!vers_printed) + printk(KERN_INFO "%s", version); + + cxgb3i_log_debug("open cxgb3 %s.\n", t3dev->name); + + cxgb3i_tcp_add(t3dev, &t3c_client); + cxgb3i_adapter_add(t3dev); +} + +/** + * close_s3_dev - de-register with cxgb3 LLD + * @t3dev cxgb3 adapter instance + */ +static void close_s3_dev(struct t3cdev *t3dev) +{ + struct cxgb3i_adapter *snic = cxgb3i_adapter_find_by_tdev(t3dev); + cxgb3i_log_debug("close cxgb3 %s.\n", t3dev->name); + if (snic) + cxgb3i_adapter_remove(snic); + cxgb3i_tcp_remove(t3dev); +} + +/** + * cxgb3i_init_module - module init entry point + * + * initialize any driver wide global data structures and register itself + * with the cxgb3 module + */ +static int __init cxgb3i_init_module(void) +{ + int err; + + err = cxgb3i_tcp_init(cxgb3i_cpl_handlers); + if (err < 0) + return err; + + err = cxgb3i_iscsi_init(); + if (err < 0) + return err; + + err = cxgb3i_ulp2_init(); + if (err < 0) + return err; + + cxgb3_register_client(&t3c_client); + return 0; +} + +/** + * cxgb3i_exit_module - module 
cleanup/exit entry point + * + * go through the driver hba list and for each hba, release any resource held. + * and unregisters iscsi transport and the cxgb3 module + */ +static void __exit cxgb3i_exit_module(void) +{ + cxgb3_unregister_client(&t3c_client); + cxgb3i_ulp2_cleanup(); + cxgb3i_iscsi_cleanup(); +} + +module_init(cxgb3i_init_module); +module_exit(cxgb3i_exit_module); diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c new file mode 100644 index 0000000..ea7e21f --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -0,0 +1,797 @@ +/* cxgb3i_iscsi.c: Chelsio S3xx iSCSI driver. + * + * Copyright (c) 2008 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * Written by: Karen Xie (kxie(a)chelsio.com) + */ + +#include <net/tcp.h> +#include "cxgb3i.h" + +static struct scsi_transport_template *cxgb3i_scsi_transport; +static struct scsi_host_template cxgb3i_host_template; +static struct iscsi_transport cxgb3i_iscsi_transport; + +static LIST_HEAD(cxgb3i_snic_list); +static DEFINE_RWLOCK(cxgb3i_snic_rwlock); + +/** + * cxgb3i_adapter_add - initialize a s3 adapter structure and any h/w settings + * necessary + * @t3dev: cxgb3 adapter instance to create the iSCSI adapter state for + * + * Allocates the adapter, runs cxgb3i_adapter_ulp_init() on it and registers + * one scsi host per port before linking the adapter on cxgb3i_snic_list. + * Returns the new adapter, or NULL on failure, in which case everything set + * up so far is undone again. + */ +struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *t3dev) +{ + struct cxgb3i_adapter *snic; + struct adapter *adapter = tdev2adap(t3dev); + int i; + + snic = kzalloc(sizeof(*snic), GFP_KERNEL); + if (!snic) { + cxgb3i_log_debug("cxgb3 %s, OOM.\n", t3dev->name); + return NULL; + } + + spin_lock_init(&snic->lock); + snic->tdev = t3dev; + snic->pdev = adapter->pdev; + + if (cxgb3i_adapter_ulp_init(snic)) + goto free_snic; + + for_each_port(adapter, i) { + snic->hba[i] = cxgb3i_hba_host_add(snic, adapter->port[i]); + if (!snic->hba[i]) + goto ulp_cleanup; + } + snic->hba_cnt = adapter->params.nports; + + /* add 
to the list */ + write_lock(&cxgb3i_snic_rwlock); + list_add_tail(&snic->list_head, &cxgb3i_snic_list); + write_unlock(&cxgb3i_snic_rwlock); + + return snic; + +ulp_cleanup: + /* + * Port i failed: unregister the hosts of the ports before it, which + * would otherwise be leaked together with their pci device references. + */ + while (--i >= 0) + cxgb3i_hba_host_remove(snic->hba[i]); + cxgb3i_adapter_ulp_cleanup(snic); +free_snic: + kfree(snic); + return NULL; +} + +/** + * cxgb3i_adapter_remove - release all the resources held and cleanup any h/w + * settings necessary + * @snic: pointer to adapter instance + * + * Unlinks the adapter from cxgb3i_snic_list, removes the scsi host of every + * port and frees the adapter. @snic must not be used afterwards. + */ +void cxgb3i_adapter_remove(struct cxgb3i_adapter *snic) +{ + int i; + + /* remove from the list */ + write_lock(&cxgb3i_snic_rwlock); + list_del(&snic->list_head); + write_unlock(&cxgb3i_snic_rwlock); + + for (i = 0; i < snic->hba_cnt; i++) { + if (snic->hba[i]) { + cxgb3i_hba_host_remove(snic->hba[i]); + snic->hba[i] = NULL; + } + } + + /* release ddp resources */ + cxgb3i_adapter_ulp_cleanup(snic); + kfree(snic); +} + +/** + * cxgb3i_adapter_find_by_tdev - look up the adapter created for a t3cdev + * @t3dev: cxgb3 adapter instance + * + * Returns the matching entry of cxgb3i_snic_list, or NULL if there is none. + */ +struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *t3dev) +{ + struct cxgb3i_adapter *snic; + + read_lock(&cxgb3i_snic_rwlock); + list_for_each_entry(snic, &cxgb3i_snic_list, list_head) { + if (snic->tdev == t3dev) { + read_unlock(&cxgb3i_snic_rwlock); + return snic; + } + } + read_unlock(&cxgb3i_snic_rwlock); + + return NULL; +} + +/** + * cxgb3i_hba_find_by_netdev - look up the hba that owns a net device + * @ndev: net device to look for + * + * Walks the hbas of every adapter on cxgb3i_snic_list. Returns the hba + * whose ndev is @ndev, or NULL if there is none. + */ +struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev) +{ + struct cxgb3i_adapter *snic; + int i; + + read_lock(&cxgb3i_snic_rwlock); + list_for_each_entry(snic, &cxgb3i_snic_list, list_head) { + for (i = 0; i < snic->hba_cnt; i++) { + if (snic->hba[i]->ndev == ndev) { + read_unlock(&cxgb3i_snic_rwlock); + return (snic->hba[i]); + } + } + } + read_unlock(&cxgb3i_snic_rwlock); + return NULL; +} + +/** + * cxgb3i_hba_conn_add - attach a connection to an hba + * @cconn: connection to attach + * @hba: hba to attach it to + * + * Sets cconn->hba and appends the connection to the hba connection list + * under cconn_rwlock. + */ +void cxgb3i_hba_conn_add(struct cxgb3i_conn *cconn, struct cxgb3i_hba *hba) +{ + cconn->hba = hba; + write_lock(&hba->cconn_rwlock); + list_add_tail(&cconn->list_head, &hba->cconn_list); + write_unlock(&hba->cconn_rwlock); +} + +/** + * cxgb3i_hba_conn_remove - detach a connection from its hba + * @cconn: connection to detach + * + * Does nothing if the connection has no hba. + */ +void cxgb3i_hba_conn_remove(struct cxgb3i_conn *cconn) +{ + struct cxgb3i_hba *hba = cconn->hba; + + if (hba) { + write_lock(&hba->cconn_rwlock); + 
list_del(&cconn->list_head); + write_unlock(&hba->cconn_rwlock); + } +} + +/** + * cxgb3i_hba_host_add - allocate and register the scsi host for one port + * @snic: adapter the port belongs to + * @ndev: net device of the port + * + * The cxgb3i_hba is the private area of the Scsi_Host allocated here. A + * reference on the pci device of the adapter is taken before + * iscsi_host_add() and dropped again if that call fails. + * Returns the new hba, or NULL on failure. + */ +struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *snic, + struct net_device *ndev) +{ + struct cxgb3i_hba *hba; + struct Scsi_Host *shost; + int err; + + shost = iscsi_host_alloc(&cxgb3i_host_template, + sizeof(struct cxgb3i_hba), + CXGB3I_SCSI_QDEPTH_DFLT); + if (!shost) { + cxgb3i_log_info("iscsi_host_alloc failed.\n"); + return NULL; + } + + shost->transportt = cxgb3i_scsi_transport; + shost->max_lun = 512; + shost->max_id = 0; + shost->max_channel = 0; + shost->max_cmd_len = 16; + + hba = iscsi_host_priv(shost); + INIT_LIST_HEAD(&hba->cconn_list); + rwlock_init(&hba->cconn_rwlock); + hba->snic = snic; + hba->ndev = ndev; + hba->shost = shost; + + pci_dev_get(snic->pdev); + err = iscsi_host_add(shost, &snic->pdev->dev); + if (err) { + cxgb3i_log_info("iscsi_host_add failed.\n"); + goto pci_dev_put; + } + + cxgb3i_log_debug("shost 0x%p, hba 0x%p, no %u.\n", + shost, hba, shost->host_no); + + return hba; + +pci_dev_put: + pci_dev_put(snic->pdev); + scsi_host_put(shost); + return NULL; +} + +/** + * cxgb3i_hba_host_remove - unregister and free the scsi host of an hba + * @hba: hba to tear down + * + * Does nothing if @hba has no scsi host. Otherwise removes the host, drops + * the pci device reference and frees the host; @hba is part of the host and + * must not be used afterwards. + */ +void cxgb3i_hba_host_remove(struct cxgb3i_hba *hba) +{ + if (hba->shost) { + cxgb3i_log_debug("shost 0x%p, hba 0x%p, no %u.\n", + hba->shost, hba, hba->shost->host_no); + iscsi_host_remove(hba->shost); + pci_dev_put(hba->snic->pdev); + /* cleanup connections ? 
*/ + iscsi_host_free(hba->shost); + } +} + +/** + * cxgb3i_ep_connect - establish TCP connection to target portal + * @dst_addr: target IP address + * @non_blocking: blocking or non-blocking call (not used by this function) + * + * Initiates a TCP/IP connection to the dst_addr and wraps the socket in an + * iscsi endpoint. The connection is kept only if it is offloaded, is in + * iSCSI ULP mode and goes out through a net device that belongs to one of + * the hbas of this driver; otherwise the socket is released. + * + * Returns the new endpoint, or NULL on any failure. + * NOTE(review): failures are reported as NULL, not as an ERR_PTR() value -- + * confirm that this is what the iscsi transport class caller expects. + * NOTE(review): the result of __sk_dst_get() is dereferenced without a NULL + * check -- confirm that a dst is always attached at this point. + */ +static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr, + int non_blocking) +{ + struct iscsi_endpoint *ep; + struct cxgb3i_endpoint *cep; + struct cxgb3i_hba *hba; + struct socket *sock; + struct sock *sk; + struct tcp_sock *tp; + int err; + + err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) + return NULL; + sk = sock->sk; + + sk->sk_allocation = GFP_ATOMIC; + sk->sk_reuse = 1; + tp = tcp_sk(sk); + tp->nonagle |= TCP_NAGLE_OFF; + err = cxgb3i_tcp_connect(sock, dst_addr, sizeof(struct sockaddr), + ULP_MODE_ISCSI); + if (err < 0) { + cxgb3i_log_info("sock 0x%p, connect failed %d.\n", sock, err); + goto release_sock; + } + if (!c3cn_flag(sock->sk, C3CN_OFFLOADED)) { + cxgb3i_log_info("sock 0x%p, NOT offloaded.\n", sock); + goto release_sock; + } + if (C3CN_ULP_MODE(sk) != ULP_MODE_ISCSI) { + cxgb3i_log_info("sock 0x%p, mode 0x%x, NOT expected.\n", + sock, C3CN_ULP_MODE(sk)); + goto release_sock; + } + hba = cxgb3i_hba_find_by_netdev(__sk_dst_get(sk)->dev); + if (!hba) { + cxgb3i_log_info("NOT going through cxgbi device.\n"); + goto release_sock; + } + + ep = iscsi_create_endpoint(sizeof(*cep)); + if (!ep) { + cxgb3i_log_info("iscsi alloc ep, OOM.\n"); + goto release_sock; + } + cep = ep->dd_data; + cep->sock = sock; + cep->hba = hba; + + cxgb3i_log_debug("sock 0x%p, iscsi_ep 0x%p, cxgb_ep 0x%p, hba 0x%p.\n", + sock, ep, cep, hba); + return ep; + +release_sock: + sock_release(sock); + return NULL; +} + +/** + * cxgb3i_ep_poll - polls for TCP connection establishment + * @ep: TCP connection (endpoint) handle + * @timeout_ms: timeout value in milli secs + * + * polls for TCP connect request to complete. This implementation does not + * wait and ignores both arguments: it always returns 1. + */ +static int cxgb3i_ep_poll(struct iscsi_endpoint *ep, int 
timeout_ms) +{ + return 1; +} + +/** + * cxgb3i_ep_disconnect - teardown TCP connection + * @ep: TCP connection (endpoint) handle + * + * If an iscsi connection is bound to the endpoint, restores the socket + * callbacks and then, under sk_callback_lock, sets the rx suspend bit and + * clears the endpoint and socket pointers of the connection. The socket is + * then released and the endpoint destroyed. + */ +static void cxgb3i_ep_disconnect(struct iscsi_endpoint *ep) +{ + struct cxgb3i_endpoint *cep = (struct cxgb3i_endpoint *)ep->dd_data; + struct cxgb3i_conn *cconn = cep->cconn; + + cxgb3i_log_debug("ep 0x%p, cep 0x%p.\n", ep, cep); + + if (cconn && cconn->conn) { + struct iscsi_conn *conn = cconn->conn; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + cxgb3i_sk_restore_callbacks(cep->sock->sk, conn); + write_lock_bh(&cep->sock->sk->sk_callback_lock); + set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); + cconn->cep = NULL; + tcp_conn->sock = NULL; + write_unlock_bh(&cep->sock->sk->sk_callback_lock); + } + + sock_release(cep->sock); + iscsi_destroy_endpoint(ep); +} + +/** + * cxgb3i_session_create - create a new iscsi session + * @ep: endpoint the session is created on; NULL is rejected + * @cmds_max: max # of commands + * @qdepth: scsi queue depth (not used by this function) + * @initial_cmdsn: initial iscsi CMDSN for this session + * @host_no: pointer to return host no + * + * Creates a new iSCSI session on the scsi host of the hba of the endpoint, + * points the header of every task at the header buffer of its + * iscsi_tcp_task and allocates the r2t pool. Returns NULL on failure. + */ +static struct iscsi_cls_session *cxgb3i_session_create(struct iscsi_endpoint + *ep, uint16_t cmds_max, + uint16_t qdepth, + uint32_t initial_cmdsn, + uint32_t *host_no) +{ + struct cxgb3i_endpoint *cep; + struct cxgb3i_hba *hba; + struct Scsi_Host *shost; + struct iscsi_cls_session *cls_session; + struct iscsi_session *session; + int i; + + if (!ep) { + cxgb3i_log_error("%s, missing endpoint.\n", __func__); + return NULL; + } + + cep = (struct cxgb3i_endpoint *)ep->dd_data; + hba = cep->hba; + shost = hba->shost; + cxgb3i_log_debug("ep 0x%p, cep 0x%p, hba 0x%p.\n", ep, cep, hba); + BUG_ON(hba != iscsi_host_priv(shost)); + + *host_no = shost->host_no; + + cls_session = iscsi_session_setup(&cxgb3i_iscsi_transport, shost, + cmds_max, + sizeof(struct iscsi_tcp_task), + initial_cmdsn, ISCSI_MAX_TARGET); + if (!cls_session) + return NULL; + + session = cls_session->dd_data; + + for (i = 
0; i < session->cmds_max; i++) { + struct iscsi_task *task = session->cmds[i]; + struct iscsi_tcp_task *tcp_task = task->dd_data; + + task->hdr = &tcp_task->hdr.cmd_hdr; + task->hdr_max = sizeof(tcp_task->hdr) - ISCSI_DIGEST_SIZE; + } + + if (iscsi_r2tpool_alloc(session)) + goto remove_session; + + return cls_session; + +remove_session: + iscsi_session_teardown(cls_session); + return NULL; +} + +/** + * cxgb3i_session_destroy - destroys iscsi session + * @cls_session: pointer to iscsi cls session + * + * Frees the r2t pool of the session and then tears the session down. + */ +static void cxgb3i_session_destroy(struct iscsi_cls_session *cls_session) +{ + cxgb3i_log_debug("sess 0x%p.\n", cls_session); + iscsi_r2tpool_free(cls_session->dd_data); + iscsi_session_teardown(cls_session); +} + +/** + * cxgb3i_conn_create - create iscsi connection instance + * @cls_session: pointer to iscsi cls session + * @cid: iscsi cid + * + * Creates a new iSCSI connection instance for a given session. The private + * data of the connection holds an iscsi_tcp_conn directly followed by the + * cxgb3i_conn. Returns NULL if iscsi_conn_setup() fails. + */ +static struct iscsi_cls_conn *cxgb3i_conn_create(struct iscsi_cls_session + *cls_session, uint32_t cid) +{ + struct iscsi_cls_conn *cls_conn; + struct iscsi_conn *conn; + struct iscsi_tcp_conn *tcp_conn; + struct cxgb3i_conn *cconn; + + cxgb3i_log_debug("sess 0x%p, cid %u.\n", cls_session, cid); + + cls_conn = iscsi_conn_setup(cls_session, + sizeof(*tcp_conn) + sizeof(*cconn), cid); + if (!cls_conn) + return NULL; + conn = cls_conn->dd_data; + + conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN; + + tcp_conn = conn->dd_data; + tcp_conn->iscsi_conn = conn; + + cconn = (struct cxgb3i_conn *)(tcp_conn + 1); + cconn->conn = conn; + + return cls_conn; +} + +/** + * cxgb3i_conn_bind - binds iscsi sess, conn and endpoint together + * @cls_session: pointer to iscsi cls session + * @cls_conn: pointer to iscsi cls conn + * @transport_eph: 64-bit EP handle + * @is_leading: leading connection on this session? 
+ * + * Binds together an iSCSI session, an iSCSI connection and a + * TCP connection. Returns -EINVAL if the endpoint handle cannot be looked + * up or if iscsi_conn_bind() fails, and 0 otherwise. On success the target + * address and port are recorded in the connection under the session lock + * and the socket callbacks are installed. + */ + +static int cxgb3i_conn_bind(struct iscsi_cls_session *cls_session, + struct iscsi_cls_conn *cls_conn, + uint64_t transport_eph, int is_leading) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1); + struct iscsi_endpoint *ep; + struct cxgb3i_endpoint *cep; + struct socket *sock; + int err; + + ep = iscsi_lookup_endpoint(transport_eph); + if (!ep) + return -EINVAL; + + cxgb3i_log_debug("ep 0x%p, cls sess 0x%p, cls conn 0x%p.\n", + ep, cls_session, cls_conn); + + err = iscsi_conn_bind(cls_session, cls_conn, is_leading); + if (err) + return -EINVAL; + + cep = (struct cxgb3i_endpoint *)ep->dd_data; + sock = cep->sock; + + tcp_conn->sock = sock; + cconn->hba = cep->hba; + cconn->cep = cep; + cep->cconn = cconn; + + spin_lock_bh(&conn->session->lock); + sprintf(conn->portal_address, NIPQUAD_FMT, + NIPQUAD(inet_sk(sock->sk)->daddr)); + conn->portal_port = ntohs(inet_sk(sock->sk)->dport); + spin_unlock_bh(&conn->session->lock); + + cxgb3i_sk_set_callbacks(sock->sk, conn); + iscsi_tcp_hdr_recv_prep(tcp_conn); + + return 0; +} + +/** + * cxgb3i_conn_flush - flush tx + * @conn: pointer to iscsi conn + * + * If the current out segment has not been copied completely, returns the + * result of cxgb3i_conn_ulp2_xmit(); returns 0 otherwise. + */ +static int cxgb3i_conn_flush(struct iscsi_conn *conn) +{ + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct iscsi_segment *segment = &tcp_conn->out.segment; + + if (segment->total_copied < segment->total_size) + return cxgb3i_conn_ulp2_xmit(conn); + return 0; +} + +/** + * cxgb3i_conn_get_param - return iscsi connection parameter to caller + * @cls_conn: pointer to iscsi cls conn + * @param: parameter type identifier + * @buf: buffer pointer + * + * returns iSCSI connection parameters + */ +static int cxgb3i_conn_get_param(struct iscsi_cls_conn 
*cls_conn, + enum iscsi_param param, char *buf) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + int len; + + cxgb3i_log_debug("cls_conn 0x%p, param %d.\n", cls_conn, param); + + switch (param) { + case ISCSI_PARAM_CONN_PORT: + spin_lock_bh(&conn->session->lock); + len = sprintf(buf, "%hu\n", conn->portal_port); + spin_unlock_bh(&conn->session->lock); + break; + case ISCSI_PARAM_CONN_ADDRESS: + spin_lock_bh(&conn->session->lock); + len = sprintf(buf, "%s\n", conn->portal_address); + spin_unlock_bh(&conn->session->lock); + break; + default: + return iscsi_conn_get_param(cls_conn, param, buf); + } + + return len; +} + +static int cxgb3i_conn_set_param(struct iscsi_cls_conn *cls_conn, + enum iscsi_param param, char *buf, int buflen) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_session *session = conn->session; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1); + int value, err = 0; + + switch (param) { + case ISCSI_PARAM_HDRDGST_EN: + err = iscsi_set_param(cls_conn, param, buf, buflen); + if (!err && conn->hdrdgst_en) + cxgb3i_conn_ulp_setup(cconn, conn->hdrdgst_en, + conn->datadgst_en); + break; + case ISCSI_PARAM_DATADGST_EN: + err = iscsi_set_param(cls_conn, param, buf, buflen); + if (!err && conn->datadgst_en) + cxgb3i_conn_ulp_setup(cconn, conn->hdrdgst_en, + conn->datadgst_en); + break; + case ISCSI_PARAM_MAX_R2T: + sscanf(buf, "%d", &value); + if (value <= 0 || !is_power_of_2(value)) + return -EINVAL; + if (session->max_r2t == value) + break; + iscsi_r2tpool_free(session); + err = iscsi_set_param(cls_conn, param, buf, buflen); + if (!err && iscsi_r2tpool_alloc(session)) + return -ENOMEM; + case ISCSI_PARAM_MAX_RECV_DLENGTH: + err = iscsi_set_param(cls_conn, param, buf, buflen); + cxgb3i_log_debug("MAX_RECV %u.\n", conn->max_recv_dlength); + break; + case ISCSI_PARAM_MAX_XMIT_DLENGTH: + err = iscsi_set_param(cls_conn, param, buf, buflen); + cxgb3i_log_debug("MAX_XMIT 
%u.\n", conn->max_xmit_dlength); + break; + default: + return iscsi_set_param(cls_conn, param, buf, buflen); + } + return err; +} + +/** + * cxgb3i_host_get_param - returns host (adapter) related parameters + * @shost: scsi host pointer + * @param: parameter type identifier + * @buf: buffer pointer + */ +static int cxgb3i_host_get_param(struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf) +{ + struct cxgb3i_hba *hba = iscsi_host_priv(shost); + int i; + int len = 0; + + switch (param) { + case ISCSI_HOST_PARAM_HWADDRESS: + for (i = 0; i < 6; i++) + len += + sprintf(buf + len, "%02x.", + hba->ndev->dev_addr[i]); + len--; + buf[len] = '\0'; + break; + case ISCSI_HOST_PARAM_NETDEV_NAME: + len = sprintf(buf, "%s\n", hba->ndev->name); + break; + default: + return iscsi_host_get_param(shost, param, buf); + } + return len; +} + +/** + * cxgb3i_conn_get_stats - returns iSCSI stats + * @cls_conn: pointer to iscsi cls conn + * @stats: pointer to iscsi statistic struct + */ +static void cxgb3i_conn_get_stats(struct iscsi_cls_conn *cls_conn, + struct iscsi_stats *stats) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + + stats->txdata_octets = conn->txdata_octets; + stats->rxdata_octets = conn->rxdata_octets; + stats->scsicmd_pdus = conn->scsicmd_pdus_cnt; + stats->dataout_pdus = conn->dataout_pdus_cnt; + stats->scsirsp_pdus = conn->scsirsp_pdus_cnt; + stats->datain_pdus = conn->datain_pdus_cnt; + stats->r2t_pdus = conn->r2t_pdus_cnt; + stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; + stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; + stats->digest_err = 0; + stats->timeout_err = 0; + stats->custom_length = 1; + strcpy(stats->custom[0].desc, "eh_abort_cnt"); + stats->custom[0].value = conn->eh_abort_cnt; +} + +static inline u32 tag_base(struct cxgb3i_tag_format *format, + unsigned int idx, unsigned int age) +{ + u32 sw_bits = idx | (age << format->idx_bits); + u32 tag = sw_bits >> format->rsvd_shift; + tag <<= format->rsvd_bits + format->rsvd_shift; + tag |= sw_bits & 
((1 << format->rsvd_shift) - 1); + return tag; +} + +/** + * cxgb3i_parse_itt - extract task index and session age from an itt + * @conn: iscsi connection the itt belongs to + * @itt: tag to decode + * @idx: if non-NULL, receives the task index + * @age: if non-NULL, receives the session age + * + * NOTE(review): cxgb3i_reserve_itt() stores the tag with htonl() while @itt + * is decoded here without a byte swap -- confirm which byte order the + * caller passes in. + */ +static void cxgb3i_parse_itt(struct iscsi_conn *conn, itt_t itt, + int *idx, int *age) +{ + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1); + struct cxgb3i_adapter *snic = cconn->hba->snic; + u32 sw_bits; + + cxgb3i_parse_tag(&snic->tag_format, itt, NULL, &sw_bits); + if (idx) + *idx = sw_bits & ISCSI_ITT_MASK; + if (age) + *age = (sw_bits >> snic->tag_format.idx_bits) & ISCSI_AGE_MASK; +} + +/** + * cxgb3i_reserve_itt - build the itt to put in the pdu header of a task + * @task: iscsi task + * @hdr_itt: receives the tag, converted with htonl() + * + * For a scsi command that reads from the device a ddp tag is requested for + * its scatterlist. If there is no such command, or the tag returned is + * RESERVED_ITT, the s/w tag is used with the rsvd_mask bits of the reserved + * field set. Always returns 0. + */ +static int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt) +{ + struct scsi_cmnd *sc = task->sc; + struct iscsi_conn *conn = task->conn; + struct iscsi_session *sess = conn->session; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1); + struct cxgb3i_adapter *snic = cconn->hba->snic; + u32 sw_tag = tag_base(&snic->tag_format, task->itt, sess->age); + u32 tag = RESERVED_ITT; + + if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) { + struct cxgb3i_tcp_conn *c3cn = + CXGB3_TCP_CONN(tcp_conn->sock->sk); + tag = + cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag, + scsi_out(sc)->length, + scsi_out(sc)->table.sgl, + scsi_out(sc)->table.nents); + } + if (tag == RESERVED_ITT) + tag = sw_tag | (snic->tag_format.rsvd_mask << + snic->tag_format.rsvd_shift); + *hdr_itt = htonl(tag); + return 0; +} + +/** + * cxgb3i_release_itt - release the tag of a task + * @task: iscsi task + * @hdr_itt: tag as stored by cxgb3i_reserve_itt() + * + * Only the tag of a scsi command that reads from the device is passed to + * cxgb3i_ddp_tag_release(); for any other task nothing is done. + */ +static void cxgb3i_release_itt(struct iscsi_task *task, itt_t hdr_itt) +{ + struct scsi_cmnd *sc = task->sc; + struct iscsi_conn *conn = task->conn; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1); + struct cxgb3i_adapter *snic = cconn->hba->snic; + + hdr_itt = ntohl(hdr_itt); + if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) + cxgb3i_ddp_tag_release(snic, hdr_itt, + scsi_out(sc)->table.sgl, + scsi_out(sc)->table.nents); +} + +/** + * cxgb3i_host_template -- Scsi_Host_Template structure + * used when registering with 
the scsi mid layer + */ +static struct scsi_host_template cxgb3i_host_template = { + .module = THIS_MODULE, + .name = "Chelsio S3xx iSCSI Initiator", + .proc_name = "cxgb3i", + .queuecommand = iscsi_queuecommand, + .change_queue_depth = iscsi_change_queue_depth, + .can_queue = 128 * (ISCSI_DEF_XMIT_CMDS_MAX - 1), + .sg_tablesize = SG_ALL, + .max_sectors = 0xFFFF, + .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN, + .eh_abort_handler = iscsi_eh_abort, + .eh_device_reset_handler = iscsi_eh_device_reset, + .eh_target_reset_handler = iscsi_eh_target_reset, + .use_clustering = DISABLE_CLUSTERING, + .slave_alloc = iscsi_slave_alloc, + .this_id = -1, +}; + +/* + * iscsi transport template: lists the capabilities and parameters that are + * advertised and routes each transport callback either to a cxgb3i function + * of this file or to a generic iscsi helper. + */ +static struct iscsi_transport cxgb3i_iscsi_transport = { + .owner = THIS_MODULE, + .name = "cxgb3i", + .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST + | CAP_DATADGST | CAP_DIGEST_OFFLOAD, + .param_mask = ISCSI_MAX_RECV_DLENGTH | + ISCSI_MAX_XMIT_DLENGTH | + ISCSI_HDRDGST_EN | + ISCSI_DATADGST_EN | + ISCSI_INITIAL_R2T_EN | + ISCSI_MAX_R2T | + ISCSI_IMM_DATA_EN | + ISCSI_FIRST_BURST | + ISCSI_MAX_BURST | + ISCSI_PDU_INORDER_EN | + ISCSI_DATASEQ_INORDER_EN | + ISCSI_ERL | + ISCSI_CONN_PORT | + ISCSI_CONN_ADDRESS | + ISCSI_EXP_STATSN | + ISCSI_PERSISTENT_PORT | + ISCSI_PERSISTENT_ADDRESS | + ISCSI_TARGET_NAME | ISCSI_TPGT | + ISCSI_USERNAME | ISCSI_PASSWORD | + ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | + ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | + ISCSI_LU_RESET_TMO | + ISCSI_PING_TMO | ISCSI_RECV_TMO | + ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME, + .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS | + ISCSI_HOST_INITIATOR_NAME | ISCSI_HOST_NETDEV_NAME, + .get_host_param = cxgb3i_host_get_param, + /* session management */ + .create_session = cxgb3i_session_create, + .destroy_session = cxgb3i_session_destroy, + .get_session_param = iscsi_session_get_param, + /* connection management */ + .create_conn = cxgb3i_conn_create, + .bind_conn = cxgb3i_conn_bind, + .destroy_conn = iscsi_conn_teardown, + .start_conn = 
iscsi_conn_start, + .stop_conn = iscsi_conn_stop, + .flush_conn = cxgb3i_conn_flush, + .get_conn_param = cxgb3i_conn_get_param, + .set_param = cxgb3i_conn_set_param, + .get_stats = cxgb3i_conn_get_stats, + /* pdu xmit req. from user space */ + .send_pdu = iscsi_conn_send_pdu, + /* task */ + .init_task = iscsi_tcp_task_init, + .xmit_task = iscsi_tcp_task_xmit, + .cleanup_task = iscsi_tcp_cleanup_task, + .parse_itt = cxgb3i_parse_itt, + .reserve_itt = cxgb3i_reserve_itt, + .release_itt = cxgb3i_release_itt, + /* TCP connect/disconnect */ + .ep_connect = cxgb3i_ep_connect, + .ep_poll = cxgb3i_ep_poll, + .ep_disconnect = cxgb3i_ep_disconnect, + /* Error recovery timeout call */ + .session_recovery_timedout = iscsi_session_recovery_timedout, +}; + +/** + * cxgb3i_iscsi_init - register the cxgb3i iscsi transport + * + * Stores the scsi transport template returned by the registration in + * cxgb3i_scsi_transport. Returns 0 on success, -ENODEV if the registration + * fails. + */ +int cxgb3i_iscsi_init(void) +{ + cxgb3i_scsi_transport = + iscsi_register_transport(&cxgb3i_iscsi_transport); + if (!cxgb3i_scsi_transport) { + cxgb3i_log_error("Could not register cxgb3i transport.\n"); + return -ENODEV; + } + cxgb3i_log_debug("cxgb3i transport 0x%p.\n", cxgb3i_scsi_transport); + return 0; +} + +/** + * cxgb3i_iscsi_cleanup - unregister the cxgb3i iscsi transport + * + * Does nothing if the transport is not registered, so it may be called more + * than once. + */ +void cxgb3i_iscsi_cleanup(void) +{ + if (cxgb3i_scsi_transport) { + cxgb3i_log_debug("cxgb3i transport 0x%p.\n", + cxgb3i_scsi_transport); + iscsi_unregister_transport(&cxgb3i_iscsi_transport); + cxgb3i_scsi_transport = NULL; + } +} diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c new file mode 100644 index 0000000..9e80311 --- /dev/null +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c @@ -0,0 +1,2808 @@ +/* + * Copyright (C) 2003-2008 Chelsio Communications. All rights reserved. + * + * Written by Dimitris Michailidis (dm(a)chelsio.com) + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this + * release for licensing terms and conditions. 
+ */ + +#include <linux/kallsyms.h> +#include <linux/if_vlan.h> +#include <linux/inet_diag.h> +#include <linux/version.h> + +#ifdef CONFIG_SECURITY_NETWORK +#include <linux/security.h> +#endif + +#include "cxgb3_defs.h" +#include "cxgb3_ctl_defs.h" +#include "firmware_exports.h" +#include "cxgb3i_offload.h" +#include "cxgb3i_ulp2.h" + +#define VALIDATE_SEQ 1 + +typedef int (cxgb3_cpl_handler_decl) (struct t3cdev *, + struct sk_buff *, void *); + +static cxgb3_cpl_handler_decl do_bad_cpl; +static cxgb3_cpl_handler_decl do_act_establish; +static cxgb3_cpl_handler_decl do_act_open_rpl; +static cxgb3_cpl_handler_decl do_wr_ack; +static cxgb3_cpl_handler_decl do_peer_close; +static cxgb3_cpl_handler_decl do_abort_req; +static cxgb3_cpl_handler_decl do_abort_rpl; +static cxgb3_cpl_handler_decl do_close_con_rpl; +static cxgb3_cpl_handler_decl do_iscsi_hdr; + +static struct cxgb3i_tcp_tunables default_cxgb3i_tcp_tunables = { + .max_host_sndbuf = 32 * 1024, + .max_wrs = 15, + .rx_credit_thres = 10 * 1024, + .cong_alg = -1, + .delack = 1, + .tcp_window_scaling = 1, +}; + +/* + * Protocol structure and functions for our sockets. + */ +static struct proto t3_tcp_prot; +static void chelsio_close(struct sock *, long); +static int chelsio_disconnect(struct sock *, int); +static int chelsio_destroy(struct sock *); +static void process_deferq(struct work_struct *); + +static LIST_HEAD(cxgb3_list); +static DECLARE_MUTEX(cxgb3_list_lock); + +/* + * For ULP connections HW may add headers, e.g., for digests, that aren't part + * of the messages sent by the host but that are part of the TCP payload and + * therefore consume TCP sequence space. Tx connection parameters that + * operate in TCP sequence space are affected by the HW additions and need to + * compensate for them to accurately track TCP sequence numbers. This array + * contains the compensating extra lengths for ULP packets. It is indexed by + * a packet's ULP submode. 
+ */ +static const unsigned int cxgb3_ulp_extra_len[] = { 0, 4, 4, 8 }; + +/* + * Return the length of any HW additions that will be made to a Tx packet. + * Such additions can happen for some types of ULP packets. + */ +static inline unsigned int ulp_extra_len(const struct sk_buff *skb) +{ + return cxgb3_ulp_extra_len[skb_ulp_mode(skb) & 3]; +} + +/* + * Size of WRs in bytes. Note that we assume all devices we are handling have + * the same WR size. + */ +static unsigned int wrlen __read_mostly; + +/* + * The number of WRs needed for an skb depends on the number of page fragments + * in the skb and whether it has any payload in its main body. This maps the + * length of the gather list represented by an skb into the # of necessary WRs. + */ +static unsigned int skb_wrs[MAX_SKB_FRAGS + 2] __read_mostly; + +static void t3_init_wr_tab(unsigned int wr_len) +{ + int i; + + if (skb_wrs[1]) /* already initialized */ + return; + + for (i = 1; i < ARRAY_SIZE(skb_wrs); i++) { + int sgl_len = (3 * i) / 2 + (i & 1); + + sgl_len += 3; + skb_wrs[i] = (sgl_len <= wr_len + ? 1 : 1 + (sgl_len - 2) / (wr_len - 1)); + } + + wrlen = wr_len * 8; +} + +/* + * TOE information returned through inet_diag for offloaded connections. + */ +struct t3_inet_diag_info { + u32 toe_id; + u32 tid; + u16 wrs; + u8 ulp_mode:4; + u8 sched_class:4; + u8 ddp_enabled; + char dev_name[T3CNAMSIZ]; +}; + +/* + * Socket filter that drops everything by specifying a 0-length filter program. + */ +static struct sk_filter drop_all = {.refcnt = ATOMIC_INIT(1) }; + +/* + * This sk_buff holds a fake header-only TCP segment that we use whenever we + * need to exploit SW TCP functionality that expects TCP headers, such as + * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple + * CPUs without locking. + */ +static struct sk_buff *tcphdr_skb __read_mostly; + +/* + * Initialize state for cxgb3 API operations. 
+ */ +int cxgb3i_tcp_init(cxgb3_cpl_handler_func *cpl_handlers) +{ + int i; + + /* + * Instialize protocol structure for our sockets. We first copy + * the standard TCP protocol structure so we end up with standard + * values for things like pointers to counters, etc. + */ + t3_tcp_prot = tcp_prot; + t3_tcp_prot.close = chelsio_close; + t3_tcp_prot.disconnect = chelsio_disconnect; + t3_tcp_prot.destroy = chelsio_destroy; + + tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL); + if (!tcphdr_skb) { + printk(KERN_ERR + "Chelsio TCP offload: can't allocate sk_buff\n"); + return -1; + } + skb_put(tcphdr_skb, sizeof(struct tcphdr)); + skb_reset_transport_header(tcphdr_skb); + memset(tcphdr_skb->data, 0, tcphdr_skb->len); + /* CIPSO_V4_OPTEXIST is false for tcphdr_skb without anything extra */ + + for (i = 0; i < NUM_CPL_CMDS; i++) + cpl_handlers[i] = do_bad_cpl; + + cpl_handlers[CPL_ACT_ESTABLISH] = do_act_establish; + cpl_handlers[CPL_ACT_OPEN_RPL] = do_act_open_rpl; + cpl_handlers[CPL_PEER_CLOSE] = do_peer_close; + cpl_handlers[CPL_ABORT_REQ_RSS] = do_abort_req; + cpl_handlers[CPL_ABORT_RPL_RSS] = do_abort_rpl; + cpl_handlers[CPL_CLOSE_CON_RPL] = do_close_con_rpl; + cpl_handlers[CPL_TX_DMA_ACK] = do_wr_ack; + cpl_handlers[CPL_ISCSI_HDR] = do_iscsi_hdr; + + return 0; +} + +void cxgb3i_tcp_add(struct t3cdev *cdev, struct cxgb3_client *client) +{ + struct cxgb3i_tcp_data *cdata; + struct adap_ports *ports; + struct ofld_page_info rx_page_info; + unsigned int wr_len; + int i; + + cdata = kzalloc(sizeof *cdata, GFP_KERNEL); + if (!cdata) + return; + ports = kzalloc(sizeof *ports, GFP_KERNEL); + if (!ports) { + kfree(cdata); + return; + } + cdata->ports = ports; + + if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0 || + cdev->ctl(cdev, GET_PORTS, cdata->ports) < 0 || + cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info) < 0) { + kfree(ports); + kfree(cdata); + return; + } + + t3_init_wr_tab(wr_len); + + INIT_LIST_HEAD(&cdata->list); + cdata->cdev = cdev; + cdata->client = 
client; + cdata->rx_page_size = rx_page_info.page_size; + cdata->conf = default_cxgb3i_tcp_tunables; + cdata->conf.max_wrs = T3C_DATA(cdev)->max_wrs; + skb_queue_head_init(&cdata->deferq); + INIT_WORK(&cdata->deferq_task, process_deferq); + + for (i = 0; i < ports->nports; i++) + NDEV2CDATA(ports->lldevs[i]) = cdata; + + down(&cxgb3_list_lock); + list_add_tail(&cdata->list, &cxgb3_list); + up(&cxgb3_list_lock); + + return; +} + +void cxgb3i_tcp_remove(struct t3cdev *cdev) +{ + struct cxgb3i_tcp_data *cdata = CXGB3_TCP_DATA(cdev); + struct adap_ports *ports = cdata->ports; + int i; + + for (i = 0; i < ports->nports; i++) + NDEV2CDATA(ports->lldevs[i]) = NULL; + + down(&cxgb3_list_lock); + list_del(&cdata->list); + up(&cxgb3_list_lock); + + kfree(ports); + kfree(cdata); +} + +/* + * Return TRUE if the specified net device is for a port on one of our + * registered adapters. + */ +static int is_cxgb3_dev(struct net_device *dev) +{ + struct cxgb3i_tcp_data *cdata; + + down(&cxgb3_list_lock); + list_for_each_entry(cdata, &cxgb3_list, list) { + struct adap_ports *ports = cdata->ports; + int i; + + for (i = 0; i < ports->nports; i++) + if (dev == ports->lldevs[i]) { + up(&cxgb3_list_lock); + return 1; + } + } + up(&cxgb3_list_lock); + return 0; +} + +/* + * Primary cxgb3 API operations. + * ============================= + */ + +static int tcp_v4_connect_offload(struct sock *, struct sockaddr *, int); +static void t3_cleanup_rbuf(struct sock *, int); +static int t3_push_frames(struct sock *, int); +static int t3_send_reset(struct sock *, int, struct sk_buff *); +static int t3_sendskb(struct sock *, struct sk_buff *, int); + +/* + * Return connected socket to specified endpoint. 
+ */ +int cxgb3i_tcp_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int ulp_mode) +{ + struct sock *sk; + struct cxgb3i_tcp_conn *c3cn; + int ret; + + c3cn = kzalloc(sizeof(*c3cn), GFP_KERNEL); + if (c3cn == NULL) + return -ENOMEM; + c3cn->flags = 0; + c3cn->ulp_mode = ulp_mode; + + sk = sock->sk; + CXGB3_TCP_CONN(sk) = c3cn; + + ret = tcp_v4_connect_offload(sk, uaddr, addr_len); + if (ret) { + CXGB3_TCP_CONN(sk) = NULL; + kfree(c3cn); + } + return ret; +} + +void cxgb3i_tcp_cleanup_rbuf(struct sock *sk, int copied) +{ + t3_cleanup_rbuf(sk, copied); + return; +} + +int cxgb3i_tcp_sendskb(struct sock *sk, struct sk_buff *skb, int flags) +{ + return t3_sendskb(sk, skb, flags); +} + +/* + * Protocol operations. + * ==================== + */ + +static int make_close_transition(struct sock *); +static void close_conn(struct sock *); +static void t3_purge_write_queue(struct sock *); + +/* + * Release a socket's local TCP port if the socket is bound. This is normally + * done by tcp_done() but because we need to wait for HW to release TIDs we + * usually call tcp_done at a later time than the SW stack would have. This + * can be used to release the port earlier so the SW stack can reuse it before + * we are done with the connection. + */ +static inline void release_tcp_port(struct sock *sk) +{ + if (inet_csk(sk)->icsk_bind_hash) + inet_put_port(sk); +} + +static void chelsio_close(struct sock *sk, long timeout) +{ + int data_lost, old_state; + + lock_sock(sk); + sk->sk_shutdown |= SHUTDOWN_MASK; + + /* + * We need to flush the receive buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + * reader process may not have drained the data yet! Make a note + * of whether any received data will be lost so we can decide whether + * to FIN or RST. 
+ */ + data_lost = skb_queue_len(&sk->sk_receive_queue); + __skb_queue_purge(&sk->sk_receive_queue); + + if (sk->sk_state == TCP_CLOSE) /* Nothing if we are already closed */ + ; + else if (data_lost || sk->sk_state == TCP_SYN_SENT) { + /* Unread data was tossed, zap the connection. */ + NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE); + t3_send_reset(sk, CPL_ABORT_SEND_RST, NULL); + release_tcp_port(sk); + goto unlock; + } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { + /* Check zero linger _after_ checking for unread data. */ + sk->sk_prot->disconnect(sk, 0); + NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA); + } else if (make_close_transition(sk)) { /* Regular FIN-based close */ + close_conn(sk); + } + + if (timeout) + sk_stream_wait_close(sk, timeout); + +unlock: + old_state = sk->sk_state; + sock_hold(sk); /* must last past the potential inet_csk_destroy_sock */ + sock_orphan(sk); + atomic_inc(sk->sk_prot->orphan_count); + + release_sock(sk); /* Final release_sock in connection's lifetime. */ + + /* + * There are no more user references at this point. Grab the socket + * spinlock and finish the close. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* + * Because the socket was orphaned before the bh_lock_sock + * either the backlog or a BH may have already destroyed it. + * Bail out if so. 
+ */ + if (old_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) + goto out; + + if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 && + !c3cn_flag(sk, C3CN_ABORT_SHUTDOWN)) { + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC); + if (skb) { + t3_send_reset(sk, CPL_ABORT_SEND_RST, skb); + NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); + } + } + + if (sk->sk_state == TCP_CLOSE) + inet_csk_destroy_sock(sk); + +out: + bh_unlock_sock(sk); + local_bh_enable(); + sock_put(sk); +} + +static int chelsio_disconnect(struct sock *sk, int flags) +{ + printk(KERN_ERR "chelsio_disconnect not implemented\n"); + return -ENOTSUPP; +} + +/* + * Our version of tcp_v4_destroy_sock(). We need to do this because + * tcp_writequeue_purge() that is used in the original doesn't quite match + * our needs. If we ever hook into the memory management of the SW stack we + * may be able to use tcp_v4_destroy_sock() directly. + */ +static int chelsio_destroy(struct sock *sk) +{ + struct cxgb3i_tcp_conn *c3cn; + + C3CN_ULP_MODE(sk) = ULP_MODE_NONE; + t3_purge_write_queue(sk); + c3cn = CXGB3_TCP_CONN(sk); + CXGB3_TCP_CONN(sk) = NULL; + kfree(c3cn); + return tcp_prot.destroy(sk); +} + +/* + * Local utility routines used to implement primary cxgb3 API operations. + * ====================================================================== + */ + +static int tcp_connect_offload(struct sock *); +static u32 t3_send_rx_credits(struct sock *, u32, u32, int); +static void mk_act_open_req(struct sock *, struct sk_buff *, + unsigned int, const struct l2t_entry *); +static int wait_for_mem(struct sock *, long *); +static void skb_entail(struct sock *, struct sk_buff *, int); + +static inline int is_t3a(const struct t3cdev *cdev) +{ + return cdev->type == T3A; +} + +/* + * Determine the value of a packet's ->priority field. Bit 0 determines + * whether the packet should use a control Tx queue, bits 1..3 determine + * the queue set to use. 
+ */ +static inline unsigned int mkprio(unsigned int cntrl, const struct sock *sk) +{ + return cntrl; +} + +/* + * Returns true if an sk_buff carries urgent data. + */ +static inline int skb_urgent(struct sk_buff *skb) +{ + return (CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_URG) != 0; +} + +static inline void reset_wr_list(struct tcp_sock *tp) +{ + tp->forward_skb_hint = NULL; +} + +/* + * Add a WR to a socket's list of pending WRs. This is a singly-linked list + * of sk_buffs operating as a FIFO. We use the following sock and sk_buff + * fields to maintain it: + * - sock.forward_skb_hint, sock.retransmit_skb_hint as head and tail pointers + * - sk_buff.sp as packet next pointer + */ +static inline void enqueue_wr(struct tcp_sock *tp, struct sk_buff *skb) +{ + skb->sp = NULL; + + /* + * We want to take an extra reference since both us and the driver + * need to free the packet before it's really freed. We know there's + * just one user currently so we use atomic_set rather than skb_get + * to avoid the atomic op. + */ + atomic_set(&skb->users, 2); + + if (!tp->forward_skb_hint) + tp->forward_skb_hint = skb; + else + tp->retransmit_skb_hint->sp = (void *)skb; + tp->retransmit_skb_hint = skb; +} + +/* Returns bits 2:7 of a socket's TOS field */ +#define SK_TOS(sk) ((inet_sk(sk)->tos >> 2) & M_TOS) + +/* + * The next two functions calculate the option 0 value for a socket. 
+ */ +static inline unsigned int calc_opt0h(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + return V_NAGLE((tp->nonagle & TCP_NAGLE_OFF) == 0) | + V_KEEP_ALIVE(sock_flag(sk, SOCK_KEEPOPEN) != 0) | F_TCAM_BYPASS | + V_WND_SCALE(tp->rx_opt.rcv_wscale) | V_MSS_IDX(C3CN_MSS_IDX(sk)); +} + +static inline unsigned int calc_opt0l(struct sock *sk) +{ + unsigned int tos; + struct tcp_sock *tp = tcp_sk(sk); + + tos = SK_TOS(sk); + if ((tos & 0x38) == 0x30) /* suppress values in special range */ + tos = 0; + + return V_TOS(tos) | V_ULP_MODE(C3CN_ULP_MODE(sk)) | + V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32) M_RCV_BUFSIZ)); +} + +static inline unsigned int calc_opt2(const struct sock *sk) +{ + const struct t3cdev *cdev = C3CN_CDEV(sk); + int flv_valid = CXGB3_TCP_TUNABLE(cdev, cong_alg) != -1; + + return V_FLAVORS_VALID(flv_valid) | + V_CONG_CONTROL_FLAVOR(flv_valid ? CXGB3_TCP_TUNABLE(cdev, cong_alg) + : 0); +} + +static inline void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, + int len) +{ + struct tx_data_wr *req; + struct tcp_sock *tp = tcp_sk(sk); + + skb_reset_transport_header(skb); + req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_lo = htonl(V_WR_TID(C3CN_TID(sk))); + req->sndseq = htonl(tp->snd_nxt); + /* len includes the length of any HW ULP additions */ + req->len = htonl(len); + req->param = htonl(V_TX_PORT(C3CN_L2T(sk)->smt_idx)); + /* V_TX_ULP_SUBMODE sets both the mode and submode */ + req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) | + V_TX_URG(skb_urgent(skb)) | + V_TX_SHOVE((!c3cn_flag(sk, C3CN_TX_MORE_DATA)) && + (skb_peek(&sk->sk_write_queue) ? 0 : 1))); + + if (!c3cn_flag(sk, C3CN_TX_DATA_SENT)) { + + req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | + V_TX_CPU_IDX(C3CN_QSET(sk))); + + /* Sendbuffer is in units of 32KB. 
+ */ + req->param |= htonl(V_TX_SNDBUF(sk->sk_sndbuf >> 15)); + c3cn_set_flag(sk, C3CN_TX_DATA_SENT); + } +} + +static int tcp_v4_connect_offload(struct sock *sk, + struct sockaddr *uaddr, int addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + struct rtable *rt; + __be32 daddr, nexthop; + int tmp; + int err; + + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + if (usin->sin_family != AF_INET) + return -EAFNOSUPPORT; + + nexthop = daddr = usin->sin_addr.s_addr; + if (inet->opt && inet->opt->srr) { + if (!daddr) + return -EINVAL; + nexthop = inet->opt->faddr; + } + + tmp = ip_route_connect(&rt, nexthop, inet->saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_TCP, inet->sport, usin->sin_port, sk, 1); + if (tmp < 0) { + if (tmp == -ENETUNREACH) + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + return tmp; + } + + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { + ip_rt_put(rt); + return -ENETUNREACH; + } + + if (!inet->opt || !inet->opt->srr) + daddr = rt->rt_dst; + + if (!inet->saddr) + inet->saddr = rt->rt_src; + inet->rcv_saddr = inet->saddr; + + if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { + /* Reset inherited state */ + tp->rx_opt.ts_recent = 0; + tp->rx_opt.ts_recent_stamp = 0; + tp->write_seq = 0; + } + + if (tcp_death_row.sysctl_tw_recycle && + !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { + struct inet_peer *peer = rt->peer; + /* + * VJ's idea. We save last timestamp seen from + * the destination in peer table, when entering state + * TIME-WAIT * and initialize rx_opt.ts_recent from it, + * when trying new connection. 
+ */ + if (peer != NULL && + peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { + tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; + tp->rx_opt.ts_recent = peer->tcp_ts; + } + } + + inet->dport = usin->sin_port; + inet->daddr = daddr; + + inet_csk(sk)->icsk_ext_hdr_len = 0; + if (inet->opt) + inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; + + tp->rx_opt.mss_clamp = 536; + + /* Socket identity is still unknown (sport may be zero). + * However we set state to SYN-SENT and not releasing socket + * lock select source port, enter ourselves into the hash tables and + * complete initialization after this. + */ + tcp_set_state(sk, TCP_SYN_SENT); + err = inet_hash_connect(&tcp_death_row, sk); + if (err) + goto failure; + + err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk); + if (err) + goto failure; + + /* OK, now commit destination to socket. */ + sk->sk_gso_type = SKB_GSO_TCPV4; + sk_setup_caps(sk, &rt->u.dst); + + if (tcp_connect_offload(sk)) + return 0; + /* + * If we get here, we don't have an offload connection so simply + * return a failure. + */ + err = -ENOTSUPP; + +failure: + /* + * This unhashes the socket and releases the local port, + * if necessary. + */ + tcp_set_state(sk, TCP_CLOSE); + ip_rt_put(rt); + sk->sk_route_caps = 0; + inet->dport = 0; + return err; +} + +static inline int is_delack_mode_valid(struct t3cdev *cdev, struct sock *sk) +{ + return (!C3CN_ULP_MODE(sk) + || (C3CN_ULP_MODE(sk) == ULP_MODE_TCPDDP && cdev->type >= T3A)); +} + +/* + * Set of states for which we should return RX credits. + */ +#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) + +/* + * Called after some received data has been read. It returns RX credits + * to the HW for the amount of data processed. 
+ */ +static void t3_cleanup_rbuf(struct sock *sk, int copied) +{ + struct tcp_sock *tp; + struct t3cdev *cdev; + int dack_mode, must_send; + u32 thres, credits, dack = 0; + + if (!sk_in_state(sk, CREDIT_RETURN_STATE)) + return; + + tp = tcp_sk(sk); + credits = tp->copied_seq - tp->rcv_wup; + if (unlikely(!credits)) + return; + + cdev = C3CN_CDEV(sk); + thres = CXGB3_TCP_TUNABLE(cdev, rx_credit_thres); + + if (unlikely(thres == 0)) + return; + + if (is_delack_mode_valid(cdev, sk)) { + dack_mode = CXGB3_TCP_TUNABLE(cdev, delack); + if (unlikely(dack_mode != C3CN_DELAK_MODE(sk))) { + u32 r = tp->rcv_nxt - C3CN_DELAK_SEQ(sk); + + if (r >= tp->rcv_wnd || r >= 16 * tp->rx_opt.mss_clamp) + dack = (F_RX_DACK_CHANGE | + V_RX_DACK_MODE(dack_mode)); + } + } else + dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); + + /* + * For coalescing to work effectively ensure the receive window has + * at least 16KB left. + */ + must_send = credits + 16384 >= tp->rcv_wnd; + + if (must_send || credits >= thres) + tp->rcv_wup += t3_send_rx_credits(sk, credits, dack, must_send); +} + +/* + * Generic ARP failure handler that discards the buffer. + */ +static void arp_failure_discard(struct t3cdev *cdev, struct sk_buff *skb) +{ + kfree_skb(skb); +} + +/* + * Prepends TX_DATA_WR or CPL_CLOSE_CON_REQ headers to buffers waiting in a + * socket's send queue and sends them on to the TOE. Must be called with the + * socket lock held. Returns the amount of send buffer space that was freed + * as a result of sending queued data to the TOE. + */ +static int t3_push_frames(struct sock *sk, int req_completion) +{ + int total_size = 0; + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + struct t3cdev *cdev; + struct cxgb3i_tcp_data *cdata; + + if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) + return 0; + + /* + * We shouldn't really be called at all after an abort but check just + * in case. 
+ */ + if (unlikely(c3cn_flag(sk, C3CN_ABORT_SHUTDOWN))) + return 0; + + cdev = C3CN_CDEV(sk); + cdata = CXGB3_TCP_DATA(cdev); + + while (C3CN_WR_AVAIL(sk) + && (skb = skb_peek(&sk->sk_write_queue)) != NULL + && !c3cn_flag(sk, C3CN_TX_WAIT_IDLE) + && (!(CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_HOLD) + || skb_queue_len(&sk->sk_write_queue) > 1)) { + + int len = skb->len; /* length before skb_push */ + int frags = skb_shinfo(skb)->nr_frags + (len != skb->data_len); + int wrs_needed = skb_wrs[frags]; + + if (wrs_needed > 1 && len + sizeof(struct tx_data_wr) <= wrlen) + wrs_needed = 1; + + WARN_ON(frags >= ARRAY_SIZE(skb_wrs) || wrs_needed < 1); + if (C3CN_WR_AVAIL(sk) < wrs_needed) + break; + + __skb_unlink(skb, &sk->sk_write_queue); + skb->priority = mkprio(CPL_PRIORITY_DATA, sk); + skb->csum = wrs_needed; /* remember this until the WR_ACK */ + C3CN_WR_AVAIL(sk) -= wrs_needed; + C3CN_WR_UNACKED(sk) += wrs_needed; + enqueue_wr(tp, skb); + + if (likely(CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) { + len += ulp_extra_len(skb); + make_tx_data_wr(sk, skb, len); + tp->snd_nxt += len; + tp->lsndtime = tcp_time_stamp; + if ((req_completion + && C3CN_WR_UNACKED(sk) == wrs_needed) + || (CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_COMPL) + || C3CN_WR_UNACKED(sk) >= C3CN_WR_MAX(sk) / 2) { + struct work_request_hdr *wr = cplhdr(skb); + + wr->wr_hi |= htonl(F_WR_COMPL); + C3CN_WR_UNACKED(sk) = 0; + } + CXGB3_TCP_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR; + } else if (skb->data[0] == FW_WROPCODE_OFLD_CLOSE_CON) + c3cn_set_flag(sk, C3CN_CLOSE_CON_REQUESTED); + + total_size += skb->truesize; + if (CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_BARRIER) + c3cn_set_flag(sk, C3CN_TX_WAIT_IDLE); + set_arp_failure_handler(skb, arp_failure_discard); + l2t_send(cdev, skb, C3CN_L2T(sk)); + } + sk->sk_wmem_queued -= total_size; + return total_size; +} + +/* + * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant + * and send it along. 
+ */ +static void abort_arp_failure(struct t3cdev *cdev, struct sk_buff *skb) +{ + struct cpl_abort_req *req = cplhdr(skb); + + req->cmd = CPL_ABORT_NO_RST; + cxgb3_ofld_send(cdev, skb); +} + +/* + * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do + * not send multiple ABORT_REQs for the same connection and also that we do + * not try to send a message after the connection has closed. Returns 1 if + * an ABORT_REQ wasn't generated after all, 0 otherwise. + */ +static int t3_send_reset(struct sock *sk, int mode, struct sk_buff *skb) +{ + struct cpl_abort_req *req; + struct tcp_sock *tp = tcp_sk(sk); + unsigned int tid = C3CN_TID(sk); + + if (unlikely(c3cn_flag(sk, C3CN_ABORT_SHUTDOWN) || !C3CN_CDEV(sk))) { + if (skb) + __kfree_skb(skb); + return 1; + } + + c3cn_set_flag(sk, C3CN_ABORT_RPL_PENDING); + c3cn_set_flag(sk, C3CN_ABORT_SHUTDOWN); + + /* Purge the send queue so we don't send anything after an abort. */ + t3_purge_write_queue(sk); + + if (c3cn_flag(sk, C3CN_CLOSE_CON_REQUESTED) && is_t3a(C3CN_CDEV(sk))) + mode |= CPL_ABORT_POST_CLOSE_REQ; + + if (!skb) + skb = alloc_skb(sizeof(*req), GFP_KERNEL | __GFP_NOFAIL); + skb->priority = mkprio(CPL_PRIORITY_DATA, sk); + set_arp_failure_handler(skb, abort_arp_failure); + + req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); + req->wr.wr_lo = htonl(V_WR_TID(tid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); + req->rsvd0 = htonl(tp->snd_nxt); + req->rsvd1 = !c3cn_flag(sk, C3CN_TX_DATA_SENT); + req->cmd = mode; + if (sk->sk_state == TCP_SYN_SENT) + __skb_queue_tail(&tp->out_of_order_queue, skb); + else + l2t_send(C3CN_CDEV(sk), skb, C3CN_L2T(sk)); + return 0; +} + +/* + * This must be called with the socket locked, otherwise dev may be NULL. + */ +static inline int chelsio_wspace(const struct sock *sk) +{ + struct t3cdev *dev = C3CN_CDEV(sk); + + return (dev ? 
(CXGB3_TCP_TUNABLE(dev, max_host_sndbuf) + - sk->sk_wmem_queued) + : 0); +} + +static inline int tcp_memory_free(struct sock *sk) +{ + return chelsio_wspace(sk) > 0; +} + +/* + * Add a list of skbs to a socket send queue. This interface is intended for + * use by in-kernel ULPs. The skbs must comply with the max size limit of the + * device and have a headroom of at least TX_HEADER_LEN bytes. + */ +static int t3_sendskb(struct sock *sk, struct sk_buff *skb, int flags) +{ + struct sk_buff *next; + struct tcp_sock *tp = tcp_sk(sk); + int err, copied = 0; + long timeo; + + lock_sock(sk); + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + (err = sk_stream_wait_connect(sk, &timeo)) != 0) + goto out_err; + + clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + err = -EPIPE; + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + goto out_err; + + /* + * We check for send buffer space once for the whole skb list. It + * isn't critical if we end up overrunning the send buffer limit as we + * do not allocate any new memory. The benefit is we don't need to + * perform intermediate packet pushes. + */ + while (!tcp_memory_free(sk)) { + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + err = wait_for_mem(sk, &timeo); + if (err) + goto out_err; + } + + while (skb) { + if (unlikely(skb_headroom(skb) < TX_HEADER_LEN)) { + err = -EINVAL; + goto out_err; + } + + next = skb->next; + skb->next = NULL; + skb_entail(sk, skb, C3CB_FLAG_NO_APPEND | C3CB_FLAG_NEED_HDR); + copied += skb->len; + tp->write_seq += skb->len + ulp_extra_len(skb); + skb = next; + } +done: + if (likely(skb_queue_len(&sk->sk_write_queue))) + t3_push_frames(sk, 1); + release_sock(sk); + return copied; + +out_err: + if (copied == 0) + copied = sk_stream_error(sk, flags, err); + goto done; +} + +/* + * Low-level utility routines for primary API functions. 
+ * ===================================================== + */ +/* routines to implement CPL message processing */ +static void sock_act_establish(struct sock *, struct sk_buff *); +static void active_open_failed(struct sock *, struct sk_buff *); +static void wr_ack(struct sock *, struct sk_buff *); +static void do_peer_fin(struct sock *, struct sk_buff *); +static void process_abort_req(struct sock *, struct sk_buff *); +static void process_abort_rpl(struct sock *, struct sk_buff *); +static void process_close_con_rpl(struct sock *, struct sk_buff *); +static void process_rx_iscsi_hdr(struct sock *, struct sk_buff *); + +static struct sk_buff *__get_cpl_reply_skb(struct sk_buff *, size_t, gfp_t); + +static int t3_connect(struct sock *, struct net_device *); +static void tcp_uncork(struct sock *); +static void tcp_push(struct sock *, int); +static void fail_act_open(struct sock *, int); +static void init_offload_sk(struct sock *, struct t3cdev *, struct dst_entry |