From: Karen Xie on
From: Karen Xie <kxie(a)chelsio.com>

New cxgb3i iSCSI driver. The driver interfaces with the cxgb3 driver to access the hardware.

Signed-off-by: Karen Xie <kxie(a)chelsio.com>
---

drivers/scsi/Kconfig | 2
drivers/scsi/Makefile | 1
drivers/scsi/cxgb3i/Kconfig | 6
drivers/scsi/cxgb3i/Makefile | 5
drivers/scsi/cxgb3i/cxgb3i.h | 190 ++
drivers/scsi/cxgb3i/cxgb3i_init.c | 107 +
drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 797 ++++++++++
drivers/scsi/cxgb3i/cxgb3i_offload.c | 2808 ++++++++++++++++++++++++++++++++++
drivers/scsi/cxgb3i/cxgb3i_offload.h | 259 +++
drivers/scsi/cxgb3i/cxgb3i_ulp2.c | 722 +++++++++
drivers/scsi/cxgb3i/cxgb3i_ulp2.h | 102 +
security/security.c | 1
12 files changed, 5000 insertions(+), 0 deletions(-)
create mode 100644 drivers/scsi/cxgb3i/Kconfig
create mode 100644 drivers/scsi/cxgb3i/Makefile
create mode 100644 drivers/scsi/cxgb3i/cxgb3i.h
create mode 100644 drivers/scsi/cxgb3i/cxgb3i_init.c
create mode 100644 drivers/scsi/cxgb3i/cxgb3i_iscsi.c
create mode 100644 drivers/scsi/cxgb3i/cxgb3i_offload.c
create mode 100644 drivers/scsi/cxgb3i/cxgb3i_offload.h
create mode 100644 drivers/scsi/cxgb3i/cxgb3i_ulp2.c
create mode 100644 drivers/scsi/cxgb3i/cxgb3i_ulp2.h


diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 22070e9..5ae06a8 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1759,6 +1759,8 @@ config ZFCP

source "drivers/scsi/bnx2i/Kconfig"

+source "drivers/scsi/cxgb3i/Kconfig"
+
config SCSI_SRP
tristate "SCSI RDMA Protocol helper library"
depends on SCSI && PCI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index a3f6866..b830af3 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -123,6 +123,7 @@ obj-$(CONFIG_SCSI_STEX) += stex.o
obj-$(CONFIG_SCSI_MVSAS) += mvsas.o
obj-$(CONFIG_PS3_ROM) += ps3rom.o
obj-$(CONFIG_SCSI_BNX2_ISCSI) += bnx2i/
+obj-$(CONFIG_SCSI_CXGB3_ISCSI) += cxgb3i/

obj-$(CONFIG_ARM) += arm/

diff --git a/drivers/scsi/cxgb3i/Kconfig b/drivers/scsi/cxgb3i/Kconfig
new file mode 100644
index 0000000..2762814
--- /dev/null
+++ b/drivers/scsi/cxgb3i/Kconfig
@@ -0,0 +1,6 @@
+config SCSI_CXGB3_ISCSI
+ tristate "Chelsio S3xx iSCSI support"
+ select CHELSIO_T3
+ select SCSI_ISCSI_ATTRS
+ ---help---
+ This driver supports iSCSI offload for the Chelsio S3 series devices.
diff --git a/drivers/scsi/cxgb3i/Makefile b/drivers/scsi/cxgb3i/Makefile
new file mode 100644
index 0000000..8c8a894
--- /dev/null
+++ b/drivers/scsi/cxgb3i/Makefile
@@ -0,0 +1,5 @@
+EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3
+
+cxgb3i-y := cxgb3i_init.o cxgb3i_iscsi.o cxgb3i_ulp2.o cxgb3i_offload.o
+
+obj-$(CONFIG_SCSI_CXGB3_ISCSI) += cxgb3i.o
diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h
new file mode 100644
index 0000000..39a3b94
--- /dev/null
+++ b/drivers/scsi/cxgb3i/cxgb3i.h
@@ -0,0 +1,190 @@
+/*
+ * cxgb3i.h: Chelsio S3xx iSCSI driver.
+ *
+ * Copyright (c) 2008 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Karen Xie (kxie(a)chelsio.com)
+ */
+
+#ifndef __CXGB3I_H__
+#define __CXGB3I_H__
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/kfifo.h>
+#include <linux/netdevice.h>
+#include <linux/completion.h>
+#include <linux/scatterlist.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi.h>
+#include <scsi/iscsi_proto.h>
+#include <scsi/libiscsi.h>
+#include <scsi/scsi_transport_iscsi.h>
+#include <linux/crypto.h>
+#include "../iscsi_tcp.h"
+
+/* from cxgb3 LLD */
+#include "common.h"
+#include "t3_cpl.h"
+#include "t3cdev.h"
+#include "cxgb3_ctl_defs.h"
+#include "cxgb3_offload.h"
+#include "firmware_exports.h"
+#include "cxgb3i_offload.h"
+
+/*
+ * Logging helpers. Each one wraps printk() at the matching log level and
+ * prefixes the message with "cxgb3i: ".
+ */
+#define cxgb3i_log_error(fmt...) printk(KERN_ERR "cxgb3i: ERR! " fmt)
+#define cxgb3i_log_warn(fmt...) printk(KERN_WARNING "cxgb3i: WARN! " fmt)
+#define cxgb3i_log_info(fmt...) printk(KERN_INFO "cxgb3i: " fmt)
+
+/*
+ * Debug messages are compiled in only when __DEBUG_CXGB3I__ is defined;
+ * otherwise cxgb3i_log_debug() expands to nothing. When enabled, the
+ * message is prefixed with the calling function name and is emitted at
+ * KERN_ERR level.
+ */
+#ifdef __DEBUG_CXGB3I__
+#define cxgb3i_log_debug(fmt, args...) \
+ printk(KERN_ERR "cxgb3i: %s - " fmt, __func__ , ## args)
+#else
+#define cxgb3i_log_debug(fmt...)
+#endif
+
+#define CXGB3I_SCSI_QDEPTH_DFLT 128
+
+struct cxgb3i_adapter;
+struct cxgb3i_hba;
+struct cxgb3i_endpoint;
+
+/**
+ * struct cxgb3i_tag_format - cxgb3i ulp tag for steering pdu payload
+ *
+ * @idx_bits: # of low-order s/w tag bits that sit below the session age
+ * (the age is shifted left by this amount when a tag is built)
+ * @age_bits: # of bits for the session age
+ * (NOTE(review): not referenced in the code visible here - confirm)
+ * @rsvd_bits: # of bits used by h/w
+ * @rsvd_shift: bit position at which the h/w reserved field starts
+ * @rsvd_mask: bit mask of the h/w reserved field, applied after the tag
+ * has been shifted right by @rsvd_shift
+ *
+ */
+struct cxgb3i_tag_format {
+ unsigned char idx_bits;
+ unsigned char age_bits;
+ unsigned char rsvd_bits;
+ unsigned char rsvd_shift;
+ u32 rsvd_mask;
+};
+
+/**
+ * struct cxgb3i_ddp_info - cxgb3i direct data placement for pdu payload
+ *
+ * @llimit: lower bound of the page pod memory
+ * @ulimit: upper bound of the page pod memory
+ * @nppods: # of page pod entries
+ * @idx_last: page pod entry last used
+ * @map_lock: lock to synchronize access to the page pod map
+ * @map: page pod map
+ */
+struct cxgb3i_ddp_info {
+ unsigned int llimit;
+ unsigned int ulimit;
+ unsigned int nppods;
+ unsigned int idx_last;
+ spinlock_t map_lock;
+ u8 *map;
+};
+
+/**
+ * struct cxgb3i_hba - cxgb3i iscsi host, one per adapter port
+ *
+ * @snic: adapter this host belongs to
+ * @ndev: net device of the port
+ * @shost: scsi host; this structure is stored in its private area
+ * @cconn_rwlock: lock taken for writing around updates of @cconn_list
+ * @cconn_list: list of cxgb3i connections added to this host
+ */
+struct cxgb3i_hba {
+ struct cxgb3i_adapter *snic;
+ struct net_device *ndev;
+ struct Scsi_Host *shost;
+
+ rwlock_t cconn_rwlock;
+ struct list_head cconn_list;
+};
+
+/**
+ * struct cxgb3i_adapter - cxgb3i per-adapter state
+ *
+ * @list_head: link in the driver-wide adapter list
+ * @lock: adapter spinlock
+ * (NOTE(review): only its initialization is visible here)
+ * @tdev: cxgb3 offload device this adapter was created for
+ * @pdev: pci device of the adapter
+ * @hba_cnt: # of valid entries in @hba
+ * @hba: iscsi hosts, indexed by port
+ * @tx_max_size: presumably the max. tx pdu size - TODO confirm
+ * @rx_max_size: presumably the max. rx pdu size - TODO confirm
+ * @tag_format: layout of the iscsi task tag
+ * @ddp: direct data placement state
+ */
+struct cxgb3i_adapter {
+ struct list_head list_head;
+ spinlock_t lock;
+ struct t3cdev *tdev;
+ struct pci_dev *pdev;
+ unsigned char hba_cnt;
+ struct cxgb3i_hba *hba[MAX_NPORTS];
+
+ unsigned int tx_max_size;
+ unsigned int rx_max_size;
+
+ struct cxgb3i_tag_format tag_format;
+ struct cxgb3i_ddp_info ddp;
+};
+
+/**
+ * struct cxgb3i_conn - cxgb3i private data of an iscsi connection
+ *
+ * @list_head: link in the owning hba's connection list
+ * @cep: endpoint bound to this connection, NULL once disconnected
+ * @conn: the libiscsi connection
+ * @hba: host the connection runs on
+ */
+struct cxgb3i_conn {
+ struct list_head list_head;
+
+ struct cxgb3i_endpoint *cep;
+ struct iscsi_conn *conn;
+ struct cxgb3i_hba *hba;
+};
+
+/**
+ * struct cxgb3i_endpoint - cxgb3i private data of an iscsi endpoint
+ *
+ * @sock: socket carrying the TCP connection to the target
+ * @hba: host whose net device the connection goes through
+ * @cconn: connection bound to this endpoint, set at bind time
+ */
+struct cxgb3i_endpoint {
+ struct socket *sock;
+ struct cxgb3i_hba *hba;
+ struct cxgb3i_conn *cconn;
+};
+
+int cxgb3i_iscsi_init(void);
+void cxgb3i_iscsi_cleanup(void);
+
+struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *);
+struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *);
+void cxgb3i_adapter_remove(struct cxgb3i_adapter *);
+int cxgb3i_adapter_ulp_init(struct cxgb3i_adapter *);
+void cxgb3i_adapter_ulp_cleanup(struct cxgb3i_adapter *);
+
+struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *);
+struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *,
+ struct net_device *);
+void cxgb3i_hba_host_remove(struct cxgb3i_hba *);
+
+void cxgb3i_hba_conn_add(struct cxgb3i_conn *, struct cxgb3i_hba *);
+void cxgb3i_hba_conn_remove(struct cxgb3i_conn *);
+
+int cxgb3i_ulp2_init(void);
+void cxgb3i_ulp2_cleanup(void);
+int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *, int, int);
+
+void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32,
+ struct scatterlist *, unsigned int);
+u32 cxgb3i_ddp_tag_reserve(struct cxgb3i_adapter *, unsigned int,
+ u32, unsigned int, struct scatterlist *,
+ unsigned int);
+/**
+ * cxgb3i_parse_tag - split a tag into its h/w reserved and s/w parts
+ * @format: tag layout
+ * @tag: tag to decode, in host byte order
+ * @rsvd_bits: if not NULL, receives the h/w reserved field
+ * @sw_bits: if not NULL, receives the s/w bits with the reserved field
+ *	squeezed out
+ */
+static inline void cxgb3i_parse_tag(struct cxgb3i_tag_format *format,
+				    u32 tag, u32 *rsvd_bits, u32 *sw_bits)
+{
+	if (rsvd_bits)
+		*rsvd_bits = (tag >> format->rsvd_shift) & format->rsvd_mask;
+
+	if (sw_bits) {
+		/* bits above the reserved field, moved down onto it */
+		u32 upper = (tag >> (format->rsvd_shift + format->rsvd_bits))
+				<< format->rsvd_shift;
+		/* bits below the reserved field stay where they are */
+		u32 lower = tag & ((1 << format->rsvd_shift) - 1);
+
+		*sw_bits = upper | lower;
+	}
+}
+
+void cxgb3i_sk_set_callbacks(struct sock *, struct iscsi_conn *);
+void cxgb3i_sk_restore_callbacks(struct sock *, struct iscsi_conn *);
+
+int cxgb3i_conn_ulp2_xmit(struct iscsi_conn *);
+
+void cxgb3i_display_byte_string(char *, unsigned char *, int, int);
+
+#endif
diff --git a/drivers/scsi/cxgb3i/cxgb3i_init.c b/drivers/scsi/cxgb3i/cxgb3i_init.c
new file mode 100644
index 0000000..b848e4c
--- /dev/null
+++ b/drivers/scsi/cxgb3i/cxgb3i_init.c
@@ -0,0 +1,107 @@
+/* cxgb3i_init.c: Chelsio S3xx iSCSI driver.
+ *
+ * Copyright (c) 2008 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Karen Xie (kxie(a)chelsio.com)
+ */
+
+#include "cxgb3i.h"
+
+#define DRV_MODULE_NAME "cxgb3i"
+#define DRV_MODULE_VERSION "1.0.0"
+#define DRV_MODULE_RELDATE "May 1, 2008"
+
+static char version[] __devinitdata =
+ "Chelsio S3xx iSCSI Driver " DRV_MODULE_NAME
+ " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+
+MODULE_AUTHOR("Karen Xie <kxie(a)chelsio.com>");
+MODULE_DESCRIPTION("Chelsio S3xx iSCSI Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+static void open_s3_dev(struct t3cdev *);
+static void close_s3_dev(struct t3cdev *);
+/* CPL message handler table, handed to the cxgb3 LLD through t3c_client */
+cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS];
+/*
+ * Client descriptor registered with the cxgb3 LLD at module init; the
+ * add/remove callbacks each receive the t3cdev concerned.
+ */
+struct cxgb3_client t3c_client = {
+ .name = "iscsi_cxgb3",
+ .handlers = cxgb3i_cpl_handlers,
+ .add = open_s3_dev,
+ .remove = close_s3_dev,
+};
+
+/**
+ * open_s3_dev - register with cxgb3 LLD
+ * @t3dev: cxgb3 adapter instance
+ *
+ * Add callback of the cxgb3 client: sets up the offload TCP and iSCSI
+ * adapter state for @t3dev. The driver banner is printed on the first call
+ * only.
+ */
+static void open_s3_dev(struct t3cdev *t3dev)
+{
+	static int vers_printed;
+
+	/* print the banner once, not once per adapter */
+	if (!vers_printed) {
+		printk(KERN_INFO "%s", version);
+		vers_printed = 1;
+	}
+
+	cxgb3i_log_debug("open cxgb3 %s.\n", t3dev->name);
+
+	cxgb3i_tcp_add(t3dev, &t3c_client);
+	/* the callback cannot return an error, so at least report it */
+	if (!cxgb3i_adapter_add(t3dev))
+		cxgb3i_log_warn("cxgb3 %s, adapter add failed.\n",
+				t3dev->name);
+}
+
+/**
+ * close_s3_dev - de-register with cxgb3 LLD
+ * @t3dev: cxgb3 adapter instance
+ *
+ * Remove callback of the cxgb3 client: drops the iSCSI adapter created for
+ * @t3dev, if there is one, and then the offload TCP state.
+ */
+static void close_s3_dev(struct t3cdev *t3dev)
+{
+	struct cxgb3i_adapter *adap;
+
+	adap = cxgb3i_adapter_find_by_tdev(t3dev);
+	cxgb3i_log_debug("close cxgb3 %s.\n", t3dev->name);
+
+	if (adap != NULL)
+		cxgb3i_adapter_remove(adap);
+
+	cxgb3i_tcp_remove(t3dev);
+}
+
+/**
+ * cxgb3i_init_module - module init entry point
+ *
+ * initialize any driver wide global data structures and register itself
+ * with the cxgb3 module
+ *
+ * Returns 0 on success or the negative error code of the step that failed.
+ * Steps that had already completed are undone before returning.
+ */
+static int __init cxgb3i_init_module(void)
+{
+	int err;
+
+	err = cxgb3i_tcp_init(cxgb3i_cpl_handlers);
+	if (err < 0)
+		return err;
+
+	/*
+	 * NOTE(review): no teardown counterpart of cxgb3i_tcp_init() is
+	 * visible in this file, so nothing is undone for it below.
+	 */
+	err = cxgb3i_iscsi_init();
+	if (err < 0)
+		return err;
+
+	err = cxgb3i_ulp2_init();
+	if (err < 0)
+		goto iscsi_cleanup;
+
+	cxgb3_register_client(&t3c_client);
+	return 0;
+
+iscsi_cleanup:
+	/* do not leave the iscsi transport registered by a failed load */
+	cxgb3i_iscsi_cleanup();
+	return err;
+}
+
+/**
+ * cxgb3i_exit_module - module cleanup/exit entry point
+ *
+ * Unregisters from the cxgb3 module first, then tears down the ulp2 and
+ * iscsi transport state, i.e. the reverse of the order used by
+ * cxgb3i_init_module().
+ */
+static void __exit cxgb3i_exit_module(void)
+{
+ cxgb3_unregister_client(&t3c_client);
+ cxgb3i_ulp2_cleanup();
+ cxgb3i_iscsi_cleanup();
+}
+
+module_init(cxgb3i_init_module);
+module_exit(cxgb3i_exit_module);
diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
new file mode 100644
index 0000000..ea7e21f
--- /dev/null
+++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
@@ -0,0 +1,797 @@
+/* cxgb3i_iscsi.c: Chelsio S3xx iSCSI driver.
+ *
+ * Copyright (c) 2008 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Karen Xie (kxie(a)chelsio.com)
+ */
+
+#include <net/tcp.h>
+#include "cxgb3i.h"
+
+static struct scsi_transport_template *cxgb3i_scsi_transport;
+static struct scsi_host_template cxgb3i_host_template;
+static struct iscsi_transport cxgb3i_iscsi_transport;
+
+static LIST_HEAD(cxgb3i_snic_list);
+static DEFINE_RWLOCK(cxgb3i_snic_rwlock);
+
+/**
+ * cxgb3i_adapter_add - initialize a s3 adapter structure and any h/w settings
+ *	necessary
+ * @t3dev: cxgb3 offload device the adapter is created for
+ *
+ * Allocates the adapter, initializes its ulp state, registers one iscsi
+ * host per port and finally links the adapter into the driver-wide list.
+ * Returns the new adapter, or NULL on failure; on failure everything set
+ * up so far is released again.
+ */
+struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *t3dev)
+{
+	struct cxgb3i_adapter *snic;
+	struct adapter *adapter = tdev2adap(t3dev);
+	int i;
+
+	snic = kzalloc(sizeof(*snic), GFP_KERNEL);
+	if (!snic) {
+		cxgb3i_log_debug("cxgb3 %s, OOM.\n", t3dev->name);
+		return NULL;
+	}
+
+	spin_lock_init(&snic->lock);
+	snic->tdev = t3dev;
+	snic->pdev = adapter->pdev;
+
+	if (cxgb3i_adapter_ulp_init(snic))
+		goto free_snic;
+
+	for_each_port(adapter, i) {
+		snic->hba[i] = cxgb3i_hba_host_add(snic, adapter->port[i]);
+		if (!snic->hba[i])
+			goto remove_hosts;
+	}
+	snic->hba_cnt = adapter->params.nports;
+
+	/* add to the list */
+	write_lock(&cxgb3i_snic_rwlock);
+	list_add_tail(&snic->list_head, &cxgb3i_snic_list);
+	write_unlock(&cxgb3i_snic_rwlock);
+
+	return snic;
+
+remove_hosts:
+	/*
+	 * Port i failed: unregister the hosts of the ports before it, they
+	 * would otherwise leak and keep pointing at the freed adapter.
+	 */
+	while (--i >= 0) {
+		cxgb3i_hba_host_remove(snic->hba[i]);
+		snic->hba[i] = NULL;
+	}
+	cxgb3i_adapter_ulp_cleanup(snic);
+free_snic:
+	kfree(snic);
+	return NULL;
+}
+
+/**
+ * cxgb3i_adapter_remove - release all the resources held and cleanup any h/w
+ * settings necessary
+ * @snic: pointer to adapter instance
+ *
+ * Unlinks the adapter from the driver-wide list, removes its iscsi hosts,
+ * releases the ulp/ddp resources and frees @snic.
+ */
+void cxgb3i_adapter_remove(struct cxgb3i_adapter *snic)
+{
+ int i;
+
+ /* remove from the list */
+ write_lock(&cxgb3i_snic_rwlock);
+ list_del(&snic->list_head);
+ write_unlock(&cxgb3i_snic_rwlock);
+
+ for (i = 0; i < snic->hba_cnt; i++) {
+ if (snic->hba[i]) {
+ cxgb3i_hba_host_remove(snic->hba[i]);
+ snic->hba[i] = NULL;
+ }
+ }
+
+ /* release ddp resources */
+ cxgb3i_adapter_ulp_cleanup(snic);
+ kfree(snic);
+}
+
+/**
+ * cxgb3i_adapter_find_by_tdev - look up the adapter of a cxgb3 device
+ * @t3dev: cxgb3 offload device
+ *
+ * Walks the driver-wide adapter list under the read lock. Returns the
+ * matching adapter, or NULL if none was created for @t3dev.
+ */
+struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *t3dev)
+{
+	struct cxgb3i_adapter *cur;
+	struct cxgb3i_adapter *found = NULL;
+
+	read_lock(&cxgb3i_snic_rwlock);
+	list_for_each_entry(cur, &cxgb3i_snic_list, list_head) {
+		if (cur->tdev != t3dev)
+			continue;
+		found = cur;
+		break;
+	}
+	read_unlock(&cxgb3i_snic_rwlock);
+
+	return found;
+}
+
+/**
+ * cxgb3i_hba_find_by_netdev - look up the iscsi host of a net device
+ * @ndev: net device
+ *
+ * Searches the hosts of every adapter on the driver-wide list under the
+ * read lock. Returns the host whose port is @ndev, or NULL if there is none.
+ */
+struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
+{
+	struct cxgb3i_adapter *adap;
+	struct cxgb3i_hba *match = NULL;
+	int port;
+
+	read_lock(&cxgb3i_snic_rwlock);
+	list_for_each_entry(adap, &cxgb3i_snic_list, list_head) {
+		for (port = 0; port < adap->hba_cnt; port++) {
+			if (adap->hba[port]->ndev == ndev) {
+				match = adap->hba[port];
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	read_unlock(&cxgb3i_snic_rwlock);
+	return match;
+}
+
+/**
+ * cxgb3i_hba_conn_add - attach a connection to a host
+ * @cconn: connection to attach
+ * @hba: host to attach it to
+ *
+ * Records @hba in @cconn and appends the connection to the host's
+ * connection list under the list's write lock.
+ */
+void cxgb3i_hba_conn_add(struct cxgb3i_conn *cconn, struct cxgb3i_hba *hba)
+{
+ cconn->hba = hba;
+ write_lock(&hba->cconn_rwlock);
+ list_add_tail(&cconn->list_head, &hba->cconn_list);
+ write_unlock(&hba->cconn_rwlock);
+}
+
+/**
+ * cxgb3i_hba_conn_remove - detach a connection from its host
+ * @cconn: connection to detach
+ *
+ * Unlinks @cconn from its host's connection list under the list's write
+ * lock. Does nothing if the connection has no host.
+ */
+void cxgb3i_hba_conn_remove(struct cxgb3i_conn *cconn)
+{
+	struct cxgb3i_hba *owner = cconn->hba;
+
+	if (!owner)
+		return;
+
+	write_lock(&owner->cconn_rwlock);
+	list_del(&cconn->list_head);
+	write_unlock(&owner->cconn_rwlock);
+}
+
+/**
+ * cxgb3i_hba_host_add - allocate and register the iscsi host of one port
+ * @snic: adapter the port belongs to
+ * @ndev: net device of the port
+ *
+ * The cxgb3i_hba is stored in the private area of the new Scsi_Host. A
+ * reference on the adapter's pci device is taken before the host is added
+ * and dropped again if adding fails. Returns the new hba, or NULL on
+ * failure.
+ */
+struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *snic,
+ struct net_device *ndev)
+{
+ struct cxgb3i_hba *hba;
+ struct Scsi_Host *shost;
+ int err;
+
+ shost = iscsi_host_alloc(&cxgb3i_host_template,
+ sizeof(struct cxgb3i_hba),
+ CXGB3I_SCSI_QDEPTH_DFLT);
+ if (!shost) {
+ cxgb3i_log_info("iscsi_host_alloc failed.\n");
+ return NULL;
+ }
+
+ shost->transportt = cxgb3i_scsi_transport;
+ shost->max_lun = 512;
+ shost->max_id = 0;
+ shost->max_channel = 0;
+ shost->max_cmd_len = 16;
+
+ /* the hba is carved out of the host's private data */
+ hba = iscsi_host_priv(shost);
+ INIT_LIST_HEAD(&hba->cconn_list);
+ rwlock_init(&hba->cconn_rwlock);
+ hba->snic = snic;
+ hba->ndev = ndev;
+ hba->shost = shost;
+
+ /* reference is dropped in cxgb3i_hba_host_remove() or on error below */
+ pci_dev_get(snic->pdev);
+ err = iscsi_host_add(shost, &snic->pdev->dev);
+ if (err) {
+ cxgb3i_log_info("iscsi_host_add failed.\n");
+ goto pci_dev_put;
+ }
+
+ cxgb3i_log_debug("shost 0x%p, hba 0x%p, no %u.\n",
+ shost, hba, shost->host_no);
+
+ return hba;
+
+pci_dev_put:
+ pci_dev_put(snic->pdev);
+ scsi_host_put(shost);
+ return NULL;
+}
+
+/**
+ * cxgb3i_hba_host_remove - unregister and free the iscsi host of one port
+ * @hba: host to remove
+ *
+ * Removes the scsi host, drops the pci device reference taken when the
+ * host was added and frees the host. @hba is stored inside the host and
+ * must not be used afterwards.
+ */
+void cxgb3i_hba_host_remove(struct cxgb3i_hba *hba)
+{
+	struct Scsi_Host *shost = hba->shost;
+
+	if (!shost)
+		return;
+
+	cxgb3i_log_debug("shost 0x%p, hba 0x%p, no %u.\n",
+			 shost, hba, shost->host_no);
+	iscsi_host_remove(shost);
+	pci_dev_put(hba->snic->pdev);
+	/* cleanup connections ? */
+	iscsi_host_free(shost);
+}
+
+/**
+ * cxgb3i_ep_connect - establish TCP connection to target portal
+ * @dst_addr: target IP address
+ * @non_blocking: blocking or non-blocking call (not used by this function)
+ *
+ * Initiates an offloaded TCP/IP connection to the dst_addr and wraps it in
+ * an iscsi endpoint. The connection is accepted only if it is offloaded,
+ * is in iSCSI ULP mode and leaves through a net device that has a cxgb3i
+ * host. Returns the endpoint, or NULL on failure, in which case the socket
+ * has been released.
+ */
+static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr,
+						int non_blocking)
+{
+	struct iscsi_endpoint *ep;
+	struct cxgb3i_endpoint *cep;
+	struct cxgb3i_hba *hba;
+	struct socket *sock;
+	struct sock *sk;
+	struct tcp_sock *tp;
+	struct dst_entry *dst;
+	int err;
+
+	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (err < 0)
+		return NULL;
+	sk = sock->sk;
+
+	sk->sk_allocation = GFP_ATOMIC;
+	sk->sk_reuse = 1;
+	tp = tcp_sk(sk);
+	tp->nonagle |= TCP_NAGLE_OFF;
+	err = cxgb3i_tcp_connect(sock, dst_addr, sizeof(struct sockaddr),
+				 ULP_MODE_ISCSI);
+	if (err < 0) {
+		cxgb3i_log_info("sock 0x%p, connect failed %d.\n", sock, err);
+		goto release_sock;
+	}
+	if (!c3cn_flag(sock->sk, C3CN_OFFLOADED)) {
+		cxgb3i_log_info("sock 0x%p, NOT offloaded.\n", sock);
+		goto release_sock;
+	}
+	if (C3CN_ULP_MODE(sk) != ULP_MODE_ISCSI) {
+		cxgb3i_log_info("sock 0x%p, mode 0x%x, NOT expected.\n",
+				sock, C3CN_ULP_MODE(sk));
+		goto release_sock;
+	}
+
+	/* the socket may have no cached route; do not dereference blindly */
+	dst = __sk_dst_get(sk);
+	if (!dst) {
+		cxgb3i_log_info("sock 0x%p, no route cached.\n", sock);
+		goto release_sock;
+	}
+	hba = cxgb3i_hba_find_by_netdev(dst->dev);
+	if (!hba) {
+		cxgb3i_log_info("NOT going through cxgbi device.\n");
+		goto release_sock;
+	}
+
+	ep = iscsi_create_endpoint(sizeof(*cep));
+	if (!ep) {
+		cxgb3i_log_info("iscsi alloc ep, OOM.\n");
+		goto release_sock;
+	}
+	cep = ep->dd_data;
+	cep->sock = sock;
+	cep->hba = hba;
+
+	cxgb3i_log_debug("sock 0x%p, iscsi_ep 0x%p, cxgb_ep 0x%p, hba 0x%p.\n",
+			 sock, ep, cep, hba);
+	return ep;
+
+release_sock:
+	sock_release(sock);
+	return NULL;
+}
+
+/**
+ * cxgb3i_ep_poll - polls for TCP connection establishment
+ * @ep: TCP connection (endpoint) handle
+ * @timeout_ms: timeout value in milli secs
+ *
+ * Always returns 1 without looking at @ep or @timeout_ms.
+ */
+static int cxgb3i_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
+{
+ return 1;
+}
+
+/**
+ * cxgb3i_ep_disconnect - teardown TCP connection
+ * @ep: TCP connection (endpoint) handle
+ *
+ * If the endpoint is bound to a connection, the socket callbacks are
+ * restored and the connection is detached from the socket first. The
+ * socket is then released and the endpoint destroyed.
+ */
+static void cxgb3i_ep_disconnect(struct iscsi_endpoint *ep)
+{
+ struct cxgb3i_endpoint *cep = (struct cxgb3i_endpoint *)ep->dd_data;
+ struct cxgb3i_conn *cconn = cep->cconn;
+
+ cxgb3i_log_debug("ep 0x%p, cep 0x%p.\n", ep, cep);
+
+ if (cconn && cconn->conn) {
+ struct iscsi_conn *conn = cconn->conn;
+ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+ cxgb3i_sk_restore_callbacks(cep->sock->sk, conn);
+ /* suspend rx and clear the socket links under the callback lock */
+ write_lock_bh(&cep->sock->sk->sk_callback_lock);
+ set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
+ cconn->cep = NULL;
+ tcp_conn->sock = NULL;
+ write_unlock_bh(&cep->sock->sk->sk_callback_lock);
+ }
+
+ sock_release(cep->sock);
+ iscsi_destroy_endpoint(ep);
+}
+
+/**
+ * cxgb3i_session_create - create a new iscsi session
+ * @ep: iscsi endpoint whose private data is a struct cxgb3i_endpoint;
+ * the call fails if it is NULL
+ * @cmds_max: max # of commands
+ * @qdepth: scsi queue depth (not used by this function)
+ * @initial_cmdsn: initial iscsi CMDSN for this session
+ * @host_no: pointer to return host no
+ *
+ * Creates a new iSCSI session on the scsi host of the endpoint's hba,
+ * points every task's header at its iscsi_tcp_task header buffer and
+ * allocates the r2t pool. Returns the class session, or NULL on failure.
+ */
+static struct iscsi_cls_session *cxgb3i_session_create(struct iscsi_endpoint
+ *ep, uint16_t cmds_max,
+ uint16_t qdepth,
+ uint32_t initial_cmdsn,
+ uint32_t *host_no)
+{
+ struct cxgb3i_endpoint *cep;
+ struct cxgb3i_hba *hba;
+ struct Scsi_Host *shost;
+ struct iscsi_cls_session *cls_session;
+ struct iscsi_session *session;
+ int i;
+
+ if (!ep) {
+ cxgb3i_log_error("%s, missing endpoint.\n", __func__);
+ return NULL;
+ }
+
+ cep = (struct cxgb3i_endpoint *)ep->dd_data;
+ hba = cep->hba;
+ shost = hba->shost;
+ cxgb3i_log_debug("ep 0x%p, cep 0x%p, hba 0x%p.\n", ep, cep, hba);
+ BUG_ON(hba != iscsi_host_priv(shost));
+
+ *host_no = shost->host_no;
+
+ cls_session = iscsi_session_setup(&cxgb3i_iscsi_transport, shost,
+ cmds_max,
+ sizeof(struct iscsi_tcp_task),
+ initial_cmdsn, ISCSI_MAX_TARGET);
+ if (!cls_session)
+ return NULL;
+
+ session = cls_session->dd_data;
+
+ for (i = 0; i < session->cmds_max; i++) {
+ struct iscsi_task *task = session->cmds[i];
+ struct iscsi_tcp_task *tcp_task = task->dd_data;
+
+ /* the header buffer keeps ISCSI_DIGEST_SIZE bytes in reserve */
+ task->hdr = &tcp_task->hdr.cmd_hdr;
+ task->hdr_max = sizeof(tcp_task->hdr) - ISCSI_DIGEST_SIZE;
+ }
+
+ if (iscsi_r2tpool_alloc(session))
+ goto remove_session;
+
+ return cls_session;
+
+remove_session:
+ iscsi_session_teardown(cls_session);
+ return NULL;
+}
+
+/**
+ * cxgb3i_session_destroy - destroys iscsi session
+ * @cls_session: pointer to iscsi cls session
+ *
+ * Frees the session's r2t pool and then tears the session down.
+ */
+static void cxgb3i_session_destroy(struct iscsi_cls_session *cls_session)
+{
+ cxgb3i_log_debug("sess 0x%p.\n", cls_session);
+ iscsi_r2tpool_free(cls_session->dd_data);
+ iscsi_session_teardown(cls_session);
+}
+
+/**
+ * cxgb3i_conn_create - create iscsi connection instance
+ * @cls_session: pointer to iscsi cls session
+ * @cid: iscsi cid
+ *
+ * Creates a new iSCSI connection instance for a given session. The
+ * connection's private data holds a struct iscsi_tcp_conn immediately
+ * followed by a struct cxgb3i_conn. Returns the class connection, or NULL
+ * on failure.
+ */
+static struct iscsi_cls_conn *cxgb3i_conn_create(struct iscsi_cls_session
+ *cls_session, uint32_t cid)
+{
+ struct iscsi_cls_conn *cls_conn;
+ struct iscsi_conn *conn;
+ struct iscsi_tcp_conn *tcp_conn;
+ struct cxgb3i_conn *cconn;
+
+ cxgb3i_log_debug("sess 0x%p, cid %u.\n", cls_session, cid);
+
+ cls_conn = iscsi_conn_setup(cls_session,
+ sizeof(*tcp_conn) + sizeof(*cconn), cid);
+ if (!cls_conn)
+ return NULL;
+ conn = cls_conn->dd_data;
+
+ conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;
+
+ tcp_conn = conn->dd_data;
+ tcp_conn->iscsi_conn = conn;
+
+ /* the cxgb3i part sits right behind the iscsi_tcp_conn */
+ cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
+ cconn->conn = conn;
+
+ return cls_conn;
+}
+
+/**
+ * cxgb3i_conn_bind - binds iscsi sess, conn and endpoint together
+ * @cls_session: pointer to iscsi cls session
+ * @cls_conn: pointer to iscsi cls conn
+ * @transport_eph: 64-bit EP handle
+ * @is_leading: leading connection on this session?
+ *
+ * Binds together an iSCSI session, an iSCSI connection and a
+ * TCP connection. Returns 0 on success and -EINVAL if the endpoint handle
+ * cannot be resolved or iscsi_conn_bind() fails (its own error code is not
+ * passed on).
+ */
+
+static int cxgb3i_conn_bind(struct iscsi_cls_session *cls_session,
+ struct iscsi_cls_conn *cls_conn,
+ uint64_t transport_eph, int is_leading)
+{
+ struct iscsi_conn *conn = cls_conn->dd_data;
+ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+ struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
+ struct iscsi_endpoint *ep;
+ struct cxgb3i_endpoint *cep;
+ struct socket *sock;
+ int err;
+
+ ep = iscsi_lookup_endpoint(transport_eph);
+ if (!ep)
+ return -EINVAL;
+
+ cxgb3i_log_debug("ep 0x%p, cls sess 0x%p, cls conn 0x%p.\n",
+ ep, cls_session, cls_conn);
+
+ err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
+ if (err)
+ return -EINVAL;
+
+ cep = (struct cxgb3i_endpoint *)ep->dd_data;
+ sock = cep->sock;
+
+ /* cross-link connection, endpoint and socket */
+ tcp_conn->sock = sock;
+ cconn->hba = cep->hba;
+ cconn->cep = cep;
+ cep->cconn = cconn;
+
+ /* portal address/port are read under the session lock as well */
+ spin_lock_bh(&conn->session->lock);
+ sprintf(conn->portal_address, NIPQUAD_FMT,
+ NIPQUAD(inet_sk(sock->sk)->daddr));
+ conn->portal_port = ntohs(inet_sk(sock->sk)->dport);
+ spin_unlock_bh(&conn->session->lock);
+
+ cxgb3i_sk_set_callbacks(sock->sk, conn);
+ iscsi_tcp_hdr_recv_prep(tcp_conn);
+
+ return 0;
+}
+
+/**
+ * cxgb3i_conn_flush - flush tx
+ * @conn: pointer to iscsi conn
+ *
+ * Transmits the rest of the current outgoing segment if it has not been
+ * copied completely yet. Returns 0 when there is nothing left to send,
+ * otherwise the result of cxgb3i_conn_ulp2_xmit().
+ */
+static int cxgb3i_conn_flush(struct iscsi_conn *conn)
+{
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct iscsi_segment *seg = &tcp_conn->out.segment;
+
+	if (seg->total_copied >= seg->total_size)
+		return 0;
+
+	return cxgb3i_conn_ulp2_xmit(conn);
+}
+
+/**
+ * cxgb3i_conn_get_param - return iscsi connection parameter to caller
+ * @cls_conn: pointer to iscsi cls conn
+ * @param: parameter type identifier
+ * @buf: buffer pointer
+ *
+ * The portal port and address are formatted here under the session lock;
+ * every other parameter is handed to iscsi_conn_get_param(). Returns the
+ * number of characters written to @buf, or whatever iscsi_conn_get_param()
+ * returns.
+ */
+static int cxgb3i_conn_get_param(struct iscsi_cls_conn *cls_conn,
+ enum iscsi_param param, char *buf)
+{
+ struct iscsi_conn *conn = cls_conn->dd_data;
+ int len;
+
+ cxgb3i_log_debug("cls_conn 0x%p, param %d.\n", cls_conn, param);
+
+ switch (param) {
+ case ISCSI_PARAM_CONN_PORT:
+ spin_lock_bh(&conn->session->lock);
+ len = sprintf(buf, "%hu\n", conn->portal_port);
+ spin_unlock_bh(&conn->session->lock);
+ break;
+ case ISCSI_PARAM_CONN_ADDRESS:
+ spin_lock_bh(&conn->session->lock);
+ len = sprintf(buf, "%s\n", conn->portal_address);
+ spin_unlock_bh(&conn->session->lock);
+ break;
+ default:
+ return iscsi_conn_get_param(cls_conn, param, buf);
+ }
+
+ return len;
+}
+
+/**
+ * cxgb3i_conn_set_param - set an iscsi connection/session parameter
+ * @cls_conn: pointer to iscsi cls conn
+ * @param: parameter type identifier
+ * @buf: parameter value as a string
+ * @buflen: length of @buf
+ *
+ * All values are stored through iscsi_set_param(). In addition, enabling a
+ * digest reprograms the ulp mode of the connection, and changing MaxR2T
+ * reallocates the session's r2t pool. Returns 0 on success or a negative
+ * error code.
+ */
+static int cxgb3i_conn_set_param(struct iscsi_cls_conn *cls_conn,
+				 enum iscsi_param param, char *buf, int buflen)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct iscsi_session *session = conn->session;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
+	int value, err = 0;
+
+	switch (param) {
+	case ISCSI_PARAM_HDRDGST_EN:
+		err = iscsi_set_param(cls_conn, param, buf, buflen);
+		if (!err && conn->hdrdgst_en)
+			cxgb3i_conn_ulp_setup(cconn, conn->hdrdgst_en,
+					      conn->datadgst_en);
+		break;
+	case ISCSI_PARAM_DATADGST_EN:
+		err = iscsi_set_param(cls_conn, param, buf, buflen);
+		if (!err && conn->datadgst_en)
+			cxgb3i_conn_ulp_setup(cconn, conn->hdrdgst_en,
+					      conn->datadgst_en);
+		break;
+	case ISCSI_PARAM_MAX_R2T:
+		/* reject unparsable input instead of using a stale value */
+		if (sscanf(buf, "%d", &value) != 1)
+			return -EINVAL;
+		if (value <= 0 || !is_power_of_2(value))
+			return -EINVAL;
+		if (session->max_r2t == value)
+			break;
+		/* the pool is sized by max_r2t, so rebuild it */
+		iscsi_r2tpool_free(session);
+		err = iscsi_set_param(cls_conn, param, buf, buflen);
+		if (!err && iscsi_r2tpool_alloc(session))
+			return -ENOMEM;
+		break;
+	case ISCSI_PARAM_MAX_RECV_DLENGTH:
+		err = iscsi_set_param(cls_conn, param, buf, buflen);
+		cxgb3i_log_debug("MAX_RECV %u.\n", conn->max_recv_dlength);
+		break;
+	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+		err = iscsi_set_param(cls_conn, param, buf, buflen);
+		cxgb3i_log_debug("MAX_XMIT %u.\n", conn->max_xmit_dlength);
+		break;
+	default:
+		return iscsi_set_param(cls_conn, param, buf, buflen);
+	}
+	return err;
+}
+
+/**
+ * cxgb3i_host_get_param - returns host (adapter) related parameters
+ * @shost: scsi host pointer
+ * @param: parameter type identifier
+ * @buf: buffer pointer
+ *
+ * The hardware address and net device name are taken from the host's net
+ * device; every other parameter is handed to iscsi_host_get_param().
+ * Returns the number of characters written to @buf, or whatever
+ * iscsi_host_get_param() returns.
+ */
+static int cxgb3i_host_get_param(struct Scsi_Host *shost,
+ enum iscsi_host_param param, char *buf)
+{
+ struct cxgb3i_hba *hba = iscsi_host_priv(shost);
+ int i;
+ int len = 0;
+
+ switch (param) {
+ case ISCSI_HOST_PARAM_HWADDRESS:
+ /* six hex bytes separated by '.', no trailing newline */
+ for (i = 0; i < 6; i++)
+ len +=
+ sprintf(buf + len, "%02x.",
+ hba->ndev->dev_addr[i]);
+ /* overwrite the separator left behind the last byte */
+ len--;
+ buf[len] = '\0';
+ break;
+ case ISCSI_HOST_PARAM_NETDEV_NAME:
+ len = sprintf(buf, "%s\n", hba->ndev->name);
+ break;
+ default:
+ return iscsi_host_get_param(shost, param, buf);
+ }
+ return len;
+}
+
+/**
+ * cxgb3i_conn_get_stats - returns iSCSI stats
+ * @cls_conn: pointer to iscsi cls conn
+ * @stats: pointer to iscsi statistic struct
+ *
+ * Copies the connection's octet and pdu counters into @stats. The digest
+ * and timeout error counts are always reported as 0, and one custom
+ * counter, "eh_abort_cnt", is appended.
+ */
+static void cxgb3i_conn_get_stats(struct iscsi_cls_conn *cls_conn,
+ struct iscsi_stats *stats)
+{
+ struct iscsi_conn *conn = cls_conn->dd_data;
+
+ stats->txdata_octets = conn->txdata_octets;
+ stats->rxdata_octets = conn->rxdata_octets;
+ stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
+ stats->dataout_pdus = conn->dataout_pdus_cnt;
+ stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
+ stats->datain_pdus = conn->datain_pdus_cnt;
+ stats->r2t_pdus = conn->r2t_pdus_cnt;
+ stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
+ stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
+ stats->digest_err = 0;
+ stats->timeout_err = 0;
+ stats->custom_length = 1;
+ strcpy(stats->custom[0].desc, "eh_abort_cnt");
+ stats->custom[0].value = conn->eh_abort_cnt;
+}
+
+/**
+ * tag_base - build the s/w part of a tag around the h/w reserved field
+ * @format: tag layout
+ * @idx: task index, placed in the low bits
+ * @age: session age, placed above the index
+ *
+ * The s/w bits at and above @format->rsvd_shift are moved up by
+ * @format->rsvd_bits, which leaves the reserved field zeroed; the bits
+ * below rsvd_shift keep their position. Returns the resulting tag.
+ */
+static inline u32 tag_base(struct cxgb3i_tag_format *format,
+ unsigned int idx, unsigned int age)
+{
+ u32 sw_bits = idx | (age << format->idx_bits);
+ u32 tag = sw_bits >> format->rsvd_shift;
+ tag <<= format->rsvd_bits + format->rsvd_shift;
+ tag |= sw_bits & ((1 << format->rsvd_shift) - 1);
+ return tag;
+}
+
+/**
+ * cxgb3i_parse_itt - extract task index and session age from an itt
+ * @conn: pointer to iscsi conn
+ * @itt: initiator task tag as carried in the pdu header (wire byte order)
+ * @idx: if not NULL, receives the task index
+ * @age: if not NULL, receives the session age
+ *
+ * The tag is stored in the header with htonl(), so it is converted back to
+ * host byte order before the h/w reserved bits are stripped from it.
+ */
+static void cxgb3i_parse_itt(struct iscsi_conn *conn, itt_t itt,
+			     int *idx, int *age)
+{
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
+	struct cxgb3i_adapter *snic = cconn->hba->snic;
+	u32 tag = ntohl((__force u32)itt);
+	u32 sw_bits;
+
+	cxgb3i_parse_tag(&snic->tag_format, tag, NULL, &sw_bits);
+	if (idx)
+		*idx = sw_bits & ISCSI_ITT_MASK;
+	if (age)
+		*age = (sw_bits >> snic->tag_format.idx_bits) & ISCSI_AGE_MASK;
+}
+
+/**
+ * cxgb3i_reserve_itt - build the itt for a task, with a ddp tag if possible
+ * @task: iscsi task
+ * @hdr_itt: receives the itt to put into the pdu header (wire byte order)
+ *
+ * For scsi reads a ddp tag is requested for the command's data-in buffer.
+ * If no ddp tag is obtained, or the task is not a read, the s/w tag is used
+ * with all h/w reserved bits set. Always returns 0.
+ */
+static int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
+{
+	struct scsi_cmnd *sc = task->sc;
+	struct iscsi_conn *conn = task->conn;
+	struct iscsi_session *sess = conn->session;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
+	struct cxgb3i_adapter *snic = cconn->hba->snic;
+	u32 sw_tag = tag_base(&snic->tag_format, task->itt, sess->age);
+	u32 tag = RESERVED_ITT;
+
+	if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) {
+		struct cxgb3i_tcp_conn *c3cn =
+			CXGB3_TCP_CONN(tcp_conn->sock->sk);
+		/* data arrives in the data-in buffer, hence scsi_in() */
+		struct scsi_data_buffer *sdb = scsi_in(sc);
+
+		tag = cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
+					     sdb->length, sdb->table.sgl,
+					     sdb->table.nents);
+	}
+	if (tag == RESERVED_ITT)
+		tag = sw_tag | (snic->tag_format.rsvd_mask <<
+				snic->tag_format.rsvd_shift);
+	*hdr_itt = (__force itt_t)htonl(tag);
+	return 0;
+}
+
+/**
+ * cxgb3i_release_itt - release the ddp resources behind a task's itt
+ * @task: iscsi task
+ * @hdr_itt: itt from the pdu header (wire byte order)
+ *
+ * Counterpart of cxgb3i_reserve_itt(): for scsi reads the tag is released
+ * against the same data-in buffer it was reserved for.
+ */
+static void cxgb3i_release_itt(struct iscsi_task *task, itt_t hdr_itt)
+{
+	struct scsi_cmnd *sc = task->sc;
+	struct iscsi_conn *conn = task->conn;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct cxgb3i_conn *cconn = (struct cxgb3i_conn *)(tcp_conn + 1);
+	struct cxgb3i_adapter *snic = cconn->hba->snic;
+	u32 tag = ntohl((__force u32)hdr_itt);
+
+	if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) {
+		struct scsi_data_buffer *sdb = scsi_in(sc);
+
+		cxgb3i_ddp_tag_release(snic, tag, sdb->table.sgl,
+				       sdb->table.nents);
+	}
+}
+
+/*
+ * cxgb3i_host_template -- Scsi_Host_Template structure
+ * used when registering with the scsi mid layer
+ *
+ * Command queueing, queue depth changes and error handling all go through
+ * the libiscsi helpers.
+ */
+static struct scsi_host_template cxgb3i_host_template = {
+ .module = THIS_MODULE,
+ .name = "Chelsio S3xx iSCSI Initiator",
+ .proc_name = "cxgb3i",
+ .queuecommand = iscsi_queuecommand,
+ .change_queue_depth = iscsi_change_queue_depth,
+ .can_queue = 128 * (ISCSI_DEF_XMIT_CMDS_MAX - 1),
+ .sg_tablesize = SG_ALL,
+ .max_sectors = 0xFFFF,
+ .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN,
+ .eh_abort_handler = iscsi_eh_abort,
+ .eh_device_reset_handler = iscsi_eh_device_reset,
+ .eh_target_reset_handler = iscsi_eh_target_reset,
+ .use_clustering = DISABLE_CLUSTERING,
+ .slave_alloc = iscsi_slave_alloc,
+ .this_id = -1,
+};
+
+/*
+ * cxgb3i_iscsi_transport -- iscsi transport registered with the iscsi
+ * transport class by cxgb3i_iscsi_init()
+ *
+ * Session, connection and endpoint handling, host/connection parameters,
+ * statistics and itt handling are driver specific (cxgb3i_*); the task
+ * init/xmit/cleanup callbacks come from iscsi_tcp and the remaining ones
+ * from libiscsi.
+ */
+static struct iscsi_transport cxgb3i_iscsi_transport = {
+ .owner = THIS_MODULE,
+ .name = "cxgb3i",
+ .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
+ | CAP_DATADGST | CAP_DIGEST_OFFLOAD,
+ .param_mask = ISCSI_MAX_RECV_DLENGTH |
+ ISCSI_MAX_XMIT_DLENGTH |
+ ISCSI_HDRDGST_EN |
+ ISCSI_DATADGST_EN |
+ ISCSI_INITIAL_R2T_EN |
+ ISCSI_MAX_R2T |
+ ISCSI_IMM_DATA_EN |
+ ISCSI_FIRST_BURST |
+ ISCSI_MAX_BURST |
+ ISCSI_PDU_INORDER_EN |
+ ISCSI_DATASEQ_INORDER_EN |
+ ISCSI_ERL |
+ ISCSI_CONN_PORT |
+ ISCSI_CONN_ADDRESS |
+ ISCSI_EXP_STATSN |
+ ISCSI_PERSISTENT_PORT |
+ ISCSI_PERSISTENT_ADDRESS |
+ ISCSI_TARGET_NAME | ISCSI_TPGT |
+ ISCSI_USERNAME | ISCSI_PASSWORD |
+ ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
+ ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
+ ISCSI_LU_RESET_TMO |
+ ISCSI_PING_TMO | ISCSI_RECV_TMO |
+ ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
+ .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
+ ISCSI_HOST_INITIATOR_NAME | ISCSI_HOST_NETDEV_NAME,
+ .get_host_param = cxgb3i_host_get_param,
+ /* session management */
+ .create_session = cxgb3i_session_create,
+ .destroy_session = cxgb3i_session_destroy,
+ .get_session_param = iscsi_session_get_param,
+ /* connection management */
+ .create_conn = cxgb3i_conn_create,
+ .bind_conn = cxgb3i_conn_bind,
+ .destroy_conn = iscsi_conn_teardown,
+ .start_conn = iscsi_conn_start,
+ .stop_conn = iscsi_conn_stop,
+ .flush_conn = cxgb3i_conn_flush,
+ .get_conn_param = cxgb3i_conn_get_param,
+ .set_param = cxgb3i_conn_set_param,
+ .get_stats = cxgb3i_conn_get_stats,
+ /* pdu xmit req. from user space */
+ .send_pdu = iscsi_conn_send_pdu,
+ /* task */
+ .init_task = iscsi_tcp_task_init,
+ .xmit_task = iscsi_tcp_task_xmit,
+ .cleanup_task = iscsi_tcp_cleanup_task,
+ .parse_itt = cxgb3i_parse_itt,
+ .reserve_itt = cxgb3i_reserve_itt,
+ .release_itt = cxgb3i_release_itt,
+ /* TCP connect/disconnect */
+ .ep_connect = cxgb3i_ep_connect,
+ .ep_poll = cxgb3i_ep_poll,
+ .ep_disconnect = cxgb3i_ep_disconnect,
+ /* Error recovery timeout call */
+ .session_recovery_timedout = iscsi_session_recovery_timedout,
+};
+
+/**
+ * cxgb3i_iscsi_init - register the cxgb3i iscsi transport
+ *
+ * Stores the scsi transport template returned by the registration for use
+ * by the hosts. Returns 0 on success, -ENODEV if registration fails.
+ */
+int cxgb3i_iscsi_init(void)
+{
+	struct scsi_transport_template *tmpl;
+
+	tmpl = iscsi_register_transport(&cxgb3i_iscsi_transport);
+	cxgb3i_scsi_transport = tmpl;
+	if (tmpl == NULL) {
+		cxgb3i_log_error("Could not register cxgb3i transport.\n");
+		return -ENODEV;
+	}
+
+	cxgb3i_log_debug("cxgb3i transport 0x%p.\n", cxgb3i_scsi_transport);
+	return 0;
+}
+
+/**
+ * cxgb3i_iscsi_cleanup - unregister the cxgb3i iscsi transport
+ *
+ * Does nothing if the transport is not registered, so it is safe to call
+ * more than once.
+ */
+void cxgb3i_iscsi_cleanup(void)
+{
+	if (!cxgb3i_scsi_transport)
+		return;
+
+	cxgb3i_log_debug("cxgb3i transport 0x%p.\n",
+			 cxgb3i_scsi_transport);
+	iscsi_unregister_transport(&cxgb3i_iscsi_transport);
+	cxgb3i_scsi_transport = NULL;
+}
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
new file mode 100644
index 0000000..9e80311
--- /dev/null
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
@@ -0,0 +1,2808 @@
+/*
+ * Copyright (C) 2003-2008 Chelsio Communications. All rights reserved.
+ *
+ * Written by Dimitris Michailidis (dm(a)chelsio.com)
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this
+ * release for licensing terms and conditions.
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/if_vlan.h>
+#include <linux/inet_diag.h>
+#include <linux/version.h>
+
+#ifdef CONFIG_SECURITY_NETWORK
+#include <linux/security.h>
+#endif
+
+#include "cxgb3_defs.h"
+#include "cxgb3_ctl_defs.h"
+#include "firmware_exports.h"
+#include "cxgb3i_offload.h"
+#include "cxgb3i_ulp2.h"
+
+#define VALIDATE_SEQ 1
+
+/*
+ * Function type shared by every CPL message handler in this file.  The
+ * typedef lets the handlers below be forward-declared in one line each.
+ */
+typedef int (cxgb3_cpl_handler_decl) (struct t3cdev *,
+ struct sk_buff *, void *);
+
+/*
+ * Handlers installed into the CPL dispatch table by cxgb3i_tcp_init();
+ * do_bad_cpl is the default for every opcode without a dedicated handler.
+ */
+static cxgb3_cpl_handler_decl do_bad_cpl;
+static cxgb3_cpl_handler_decl do_act_establish;
+static cxgb3_cpl_handler_decl do_act_open_rpl;
+static cxgb3_cpl_handler_decl do_wr_ack;
+static cxgb3_cpl_handler_decl do_peer_close;
+static cxgb3_cpl_handler_decl do_abort_req;
+static cxgb3_cpl_handler_decl do_abort_rpl;
+static cxgb3_cpl_handler_decl do_close_con_rpl;
+static cxgb3_cpl_handler_decl do_iscsi_hdr;
+
+/*
+ * Default tunables.  cxgb3i_tcp_add() copies this structure into each
+ * adapter's private data and then overrides max_wrs with the adapter's own
+ * value.  A cong_alg of -1 makes calc_opt2() report "no flavor valid".
+ */
+static struct cxgb3i_tcp_tunables default_cxgb3i_tcp_tunables = {
+ .max_host_sndbuf = 32 * 1024,
+ .max_wrs = 15,
+ .rx_credit_thres = 10 * 1024,
+ .cong_alg = -1,
+ .delack = 1,
+ .tcp_window_scaling = 1,
+};
+
+/*
+ * Protocol structure and functions for our sockets.  t3_tcp_prot is filled
+ * in by cxgb3i_tcp_init() as a copy of tcp_prot with the close, disconnect
+ * and destroy operations replaced by the chelsio_* versions declared here.
+ */
+static struct proto t3_tcp_prot;
+static void chelsio_close(struct sock *, long);
+static int chelsio_disconnect(struct sock *, int);
+static int chelsio_destroy(struct sock *);
+static void process_deferq(struct work_struct *);
+
+/*
+ * List of per-adapter cxgb3i_tcp_data structures; additions, removals and
+ * walks in this file all hold cxgb3_list_lock.
+ */
+static LIST_HEAD(cxgb3_list);
+static DECLARE_MUTEX(cxgb3_list_lock);
+
+/*
+ * For ULP connections HW may add headers, e.g., for digests, that aren't part
+ * of the messages sent by the host but that are part of the TCP payload and
+ * therefore consume TCP sequence space. Tx connection parameters that
+ * operate in TCP sequence space are affected by the HW additions and need to
+ * compensate for them to accurately track TCP sequence numbers. This array
+ * contains the compensating extra lengths for ULP packets. It is indexed by
+ * a packet's ULP submode; ulp_extra_len() masks the index to the low two
+ * bits, so exactly four entries are required.
+ */
+static const unsigned int cxgb3_ulp_extra_len[] = { 0, 4, 4, 8 };
+
+/*
+ * Number of bytes the HW will add to a Tx packet, looked up from
+ * cxgb3_ulp_extra_len[] using the low two bits of the skb's ULP mode.
+ */
+static inline unsigned int ulp_extra_len(const struct sk_buff *skb)
+{
+	const unsigned int submode = skb_ulp_mode(skb) & 3;
+
+	return cxgb3_ulp_extra_len[submode];
+}
+
+/*
+ * Size of WRs in bytes. Note that we assume all devices we are handling have
+ * the same WR size.  Set once by t3_init_wr_tab().
+ */
+static unsigned int wrlen __read_mostly;
+
+/*
+ * The number of WRs needed for an skb depends on the number of page fragments
+ * in the skb and whether it has any payload in its main body. This maps the
+ * length of the gather list represented by an skb into the # of necessary WRs.
+ * Entry 0 is never written by t3_init_wr_tab() and stays 0.
+ */
+static unsigned int skb_wrs[MAX_SKB_FRAGS + 2] __read_mostly;
+
+/*
+ * Fill skb_wrs[] and wrlen from the adapter's WR length.  Only the first
+ * call has any effect: a non-zero skb_wrs[1] marks the table as populated.
+ *
+ * @wr_len: WR length as reported by the adapter; it is multiplied by 8 to
+ *          obtain wrlen in bytes (presumably wr_len counts 8-byte units).
+ */
+static void t3_init_wr_tab(unsigned int wr_len)
+{
+	int nfrags;
+
+	if (skb_wrs[1])		/* already initialized */
+		return;
+
+	for (nfrags = 1; nfrags < ARRAY_SIZE(skb_wrs); nfrags++) {
+		int sgl_len = (3 * nfrags) / 2 + (nfrags & 1) + 3;
+
+		if (sgl_len <= wr_len)
+			skb_wrs[nfrags] = 1;
+		else
+			skb_wrs[nfrags] = 1 + (sgl_len - 2) / (wr_len - 1);
+	}
+
+	wrlen = wr_len * 8;
+}
+
+/*
+ * TOE information returned through inet_diag for offloaded connections.
+ * ulp_mode and sched_class are 4-bit bit-fields sharing a single byte;
+ * dev_name is a fixed T3CNAMSIZ-byte buffer.
+ */
+struct t3_inet_diag_info {
+ u32 toe_id;
+ u32 tid;
+ u16 wrs;
+ u8 ulp_mode:4;
+ u8 sched_class:4;
+ u8 ddp_enabled;
+ char dev_name[T3CNAMSIZ];
+};
+
+/*
+ * Socket filter that drops everything by specifying a 0-length filter program.
+ */
+static struct sk_filter drop_all = {.refcnt = ATOMIC_INIT(1) };
+
+/*
+ * This sk_buff holds a fake header-only TCP segment that we use whenever we
+ * need to exploit SW TCP functionality that expects TCP headers, such as
+ * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple
+ * CPUs without locking.  It is allocated and zero-filled by
+ * cxgb3i_tcp_init().
+ */
+static struct sk_buff *tcphdr_skb __read_mostly;
+
+/*
+ * Initialize state for cxgb3 API operations.
+ *
+ * Builds t3_tcp_prot as a copy of tcp_prot with the close, disconnect and
+ * destroy operations overridden, allocates the shared zero-filled
+ * header-only tcphdr_skb, and populates @cpl_handlers: all NUM_CPL_CMDS
+ * slots default to do_bad_cpl, then the opcodes handled by this file are
+ * pointed at their handlers.
+ *
+ * @cpl_handlers: handler table with at least NUM_CPL_CMDS entries.
+ *
+ * Returns 0 on success or -ENOMEM if tcphdr_skb cannot be allocated.
+ */
+int cxgb3i_tcp_init(cxgb3_cpl_handler_func *cpl_handlers)
+{
+	int i;
+
+	/*
+	 * Initialize protocol structure for our sockets.  We first copy
+	 * the standard TCP protocol structure so we end up with standard
+	 * values for things like pointers to counters, etc.
+	 */
+	t3_tcp_prot = tcp_prot;
+	t3_tcp_prot.close = chelsio_close;
+	t3_tcp_prot.disconnect = chelsio_disconnect;
+	t3_tcp_prot.destroy = chelsio_destroy;
+
+	tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
+	if (!tcphdr_skb) {
+		printk(KERN_ERR
+		       "Chelsio TCP offload: can't allocate sk_buff\n");
+		/* Report a real errno rather than a bare -1. */
+		return -ENOMEM;
+	}
+	skb_put(tcphdr_skb, sizeof(struct tcphdr));
+	skb_reset_transport_header(tcphdr_skb);
+	memset(tcphdr_skb->data, 0, tcphdr_skb->len);
+	/* CIPSO_V4_OPTEXIST is false for tcphdr_skb without anything extra */
+
+	for (i = 0; i < NUM_CPL_CMDS; i++)
+		cpl_handlers[i] = do_bad_cpl;
+
+	cpl_handlers[CPL_ACT_ESTABLISH] = do_act_establish;
+	cpl_handlers[CPL_ACT_OPEN_RPL] = do_act_open_rpl;
+	cpl_handlers[CPL_PEER_CLOSE] = do_peer_close;
+	cpl_handlers[CPL_ABORT_REQ_RSS] = do_abort_req;
+	cpl_handlers[CPL_ABORT_RPL_RSS] = do_abort_rpl;
+	cpl_handlers[CPL_CLOSE_CON_RPL] = do_close_con_rpl;
+	cpl_handlers[CPL_TX_DMA_ACK] = do_wr_ack;
+	cpl_handlers[CPL_ISCSI_HDR] = do_iscsi_hdr;
+
+	return 0;
+}
+
+/*
+ * Set up per-adapter offload state for @cdev on behalf of @client.
+ *
+ * Allocates the cxgb3i_tcp_data and its port table, queries the adapter for
+ * its WR length, ports and Rx page info, initializes the WR table, attaches
+ * the data to each port's net device and links it onto cxgb3_list.  Any
+ * failure releases what was allocated and returns silently.
+ */
+void cxgb3i_tcp_add(struct t3cdev *cdev, struct cxgb3_client *client)
+{
+	struct ofld_page_info rx_page_info;
+	struct cxgb3i_tcp_data *cdata;
+	struct adap_ports *ports;
+	unsigned int wr_len;
+	int port;
+
+	cdata = kzalloc(sizeof *cdata, GFP_KERNEL);
+	if (!cdata)
+		return;
+
+	ports = kzalloc(sizeof *ports, GFP_KERNEL);
+	if (!ports)
+		goto free_cdata;
+	cdata->ports = ports;
+
+	/* Query the adapter; stop at the first request that fails. */
+	if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
+		goto free_ports;
+	if (cdev->ctl(cdev, GET_PORTS, cdata->ports) < 0)
+		goto free_ports;
+	if (cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info) < 0)
+		goto free_ports;
+
+	t3_init_wr_tab(wr_len);
+
+	INIT_LIST_HEAD(&cdata->list);
+	cdata->cdev = cdev;
+	cdata->client = client;
+	cdata->rx_page_size = rx_page_info.page_size;
+	cdata->conf = default_cxgb3i_tcp_tunables;
+	/* The adapter's own WR limit replaces the default. */
+	cdata->conf.max_wrs = T3C_DATA(cdev)->max_wrs;
+	skb_queue_head_init(&cdata->deferq);
+	INIT_WORK(&cdata->deferq_task, process_deferq);
+
+	for (port = 0; port < ports->nports; port++)
+		NDEV2CDATA(ports->lldevs[port]) = cdata;
+
+	down(&cxgb3_list_lock);
+	list_add_tail(&cdata->list, &cxgb3_list);
+	up(&cxgb3_list_lock);
+	return;
+
+free_ports:
+	kfree(ports);
+free_cdata:
+	kfree(cdata);
+}
+
+/*
+ * Tear down the per-adapter state created by cxgb3i_tcp_add(): detach it
+ * from every port's net device, unlink it from cxgb3_list and free it.
+ *
+ * cxgb3i_tcp_add() returns void and can fail without attaching anything, so
+ * tolerate a device that has no private data instead of dereferencing NULL.
+ * NOTE(review): assumes CXGB3_TCP_DATA() yields NULL for such a device --
+ * confirm against the macro definition.
+ */
+void cxgb3i_tcp_remove(struct t3cdev *cdev)
+{
+	struct cxgb3i_tcp_data *cdata = CXGB3_TCP_DATA(cdev);
+	struct adap_ports *ports;
+	int i;
+
+	if (!cdata)
+		return;
+
+	ports = cdata->ports;
+	for (i = 0; i < ports->nports; i++)
+		NDEV2CDATA(ports->lldevs[i]) = NULL;
+
+	down(&cxgb3_list_lock);
+	list_del(&cdata->list);
+	up(&cxgb3_list_lock);
+
+	kfree(ports);
+	kfree(cdata);
+}
+
+/*
+ * Return 1 if @dev is one of the port net devices of an adapter on
+ * cxgb3_list, 0 otherwise.  The list is walked under cxgb3_list_lock.
+ */
+static int is_cxgb3_dev(struct net_device *dev)
+{
+	struct cxgb3i_tcp_data *cdata;
+	int found = 0;
+
+	down(&cxgb3_list_lock);
+	list_for_each_entry(cdata, &cxgb3_list, list) {
+		struct adap_ports *ports = cdata->ports;
+		int port;
+
+		for (port = 0; port < ports->nports; port++) {
+			if (ports->lldevs[port] == dev) {
+				found = 1;
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	up(&cxgb3_list_lock);
+	return found;
+}
+
+/*
+ * Primary cxgb3 API operations.
+ * =============================
+ */
+
+static int tcp_v4_connect_offload(struct sock *, struct sockaddr *, int);
+static void t3_cleanup_rbuf(struct sock *, int);
+static int t3_push_frames(struct sock *, int);
+static int t3_send_reset(struct sock *, int, struct sk_buff *);
+static int t3_sendskb(struct sock *, struct sk_buff *, int);
+
+/*
+ * Start an offloaded connect on @sock to the endpoint in @uaddr.
+ *
+ * Allocates the per-connection cxgb3i_tcp_conn, records @ulp_mode in it,
+ * attaches it to the socket and hands off to tcp_v4_connect_offload().  If
+ * that fails the connection state is detached and freed again.
+ *
+ * Returns 0 on success, -ENOMEM if the connection state cannot be
+ * allocated, or the error from tcp_v4_connect_offload().
+ */
+int cxgb3i_tcp_connect(struct socket *sock, struct sockaddr *uaddr,
+		       int addr_len, int ulp_mode)
+{
+	struct sock *sk = sock->sk;
+	struct cxgb3i_tcp_conn *conn;
+	int err;
+
+	conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+	if (conn == NULL)
+		return -ENOMEM;
+
+	conn->flags = 0;
+	conn->ulp_mode = ulp_mode;
+	CXGB3_TCP_CONN(sk) = conn;
+
+	err = tcp_v4_connect_offload(sk, uaddr, addr_len);
+	if (!err)
+		return 0;
+
+	/* Connect failed: undo the attachment made above. */
+	CXGB3_TCP_CONN(sk) = NULL;
+	kfree(conn);
+	return err;
+}
+
+/*
+ * Exported wrapper around t3_cleanup_rbuf(); forwards both arguments
+ * unchanged.
+ */
+void cxgb3i_tcp_cleanup_rbuf(struct sock *sk, int copied)
+{
+	t3_cleanup_rbuf(sk, copied);
+}
+
+/*
+ * Exported wrapper around t3_sendskb(); forwards all arguments unchanged and
+ * returns its result.
+ */
+int cxgb3i_tcp_sendskb(struct sock *sk, struct sk_buff *skb, int flags)
+{
+ return t3_sendskb(sk, skb, flags);
+}
+
+/*
+ * Protocol operations.
+ * ====================
+ */
+
+static int make_close_transition(struct sock *);
+static void close_conn(struct sock *);
+static void t3_purge_write_queue(struct sock *);
+
+/*
+ * Release a socket's local TCP port if the socket is bound.  tcp_done()
+ * normally does this, but because we have to wait for HW to release TIDs we
+ * usually reach tcp_done() later than the SW stack would.  Calling this
+ * first lets the SW stack reuse the port before we are finished with the
+ * connection.
+ */
+static inline void release_tcp_port(struct sock *sk)
+{
+	if (!inet_csk(sk)->icsk_bind_hash)
+		return;		/* not bound, nothing to release */
+
+	inet_put_port(sk);
+}
+
+/*
+ * close() operation installed in t3_tcp_prot.
+ *
+ * Under the socket lock: marks both directions shut down, discards any
+ * unread receive data, then picks one of
+ *   - nothing, if the socket is already in TCP_CLOSE;
+ *   - an abort (RST) if unread data was discarded or the socket is still in
+ *     SYN_SENT; this path also releases the local port and skips the linger
+ *     wait;
+ *   - the protocol's disconnect operation for a zero-linger socket;
+ *   - a regular FIN-based close when make_close_transition() says so.
+ * It then optionally waits up to @timeout, orphans the socket, and finishes
+ * under the BH socket lock, destroying the socket if it reached TCP_CLOSE.
+ */
+static void chelsio_close(struct sock *sk, long timeout)
+{
+ int data_lost, old_state;
+
+ lock_sock(sk);
+ sk->sk_shutdown |= SHUTDOWN_MASK;
+
+ /*
+ * We need to flush the receive buffs. We do this only on the
+ * descriptor close, not protocol-sourced closes, because the
+ * reader process may not have drained the data yet! Make a note
+ * of whether any received data will be lost so we can decide whether
+ * to FIN or RST.
+ */
+ data_lost = skb_queue_len(&sk->sk_receive_queue);
+ __skb_queue_purge(&sk->sk_receive_queue);
+
+ if (sk->sk_state == TCP_CLOSE) /* Nothing if we are already closed */
+ ;
+ else if (data_lost || sk->sk_state == TCP_SYN_SENT) {
+ /* Unread data was tossed, zap the connection. */
+ NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+ t3_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
+ release_tcp_port(sk);
+ goto unlock;
+ } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+ /* Check zero linger _after_ checking for unread data. */
+ sk->sk_prot->disconnect(sk, 0);
+ NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+ } else if (make_close_transition(sk)) { /* Regular FIN-based close */
+ close_conn(sk);
+ }
+
+ if (timeout)
+ sk_stream_wait_close(sk, timeout);
+
+unlock:
+ /* Remember the state before orphaning to detect a concurrent destroy. */
+ old_state = sk->sk_state;
+ sock_hold(sk); /* must last past the potential inet_csk_destroy_sock */
+ sock_orphan(sk);
+ atomic_inc(sk->sk_prot->orphan_count);
+
+ release_sock(sk); /* Final release_sock in connection's lifetime. */
+
+ /*
+ * There are no more user references at this point. Grab the socket
+ * spinlock and finish the close.
+ */
+ local_bh_disable();
+ bh_lock_sock(sk);
+
+ /*
+ * Because the socket was orphaned before the bh_lock_sock
+ * either the backlog or a BH may have already destroyed it.
+ * Bail out if so.
+ */
+ if (old_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
+ goto out;
+
+ /*
+ * FIN_WAIT2 with a negative linger2 and no abort in progress: send a
+ * RST.  The skb is allocated with GFP_ATOMIC because the BH socket
+ * lock is held; if the allocation fails the reset is simply skipped.
+ */
+ if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 &&
+ !c3cn_flag(sk, C3CN_ABORT_SHUTDOWN)) {
+ struct sk_buff *skb;
+
+ skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
+ if (skb) {
+ t3_send_reset(sk, CPL_ABORT_SEND_RST, skb);
+ NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+ }
+ }
+
+ if (sk->sk_state == TCP_CLOSE)
+ inet_csk_destroy_sock(sk);
+
+out:
+ bh_unlock_sock(sk);
+ local_bh_enable();
+ sock_put(sk); /* drops the reference taken by sock_hold() above */
+}
+
+/*
+ * disconnect() operation installed in t3_tcp_prot.  Not implemented: it logs
+ * an error and always returns -ENOTSUPP; both arguments are ignored.
+ */
+static int chelsio_disconnect(struct sock *sk, int flags)
+{
+ printk(KERN_ERR "chelsio_disconnect not implemented\n");
+ return -ENOTSUPP;
+}
+
+/*
+ * Our version of tcp_v4_destroy_sock(). We need to do this because
+ * tcp_writequeue_purge() that is used in the original doesn't quite match
+ * our needs. If we ever hook into the memory management of the SW stack we
+ * may be able to use tcp_v4_destroy_sock() directly.
+ *
+ * Resets the ULP mode, purges the write queue with our own helper, detaches
+ * and frees the per-connection state, then returns the result of the stock
+ * tcp_prot.destroy().
+ */
+static int chelsio_destroy(struct sock *sk)
+{
+ struct cxgb3i_tcp_conn *c3cn;
+
+ /* NOTE(review): the ULP mode is written before c3cn is fetched; if
+  * C3CN_ULP_MODE() goes through CXGB3_TCP_CONN(sk) this presumes the
+  * connection state is still attached -- confirm. */
+ C3CN_ULP_MODE(sk) = ULP_MODE_NONE;
+ t3_purge_write_queue(sk);
+ c3cn = CXGB3_TCP_CONN(sk);
+ CXGB3_TCP_CONN(sk) = NULL;
+ kfree(c3cn);
+ return tcp_prot.destroy(sk);
+}
+
+/*
+ * Local utility routines used to implement primary cxgb3 API operations.
+ * ======================================================================
+ */
+
+static int tcp_connect_offload(struct sock *);
+static u32 t3_send_rx_credits(struct sock *, u32, u32, int);
+static void mk_act_open_req(struct sock *, struct sk_buff *,
+ unsigned int, const struct l2t_entry *);
+static int wait_for_mem(struct sock *, long *);
+static void skb_entail(struct sock *, struct sk_buff *, int);
+
+/*
+ * Return non-zero if @cdev's type field equals T3A.
+ */
+static inline int is_t3a(const struct t3cdev *cdev)
+{
+ return cdev->type == T3A;
+}
+
+/*
+ * Determine the value of a packet's ->priority field. Bit 0 determines
+ * whether the packet should use a control Tx queue, bits 1..3 determine
+ * the queue set to use.
+ *
+ * This implementation returns @cntrl unchanged and does not look at @sk, so
+ * bits 1..3 are whatever the caller passed in.
+ */
+static inline unsigned int mkprio(unsigned int cntrl, const struct sock *sk)
+{
+ return cntrl;
+}
+
+/*
+ * Return 1 if the skb's control block has C3CB_FLAG_URG set (the buffer
+ * carries urgent data), 0 otherwise.
+ */
+static inline int skb_urgent(struct sk_buff *skb)
+{
+	return !!(CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_URG);
+}
+
+/*
+ * Empty the pending-WR list by clearing its head pointer
+ * (tp->forward_skb_hint).  The tail pointer is left as is; enqueue_wr() only
+ * consults it when the head is non-NULL.
+ */
+static inline void reset_wr_list(struct tcp_sock *tp)
+{
+ tp->forward_skb_hint = NULL;
+}
+
+/*
+ * Append a WR to a socket's FIFO of pending WRs.  The FIFO is a singly-linked
+ * list of sk_buffs built from borrowed fields:
+ * - tcp_sock.forward_skb_hint is the head, tcp_sock.retransmit_skb_hint
+ *   the tail
+ * - sk_buff.sp is the per-packet next pointer
+ */
+static inline void enqueue_wr(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	/* The new skb becomes the tail, so it has no successor. */
+	skb->sp = NULL;
+
+	/*
+	 * Both we and the driver must free the packet before it really goes
+	 * away, so it needs a second reference.  There is exactly one user
+	 * right now, which lets us set the count directly instead of paying
+	 * for the atomic op in skb_get().
+	 */
+	atomic_set(&skb->users, 2);
+
+	if (tp->forward_skb_hint)
+		tp->retransmit_skb_hint->sp = (void *)skb;
+	else
+		tp->forward_skb_hint = skb;
+
+	tp->retransmit_skb_hint = skb;
+}
+
+/* Returns bits 2:7 of a socket's TOS field */
+#define SK_TOS(sk) ((inet_sk(sk)->tos >> 2) & M_TOS)
+
+/*
+ * The next two functions calculate the option 0 value for a socket.
+ *
+ * calc_opt0h() builds the high word: Nagle (on unless TCP_NAGLE_OFF is
+ * set), keep-alive (from SOCK_KEEPOPEN), TCAM bypass, the receive window
+ * scale and the connection's MSS index.
+ */
+static inline unsigned int calc_opt0h(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int nagle_on = (tp->nonagle & TCP_NAGLE_OFF) == 0;
+	int keepalive = sock_flag(sk, SOCK_KEEPOPEN) != 0;
+	unsigned int opt0h;
+
+	opt0h = V_NAGLE(nagle_on) | V_KEEP_ALIVE(keepalive) | F_TCAM_BYPASS;
+	opt0h |= V_WND_SCALE(tp->rx_opt.rcv_wscale);
+	opt0h |= V_MSS_IDX(C3CN_MSS_IDX(sk));
+
+	return opt0h;
+}
+
+/*
+ * Build the low word of option 0: TOS, ULP mode and the receive buffer size
+ * (the receive window shifted right by 10 bits, capped at M_RCV_BUFSIZ).
+ */
+static inline unsigned int calc_opt0l(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int tos = SK_TOS(sk);
+	unsigned int rcv_bufsiz;
+
+	/* suppress values in special range */
+	if ((tos & 0x38) == 0x30)
+		tos = 0;
+
+	rcv_bufsiz = min(tp->rcv_wnd >> 10, (u32) M_RCV_BUFSIZ);
+
+	return V_TOS(tos) | V_ULP_MODE(C3CN_ULP_MODE(sk)) |
+	       V_RCV_BUFSIZ(rcv_bufsiz);
+}
+
+/*
+ * Build option 2 from the adapter's cong_alg tunable.  A value of -1 means
+ * no flavor is configured: the valid bit is cleared and the flavor field is
+ * 0.  Any other value sets the valid bit and is used as the flavor.
+ */
+static inline unsigned int calc_opt2(const struct sock *sk)
+{
+	const struct t3cdev *cdev = C3CN_CDEV(sk);
+
+	if (CXGB3_TCP_TUNABLE(cdev, cong_alg) == -1)
+		return V_FLAVORS_VALID(0) | V_CONG_CONTROL_FLAVOR(0);
+
+	return V_FLAVORS_VALID(1) |
+	       V_CONG_CONTROL_FLAVOR(CXGB3_TCP_TUNABLE(cdev, cong_alg));
+}
+
+/*
+ * Prepend a TX_DATA work request header to @skb.
+ *
+ * @len is the payload length to advertise and already includes any HW ULP
+ * additions.  The first WR sent on a connection (C3CN_TX_DATA_SENT not yet
+ * set) additionally carries the init flags, the CPU index and the send
+ * buffer size, after which the flag is set.
+ */
+static inline void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
+				   int len)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tx_data_wr *req;
+	int shove;
+
+	skb_reset_transport_header(skb);
+	req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
+
+	/* Shove only if no more data is announced and the queue is empty. */
+	shove = !c3cn_flag(sk, C3CN_TX_MORE_DATA) &&
+		(skb_peek(&sk->sk_write_queue) ? 0 : 1);
+
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_lo = htonl(V_WR_TID(C3CN_TID(sk)));
+	req->sndseq = htonl(tp->snd_nxt);
+	/* len includes the length of any HW ULP additions */
+	req->len = htonl(len);
+	req->param = htonl(V_TX_PORT(C3CN_L2T(sk)->smt_idx));
+	/* V_TX_ULP_SUBMODE sets both the mode and submode */
+	req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) |
+			   V_TX_URG(skb_urgent(skb)) |
+			   V_TX_SHOVE(shove));
+
+	if (c3cn_flag(sk, C3CN_TX_DATA_SENT))
+		return;
+
+	/* First TX_DATA on this connection: add the one-time parameters. */
+	req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
+			    V_TX_CPU_IDX(C3CN_QSET(sk)));
+
+	/* Sendbuffer is in units of 32KB. */
+	req->param |= htonl(V_TX_SNDBUF(sk->sk_sndbuf >> 15));
+	c3cn_set_flag(sk, C3CN_TX_DATA_SENT);
+}
+
+/*
+ * Offload counterpart of an IPv4 TCP connect.
+ *
+ * Validates the address, resolves a route, fills in the socket's source and
+ * destination, moves the socket to SYN_SENT, hashes it (selecting a source
+ * port), re-routes with the final ports and then calls
+ * tcp_connect_offload().
+ *
+ * Returns 0 when tcp_connect_offload() reports success.  Otherwise returns
+ * -EINVAL / -EAFNOSUPPORT for a bad address, the routing error,
+ * -ENETUNREACH for multicast/broadcast routes, the hashing or re-routing
+ * error, or -ENOTSUPP when no offload connection could be set up.  On the
+ * late failure paths the socket is put back into TCP_CLOSE.
+ */
+static int tcp_v4_connect_offload(struct sock *sk,
+ struct sockaddr *uaddr, int addr_len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+ struct rtable *rt;
+ __be32 daddr, nexthop;
+ int tmp;
+ int err;
+
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ if (usin->sin_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ /* With a source-route option the first hop comes from the option. */
+ nexthop = daddr = usin->sin_addr.s_addr;
+ if (inet->opt && inet->opt->srr) {
+ if (!daddr)
+ return -EINVAL;
+ nexthop = inet->opt->faddr;
+ }
+
+ tmp = ip_route_connect(&rt, nexthop, inet->saddr,
+ RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+ IPPROTO_TCP, inet->sport, usin->sin_port, sk, 1);
+ if (tmp < 0) {
+ if (tmp == -ENETUNREACH)
+ IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ return tmp;
+ }
+
+ if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+ ip_rt_put(rt);
+ return -ENETUNREACH;
+ }
+
+ if (!inet->opt || !inet->opt->srr)
+ daddr = rt->rt_dst;
+
+ if (!inet->saddr)
+ inet->saddr = rt->rt_src;
+ inet->rcv_saddr = inet->saddr;
+
+ if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
+ /* Reset inherited state */
+ tp->rx_opt.ts_recent = 0;
+ tp->rx_opt.ts_recent_stamp = 0;
+ tp->write_seq = 0;
+ }
+
+ if (tcp_death_row.sysctl_tw_recycle &&
+ !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
+ struct inet_peer *peer = rt->peer;
+ /*
+ * VJ's idea. We save the last timestamp seen from
+ * the destination in the peer table when entering
+ * TIME-WAIT, and initialize rx_opt.ts_recent from it
+ * when trying a new connection.
+ */
+ if (peer != NULL &&
+ peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
+ tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+ tp->rx_opt.ts_recent = peer->tcp_ts;
+ }
+ }
+
+ inet->dport = usin->sin_port;
+ inet->daddr = daddr;
+
+ inet_csk(sk)->icsk_ext_hdr_len = 0;
+ if (inet->opt)
+ inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
+
+ tp->rx_opt.mss_clamp = 536;
+
+ /*
+ * Socket identity is still unknown (sport may be zero).
+ * However, we set the state to SYN-SENT and, without releasing the
+ * socket lock, select a source port, enter ourselves into the hash
+ * tables and complete initialization after this.
+ */
+ tcp_set_state(sk, TCP_SYN_SENT);
+ err = inet_hash_connect(&tcp_death_row, sk);
+ if (err)
+ goto failure;
+
+ err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk);
+ if (err)
+ goto failure;
+
+ /* OK, now commit destination to socket. */
+ sk->sk_gso_type = SKB_GSO_TCPV4;
+ sk_setup_caps(sk, &rt->u.dst);
+
+ /* A non-zero return from tcp_connect_offload() means success. */
+ if (tcp_connect_offload(sk))
+ return 0;
+ /*
+ * If we get here, we don't have an offload connection so simply
+ * return a failure.
+ */
+ err = -ENOTSUPP;
+
+failure:
+ /*
+ * This unhashes the socket and releases the local port,
+ * if necessary.
+ */
+ tcp_set_state(sk, TCP_CLOSE);
+ ip_rt_put(rt);
+ sk->sk_route_caps = 0;
+ inet->dport = 0;
+ return err;
+}
+
+/*
+ * Return 1 if the delayed-ACK mode tunable may be applied to this
+ * connection: either no ULP mode is in use, or the mode is ULP_MODE_TCPDDP
+ * on a device whose type is at least T3A.  Return 0 otherwise.
+ */
+static inline int is_delack_mode_valid(struct t3cdev *cdev, struct sock *sk)
+{
+	if (!C3CN_ULP_MODE(sk))
+		return 1;
+
+	return C3CN_ULP_MODE(sk) == ULP_MODE_TCPDDP && cdev->type >= T3A;
+}
+
+/*
+ * Set of states for which we should return RX credits.
+ */
+#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)
+
+/*
+ * Called after some received data has been read.  Returns RX credits to the
+ * HW for the data consumed since the last update (copied_seq - rcv_wup).
+ *
+ * Nothing is sent unless the socket is in a credit-returning state, there
+ * are credits to return and the rx_credit_thres tunable is non-zero.
+ * Credits go out once they reach the threshold, or unconditionally when the
+ * window would otherwise have less than 16KB left.  @copied is not used.
+ */
+static void t3_cleanup_rbuf(struct sock *sk, int copied)
+{
+	struct tcp_sock *tp;
+	struct t3cdev *cdev;
+	u32 credits, thres;
+	u32 dack = 0;
+	int must_send;
+
+	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
+		return;
+
+	tp = tcp_sk(sk);
+	credits = tp->copied_seq - tp->rcv_wup;
+	if (unlikely(!credits))
+		return;
+
+	cdev = C3CN_CDEV(sk);
+	thres = CXGB3_TCP_TUNABLE(cdev, rx_credit_thres);
+	if (unlikely(thres == 0))
+		return;
+
+	if (!is_delack_mode_valid(cdev, sk)) {
+		/* Tunable not applicable: force delayed-ACK mode 1. */
+		dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
+	} else {
+		int dack_mode = CXGB3_TCP_TUNABLE(cdev, delack);
+
+		if (unlikely(dack_mode != C3CN_DELAK_MODE(sk))) {
+			u32 delta = tp->rcv_nxt - C3CN_DELAK_SEQ(sk);
+
+			if (delta >= tp->rcv_wnd ||
+			    delta >= 16 * tp->rx_opt.mss_clamp)
+				dack = (F_RX_DACK_CHANGE |
+					V_RX_DACK_MODE(dack_mode));
+		}
+	}
+
+	/*
+	 * For coalescing to work effectively ensure the receive window has
+	 * at least 16KB left.
+	 */
+	must_send = credits + 16384 >= tp->rcv_wnd;
+	if (!must_send && credits < thres)
+		return;
+
+	tp->rcv_wup += t3_send_rx_credits(sk, credits, dack, must_send);
+}
+
+/*
+ * Generic ARP failure handler that discards the buffer.  Installed on data
+ * skbs by t3_push_frames(); @cdev is not used.
+ */
+static void arp_failure_discard(struct t3cdev *cdev, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+}
+
+/*
+ * Prepends TX_DATA_WR or CPL_CLOSE_CON_REQ headers to buffers waiting in a
+ * socket's send queue and sends them on to the TOE. Must be called with the
+ * socket lock held. Returns the amount of send buffer space that was freed
+ * as a result of sending queued data to the TOE.
+ *
+ * The returned amount is the sum of the truesize of every skb sent; the same
+ * amount is subtracted from sk->sk_wmem_queued.  Returns 0 without sending
+ * when the socket is in SYN_SENT or CLOSE or an abort is in progress.
+ * A non-zero @req_completion requests a completion on the WR when it is the
+ * only unacknowledged one.
+ */
+static int t3_push_frames(struct sock *sk, int req_completion)
+{
+ int total_size = 0;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+ struct t3cdev *cdev;
+ struct cxgb3i_tcp_data *cdata;
+
+ if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
+ return 0;
+
+ /*
+ * We shouldn't really be called at all after an abort but check just
+ * in case.
+ */
+ if (unlikely(c3cn_flag(sk, C3CN_ABORT_SHUTDOWN)))
+ return 0;
+
+ cdev = C3CN_CDEV(sk);
+ cdata = CXGB3_TCP_DATA(cdev);
+
+ /*
+ * Keep sending while WR credits remain, the queue is non-empty, no
+ * barrier is pending, and the head skb is either not held or is not
+ * the only skb on the queue.
+ */
+ while (C3CN_WR_AVAIL(sk)
+ && (skb = skb_peek(&sk->sk_write_queue)) != NULL
+ && !c3cn_flag(sk, C3CN_TX_WAIT_IDLE)
+ && (!(CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_HOLD)
+ || skb_queue_len(&sk->sk_write_queue) > 1)) {
+
+ int len = skb->len; /* length before skb_push */
+ int frags = skb_shinfo(skb)->nr_frags + (len != skb->data_len);
+ int wrs_needed = skb_wrs[frags];
+
+ /* An skb that fits in one WR together with its header needs only one. */
+ if (wrs_needed > 1 && len + sizeof(struct tx_data_wr) <= wrlen)
+ wrs_needed = 1;
+
+ /* NOTE(review): this range check runs after skb_wrs[frags] was
+  * already read above, so it reports but does not prevent an
+  * out-of-range index. */
+ WARN_ON(frags >= ARRAY_SIZE(skb_wrs) || wrs_needed < 1);
+ if (C3CN_WR_AVAIL(sk) < wrs_needed)
+ break;
+
+ __skb_unlink(skb, &sk->sk_write_queue);
+ skb->priority = mkprio(CPL_PRIORITY_DATA, sk);
+ skb->csum = wrs_needed; /* remember this until the WR_ACK */
+ C3CN_WR_AVAIL(sk) -= wrs_needed;
+ C3CN_WR_UNACKED(sk) += wrs_needed;
+ enqueue_wr(tp, skb);
+
+ if (likely(CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
+ len += ulp_extra_len(skb);
+ make_tx_data_wr(sk, skb, len);
+ tp->snd_nxt += len;
+ tp->lsndtime = tcp_time_stamp;
+ /*
+ * Request a completion when the caller asked for one and this
+ * is the only unacked WR, when the skb demands it, or when at
+ * least half of the maximum WRs are unacknowledged.
+ */
+ if ((req_completion
+ && C3CN_WR_UNACKED(sk) == wrs_needed)
+ || (CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_COMPL)
+ || C3CN_WR_UNACKED(sk) >= C3CN_WR_MAX(sk) / 2) {
+ struct work_request_hdr *wr = cplhdr(skb);
+
+ wr->wr_hi |= htonl(F_WR_COMPL);
+ C3CN_WR_UNACKED(sk) = 0;
+ }
+ CXGB3_TCP_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR;
+ } else if (skb->data[0] == FW_WROPCODE_OFLD_CLOSE_CON)
+ c3cn_set_flag(sk, C3CN_CLOSE_CON_REQUESTED);
+
+ total_size += skb->truesize;
+ /* A barrier skb stalls further transmission until the flag is cleared. */
+ if (CXGB3_TCP_SKB_CB(skb)->flags & C3CB_FLAG_BARRIER)
+ c3cn_set_flag(sk, C3CN_TX_WAIT_IDLE);
+ set_arp_failure_handler(skb, arp_failure_discard);
+ l2t_send(cdev, skb, C3CN_L2T(sk));
+ }
+ sk->sk_wmem_queued -= total_size;
+ return total_size;
+}
+
+/*
+ * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
+ * and send it along.  The request is rewritten in place (cmd becomes
+ * CPL_ABORT_NO_RST) and handed to cxgb3_ofld_send(), whose return value is
+ * ignored.
+ */
+static void abort_arp_failure(struct t3cdev *cdev, struct sk_buff *skb)
+{
+ struct cpl_abort_req *req = cplhdr(skb);
+
+ req->cmd = CPL_ABORT_NO_RST;
+ cxgb3_ofld_send(cdev, skb);
+}
+
+/*
+ * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do
+ * not send multiple ABORT_REQs for the same connection and also that we do
+ * not try to send a message after the connection has closed. Returns 1 if
+ * an ABORT_REQ wasn't generated after all, 0 otherwise.
+ *
+ * @mode: abort command (e.g. CPL_ABORT_SEND_RST).
+ * @skb:  optional pre-allocated buffer for the request.  It is consumed in
+ *        every case (freed when no request is generated).  When NULL, a
+ *        buffer is allocated here with GFP_KERNEL | __GFP_NOFAIL.
+ *        NOTE(review): that allocation flag presumably requires a context
+ *        that may sleep; callers in atomic context pass their own skb.
+ */
+static int t3_send_reset(struct sock *sk, int mode, struct sk_buff *skb)
+{
+ struct cpl_abort_req *req;
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned int tid = C3CN_TID(sk);
+
+ /* An abort is already under way, or there is no device: nothing to send. */
+ if (unlikely(c3cn_flag(sk, C3CN_ABORT_SHUTDOWN) || !C3CN_CDEV(sk))) {
+ if (skb)
+ __kfree_skb(skb);
+ return 1;
+ }
+
+ c3cn_set_flag(sk, C3CN_ABORT_RPL_PENDING);
+ c3cn_set_flag(sk, C3CN_ABORT_SHUTDOWN);
+
+ /* Purge the send queue so we don't send anything after an abort. */
+ t3_purge_write_queue(sk);
+
+ if (c3cn_flag(sk, C3CN_CLOSE_CON_REQUESTED) && is_t3a(C3CN_CDEV(sk)))
+ mode |= CPL_ABORT_POST_CLOSE_REQ;
+
+ if (!skb)
+ skb = alloc_skb(sizeof(*req), GFP_KERNEL | __GFP_NOFAIL);
+ skb->priority = mkprio(CPL_PRIORITY_DATA, sk);
+ set_arp_failure_handler(skb, abort_arp_failure);
+
+ req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+ req->wr.wr_lo = htonl(V_WR_TID(tid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
+ req->rsvd0 = htonl(tp->snd_nxt);
+ req->rsvd1 = !c3cn_flag(sk, C3CN_TX_DATA_SENT);
+ req->cmd = mode;
+ /*
+ * In SYN_SENT the request is not sent now; it is parked on the
+ * socket's out_of_order_queue instead.
+ */
+ if (sk->sk_state == TCP_SYN_SENT)
+ __skb_queue_tail(&tp->out_of_order_queue, skb);
+ else
+ l2t_send(C3CN_CDEV(sk), skb, C3CN_L2T(sk));
+ return 0;
+}
+
+/*
+ * Remaining send-buffer space for a socket: the adapter's max_host_sndbuf
+ * tunable minus the bytes currently queued.  Returns 0 when the socket has
+ * no device.  Must be called with the socket locked, otherwise the device
+ * pointer may be NULL.
+ */
+static inline int chelsio_wspace(const struct sock *sk)
+{
+	struct t3cdev *dev = C3CN_CDEV(sk);
+
+	if (!dev)
+		return 0;
+
+	return CXGB3_TCP_TUNABLE(dev, max_host_sndbuf) - sk->sk_wmem_queued;
+}
+
+/*
+ * Return non-zero if chelsio_wspace() reports any send-buffer space left.
+ */
+static inline int tcp_memory_free(struct sock *sk)
+{
+ return chelsio_wspace(sk) > 0;
+}
+
+/*
+ * Add a list of skbs to a socket send queue. This interface is intended for
+ * use by in-kernel ULPs. The skbs must comply with the max size limit of the
+ * device and have a headroom of at least TX_HEADER_LEN bytes.
+ *
+ * @skb is the head of a list chained through skb->next.  Takes the socket
+ * lock, waits for the connection and for send-buffer space as needed, queues
+ * each skb and then pushes the queue to the HW.
+ *
+ * Returns the number of payload bytes queued.  If an error occurs before any
+ * skb was queued, returns the result of sk_stream_error() instead.
+ * NOTE(review): if an skb in the middle of the list fails the headroom
+ * check, the byte count so far is returned and the remaining skbs are
+ * neither queued nor freed here -- confirm the caller handles that.
+ */
+static int t3_sendskb(struct sock *sk, struct sk_buff *skb, int flags)
+{
+ struct sk_buff *next;
+ struct tcp_sock *tp = tcp_sk(sk);
+ int err, copied = 0;
+ long timeo;
+
+ lock_sock(sk);
+ timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+ if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
+ (err = sk_stream_wait_connect(sk, &timeo)) != 0)
+ goto out_err;
+
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+ err = -EPIPE;
+ if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+ goto out_err;
+
+ /*
+ * We check for send buffer space once for the whole skb list. It
+ * isn't critical if we end up overrunning the send buffer limit as we
+ * do not allocate any new memory. The benefit is we don't need to
+ * perform intermediate packet pushes.
+ */
+ while (!tcp_memory_free(sk)) {
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = wait_for_mem(sk, &timeo);
+ if (err)
+ goto out_err;
+ }
+
+ while (skb) {
+ if (unlikely(skb_headroom(skb) < TX_HEADER_LEN)) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ /* Detach the skb from the caller's list before queueing it. */
+ next = skb->next;
+ skb->next = NULL;
+ skb_entail(sk, skb, C3CB_FLAG_NO_APPEND | C3CB_FLAG_NEED_HDR);
+ copied += skb->len;
+ /* write_seq also advances by the bytes the HW will add. */
+ tp->write_seq += skb->len + ulp_extra_len(skb);
+ skb = next;
+ }
+done:
+ if (likely(skb_queue_len(&sk->sk_write_queue)))
+ t3_push_frames(sk, 1);
+ release_sock(sk);
+ return copied;
+
+out_err:
+ /* Only report the error if nothing was queued; then share the exit path. */
+ if (copied == 0)
+ copied = sk_stream_error(sk, flags, err);
+ goto done;
+}
+
+/*
+ * Low-level utility routines for primary API functions.
+ * =====================================================
+ */
+/* routines to implement CPL message processing */
+static void sock_act_establish(struct sock *, struct sk_buff *);
+static void active_open_failed(struct sock *, struct sk_buff *);
+static void wr_ack(struct sock *, struct sk_buff *);
+static void do_peer_fin(struct sock *, struct sk_buff *);
+static void process_abort_req(struct sock *, struct sk_buff *);
+static void process_abort_rpl(struct sock *, struct sk_buff *);
+static void process_close_con_rpl(struct sock *, struct sk_buff *);
+static void process_rx_iscsi_hdr(struct sock *, struct sk_buff *);
+
+static struct sk_buff *__get_cpl_reply_skb(struct sk_buff *, size_t, gfp_t);
+
+static int t3_connect(struct sock *, struct net_device *);
+static void tcp_uncork(struct sock *);
+static void tcp_push(struct sock *, int);
+static void fail_act_open(struct sock *, int);
+static void init_offload_sk(struct sock *, struct t3cdev *, struct dst_entry