Wednesday, September 27, 2017

SMB3 encryption in the kernel cifs module

This is a code walk through which takes you through how the cifs module goes through encrypting its communications with the server.

Mount option parsing.

static int
cifs_parse_mount_options(const char *mountdata, const char *devname,
                         struct smb_vol *vol)
{
..
                case Opt_seal:
                        /* we do not do the following in secFlags because seal
                         * is a per tree connection (mount) not a per socket
                         * or per-smb connection option in the protocol
                         * vol->secFlg |= CIFSSEC_MUST_SEAL;
                         */
                        vol->seal = 1;
..
}

static struct cifs_tcon *
cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
{
..
        if (volume_info->seal) {
                if (ses->server->vals->protocol_id == 0) {
                        cifs_dbg(VFS,
                                 "SMB3 or later required for encryption\n");
                        rc = -EOPNOTSUPP;
                        goto out_fail;
#ifdef CONFIG_CIFS_SMB2
                } else if (tcon->ses->server->capabilities &
                                        SMB2_GLOBAL_CAP_ENCRYPTION)
                        tcon->seal = true;
                else {
                        cifs_dbg(VFS, "Encryption is not supported on share\n");
                        rc = -EOPNOTSUPP;
                        goto out_fail;
#endif /* CONFIG_CIFS_SMB2 */
                }
        }
..
}

So at this point, tcon->seal is set.

We check if encryption is required before making a call using the check
static int encryption_required(const struct cifs_tcon *tcon)
{
        if (!tcon)
                return 0;
        if ((tcon->ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) ||
            (tcon->share_flags & SHI1005_FLAGS_ENCRYPT_DATA))
                return 1;
        if (tcon->seal &&
            (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
                return 1;
        return 0;
}

Which is then called before each SMB call to determine if encryption is required.

int
SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
          __u8 *oplock, struct smb2_file_all_info *buf,
          struct smb2_err_rsp **err_buf)
{
..
        if (encryption_required(tcon))
                flags |= CIFS_TRANSFORM_REQ;
..
}

This check for encryption is made for every call and if required, the CIFS_TRANSFORM_REQ flag is set.

struct smb2_transform_hdr {
        __be32 smb2_buf_length; /* big endian on wire */
                                /* length is only two or three bytes - with
                                 one or two byte type preceding it that MBZ */
        __le32 ProtocolId;      /* 0xFD 'S' 'M' 'B' */
        __u8   Signature[16];
//Below this point, we use the information to authenticate but not encrypt. Only the data is encrypted.
//This is the associate data.

        __u8   Nonce[16];
        __le32 OriginalMessageSize;
        __u16  Reserved1;
        __le16 Flags; /* EncryptionAlgorithm */
        __u64  SessionId;
} __packed;


static int
smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
{
        struct smb_rqst cur_rqst;
        int rc;
        // If the packet doesn't need to be transformed, send it out
        if (!(flags & CIFS_TRANSFORM_REQ))
                return __smb_send_rqst(server, rqst);
        //Check for init_transform_rq and free_transform_rq to transform headers.        if (!server->ops->init_transform_rq ||
            !server->ops->free_transform_rq) {
                cifs_dbg(VFS, "Encryption requested but transform callbacks are missed\n");
                return -EIO;
        }
        //Call the transform functions.
        //Here cur request is a pointer to a cur_rqst on the heap.

        rc = server->ops->init_transform_rq(server, &cur_rqst, rqst);
        if (rc)
                return rc;
        //Send packets.        rc = __smb_send_rqst(server, &cur_rqst);
        //Free packets.        server->ops->free_transform_rq(&cur_rqst);
        return rc;
}

struct smb_version_operations smb30_operations = {
..
        .init_transform_rq = smb3_init_transform_rq,
        .free_transform_rq = smb3_free_transform_rq,
..
};
static int
smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
                       struct smb_rqst *old_rq)
{
        struct kvec *iov;
        struct page **pages;
        struct smb2_transform_hdr *tr_hdr;
        unsigned int npages = old_rq->rq_npages;
        int i;
        int rc = -ENOMEM;
        //Allocate an array of page pointers        pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                return rc;
        //Copy over the data for pages from the old array.
        new_rq->rq_pages = pages;
        new_rq->rq_npages = old_rq->rq_npages;
        new_rq->rq_pagesz = old_rq->rq_pagesz;
        new_rq->rq_tailsz = old_rq->rq_tailsz;
        //Allocate a new set of pages for the new request to be transformed.
        for (i = 0; i < npages; i++) {
                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
                if (!pages[i])
                        goto err_free_pages;
        }
        //Create a new array of iov pointers.
        iov = kmalloc_array(old_rq->rq_nvec, sizeof(struct kvec), GFP_KERNEL);
        if (!iov)
                goto err_free_pages;
        /* copy all iovs from the old except the 1st one (rfc1002 length) */
        memcpy(&iov[1], &old_rq->rq_iov[1],
                                sizeof(struct kvec) * (old_rq->rq_nvec - 1));
        new_rq->rq_iov = iov;
        new_rq->rq_nvec = old_rq->rq_nvec;
        //Add a transform header to be used for the first iov.
        tr_hdr = kmalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
        if (!tr_hdr)
                goto err_free_iov;
        /* fill the 1st iov with a transform header */
        fill_transform_hdr(tr_hdr, old_rq);
        new_rq->rq_iov[0].iov_base = tr_hdr;
        new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
        /* copy pages form the old */
        for (i = 0; i < npages; i++) {
                char *dst = kmap(new_rq->rq_pages[i]);
                char *src = kmap(old_rq->rq_pages[i]);
                unsigned int len = (i < npages - 1) ? new_rq->rq_pagesz :
                                                        new_rq->rq_tailsz;
                memcpy(dst, src, len);
                kunmap(new_rq->rq_pages[i]);
                kunmap(old_rq->rq_pages[i]);
        }
        //Now encrypt the new iov.
        rc = crypt_message(server, new_rq, 1);
        cifs_dbg(FYI, "encrypt message returned %d", rc);
        if (rc)
                goto err_free_tr_hdr;
        return rc;
err_free_tr_hdr:
        kfree(tr_hdr);
err_free_iov:
        kfree(iov);
err_free_pages:
        for (i = i - 1; i >= 0; i--)
                put_page(pages[i]);
        kfree(pages);
        return rc;
}

static int
crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
{
        struct smb2_transform_hdr *tr_hdr =
                        (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
        unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
        int rc = 0;
        struct scatterlist *sg;
        u8 sign[SMB2_SIGNATURE_SIZE] = {};
        u8 key[SMB3_SIGN_KEY_SIZE];
        struct aead_request *req;
        char *iv;
        unsigned int iv_len;
        struct cifs_crypt_result result = {0, };
        struct crypto_aead *tfm;
        unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
        init_completion(&result.completion);
        //Go through the sessions list and get the encryption/decryption key
        rc = smb2_get_enc_key(server, tr_hdr->SessionId, enc, key);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not get %scryption key\n", __func__,
                         enc ? "en" : "de");
                return 0;
        }
        //Allocate the AED transforms and save it in server->secmech.ccmaesencrypt/decrypt
        //This is done only once for the server
        rc = smb3_crypto_aead_allocate(server);
        if (rc) {
                cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
                return rc;
        }
       //use the right tfm
        tfm = enc ? server->secmech.ccmaesencrypt :
                                                server->secmech.ccmaesdecrypt;
        //Set the session key
        rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE);
        if (rc) {
                cifs_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc);
                return rc;
        }
        rc = crypto_aead_setauthsize(tfm, SMB2_SIGNATURE_SIZE);
        if (rc) {
                cifs_dbg(VFS, "%s: Failed to set authsize %d\n", __func__, rc);
                return rc;
        }
        req = aead_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                cifs_dbg(VFS, "%s: Failed to alloc aead request", __func__);
                return -ENOMEM;
        }
        if (!enc) {
                memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE);
                crypt_len += SMB2_SIGNATURE_SIZE;
        }
        sg = init_sg(rqst, sign);
        if (!sg) {
                cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
                goto free_req;
        }
        iv_len = crypto_aead_ivsize(tfm);
         iv = kzalloc(iv_len, GFP_KERNEL);
        if (!iv) {
                cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
                goto free_sg;
        }
        iv[0] = 3;
        memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES128CMM_NONCE);
        aead_request_set_crypt(req, sg, sg, crypt_len, iv);
        aead_request_set_ad(req, assoc_data_len);
        aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                  cifs_crypt_complete, &result);
        rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
        if (rc == -EINPROGRESS || rc == -EBUSY) {
                wait_for_completion(&result.completion);
                rc = result.err;
        }
        if (!rc && enc)
                memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
        kfree(iv);
free_sg:
        kfree(sg);
free_req:
        kfree(req);
        return rc;
}
                                            

Friday, July 15, 2016

RTL-SDR: Logging home energy consumption

I have been using an energy meter for a while to track the energy consumption at home. The system consists of a transmitter and a receiver. The transmitter is placed in a meter box with a clamp on the live feed going into the house and the receiver sits in my office. The transmitter transmits the energy consumption in watts(joules/sec) at certain intervals while the receiver converts these readings from the transmitter to energy consumed in KW-hr and tracks the consumption for a day, week, month and year.

While this system works and allows me to track consumption, I wanted a bit more flexibility in the way energy consumption was displayed. For example, one of the requirements was to break down the energy consumption for various times in the day.

There were multiple solutions available.
1) Use energy consumption monitors which also provide a cloud based service reporting on energy consumption. I rejected this as I am locked into using a particular vendor who may one day decide to shut shop.
2) There are some monitors available which allow you to download data off the the receiver which you can then use. This is a manual process which requires you to connect to the device with a USB cable to pull the data.
3) Find another method to capture the data sent by the transmitter directly and process it. This is the method I finally chose.

The energy consumption monitor I have is an Efergy Elite Classic(Bought second hand on ebay for GBP 21). The transmitter operates in the 433 MHz range and can be set to transmit data in 6/12/18 second intervals.
I also purchased a RTL-SDR USB dongle(GBP 11.99, again bought off ebay) which allows me to tune into various frequencies using software on a computer. You can also use the software(I use gqrx on linux) to scan and determine exactly which frequencies have data being transmitted on them.
An idle raspberry pi was requisitioned for this project and will happily use the RTL-SDR dongle.

Looking around for ideas, I came across the project rtl_433 which uses rtl-sdr libraries and has a large number of decoders already in place. It scans the 433 MHz frequency and automatically decodes data packets for any of the various supported devices. The utility already supports the Efergy energy consumption monitor which is a big plus.

Once the dongle is connected and rtl_433 built, we can simply decode calls over the wire with the command
rtl_433 -f 433550000 -R 36
By default, the rtl_433 utility tunes to 433920000 Hz. The Efergy transmitter transmits on 433550000 Hz I have to pass this frequency with the -f option. This is where I used gqrx to determine the frequency which Efergy transmitter transmits on.

The output is of the form
Power consumption at 110 volts: 92.40 watts
Power consumption at 115 volts: 96.60 watts
Power consumption at 120 volts: 100.80 watts
Power consumption at 220 volts: 184.80 watts
Power consumption at 230 volts: 193.20 watts
Power consumption at 240 volts: 201.60 watts
The mains power in the UK is at 240 volts and it shows my consumption at that voltage to be 201.60 watts. The data in watts is then further processed using a combination of grep, sed and a python script and upload it to Thingspeak. 

Thingspeak is aimed at IoT applications. Thingspeak provides you a REST api to record data. This data can be analysed and used as needed. As a bonus, there are a few Android apps available too which can read the data off the Thingspeak platform.

I first started recording the data in watts updating the Thingspeak channel every time the transmitter transmitted. However the resulting graph was difficult to read because of the huge spikes every time I turned the kettle or the microwave on. To give me a better understanding of my energy consumption, I converted the watts to KW-hr and update the Thingspeak channel every hour instead. This gives me consumption for an hour instead. I preferred looking at the readings in KW-hr as my energy company charges me per KW-hr.

The resulting graph:


Sunday, June 26, 2016

MQTT

MQTT is a lightweight protocol on top of TCP/IP which is used to transfer data between nodes. It was built to be used by devices having low network bandwidth. The low overheads in using this protocol has made it a popular choice for use with IoT devices.

The protocol uses a subscribe/publish method wherein both the data source and the recipient subscribe to a topic on a broker. This model is similar to twitter where a tweet sent by a user can be read by multiple users who subscribe to the user.

A broker is a software run on a more capable hardware which acts as a data distributor. A topic is similar to a subject and is built in a tree structure with its components separated by a '/'. Wildcards are allowed when describing topics to subscribe to.  Mosquitto(http://mosquitto.org/) is the most popular opensource broker at this moment.

Clients connect to the broker and subscribe to the topic. A client wishing to transmit its data(the sender) will publish data for the topic on the broker. Subscribers to the topic on the broker receive the data and process it further. MQTT is data agnostic and the data can take any format the sender wishes.

MQTT apis are available for multiple programming languages at the eclipse paho website.

The PubSub client library is available for use with arduinos.

Some use cases for MQTT are
a) Sensors such as temperature sensors periodically sending information about its surroundings.
b) Sending commands to a device from various sources.

An example project which uses MQTT in an arduino setting is available on the Make website. Here  the automation software - openhab is configured with MQTT bindings to send commands specifying the colour required to the broker(mosquitto) running on a raspberry pi. The ESP8266 connected to a LED strip subscribes to this broker reads the colour data off the broker and sets the light colour accordingly.

More information is on the protocol is available in the mqtt man page at
http://mosquitto.org/man/mqtt-7.html

Wednesday, June 01, 2016

Using Mock

The article is from my notes on setting up a RHEL 7 build environment on a RHEL 6 machine. This can be used to setup a build environment for any of my guest on the host machine.

I have a RHEL 6 based server grade machine which hosts vms belonging to RHEL 6, RHEL 7 and Fedora rawhide. In some instances, I need to be able to compile kernels based on the git tree for a certain guest machine. Trying to do this on a vm usually means I need to export the git tree over NFS and given the resources the vm has, takes a long time. The idea is to compile kernels on the more capable host machine and simply run "make install modules_install" on the guest system.

I export the filesystem containing the git trees using NFS
# cat /etc/exports
/NotBackedUp/scratch 192.168.140.0/24(rw,no_root_squash)


To setup mock,

1) Install mock.
# yum install -y mock

2) Set the epel7 environment as the default environment.
cd /etc/mock; ln -s epel-7-kernel-x86_64.cfg default.cfg

3) Edit epel-7-kernel-x86_64.cfg and add the lines below.

Since the environment is intended to build kernels, install the required dependencies along with some other tools.
config_opts['install'] ='elfutils dwarves rpm-build flex bison inotify-tools bc openssl openssl-devel openssh-clients rsync'
Allow bind mounts. Mount the local directory /NotBackedUp/scratch to /scratch in the mock environment
config_opts['plugin_conf']['bind_mount_enable'] = True
config_opts['plugin_conf']['bind_mount_opts']['dirs'].append(('/NotBackedUp/scratch', '/scratch' ))
4) Add user and group mock

5) Switch over to user mock and initialise the mock environment using the command 
$ mock --init

6) Switch over to the mock shell
$ mock shell

At this point, we are in a chroot shell as a root user in the mock EPEL 7 environment. The directory containing the git tree is available at the directory specified in the mock configuration file. In my case, this is /scratch. 

7) On the guest machine, make sure you mount the exported NFS share and run 
make menuconfig/oldconfig/localmodconfig
to setup the .config file. 

8) Switch back over to the mock environment on the host machine and run 
make; make modules
to build the kernel and the modules.

9) Once the build is done, on the guest vm, go back to the git tree and run
make install modules_install
to install the newly built kernel.

10) Setup grub on the guest machine and reboot into the new kernel.


Thursday, August 20, 2015

Investigation into the effects of server timeouts on the CIFS client.

The investigation is driven by a user request to reduce the time spent by the client waiting for a request to an unresponsive server.

The client awaits responses by the server in the kernel thread "cifsd" which runs the call path
cifs_demultiplex_thread()->cifs_read_from_socket()->cifs_readv_from_socket()->kernel_recvmsg()

We investigate 2 scenarios
a) The server is orderly shutdown ie. the server closes the socket.
b) The server is abruptly taken off the network ie. it has no chance to orderly shutdown the socket. This is simulated using iptables.

In both cases we assume that the client has a share mounted from the server.

Server is orderly shutdown

kernel_recvmsg() call in cifs_readv_from_socket() returns 0.

int
cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
                       unsigned int nr_segs, unsigned int to_read)
{
..
                length = kernel_recvmsg(server->ssocket, &smb_msg,
                                        iov, segs, to_read, 0);
..
                } else if (length <= 0) {
                        cifs_dbg(FYI, "Received no data or error: expecting %d\n"
                                 "got %d", to_read, length);
                        cifs_reconnect(server);
                        total_read = -ECONNABORTED;
                        break;
                }
..
}
A call is made to cifs_reconnect() where an attempt is made to continuously reconnect to the server in the "cifsd" thread every  in the code path
cifs_demultiplex_thread()->cifs_read_from_socket()->cifs_readv_from_socket()->cifs_reconnect()->generic_ip_connect()

int
cifs_reconnect(struct TCP_Server_Info *server)
{
..
        do {
..
                rc = generic_ip_connect(server);
                if (rc) {
                        cifs_dbg(FYI, "reconnect error %d\n", rc);
                        mutex_unlock(&server->srv_mutex);
                        msleep(3000);
                }
..
        } while (server->tcpStatus == CifsNeedReconnect);
        return rc;
}

While the server is down, generic_ip_connect() continously returns -EHOSTDOWN(111). It will sleep for 3 seconds between each reconnect attempt.

While the attempts to reconnect are made in the cifsd thread, any attempt to perform an operation on the share results in the thread calling smb_init() which calls cifs_reconnect_tcon()
ex: when calling ls -l - CIFSSMBUnixQPathInfo() -> smb_init() -> cifs_reconnect_tcon()

static int
cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
{
..
        while (server->tcpStatus == CifsNeedReconnect) {
                wait_event_interruptible_timeout(server->response_q,
                        (server->tcpStatus != CifsNeedReconnect), 10 * HZ);
..
                /*
                 * on "soft" mounts we wait once. Hard mounts keep
                 * retrying until process is killed or server comes
                 * back on-line
                 */
                if (!tcon->retry) {
                        cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n");
                        return -EHOSTDOWN;
                }
        }
..
}
In the while loop above, we return after 10 seconds in case of a soft mount or continously retry in case of hardmounts.

Conclusion:

In cases where the server is shutdown orderly, the client immediately attempts to reconnect. It continues doing this through the cifsd thread keeping an interval of 3 seconds between each attempt. Any attempt to perform an operation on this mount on a soft mounted share(which is the default) will fail after waiting for 10 seconds for the reconnect attempts made by the cifsd thread to succeed.

Server is abruptly shutdown


The server tcp connection is represented by struct TCP_Server_Info. This contains a variable "unsigned long lstrp" which contains the jiffies at which the client last got a response from the server.

struct TCP_Server_Info {
..
        unsigned long lstrp; /* when we got last response from this server */
..
};

This is initialised in cifs_get_tcp_session when the first connection to the server is made.
static struct TCP_Server_Info *
cifs_get_tcp_session(struct smb_vol *volume_info)
{
..
        tcp_ses->lstrp = jiffies;
..
}

This value is set to the latest jiffies at the end of every successful read request.

static int
cifs_demultiplex_thread(void *p)
{
..
        while (server->tcpStatus != CifsExiting) {
..
                mid_entry = server->ops->find_mid(server, buf);
                if (!mid_entry || !mid_entry->receive)
                        length = standard_receive3(server, mid_entry);
                else
                        length = mid_entry->receive(server, mid_entry);
                if (length < 0)
                        continue;
                if (server->large_buf)
                        buf = server->bigbuf;
                server->lstrp = jiffies;
..
        } /* end while !EXITING */
..
}

It is this last response variable server->lstrep which is used to determine the responsiveness of the server.

Before we look at how it is used to determine the server responsiveness, we should also take a look at the echo service. This is the keepalive service used to "ping" the server at regular intervals in case of inactivity. This is done by setting up a work on the cifsiod_wq workqueue which is called ever SMB_ECHO_INTERVAL(60) seconds.

static struct TCP_Server_Info *
cifs_get_tcp_session(struct smb_vol *volume_info)
{
..
        INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
..
        /* queue echo request delayed work */
        queue_delayed_work(cifsiod_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
..
}

After SMB_ECHO_INTERVAL(60) seconds, we call 

static void
cifs_echo_request(struct work_struct *work)
{
        int rc;
        struct TCP_Server_Info *server = container_of(work,
                                        struct TCP_Server_Info, echo.work);
        /*
         * We cannot send an echo if it is disabled or until the
         * NEGOTIATE_PROTOCOL request is done, which is indicated by
         * server->ops->need_neg() == true. Also, no need to ping if
         * we got a response recently.
         */
        if (!server->ops->need_neg || server->ops->need_neg(server) ||
            (server->ops->can_echo && !server->ops->can_echo(server)) ||
            time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
                goto requeue_echo;
        rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
        if (rc)
                cifs_dbg(FYI, "Unable to send echo request to server: %s\n",
                         server->hostname);
requeue_echo:
        queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
}

We call an echo if the session hasn't been negotiated yet and if no response was received since the last echo interval. At the end of the call, we requeue the echo work request on the cifsiod_wq.

Before each call to read on the socket, we first check to make sure that the server is not unresponsive.

int
cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
                       unsigned int nr_segs, unsigned int to_read)
{
..
                if (server_unresponsive(server)) {
                        total_read = -ECONNABORTED;
                        break;
                }
..
                length = kernel_recvmsg(server->ssocket, &smb_msg,
                                        iov, segs, to_read, 0);
..
}

Note that we send an echo call every SMB_ECHO_INTERVAL. So in case of an inactive connections, we should receive a reply atleast after every SMB_ECHO_INTERVAL(60) seconds. The timeout for receives on the socket is set to 7 seconds when setting up the socket in generic_ip_connect(). If no replies have been received for 7 seconds, kernel_recvmsg() timesout and we redo the for loop which checks if the server is unresponsive again.

static bool
server_unresponsive(struct TCP_Server_Info *server)
{
        /*
         * We need to wait 2 echo intervals to make sure we handle such
         * situations right:
         * 1s  client sends a normal SMB request
         * 2s  client gets a response
         * 30s echo workqueue job pops, and decides we got a response recently
         *     and don't need to send another
         * ...
         * 65s kernel_recvmsg times out, and we see that we haven't gotten
         *     a response in >60s.
         */
        if (server->tcpStatus == CifsGood &&
            time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
                cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
                         server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
                cifs_reconnect(server);
                wake_up(&server->response_q);
                return true;
        }
        return false;
}



Any filesystem activity performing an operation on this share uses functions such as SendReceive(), SendReceive2() or SendReceiveBlockingLock() to send over commands to the remote server. They then wait for a response from the server by calling wait_for_response().


static int
wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
{
        int error;
        error = wait_event_freezekillable_unsafe(server->response_q,
                                    midQ->mid_state != MID_REQUEST_SUBMITTED);
        if (error < 0)
                return -ERESTARTSYS;
        return 0;
}

It continues waiting for a response until we finally timeout in the cifs_demultiplex_thread "cifsd" in the call path cifs_demultiplex_thread()->cifs_read_from_socket()->cifs_readv_from_socket()->server_unresponsive(). Once the function server_unresponsive() confirms that the server hasn't replied for atleast 2 * SMB_ECHO_INTERVAL, it calls cifs_reconnect() to mark the TCP_Server_Info as needing a reconnect.

int
cifs_reconnect(struct TCP_Server_Info *server)
{
..
//Mark the TCP_Server_Info as needing reconnect
                server->tcpStatus = CifsNeedReconnect;
..
//Mark all sessions and tcons as needing a reconnect
        list_for_each(tmp, &server->smb_ses_list) {
                ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
                ses->need_reconnect = true;
                ses->ipc_tid = 0;
                list_for_each(tmp2, &ses->tcon_list) {
                        tcon = list_entry(tmp2, struct cifs_tcon, tcon_list);
                        tcon->need_reconnect = true;
                }
        }
..
        spin_lock(&GlobalMid_Lock);
        list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
                mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
//Mark each pending request as requiring a retry
                if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
                        mid_entry->mid_state = MID_RETRY_NEEDED;
                list_move(&mid_entry->qhead, &retry_list);
        }
        spin_unlock(&GlobalMid_Lock);
..
}

We then wake up all pending requests in server_unresponsive()

static bool
server_unresponsive(struct TCP_Server_Info *server)
{
..
        if (server->tcpStatus == CifsGood &&
            time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
                cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
                         server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
//Mark all pending requests as needing retries
                cifs_reconnect(server);
//Wake up all tasks waiting for a response.
                wake_up(&server->response_q);
                return true;
        }
        return false;
}

For the task sending the request, we return and return -EAGAIN requesting the task to be run again.

int
SendReceive(const unsigned int xid, struct cifs_ses *ses,
            struct smb_hdr *in_buf, struct smb_hdr *out_buf,
            int *pbytes_returned, const int timeout)
{
..
//Set request status as submitted
        midQ->mid_state = MID_REQUEST_SUBMITTED;
        cifs_in_send_inc(ses->server);
//Send request
        rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
        cifs_in_send_dec(ses->server);
        cifs_save_when_sent(midQ);
..
//Wait for response
        rc = wait_for_response(ses->server, midQ);
..
//Check response. For a retry, this call will return -EAGAIN
        rc = cifs_sync_mid_result(midQ, ses->server);
        if (rc != 0) {
                add_credits(ses->server, 1, 0);
//Which is returned to the called
                return rc;
        }
..
}

The call attempts to retry and subsequently attempts to reconnect to the tcon

ex: when calling ls -l - CIFSSMBUnixQPathInfo() -> smb_init() -> cifs_reconnect_tcon()

static int
cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
{
..
        while (server->tcpStatus == CifsNeedReconnect) {
                wait_event_interruptible_timeout(server->response_q,
                        (server->tcpStatus != CifsNeedReconnect), 10 * HZ);
..
                /*
                 * on "soft" mounts we wait once. Hard mounts keep
                 * retrying until process is killed or server comes
                 * back on-line
                 */
                if (!tcon->retry) {
                        cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n");
                        return -EHOSTDOWN;
                }
        }
..
}
In the while loop above, we return after 10 seconds in case of a soft mount or continuously retry in case of hardmounts.

Conclusion:

When the server is abruptdly taken off the network, we will mark the server as requiring a reconnect. This can take at max a total of
timeout for recvmsg(7) + 2 * SMB_ECHO_INTERVAL(60) + time waiting for reconnect in cifs_reconnect_tcon() (10) = 137 seconds.

Sunday, June 28, 2015

10 minute Rava Chakuli

This recipe is from Shyamala Kini from Konkani Amchi food. 

Boil 2 cups of water.
Once it comes to rolling boil ,add salt to taste, little jeera, a pinch of hing, pinch of chilly pwd and 1 cup of rava/sooji.
Switch off the cooker and mix well  into a consistency of a dough (something like upma/kesari consistency.)
Cool it till lukewarm.
Then add 1 cup of rice flour and a spoon of butter and some sesame seeds.Knead/mix well.
Put it into moulds and deep fry them on medium flame.
Allow it to cool before tasting

P.S.:Use the same cup for the measurement.


Monday, December 15, 2014

Debugging calls to cifs.upcall

/usr/sbin/cifs.upcall is the request-key help program used to obtain certain data like kerberos keys or results of dns calls from userland processes.

It is sometimes necessary to debug the execution of this helper function for which I use the following methods.


1) Capture debug messages from cifs.upcall.

Edit /etc/rsyslog.conf and add the line

*.* /var/log/debug

Restart rsyslog service and confirm that the log file /var/log/debug has been created which will log all messages.

Attempt the call which will usually be the call to mount the cifs share. You should see the debug messages from cifs.upcall in /var/log/debug.

2) Strace cifs.upcall calls.

First move the original cifs.upcall file
# cd /usr/sbin; mv cifs.upcall cifs.upcall.orig

Create a new text file cifs.upcall with the following content.

#!/bin/bash

echo $@ >> /tmp/upcall.log
strace -fxvto /tmp/cifs.upcall.st -s1000  /usr/sbin/cifs.upcall.orig $@

Make sure this is an executable
# chmod +x cifs.upcall

Now attempt the mount process. You will see the strace output in /tmp/cifs.upcall.st.