From: Eric Paris on
Some system calls, such as loading a module, can cause filesystem activity
such as creating lots of new files in /sys or debugfs. Audit can only capture
20 inodes in a single syscall since it stores them in a fixed length array.
This patch shrinks the fixed length array to 5 names (the number used by a
rename operation) and then allocates names dynamically as they are needed
after 5. It should shut up the complaints we sometimes see about
'name_count maxed, losing inode data" such as that seen in Red Hat bugzilla
445757.

Signed-off-by: Eric Paris <eparis(a)redhat.com>
---

kernel/auditsc.c | 402 +++++++++++++++++++++++++++++-------------------------
1 files changed, 215 insertions(+), 187 deletions(-)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 92212c4..a2a85f7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -70,8 +70,9 @@
#include "audit.h"

/* AUDIT_NAMES is the number of slots we reserve in the audit_context
- * for saving names from getname(). */
-#define AUDIT_NAMES 20
+ * for saving names from getname(). If we get more names we will allocate
+ * a name dynamically and also add those to the list anchored by names_list. */
+#define AUDIT_NAMES 5

/* Indicates that audit should log the full pathname. */
#define AUDIT_NAME_FULL -1
@@ -100,9 +101,8 @@ struct audit_cap_data {
*
* Further, in fs/namei.c:path_lookup() we store the inode and device. */
struct audit_names {
+ struct list_head list; /* audit_context->names_list */
const char *name;
- int name_len; /* number of name's characters to log */
- unsigned name_put; /* call __putname() for this name */
unsigned long ino;
dev_t dev;
umode_t mode;
@@ -112,6 +112,14 @@ struct audit_names {
u32 osid;
struct audit_cap_data fcap;
unsigned int fcap_ver;
+ int name_len; /* number of name's characters to log */
+ bool name_put; /* call __putname() for this name */
+ /*
+ * This was an allocated audit_names and not from the array of
+ * names allocated in the task audit context. Thus this name
+ * should be freed on syscall exit
+ */
+ bool should_free;
};

struct audit_aux_data {
@@ -173,8 +181,17 @@ struct audit_context {
long return_code;/* syscall return code */
u64 prio;
int return_valid; /* return code is valid */
- int name_count;
- struct audit_names names[AUDIT_NAMES];
+ /*
+ * The names_list is the list of all audit_names collected during this
+ * syscall. The first AUDIT_NAMES entries in the names_list will
+ * actually be from the preallocated_names array for performance
+ * reasons. Except during allocation they should never be referenced
+ * through the preallocated_names array and should only be found/used
+ * by running the names_list.
+ */
+ struct audit_names preallocated_names[AUDIT_NAMES];
+ int name_count; /* total records in names_list */
+ struct list_head names_list; /* anchor for struct audit_names->list */
char * filterkey; /* key for rule that triggered record */
struct path pwd;
struct audit_context *previous; /* For nested syscalls */
@@ -302,17 +319,18 @@ static int audit_match_perm(struct audit_context *ctx, int mask)

static int audit_match_filetype(struct audit_context *ctx, int val)
{
- int index;
+ struct audit_names *n;
mode_t mode = (mode_t)val;

if (unlikely(!ctx))
return 0;

- for (index = 0; index < ctx->name_count; index++) {
- if ((ctx->names[index].ino != -1) &&
- ((ctx->names[index].mode & S_IFMT) == mode))
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if ((n->ino != -1) &&
+ ((n->mode & S_IFMT) == mode))
return 1;
}
+
return 0;
}

@@ -445,11 +463,12 @@ static int audit_filter_rules(struct task_struct *tsk,
enum audit_state *state)
{
const struct cred *cred = get_task_cred(tsk);
- int i, j, need_sid = 1;
+ int i, need_sid = 1;
u32 sid;

for (i = 0; i < rule->field_count; i++) {
struct audit_field *f = &rule->fields[i];
+ struct audit_names *n;
int result = 0;

switch (f->type) {
@@ -512,8 +531,8 @@ static int audit_filter_rules(struct task_struct *tsk,
result = audit_comparator(MAJOR(name->dev),
f->op, f->val);
else if (ctx) {
- for (j = 0; j < ctx->name_count; j++) {
- if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_comparator(MAJOR(n->dev), f->op, f->val)) {
++result;
break;
}
@@ -525,8 +544,8 @@ static int audit_filter_rules(struct task_struct *tsk,
result = audit_comparator(MINOR(name->dev),
f->op, f->val);
else if (ctx) {
- for (j = 0; j < ctx->name_count; j++) {
- if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_comparator(MINOR(n->dev), f->op, f->val)) {
++result;
break;
}
@@ -537,8 +556,8 @@ static int audit_filter_rules(struct task_struct *tsk,
if (name)
result = (name->ino == f->val);
else if (ctx) {
- for (j = 0; j < ctx->name_count; j++) {
- if (audit_comparator(ctx->names[j].ino, f->op, f->val)) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_comparator(n->ino, f->op, f->val)) {
++result;
break;
}
@@ -593,11 +612,10 @@ static int audit_filter_rules(struct task_struct *tsk,
name->osid, f->type, f->op,
f->lsm_rule, ctx);
} else if (ctx) {
- for (j = 0; j < ctx->name_count; j++) {
- if (security_audit_rule_match(
- ctx->names[j].osid,
- f->type, f->op,
- f->lsm_rule, ctx)) {
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (security_audit_rule_match(n->osid, f->type,
+ f->op, f->lsm_rule,
+ ctx)) {
++result;
break;
}
@@ -710,39 +728,53 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
return AUDIT_BUILD_CONTEXT;
}

-/* At syscall exit time, this filter is called if any audit_names[] have been
+/*
+ * Given an audit_name check the inode hash table to see if they match.
+ * Called holding the rcu read lock to protect the use of audit_inode_hash
+ */
+static int audit_filter_inode_name(struct task_struct *tsk,
+ struct audit_names *n,
+ struct audit_context *ctx) {
+ int word, bit;
+ int h = audit_hash_ino((u32)n->ino);
+ struct list_head *list = &audit_inode_hash[h];
+ struct audit_entry *e;
+ enum audit_state state;
+
+ word = AUDIT_WORD(ctx->major);
+ bit = AUDIT_BIT(ctx->major);
+
+ if (list_empty(list))
+ return 0;
+
+ list_for_each_entry_rcu(e, list, list) {
+ if ((e->rule.mask[word] & bit) == bit &&
+ audit_filter_rules(tsk, &e->rule, ctx, n, &state)) {
+ ctx->current_state = state;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* At syscall exit time, this filter is called if any audit_names have been
* collected during syscall processing. We only check rules in sublists at hash
- * buckets applicable to the inode numbers in audit_names[].
+ * buckets applicable to the inode numbers in audit_names.
* Regarding audit_state, same rules apply as for audit_filter_syscall().
*/
void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
{
- int i;
- struct audit_entry *e;
- enum audit_state state;
+ struct audit_names *n;

if (audit_pid && tsk->tgid == audit_pid)
return;

rcu_read_lock();
- for (i = 0; i < ctx->name_count; i++) {
- int word = AUDIT_WORD(ctx->major);
- int bit = AUDIT_BIT(ctx->major);
- struct audit_names *n = &ctx->names[i];
- int h = audit_hash_ino((u32)n->ino);
- struct list_head *list = &audit_inode_hash[h];

- if (list_empty(list))
- continue;
-
- list_for_each_entry_rcu(e, list, list) {
- if ((e->rule.mask[word] & bit) == bit &&
- audit_filter_rules(tsk, &e->rule, ctx, n, &state)) {
- rcu_read_unlock();
- ctx->current_state = state;
- return;
- }
- }
+ list_for_each_entry(n, &ctx->names_list, list) {
+ if (audit_filter_inode_name(tsk, n, ctx))
+ break;
}
rcu_read_unlock();
}
@@ -786,7 +818,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,

static inline void audit_free_names(struct audit_context *context)
{
- int i;
+ struct audit_names *n, *next;

#if AUDIT_DEBUG == 2
if (context->put_count + context->ino_count != context->name_count) {
@@ -797,10 +829,9 @@ static inline void audit_free_names(struct audit_context *context)
context->serial, context->major, context->in_syscall,
context->name_count, context->put_count,
context->ino_count);
- for (i = 0; i < context->name_count; i++) {
+ list_for_each_entry(n, &context->names_list, list) {
printk(KERN_ERR "names[%d] = %p = %s\n", i,
- context->names[i].name,
- context->names[i].name ?: "(null)");
+ n->name, n->name ?: "(null)");
}
dump_stack();
return;
@@ -811,9 +842,12 @@ static inline void audit_free_names(struct audit_context *context)
context->ino_count = 0;
#endif

- for (i = 0; i < context->name_count; i++) {
- if (context->names[i].name && context->names[i].name_put)
- __putname(context->names[i].name);
+ list_for_each_entry_safe(n, next, &context->names_list, list) {
+ list_del(&n->list);
+ if (n->name && n->name_put)
+ __putname(n->name);
+ if (n->should_free)
+ kfree(n);
}
context->name_count = 0;
path_put(&context->pwd);
@@ -851,6 +885,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
return NULL;
audit_zero_context(context, state);
INIT_LIST_HEAD(&context->killed_trees);
+ INIT_LIST_HEAD(&context->names_list);
return context;
}

@@ -1307,6 +1342,68 @@ static void show_special(struct audit_context *context, int *call_panic)
audit_log_end(ab);
}

+static void audit_log_name(struct audit_context *context, struct audit_names *n,
+ int record_num, int *call_panic)
+{
+ struct audit_buffer *ab;
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+ if (!ab)
+ return; /* audit_panic has been called */
+
+ audit_log_format(ab, "item=%d", record_num);
+
+ if (n->name) {
+ switch (n->name_len) {
+ case AUDIT_NAME_FULL:
+ /* log the full path */
+ audit_log_format(ab, " name=");
+ audit_log_untrustedstring(ab, n->name);
+ break;
+ case 0:
+ /* name was specified as a relative path and the
+ * directory component is the cwd */
+ audit_log_d_path(ab, "name=", &context->pwd);
+ break;
+ default:
+ /* log the name's directory component */
+ audit_log_format(ab, " name=");
+ audit_log_n_untrustedstring(ab, n->name,
+ n->name_len);
+ }
+ } else
+ audit_log_format(ab, " name=(null)");
+
+ if (n->ino != (unsigned long)-1) {
+ audit_log_format(ab, " inode=%lu"
+ " dev=%02x:%02x mode=%#o"
+ " ouid=%u ogid=%u rdev=%02x:%02x",
+ n->ino,
+ MAJOR(n->dev),
+ MINOR(n->dev),
+ n->mode,
+ n->uid,
+ n->gid,
+ MAJOR(n->rdev),
+ MINOR(n->rdev));
+ }
+ if (n->osid != 0) {
+ char *ctx = NULL;
+ u32 len;
+ if (security_secid_to_secctx(
+ n->osid, &ctx, &len)) {
+ audit_log_format(ab, " osid=%u", n->osid);
+ *call_panic = 2;
+ } else {
+ audit_log_format(ab, " obj=%s", ctx);
+ security_release_secctx(ctx, len);
+ }
+ }
+
+ audit_log_fcaps(ab, n);
+
+ audit_log_end(ab);
+}
+
static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
{
const struct cred *cred;
@@ -1314,6 +1411,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
struct audit_buffer *ab;
struct audit_aux_data *aux;
const char *tty;
+ struct audit_names *n;

/* tsk == current */
context->pid = tsk->pid;
@@ -1453,66 +1551,10 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
audit_log_end(ab);
}
}
- for (i = 0; i < context->name_count; i++) {
- struct audit_names *n = &context->names[i];
-
- ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
- if (!ab)
- continue; /* audit_panic has been called */

- audit_log_format(ab, "item=%d", i);
-
- if (n->name) {
- switch(n->name_len) {
- case AUDIT_NAME_FULL:
- /* log the full path */
- audit_log_format(ab, " name=");
- audit_log_untrustedstring(ab, n->name);
- break;
- case 0:
- /* name was specified as a relative path and the
- * directory component is the cwd */
- audit_log_d_path(ab, "name=", &context->pwd);
- break;
- default:
- /* log the name's directory component */
- audit_log_format(ab, " name=");
- audit_log_n_untrustedstring(ab, n->name,
- n->name_len);
- }
- } else
- audit_log_format(ab, " name=(null)");
-
- if (n->ino != (unsigned long)-1) {
- audit_log_format(ab, " inode=%lu"
- " dev=%02x:%02x mode=%#o"
- " ouid=%u ogid=%u rdev=%02x:%02x",
- n->ino,
- MAJOR(n->dev),
- MINOR(n->dev),
- n->mode,
- n->uid,
- n->gid,
- MAJOR(n->rdev),
- MINOR(n->rdev));
- }
- if (n->osid != 0) {
- char *ctx = NULL;
- u32 len;
- if (security_secid_to_secctx(
- n->osid, &ctx, &len)) {
- audit_log_format(ab, " osid=%u", n->osid);
- call_panic = 2;
- } else {
- audit_log_format(ab, " obj=%s", ctx);
- security_release_secctx(ctx, len);
- }
- }
-
- audit_log_fcaps(ab, n);
-
- audit_log_end(ab);
- }
+ i = 0;
+ list_for_each_entry(n, &context->names_list, list)
+ audit_log_name(context, n, i++, &call_panic);

/* Send end of event record to help user space know we are finished */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
@@ -1804,6 +1846,30 @@ retry:
#endif
}

+static struct audit_names *audit_alloc_name(struct audit_context *context)
+{
+ struct audit_names *aname;
+
+ if (context->name_count < AUDIT_NAMES) {
+ aname = &context->preallocated_names[context->name_count];
+ memset(aname, 0, sizeof(*aname));
+ } else {
+ aname = kzalloc(sizeof(*aname), GFP_NOFS);
+ if (!aname)
+ return NULL;
+ aname->should_free = true;
+ }
+
+ aname->ino = (unsigned long)-1;
+ list_add_tail(&aname->list, &context->names_list);
+
+ context->name_count++;
+#if AUDIT_DEBUG
+ context->ino_count++;
+#endif
+ return aname;
+}
+
/**
* audit_getname - add a name to the list
* @name: name to add
@@ -1814,6 +1880,7 @@ retry:
void __audit_getname(const char *name)
{
struct audit_context *context = current->audit_context;
+ struct audit_names *n;

if (IS_ERR(name) || !name)
return;
@@ -1826,13 +1893,15 @@ void __audit_getname(const char *name)
#endif
return;
}
- BUG_ON(context->name_count >= AUDIT_NAMES);
- context->names[context->name_count].name = name;
- context->names[context->name_count].name_len = AUDIT_NAME_FULL;
- context->names[context->name_count].name_put = 1;
- context->names[context->name_count].ino = (unsigned long)-1;
- context->names[context->name_count].osid = 0;
- ++context->name_count;
+
+ n = audit_alloc_name(context);
+ if (!n)
+ return;
+
+ n->name = name;
+ n->name_len = AUDIT_NAME_FULL;
+ n->name_put = true;
+
if (!context->pwd.dentry) {
read_lock(&current->fs->lock);
context->pwd = current->fs->pwd;
@@ -1859,12 +1928,13 @@ void audit_putname(const char *name)
printk(KERN_ERR "%s:%d(:%d): __putname(%p)\n",
__FILE__, __LINE__, context->serial, name);
if (context->name_count) {
+ struct audit_names *n;
int i;
- for (i = 0; i < context->name_count; i++)
+
+ list_for_each_entry(n, &context->names_list, list)
printk(KERN_ERR "name[%d] = %p = %s\n", i,
- context->names[i].name,
- context->names[i].name ?: "(null)");
- }
+ n->name, n->name ?: "(null)");
+ }
#endif
__putname(name);
}
@@ -1885,39 +1955,11 @@ void audit_putname(const char *name)
#endif
}

-static int audit_inc_name_count(struct audit_context *context,
- const struct inode *inode)
-{
- if (context->name_count >= AUDIT_NAMES) {
- if (inode)
- printk(KERN_DEBUG "name_count maxed, losing inode data: "
- "dev=%02x:%02x, inode=%lu\n",
- MAJOR(inode->i_sb->s_dev),
- MINOR(inode->i_sb->s_dev),
- inode->i_ino);
-
- else
- printk(KERN_DEBUG "name_count maxed, losing inode data\n");
- return 1;
- }
- context->name_count++;
-#if AUDIT_DEBUG
- context->ino_count++;
-#endif
- return 0;
-}
-
-
static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry)
{
struct cpu_vfs_cap_data caps;
int rc;

- memset(&name->fcap.permitted, 0, sizeof(kernel_cap_t));
- memset(&name->fcap.inheritable, 0, sizeof(kernel_cap_t));
- name->fcap.fE = 0;
- name->fcap_ver = 0;
-
if (!dentry)
return 0;

@@ -1957,30 +1999,25 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dent
*/
void __audit_inode(const char *name, const struct dentry *dentry)
{
- int idx;
struct audit_context *context = current->audit_context;
const struct inode *inode = dentry->d_inode;
+ struct audit_names *n;

if (!context->in_syscall)
return;
- if (context->name_count
- && context->names[context->name_count-1].name
- && context->names[context->name_count-1].name == name)
- idx = context->name_count - 1;
- else if (context->name_count > 1
- && context->names[context->name_count-2].name
- && context->names[context->name_count-2].name == name)
- idx = context->name_count - 2;
- else {
- /* FIXME: how much do we care about inodes that have no
- * associated name? */
- if (audit_inc_name_count(context, inode))
- return;
- idx = context->name_count - 1;
- context->names[idx].name = NULL;
+
+ list_for_each_entry_reverse(n, &context->names_list, list) {
+ if (n->name && (n->name == name))
+ goto out;
}
+
+ /* unable to find the name from a previous getname() */
+ n = audit_alloc_name(context);
+ if (!n)
+ return;
+out:
handle_path(dentry);
- audit_copy_inode(&context->names[idx], dentry, inode);
+ audit_copy_inode(n, dentry, inode);
}

/**
@@ -1999,11 +2036,11 @@ void __audit_inode(const char *name, const struct dentry *dentry)
void __audit_inode_child(const struct dentry *dentry,
const struct inode *parent)
{
- int idx;
struct audit_context *context = current->audit_context;
const char *found_parent = NULL, *found_child = NULL;
const struct inode *inode = dentry->d_inode;
const char *dname = dentry->d_name.name;
+ struct audit_names *n;
int dirlen = 0;

if (!context->in_syscall)
@@ -2013,9 +2050,7 @@ void __audit_inode_child(const struct dentry *dentry,
handle_one(inode);

/* parent is more likely, look for it first */
- for (idx = 0; idx < context->name_count; idx++) {
- struct audit_names *n = &context->names[idx];
-
+ list_for_each_entry(n, &context->names_list, list) {
if (!n->name)
continue;

@@ -2028,9 +2063,7 @@ void __audit_inode_child(const struct dentry *dentry,
}

/* no matching parent, look for matching child */
- for (idx = 0; idx < context->name_count; idx++) {
- struct audit_names *n = &context->names[idx];
-
+ list_for_each_entry(n, &context->names_list, list) {
if (!n->name)
continue;

@@ -2048,34 +2081,29 @@ void __audit_inode_child(const struct dentry *dentry,

add_names:
if (!found_parent) {
- if (audit_inc_name_count(context, parent))
+ n = audit_alloc_name(context);
+ if (!n)
return;
- idx = context->name_count - 1;
- context->names[idx].name = NULL;
- audit_copy_inode(&context->names[idx], NULL, parent);
+ audit_copy_inode(n, NULL, parent);
}

if (!found_child) {
- if (audit_inc_name_count(context, inode))
+ n = audit_alloc_name(context);
+ if (!n)
return;
- idx = context->name_count - 1;

/* Re-use the name belonging to the slot for a matching parent
* directory. All names for this context are relinquished in
* audit_free_names() */
if (found_parent) {
- context->names[idx].name = found_parent;
- context->names[idx].name_len = AUDIT_NAME_FULL;
+ n->name = found_parent;
+ n->name_len = AUDIT_NAME_FULL;
/* don't call __putname() */
- context->names[idx].name_put = 0;
- } else {
- context->names[idx].name = NULL;
+ n->name_put = false;
}

if (inode)
- audit_copy_inode(&context->names[idx], NULL, inode);
- else
- context->names[idx].ino = (unsigned long)-1;
+ audit_copy_inode(n, NULL, inode);
}
}
EXPORT_SYMBOL_GPL(__audit_inode_child);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/