159 lines
5.2 KiB
ReStructuredText
159 lines
5.2 KiB
ReStructuredText
|
.. SPDX-License-Identifier: GPL-2.0
|
||
|
|
||
|
====================================================================
|
||
|
Reference-count design for elements of lists/arrays protected by RCU
|
||
|
====================================================================
|
||
|
|
||
|
|
||
|
Please note that the percpu-ref feature is likely your first
|
||
|
stop if you need to combine reference counts and RCU. Please see
|
||
|
include/linux/percpu-refcount.h for more information. However, in
|
||
|
those unusual cases where percpu-ref would consume too much memory,
|
||
|
please read on.
|
||
|
|
||
|
------------------------------------------------------------------------
|
||
|
|
||
|
Reference counting on elements of lists which are protected by traditional
|
||
|
reader/writer spinlocks or semaphores is straightforward:
|
||
|
|
||
|
CODE LISTING A::
|
||
|
|
||
|
1. 2.
|
||
|
add() search_and_reference()
|
||
|
{ {
|
||
|
alloc_object read_lock(&list_lock);
|
||
|
... search_for_element
|
||
|
atomic_set(&el->rc, 1); atomic_inc(&el->rc);
|
||
|
write_lock(&list_lock); ...
|
||
|
add_element read_unlock(&list_lock);
|
||
|
... ...
|
||
|
write_unlock(&list_lock); }
|
||
|
}
|
||
|
|
||
|
3. 4.
|
||
|
release_referenced() delete()
|
||
|
{ {
|
||
|
... write_lock(&list_lock);
|
||
|
if(atomic_dec_and_test(&el->rc)) ...
|
||
|
kfree(el);
|
||
|
... remove_element
|
||
|
} write_unlock(&list_lock);
|
||
|
...
|
||
|
if (atomic_dec_and_test(&el->rc))
|
||
|
kfree(el);
|
||
|
...
|
||
|
}
|
||
|
|
||
|
If this list/array is made lock-free using RCU, as in changing the
|
||
|
write_lock() in add() and delete() to spin_lock() and changing read_lock()
|
||
|
in search_and_reference() to rcu_read_lock(), the atomic_inc() in
|
||
|
search_and_reference() could potentially hold a reference to an element which
|
||
|
has already been deleted from the list/array. Use atomic_inc_not_zero()
|
||
|
in this scenario as follows:
|
||
|
|
||
|
CODE LISTING B::
|
||
|
|
||
|
1. 2.
|
||
|
add() search_and_reference()
|
||
|
{ {
|
||
|
alloc_object rcu_read_lock();
|
||
|
... search_for_element
|
||
|
atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) {
|
||
|
spin_lock(&list_lock); rcu_read_unlock();
|
||
|
return FAIL;
|
||
|
add_element }
|
||
|
... ...
|
||
|
spin_unlock(&list_lock); rcu_read_unlock();
|
||
|
} }
|
||
|
3. 4.
|
||
|
release_referenced() delete()
|
||
|
{ {
|
||
|
... spin_lock(&list_lock);
|
||
|
if (atomic_dec_and_test(&el->rc)) ...
|
||
|
call_rcu(&el->head, el_free); remove_element
|
||
|
... spin_unlock(&list_lock);
|
||
|
} ...
|
||
|
if (atomic_dec_and_test(&el->rc))
|
||
|
call_rcu(&el->head, el_free);
|
||
|
...
|
||
|
}
|
||
|
|
||
|
Sometimes, a reference to the element needs to be obtained in the
|
||
|
update (write) stream. In such cases, atomic_inc_not_zero() might be
|
||
|
overkill, since we hold the update-side spinlock. One might instead
|
||
|
use atomic_inc() in such cases.
|
||
|
|
||
|
It is not always convenient to deal with "FAIL" in the
|
||
|
search_and_reference() code path. In such cases, the
|
||
|
atomic_dec_and_test() may be moved from delete() to el_free()
|
||
|
as follows:
|
||
|
|
||
|
CODE LISTING C::
|
||
|
|
||
|
1. 2.
|
||
|
add() search_and_reference()
|
||
|
{ {
|
||
|
alloc_object rcu_read_lock();
|
||
|
... search_for_element
|
||
|
atomic_set(&el->rc, 1); atomic_inc(&el->rc);
|
||
|
spin_lock(&list_lock); ...
|
||
|
|
||
|
add_element rcu_read_unlock();
|
||
|
... }
|
||
|
spin_unlock(&list_lock); 4.
|
||
|
} delete()
|
||
|
3. {
|
||
|
release_referenced() spin_lock(&list_lock);
|
||
|
{ ...
|
||
|
... remove_element
|
||
|
if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock);
|
||
|
kfree(el); ...
|
||
|
... call_rcu(&el->head, el_free);
|
||
|
} ...
|
||
|
5. }
|
||
|
void el_free(struct rcu_head *rhp)
|
||
|
{
|
||
|
release_referenced();
|
||
|
}
|
||
|
|
||
|
The key point is that the initial reference added by add() is not removed
|
||
|
until after a grace period has elapsed following removal. This means that
|
||
|
search_and_reference() cannot find this element, which means that the value
|
||
|
of el->rc cannot increase. Thus, once it reaches zero, there are no
|
||
|
readers that can or ever will be able to reference the element. The
|
||
|
element can therefore safely be freed. This in turn guarantees that if
|
||
|
any reader finds the element, that reader may safely acquire a reference
|
||
|
without checking the value of the reference counter.
|
||
|
|
||
|
A clear advantage of the RCU-based pattern in listing C over the one
|
||
|
in listing B is that any call to search_and_reference() that locates
|
||
|
a given object will succeed in obtaining a reference to that object,
|
||
|
even given a concurrent invocation of delete() for that same object.
|
||
|
Similarly, a clear advantage of both listings B and C over listing A is
|
||
|
that a call to delete() is not delayed even if there is an arbitrarily
|
||
|
large number of calls to search_and_reference() searching for the same
|
||
|
object that delete() was invoked on. Instead, all that is delayed is
|
||
|
the eventual invocation of kfree(), which is usually not a problem on
|
||
|
modern computer systems, even the small ones.
|
||
|
|
||
|
In cases where delete() can sleep, synchronize_rcu() can be called from
|
||
|
delete(), so that el_free() can be subsumed into delete() as follows::
|
||
|
|
||
|
4.
|
||
|
delete()
|
||
|
{
|
||
|
spin_lock(&list_lock);
|
||
|
...
|
||
|
remove_element
|
||
|
spin_unlock(&list_lock);
|
||
|
...
|
||
|
synchronize_rcu();
|
||
|
if (atomic_dec_and_test(&el->rc))
|
||
|
kfree(el);
|
||
|
...
|
||
|
}
|
||
|
|
||
|
As additional examples in the kernel, the pattern in listing C is used by
|
||
|
reference counting of struct pid, while the pattern in listing B is used by
|
||
|
struct posix_acl.
|