** Description changed:

  Hello everyone:
  
  I'm using openstack mitaka and ceph-jewel-10.2.6 to do snapshot actions.
  And sometimes it occurs below deadlock condition.
  
  ps -ef | grep cinder-volume
  gdb -q python-dbg -p xx
- 
  
  I found 3 frames are racing the lock:
  
  Thread 1 (Thread 0x7ffab0fac740 (LWP 1029)):
  #0  0x00007ffab07bf6d5 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0
  #1  0x00007ffa90c936d0 in Wait (mutex=..., this=0x7ffd91027aa0) at 
./common/Cond.h:56
  #2  C_SaferCond::wait (this=this@entry=0x7ffd91027a40) at ./common/Cond.h:202
  #3  0x00007ffa90cbd7fc in 
librbd::ImageState<librbd::ImageCtx>::refresh_if_required (this=0x3fb2fb0) at 
librbd/ImageState.cc:354
  #4  0x00007ffa90ce3c10 in librbd::list_children_info 
(ictx=ictx@entry=0x5257530, parent_spec=..., image_info=std::map with 0 
elements) at librbd/internal.cc:926
  #5  0x00007ffa90ce5614 in librbd::list_children (ictx=ictx@entry=0x5257530, 
names=std::set with 0 elements) at librbd/internal.cc:892
- #6  0x00007ffa90c79e93 in rbd_list_children (image=0x5257530, 
pools=pools@entry=0x584fbb0 "hJ\n\260\372\177", 
pools_len=pools_len@entry=0x7ffd91028118, 
-     images=images@entry=0x55f9640 "\250K\n\260\372\177", 
images_len=images_len@entry=0x7ffd91028120) at librbd/librbd.cc:2423
+ #6  0x00007ffa90c79e93 in rbd_list_children (image=0x5257530, 
pools=pools@entry=0x584fbb0 "hJ\n\260\372\177", 
pools_len=pools_len@entry=0x7ffd91028118,
+     images=images@entry=0x55f9640 "\250K\n\260\372\177", 
images_len=images_len@entry=0x7ffd91028120) at librbd/librbd.cc:2423
  #7  0x00007ffa9a5a3620 in __pyx_pf_3rbd_5Image_76list_children 
(__pyx_v_self=0x59110a8) at rbd.c:15450
  #8  __pyx_pw_3rbd_5Image_77list_children (__pyx_v_self=0x59110a8, 
unused=<optimized out>) at rbd.c:15329
  #9  0x00007ffab0ab487c in PyEval_EvalFrameEx () from 
/lib64/libpython2.7.so.1.0
  #10 0x00007ffab0ab6e3d in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0
  #11 0x00007ffab0ab433c in PyEval_EvalFrameEx () from 
/lib64/libpython2.7.so.1.0
  #12 0x00007ffab0ab6e3d in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0
  #13 0x00007ffab0ab433c in PyEval_EvalFrameEx () from 
/lib64/libpython2.7.so.1.0
  #14 0x00007ffab0ab6e3d in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0
  #15 0x00007ffab0ab433c in PyEval_EvalFrameEx () from 
/lib64/libpython2.7.so.1.0
- #16 0x00007ffab0ab6e3d in PyEval_EvalCodeEx () from 
/lib64/libpython2.7.so.1.0 
+ #16 0x00007ffab0ab6e3d in PyEval_EvalCodeEx () from /lib64/libpython2.7.so.1.0
  ....
  
  Thread 12 (Thread 0x7ffa84ac4700 (LWP 1479895)):
  #0  0x00007ffab07bf03e in pthread_rwlock_wrlock () from /lib64/libpthread.so.0
  #1  0x00007ffa90cc32dc in RWLock::get_write (this=0x52577e0, 
lockdep=<optimized out>) at ./common/RWLock.h:123
  #2  0x00007ffa90d4e7b3 in WLocker (lock=..., this=<synthetic pointer>) at 
./common/RWLock.h:183
  #3  librbd::image::RefreshRequest<librbd::ImageCtx>::apply 
(this=this@entry=0x55f9850) at librbd/image/RefreshRequest.cc:860
  #4  0x00007ffa90d4f778 in 
librbd::image::RefreshRequest<librbd::ImageCtx>::handle_v2_apply 
(this=0x55f9850, result=result@entry=0x7ffa84ac3b2c)
-     at librbd/image/RefreshRequest.cc:655
+     at librbd/image/RefreshRequest.cc:655
  #5  0x00007ffa90d4f92b in 
librbd::util::detail::C_StateCallbackAdapter<librbd::image::RefreshRequest<librbd::ImageCtx>,
 &librbd::image::RefreshRequest<librbd::Ima
  geCtx>::handle_v2_apply, true>::complete (this=0x7ff9e4002f50, r=0) at 
./librbd/Utils.h:66#6  0x00007ffa90cb7644 in ContextWQ::process 
(this=0x3fd9bf0, ctx=0x7ff9e4002f50) at ./common/WorkQueue.h:608
  #7  0x00007ffa90eae54e in ThreadPool::worker (this=0x5180a60, wt=0x56c5430) 
at common/WorkQueue.cc:128
  #8  0x00007ffa90eaf420 in ThreadPool::WorkThread::entry (this=<optimized 
out>) at common/WorkQueue.h:445
  #9  0x00007ffab07bbdc5 in start_thread () from /lib64/libpthread.so.0
  #10 0x00007ffaafde176d in clone () from /lib64/libc.so.6
  
- 
  Thread 13 (Thread 0x7ffa852c5700 (LWP 1479894)):
  #0  0x00007ffab07bee24 in pthread_rwlock_rdlock () from /lib64/libpthread.so.0
  #1  0x00007ffa90c89d2f in RWLock::get_read (this=0x52576c8) at 
./common/RWLock.h:100
  #2  0x00007ffa90cce5d4 in RLocker (lock=..., this=<synthetic pointer>) at 
./common/RWLock.h:161
  #3  librbd::ImageWatcher<librbd::ImageCtx>::handle_payload 
(this=this@entry=0x7ff9e4001d10, payload=..., 
ack_ctx=ack_ctx@entry=0x7ff9e4001690)
-     at librbd/ImageWatcher.cc:673
+     at librbd/ImageWatcher.cc:673
  #4  0x00007ffa90ccee3c in 
operator()<librbd::watch_notify::ReleasedLockPayload> (payload=..., 
this=<optimized out>) at ./librbd/ImageWatcher.h:218
  #5  internal_visit<librbd::watch_notify::ReleasedLockPayload const> 
(operand=..., this=<synthetic pointer>) at 
/usr/include/boost/variant/variant.hpp:1017
  #6  
visitation_impl_invoke_impl<boost::detail::variant::invoke_visitor<librbd::ImageWatcher<librbd::ImageCtx>::HandlePayloadVisitor
 const>, void const*, librbd::w
  atch_notify::ReleasedLockPayload> (storage=0x7ffa852c4838, visitor=<synthetic 
pointer>) at /usr/include/boost/variant/detail/visitation_impl.hpp:130#7  
visitation_impl_invoke<boost::detail::variant::invoke_visitor<librbd::ImageWatcher<librbd::ImageCtx>::HandlePayloadVisitor
 const>, void const*, librbd::watch_
  notify::ReleasedLockPayload, 
boost::variant<librbd::watch_notify::AcquiredLockPayload, 
librbd::watch_notify::ReleasedLockPayload, 
librbd::watch_notify::RequestLockPayload, 
librbd::watch_notify::HeaderUpdatePayload, 
librbd::watch_notify::AsyncProgressPayload, 
librbd::watch_notify::AsyncCompletePayload, 
librbd::watch_notify::FlattenPayload, librbd::watch_notify::ResizePayload, 
librbd::watch_notify::SnapCreatePayload, 
librbd::watch_notify::SnapRemovePayload, 
librbd::watch_notify::SnapRenamePayload, 
librbd::watch_notify::SnapProtectPayload, 
librbd::watch_notify::SnapUnprotectPayload, 
librbd::watch_notify::RebuildObjectMapPayload, 
librbd::watch_notify::RenamePayload, 
librbd::watch_notify::UnknownPayload>::has_fallback_type_> 
(internal_which=<optimized out>, t=0x0, storage=0x7ffa852c4838,     
visitor=<synthetic pointer>) at 
/usr/include/boost/variant/detail/visitation_impl.hpp:173
  #8  visitation_impl<mpl_::int_<0>, 
boost::detail::variant::visitation_impl_step<boost::mpl::l_iter<boost::mpl::l_item<mpl_::long_<16l>,
 librbd::watch_notify::Acqu
  iredLockPayload, boost::mpl::l_item<mpl_::long_<15l>, 
librbd::watch_notify::ReleasedLockPayload, boost::mpl::l_item<mpl_::long_<14l>, 
librbd::watch_notify::RequestLockPayload, boost::mpl::l_item<mpl_::long_<13l>, 
librbd::watch_notify::HeaderUpdatePayload, boost::mpl::l_item<mpl_::long_<12l>, 
librbd::watch_notify::AsyncProgressPayload, 
boost::mpl::l_item<mpl_::long_<11l>, 
librbd::watch_notify::AsyncCompletePayload, 
boost::mpl::l_item<mpl_::long_<10l>, librbd::watch_notify::FlattenPayload, 
boost::mpl::l_item<mpl_::long_<9l>, librbd::watch_notify::ResizePayload, 
boost::mpl::l_item<mpl_::long_<8l>, librbd::watch_notify::SnapCreatePayload, 
boost::mpl::l_item<mpl_::long_<7l>, librbd::watch_notify::SnapRemovePayload, 
boost::mpl::l_item<mpl_::long_<6l>, librbd::watch_notify::SnapRenamePayload, 
boost::mpl::l_item<mpl_::long_<5l>, librbd::watch_notify::SnapProtectPayload, 
boost::mpl::l_item<mpl_::long_<4l>, librbd::watch_notify::SnapUnprotectPayload, 
boost::mpl::l_item<mpl_::long_<3l>, 
librbd::watch_notify::RebuildObjectMapPayload, 
boost::mpl::l_item<mpl_::long_<2l>, librbd::watch_notify::RenamePayload, 
boost::mpl::l_item<mpl_::long_<1l>, librbd::watch_notify::UnknownPayload, 
boost::mpl::l_end> > > > > > > > > > > > > > > > >, 
boost::mpl::l_iter<boost::mpl::l_end> >, 
boost::detail::variant::invoke_visitor<librbd::ImageWatcher<librbd::ImageCtx>::HandlePayloadVisitor
 const>, void const*, boost::variant<librbd::watch_notify::AcquiredLockPayload, 
librbd::watch_notify::ReleasedLockPayload, 
librbd::watch_notify::RequestLockPayload, 
librbd::watch_notify::HeaderUpdatePayload, 
librbd::watch_notify::AsyncProgressPayload, 
librbd::watch_notify::AsyncCompletePayload, 
librbd::watch_notify::FlattenPayload, librbd::watch_notify::ResizePayload, 
librbd::watch_notify::SnapCreatePayload, 
librbd::watch_notify::SnapRemovePayload, 
librbd::watch_notify::SnapRenamePayload, 
librbd::watch_notify::SnapProtectPayload, 
librbd::watch_notify::SnapUnprotectPayload, 
librbd::watch_notify::RebuildObjectMapPayload, 
librbd::watch_notify::RenamePayload, 
librbd::watch_notify::UnknownPayload>::has_fallback_type_> (no_backup_flag=..., 
storage=0x7ffa852c4838, visitor=<synthetic pointer>, logical_which=<optimized 
out>, internal_which=<optimized out>)    at 
/usr/include/boost/variant/detail/visitation_impl.hpp:256
  #9  
internal_apply_visitor_impl<boost::detail::variant::invoke_visitor<librbd::ImageWatcher<librbd::ImageCtx>::HandlePayloadVisitor
 const>, void const*> (
-     storage=0x7ffa852c4838, visitor=<synthetic pointer>, 
logical_which=<optimized out>, internal_which=<optimized out>)
-     at /usr/include/boost/variant/variant.hpp:2326
+     storage=0x7ffa852c4838, visitor=<synthetic pointer>, 
logical_which=<optimized out>, internal_which=<optimized out>)
+     at /usr/include/boost/variant/variant.hpp:2326
  #10 
internal_apply_visitor<boost::detail::variant::invoke_visitor<librbd::ImageWatcher<librbd::ImageCtx>::HandlePayloadVisitor
 const> > (
-     visitor=<synthetic pointer>, this=0x7ffa852c4830) at 
/usr/include/boost/variant/variant.hpp:2348
+     visitor=<synthetic pointer>, this=0x7ffa852c4830) at 
/usr/include/boost/variant/variant.hpp:2348
  #11 
apply_visitor<librbd::ImageWatcher<librbd::ImageCtx>::HandlePayloadVisitor 
const> (visitor=..., this=0x7ffa852c4830)
-     at /usr/include/boost/variant/variant.hpp:2370
+     at /usr/include/boost/variant/variant.hpp:2370
  #12 
apply_visitor<librbd::ImageWatcher<librbd::ImageCtx>::HandlePayloadVisitor, 
boost::variant<librbd::watch_notify::AcquiredLockPayload, 
librbd::watch_notify::Re
  leasedLockPayload, librbd::watch_notify::RequestLockPayload, 
librbd::watch_notify::HeaderUpdatePayload, 
librbd::watch_notify::AsyncProgressPayload, 
librbd::watch_notify::AsyncCompletePayload, 
librbd::watch_notify::FlattenPayload, librbd::watch_notify::ResizePayload, 
librbd::watch_notify::SnapCreatePayload, 
librbd::watch_notify::SnapRemovePayload, 
librbd::watch_notify::SnapRenamePayload, 
librbd::watch_notify::SnapProtectPayload, 
librbd::watch_notify::SnapUnprotectPayload, 
librbd::watch_notify::RebuildObjectMapPayload, 
librbd::watch_notify::RenamePayload, librbd::watch_notify::UnknownPayload> 
const> (visitable=..., visitor=...)    at 
/usr/include/boost/variant/detail/apply_visitor_unary.hpp:76
- #13 librbd::ImageWatcher<librbd::ImageCtx>::process_payload 
(this=this@entry=0x7ff9e4001d10, notify_id=notify_id@entry=1554778161239, 
-     handle=handle@entry=140711248795552, payload=..., r=r@entry=0) at 
librbd/ImageWatcher.cc:974
+ #13 librbd::ImageWatcher<librbd::ImageCtx>::process_payload 
(this=this@entry=0x7ff9e4001d10, notify_id=notify_id@entry=1554778161239,
+     handle=handle@entry=140711248795552, payload=..., r=r@entry=0) at 
librbd/ImageWatcher.cc:974
  #14 0x00007ffa90ccf007 in 
librbd::ImageWatcher<librbd::ImageCtx>::handle_notify (this=0x7ff9e4001d10, 
notify_id=1554778161239, handle=140711248795552, bl=...)
-     at librbd/ImageWatcher.cc:1002
+     at librbd/ImageWatcher.cc:1002
  #15 0x00007ffa9d4581b5 in ?? () from /lib64/librados.so.2
  #16 0x00007ffa9d4687cc in ?? () from /lib64/librados.so.2
  #17 0x00007ffa9d4280c9 in ?? () from /lib64/librados.so.2
  #18 0x00007ffa9d515546 in ?? () from /lib64/librados.so.2
  #19 0x00007ffab07bbdc5 in start_thread () from /lib64/libpthread.so.0
  #20 0x00007ffaafde176d in clone () from /lib64/libc.so.6
- 
  
  I found this:
  librbd: possible deadlock with flush if refresh in-progress
  
https://github.com/ceph/ceph/commit/547e867628975c7144590e9332aa62b0ef82a433#diff-7a45ddfe5905a26e5abed55fe6a17b1d
  
  but i am not sure if this bugfix is for my contiditon.
  
  Thancks a lot if anyone can give advise, ^ _ ^

** Converted to question:
   https://answers.launchpad.net/ubuntu/+source/ceph/+question/658257

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1718134

Title:
  rbd refresh_if_required deadlock

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/ceph/+bug/1718134/+subscriptions

-- 
ubuntu-bugs mailing list
ubuntu-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

Reply via email to