joggerjoel commented on issue #12580:
URL: https://github.com/apache/apisix/issues/12580#issuecomment-3258431800

   #!/bin/bash

   # Reproduction script for the APISIX etcd hang.
   # It starts APISIX and reports whether start-up stalls in the init_etcd phase.

   # Abort on the first unhandled command failure.
   set -e

   # Opening banner, followed by one blank line.
   printf '%s\n' \
       "==========================================" \
       "APISIX Etcd Hanging Bug Demonstration" \
       "==========================================" \
       ""

   # ANSI colour escape sequences, stored with literal backslashes; they are
   # turned into real escape characters by whatever prints them.
   NC='\033[0m' # No Color
   RED='\033[0;31m'
   GREEN='\033[0;32m'
   YELLOW='\033[1;33m'
   BLUE='\033[0;34m'
   
   # Print one message wrapped in a colour escape sequence.
   # Globals:   NC (read) - reset sequence appended after the message
   # Arguments: $1 - colour escape sequence with literal backslashes (e.g. "$RED")
   #            $2 - message text
   # Outputs:   the coloured message followed by a newline on stdout
   print_status() {
       local color=$1
       local message=$2
       # %b expands backslash escapes in the colour codes only; %s prints the
       # message verbatim, so text containing backslashes (e.g. a log line)
       # is not reinterpreted the way `echo -e` would.
       printf '%b%s%b\n' "$color" "$message" "$NC"
   }
   
   # Print a section title framed by two rule lines, preceded by a blank line.
   # Arguments: $1 - title text
   # Outputs:   four lines on stdout (blank, rule, title, rule)
   print_header() {
       local rule="=========================================="
       printf '\n%s\n' "$rule"
       echo "$1"
       printf '%s\n' "$rule"
   }
   
   # Wait a fixed time, then report whether a process is still alive.
   # Globals:   BLUE, RED, GREEN (read) - colour codes passed to print_status
   # Arguments: $1 - PID to inspect
   #            $2 - human-readable process name used in the messages
   #            $3 - seconds to wait before checking (default 30)
   # Outputs:   status messages via print_status
   # Returns:   0 if the process has exited after the wait, 1 if it is still running
   check_process_hanging() {
       local pid=$1
       local process_name=$2
       local timeout=${3:-30}

       print_status "$BLUE" "Checking if $process_name (PID: $pid) is hanging..."

       # Give the process the whole grace period before looking at it.
       sleep "$timeout"

       if ps -p "$pid" > /dev/null 2>&1; then
           # Still alive after the grace period: treated as a hang.
           print_status "$RED" "❌ HANGING DETECTED: $process_name is still running after ${timeout}s"
           print_status "$RED" "   This indicates the process is stuck and not completing initialization"
           return 1
       else
           print_status "$GREEN" "✅ $process_name completed successfully"
           return 0
       fi
   }
   
   # Follow a log file for a limited time and report notable lines.
   # Globals:   BLUE, YELLOW, RED, GREEN (read) - colour codes passed to print_status
   # Arguments: $1 - path of the log file to follow
   #            $2 - seconds to keep following (default 60)
   # Outputs:   one print_status message per matching log line
   # Note:      blocks for the whole duration; the follower itself runs in a
   #            background subshell that is signalled afterwards.
   monitor_apisix_logs() {
       local log_file=$1
       local timeout=${2:-60}
       local monitor_pid
       local line

       print_status "$BLUE" "Monitoring APISIX logs for hanging patterns..."
       print_status "$YELLOW" "Looking for 'init_etcd' hanging indicators..."

       # Start monitoring logs in background
       (
           # IFS= and -r keep each log line verbatim (whitespace and backslashes).
           timeout "$timeout" tail -f "$log_file" 2>/dev/null | while IFS= read -r line; do
               # Glob patterns replace one grep fork per line; the first match wins,
               # in the same order as the checks were originally written.
               case "$line" in
                   *init_etcd*)
                       print_status "$RED" "🚨 HANGING PATTERN DETECTED: $line"
                       ;;
                   *etcd*timeout* | *etcd*error* | *etcd*failed*)
                       print_status "$YELLOW" "⚠️  Etcd issue detected: $line"
                       ;;
                   *"ready to accept connections"* | *"APISIX is ready"*)
                       print_status "$GREEN" "✅ APISIX initialization completed: $line"
                       # Initialization finished: stop reading further lines.
                       break
                       ;;
               esac
           done
       ) &

       monitor_pid=$!
       sleep "$timeout"
       # Best effort: the subshell may already have ended on its own.
       kill "$monitor_pid" 2>/dev/null || true
   }
   
   print_header "Step 1: Environment Check"
   print_status "$BLUE" "Checking if we're in the right environment..."

   # The presence of /.dockerenv is used as the "inside a Docker container"
   # marker; its absence only produces a warning.
   if [ -f /.dockerenv ]; then
       print_status "$GREEN" "✅ Running inside Docker container"
   else
       print_status "$YELLOW" "⚠️  Not in Docker container - make sure to run this in the APISIX container"
   fi

   # Probe the etcd client port and abort when it cannot be reached.
   # stderr is discarded, so a missing `nc` binary is reported the same way
   # as an unreachable etcd.
   print_status "$BLUE" "Testing etcd connectivity..."
   if nc -z etcd 2379 2>/dev/null; then
       print_status "$GREEN" "✅ etcd is reachable on port 2379"
   else
       print_status "$RED" "❌ etcd is not reachable on port 2379"
       print_status "$RED" "   Make sure etcd container is running and accessible"
       exit 1
   fi
   
   print_header "Step 2: Clean Environment Setup"
   print_status "$BLUE" "Preparing clean environment for testing..."

   # Stop any existing APISIX processes.
   # `pkill -f apisix` matches against full command lines, so it would also
   # terminate this script whenever its own path or arguments contain
   # "apisix". List the candidates instead and skip our own PID.
   print_status "$YELLOW" "Stopping any existing APISIX processes..."
   stale_pids=$(pgrep -f apisix) || true
   for stale_pid in $stale_pids; do
       if [ "$stale_pid" != "$$" ]; then
           # Best effort: the process may already be gone.
           kill "$stale_pid" 2>/dev/null || true
       fi
   done
   sleep 2

   # Remove logs left over from a previous run.
   rm -f /tmp/apisix_hanging_test.log
   rm -f /tmp/etcd_test.log
   
   print_header "Step 3: Etcd Health Verification"
   print_status "$BLUE" "Verifying etcd is healthy and responsive..."

   # Health probe. stdout and stderr both go to the log so the output can be
   # shown on either outcome; a failed probe aborts the script.
   if etcdctl --endpoints=http://etcd:2379 endpoint health > /tmp/etcd_test.log 2>&1; then
       print_status "$GREEN" "✅ etcd health check passed"
       cat /tmp/etcd_test.log
   else
       print_status "$RED" "❌ etcd health check failed"
       cat /tmp/etcd_test.log
       exit 1
   fi

   # Write/read round trip on a scratch key. Failures here are reported but
   # do not stop the script.
   print_status "$BLUE" "Testing etcd read/write operations..."
   if etcdctl --endpoints=http://etcd:2379 put /test/key "test_value" > /dev/null 2>&1; then
       if etcdctl --endpoints=http://etcd:2379 get /test/key | grep -q "test_value"; then
           print_status "$GREEN" "✅ etcd read/write operations working"
           # Remove the scratch key again.
           etcdctl --endpoints=http://etcd:2379 del /test/key > /dev/null 2>&1
       else
           print_status "$RED" "❌ etcd read operation failed"
       fi
   else
       print_status "$RED" "❌ etcd write operation failed"
   fi
   
   print_header "Step 4: APISIX Hanging Demonstration"
   print_status "$BLUE" "Starting APISIX and demonstrating the hanging behavior..."
   print_status "$YELLOW" "Expected: APISIX should hang on 'init_etcd' phase"

   # Start APISIX in background, capturing stdout and stderr in the test log.
   # APISIX_PID is the PID of the backgrounded `apisix start` command itself.
   # NOTE(review): if `apisix start` spawns the server and returns, this PID
   # is not the server's - confirm before relying on it as a liveness signal.
   print_status "$BLUE" "Starting APISIX process..."
   /usr/local/apisix/bin/apisix start > /tmp/apisix_hanging_test.log 2>&1 &
   APISIX_PID=$!

   print_status "$GREEN" "APISIX started with PID: $APISIX_PID"
   print_status "$YELLOW" "Monitoring for hanging behavior..."

   # Follow the log concurrently for the same 30 seconds as the check below.
   monitor_apisix_logs /tmp/apisix_hanging_test.log 30 &
   MONITOR_PID=$!

   # check_process_hanging returns 0 when the process has exited, 1 when it
   # is still running after the wait.
   print_status "$BLUE" "Waiting 30 seconds to detect hanging behavior..."
   if check_process_hanging "$APISIX_PID" "APISIX" 30; then
       print_status "$GREEN" "✅ APISIX completed initialization (unexpected - bug may be fixed)"
   else
       print_status "$RED" "❌ HANGING CONFIRMED: APISIX is stuck and not completing initialization"
   fi

   # Stop log monitoring (best effort: it may already have finished).
   kill "$MONITOR_PID" 2>/dev/null || true
   
   print_header "Step 5: Detailed Hanging Analysis"
   print_status "$BLUE" "Analyzing the hanging behavior in detail..."

   # Report whether the process recorded in APISIX_PID is still alive and,
   # if so, show it together with its direct children.
   if ps -p "$APISIX_PID" > /dev/null 2>&1; then
       print_status "$RED" "❌ APISIX process is still running (PID: $APISIX_PID)"
       print_status "$RED" "   This confirms the hanging issue"

       # Show process details
       print_status "$BLUE" "Process details:"
       ps -p "$APISIX_PID" -o pid,ppid,cmd,etime,pcpu,pmem

       # Check for child processes
       print_status "$BLUE" "Child processes:"
       ps --ppid "$APISIX_PID" -o pid,ppid,cmd,etime 2>/dev/null || echo "No child processes found"

   else
       print_status "$GREEN" "✅ APISIX process has exited"
   fi

   # Scan the captured log for the same indicators the live monitor looks for.
   print_status "$BLUE" "Analyzing APISIX logs for hanging patterns..."
   if [ -f /tmp/apisix_hanging_test.log ]; then
       print_status "$YELLOW" "Last 20 lines of APISIX log:"
       tail -20 /tmp/apisix_hanging_test.log

       echo ""
       print_status "$YELLOW" "Searching for hanging indicators:"

       if grep -q "init_etcd" /tmp/apisix_hanging_test.log; then
           print_status "$RED" "🚨 Found 'init_etcd' in logs - this is where APISIX hangs"
           # Only the five most recent occurrences, with line numbers.
           grep -n "init_etcd" /tmp/apisix_hanging_test.log | tail -5
       fi

       if grep -q "etcd.*timeout\|etcd.*error\|etcd.*failed" /tmp/apisix_hanging_test.log; then
           print_status "$YELLOW" "⚠️  Found etcd-related errors:"
           grep -n "etcd.*timeout\|etcd.*error\|etcd.*failed" /tmp/apisix_hanging_test.log
       fi

       if grep -q "ready to accept connections\|APISIX is ready" /tmp/apisix_hanging_test.log; then
           print_status "$GREEN" "✅ Found initialization completion message"
           grep -n "ready to accept connections\|APISIX is ready" /tmp/apisix_hanging_test.log
       fi
   fi
   
   print_header "Step 6: Admin API Test"
   print_status "$BLUE" "Testing Admin API accessibility..."

   # Wait a bit more for any delayed initialization
   sleep 5

   # Query the Admin API with a 10 second limit; -f turns HTTP error statuses
   # into a failing exit code. The key can be overridden through the
   # APISIX_ADMIN_KEY environment variable; the fallback is the key this
   # script has always used.
   print_status "$BLUE" "Attempting to access Admin API..."
   if curl -s -f -m 10 http://localhost:9180/apisix/admin/services \
      -H "X-API-KEY: ${APISIX_ADMIN_KEY:-YurWsfrexXabWoiFRfGSvfpxTlnfOsWH}" > /dev/null 2>&1; then
       print_status "$GREEN" "✅ Admin API is responding (unexpected - hanging issue may be resolved)"
   else
       print_status "$RED" "❌ Admin API is not responding (expected due to hanging)"
       print_status "$RED" "   This confirms that APISIX initialization is incomplete"
   fi
   
   print_header "Step 7: Route Synchronization Test"
   print_status "$BLUE" "Testing route synchronization with etcd..."

   # Create route 1 through the Admin API and capture only the HTTP status:
   # the response body is discarded with -o /dev/null so the status code is
   # never mixed with body text. A failing curl (e.g. connection refused)
   # is recorded as "000". The key can be overridden through the
   # APISIX_ADMIN_KEY environment variable.
   print_status "$BLUE" "Attempting to create a test route..."
   ROUTE_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -m 10 \
       -X PUT http://localhost:9180/apisix/admin/routes/1 \
       -H "X-API-KEY: ${APISIX_ADMIN_KEY:-YurWsfrexXabWoiFRfGSvfpxTlnfOsWH}" \
       -H "Content-Type: application/json" \
       -d '{
           "uri": "/test",
           "upstream": {
               "type": "roundrobin",
               "nodes": {
                   "httpbin.org:80": 1
               }
           }
       }' 2>/dev/null) || ROUTE_RESPONSE="000"

   # Last three characters of the captured output are the status code.
   HTTP_CODE="${ROUTE_RESPONSE: -3}"
   if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ]; then
       print_status "$GREEN" "✅ Route creation successful (unexpected)"

       # Request the new route through the proxy.
       # NOTE(review): this targets port 80 on localhost - confirm that the
       # APISIX proxy listens there in the environment under test.
       print_status "$BLUE" "Testing route accessibility..."
       if curl -s -f -m 10 http://localhost/test > /dev/null 2>&1; then
           print_status "$GREEN" "✅ Route is accessible (unexpected - synchronization working)"
       else
           print_status "$RED" "❌ Route is not accessible (expected - synchronization failed)"
       fi
   else
       print_status "$RED" "❌ Route creation failed (expected due to hanging)"
       print_status "$RED" "   HTTP Code: $HTTP_CODE"
   fi
   
   print_header "Step 8: Summary and Cleanup"
   print_status "$BLUE" "Generating summary of hanging behavior..."

   echo ""
   print_status "$YELLOW" "=== HANGING BEHAVIOR SUMMARY ==="
   echo ""

   # The verdict depends only on whether the process recorded in APISIX_PID
   # is still alive at this point.
   if ps -p "$APISIX_PID" > /dev/null 2>&1; then
       print_status "$RED" "❌ HANGING CONFIRMED:"
       print_status "$RED" "   - APISIX process is still running but not responding"
       print_status "$RED" "   - Admin API is not accessible"
       print_status "$RED" "   - Route synchronization with etcd has failed"
       print_status "$RED" "   - System is stuck in incomplete initialization state"

       echo ""
       print_status "$YELLOW" "Expected behavior:"
       print_status "$GREEN" "   - APISIX should complete initialization in 5-10 seconds"
       print_status "$GREEN" "   - Admin API should be accessible"
       print_status "$GREEN" "   - Routes should be synchronized with etcd"
       print_status "$GREEN" "   - HTTP requests should work properly"

       echo ""
       print_status "$YELLOW" "Actual behavior (bug):"
       print_status "$RED" "   - APISIX hangs on 'init_etcd' phase"
       print_status "$RED" "   - Process never completes initialization"
       print_status "$RED" "   - etcd is healthy but APISIX cannot sync"
       print_status "$RED" "   - System remains in broken state"

   else
       print_status "$GREEN" "✅ APISIX completed initialization (unexpected)"
   fi

   echo ""
   print_status "$BLUE" "Cleaning up test environment..."
   # Signal the process recorded in APISIX_PID (best effort: it may already
   # have exited), then give it a moment to go away.
   kill "$APISIX_PID" 2>/dev/null || true
   sleep 2

   # Clean up test files
   rm -f /tmp/apisix_hanging_test.log
   rm -f /tmp/etcd_test.log

   print_status "$GREEN" "✅ Cleanup completed"

   echo ""
   print_status "$YELLOW" "=== DEMONSTRATION COMPLETE ==="
   print_status "$BLUE" "This script has demonstrated the etcd hanging issue with APISIX."
   print_status "$BLUE" "The hanging behavior prevents proper route synchronization and system functionality."
   echo ""
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to