joggerjoel commented on issue #12580:
URL: https://github.com/apache/apisix/issues/12580#issuecomment-3258431800
#!/bin/bash
# Demonstration script for the APISIX etcd hanging issue: it walks through
# a series of checks that show APISIX stalling in the init_etcd phase.

# Abort on the first unhandled command failure.
set -e

# Opening banner (rule, title, rule, blank line).
printf '%s\n' \
  "==========================================" \
  "APISIX Etcd Hanging Bug Demonstration" \
  "==========================================" \
  ""

# ANSI colour sequences, stored in backslash form; they are expanded by
# whichever command prints them.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Print a message wrapped in a colour escape sequence.
# Globals:   NC (read) - reset sequence appended after the message
# Arguments: $1 - colour escape sequence in backslash form, e.g. '\033[0;31m'
#            $2 - message text
# Outputs:   the coloured message followed by a newline on stdout
print_status() {
  local color=$1
  local message=$2
  # %b expands the backslash escapes of the colour codes only. The message
  # goes through %s so that backslashes it contains (for example in a log
  # line passed in by a caller) are printed verbatim instead of being
  # interpreted, which is what `echo -e` on the whole string used to do.
  printf '%b%s%b\n' "$color" "$message" "$NC"
}
# Print a section title framed by two horizontal rules, preceded by a
# blank line.
# Arguments: $1 - title text
# Outputs:   four lines on stdout (blank, rule, title, rule)
print_header() {
  local rule="=========================================="
  printf '\n%s\n' "$rule"
  echo "$1"
  printf '%s\n' "$rule"
}
# Wait a fixed time, then report whether a process is still alive.
# Globals:   BLUE, RED, GREEN (read); calls print_status
# Arguments: $1 - PID to check
#            $2 - process name used in the messages
#            $3 - seconds to wait before checking (default: 30)
# Outputs:   status messages via print_status
# Returns:   1 if the process still exists after the wait (reported as
#            hanging), 0 if it has gone
check_process_hanging() {
  local pid=$1
  local process_name=$2
  local timeout=${3:-30}

  print_status "$BLUE" "Checking if $process_name (PID: $pid) is hanging..."

  # Always sleep the full timeout; the process is only inspected afterwards.
  sleep "$timeout"

  if ps -p "$pid" > /dev/null 2>&1; then
    # Each message is a single line; the mail-wrapped originals embedded a
    # line break in the middle of the sentence.
    print_status "$RED" "❌ HANGING DETECTED: $process_name is still running after ${timeout}s"
    print_status "$RED" " This indicates the process is stuck and not completing initialization"
    return 1
  fi

  print_status "$GREEN" "✅ $process_name completed successfully"
  return 0
}
# Follow a log file for a bounded time and flag lines of interest.
# Globals:   BLUE, YELLOW, RED, GREEN (read); calls print_status
# Arguments: $1 - path of the log file to follow
#            $2 - seconds to keep monitoring (default: 60)
# Outputs:   one status message per matching log line, via print_status
# Notes:     blocks for the full timeout even when the completion message
#            is seen earlier; callers that must not block run it with `&`.
monitor_apisix_logs() {
  local log_file=$1
  local timeout=${2:-60}
  local line

  print_status "$BLUE" "Monitoring APISIX logs for hanging patterns..."
  print_status "$YELLOW" "Looking for 'init_etcd' hanging indicators..."

  # Follow the log in a background subshell; `timeout` bounds the tail.
  (
    timeout "$timeout" tail -f "$log_file" 2>/dev/null \
      | while IFS= read -r line; do
        # Glob patterns replace the per-line `echo | grep` calls. Two of the
        # original greps were split across lines by mail wrapping: one lost
        # its pattern entirely, the other matched a bare "APISIX".
        case "$line" in
          *init_etcd*)
            print_status "$RED" "🚨 HANGING PATTERN DETECTED: $line"
            ;;
          *etcd*timeout* | *etcd*error* | *etcd*failed*)
            print_status "$YELLOW" "⚠️ Etcd issue detected: $line"
            ;;
          *"ready to accept connections"* | *"APISIX is ready"*)
            print_status "$GREEN" "✅ APISIX initialization completed: $line"
            break
            ;;
        esac
      done
  ) &
  local monitor_pid=$!

  sleep "$timeout"
  # Best effort: the subshell has normally finished by now.
  kill "$monitor_pid" 2>/dev/null || true
}
# Step 1: report whether we are inside a container and make sure the etcd
# client port answers before going any further.
print_header "Step 1: Environment Check"
print_status "$BLUE" "Checking if we're in the right environment..."

# The presence of /.dockerenv is what this script treats as "inside Docker".
if [ -f /.dockerenv ]; then
  print_status "$GREEN" "✅ Running inside Docker container"
else
  print_status "$YELLOW" "⚠️ Not in Docker container - make sure to run this in the APISIX container"
fi

# Check if etcd is accessible
print_status "$BLUE" "Testing etcd connectivity..."
# nc's stderr is discarded below, so a missing nc binary would otherwise be
# reported as "etcd is not reachable"; tell the two cases apart.
if ! command -v nc > /dev/null 2>&1; then
  print_status "$RED" "❌ nc (netcat) is not installed - cannot test etcd connectivity"
  exit 1
fi
if nc -z etcd 2379 2>/dev/null; then
  print_status "$GREEN" "✅ etcd is reachable on port 2379"
else
  print_status "$RED" "❌ etcd is not reachable on port 2379"
  print_status "$RED" " Make sure etcd container is running and accessible"
  exit 1
fi
# Step 2: stop leftover APISIX processes and remove logs of earlier runs.
print_header "Step 2: Clean Environment Setup"
print_status "$BLUE" "Preparing clean environment for testing..."

# Kill any existing APISIX processes.
# `pkill -f apisix` matches full command lines, so it would also terminate
# this very script whenever its path or arguments contain "apisix". List the
# matches with pgrep instead and skip our own PID.
print_status "$YELLOW" "Stopping any existing APISIX processes..."
while IFS= read -r stale_pid; do
  if [ "$stale_pid" != "$$" ]; then
    # Best effort: the process may already have exited.
    kill "$stale_pid" 2>/dev/null || true
  fi
done < <(pgrep -f apisix)
sleep 2

# Clean up any existing logs
rm -f /tmp/apisix_hanging_test.log
rm -f /tmp/etcd_test.log
# Step 3: confirm etcd itself is healthy, so that a later APISIX hang cannot
# be blamed on an unavailable etcd.
print_header "Step 3: Etcd Health Verification"
print_status "$BLUE" "Verifying etcd is healthy and responsive..."

# Test etcd with a simple operation. The redirections must stay on the same
# line as the command: when `2>&1` was wrapped onto its own line it became a
# separate, always-successful command and the check could never fail.
if etcdctl --endpoints=http://etcd:2379 endpoint health > /tmp/etcd_test.log 2>&1; then
  print_status "$GREEN" "✅ etcd health check passed"
  cat /tmp/etcd_test.log
else
  print_status "$RED" "❌ etcd health check failed"
  cat /tmp/etcd_test.log
  exit 1
fi

# Test etcd with a simple read/write operation: write a key, read it back,
# then delete it. Failures here are reported but do not stop the script.
print_status "$BLUE" "Testing etcd read/write operations..."
if etcdctl --endpoints=http://etcd:2379 put /test/key "test_value" > /dev/null 2>&1; then
  if etcdctl --endpoints=http://etcd:2379 get /test/key | grep -q "test_value"; then
    print_status "$GREEN" "✅ etcd read/write operations working"
    etcdctl --endpoints=http://etcd:2379 del /test/key > /dev/null 2>&1
  else
    print_status "$RED" "❌ etcd read operation failed"
  fi
else
  print_status "$RED" "❌ etcd write operation failed"
fi
# Step 4: start APISIX in the background, watch its output, and check
# whether the start command is still running after 30 seconds.
print_header "Step 4: APISIX Hanging Demonstration"
print_status "$BLUE" "Starting APISIX and demonstrating the hanging behavior..."
print_status "$YELLOW" "Expected: APISIX should hang on 'init_etcd' phase"

# Start APISIX in background with logging
print_status "$BLUE" "Starting APISIX process..."
/usr/local/apisix/bin/apisix start > /tmp/apisix_hanging_test.log 2>&1 &
# PID of the backgrounded `apisix start` command; later steps reuse it.
APISIX_PID=$!
print_status "$GREEN" "APISIX started with PID: $APISIX_PID"
print_status "$YELLOW" "Monitoring for hanging behavior..."

# Start log monitoring in background
monitor_apisix_logs /tmp/apisix_hanging_test.log 30 &
MONITOR_PID=$!

# Check if APISIX is hanging. check_process_hanging returns 0 when the
# process has exited and 1 when it is still running.
print_status "$BLUE" "Waiting 30 seconds to detect hanging behavior..."
if check_process_hanging "$APISIX_PID" "APISIX" 30; then
  print_status "$GREEN" "✅ APISIX completed initialization (unexpected - bug may be fixed)"
else
  print_status "$RED" "❌ HANGING CONFIRMED: APISIX is stuck and not completing initialization"
fi

# Stop log monitoring (best effort: it may already have finished)
kill "$MONITOR_PID" 2>/dev/null || true
# Step 5: inspect the APISIX start process and its captured output for
# signs of the hang.
print_header "Step 5: Detailed Hanging Analysis"
print_status "$BLUE" "Analyzing the hanging behavior in detail..."

# Check if APISIX process is still running
if ps -p "$APISIX_PID" > /dev/null 2>&1; then
  print_status "$RED" "❌ APISIX process is still running (PID: $APISIX_PID)"
  print_status "$RED" " This confirms the hanging issue"
  # Show process details
  print_status "$BLUE" "Process details:"
  ps -p "$APISIX_PID" -o pid,ppid,cmd,etime,pcpu,pmem
  # Check for child processes
  print_status "$BLUE" "Child processes:"
  ps --ppid "$APISIX_PID" -o pid,ppid,cmd,etime 2>/dev/null || echo "No child processes found"
else
  print_status "$GREEN" "✅ APISIX process has exited"
fi

# Analyze logs for hanging patterns.
# Every grep below names the log file on the same line as its pattern; when
# the file argument was wrapped onto the next line, grep read stdin instead
# and the log path was executed as a command.
print_status "$BLUE" "Analyzing APISIX logs for hanging patterns..."
if [ -f /tmp/apisix_hanging_test.log ]; then
  print_status "$YELLOW" "Last 20 lines of APISIX log:"
  tail -20 /tmp/apisix_hanging_test.log
  echo ""
  print_status "$YELLOW" "Searching for hanging indicators:"
  if grep -q "init_etcd" /tmp/apisix_hanging_test.log; then
    print_status "$RED" "🚨 Found 'init_etcd' in logs - this is where APISIX hangs"
    grep -n "init_etcd" /tmp/apisix_hanging_test.log | tail -5
  fi
  if grep -q "etcd.*timeout\|etcd.*error\|etcd.*failed" /tmp/apisix_hanging_test.log; then
    print_status "$YELLOW" "⚠️ Found etcd-related errors:"
    grep -n "etcd.*timeout\|etcd.*error\|etcd.*failed" /tmp/apisix_hanging_test.log
  fi
  if grep -q "ready to accept connections\|APISIX is ready" /tmp/apisix_hanging_test.log; then
    print_status "$GREEN" "✅ Found initialization completion message"
    grep -n "ready to accept connections\|APISIX is ready" /tmp/apisix_hanging_test.log
  fi
fi
# Step 6: check whether the Admin API answers on port 9180.
print_header "Step 6: Admin API Test"
print_status "$BLUE" "Testing Admin API accessibility..."

# Wait a bit more for any delayed initialization
sleep 5

# Test Admin API. The key can be overridden through APISIX_ADMIN_KEY; when
# that variable is unset or empty the key previously hard-coded here is used.
print_status "$BLUE" "Attempting to access Admin API..."
if curl -s -f -m 10 http://localhost:9180/apisix/admin/services \
  -H "X-API-KEY: ${APISIX_ADMIN_KEY:-YurWsfrexXabWoiFRfGSvfpxTlnfOsWH}" > /dev/null 2>&1; then
  print_status "$GREEN" "✅ Admin API is responding (unexpected - hanging issue may be resolved)"
else
  print_status "$RED" "❌ Admin API is not responding (expected due to hanging)"
  print_status "$RED" " This confirms that APISIX initialization is incomplete"
fi
# Step 7: try to create a route through the Admin API and, if that works,
# request it through the gateway.
print_header "Step 7: Route Synchronization Test"
print_status "$BLUE" "Testing route synchronization with etcd..."

# Try to create a test route via Admin API. `-w "%{http_code}"` appends the
# status code to the response body, and "000" is appended when curl itself
# fails, so the last three characters are always a status code. The key can
# be overridden through APISIX_ADMIN_KEY (default: the key used so far).
print_status "$BLUE" "Attempting to create a test route..."
ROUTE_RESPONSE=$(curl -s -w "%{http_code}" -m 10 \
  -X PUT http://localhost:9180/apisix/admin/routes/1 \
  -H "X-API-KEY: ${APISIX_ADMIN_KEY:-YurWsfrexXabWoiFRfGSvfpxTlnfOsWH}" \
  -H "Content-Type: application/json" \
  -d '{
"uri": "/test",
"upstream": {
"type": "roundrobin",
"nodes": {
"httpbin.org:80": 1
}
}
}' 2>/dev/null || echo "000")
HTTP_CODE="${ROUTE_RESPONSE: -3}"

if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ]; then
  print_status "$GREEN" "✅ Route creation successful (unexpected)"
  # Test if route is accessible
  # NOTE(review): this probes port 80 - confirm that it matches the proxy
  # listen port of the APISIX instance under test.
  print_status "$BLUE" "Testing route accessibility..."
  if curl -s -f -m 10 http://localhost/test > /dev/null 2>&1; then
    print_status "$GREEN" "✅ Route is accessible (unexpected - synchronization working)"
  else
    print_status "$RED" "❌ Route is not accessible (expected - synchronization failed)"
  fi
else
  print_status "$RED" "❌ Route creation failed (expected due to hanging)"
  print_status "$RED" " HTTP Code: $HTTP_CODE"
fi
# Step 8: print a summary based on whether the APISIX start process is
# still alive, then stop it and remove the temporary logs.
print_header "Step 8: Summary and Cleanup"
print_status "$BLUE" "Generating summary of hanging behavior..."
echo ""
print_status "$YELLOW" "=== HANGING BEHAVIOR SUMMARY ==="
echo ""

# Check final status
if ps -p "$APISIX_PID" > /dev/null 2>&1; then
  print_status "$RED" "❌ HANGING CONFIRMED:"
  print_status "$RED" " - APISIX process is still running but not responding"
  print_status "$RED" " - Admin API is not accessible"
  print_status "$RED" " - Route synchronization with etcd has failed"
  print_status "$RED" " - System is stuck in incomplete initialization state"
  echo ""
  print_status "$YELLOW" "Expected behavior:"
  print_status "$GREEN" " - APISIX should complete initialization in 5-10 seconds"
  print_status "$GREEN" " - Admin API should be accessible"
  print_status "$GREEN" " - Routes should be synchronized with etcd"
  print_status "$GREEN" " - HTTP requests should work properly"
  echo ""
  print_status "$YELLOW" "Actual behavior (bug):"
  print_status "$RED" " - APISIX hangs on 'init_etcd' phase"
  print_status "$RED" " - Process never completes initialization"
  print_status "$RED" " - etcd is healthy but APISIX cannot sync"
  print_status "$RED" " - System remains in broken state"
else
  print_status "$GREEN" "✅ APISIX completed initialization (unexpected)"
fi

echo ""
print_status "$BLUE" "Cleaning up test environment..."
# Kill APISIX process (best effort: it may already have exited)
kill "$APISIX_PID" 2>/dev/null || true
sleep 2
# Clean up test files
rm -f /tmp/apisix_hanging_test.log
rm -f /tmp/etcd_test.log
print_status "$GREEN" "✅ Cleanup completed"
echo ""
print_status "$YELLOW" "=== DEMONSTRATION COMPLETE ==="
print_status "$BLUE" "This script has demonstrated the etcd hanging issue with APISIX."
print_status "$BLUE" "The hanging behavior prevents proper route synchronization and system functionality."
echo ""
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]