fix recursion error on reconnection

This commit is contained in:
Vasily.onl
2025-06-03 11:42:10 +08:00
parent 01cea1d5e5
commit d508616677
2 changed files with 255 additions and 60 deletions

View File

@@ -388,56 +388,83 @@ class OKXWebSocketClient:
self.logger.warning(f"{self.component_name}: Cannot start tasks while stopping is in progress")
return
# Cancel any existing tasks first
# Check if tasks are already running
if (self._ping_task and not self._ping_task.done() and
self._message_handler_task and not self._message_handler_task.done()):
if self.logger:
self.logger.debug(f"{self.component_name}: Background tasks already running")
return
# Cancel any existing tasks first (safety measure)
await self._stop_background_tasks()
# Start ping task
self._ping_task = asyncio.create_task(self._ping_loop())
# Ensure we're still supposed to start tasks after stopping
if self._tasks_stopping or not self.is_connected:
if self.logger:
self.logger.debug(f"{self.component_name}: Aborting task start - stopping or disconnected")
return
# Start message handler task
self._message_handler_task = asyncio.create_task(self._message_handler())
if self.logger:
self.logger.debug(f"{self.component_name}: Started background tasks")
try:
# Start ping task
self._ping_task = asyncio.create_task(self._ping_loop())
# Start message handler task
self._message_handler_task = asyncio.create_task(self._message_handler())
if self.logger:
self.logger.debug(f"{self.component_name}: Started background tasks")
except Exception as e:
if self.logger:
self.logger.error(f"{self.component_name}: Error starting background tasks: {e}")
# Clean up on failure
await self._stop_background_tasks()
async def _stop_background_tasks(self) -> None:
"""Stop background tasks with proper synchronization."""
"""Stop background tasks with proper synchronization - simplified approach."""
self._tasks_stopping = True
try:
tasks = []
# Collect tasks to cancel
if self._ping_task and not self._ping_task.done():
tasks.append(self._ping_task)
if self._message_handler_task and not self._message_handler_task.done():
tasks.append(self._message_handler_task)
tasks_to_cancel = []
if not tasks:
if self._ping_task and not self._ping_task.done():
tasks_to_cancel.append(('ping_task', self._ping_task))
if self._message_handler_task and not self._message_handler_task.done():
tasks_to_cancel.append(('message_handler_task', self._message_handler_task))
if not tasks_to_cancel:
if self.logger:
self.logger.debug(f"{self.component_name}: No background tasks to stop")
return
if self.logger:
self.logger.debug(f"{self.component_name}: Stopping {len(tasks)} background tasks")
self.logger.debug(f"{self.component_name}: Stopping {len(tasks_to_cancel)} background tasks")
# Cancel all tasks
for task in tasks:
task.cancel()
# Wait for all tasks to complete with timeout
if tasks:
# Cancel tasks individually to avoid recursion
for task_name, task in tasks_to_cancel:
try:
await asyncio.wait_for(
asyncio.gather(*tasks, return_exceptions=True),
timeout=5.0
)
except asyncio.TimeoutError:
if self.logger:
self.logger.warning(f"{self.component_name}: Task shutdown timeout - some tasks may still be running")
if not task.done():
task.cancel()
if self.logger:
self.logger.debug(f"{self.component_name}: Cancelled {task_name}")
except Exception as e:
if self.logger:
self.logger.debug(f"{self.component_name}: Expected exception during task shutdown: {e}")
self.logger.debug(f"{self.component_name}: Error cancelling {task_name}: {e}")
# Wait for tasks to complete individually with shorter timeouts
for task_name, task in tasks_to_cancel:
try:
await asyncio.wait_for(task, timeout=2.0)
except asyncio.TimeoutError:
if self.logger:
self.logger.warning(f"{self.component_name}: {task_name} shutdown timeout")
except asyncio.CancelledError:
# Expected when task is cancelled
pass
except Exception as e:
if self.logger:
self.logger.debug(f"{self.component_name}: {task_name} shutdown exception: {e}")
# Clear task references
self._ping_task = None
@@ -446,6 +473,9 @@ class OKXWebSocketClient:
if self.logger:
self.logger.debug(f"{self.component_name}: Background tasks stopped successfully")
except Exception as e:
if self.logger:
self.logger.error(f"{self.component_name}: Error in _stop_background_tasks: {e}")
finally:
self._tasks_stopping = False
@@ -495,6 +525,9 @@ class OKXWebSocketClient:
)
except asyncio.TimeoutError:
continue # No message received, continue loop
except asyncio.CancelledError:
# Exit immediately on cancellation
break
# Check if we're still supposed to be running
if self._tasks_stopping:
@@ -512,35 +545,42 @@ class OKXWebSocketClient:
self._connection_state = ConnectionState.DISCONNECTED
# Use lock to prevent concurrent reconnection attempts
async with self._reconnection_lock:
# Double-check we still need to reconnect
if (self._connection_state == ConnectionState.DISCONNECTED and
self._reconnect_attempts < self.max_reconnect_attempts and
not self._tasks_stopping):
self._reconnect_attempts += 1
if self.logger:
self.logger.info(f"{self.component_name}: Attempting automatic reconnection ({self._reconnect_attempts}/{self.max_reconnect_attempts})")
# Stop current tasks properly
await self._stop_background_tasks()
# Attempt reconnection with stored subscriptions
stored_subscriptions = list(self._subscriptions.values())
if await self.reconnect():
if self.logger:
self.logger.info(f"{self.component_name}: Automatic reconnection successful")
# The reconnect method will restart tasks, so we exit this handler
break
else:
if self.logger:
self.logger.error(f"{self.component_name}: Automatic reconnection failed")
break
else:
if self.logger:
self.logger.error(f"{self.component_name}: Max reconnection attempts exceeded or shutdown in progress")
break
try:
# Use asyncio.wait_for to prevent hanging on lock acquisition
async with asyncio.wait_for(self._reconnection_lock.acquire(), timeout=5.0):
try:
# Double-check we still need to reconnect
if (self._connection_state == ConnectionState.DISCONNECTED and
self._reconnect_attempts < self.max_reconnect_attempts and
not self._tasks_stopping):
self._reconnect_attempts += 1
if self.logger:
self.logger.info(f"{self.component_name}: Attempting automatic reconnection ({self._reconnect_attempts}/{self.max_reconnect_attempts})")
# Attempt reconnection (this will handle task cleanup)
if await self.reconnect():
if self.logger:
self.logger.info(f"{self.component_name}: Automatic reconnection successful")
# Exit this handler as reconnect will start new tasks
break
else:
if self.logger:
self.logger.error(f"{self.component_name}: Automatic reconnection failed")
break
else:
if self.logger:
self.logger.error(f"{self.component_name}: Max reconnection attempts exceeded or shutdown in progress")
break
finally:
self._reconnection_lock.release()
except asyncio.TimeoutError:
if self.logger:
self.logger.warning(f"{self.component_name}: Timeout acquiring reconnection lock")
break
except asyncio.CancelledError:
# Exit immediately on cancellation
break
except asyncio.CancelledError:
if self.logger: