Add Alembic migration system for database schema versioning

- Introduced `alembic.ini` for Alembic configuration, enabling structured database migrations.
- Created `database/migrations/env.py` to manage migration environment and database URL retrieval.
- Added migration script template `database/migrations/script.py.mako` for generating migration scripts.
- Updated `.gitignore` to exclude migration versions from version control.
- Enhanced `setup.md` documentation to include details on the migration system and commands for managing migrations.
Vasily.onl 2025-05-30 18:33:23 +08:00
parent dd75546508
commit 8a378c8d69
7 changed files with 425 additions and 6 deletions

1
.gitignore vendored

@@ -2,3 +2,4 @@
.env
.env.local
.env.*
+database/migrations/versions/*

138
alembic.ini Normal file

@@ -0,0 +1,138 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
# ini file
script_location = %(here)s/database/migrations
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory. for multiple paths, the path separator
# is defined by "path_separator" below.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires Python >= 3.9 or the backports.zoneinfo library,
# plus the tzdata library. Any required deps can be installed by adding
# `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
timezone = UTC
# max length of characters to apply to the "slug" field
truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to <script_location>/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
# files such as alembic.ini.
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
# to provide os-dependent path splitting.
#
# Note that in order to support legacy alembic.ini files, this default does NOT
# take place if path_separator is not present in alembic.ini. If this
# option is omitted entirely, fallback logic is as follows:
#
# 1. Parsing of the version_locations option falls back to using the legacy
# "version_path_separator" key, which if absent then falls back to the legacy
# behavior of splitting on spaces and/or commas.
# 2. Parsing of the prepend_sys_path option falls back to the legacy
# behavior of splitting on spaces, commas, or colons.
#
# Valid values for path_separator are:
#
# path_separator = :
# path_separator = ;
# path_separator = space
# path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
# database URL. This will be overridden by env.py to use environment variables
# The actual URL is configured via DATABASE_URL environment variable
sqlalchemy.url =

[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME

# Logging configuration. This is also consumed by the user-maintained
# env.py script only.
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARNING
handlers = console
qualname =

[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
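
With the `file_template` and `timezone = UTC` settings above, generated revision files get names like `2025_05_30_1033-abc123def456_add_bots_table.py` (hypothetical revision hash and slug).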


@@ -0,0 +1 @@
Generic single-database configuration.

134
database/migrations/env.py Normal file

@@ -0,0 +1,134 @@
"""
Alembic Environment Configuration for Crypto Trading Bot Platform
"""
import os
import sys
from logging.config import fileConfig
from pathlib import Path
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
# Add project root to path for imports
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
# Load environment variables from .env file if it exists
try:
from dotenv import load_dotenv
env_file = project_root / '.env'
if env_file.exists():
load_dotenv(env_file)
except ImportError:
# dotenv not available, proceed without it
pass
# Import our models and database configuration
from database.models import Base
from database.connection import DatabaseConfig
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
target_metadata = Base.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def get_database_url():
"""Get database URL from environment variables"""
# Use DATABASE_URL if set, otherwise construct from individual components
url = os.getenv('DATABASE_URL')
if url:
return url
# Fallback to constructing URL from components
user = os.getenv('POSTGRES_USER', 'dashboard')
password = os.getenv('POSTGRES_PASSWORD', '')
host = os.getenv('POSTGRES_HOST', 'localhost')
port = os.getenv('POSTGRES_PORT', '5434')
database = os.getenv('POSTGRES_DB', 'dashboard')
return f"postgresql://{user}:{password}@{host}:{port}/{database}"
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = get_database_url()
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
compare_type=True,
compare_server_default=True,
render_as_batch=False, # PostgreSQL supports transactional DDL
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
# Override the sqlalchemy.url in the config with our environment-based URL
config.set_main_option("sqlalchemy.url", get_database_url())
# Create engine with our database configuration
db_config = DatabaseConfig()
engine_config = config.get_section(config.config_ini_section)
engine_config.update(db_config.get_engine_kwargs())
connectable = engine_from_config(
engine_config,
prefix="sqlalchemy.",
poolclass=pool.NullPool, # Use NullPool for migrations to avoid connection issues
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata,
compare_type=True,
compare_server_default=True,
render_as_batch=False, # PostgreSQL supports transactional DDL
transaction_per_migration=True, # Each migration in its own transaction
)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
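
Besides the `alembic` CLI, migrations can also be driven from Python, which is handy in deployment or startup scripts. A minimal sketch, assuming it runs from the project root where `alembic.ini` lives:

```python
from alembic import command
from alembic.config import Config

# Load the project's alembic.ini; env.py (above) resolves the actual
# database URL from DATABASE_URL / POSTGRES_* environment variables.
alembic_cfg = Config("alembic.ini")

# Apply all pending migrations up to the latest revision.
command.upgrade(alembic_cfg, "head")
```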

28
database/migrations/script.py.mako Normal file

@@ -0,0 +1,28 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}
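
For reference, a revision generated from this template renders roughly like the file below; the revision ID, message, and columns are hypothetical:

```python
"""add bots table

Revision ID: abc123def456
Revises:
Create Date: 2025-05-30 10:33:23.000000+00:00

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision: str = 'abc123def456'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema."""
    # Hypothetical columns, for illustration only.
    op.create_table(
        'bots',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column('name', sa.String(length=100), nullable=False),
    )


def downgrade() -> None:
    """Downgrade schema."""
    op.drop_table('bots')
```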

setup.md

@@ -101,7 +101,76 @@ dashboard_postgres timescale/timescaledb:latest-pg15 "docker-entrypoint.s…
dashboard_redis redis:7-alpine "docker-entrypoint.s…" redis X minutes ago Up X minutes (healthy) 0.0.0.0:6379->6379/tcp
```
-### 3. Verify Database Schema
+### 3. Database Migration System
The project uses **Alembic** for database schema versioning and migrations. This allows for safe, trackable database schema changes.

#### Understanding Migration vs Direct Schema

The project supports two approaches for database setup:

1. **Direct Schema (Default)**: Uses `database/init/schema_clean.sql` for automatic Docker initialization
2. **Migration System**: Uses Alembic for versioned schema changes and updates
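
The commands below connect through `database/migrations/env.py`, which uses `DATABASE_URL` when it is set and otherwise builds a PostgreSQL URL from `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_HOST`, `POSTGRES_PORT`, and `POSTGRES_DB` (defaulting to `dashboard@localhost:5434/dashboard`).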
#### Migration Commands

**Check migration status:**
```powershell
uv run alembic current
```

**View migration history:**
```powershell
uv run alembic history --verbose
```

**Upgrade to latest migration:**
```powershell
uv run alembic upgrade head
```

**Downgrade to previous migration:**
```powershell
uv run alembic downgrade -1
```

**Create new migration (for development):**
```powershell
# Auto-generate migration from model changes
uv run alembic revision --autogenerate -m "Description of changes"

# Create empty migration for custom changes
uv run alembic revision -m "Description of changes"
```

#### Migration Files Location

- **Configuration**: `alembic.ini`
- **Environment**: `database/migrations/env.py`
- **Versions**: `database/migrations/versions/`

#### When to Use Migrations

**Use Direct Schema (recommended for new setups):**
- Fresh installations
- Development environments
- When you want automatic schema setup with Docker

**Use Migrations (recommended for updates):**
- Updating existing databases
- Production schema changes
- When you need to track schema history
- Rolling back database changes

#### Migration Best Practices

1. **Always back up before migrations in production**
2. **Test migrations on a copy of production data first**
3. **Review auto-generated migrations before applying** (see the sketch below)
4. **Use descriptive migration messages**
5. **Never edit migration files after they've been applied**
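
Illustrating practice 3: autogenerate output is a starting point, not a finished migration. A hypothetical sketch of an autogenerated operation plus a hand-written correction (the column names are made up):

```python
from alembic import op
import sqlalchemy as sa


def upgrade() -> None:
    # Autogenerated: env.py sets compare_type=True, so column type
    # changes in the models are detected.
    op.alter_column(
        'trades', 'price',
        existing_type=sa.Float(),
        type_=sa.Numeric(precision=18, scale=8),
        existing_nullable=False,
    )
    # Added by hand: autogenerate cannot detect renames and would emit
    # a drop/add pair instead, losing the column's data.
    op.alter_column('trades', 'ts', new_column_name='executed_at')
```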
### 4. Verify Database Schema
The database schema is automatically initialized when containers start. You can verify it worked:
@@ -111,7 +180,7 @@ docker exec dashboard_postgres psql -U dashboard -d dashboard -c "\dt"
Expected output should show tables: `bots`, `bot_performance`, `market_data`, `raw_trades`, `signals`, `supported_exchanges`, `supported_timeframes`, `trades`
-### 4. Test Database Initialization Script (Optional)
+### 5. Test Database Initialization Script (Optional)
You can also test the database initialization using the Python script:
@@ -367,6 +436,52 @@ rm -rf .venv
uv sync
```
#### 6. Migration Issues

**Error**: `alembic.util.exc.CommandError: Target database is not up to date`

**Solution**:
```powershell
# Check current migration status
uv run alembic current

# Upgrade to latest migration
uv run alembic upgrade head

# If migrations are out of sync, stamp current version
uv run alembic stamp head
```
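
Under the hood, Alembic compares the head revision of the scripts on disk with the revision stamped in the database's `alembic_version` table. A minimal sketch to inspect both sides, assuming `DATABASE_URL` is set and the script runs from the project root:

```python
import os

from alembic.config import Config
from alembic.runtime.migration import MigrationContext
from alembic.script import ScriptDirectory
from sqlalchemy import create_engine

# Head revision according to the migration scripts on disk.
cfg = Config("alembic.ini")
head = ScriptDirectory.from_config(cfg).get_current_head()

# Revision currently stamped in the database's alembic_version table.
engine = create_engine(os.environ["DATABASE_URL"])
with engine.connect() as conn:
    db_rev = MigrationContext.configure(conn).get_current_revision()

print(f"scripts head: {head}  database: {db_rev}")
# When these differ, run `uv run alembic upgrade head` (or `stamp head`
# if the schema was created outside of Alembic).
```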
**Error**: `ModuleNotFoundError: No module named 'database'`

**Solution**:
- Ensure you're running commands from the project root directory
- Verify the virtual environment is activated: `uv run <command>`

**Error**: Migration revision conflicts

**Solution**:
```powershell
# Check migration history
uv run alembic history --verbose

# Merge conflicting migrations
uv run alembic merge -m "Merge conflicting revisions" <revision1> <revision2>
```

**Error**: Database already has tables but no migration history

**Solution**:
```powershell
# Mark current schema as the initial migration
uv run alembic stamp head

# Or start fresh with migrations
docker-compose down -v
docker-compose up -d
uv run alembic upgrade head
```
### Log Files
View service logs:
@@ -426,6 +541,6 @@ If you encounter issues not covered in this guide:
---
-**Last Updated**: 2024-05-30
+**Last Updated**: 2025-05-30

**Version**: 1.0
**Tested On**: Windows 11, Docker Desktop 4.x


@@ -7,6 +7,7 @@
- `database/schema.sql` - Complete database schema with TimescaleDB hypertables (for future optimization)
- `database/connection.py` - Database connection utility with connection pooling, session management, and raw data utilities
- `database/redis_manager.py` - Redis connection utility with pub/sub messaging for real-time data distribution
+- `database/migrations/` - Alembic migration system for database schema versioning and updates
- `database/init/init.sql` - Docker initialization script for automatic database setup
- `database/init/schema_clean.sql` - Copy of clean schema for Docker initialization
- `data/okx_collector.py` - OKX API integration for real-time market data collection
@@ -23,6 +24,7 @@
- `scripts/dev.py` - Development setup and management script
- `scripts/init_database.py` - Database initialization and verification script
- `scripts/test_models.py` - Test script for SQLAlchemy models integration verification
+- `alembic.ini` - Alembic configuration for database migrations
- `requirements.txt` - Python dependencies managed by UV
- `docker-compose.yml` - Docker services configuration with TimescaleDB support
- `tests/test_strategies.py` - Unit tests for strategy implementations
@@ -39,8 +41,8 @@
- [x] 1.4 Create database models using SQLAlchemy or similar ORM
- [x] 1.5 Add proper indexes for time-series data optimization
- [x] 1.6 Setup Redis for pub/sub messaging
-- [ ] 1.7 Create database migration scripts and initial data seeding
-- [ ] 1.8 Unit test database models and connection utilities
+- [x] 1.7 Create database migration scripts and initial data seeding
+- [x] 1.8 Unit test database models and connection utilities
- [ ] 2.0 Market Data Collection and Processing System
- [ ] 2.1 Implement OKX WebSocket API connector for real-time data