refactor: Move get_stock_data to utils module for shared usage

This commit is contained in:
Bobby (aider) 2025-02-08 12:13:59 -08:00
parent d41d40ab4f
commit 012fa0e071
4 changed files with 124 additions and 108 deletions

View File

@ -3,7 +3,7 @@ import pandas as pd
import os
from db.db_connection import create_client
from trading.position_calculator import PositionCalculator
from screener.t_sunnyband import get_stock_data
from utils.data_utils import get_stock_data
from screener.user_input import get_interval_choice
from indicators.three_atr_ema import ThreeATREMAIndicator

View File

@ -8,113 +8,6 @@ from trading.position_calculator import PositionCalculator
from screener.user_input import get_interval_choice
def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interval: str) -> pd.DataFrame:
"""Fetch stock data from the database with enhanced fallback logic"""
try:
client = create_client()
# Expand window to 90 days for more data robustness
start_date = start_date - timedelta(days=90)
# First try primary data source
if interval == "daily":
table = "stock_prices_daily"
else:
table = "stock_prices"
# Unified query format
query = f"""
SELECT
toDateTime(window_start/1000000000) as date,
open,
high,
low,
close,
volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
AND window_start BETWEEN
{int(start_date.timestamp() * 1e9)} AND
{int(end_date.timestamp() * 1e9)}
AND toYear(toDateTime(window_start/1000000000)) <= toYear(now())
AND toYear(toDateTime(window_start/1000000000)) >= (toYear(now()) - 1)
ORDER BY date ASC
"""
result = client.query(query)
# Fallback to intraday data if needed
if not result.result_rows and interval == "daily":
# Try building daily bars from intraday data
print(f"⚠️ No daily data for {ticker}, resampling from intraday data")
intraday_query = f"""
SELECT
toDateTime(window_start/1000000000) as date,
first_value(open) AS open,
max(high) AS high,
min(low) AS low,
last_value(close) AS close,
sum(volume) AS volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
AND window_start BETWEEN
{int(start_date.timestamp() * 1e9)} AND
{int(end_date.timestamp() * 1e9)}
AND toYear(toDateTime(window_start/1000000000)) <= toYear(now())
AND toYear(toDateTime(window_start/1000000000)) >= (toYear(now()) - 1)
GROUP BY date
ORDER BY date ASC
"""
result = client.query(intraday_query)
# Fallback to different intervals if still empty
if not result.result_rows:
# Try alternate data sources
print(f"⚠️ No {interval} data for {ticker}, trying weekly")
weekly_query = f"""
SELECT
toStartOfWeek(window_start) AS date,
first_value(open) AS open,
max(high) AS high,
min(low) AS low,
last_value(close) AS close,
sum(volume) AS volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
GROUP BY date
ORDER BY date ASC
"""
result = client.query(weekly_query)
if not result.result_rows:
return pd.DataFrame()
df = pd.DataFrame(
result.result_rows,
columns=['date', 'open', 'high', 'low', 'close', 'volume']
)
# Convert numeric columns
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
for col in numeric_columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
# Handle null values
if df['close'].isnull().any():
print(f"Warning: Found null values in close prices")
df = df.dropna(subset=['close'])
if df.empty or 'close' not in df.columns:
return pd.DataFrame()
if df['date'].dtype == object:
df['date'] = pd.to_datetime(df['date'])
return df
except Exception as e:
print(f"Error fetching {ticker} data: {str(e)}")
return pd.DataFrame()
def get_valid_tickers(min_price: float, max_price: float, min_volume: int, interval: str) -> list:
"""Get tickers that meet the price and volume criteria"""

3
src/utils/__init__.py Normal file
View File

@ -0,0 +1,3 @@
from .data_utils import get_stock_data
__all__ = ['get_stock_data']

120
src/utils/data_utils.py Normal file
View File

@ -0,0 +1,120 @@
import pandas as pd
from datetime import datetime, timedelta
from db.db_connection import create_client
def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interval: str) -> pd.DataFrame:
"""
Fetch stock data from the database with enhanced fallback logic
Args:
ticker (str): Stock ticker symbol
start_date (datetime): Start date for data fetch
end_date (datetime): End date for data fetch
interval (str): Time interval for data ('daily', '5min', etc.)
Returns:
pd.DataFrame: DataFrame with OHLCV data
"""
try:
client = create_client()
# Expand window to 90 days for more data robustness
start_date = start_date - timedelta(days=90)
# First try primary data source
if interval == "daily":
table = "stock_prices_daily"
else:
table = "stock_prices"
# Unified query format
query = f"""
SELECT
toDateTime(window_start/1000000000) as date,
open,
high,
low,
close,
volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
AND window_start BETWEEN
{int(start_date.timestamp() * 1e9)} AND
{int(end_date.timestamp() * 1e9)}
AND toYear(toDateTime(window_start/1000000000)) <= toYear(now())
AND toYear(toDateTime(window_start/1000000000)) >= (toYear(now()) - 1)
ORDER BY date ASC
"""
result = client.query(query)
# Fallback to intraday data if needed
if not result.result_rows and interval == "daily":
print(f"⚠️ No daily data for {ticker}, resampling from intraday data")
intraday_query = f"""
SELECT
toDateTime(window_start/1000000000) as date,
first_value(open) AS open,
max(high) AS high,
min(low) AS low,
last_value(close) AS close,
sum(volume) AS volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
AND window_start BETWEEN
{int(start_date.timestamp() * 1e9)} AND
{int(end_date.timestamp() * 1e9)}
AND toYear(toDateTime(window_start/1000000000)) <= toYear(now())
AND toYear(toDateTime(window_start/1000000000)) >= (toYear(now()) - 1)
GROUP BY date
ORDER BY date ASC
"""
result = client.query(intraday_query)
# Fallback to different intervals if still empty
if not result.result_rows:
print(f"⚠️ No {interval} data for {ticker}, trying weekly")
weekly_query = f"""
SELECT
toStartOfWeek(window_start) AS date,
first_value(open) AS open,
max(high) AS high,
min(low) AS low,
last_value(close) AS close,
sum(volume) AS volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
GROUP BY date
ORDER BY date ASC
"""
result = client.query(weekly_query)
if not result.result_rows:
return pd.DataFrame()
df = pd.DataFrame(
result.result_rows,
columns=['date', 'open', 'high', 'low', 'close', 'volume']
)
# Convert numeric columns
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
for col in numeric_columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
# Handle null values
if df['close'].isnull().any():
print(f"Warning: Found null values in close prices")
df = df.dropna(subset=['close'])
if df.empty or 'close' not in df.columns:
return pd.DataFrame()
if df['date'].dtype == object:
df['date'] = pd.to_datetime(df['date'])
return df
except Exception as e:
print(f"Error fetching {ticker} data: {str(e)}")
return pd.DataFrame()