fix: Enhance data fetching with fallback logic and expanded time ranges

This commit is contained in:
Bobby (aider) 2025-02-08 11:09:19 -08:00
parent 7ef27027ee
commit b820c5a5b2
2 changed files with 95 additions and 67 deletions

View File

@@ -11,7 +11,10 @@ def run_atr_ema_target_scanner(min_price: float, max_price: float, min_volume: i
# Set time range # Set time range
end_date = datetime.now() end_date = datetime.now()
start_date = end_date - timedelta(days=30) # Changed from 1 day to 30 days start_date = end_date - timedelta(days=90) # Expanded from 30 to 90 days
market_days = pd.bdate_range(start=start_date.date(), end=end_date.date())
if len(market_days) < 50:
start_date = end_date - timedelta(days=50*1.5) # Ensure 50 trading days coverage
start_ts = int(start_date.timestamp() * 1000000000) start_ts = int(start_date.timestamp() * 1000000000)
end_ts = int(end_date.timestamp() * 1000000000) end_ts = int(end_date.timestamp() * 1000000000)
@@ -51,14 +54,35 @@ def run_atr_ema_target_scanner(min_price: float, max_price: float, min_volume: i
print("\n🔍 Verifying data availability...") print("\n🔍 Verifying data availability...")
valid_query = f""" valid_query = f"""
SELECT ticker SELECT ticker
FROM stock_db.stock_prices FROM (
WHERE window_start BETWEEN {start_ts} AND {end_ts} SELECT ticker, count() as cnt
GROUP BY ticker FROM stock_db.stock_prices_daily
HAVING count() > 50 WHERE date BETWEEN '{start_date.date()}' AND '{end_date.date()}'
GROUP BY ticker
HAVING cnt >= 50
UNION ALL
SELECT ticker, count() as cnt
FROM stock_db.stock_prices
WHERE window_start BETWEEN {start_ts} AND {end_ts}
GROUP BY ticker
HAVING cnt >= 250 # 50 days * average 50 bars/day
)
GROUP BY ticker
HAVING sum(cnt) >= 50
""" """
valid_result = client.query(valid_query) valid_result = client.query(valid_query)
valid_symbols = {row[0] for row in valid_result.result_rows} valid_symbols = {row[0] for row in valid_result.result_rows}
qualified_stocks = [s for s in stocks if s[0] in valid_symbols] qualified_stocks = [s for s in stocks if s[0] in valid_symbols]
# Enhanced validation check
for ticker in list(qualified_stocks):
test_df = get_stock_data(ticker[0], start_date, end_date, "1d")
if test_df.empty or len(test_df) < 50:
print(f"🚫 Removing {ticker[0]} - insufficient initial data")
qualified_stocks.remove(ticker)
print(f"\n✅ Found {len(qualified_stocks)} stocks with sufficient historical data") print(f"\n✅ Found {len(qualified_stocks)} stocks with sufficient historical data")
indicator = ThreeATREMAIndicator() indicator = ThreeATREMAIndicator()

View File

@@ -9,89 +9,93 @@ from screener.user_input import get_interval_choice
def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interval: str) -> pd.DataFrame: def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interval: str) -> pd.DataFrame:
"""Fetch stock data from the database""" """Fetch stock data from the database with enhanced fallback logic"""
try: try:
client = create_client() client = create_client()
# Calculate proper date range (looking back from today) # Expand window to 90 days for more data robustness
end_date = datetime.now() start_date = start_date - timedelta(days=90)
start_date = end_date - timedelta(days=60) # 60 days of history
# First try primary data source
if interval == "daily": if interval == "daily":
table = "stock_prices_daily" table = "stock_prices_daily"
date_col = "date"
query = f"""
SELECT
{date_col} as date,
open,
high,
low,
close,
volume
FROM stock_db.{table}
WHERE ticker = '{ticker}'
AND {date_col} BETWEEN '{start_date.date()}' AND '{end_date.date()}'
ORDER BY date ASC
"""
else: else:
table = "stock_prices" table = "stock_prices"
date_col = "window_start"
minutes_map = { # Unified query format
"5min": 5, query = f"""
"15min": 15, SELECT
"30min": 30, date,
"1hour": 60 open,
} high,
minutes = minutes_map[interval] low,
close,
# Get 5-minute bars and resample them to the desired interval volume
query = f""" FROM stock_db.{table}
SELECT WHERE ticker = '{ticker}'
fromUnixTimestamp(intDiv(window_start/1000000000, 300) * 300) as interval_start, AND date BETWEEN '{start_date.date()}' AND '{end_date.date()}'
min(open) as open, ORDER BY date ASC
max(high) as high, """
min(low) as low,
argMax(close, window_start) as close,
sum(volume) as volume
FROM stock_db.{table}
WHERE ticker = '{ticker}'
AND window_start/1000000000 BETWEEN
toUnixTimestamp('{start_date.date()}') AND
toUnixTimestamp('{end_date.date()}')
GROUP BY interval_start
ORDER BY interval_start ASC
"""
result = client.query(query) result = client.query(query)
# Fallback to intraday data if needed
if not result.result_rows and interval == "daily":
# Try building daily bars from intraday data
print(f"⚠️ No daily data for {ticker}, resampling from intraday data")
intraday_query = f"""
SELECT
toStartOfDay(window_start) AS date,
first_value(open) AS open,
max(high) AS high,
min(low) AS low,
last_value(close) AS close,
sum(volume) AS volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
AND window_start BETWEEN
toUnixTimestamp('{start_date.date()}') * 1000000000 AND
toUnixTimestamp('{end_date.date()}') * 1000000000
GROUP BY date
ORDER BY date ASC
"""
result = client.query(intraday_query)
# Fallback to different intervals if still empty
if not result.result_rows:
# Try alternate data sources
print(f"⚠️ No {interval} data for {ticker}, trying weekly")
weekly_query = f"""
SELECT
toStartOfWeek(window_start) AS date,
first_value(open) AS open,
max(high) AS high,
min(low) AS low,
last_value(close) AS close,
sum(volume) AS volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
GROUP BY date
ORDER BY date ASC
"""
result = client.query(weekly_query)
if not result.result_rows: if not result.result_rows:
print(f"No data found for {ticker}")
return pd.DataFrame() return pd.DataFrame()
df = pd.DataFrame( df = pd.DataFrame(
result.result_rows, result.result_rows,
columns=['date', 'open', 'high', 'low', 'close', 'volume'] columns=['date', 'open', 'high', 'low', 'close', 'volume']
) )
if interval != "daily" and interval != "5min": # Convert date column if needed
# Resample to desired interval if df['date'].dtype == object:
df.set_index('date', inplace=True) df['date'] = pd.to_datetime(df['date'])
minutes = minutes_map[interval]
rule = f'{minutes}min'
df = df.resample(rule).agg({
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
'volume': 'sum'
}).dropna()
df.reset_index(inplace=True)
return df return df
except Exception as e: except Exception as e:
print(f"Error fetching data for {ticker}: {str(e)}") print(f"Error fetching {ticker} data: {str(e)}")
return pd.DataFrame() return pd.DataFrame()
def get_valid_tickers(min_price: float, max_price: float, min_volume: int, interval: str) -> list: def get_valid_tickers(min_price: float, max_price: float, min_volume: int, interval: str) -> list: