fix: Enhance data fetching with fallback logic and expanded time ranges
This commit is contained in:
parent 7ef27027ee
commit b820c5a5b2
@ -11,7 +11,10 @@ def run_atr_ema_target_scanner(min_price: float, max_price: float, min_volume: i
|
||||
|
||||
# Set time range
|
||||
end_date = datetime.now()
|
||||
start_date = end_date - timedelta(days=30) # Changed from 1 day to 30 days
|
||||
start_date = end_date - timedelta(days=90) # Expanded from 30 to 90 days
|
||||
market_days = pd.bdate_range(start=start_date.date(), end=end_date.date())
|
||||
if len(market_days) < 50:
|
||||
start_date = end_date - timedelta(days=50*1.5) # Ensure 50 trading days coverage
|
||||
start_ts = int(start_date.timestamp() * 1000000000)
|
||||
end_ts = int(end_date.timestamp() * 1000000000)
|
||||
|
||||
@ -51,14 +54,35 @@ def run_atr_ema_target_scanner(min_price: float, max_price: float, min_volume: i
|
||||
print("\n🔍 Verifying data availability...")
|
||||
valid_query = f"""
|
||||
SELECT ticker
|
||||
FROM stock_db.stock_prices
|
||||
WHERE window_start BETWEEN {start_ts} AND {end_ts}
|
||||
GROUP BY ticker
|
||||
HAVING count() > 50
|
||||
FROM (
|
||||
SELECT ticker, count() as cnt
|
||||
FROM stock_db.stock_prices_daily
|
||||
WHERE date BETWEEN '{start_date.date()}' AND '{end_date.date()}'
|
||||
GROUP BY ticker
|
||||
HAVING cnt >= 50
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT ticker, count() as cnt
|
||||
FROM stock_db.stock_prices
|
||||
WHERE window_start BETWEEN {start_ts} AND {end_ts}
|
||||
GROUP BY ticker
|
||||
HAVING cnt >= 250 # 50 days * average 50 bars/day
|
||||
)
|
||||
GROUP BY ticker
|
||||
HAVING sum(cnt) >= 50
|
||||
"""
|
||||
valid_result = client.query(valid_query)
|
||||
valid_symbols = {row[0] for row in valid_result.result_rows}
|
||||
qualified_stocks = [s for s in stocks if s[0] in valid_symbols]
|
||||
|
||||
# Enhanced validation check
|
||||
for ticker in list(qualified_stocks):
|
||||
test_df = get_stock_data(ticker[0], start_date, end_date, "1d")
|
||||
if test_df.empty or len(test_df) < 50:
|
||||
print(f"🚫 Removing {ticker[0]} - insufficient initial data")
|
||||
qualified_stocks.remove(ticker)
|
||||
|
||||
print(f"\n✅ Found {len(qualified_stocks)} stocks with sufficient historical data")
|
||||
|
||||
indicator = ThreeATREMAIndicator()
|
||||
|
||||
@ -9,89 +9,93 @@ from screener.user_input import get_interval_choice
|
||||
|
||||
|
||||
def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interval: str) -> pd.DataFrame:
|
||||
"""Fetch stock data from the database"""
|
||||
"""Fetch stock data from the database with enhanced fallback logic"""
|
||||
try:
|
||||
client = create_client()
|
||||
|
||||
# Calculate proper date range (looking back from today)
|
||||
end_date = datetime.now()
|
||||
start_date = end_date - timedelta(days=60) # 60 days of history
|
||||
# Push the requested start date 90 days further back for more data robustness
|
||||
start_date = start_date - timedelta(days=90)
|
||||
|
||||
# First try primary data source
|
||||
if interval == "daily":
|
||||
table = "stock_prices_daily"
|
||||
date_col = "date"
|
||||
query = f"""
|
||||
SELECT
|
||||
{date_col} as date,
|
||||
open,
|
||||
high,
|
||||
low,
|
||||
close,
|
||||
volume
|
||||
FROM stock_db.{table}
|
||||
WHERE ticker = '{ticker}'
|
||||
AND {date_col} BETWEEN '{start_date.date()}' AND '{end_date.date()}'
|
||||
ORDER BY date ASC
|
||||
"""
|
||||
else:
|
||||
table = "stock_prices"
|
||||
date_col = "window_start"
|
||||
minutes_map = {
|
||||
"5min": 5,
|
||||
"15min": 15,
|
||||
"30min": 30,
|
||||
"1hour": 60
|
||||
}
|
||||
minutes = minutes_map[interval]
|
||||
|
||||
# Get 5-minute bars and resample them to the desired interval
|
||||
query = f"""
|
||||
SELECT
|
||||
fromUnixTimestamp(intDiv(window_start/1000000000, 300) * 300) as interval_start,
|
||||
min(open) as open,
|
||||
max(high) as high,
|
||||
min(low) as low,
|
||||
argMax(close, window_start) as close,
|
||||
sum(volume) as volume
|
||||
FROM stock_db.{table}
|
||||
WHERE ticker = '{ticker}'
|
||||
AND window_start/1000000000 BETWEEN
|
||||
toUnixTimestamp('{start_date.date()}') AND
|
||||
toUnixTimestamp('{end_date.date()}')
|
||||
GROUP BY interval_start
|
||||
ORDER BY interval_start ASC
|
||||
"""
|
||||
|
||||
# Unified query format
|
||||
query = f"""
|
||||
SELECT
|
||||
date,
|
||||
open,
|
||||
high,
|
||||
low,
|
||||
close,
|
||||
volume
|
||||
FROM stock_db.{table}
|
||||
WHERE ticker = '{ticker}'
|
||||
AND date BETWEEN '{start_date.date()}' AND '{end_date.date()}'
|
||||
ORDER BY date ASC
|
||||
"""
|
||||
|
||||
result = client.query(query)
|
||||
|
||||
# Fallback to intraday data if needed
|
||||
if not result.result_rows and interval == "daily":
|
||||
# Try building daily bars from intraday data
|
||||
print(f"⚠️ No daily data for {ticker}, resampling from intraday data")
|
||||
intraday_query = f"""
|
||||
SELECT
|
||||
toStartOfDay(window_start) AS date,
|
||||
first_value(open) AS open,
|
||||
max(high) AS high,
|
||||
min(low) AS low,
|
||||
last_value(close) AS close,
|
||||
sum(volume) AS volume
|
||||
FROM stock_db.stock_prices
|
||||
WHERE ticker = '{ticker}'
|
||||
AND window_start BETWEEN
|
||||
toUnixTimestamp('{start_date.date()}') * 1000000000 AND
|
||||
toUnixTimestamp('{end_date.date()}') * 1000000000
|
||||
GROUP BY date
|
||||
ORDER BY date ASC
|
||||
"""
|
||||
result = client.query(intraday_query)
|
||||
|
||||
# Fallback to different intervals if still empty
|
||||
if not result.result_rows:
|
||||
# Try alternate data sources
|
||||
print(f"⚠️ No {interval} data for {ticker}, trying weekly")
|
||||
weekly_query = f"""
|
||||
SELECT
|
||||
toStartOfWeek(window_start) AS date,
|
||||
first_value(open) AS open,
|
||||
max(high) AS high,
|
||||
min(low) AS low,
|
||||
last_value(close) AS close,
|
||||
sum(volume) AS volume
|
||||
FROM stock_db.stock_prices
|
||||
WHERE ticker = '{ticker}'
|
||||
GROUP BY date
|
||||
ORDER BY date ASC
|
||||
"""
|
||||
result = client.query(weekly_query)
|
||||
|
||||
if not result.result_rows:
|
||||
print(f"No data found for {ticker}")
|
||||
return pd.DataFrame()
|
||||
|
||||
df = pd.DataFrame(
|
||||
result.result_rows,
|
||||
columns=['date', 'open', 'high', 'low', 'close', 'volume']
|
||||
)
|
||||
|
||||
if interval != "daily" and interval != "5min":
|
||||
# Resample to desired interval
|
||||
df.set_index('date', inplace=True)
|
||||
minutes = minutes_map[interval]
|
||||
rule = f'{minutes}min'
|
||||
|
||||
df = df.resample(rule).agg({
|
||||
'open': 'first',
|
||||
'high': 'max',
|
||||
'low': 'min',
|
||||
'close': 'last',
|
||||
'volume': 'sum'
|
||||
}).dropna()
|
||||
|
||||
df.reset_index(inplace=True)
|
||||
|
||||
# Convert date column if needed
|
||||
if df['date'].dtype == object:
|
||||
df['date'] = pd.to_datetime(df['date'])
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error fetching data for {ticker}: {str(e)}")
|
||||
print(f"Error fetching {ticker} data: {str(e)}")
|
||||
return pd.DataFrame()
|
||||
|
||||
def get_valid_tickers(min_price: float, max_price: float, min_volume: int, interval: str) -> list:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user