feat: Add time interval resampling to stock data fetching
This commit is contained in:
parent
7e046a1e33
commit
cea43c4d32
@@ -61,30 +61,24 @@ def save_signals_to_csv(signals: list, scanner_name: str) -> None:
|
||||
|
||||
def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interval: str) -> pd.DataFrame:
|
||||
"""
|
||||
Fetch stock data from the database with enhanced fallback logic
|
||||
Fetch and resample stock data based on the chosen interval
|
||||
|
||||
Args:
|
||||
ticker (str): Stock ticker symbol
|
||||
start_date (datetime): Start date for data fetch
|
||||
end_date (datetime): End date for data fetch
|
||||
interval (str): Time interval for data ('daily', '5min', etc.)
|
||||
interval (str): Time interval for data ('daily', '5min', '15min', '30min', '1hour')
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: DataFrame with OHLCV data
|
||||
pd.DataFrame: Resampled DataFrame with OHLCV data
|
||||
"""
|
||||
try:
|
||||
client = create_client()
|
||||
|
||||
# Expand window to 90 days for more data robustness
|
||||
# Expand window to get enough data for calculations
|
||||
start_date = start_date - timedelta(days=90)
|
||||
|
||||
# First try primary data source
|
||||
if interval == "daily":
|
||||
table = "stock_prices_daily"
|
||||
else:
|
||||
table = "stock_prices"
|
||||
|
||||
# Unified query format
|
||||
# Base query to get raw data at finest granularity
|
||||
query = f"""
|
||||
SELECT
|
||||
toDateTime(window_start/1000000000) as date,
|
||||
@@ -104,50 +98,10 @@ def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interv
|
||||
|
||||
result = client.query(query)
|
||||
|
||||
# Fallback to intraday data if needed
|
||||
if not result.result_rows and interval == "daily":
|
||||
print(f"⚠️ No daily data for {ticker}, resampling from intraday data")
|
||||
intraday_query = f"""
|
||||
SELECT
|
||||
toDateTime(window_start/1000000000) as date,
|
||||
first_value(open) AS open,
|
||||
max(high) AS high,
|
||||
min(low) AS low,
|
||||
last_value(close) AS close,
|
||||
sum(volume) AS volume
|
||||
FROM stock_db.stock_prices
|
||||
WHERE ticker = '{ticker}'
|
||||
AND window_start BETWEEN
|
||||
{int(start_date.timestamp() * 1e9)} AND
|
||||
{int(end_date.timestamp() * 1e9)}
|
||||
AND toYear(toDateTime(window_start/1000000000)) <= toYear(now())
|
||||
AND toYear(toDateTime(window_start/1000000000)) >= (toYear(now()) - 1)
|
||||
GROUP BY date
|
||||
ORDER BY date ASC
|
||||
"""
|
||||
result = client.query(intraday_query)
|
||||
|
||||
# Fallback to different intervals if still empty
|
||||
if not result.result_rows:
|
||||
print(f"⚠️ No {interval} data for {ticker}, trying weekly")
|
||||
weekly_query = f"""
|
||||
SELECT
|
||||
toStartOfWeek(window_start) AS date,
|
||||
first_value(open) AS open,
|
||||
max(high) AS high,
|
||||
min(low) AS low,
|
||||
last_value(close) AS close,
|
||||
sum(volume) AS volume
|
||||
FROM stock_db.stock_prices
|
||||
WHERE ticker = '{ticker}'
|
||||
GROUP BY date
|
||||
ORDER BY date ASC
|
||||
"""
|
||||
result = client.query(weekly_query)
|
||||
|
||||
if not result.result_rows:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Create base DataFrame
|
||||
df = pd.DataFrame(
|
||||
result.result_rows,
|
||||
columns=['date', 'open', 'high', 'low', 'close', 'volume']
|
||||
@@ -158,18 +112,50 @@ def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interv
|
||||
for col in numeric_columns:
|
||||
df[col] = pd.to_numeric(df[col], errors='coerce')
|
||||
|
||||
# Convert date column
|
||||
df['date'] = pd.to_datetime(df['date'])
|
||||
|
||||
# Set date as index for resampling
|
||||
df.set_index('date', inplace=True)
|
||||
|
||||
# Resample based on interval
|
||||
if interval == 'daily':
|
||||
rule = '1D'
|
||||
elif interval == '5min':
|
||||
rule = '5T'
|
||||
elif interval == '15min':
|
||||
rule = '15T'
|
||||
elif interval == '30min':
|
||||
rule = '30T'
|
||||
elif interval == '1hour':
|
||||
rule = '1H'
|
||||
else:
|
||||
rule = '1D' # Default to daily
|
||||
|
||||
resampled = df.resample(rule).agg({
|
||||
'open': 'first',
|
||||
'high': 'max',
|
||||
'low': 'min',
|
||||
'close': 'last',
|
||||
'volume': 'sum'
|
||||
}).dropna()
|
||||
|
||||
# Reset index to get date as column
|
||||
resampled.reset_index(inplace=True)
|
||||
|
||||
# Filter to requested date range
|
||||
mask = (resampled['date'] >= start_date + timedelta(days=89)) & (resampled['date'] <= end_date)
|
||||
resampled = resampled.loc[mask]
|
||||
|
||||
# Handle null values
|
||||
if df['close'].isnull().any():
|
||||
if resampled['close'].isnull().any():
|
||||
print(f"Warning: Found null values in close prices")
|
||||
df = df.dropna(subset=['close'])
|
||||
resampled = resampled.dropna(subset=['close'])
|
||||
|
||||
if df.empty or 'close' not in df.columns:
|
||||
if resampled.empty or 'close' not in resampled.columns:
|
||||
return pd.DataFrame()
|
||||
|
||||
if df['date'].dtype == object:
|
||||
df['date'] = pd.to_datetime(df['date'])
|
||||
|
||||
return df
|
||||
return resampled
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error fetching {ticker} data: {str(e)}")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user