refactor: Split large functions in data_utils.py into smaller modules

This commit is contained in:
Bobby (aider) 2025-02-12 19:50:27 -08:00
parent 3e98ba4e9d
commit 8775d35326

View File

@ -77,80 +77,6 @@ def print_signal(signal_data: dict, signal_type: str = "🔍") -> None:
# Print available keys for debugging # Print available keys for debugging
print(f"Available keys: {list(signal_data.keys())}") print(f"Available keys: {list(signal_data.keys())}")
def get_qualified_stocks(start_date: datetime, end_date: datetime, min_price: float, max_price: float, min_volume: int) -> list:
    """
    Return the tickers whose latest close and traded volume pass the filters.

    The query works in three stages:
      1. ``filtered_data`` keeps price rows inside the requested window
         (and not later than ``now()``) whose close lies within
         ``[min_price, max_price]`` and whose volume is at least
         ``min_volume``, joined with the ticker table to pick up the type.
      2. ``daily_data`` collapses those rows to one record per ticker and
         day: the close of the latest row and the summed volume.
      3. ``latest_data`` collapses to one record per ticker (most recent
         daily close, total volume, most recent day) and keeps only
         tickers whose most recent close is still inside the price band.

    Args:
        start_date (datetime): Beginning of the window to scan
        end_date (datetime): End of the window to scan
        min_price (float): Lowest acceptable close price
        max_price (float): Highest acceptable close price
        min_volume (int): Lowest acceptable volume for a single price row

    Returns:
        list: Tuples of (ticker, last_close, total_volume, last_update,
        stock_type) ordered by ticker. An empty list is returned when
        anything raises; the error is printed rather than propagated.
    """
    try:
        # window_start is compared against seconds * 1e9 below, so the
        # bounds are scaled the same way (presumably nanoseconds since the
        # epoch -- confirm against the table schema).
        scale = 1000000000
        start_ts = int(start_date.timestamp() * scale)
        end_ts = int(end_date.timestamp() * scale)

        with create_client() as client:
            # The SQL text is kept flush-left so the string sent to the
            # server matches the original character for character.
            query = f"""
WITH filtered_data AS (
SELECT
sp.ticker,
sp.window_start,
sp.close,
sp.volume,
t.type as stock_type,
toDateTime(toDateTime(sp.window_start/1000000000)) as trade_date
FROM stock_db.stock_prices sp
JOIN stock_db.stock_tickers t ON sp.ticker = t.ticker
WHERE window_start BETWEEN {start_ts} AND {end_ts}
AND toDateTime(window_start/1000000000) <= now()
AND close BETWEEN {min_price} AND {max_price}
AND volume >= {min_volume}
),
daily_data AS (
SELECT
ticker,
stock_type,
toDate(trade_date) as date,
argMax(close, window_start) as daily_close,
sum(volume) as daily_volume
FROM filtered_data
GROUP BY ticker, stock_type, toDate(trade_date)
),
latest_data AS (
SELECT
ticker,
any(stock_type) as stock_type,
argMax(daily_close, date) as last_close,
sum(daily_volume) as total_volume,
max(toUnixTimestamp(date)) as last_update
FROM daily_data
GROUP BY ticker
HAVING last_close BETWEEN {min_price} AND {max_price}
)
SELECT
ticker,
last_close,
total_volume,
last_update,
stock_type
FROM latest_data
ORDER BY ticker
"""
            rows = client.query(query).result_rows

        # Normalise each row to a plain 5-tuple in SELECT column order.
        column_count = 5
        return [tuple(row[i] for i in range(column_count)) for row in rows]
    except Exception as e:
        print(f"Error getting qualified stocks: {str(e)}")
        return []
def save_signals_to_csv(signals: list, scanner_name: str) -> None: def save_signals_to_csv(signals: list, scanner_name: str) -> None:
""" """
@ -214,41 +140,6 @@ def process_signal_data(ticker: str, signal_data: dict, current_volume: int,
return entry_data return entry_data
def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interval: str) -> pd.DataFrame:
"""
Fetch and resample stock data based on the chosen interval
Args:
ticker (str): Stock ticker symbol
start_date (datetime): Start date for data fetch
end_date (datetime): End date for data fetch
interval (str): Time interval for data ('daily', '5min', '15min', '30min', '1hour')
Returns:
pd.DataFrame: Resampled DataFrame with OHLCV data
"""
try:
with create_client() as client:
# Expand window to get enough data for calculations
start_date = start_date - timedelta(days=90)
# Base query to get raw data at finest granularity
query = f"""
SELECT
toDateTime(window_start/1000000000) as date,
open,
high,
low,
close,
volume
FROM stock_db.stock_prices
WHERE ticker = '{ticker}'
AND window_start BETWEEN
{int(start_date.timestamp() * 1e9)} AND
{int(end_date.timestamp() * 1e9)}
AND toDateTime(window_start/1000000000) <= now()
ORDER BY date ASC
"""
result = client.query(query) result = client.query(query)