From ccf6671f4b78834c305fbf15940c67aa4a97048c Mon Sep 17 00:00:00 2001 From: "Bobby Abellana (aider)" Date: Thu, 6 Feb 2025 22:13:19 -0800 Subject: [PATCH] feat: Enhance stock screening with daily data filtering and intraday validation --- src/screener/t_sunnyband.py | 80 ++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/src/screener/t_sunnyband.py b/src/screener/t_sunnyband.py index e9a91d7..f88b165 100644 --- a/src/screener/t_sunnyband.py +++ b/src/screener/t_sunnyband.py @@ -31,7 +31,6 @@ def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interv """Fetch stock data from the database""" client = create_client() - # Select appropriate table based on interval if interval == "daily": table = "stock_prices_daily" date_col = "date" @@ -59,19 +58,20 @@ def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interv } minutes = minutes_map[interval] + # Get 5-minute bars and resample them to the desired interval query = f""" SELECT - fromUnixTimestamp({date_col}) as date, - open, - high, - low, - close, - volume + fromUnixTimestamp(intDiv({date_col}, 300) * 300) as interval_start, + min(open) as open, + max(high) as high, + min(low) as low, + argMax(close, {date_col}) as close, + sum(volume) as volume FROM stock_db.{table} WHERE ticker = '{ticker}' AND {date_col} BETWEEN toUnixTimestamp('{start_date.date()}') AND toUnixTimestamp('{end_date.date()}') - AND (toMinute(fromUnixTimestamp({date_col})) % {minutes}) = 0 - ORDER BY date ASC + GROUP BY interval_start + ORDER BY interval_start ASC """ try: @@ -84,6 +84,23 @@ def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interv result.result_rows, columns=['date', 'open', 'high', 'low', 'close', 'volume'] ) + + if interval != "daily" and interval != "5min": + # Resample to desired interval + df.set_index('date', inplace=True) + minutes = minutes_map[interval] + rule = f'{minutes}T' + + df = df.resample(rule).agg({ + 'open': 'first', + 'high': 'max', + 'low': 'min', + 'close': 'last', + 'volume': 'sum' + }).dropna() + + df.reset_index(inplace=True) + return df except Exception as e: print(f"Error fetching data for {ticker}: {str(e)}") @@ -93,34 +110,43 @@ def get_valid_tickers(min_price: float, max_price: float, min_volume: int, inter """Get tickers that meet the price and volume criteria""" client = create_client() yesterday = (datetime.now() - timedelta(days=1)).date() - today = datetime.now().date() - if interval == "daily": - table = "stock_prices_daily" - date_col = "date" - date_condition = f"{date_col} = '{yesterday}'" - else: - table = "stock_prices" - date_col = "window_start" - # Get today's trading hours timestamp range (9:30 AM to 4:00 PM EST) - market_open = int(datetime.combine(today, datetime.strptime("09:30", "%H:%M").time()).timestamp()) - market_close = int(datetime.combine(today, datetime.strptime("16:00", "%H:%M").time()).timestamp()) - date_condition = f"{date_col} BETWEEN {market_open} AND {market_close}" - - query = f""" + # First get valid tickers from daily data + daily_query = f""" SELECT DISTINCT ticker - FROM stock_db.{table} - WHERE {date_condition} + FROM stock_db.stock_prices_daily + WHERE date = '{yesterday}' AND close BETWEEN {min_price} AND {max_price} AND volume >= {min_volume} ORDER BY ticker ASC """ try: - result = client.query(query) + result = client.query(daily_query) tickers = [row[0] for row in result.result_rows] - print(f"Found {len(tickers)} stocks matching price and volume criteria") + print(f"\nFound {len(tickers)} stocks matching price and volume criteria") + + if interval != "daily": + # Now verify these tickers have intraday data today + today = datetime.now().date() + market_open = int(datetime.combine(today, datetime.strptime("09:30", "%H:%M").time()).timestamp()) + market_close = int(datetime.combine(today, datetime.strptime("16:00", "%H:%M").time()).timestamp()) + + intraday_query = f""" + SELECT DISTINCT ticker + FROM stock_db.stock_prices + WHERE ticker IN ({','.join([f"'{t}'" for t in tickers])}) + AND window_start BETWEEN {market_open} AND {market_close} + GROUP BY ticker + HAVING count() >= 10 -- Ensure we have enough data points + """ + + result = client.query(intraday_query) + tickers = [row[0] for row in result.result_rows] + print(f"Of those, {len(tickers)} have recent intraday data") + return tickers + except Exception as e: print(f"Error fetching tickers: {str(e)}") return []