fix: Enhance data validation and type handling in screener scripts

Bobby (aider) 2025-02-08 11:39:01 -08:00
parent b4312e92d4
commit 02a26636e3
2 changed files with 40 additions and 15 deletions


@@ -80,25 +80,41 @@ def run_atr_ema_target_scanner(min_price: float, max_price: float, min_volume: i
        last_update = row["window_start"]
        try:
            # Get historical data
            df = get_stock_data(ticker, start_date, end_date, interval)
            # VALIDATION CHECKS - BEGIN
            required_columns = ['date', 'open', 'high', 'low', 'close', 'volume']
            # Enhanced validation with debugging
            if df.empty:
                print(f"⚠️ {ticker}: Empty DataFrame from get_stock_data()")
                print(f"⚠️ {ticker}: Empty DataFrame")
                continue
            missing_cols = set(required_columns) - set(df.columns)
            if missing_cols:
                print(f"⚠️ {ticker}: Missing columns {missing_cols} in DataFrame")
                print(f"Actual columns: {df.columns.tolist()}")
            # Ensure DataFrame has required columns and proper types
            required_columns = ['date', 'open', 'high', 'low', 'close', 'volume']
            # Print column info for debugging
            print(f"\nProcessing {ticker}")
            print(f"Columns present: {df.columns.tolist()}")
            # Convert columns to numeric if needed
            for col in ['open', 'high', 'low', 'close', 'volume']:
                if col in df.columns:
                    df[col] = pd.to_numeric(df[col], errors='coerce')
            # Verify data validity
            if df['close'].isnull().any():
                print(f"⚠️ {ticker}: Contains null values in close price")
                continue
            if 'close' not in df.columns:
                print(f"⚠️ {ticker}: 'close' column missing, cannot process")
                print(f"Available columns: {df.columns.tolist()}")
            if len(df) < 50:
                print(f"⚠️ {ticker}: Insufficient data points ({len(df)})")
                continue
            # Calculate indicator with validated data
            results = indicator.calculate(df.copy())  # Use copy to prevent modifications
            if results.empty:
                print(f"⚠️ {ticker}: No valid indicator results")
                continue
            # VALIDATION CHECKS - END
            results = indicator.calculate(df)
            last_row = results.iloc[-1]
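
Taken together, the checks in this hunk amount to a single guard applied per ticker before the indicator runs: require the OHLCV columns, coerce them to numeric, and skip the symbol when close contains nulls or fewer than 50 rows survive. A minimal standalone sketch of that flow, assuming only pandas (the validate_ohlcv name and the min_rows parameter are illustrative, not part of the commit):

from typing import Optional

import pandas as pd

REQUIRED_COLUMNS = ['date', 'open', 'high', 'low', 'close', 'volume']

def validate_ohlcv(df: pd.DataFrame, ticker: str, min_rows: int = 50) -> Optional[pd.DataFrame]:
    """Return a cleaned copy of df, or None if the ticker should be skipped."""
    if df.empty:
        print(f"⚠️ {ticker}: Empty DataFrame from get_stock_data()")
        return None
    missing_cols = set(REQUIRED_COLUMNS) - set(df.columns)
    if missing_cols:
        print(f"⚠️ {ticker}: Missing columns {missing_cols} in DataFrame")
        return None
    df = df.copy()
    # Coerce price/volume columns; unparseable values become NaN
    for col in ['open', 'high', 'low', 'close', 'volume']:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    if df['close'].isnull().any():
        print(f"⚠️ {ticker}: Contains null values in close price")
        return None
    if len(df) < min_rows:
        print(f"⚠️ {ticker}: Insufficient data points ({len(df)})")
        return None
    return df

In the scanner loop, a None result would map to the continue statements above, and the returned copy is what would be handed to indicator.calculate().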


@@ -88,9 +88,18 @@ def get_stock_data(ticker: str, start_date: datetime, end_date: datetime, interv
    df = pd.DataFrame(
        result.result_rows,
        columns=['date', 'open', 'high', 'low', 'close', 'volume']
    ).dropna(subset=['close'])  # Filter out rows with null close prices
    )
    # Convert numeric columns
    numeric_columns = ['open', 'high', 'low', 'close', 'volume']
    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    # Handle null values
    if df['close'].isnull().any():
        print(f"Warning: Found null values in close prices")
        df = df.dropna(subset=['close'])
    # Convert date column if needed
    if df.empty or 'close' not in df.columns:
        return pd.DataFrame()
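
The coerce-then-drop pattern used here can be exercised in isolation. A short sketch with hand-written rows standing in for result.result_rows (the sample values are illustrative only):

import pandas as pd

rows = [
    ('2025-02-06', '101.2', '103.0', '100.5', '102.4', '1200000'),
    ('2025-02-07', '102.4', '104.1', '101.1', None, '950000'),  # null close -> dropped
]
df = pd.DataFrame(rows, columns=['date', 'open', 'high', 'low', 'close', 'volume'])

# Coerce string columns to numbers; anything unparseable becomes NaN
for col in ['open', 'high', 'low', 'close', 'volume']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop rows with a null close, then fall back to an empty frame if nothing usable remains
if df['close'].isnull().any():
    df = df.dropna(subset=['close'])
if df.empty or 'close' not in df.columns:
    df = pd.DataFrame()

print(df)  # one surviving row; price/volume columns are now float64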