stock_system/src/screener/data_fetcher.py

145 lines
4.5 KiB
Python

import datetime
from db.db_connection import create_client
def validate_date_range(start_date, end_date, required_quarters=4):
    """
    Ensure the date range is long enough to cover ``required_quarters``
    quarters of data (needed to evaluate the 'C' in CANSLIM).

    If the range is too short, ``start_date`` is moved back so that the span
    equals ``required_quarters * 91`` days, and a warning is printed.

    Args:
        start_date (str, datetime.date or datetime.datetime): Requested start.
            Strings must be in 'YYYY-MM-DD' format.
        end_date (str, datetime.date or datetime.datetime): Requested end.
            Strings must be in 'YYYY-MM-DD' format.
        required_quarters (int): Number of quarters needed (default: 4).

    Returns:
        (datetime, datetime): Tuple of (possibly adjusted) start_date and the
        unchanged end_date, both as ``datetime.datetime``.

    Raises:
        ValueError: If a string argument is not in 'YYYY-MM-DD' format.
    """

    def _as_datetime(value):
        # Strings are parsed; plain dates are promoted to midnight so that a
        # mix of date / datetime / str inputs can be subtracted safely
        # (datetime - date raises TypeError).
        if isinstance(value, str):
            return datetime.datetime.strptime(value, "%Y-%m-%d")
        if isinstance(value, datetime.date) and not isinstance(value, datetime.datetime):
            return datetime.datetime.combine(value, datetime.time.min)
        return value

    start_date = _as_datetime(start_date)
    end_date = _as_datetime(end_date)

    # A quarter is approximated as 91 days (~3 months).
    needed_delta = datetime.timedelta(days=required_quarters * 91)

    if end_date - start_date < needed_delta:
        start_date = end_date - needed_delta
        print("Warning: Provided date range was too short. Adjusted start_date for enough data.")

    return start_date, end_date
def _as_iso_date(value):
    """Render a date/datetime as 'YYYY-MM-DD'; any other value is returned unchanged."""
    if isinstance(value, datetime.date):  # datetime.datetime is a subclass of date
        return value.strftime("%Y-%m-%d")
    return value


def _quarter_over_quarter_growth(revenues):
    """Return percent growth of each revenue versus the previous one (None where undefined)."""
    growth = []
    previous = None  # the first quarter has nothing to compare against
    for current in revenues:
        if previous is None or current is None or previous <= 0:
            growth.append(None)
        else:
            growth.append(((current - previous) / previous) * 100)
        previous = current
    return growth


def _return_on_equity(net_incomes, equities):
    """Return net_income / equity in percent per period (None where equity is missing or <= 0)."""
    return [
        (net_income / equity) * 100
        if net_income is not None and equity is not None and equity > 0
        else None
        for net_income, equity in zip(net_incomes, equities)
    ]


def fetch_financial_data(symbol, start_date, end_date, client=None):
    """
    Fetch financial data for a given stock symbol, including:
    - Quarterly EPS for EPS Score
    - Annual EPS for Annual EPS Score
    - Sales Growth (quarter-over-quarter, in percent)
    - Return on Equity (ROE, in percent, from quarterly rows)

    Args:
        symbol (str): Stock ticker symbol.
        start_date (str or datetime): Start date for data retrieval
            ('YYYY-MM-DD' when given as a string).
        end_date (str or datetime): End date for data retrieval
            ('YYYY-MM-DD' when given as a string).
        client: Optional database client to reuse. When omitted, a new one is
            obtained from ``create_client()``. It must expose
            ``query(sql, parameters=...)`` returning an object with
            ``result_rows``.
            NOTE(review): assumes a clickhouse-connect style client -- confirm
            against ``db.db_connection``.

    Returns:
        dict: ``{}`` when no rows match; otherwise a dict with keys
        ``symbol``, ``quarterly_eps``, ``annual_eps``, ``sales_growth`` and
        ``roe``. ``sales_growth`` and ``roe`` have one entry per quarterly
        row, with ``None`` where the value cannot be computed (first quarter,
        missing values, non-positive previous revenue or equity).
    """
    if client is None:
        client = create_client()

    # Values are passed as bound parameters instead of being interpolated
    # into the SQL text, so a crafted symbol/date cannot alter the query.
    query = """
        SELECT
            filing_date,
            diluted_eps,
            revenue,
            net_income,
            equity,
            timeframe
        FROM stock_db.stock_financials
        WHERE ticker = %(symbol)s
          AND filing_date BETWEEN toDate(%(start_date)s) AND toDate(%(end_date)s)
          AND timeframe IN ('quarterly', 'annual')
        ORDER BY filing_date ASC
    """
    parameters = {
        "symbol": symbol,
        # Normalised to 'YYYY-MM-DD' so datetime inputs do not leak a time
        # component into toDate().
        "start_date": _as_iso_date(start_date),
        "end_date": _as_iso_date(end_date),
    }

    result = client.query(query, parameters=parameters)
    rows = result.result_rows
    if not rows:
        return {}

    quarterly_eps = []
    annual_eps = []
    revenues = []
    net_incomes = []
    equities = []

    for _filing_date, eps, revenue, net_income, equity, timeframe in rows:
        if timeframe == "quarterly":
            quarterly_eps.append(eps)
            revenues.append(revenue)
            net_incomes.append(net_income)
            equities.append(equity)
        elif timeframe == "annual":
            annual_eps.append(eps)

    return {
        "symbol": symbol,
        "quarterly_eps": quarterly_eps,  # Used for EPS_Score
        "annual_eps": annual_eps,  # Used for Annual_EPS_Score
        "sales_growth": _quarter_over_quarter_growth(revenues),
        "roe": _return_on_equity(net_incomes, equities),  # Return on Equity
    }
def get_stocks_in_time_range(start_date, end_date, client=None):
    """
    Query ClickHouse for all stock symbols that have data within the given date range.

    Args:
        start_date (str or datetime): Start date; strings must be 'YYYY-MM-DD'.
        end_date (str or datetime): End date; strings must be 'YYYY-MM-DD'.
        client: Optional database client to reuse. When omitted, a new one is
            obtained from ``create_client()``. It must expose
            ``query(sql, parameters=...)`` returning an object with
            ``result_rows``.
            NOTE(review): assumes a clickhouse-connect style client -- confirm
            against ``db.db_connection``.

    Returns:
        list: The distinct tickers found in ``stock_db.stock_prices_daily``
        for the specified date range (empty when none match).
    """
    # datetime.datetime is a subclass of datetime.date, so this normalises
    # both to 'YYYY-MM-DD' and drops any time component.
    if isinstance(start_date, datetime.date):
        start_date = start_date.strftime("%Y-%m-%d")
    if isinstance(end_date, datetime.date):
        end_date = end_date.strftime("%Y-%m-%d")

    if client is None:
        client = create_client()

    # Dates are passed as bound parameters rather than formatted into the
    # SQL text, so user-provided strings cannot change the query structure.
    query = """
        SELECT DISTINCT ticker
        FROM stock_db.stock_prices_daily
        WHERE date BETWEEN toDate(%(start_date)s) AND toDate(%(end_date)s)
    """
    result = client.query(
        query,
        parameters={"start_date": start_date, "end_date": end_date},
    )
    return [row[0] for row in result.result_rows]