Yahoo Finance in Python — Modular Guide for Finance & Analytics Teams
2025-09-02 • Finance
Educational use only. This guide demonstrates public techniques to fetch lightweight market data for learning and personal projects. For production/commercial use, consult the data provider’s Terms and obtain a licensed API.
Yahoo Finance API in Python — A Step-by-Step Guide with yfinance and Raw Endpoints
This guide is written for finance, analytics, and BI teams who want a clear, reusable way to pull:
- Latest quotes (prices & currency)
- Daily historical closes
- Dividends
- Asset profiles (name, sector, country, quoteType)
We provide two approaches:
- Raw Yahoo endpoints (with graceful HTML fallback), then
- `yfinance` as a convenient alternative (and a fallback where it helps).
Everything is modular. You can copy just the part you need: quotes, history, dividends, or profiles.
Table of Contents
- Quick Start (TL;DR)
- Project Layout
- Approach A — Raw Endpoints (with HTML fallback)
- Approach B — `yfinance`
- Use-Cases & Patterns
- Reliability, Limits & Good Citizenship
- CLI (Optional) for Local Testing
Quick Start (TL;DR)
Install minimal dependencies:
pip install requests yfinance pandas
Copy the functions you need from sections A2–A6, along with the A1 session helpers they depend on. Example: get quotes for a handful of symbols:
from yahoo_finance import get_quotes

# Crypto shorthands are normalised on the way in: BTC→BTC-USD automatically.
watchlist = ["AAPL","MSFT","NVDA","BTC"]
print(get_quotes(watchlist))
If you don’t want a separate module file: paste the specific function(s) directly into your script.
Project Layout
You can embed functions directly into your project or keep them together in a module (recommended):
yahoo-guide/
├─ yahoo_finance.py # Functions you’ll copy (A1–A6), plus helpers
├─ cli/
│ └─ yahoo_finance_cli.py # Optional: run from terminal to test
└─ requirements.txt # requests, yfinance, pandas
Don’t need CLI? Skip it. Each section below includes copy-paste functions.
Approach A — Raw Endpoints (with HTML fallback)
Yahoo offers JSON endpoints that often work out of the box. In some regions or networks, requests are rejected with 401/403. In those cases we gracefully fall back to parsing a small JSON blob embedded in the quote HTML page. Where helpful, we optionally use `yfinance` as a last resort so you’re never blocked.
A1. Session, Headers & Helpers
What this does:
- Creates a single `requests.Session()` with a browser-like User-Agent.
- Warms cookies by visiting the homepage (helps reduce 401s).
- Provides small helpers for crypto symbol normalization and asset class mapping.
# A1) session + helpers (copy-paste)
import json, re, sys, requests
from datetime import date, datetime, timezone
from typing import Dict, List, Optional, Tuple
# Browser-like User-Agent string sent with every request.
UA = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/124.0.0.0 Safari/537.36"
)
# Default headers installed on the shared session by _ensure_session().
# _get() overrides "Accept" per request when it wants JSON.
BASE_HEADERS = {
"User-Agent": UA,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
"Connection": "keep-alive",
"Referer": "https://finance.yahoo.com/",
}
# Module-wide session; stays None until _ensure_session() creates it.
_session: Optional[requests.Session] = None
def _log(*a):
    """Print the given values to stderr behind a "[YF]" tag."""
    print("[YF]", *a, file=sys.stderr)
def _ensure_session():
    """Build the shared module session on first use; later calls do nothing.

    The new session gets BASE_HEADERS and makes one request to the Yahoo
    Finance homepage. A failure of that warm-up request is logged and
    ignored, and the session is installed either way.
    """
    global _session
    if _session is None:
        fresh = requests.Session()
        fresh.headers.update(BASE_HEADERS)
        try:
            home = fresh.get("https://finance.yahoo.com/", timeout=10, allow_redirects=True)
            _log("GET", home.url, home.status_code)
        except Exception as exc:
            _log("warm home exception", repr(exc))
        _session = fresh
def _get(url: str, params: Optional[dict] = None, accept_json: bool = True) -> Optional[dict]:
    """Fetch *url* through the shared session and decode the body as JSON.

    A 401/403 answer for a URL containing "query2" is retried once with
    "query2" replaced by "query1". Returns the decoded payload ({} when the
    payload is falsy), or None for any other status, an undecodable body,
    or a raised exception.
    """
    _ensure_session()
    request_headers = dict(_session.headers)
    if accept_json:
        request_headers["Accept"] = "application/json, text/plain, */*"
    query = params or {}

    # The URL as given comes first; a query2 URL gets its query1 twin as the
    # single retry candidate.
    attempts = [url]
    if "query2" in url:
        attempts.append(url.replace("query2", "query1"))

    try:
        for target in attempts:
            resp = _session.get(target, params=query, headers=request_headers,
                                timeout=12, allow_redirects=True)
            _log("GET", resp.url, resp.status_code)
            if resp.status_code == 200:
                try:
                    return resp.json() or {}
                except Exception:
                    return None
            # Only an auth-style rejection is worth trying the next host for.
            if resp.status_code not in (401, 403):
                return None
        return None
    except Exception as exc:
        _log("GET exception", repr(exc))
        return None
# Helpers
# Currency reported when a response carries none; also the quote currency
# appended to crypto shorthands (e.g. BTC -> BTC-USD).
DEFAULT_CURRENCY = "USD"
def convert_to_yahoo_symbol(symbol: str) -> str:
    """Return *symbol* upper-cased and stripped, in the form Yahoo expects.

    A recognised crypto shorthand gets "-<DEFAULT_CURRENCY>" appended;
    anything else is returned unchanged apart from the normalisation.
    None or an empty string yields "".
    """
    ticker = (symbol or "").upper().strip()
    known_crypto = {"BTC","ETH","SOL","DOGE","BNB","ADA","XRP"}
    plausible = bool(ticker) and ticker.isalpha() and len(ticker) in (3, 4, 5)
    if plausible and ticker in known_crypto:
        return f"{ticker}-{DEFAULT_CURRENCY}"
    return ticker
def convert_from_yahoo_symbol(symbol: str) -> str:
    """Return the app-facing spelling of a Yahoo symbol.

    The "-<DEFAULT_CURRENCY>" suffix is removed only when
    convert_to_yahoo_symbol() would put it back, so the two helpers stay
    inverses of each other. Every other symbol is returned upper-cased and
    stripped, suffix included. None or an empty string yields "".
    """
    s = (symbol or "").upper().strip()
    suf = f"-{DEFAULT_CURRENCY}"
    if s.endswith(suf):
        base = s[:-len(suf)]
        # Stripping unconditionally turned e.g. SHIB-USD into SHIB, which
        # convert_to_yahoo_symbol() cannot map back to the crypto pair.
        if convert_to_yahoo_symbol(base) == s:
            return base
    return s
def parse_asset_class(quote_type: Optional[str], short_name: Optional[str] = None) -> Tuple[Optional[str], Optional[str]]:
    """Map a quote type label to an (asset class, asset sub-class) pair.

    The label is matched case-insensitively. When it is not recognised, a
    short name ending in " REIT" (also case-insensitive) maps to real
    estate. Returns (None, None) when nothing matches.
    """
    by_type = {
        "EQUITY": ("Equity", "Stock"),
        "COMMONSTOCK": ("Equity", "Stock"),
        "STOCK": ("Equity", "Stock"),
        "ETF": ("Equity", "ETF"),
        "EQUITYETF": ("Equity", "ETF"),
        "ETP": ("Equity", "ETF"),
        "MUTUALFUND": ("Equity", "Mutual Fund"),
        "FUND": ("Equity", "Mutual Fund"),
        "BOND": ("Fixed Income", "Bond"),
        "CRYPTOCURRENCY": ("Alternative Investment", "Crypto"),
        "CRYPTO": ("Alternative Investment", "Crypto"),
        "INDEX": ("Alternative Investment", "Index"),
        "OPTION": ("Alternative Investment", "Option"),
    }
    label = (quote_type or "").upper()
    mapped = by_type.get(label)
    if mapped is not None:
        return mapped
    # The name is consulted only after the type label failed to match.
    if (short_name or "").upper().endswith(" REIT"):
        return "Real Estate", "REIT"
    return None, None
Why this matters: A single warmed session reduces random 401s/403s and speeds up subsequent calls across your app.
A2. Search Symbols
What it does: Suggests symbols and basic metadata for a query (e.g., “TSLA”, “Tesla”).
When to use: Autocomplete, onboarding flows, search boxes.
Fallbacks: If `autoc` returns a 404 in your region, we try `v1/finance/search`.
def search_symbols(query: str, include_indices: bool = False, limit: int = 10) -> List[Dict]:
    """Suggest symbols with basic metadata for a free-text query.

    The autoc endpoint is asked first; when it yields no results the
    v1/finance/search endpoint is used instead.

    Args:
        query: Ticker or company name to look up.
        include_indices: Keep results whose type label is "INDEX"
            (case-insensitive). They are dropped by default.
        limit: Maximum number of results to return.

    Returns:
        Up to ``limit`` dicts with the keys symbol, name, currency, type,
        exchange, assetClass and assetSubClass, de-duplicated on
        (symbol, exchange). Empty for a blank query or a non-positive limit.
    """
    if limit <= 0 or not (query or "").strip():
        return []

    # Both endpoints are normalised to the same tuple shape so the item
    # building below exists only once:
    # (symbol, display name, name used for the REIT hint, type, currency, exchange)
    candidates = []

    # A) autoc (may 404 by region)
    auto = _get("https://autoc.finance.yahoo.com/autoc",
                {"query": query, "region": 1, "lang": "en-US"})
    results = ((auto or {}).get("ResultSet") or {}).get("Result") or []
    if results:
        for r in results:
            candidates.append((
                r.get("symbol"),
                r.get("name"),
                r.get("name"),
                r.get("typeDisp") or r.get("type"),
                r.get("currency"),
                r.get("exch"),
            ))
    else:
        # B) v1/finance/search
        s1 = _get("https://query2.finance.yahoo.com/v1/finance/search",
                  {"q": query, "lang": "en-US", "region": 1})
        for r in (s1 or {}).get("quotes") or []:
            candidates.append((
                r.get("symbol"),
                r.get("shortname") or r.get("longname"),
                r.get("shortname"),
                r.get("quoteType") or r.get("typeDisp") or r.get("type"),
                r.get("currency"),
                r.get("exchDisp") or r.get("exchange"),
            ))

    items: List[Dict] = []
    seen = set()
    for sym, name, hint, tdisp, currency, exchange in candidates:
        if not sym:
            continue
        # include_indices used to be accepted but ignored.
        if not include_indices and (tdisp or "").upper() == "INDEX":
            continue
        ac, sub = parse_asset_class(tdisp, hint)
        item = {
            "symbol": convert_from_yahoo_symbol(sym),
            "name": name or sym,
            "currency": currency,
            "type": tdisp,
            "exchange": exchange,
            "assetClass": ac,
            "assetSubClass": sub,
        }
        # De-dup & trim
        key = (item["symbol"], item.get("exchange") or "")
        if key in seen:
            continue
        seen.add(key)
        items.append(item)
        if len(items) >= limit:
            break
    return items
Use-cases:
- Ticker search in web apps.
- Instrument pickers for portfolio tools.
- BI parameter controls (e.g., “ETF only”).
A3. Latest Quotes
What it does: Returns a dict `{symbol: {marketPrice, currency, marketState}}`.
Fallbacks: JSON → HTML page → `yfinance` as a last resort.
def get_quotes(symbols: List[str]) -> Dict[str, Dict]:
    """Return the latest quote for each requested symbol.

    Sources are tried in order: the v7 quote JSON endpoint, the JSON
    embedded in the quote HTML page, then yfinance. Each later source is
    asked only for symbols the earlier ones did not resolve.

    Args:
        symbols: Tickers as the caller spells them; they are translated
            with convert_to_yahoo_symbol() before being sent.

    Returns:
        ``{symbol: {"currency", "marketPrice", "marketState"}}`` keyed by
        the caller's own spelling. Symbols no source resolved are absent.
    """
    out: Dict[str, Dict] = {}
    if not symbols:
        return out

    crypto_suffix = f"-{DEFAULT_CURRENCY}"
    embed_re = re.compile(r"root\.App\.main\s*=\s*(\{.*?\})\s*;\s*</script>", re.DOTALL)

    def _fetch_quote_page_json(symbol: str) -> Optional[dict]:
        # Download the quote page and decode the JSON assigned to root.App.main.
        _ensure_session()
        url = f"https://finance.yahoo.com/quote/{symbol}"
        try:
            r = _session.get(url, timeout=12, allow_redirects=True)
            _log("GET", r.url, r.status_code)
            if r.status_code != 200:
                return None
            m = embed_re.search(r.text)
            if not m:
                return None
            return json.loads(m.group(1))
        except Exception as e:
            _log("quote page parse exception", repr(e))
            return None

    def _extract_price_from_page_json(data: dict) -> Optional[dict]:
        # Pull price and currency out of the page's QuoteSummaryStore.
        try:
            stores = data["context"]["dispatcher"]["stores"]
            price = stores["QuoteSummaryStore"]["price"]
            regular = price.get("regularMarketPrice") or {}
            val = regular.get("raw") if isinstance(regular, dict) else regular
            return {
                "currency": price.get("currency") or DEFAULT_CURRENCY,
                "marketPrice": float(val or 0.0),
                "marketState": "open",
            }
        except Exception:
            return None

    # Remember which caller spelling(s) produced each Yahoo symbol so results
    # can be keyed the way the caller asked, the same way both fallbacks do.
    requested: Dict[str, List[str]] = {}
    for s in symbols:
        ys = convert_to_yahoo_symbol(s)
        if ys:
            requested.setdefault(ys, []).append(s)

    # 1) JSON API
    data = None
    if requested:
        data = _get("https://query2.finance.yahoo.com/v7/finance/quote",
                    {"symbols": ",".join(requested)}, accept_json=True)
    results = ((data or {}).get("quoteResponse") or {}).get("result") or []
    for r in results:
        sym_y = r.get("symbol")
        if not sym_y:
            continue
        # Crypto pairs count as always open. The check must look at the Yahoo
        # symbol: the app-facing one has already lost its "-USD" suffix.
        always_open = sym_y.endswith(crypto_suffix) or r.get("quoteType") == "CRYPTOCURRENCY"
        quote = {
            "currency": r.get("currency") or DEFAULT_CURRENCY,
            "marketPrice": float(r.get("regularMarketPrice") or 0.0),
            "marketState": "open" if (r.get("marketState") == "REGULAR" or always_open) else "closed",
        }
        keys = requested.get(sym_y.upper()) or [convert_from_yahoo_symbol(sym_y)]
        for key in keys:
            out[key] = dict(quote)

    # 2) HTML fallback
    missing = [s for s in dict.fromkeys(symbols)
               if s not in out and convert_to_yahoo_symbol(s)]
    for m in missing:
        pj = _fetch_quote_page_json(convert_to_yahoo_symbol(m))
        if not pj:
            continue
        p = _extract_price_from_page_json(pj)
        if p:
            out[m] = p

    # 3) yfinance last resort
    still = [s for s in missing if s not in out]
    if still:
        try:
            import yfinance as yf
        except Exception as e:
            _log("yfinance fallback exception", repr(e))
            return out
        for s in still:
            # Guard each symbol separately so one failure does not cost the rest.
            try:
                t = yf.Ticker(convert_to_yahoo_symbol(s))
                info = t.fast_info
                if isinstance(info, dict):
                    price = info.get("last_price")
                    currency = info.get("currency")
                else:
                    price = getattr(info, "last_price", None)
                    currency = getattr(info, "currency", None)
                if price is None:
                    hist = t.history(period="1d", interval="1d")
                    if not hist.empty:
                        price = float(hist["Close"].iloc[-1])
                if price is not None:
                    out[s] = {
                        # The default must apply to both branches above; the
                        # former one-liner bound "or" to the dict branch only.
                        "currency": currency or DEFAULT_CURRENCY,
                        "marketPrice": float(price),
                        "marketState": "open",
                    }
            except Exception as e:
                _log("yfinance fallback exception", repr(e))
    return out
Use-cases:
- Portfolio snapshots (current price).
- Alerts (price crosses threshold).
- Dashboard tiles (e.g., “Top 10 holdings now”).
A4. Historical Daily Prices
What it does: Returns `{YYYY-MM-DD: {marketPrice}}` between `start` and `end`.
Fallbacks: JSON → `yfinance`.
def get_historical(symbol: str, start: date, end: date) -> Dict[str, Dict]:
    """Return daily closing prices for *symbol*.

    The v8 chart JSON endpoint is tried first; yfinance is used when it
    yields no rows.

    Args:
        symbol: Ticker as the caller spells it.
        start: First day of the window (inclusive).
        end: End of the window. It is exclusive in both paths, because the
            yfinance fallback treats its ``end`` argument as exclusive.

    Returns:
        ``{"YYYY-MM-DD": {"marketPrice": close}}``. Rows without a close are
        skipped. Empty when neither source returns data.
    """
    ys = convert_to_yahoo_symbol(symbol)
    out: Dict[str, Dict] = {}
    # Reduce to plain dates so a datetime argument behaves the same way.
    start_day = date(start.year, start.month, start.day)
    end_day = date(end.year, end.month, end.day)

    def _epoch(day: date) -> int:
        # Seconds since the Unix epoch at 00:00 UTC of *day*.
        return int(datetime(day.year, day.month, day.day, tzinfo=timezone.utc).timestamp())

    # period1/period2 are epoch seconds; sending YYYY-MM-DD strings made this
    # request fail, so only the fallback below ever produced data.
    data = _get(f"https://query2.finance.yahoo.com/v8/finance/chart/{ys}", {
        "interval": "1d",
        "period1": _epoch(start_day),
        "period2": _epoch(end_day),
    }, accept_json=True) or {}
    res = (data.get("chart") or {}).get("result") or []
    if res:
        first = res[0] or {}
        quote_sets = (first.get("indicators") or {}).get("quote") or []
        stamps = first.get("timestamp") or []
        closes = []
        if quote_sets:
            closes = (quote_sets[0] or {}).get("close") or []
        for t, c in zip(stamps, closes):
            if c is None:
                continue
            day = datetime.fromtimestamp(int(t), tz=timezone.utc).date()
            if start_day <= day < end_day:
                out[day.isoformat()] = {"marketPrice": float(c)}
    if out:
        return out

    # yfinance fallback
    try:
        import yfinance as yf
        t = yf.Ticker(ys)
        hist = t.history(start=start_day.isoformat(), end=end_day.isoformat(), interval="1d")
        if not hist.empty:
            for idx, row in hist.iterrows():
                close = float(row["Close"])
                if close != close:  # NaN: no close for this row
                    continue
                out[idx.date().isoformat()] = {"marketPrice": close}
    except Exception as e:
        _log("yfinance history fallback exception", repr(e))
    return out
Use-cases:
- Backtests, rolling returns, drawdowns.
- BI charts (Power BI / Tableau / Metabase).
- Cost bases & P&L calc over time.
A5. Dividends
What it does: Returns `{YYYY-MM-DD: {marketPrice: dividend_amount}}` for the date window.
Fallbacks: JSON → `yfinance`.
def get_dividends(symbol: str, start: date, end: date) -> Dict[str, Dict]:
    """Return the dividends of *symbol* dated within [start, end].

    The v8 chart JSON endpoint (events=dividends) is tried first; yfinance
    is used when it yields nothing. Both paths keep only entries whose date
    lies between ``start`` and ``end`` inclusive.

    Args:
        symbol: Ticker as the caller spells it.
        start: First day of the window (inclusive).
        end: Last day of the window (inclusive).

    Returns:
        ``{"YYYY-MM-DD": {"marketPrice": amount}}``; the dividend amount is
        stored under the "marketPrice" key. Empty when nothing is found.
    """
    ys = convert_to_yahoo_symbol(symbol)
    out: Dict[str, Dict] = {}
    # Reduce to plain dates so a datetime argument behaves the same way.
    start_day = date(start.year, start.month, start.day)
    end_day = date(end.year, end.month, end.day)

    def _epoch(day: date) -> int:
        # Seconds since the Unix epoch at 00:00 UTC of *day*.
        return int(datetime(day.year, day.month, day.day, tzinfo=timezone.utc).timestamp())

    # period1/period2 are epoch seconds, not YYYY-MM-DD strings. One day is
    # added to period2 so events dated on `end` itself are requested too.
    data = _get(f"https://query2.finance.yahoo.com/v8/finance/chart/{ys}", {
        "events": "dividends",
        "interval": "1d",
        "period1": _epoch(start_day),
        "period2": _epoch(end_day) + 86400,
    }, accept_json=True) or {}
    res = (data.get("chart") or {}).get("result") or []
    if res:
        events = ((res[0] or {}).get("events") or {}).get("dividends") or {}
        for ev in events.values():
            t = ev.get("date")
            amt = ev.get("amount")
            if t is None or amt is None:
                continue
            day = datetime.fromtimestamp(int(t), tz=timezone.utc).date()
            # Apply the same inclusive window as the fallback below.
            if start_day <= day <= end_day:
                out[day.isoformat()] = {"marketPrice": float(amt)}
    if out:
        return out

    # yfinance fallback
    try:
        import yfinance as yf
        t = yf.Ticker(ys)
        div = t.dividends
        if not div.empty:
            for idx, val in div.items():
                day = idx.date()
                if start_day <= day <= end_day:
                    out[day.isoformat()] = {"marketPrice": float(val)}
    except Exception as e:
        _log("yfinance dividends fallback exception", repr(e))
    return out
Use-cases:
- Income reports; DRIP modelling.
- Yield analytics; factor screens.
- Cash forecasting in treasury models.
A6. Asset Profiles
What it does: Name, sector, country, currency, quoteType, and mapped asset class/sub-class.
Fallbacks: JSON → HTML → `yfinance`.
def get_asset_profile(symbol: str) -> Dict:
    """Return descriptive metadata for *symbol*.

    Sources are tried in order: the v10 quoteSummary JSON endpoint, the JSON
    embedded in the quote HTML page, then yfinance. The first source that
    provides data wins.

    Args:
        symbol: Ticker as the caller spells it. It is also the fallback
            value for "name" when no source reports a name.

    Returns:
        A dict with the keys name, sector, country, currency, quoteType,
        assetClass and assetSubClass. Empty when every source fails.
    """
    ys = convert_to_yahoo_symbol(symbol)
    embed_re = re.compile(r"root\.App\.main\s*=\s*(\{.*?\})\s*;\s*</script>", re.DOTALL)

    def _build_profile(ap: dict, sp: dict, price: dict, qt: dict) -> dict:
        # One place assembles the result so all three sources agree on its
        # shape, including the asset-class mapping.
        short_name = price.get("shortName")
        quote_type = qt.get("quoteType")
        ac, sub = parse_asset_class(quote_type, short_name)
        return {
            "name": price.get("longName") or short_name or symbol,
            "sector": ap.get("sector") or sp.get("sector"),
            "country": ap.get("country") or sp.get("country"),
            "currency": price.get("currency") or DEFAULT_CURRENCY,
            "quoteType": quote_type,
            "assetClass": ac,
            "assetSubClass": sub,
        }

    def _fetch_quote_page_json(page_symbol: str) -> Optional[dict]:
        # Download the quote page and decode the JSON assigned to root.App.main.
        _ensure_session()
        url = f"https://finance.yahoo.com/quote/{page_symbol}"
        try:
            r = _session.get(url, timeout=12, allow_redirects=True)
            _log("GET", r.url, r.status_code)
            if r.status_code != 200:
                return None
            m = embed_re.search(r.text)
            if not m:
                return None
            return json.loads(m.group(1))
        except Exception as e:
            _log("quote page parse exception", repr(e))
            return None

    def _extract_profile_from_page_json(data: dict) -> dict:
        # Return {} when the page has no QuoteSummaryStore. A dict of
        # placeholders would be truthy and keep yfinance from being tried.
        try:
            qss = data["context"]["dispatcher"]["stores"].get("QuoteSummaryStore") or {}
            if not qss:
                return {}
            return _build_profile(
                qss.get("assetProfile") or {},
                qss.get("summaryProfile") or {},
                qss.get("price") or {},
                qss.get("quoteType") or {},
            )
        except Exception:
            return {}

    # JSON API
    data = _get(f"https://query2.finance.yahoo.com/v10/finance/quoteSummary/{ys}",
                {"modules": "assetProfile,summaryProfile,price,quoteType"}, accept_json=True) or {}
    results = (data.get("quoteSummary") or {}).get("result") or []
    if results:
        r = results[0] or {}
        return _build_profile(
            r.get("assetProfile") or {},
            r.get("summaryProfile") or {},
            r.get("price") or {},
            r.get("quoteType") or {},
        )

    # HTML fallback
    pj = _fetch_quote_page_json(ys)
    if pj:
        prof = _extract_profile_from_page_json(pj)
        if prof:
            return prof

    # yfinance last resort
    try:
        import yfinance as yf
        t = yf.Ticker(ys)
        info = (t.get_info() if hasattr(t, "get_info") else getattr(t, "info", None)) or {}
        # The info dict is flat, so it stands in for every section. This also
        # maps the asset class, which this path used to leave as None.
        return _build_profile(info, {}, info, info)
    except Exception as e:
        _log("yfinance profile fallback exception", repr(e))
        return {}
Approach B — yfinance
`yfinance` is convenient and maintained. Prefer it when:
- You don’t need to control HTTP details.
- You’re okay with a library dependency for robustness.
- You want history/dividends in one line.
Examples:
import yfinance as yf

# One Ticker object serves all three lookups.
aapl = yf.Ticker("AAPL")

# quick metadata including last_price
print(aapl.fast_info)

# OHLCV DataFrame
one_year = aapl.history(period="1y")
print(one_year.tail())

# pandas Series of dividends
payouts = aapl.dividends
print(payouts.tail())
Keep both approaches in your toolkit: use raw endpoints when you need control; rely on `yfinance` when you need simplicity.
Use-Cases & Patterns
1) BI dashboards (Power BI / Tableau / Metabase)
- Nightly job: `get_quotes()` for watchlists → store to warehouse (Postgres/BigQuery/etc.).
- `get_historical()` for charts.
- Publish reports to stakeholders.
2) Portfolio / FP&A tooling
- Daily NAV refresh for internal funds.
- Dividends → cash forecast.
- Profiles → instrument master data.
3) Alerts & automations
- If `get_quotes(["AAPL"])["AAPL"]["marketPrice"] > X`, send Slack/Email.
- Monthly dividend digest (next month’s payers).
4) Web apps & internal portals
- Search box → `search_symbols()` + picker.
- Instrument page → quotes + history + profile.
- Cache responses for 15–60s to avoid spammy calls.
5) Data hygiene
- Normalize crypto tickers with `convert_to_yahoo_symbol`.
- Map assetClass/assetSubClass for consistent downstream reporting.
Reliability, Limits & Good Citizenship
- Use a single warmed session & modern User-Agent.
- Add retry/backoff if you scale up. Cache hot responses.
- Respect provider Terms. For commercial/production, use a licensed API.
- Expect occasional 401/403 → rely on the built-in fallbacks or `yfinance`.
- For very high scale, decouple: run a server job to hydrate a DB table your apps read from.
CLI (Optional) for Local Testing
Save as `cli/yahoo_finance_cli.py` (works with the functions above). This is purely for quick verification while you learn.
from __future__ import annotations
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
import argparse, json
from datetime import date, timedelta
from yahoo_finance import (
search_symbols, get_quotes, get_historical, get_dividends, get_asset_profile
)
def main():
    """Parse the command line, run one sub-command, print its result as JSON.

    Sub-commands: search, quotes, history, divs, profile. ``history`` and
    ``divs`` accept --start/--end as YYYY-MM-DD; the window defaults to the
    last 365 days for ``history`` and the last 365*5 days for ``divs``,
    ending today. Malformed or reversed dates end the run with an argparse
    usage error.
    """
    def _iso_date(text: str) -> date:
        # argparse converter: report a bad date as a usage error instead of
        # letting a ValueError traceback escape.
        try:
            return date.fromisoformat(text)
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"invalid date {text!r} (expected YYYY-MM-DD)"
            ) from None

    def _add_window_args(parser, default_days: int) -> None:
        # Shared symbol/--start/--end options of "history" and "divs".
        parser.add_argument("symbol")
        parser.add_argument("--start", type=_iso_date,
                            default=date.today() - timedelta(days=default_days))
        parser.add_argument("--end", type=_iso_date, default=date.today())

    p = argparse.ArgumentParser()
    sub = p.add_subparsers(dest="cmd", required=True)
    sub.add_parser("search").add_argument("query")
    sub.add_parser("quotes").add_argument("symbols", nargs="+")
    _add_window_args(sub.add_parser("history"), 365)
    _add_window_args(sub.add_parser("divs"), 365 * 5)
    sub.add_parser("profile").add_argument("symbol")
    args = p.parse_args()

    if args.cmd in ("history", "divs") and args.start > args.end:
        p.error("--start must not be after --end")

    if args.cmd == "search":
        result = search_symbols(args.query)
    elif args.cmd == "quotes":
        result = get_quotes(args.symbols)
    elif args.cmd == "history":
        result = get_historical(args.symbol, args.start, args.end)
    elif args.cmd == "divs":
        result = get_dividends(args.symbol, args.start, args.end)
    else:  # "profile": the only remaining choice argparse lets through
        result = get_asset_profile(args.symbol)
    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
Run examples:
python cli/yahoo_finance_cli.py search TSLA
python cli/yahoo_finance_cli.py quotes AAPL MSFT BTC
python cli/yahoo_finance_cli.py history NVDA --start 2023-01-01 --end 2024-01-01
python cli/yahoo_finance_cli.py divs VOO --start 2020-01-01 --end 2025-01-01
python cli/yahoo_finance_cli.py profile JPM
Attribution & Disclaimer
This tutorial is educational and uses publicly available endpoints for demonstration. Market data may be delayed or incomplete. Always validate accuracy before making decisions.
Support Mudric Lab
If a post saved you time, you can support our work.
Be professional. No promos. Off-topic may be removed.