TurtleTrade/stock_em.py

166 lines
5.3 KiB
Python

import asyncio
from typing import Dict, List
import aiohttp
import pandas as pd
# 增加了 User-Agent 头
HEADERS = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/122.0.0.0 Safari/537.36"
),
"Referer": "https://quote.eastmoney.com/center/gridlist.html#hs_a_board", # 合理来源页
"Accept": "application/json, text/plain, */*",
"Accept-Language": "zh-CN,zh;q=0.9",
"Connection": "keep-alive",
}
async def fetch_single_page(
session: aiohttp.ClientSession, url: str, params: Dict
) -> Dict:
"""异步获取单页数据"""
async with session.get(url, params=params, headers=HEADERS, ssl=False) as response:
return await response.json()
async def fetch_all_pages_async(url: str, base_params: Dict) -> List[Dict]:
"""异步获取所有页面数据"""
first_page_params = base_params.copy()
first_page_params["pn"] = "1"
async with aiohttp.ClientSession() as session:
first_page_data = await fetch_single_page(session, url, first_page_params)
if first_page_data.get("rc") != 0 or not first_page_data.get("data"):
return [first_page_data]
total = first_page_data["data"]["total"]
page_size = int(base_params["pz"])
total_pages = (total + page_size - 1) // page_size
total_pages = min(total_pages, 100)
tasks = []
for page in range(1, total_pages + 1):
page_params = base_params.copy()
page_params["pn"] = str(page)
tasks.append(fetch_single_page(session, url, page_params))
results = await asyncio.gather(*tasks)
return results
def process_data(page_results: List[Dict]) -> pd.DataFrame:
"""处理数据为 DataFrame"""
all_data = []
page_number = 1
items_per_page = 100
for result in page_results:
if result.get("rc") == 0 and result.get("data") and result["data"].get("diff"):
page_data = result["data"]["diff"]
for item in page_data:
item["page_number"] = page_number
item["page_index"] = page_data.index(item)
all_data.extend(page_data)
page_number += 1
if not all_data:
return pd.DataFrame()
df = pd.DataFrame(all_data)
df["序号"] = df.apply(
lambda row: (row["page_number"] - 1) * items_per_page + row["page_index"] + 1,
axis=1,
)
df.drop(columns=["page_number", "page_index"], inplace=True, errors="ignore")
column_map = {
"f1": "原序号",
"f2": "最新价",
"f3": "涨跌幅",
"f4": "涨跌额",
"f5": "成交量",
"f6": "成交额",
"f7": "振幅",
"f8": "换手率",
"f9": "市盈率-动态",
"f10": "量比",
"f11": "5分钟涨跌",
"f12": "代码",
"f13": "_",
"f14": "名称",
"f15": "最高",
"f16": "最低",
"f17": "今开",
"f18": "昨收",
"f20": "总市值",
"f21": "流通市值",
"f22": "涨速",
"f23": "市净率",
"f24": "60日涨跌幅",
"f25": "年初至今涨跌幅",
}
df.rename(columns=column_map, inplace=True)
desired_columns = [
"序号", "代码", "名称", "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅",
"最高", "最低", "今开", "昨收", "量比", "换手率", "市盈率-动态", "市净率",
"总市值", "流通市值", "涨速", "5分钟涨跌", "60日涨跌幅", "年初至今涨跌幅"
]
available_columns = [col for col in desired_columns if col in df.columns]
df = df[available_columns]
for col in [
"最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅", "最高", "最低", "今开",
"昨收", "量比", "换手率", "市盈率-动态", "市净率", "总市值", "流通市值", "涨速",
"5分钟涨跌", "60日涨跌幅", "年初至今涨跌幅"
]:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors="coerce")
df.sort_values(by="涨跌幅", ascending=False, inplace=True)
df.reset_index(drop=True, inplace=True)
df["序号"] = df.index + 1
return df
async def stock_zh_a_spot_em_async() -> pd.DataFrame:
url = "https://82.push2.eastmoney.com/api/qt/clist/get"
params = {
"pn": "1",
"pz": "100",
"po": "1",
"np": "1",
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
"fltt": "2",
"invt": "2",
"fid": "f12",
"fs": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
"fields": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,"
"f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152",
}
results = await fetch_all_pages_async(url, params)
return process_data(results)
def stock_zh_a_spot_em() -> pd.DataFrame:
"""
东方财富网-沪深京 A 股-实时行情 (同步接口)
https://quote.eastmoney.com/center/gridlist.html#hs_a_board
:return: 实时行情
:rtype: pandas.DataFrame
"""
import nest_asyncio
nest_asyncio.apply()
return asyncio.run(stock_zh_a_spot_em_async())
if __name__ == "__main__":
df = stock_zh_a_spot_em()
print(df)