166 lines
5.3 KiB
Python
166 lines
5.3 KiB
Python
|
import asyncio
|
||
|
from typing import Dict, List
|
||
|
|
||
|
import aiohttp
|
||
|
import pandas as pd
|
||
|
|
||
|
|
||
|
# Browser-like request headers (including a User-Agent) sent with every request.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    + "AppleWebKit/537.36 (KHTML, like Gecko) "
    + "Chrome/122.0.0.0 Safari/537.36"
)

HEADERS = {
    "User-Agent": _USER_AGENT,
    # A plausible referring page for the quote listing.
    "Referer": "https://quote.eastmoney.com/center/gridlist.html#hs_a_board",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
}
|
||
|
|
||
|
|
||
|
async def fetch_single_page(
    session: aiohttp.ClientSession, url: str, params: Dict
) -> Dict:
    """Fetch one page from the endpoint and return its decoded JSON body.

    :param session: open aiohttp session used to issue the GET request
    :param url: endpoint URL
    :param params: query-string parameters for this request
    :return: the payload produced by ``response.json()``
    :raises aiohttp.ClientResponseError: if the server answers with an HTTP
        error status (4xx/5xx)
    """
    # NOTE(review): ssl=False disables TLS certificate verification for this
    # request. It is kept as-is to preserve existing behavior, but it exposes
    # the call to man-in-the-middle tampering -- confirm it is still required.
    async with session.get(url, params=params, headers=HEADERS, ssl=False) as response:
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        return await response.json()
|
||
|
|
||
|
|
||
|
async def fetch_all_pages_async(url: str, base_params: Dict) -> List[Dict]:
    """Fetch every page of a paged listing concurrently.

    The first page is requested on its own to learn the total row count; the
    remaining pages are then requested concurrently and the first response is
    reused rather than downloaded a second time.

    :param url: endpoint URL
    :param base_params: query parameters shared by all pages; must contain
        ``pz`` (page size). ``pn`` (page number) is overridden per request.
    :return: one decoded response per page, in page order. If the first
        response has a non-zero ``rc`` or no ``data``, a one-element list
        holding just that response is returned.
    """
    first_page_params = base_params.copy()
    first_page_params["pn"] = "1"

    async with aiohttp.ClientSession() as session:
        first_page_data = await fetch_single_page(session, url, first_page_params)

        if first_page_data.get("rc") != 0 or not first_page_data.get("data"):
            return [first_page_data]

        total = first_page_data["data"]["total"]
        page_size = int(base_params["pz"])
        # Ceiling division, capped at 100 pages.
        total_pages = min((total + page_size - 1) // page_size, 100)
        if total_pages < 1:
            # Nothing to report (total == 0): same empty result as before.
            return []

        # Page 1 is already in hand, so only pages 2..N still need fetching.
        pending = [
            fetch_single_page(session, url, {**base_params, "pn": str(page)})
            for page in range(2, total_pages + 1)
        ]
        # gather() returns results in the order the awaitables were passed.
        later_pages = await asyncio.gather(*pending)
        return [first_page_data, *later_pages]
|
||
|
|
||
|
|
||
|
def process_data(page_results: List[Dict]) -> pd.DataFrame:
    """Convert paged API responses into a single quotes DataFrame.

    Pages whose ``rc`` is not 0, or that lack ``data``/``data["diff"]``, are
    skipped. The input dictionaries are not modified.

    :param page_results: decoded page responses; each usable page carries its
        rows as a list of dicts under ``result["data"]["diff"]``
    :return: DataFrame with the known columns renamed and ordered, numeric
        columns coerced with ``errors="coerce"``, rows sorted by "涨跌幅"
        descending (when that column exists) and a 1-based "序号" column
        first. An empty DataFrame is returned when no page has rows.
    """
    records: List[Dict] = []
    for result in page_results:
        data = result.get("data")
        if result.get("rc") == 0 and data and data.get("diff"):
            records.extend(data["diff"])

    if not records:
        return pd.DataFrame()

    column_map = {
        "f1": "原序号",
        "f2": "最新价",
        "f3": "涨跌幅",
        "f4": "涨跌额",
        "f5": "成交量",
        "f6": "成交额",
        "f7": "振幅",
        "f8": "换手率",
        "f9": "市盈率-动态",
        "f10": "量比",
        "f11": "5分钟涨跌",
        "f12": "代码",
        "f13": "_",
        "f14": "名称",
        "f15": "最高",
        "f16": "最低",
        "f17": "今开",
        "f18": "昨收",
        "f20": "总市值",
        "f21": "流通市值",
        "f22": "涨速",
        "f23": "市净率",
        "f24": "60日涨跌幅",
        "f25": "年初至今涨跌幅",
    }
    text_columns = ["代码", "名称"]
    numeric_columns = [
        "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅",
        "最高", "最低", "今开", "昨收", "量比", "换手率", "市盈率-动态", "市净率",
        "总市值", "流通市值", "涨速", "5分钟涨跌", "60日涨跌幅", "年初至今涨跌幅",
    ]

    df = pd.DataFrame(records).rename(columns=column_map)

    # Keep only the known columns, in display order. The explicit copy makes
    # the column assignments below operate on an independent frame.
    available_columns = [
        col for col in text_columns + numeric_columns if col in df.columns
    ]
    df = df[available_columns].copy()

    for col in numeric_columns:
        if col in df.columns:
            # Non-numeric values become NaN.
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Sort only when the key column is present instead of raising KeyError.
    if "涨跌幅" in df.columns:
        df = df.sort_values(by="涨跌幅", ascending=False)
    df = df.reset_index(drop=True)

    # The row number reflects the final order and is placed first.
    df.insert(0, "序号", list(range(1, len(df) + 1)))
    return df
|
||
|
|
||
|
|
||
|
async def stock_zh_a_spot_em_async() -> pd.DataFrame:
    """Download all pages of the A-share listing and return them as a DataFrame.

    Builds the fixed query for the listing endpoint, fetches every page with
    ``fetch_all_pages_async`` and hands the responses to ``process_data``.

    :return: the DataFrame produced by ``process_data``
    """
    endpoint = "https://82.push2.eastmoney.com/api/qt/clist/get"
    field_ids = [
        "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10",
        "f12", "f13", "f14", "f15", "f16", "f17", "f18",
        "f20", "f21", "f23", "f24", "f25", "f22", "f11",
        "f62", "f128", "f136", "f115", "f152",
    ]
    query = {
        "pn": "1",  # page number; overridden per request by the fetcher
        "pz": "100",  # page size; the fetcher uses it to compute the page count
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f12",
        "fs": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
        "fields": ",".join(field_ids),
    }
    pages = await fetch_all_pages_async(endpoint, query)
    return process_data(pages)
|
||
|
|
||
|
|
||
|
def stock_zh_a_spot_em() -> pd.DataFrame:
    """
    Eastmoney - Shanghai/Shenzhen/Beijing A shares - real-time quotes (synchronous interface)
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Runs ``stock_zh_a_spot_em_async`` to completion. When no event loop is
    running in the current thread, plain ``asyncio.run`` is used. When called
    from inside a running loop, ``nest_asyncio`` is imported and applied
    first so that the nested ``asyncio.run`` call is permitted.

    :return: real-time quotes
    :rtype: pandas.DataFrame
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running here: nothing needs patching.
        return asyncio.run(stock_zh_a_spot_em_async())

    # Already inside a running loop: patch asyncio only in this case,
    # rather than on every call.
    import nest_asyncio

    nest_asyncio.apply()
    return asyncio.run(stock_zh_a_spot_em_async())
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Script entry point: fetch the quotes once and print the resulting table.
    print(stock_zh_a_spot_em())
|