Scraping Fund Temperatures

# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import pandas as pd
import requests  # requests for sending HTTP requests
import re        # re for regular-expression matching
import json      # json for parsing JSON data
import time      # time for generating timestamps

codes = [{"id": 1, "name": "创业板指", "zs": "399006", "cn": "159915", "cw": "161022"},
{"id": 2, "name": "深证成指", "zs": "399001", "cn": "159903", "cw": "202017"},
{"id": 3, "name": "深证100", "zs": "399330", "cn": "159901", "cw": "110019"},
{"id": 4, "name": "央视50", "zs": "399550", "cn": "159965", "cw": "217027"},
{"id": 5, "name": "沪深300", "zs": "000300", "cn": "510300", "cw": "160706"},
{"id": 6, "name": "上证180", "zs": "000010", "cn": "510180", "cw": "519180"},
{"id": 7, "name": "上证50", "zs": "000016", "cn": "510050", "cw": "001051"},
{"id": 8, "name": "中证100", "zs": "000903", "cn": "512910", "cw": "240014"},
{"id": 9, "name": "中小300", "zs": "399008", "cn": "159907", "cw": "270026"},
{"id": 10, "name": "创业板50", "zs": "399673", "cn": "159949", "cw": "007464"},
{"id": 11, "name": "中证1000", "zs": "000852", "cn": "512100", "cw": ""},
{"id": 12, "name": "中证500", "zs": "000905", "cn": "510500", "cw": "160119"},
{"id": 13, "name": "科创50", "zs": "000688", "cn": "588000", "cw": "011609"},
{"id": 14, "name": "深证红利", "zs": "399324", "cn": "159905", "cw": "481012"},
{"id": 15, "name": "50AH优选", "zs": "950090", "cn": "", "cw": "501050"},
{"id": 16, "name": "中证红利", "zs": "000922", "cn": "515890", "cw": "100032"},
{"id": 17, "name": "深证F120", "zs": "399702", "cn": "159910", "cw": "070023"},
{"id": 18, "name": "红利指数", "zs": "000015", "cn": "510880", "cw": "016441"},
{"id": 19, "name": "300红利", "zs": "000821", "cn": "512530", "cw": ""},
{"id": 20, "name": "红利低波", "zs": "H30269", "cn": "512890", "cw": "005561"},
{"id": 21, "name": "基本面50", "zs": "000925", "cn": "512750", "cw": "160716"},
{"id": 22, "name": "深证价值", "zs": "399348", "cn": "159913", "cw": "519706"},
{"id": 23, "name": "深证F60", "zs": "399701", "cn": "159916", "cw": "530015"},
{"id": 24, "name": "300价值", "zs": "000919", "cn": "", "cw": "519671"},
{"id": 25, "name": "红利潜力", "zs": "H30089", "cn": "515570", "cw": "007671"},
{"id": 26, "name": "180价值", "zs": "000029", "cn": "510030", "cw": "240016"},
{"id": 27, "name": "500SNLV", "zs": "930782", "cn": "512260", "cw": "003318"},
{"id": 28, "name": "300红利LV", "zs": "930740", "cn": "", "cw": "007605"},
{"id": 29, "name": "国证地产", "zs": "399393", "cn": "", "cw": "160218"},
{"id": 30, "name": "环境治理", "zs": "399806", "cn": "", "cw": "164908"},
{"id": 31, "name": "基建工程", "zs": "399995", "cn": "", "cw": "165525"},
{"id": 32, "name": "全指材料", "zs": "000987", "cn": "159944", "cw": ""},
{"id": 33, "name": "养老产业", "zs": "399812", "cn": "", "cw": "000968"},
{"id": 34, "name": "中国互联网", "zs": "H11136", "cn": "164906", "cw": "164906"},
{"id": 35, "name": "国证有色", "zs": "399395", "cn": "160221", "cw": ""},
{"id": 36, "name": "中国互联网50", "zs": "H30533", "cn": "513050", "cw": "006327"},
{"id": 37, "name": "中证银行", "zs": "399986", "cn": "512800", "cw": "001594"},
{"id": 38, "name": "全指金融", "zs": "000992", "cn": "159940", "cw": "001469"},
{"id": 39, "name": "全指工业", "zs": "000988", "cn": "159953", "cw": ""},
{"id": 40, "name": "家用电器", "zs": "930697", "cn": "159996", "cw": "005063"},
{"id": 41, "name": "国证食品", "zs": "399396", "cn": "159843", "cw": "160222"},
{"id": 42, "name": "高铁产业", "zs": "399807", "cn": "", "cw": "160135"},
{"id": 43, "name": "中证酒", "zs": "399987", "cn": "512690", "cw": "160632"},
{"id": 44, "name": "新能源", "zs": "000941", "cn": "516160", "cw": "012831"},
{"id": 45, "name": "中证消费", "zs": "000932", "cn": "159928", "cw": "000248"},
{"id": 46, "name": "证券公司", "zs": "399975", "cn": "512000", "cw": "004069"},
{"id": 47, "name": "中证国防", "zs": "399973", "cn": "512670", "cw": "502003"},
{"id": 48, "name": "全指可选", "zs": "000989", "cn": "159936", "cw": "001133"},
{"id": 49, "name": "全指信息", "zs": "000993", "cn": "159939", "cw": "000942"},
{"id": 50, "name": "中证军工", "zs": "399967", "cn": "512660", "cw": "161024"},
{"id": 51, "name": "大宗商品", "zs": "000979", "cn": "", "cw": "161715"},
{"id": 52, "name": "汽车指数", "zs": "931008", "cn": "", "cw": "004854"},
{"id": 53, "name": "中证煤炭", "zs": "399998", "cn": "013275", "cw": "161032"},
{"id": 54, "name": "CS食品饮", "zs": "930653", "cn": "", "cw": "001631"},
{"id": 55, "name": "中证传媒", "zs": "399971", "cn": "512980", "cw": "004752"},
{"id": 56, "name": "大农业", "zs": "399814", "cn": "", "cw": "001027"},
{"id": 57, "name": "中证白酒", "zs": "399997", "cn": "", "cw": "161725"},
{"id": 58, "name": "CS新能车", "zs": "399976", "cn": "515030", "cw": "161028"},
{"id": 59, "name": "中证环保", "zs": "000827", "cn": "512580", "cw": "001064"},
{"id": 60, "name": "科技龙头", "zs": "931087", "cn": "515000", "cw": "007873"},
{"id": 61, "name": "人工智能", "zs": "931071", "cn": "515980", "cw": "008020"},
{"id": 62, "name": "CS创新药", "zs": "931152", "cn": "159992", "cw": "012738"},
{"id": 63, "name": "5G通信", "zs": "931079", "cn": "515050", "cw": "008086"},
{"id": 64, "name": "光伏产业", "zs": "931151", "cn": "515790", "cw": "011102"},
{"id": 65, "name": "国证芯片", "zs": "980017", "cn": "159995", "cw": "008887"},
{"id": 66, "name": "中华半导体芯片", "zs": "990001", "cn": "512760", "cw": "008281"},
{"id": 67, "name": "恒生指数", "zs": "HSI", "cn": "159920", "cw": "164705"},
{"id": 68, "name": "恒生综合中小型股指数", "zs": "HSMSI", "cn": "", "cw": "160922"},
{"id": 69, "name": "恒生中国企业指数", "zs": "HSCEI", "cn": "510900", "cw": "110031"},
{"id": 70, "name": "恒生A股行业龙头指数", "zs": "HSCAIT", "cn": "", "cw": "540012"},
{"id": 71, "name": "标普500", "zs": ".INX", "cn": "513500", "cw": "050025"},
{"id": 72, "name": "纳斯达克100指数", "zs": ".NDX", "cn": "", "cw": "160213"}]


# 1. Scrape the data from the API endpoint
def scrape_website(api, headers=None):
    """Fetch data from the website."""
    try:
        response = requests.get(api, headers=headers, timeout=10)  # set a timeout to avoid hanging
        response.raise_for_status()
        return response
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None


def zswd():
    """Scrape the index temperature data from the API."""
    # Target URL to scrape (replace with the actual API address)
    t_results = scrape_website('http://caf-qibei.com/index?type=html')  # replace with the actual API
    if t_results is None:
        return []

    soup = BeautifulSoup(t_results.content, 'html.parser')
    tr_tags = soup.find_all('tr')

    index_T = []
    for tr in tr_tags:
        a_tags = tr.find_all('a', href=True)
        td_tag = tr.find('td', {'colspan': '3'})

        if len(a_tags) >= 2 and td_tag:
            first_a_text = a_tags[0].text.strip()
            td_text = td_tag.text.strip()

            try:
                td_float = float(td_text)
                index_T.append((first_a_text, td_float))
            except ValueError:
                continue  # skip values that fail the conversion
    return index_T


# Index temperatures
wendu = zswd()

# Extract the list of index codes (zs)
zs = [i['zs'] for i in codes]

# Build a DataFrame from the temperature data
df_wendu = pd.DataFrame(wendu, columns=['zs', 'temperature'])

# Keep only the rows whose zs appears in codes
df_filtered = df_wendu[df_wendu['zs'].isin(zs)]

# Convert codes to a DataFrame
df_codes = pd.DataFrame(codes)

# Merge codes and temperatures on the zs field
df_merged = pd.merge(df_filtered, df_codes[['zs', 'name', 'cn', 'cw']], on='zs', how='inner')

# Rename the columns
df_merged = df_merged.rename(columns={'zs': '指数', 'name': '名称', 'temperature': '温度', 'cn': '场内', 'cw': '场外'})

# Sort by temperature, ascending
df_merged_sorted = df_merged.sort_values(by='温度', ascending=True)

# Print the result
print(df_merged_sorted)
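
As a small usage example, the sorted table can also be filtered before printing; the cutoff of 30 below is purely illustrative and not part of the original script:

# Illustrative only: keep the indices whose temperature is below an arbitrary cutoff
cold = df_merged_sorted[df_merged_sorted['温度'] < 30]
print(cold[['名称', '温度', '场内', '场外']])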

Scraping Fund Historical Data and Real-Time Prices

import pandas as pd  # pandas for data handling
import requests      # requests for sending HTTP requests
import re            # re for regular-expression matching
import json          # json for parsing JSON data

data = [{"id": 1, "name": "创业板指", "zs": "399006", "cn": "159915", "cw": "161022"},
{"id": 2, "name": "深证成指", "zs": "399001", "cn": "159903", "cw": "202017"},
{"id": 3, "name": "深证100", "zs": "399330", "cn": "159901", "cw": "110019"},
{"id": 4, "name": "央视50", "zs": "399550", "cn": "159965", "cw": "217027"},
{"id": 5, "name": "沪深300", "zs": "000300", "cn": "510300", "cw": "160706"},
{"id": 6, "name": "上证180", "zs": "000010", "cn": "510180", "cw": "519180"},
{"id": 7, "name": "上证50", "zs": "000016", "cn": "510050", "cw": "001051"},
{"id": 8, "name": "中证100", "zs": "000903", "cn": "512910", "cw": "240014"},
{"id": 9, "name": "中小300", "zs": "399008", "cn": "159907", "cw": "270026"},
{"id": 10, "name": "创业板50", "zs": "399673", "cn": "159949", "cw": "007464"},
{"id": 11, "name": "中证1000", "zs": "000852", "cn": "512100", "cw": ""},
{"id": 12, "name": "中证500", "zs": "000905", "cn": "510500", "cw": "160119"}]

# Collect the on-exchange (cn) codes, dropping empty values
jj = [item["cn"] for item in data if item["cn"]]  # skip empty "cn" values

results = []  # list to collect the results

# Dictionary for fast name lookup
name_dict = {item["cn"]: item["name"] for item in data}

for i in jj:
    df_list = []  # list to hold the historical NAV data for a single fund
    for index in range(1, 3):  # fetch the NAV pages for one fund
        url = f'http://api.fund.eastmoney.com/f10/lsjz?callback=jQuery1830041192874394646584_1617938643457&fundCode={i}&pageIndex={index}&pageSize=20&startDate=&endDate=&_=1617939181252'
        headers = {
            'Referer': 'http://fundf10.eastmoney.com/',  # anti-hotlinking header
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'
        }
        resp = requests.get(url, headers=headers)  # fetch the page
        html = resp.text  # response body
        res = re.findall(r'\((.*?)\)', html)  # strip the JSONP wrapper to get the payload
        if res:  # make sure there is a match
            datas = json.loads(res[0])["Data"]["LSJZList"]  # take the first match and read the NAV list
            df = pd.DataFrame(datas)  # turn it into a DataFrame
            df_list.append(df)  # collect the page

    if df_list:  # make sure we actually got data
        df_data = pd.concat(df_list)  # concatenate the pages

        # Convert the DWJZ column to numbers; values that cannot be converted become NaN
        df_data['DWJZ'] = pd.to_numeric(df_data['DWJZ'], errors='coerce')
        # Drop the NaN rows
        df_data = df_data.dropna(subset=['DWJZ'])

        zdz = df_data['DWJZ'].min()   # lowest NAV
        zgz = df_data['DWJZ'].max()   # highest NAV
        avg = df_data['DWJZ'].mean()  # average NAV

        # Look up the fund name
        name = name_dict.get(i, '未知名称')  # fall back to '未知名称' if missing

        # Store the result
        results.append({
            '基金代码': i,
            '名称': name,
            '最低净值': zdz,
            '最高净值': zgz,
            '平均净值': avg
        })

etf = []
# Jisilu API
etfapi = 'https://www.jisilu.cn/data/etf/etf_list/?___jsl=LST___t=1693819271762&volume=500&unit_total=2&rp=25'
etfdata = requests.get(etfapi).json()['rows']

for i in etfdata:
    fund_id = i['cell']['fund_id']
    if fund_id in jj:
        etf.append({
            '基金代码': fund_id,
            '现价': i['cell']['price'],
            '涨跌': i['cell']['increase_rt'],
            '溢价率': i['cell']['discount_rt'],
            '成交额': i['cell']['volume']
        })

etf_price = pd.DataFrame(etf)

# Build the results DataFrame
results_df = pd.DataFrame(results)

# Merge the current prices into the results and drop rows with missing values
results_df = results_df.merge(etf_price, on='基金代码', how='left').dropna()

print(results_df)  # print the merged result

After this optimization there is a problem with the data conversion (one possible fix is sketched after the listing below).

import pandas as pd  # pandas for data handling
import requests      # requests for sending HTTP requests
import re            # re for regular-expression matching
import json          # json for parsing JSON data

data = [{"id": 1, "name": "创业板指", "zs": "399006", "cn": "159915", "cw": "161022"},
{"id": 2, "name": "深证成指", "zs": "399001", "cn": "159903", "cw": "202017"},
{"id": 3, "name": "深证100", "zs": "399330", "cn": "159901", "cw": "110019"},
{"id": 4, "name": "央视50", "zs": "399550", "cn": "159965", "cw": "217027"},
{"id": 5, "name": "沪深300", "zs": "000300", "cn": "510300", "cw": "160706"},
{"id": 6, "name": "上证180", "zs": "000010", "cn": "510180", "cw": "519180"},
{"id": 7, "name": "上证50", "zs": "000016", "cn": "510050", "cw": "001051"},
{"id": 8, "name": "中证100", "zs": "000903", "cn": "512910", "cw": "240014"},
{"id": 9, "name": "中小300", "zs": "399008", "cn": "159907", "cw": "270026"},
{"id": 10, "name": "创业板50", "zs": "399673", "cn": "159949", "cw": "007464"},
{"id": 11, "name": "中证1000", "zs": "000852", "cn": "512100", "cw": ""},
{"id": 12, "name": "中证500", "zs": "000905", "cn": "510500", "cw": "160119"}]

# Collect the on-exchange (cn) codes, dropping empty values
jj = [item["cn"] for item in data if item["cn"]]  # skip empty "cn" values

results = []  # list to collect the results

# Dictionary for fast name lookup
name_dict = {item["cn"]: item["name"] for item in data}

for i in jj:
    df_list = []  # list to hold the historical NAV data for a single fund
    for index in range(1, 3):  # fetch the NAV pages for one fund
        url = f'http://api.fund.eastmoney.com/f10/lsjz?callback=jQuery1830041192874394646584_1617938643457&fundCode={i}&pageIndex={index}&pageSize=20&startDate=&endDate=&_=1617939181252'
        headers = {
            'Referer': 'http://fundf10.eastmoney.com/',  # anti-hotlinking header
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'
        }
        resp = requests.get(url, headers=headers)  # fetch the page
        html = resp.text  # response body
        res = re.findall(r'\((.*?)\)', html)  # strip the JSONP wrapper to get the payload
        if res:  # make sure there is a match
            datas = json.loads(res[0])["Data"]["LSJZList"]  # take the first match and read the NAV list
            df = pd.DataFrame(datas)  # turn it into a DataFrame
            df_list.append(df)  # collect the page

    if df_list:  # make sure we actually got data
        df_data = pd.concat(df_list)  # concatenate the pages

        # Convert the DWJZ column to numbers; values that cannot be converted become NaN
        df_data['DWJZ'] = pd.to_numeric(df_data['DWJZ'], errors='coerce')
        # Drop the NaN rows
        df_data = df_data.dropna(subset=['DWJZ'])

        zdz = df_data['DWJZ'].min()   # lowest NAV
        zgz = df_data['DWJZ'].max()   # highest NAV
        avg = df_data['DWJZ'].mean()  # average NAV

        # Look up the fund name
        name = name_dict.get(i, '未知名称')  # fall back to '未知名称' if missing

        # Store the result
        results.append({
            '基金代码': i,
            '名称': name,
            '最低净值': zdz,
            '最高净值': zgz,
            '平均净值': avg
        })

etf = []
# Jisilu API
etfapi = 'https://www.jisilu.cn/data/etf/etf_list/?___jsl=LST___t=1693819271762&volume=500&unit_total=2&rp=25'
etfdata = requests.get(etfapi).json()['rows']

for i in etfdata:
    fund_id = i['cell']['fund_id']
    if fund_id in jj:
        etf.append({
            '基金代码': fund_id,
            '现价': i['cell']['price'],
            '涨跌': i['cell']['increase_rt'],
            '溢价率': i['cell']['discount_rt'],
            '成交额': i['cell']['volume']
        })

etf_price = pd.DataFrame(etf)

# Build the results DataFrame
results_df = pd.DataFrame(results)

# Merge the current prices into the results and drop rows with missing values
results_df = results_df.merge(etf_price, on='基金代码', how='left').dropna()

print(results_df)

# # Convert the numeric columns to floats
# results_df['现价'] = pd.to_numeric(results_df['现价'], errors='coerce')
# results_df['涨跌'] = pd.to_numeric(results_df['涨跌'], errors='coerce')
# results_df['溢价率'] = pd.to_numeric(results_df['溢价率'], errors='coerce')
# results_df['成交额'] = pd.to_numeric(results_df['成交额'], errors='coerce')
#
# # Keep the rows with turnover above 10000 and current price below the average NAV
# filtered_df = results_df[(results_df['成交额'] > 10000) & (results_df['现价'] < results_df['平均净值'])]
#
# print(filtered_df)  # print the filtered result
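
A likely cause: the Jisilu percentage fields (涨跌, 溢价率) come back as strings such as "1.23%" (the final script in this post strips the "%" with rstrip for the same reason), so a plain pd.to_numeric(..., errors='coerce') turns them into NaN and the dropna() above then throws the rows away. A minimal sketch of one possible fix, under that assumption:

# Sketch: strip the trailing '%' before the numeric conversion (assumes the
# percentage columns arrive as strings like '1.23%'; adjust if the API changes)
for col in ['涨跌', '溢价率']:
    results_df[col] = pd.to_numeric(results_df[col].astype(str).str.rstrip('%'), errors='coerce')
results_df['现价'] = pd.to_numeric(results_df['现价'], errors='coerce')
results_df['成交额'] = pd.to_numeric(results_df['成交额'], errors='coerce')

filtered_df = results_df[(results_df['成交额'] > 10000) & (results_df['现价'] < results_df['平均净值'])]
print(filtered_df)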

Both the historical NAV and the current price are retrieved now.

import pandas as pd  # pandas for data handling
import requests      # requests for sending HTTP requests
import re            # re for regular-expression matching
import json          # json for parsing JSON data

data = [{"id": 1, "name": "创业板指", "zs": "399006", "cn": "159915", "cw": "161022"},
{"id": 2, "name": "深证成指", "zs": "399001", "cn": "159903", "cw": "202017"},
{"id": 3, "name": "深证100", "zs": "399330", "cn": "159901", "cw": "110019"},
{"id": 4, "name": "央视50", "zs": "399550", "cn": "159965", "cw": "217027"},
{"id": 5, "name": "沪深300", "zs": "000300", "cn": "510300", "cw": "160706"},
{"id": 6, "name": "上证180", "zs": "000010", "cn": "510180", "cw": "519180"},
{"id": 7, "name": "上证50", "zs": "000016", "cn": "510050", "cw": "001051"},
{"id": 8, "name": "中证100", "zs": "000903", "cn": "512910", "cw": "240014"},
{"id": 9, "name": "中小300", "zs": "399008", "cn": "159907", "cw": "270026"},
{"id": 10, "name": "创业板50", "zs": "399673", "cn": "159949", "cw": "007464"},
{"id": 11, "name": "中证1000", "zs": "000852", "cn": "512100", "cw": ""},
{"id": 12, "name": "中证500", "zs": "000905", "cn": "510500", "cw": "160119"}]

# Collect the on-exchange (cn) codes, dropping empty values
jj = [item["cn"] for item in data if item["cn"]]

results = []  # list to collect the results

# Dictionary for fast name lookup
name_dict = {item["cn"]: item["name"] for item in data}

for i in jj:
    df_list = []  # list to hold the historical NAV data for a single fund
    for index in range(1, 3):  # fetch the NAV pages for one fund
        url = f'http://api.fund.eastmoney.com/f10/lsjz?callback=jQuery1830041192874394646584_1617938643457&fundCode={i}&pageIndex={index}&pageSize=20&startDate=&endDate=&_=1617939181252'
        headers = {
            'Referer': 'http://fundf10.eastmoney.com/',  # anti-hotlinking header
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'
        }
        resp = requests.get(url, headers=headers)  # fetch the page
        html = resp.text  # response body
        res = re.findall(r'\((.*?)\)', html)  # strip the JSONP wrapper to get the payload
        if res:  # make sure there is a match
            datas = json.loads(res[0])["Data"]["LSJZList"]  # take the first match and read the NAV list
            df = pd.DataFrame(datas)  # turn it into a DataFrame
            df_list.append(df)  # collect the page

    if df_list:  # make sure we actually got data
        df_data = pd.concat(df_list)  # concatenate the pages

        # Convert the DWJZ column to numbers; values that cannot be converted become NaN
        df_data['DWJZ'] = pd.to_numeric(df_data['DWJZ'], errors='coerce')
        zdz = df_data['DWJZ'].min()   # lowest NAV
        zgz = df_data['DWJZ'].max()   # highest NAV
        avg = df_data['DWJZ'].mean()  # average NAV

        # Look up the fund name
        name = name_dict.get(i, '未知名称')  # fall back to '未知名称' if missing

        # Store the result
        results.append({
            '基金代码': i,
            '名称': name,
            '最低净值': zdz,
            '最高净值': zgz,
            '平均净值': avg
        })

etf = []
# Jisilu API
etfapi = 'https://www.jisilu.cn/data/etf/etf_list/?___jsl=LST___t=1693819271762&volume=500&unit_total=2&rp=25'
etfdata = requests.get(etfapi).json()['rows']

for i in etfdata:
    fund_id = i['cell']['fund_id']
    if fund_id in jj:
        code = fund_id
        price = i['cell']['price']
        etf.append({'基金代码': code, '现价': price})

etf_price = pd.DataFrame(etf)

# Build the results DataFrame
results_df = pd.DataFrame(results)

# Merge the current prices into the results
results_df = results_df.merge(etf_price, on='基金代码', how='left')

print(results_df)  # print the merged result

This is the temperature script.

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Mapping list of fund codes and names
codes = [
{"id": 1, "name": "创业板指", "zs": "399006", "cn": "159915", "cw": "161022"},
{"id": 2, "name": "深证成指", "zs": "399001", "cn": "159903", "cw": "202017"},
{"id": 3, "name": "深证100", "zs": "399330", "cn": "159901", "cw": "110019"},
{"id": 4, "name": "央视50", "zs": "399550", "cn": "159965", "cw": "217027"},
{"id": 5, "name": "沪深300", "zs": "000300", "cn": "510300", "cw": "160706"},
{"id": 6, "name": "上证180", "zs": "000010", "cn": "510180", "cw": "519180"},
{"id": 7, "name": "上证50", "zs": "000016", "cn": "510050", "cw": "001051"},
{"id": 8, "name": "中证100", "zs": "000903", "cn": "512910", "cw": "240014"},
{"id": 9, "name": "中小300", "zs": "399008", "cn": "159907", "cw": "270026"},
{"id": 10, "name": "创业板50", "zs": "399673", "cn": "159949", "cw": "007464"},
{"id": 11, "name": "中证1000", "zs": "000852", "cn": "512100", "cw": ""},
{"id": 12, "name": "中证500", "zs": "000905", "cn": "510500", "cw": "160119"},
]

# 1. Scrape the data from the API endpoint
def scrape_website(api, headers=None):
    try:
        response = requests.get(api, headers=headers)
        response.raise_for_status()
        return response
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None

def zswd():
    # Target URL to scrape (replace with the actual API address)
    t_results = scrape_website('http://caf-qibei.com/index?type=html')
    if t_results is None:
        return []

    soup = BeautifulSoup(t_results.content, 'html.parser')
    tr_tags = soup.find_all('tr')

    index_T = []
    for tr in tr_tags:
        a_tags = tr.find_all('a', href=True)
        td_tag = tr.find('td', {'colspan': '3'})

        if len(a_tags) >= 2 and td_tag:
            first_a_text = a_tags[0].text.strip()
            td_text = td_tag.text.strip()

            try:
                td_float = float(td_text)
                index_T.append((first_a_text, td_float))
            except ValueError:
                continue  # skip values that fail the conversion
    return index_T

# Fetch the data
data = zswd()

# Build a DataFrame and set the index
df = pd.DataFrame(data, columns=['指数代码', '温度'])
df.set_index('指数代码', inplace=True)

# Print the result
print(df)

Next, the temperature table's index codes need to be mapped to the on-exchange (场内) fund codes so that it can be merged with the first script; a sketch of that merge follows the listing below.

Work in progress

# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import pandas as pd
import requests  # requests for sending HTTP requests
import re        # re for regular-expression matching
import json      # json for parsing JSON data
import time      # time for generating timestamps

codes = [
{"id": 1, "name": "创业板指", "zs": "399006", "cn": "159915", "cw": "161022"},
{"id": 2, "name": "深证成指", "zs": "399001", "cn": "159903", "cw": "202017"},
{"id": 3, "name": "深证100", "zs": "399330", "cn": "159901", "cw": "110019"},
{"id": 4, "name": "央视50", "zs": "399550", "cn": "159965", "cw": "217027"},
{"id": 5, "name": "沪深300", "zs": "000300", "cn": "510300", "cw": "160706"},
{"id": 6, "name": "上证180", "zs": "000010", "cn": "510180", "cw": "519180"},
{"id": 7, "name": "上证50", "zs": "000016", "cn": "510050", "cw": "001051"},
{"id": 8, "name": "中证100", "zs": "000903", "cn": "512910", "cw": "240014"},
{"id": 9, "name": "中小300", "zs": "399008", "cn": "159907", "cw": "270026"},
{"id": 10, "name": "创业板50", "zs": "399673", "cn": "159949", "cw": "007464"},
{"id": 11, "name": "中证1000", "zs": "000852", "cn": "512100", "cw": ""},
{"id": 12, "name": "中证500", "zs": "000905", "cn": "510500", "cw": "160119"}
]


# 1. Scrape the data from the API endpoint
def scrape_website(api, headers=None):
    try:
        response = requests.get(api, headers=headers)
        response.raise_for_status()
        return response
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None


def zswd():
    # Target URL to scrape (replace with the actual API address)
    t_results = scrape_website('http://caf-qibei.com/index?type=html')
    if t_results is None:
        return []

    soup = BeautifulSoup(t_results.content, 'html.parser')
    tr_tags = soup.find_all('tr')

    index_T = []
    for tr in tr_tags:
        a_tags = tr.find_all('a', href=True)
        td_tag = tr.find('td', {'colspan': '3'})

        if len(a_tags) >= 2 and td_tag:
            first_a_text = a_tags[0].text.strip()
            td_text = td_tag.text.strip()

            try:
                td_float = float(td_text)
                index_T.append((first_a_text, td_float))
            except ValueError:
                continue  # skip values that fail the conversion
    return index_T


# Fetch the data
data = zswd()

# Build a DataFrame and set the index
df = pd.DataFrame(data, columns=['指数代码', '温度'])
df.set_index('指数代码', inplace=True)

print(df)
# Collect the on-exchange (cn) codes, dropping empty values
jj = [item["cn"] for item in codes if item["cn"]]

results = []  # list to collect the results

# Dictionaries for fast lookup of name and index code
name_dict = {item["cn"]: item["name"] for item in codes}
zs_dict = {item["cn"]: item["zs"] for item in codes}

for i in jj:
    df_list = []  # list to hold the historical NAV data for a single fund
    for index in range(1, 3):  # fetch the NAV pages for one fund
        url = f'http://api.fund.eastmoney.com/f10/lsjz?callback=jQuery&fundCode={i}&pageIndex={index}&pageSize=20&startDate=&endDate=&_={int(time.time() * 1000)}'
        headers = {
            'Referer': 'http://fundf10.eastmoney.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'
        }
        try:
            resp = requests.get(url, headers=headers)  # fetch the page
            resp.raise_for_status()  # check that the request succeeded
            html = resp.text  # response body
            res = re.findall(r'\((.*?)\)', html)  # strip the JSONP wrapper to get the payload
            if res:  # make sure there is a match
                datas = json.loads(res[0])["Data"]["LSJZList"]  # take the first match and read the NAV list
                df = pd.DataFrame(datas)  # turn it into a DataFrame
                df_list.append(df)  # collect the page
        except requests.RequestException as e:
            print(f"Request failed: {e}")

    if df_list:  # make sure we actually got data
        df_data = pd.concat(df_list)  # concatenate the pages

        # Convert the DWJZ column to numbers; values that cannot be converted become NaN
        df_data['DWJZ'] = pd.to_numeric(df_data['DWJZ'], errors='coerce')
        zdz = df_data['DWJZ'].min()   # lowest NAV
        zgz = df_data['DWJZ'].max()   # highest NAV
        avg = df_data['DWJZ'].mean()  # average NAV

        # Look up the name and the index code
        name = name_dict.get(i, '未知名称')  # fall back to '未知名称' if missing
        zs = zs_dict.get(i, '未知名称')      # fall back to '未知名称' if the index code is missing

        # Store the result
        results.append({
            '指数代码': zs,
            '基金代码': i,
            '名称': name,
            '最低净值': zdz,
            '最高净值': zgz,
            '平均净值': avg
        })

etf = []
# Jisilu API
etfapi = 'https://www.jisilu.cn/data/etf/etf_list/?___jsl=LST___t=1693819271762&volume=500&unit_total=2&rp=25'
try:
    etfdata = requests.get(etfapi).json()['rows']
    for i in etfdata:
        fund_id = i['cell']['fund_id']
        if fund_id in jj:
            code = fund_id
            price = i['cell']['price']
            etf.append({'基金代码': code, '现价': price})
except requests.RequestException as e:
    print(f"Jisilu request failed: {e}")

etf_price = pd.DataFrame(etf)

# Build the results DataFrame
results_df = pd.DataFrame(results)

# Merge the current prices into the results
results_df = results_df.merge(etf_price, on='基金代码', how='left')

# Merge the temperatures into the results
# results_df = results_df.merge(df, on='基金代码', how='left')

print(results_df)  # print the merged result
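
To finish the step described above (mapping the temperature table onto the on-exchange codes and merging it into the result table), note two details of the work-in-progress listing: the temperature DataFrame is stored in df, which the NAV loop later overwrites, and its key is 指数代码 rather than 基金代码. A minimal sketch under those observations, reusing the data returned by zswd() (wendu_df is a name introduced here, not part of the original):

# Sketch only: rebuild the temperature table under a dedicated name so it is not
# clobbered by the per-page NAV frames, then merge it on the index code
wendu_df = pd.DataFrame(data, columns=['指数代码', '温度'])
results_df = results_df.merge(wendu_df, on='指数代码', how='left')
print(results_df.sort_values(by='温度', ascending=True))  # coldest indices first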


This is the version currently used in my function workflow.

import requests                # requests for sending HTTP requests
from bs4 import BeautifulSoup  # BeautifulSoup for parsing HTML


# Safely send a request; takes a URL, a session object, and a flag for JSON vs. text responses
def safe_request(url, session, is_json=True):
    try:
        response = session.get(url)  # send a GET request to the URL through the session
        response.raise_for_status()  # raise if the request failed
        return response.json() if is_json else response.text  # return JSON or text depending on is_json
    except requests.RequestException as e:  # catch request errors
        print(f"Error requesting '{url}': {e}")  # report the error
        return None  # return None on failure


# Extract fund data from the raw rows
def extract_fund_data(raw_data, keys, code_mappings):
    code_set = {mapping['cn'] for mapping in code_mappings}  # set of on-exchange fund codes from the mappings
    return [
        {key: float(item['cell'][key].rstrip('%')) if key != 'fund_id' else item['cell'][key]
         for key in keys if key in item['cell']}  # strip '%' and convert to float, except for the fund id
        for item in raw_data if 'cell' in item and item['cell']['fund_id'] in code_set
    ]  # keep only rows that have a 'cell' and whose fund id is in the code set


# Update the fund code mappings with price / return / premium information
def update_fund_codes(urls, session, code_mappings, keys):
    for url in urls:  # loop over the URLs
        raw_data = safe_request(url, session)  # fetch the raw data
        if raw_data:  # if the request succeeded
            cleaned_data = extract_fund_data(raw_data.get('rows', []), keys, code_mappings)  # extract and clean the data
            for data_item in cleaned_data:  # loop over the cleaned rows
                for code_mapping in code_mappings:  # loop over the code mappings
                    if code_mapping['cn'] == data_item['fund_id']:  # match on the fund code
                        code_mapping.update({k: data_item[k] for k in keys[1:]})  # copy the values into the mapping
                        code_mapping['return'] = code_mapping.pop('increase_rt')  # rename 'increase_rt' to 'return'
                        code_mapping['yield'] = code_mapping.pop('discount_rt')   # rename 'discount_rt' to 'yield'


# Fetch the index temperatures
def get_index_temps(api, session):
    html_content = safe_request(api, session, is_json=False)  # request the HTML content
    if html_content is None:  # bail out if the request failed
        return []

    soup = BeautifulSoup(html_content, 'html.parser')  # parse the HTML content
    tr_tags = soup.find_all('tr')  # find all <tr> tags
    index_temps = [(tr.find_all('a', href=True)[0].text.strip(), float(tr.find('td', {'colspan': '3'}).text.strip()))
                   for tr in tr_tags if len(tr.find_all('a', href=True)) >= 2 and tr.find('td', {'colspan': '3'}) and
                   tr.find('td', {'colspan': '3'}).text.strip().replace('.', '', 1).isdigit()]
    return index_temps  # return the parsed (index code, temperature) pairs


# Attach the temperature information to the fund code mappings
def integrate_temps_with_funds(temperatures, code_mappings):
    id_to_code = {code['id']: code for code in code_mappings}  # map index id to its mapping entry
    for index_id, temp in temperatures:  # loop over the temperature entries
        if index_id in id_to_code:
            id_to_code[index_id]['temp'] = temp  # record the temperature on the matching entry
    return sorted([code for code in code_mappings if 'temp' in code], key=lambda x: x['temp'])  # keep only entries with a temperature, sorted by it


# Filter the funds with a condition
def filter_funds(funds, condition):
    return [{'名称': fund['name'], '代码': fund['cw'], '温度': fund['temp'], '涨跌': fund['return']} for fund in funds if
            condition(fund)]  # return the funds that satisfy the condition


session = requests.Session()  # create a session object to keep connection state
urls = [
    'https://www.jisilu.cn/data/etf/etf_list/?___jsl=LST___t=1663138938775&rp=25&page=1',
    'https://www.jisilu.cn/data/qdii/qdii_list/A?___jsl=LST___t=1665370506235&rp=22&page=1',
    'https://www.jisilu.cn/data/qdii/qdii_list/E?___jsl=LST___t=1665371145110&rp=22&page=1'
]  # list of URLs to query
keys = ['fund_id', 'price', 'increase_rt', 'discount_rt']  # keys to extract
code_mappings = [  # fund code mappings
    {'id': '000016', 'name': '上证50', 'cn': '510050', 'cw': '001051'},
    {'id': '000300', 'name': '沪深300', 'cn': '510300', 'cw': '160706'},
    {'id': '000905', 'name': '中证500', 'cn': '510500', 'cw': '160119'},
    {'id': '000922', 'name': '中证红利', 'cn': '515890', 'cw': '100032'},
    {'id': 'SPX', 'name': '标普500', 'cn': '513500', 'cw': '050025'},
    {'id': 'GDAXI', 'name': '德国DAX', 'cn': '513030', 'cw': '000614'},
    {'id': '931079', 'name': '5G通信', 'cn': '515050', 'cw': '008086'},
    {'id': '980017', 'name': '国证芯片', 'cn': '159995', 'cw': '008887'},
    {'id': '399975', 'name': '证券公司', 'cn': '512000', 'cw': '004069'},
    {'id': 'H30533', 'name': '互联网50', 'cn': '513050', 'cw': '006327'},
    {'id': '399967', 'name': '中证军工', 'cn': '512660', 'cw': '161024'},
    {'id': '399987', 'name': '中证酒', 'cn': '512690', 'cw': '160632'},
    {'id': '399396', 'name': '国证食品', 'cn': '159843', 'cw': '160222'},
    {'id': '399998', 'name': '中证煤炭', 'cn': '515220', 'cw': '161032'}
]

update_fund_codes(urls, session, code_mappings, keys)      # fill in price / return / premium for each fund
temperatures_api = 'http://caf-qibei.com/index?type=html'  # index temperature API URL
temps = get_index_temps(temperatures_api, session)         # fetch the index temperatures
funds = integrate_temps_with_funds(temps, code_mappings)   # attach the temperatures to the fund mappings

low_temp_funds = filter_funds(funds, lambda x: x['return'] < -1)  # funds whose daily change is below -1%
high_temp_funds = filter_funds(funds, lambda x: x['return'] > 1)  # funds whose daily change is above +1%

print(low_temp_funds)   # print the funds that dropped
print(high_temp_funds)  # print the funds that rose
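
If a tabular view is preferred, the two result lists can be wrapped in DataFrames; this is only a display convenience layered on top of the script above (pandas is not imported by that script, so the import here is an extra dependency):

import pandas as pd  # extra dependency, used only for this display sketch

# Show the filtered funds as tables, coldest first
low_df = pd.DataFrame(low_temp_funds)
high_df = pd.DataFrame(high_temp_funds)
if not low_df.empty:
    print(low_df.sort_values(by='温度'))
if not high_df.empty:
    print(high_df.sort_values(by='温度'))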