Python读取并解析通达信数据文件
通达信的数据文件格式比较简单。网络上能找到的 Python 解析代码大多使用 struct 逐条解析,性能不高。其实 numpy 也能直接读取二进制文件,而且结合 pandas 之后功能更强大。
import numpy as np
import pandas as pd

# Layout of one daily-bar record: eight 4-byte fields, 32 bytes per record.
_DAY_RECORD = np.dtype([
    ('Date', 'u4'),
    ('Open', 'u4'),
    ('High', 'u4'),
    ('Low', 'u4'),
    ('Close', 'u4'),
    ('Amount', 'f'),
    ('Volume', 'u4'),
    ('Reserve', 'u4'),
])

# Layout of one minute-bar record: two 2-byte fields followed by seven
# 4-byte fields, 32 bytes per record.
_MINUTE_RECORD = np.dtype([
    ('m_date', 'u2'),
    ('m_time', 'u2'),
    ('Open', 'f4'),
    ('High', 'f4'),
    ('Low', 'f4'),
    ('Close', 'f4'),
    ('Amount', 'f4'),
    ('Volume', 'u4'),
    ('Reserve', 'u4'),
])


def _build_datetime_index(**parts):
    """Assemble a DatetimeIndex from integer component Series.

    Keyword names must be the component names ``pandas.to_datetime``
    understands (``year``, ``month``, ``day``, ``hour``, ``minute``).
    """
    # Cast to int64 so arithmetic results from small unsigned columns are
    # handed to pandas in one uniform, signed dtype.
    components = pd.DataFrame(
        {name: values.astype('int64') for name, values in parts.items()}
    )
    return pd.DatetimeIndex(pd.to_datetime(components))


def 读取日线数据(文件路径):
    """Read a daily-bar file into a DataFrame indexed by trading date.

    Each record is decoded with ``_DAY_RECORD``. The ``Date`` field is an
    integer of the form YYYYMMDD and becomes the index; the four price
    fields are divided by 100.

    Args:
        文件路径: Path of the ``.day`` file to read.

    Returns:
        DataFrame with columns Open, High, Low, Close, Amount, Volume and
        Reserve, indexed by a DatetimeIndex.
    """
    df = pd.DataFrame(np.fromfile(文件路径, dtype=_DAY_RECORD))

    date = df['Date'].astype('int64')
    df.index = _build_datetime_index(
        year=date // 10000,
        month=date % 10000 // 100,
        day=date % 100,
    )

    for column in ('Open', 'High', 'Low', 'Close'):
        df[column] = df[column] / 100

    # Keyword form: the positional ``axis`` argument of ``drop`` is no
    # longer accepted by pandas 2.x.
    return df.drop(columns=['Date'])


def 读取分钟数据(文件路径):
    """Read a minute-bar file (1- or 5-minute) into a DataFrame.

    Each record is decoded with ``_MINUTE_RECORD``. The timestamp is
    packed into two 16-bit fields and decoded as follows::

        year   = m_date // 2048 + 2004
        month  = m_date % 2048 // 100
        day    = m_date % 2048 % 100
        hour   = m_time // 60
        minute = m_time % 60

    Args:
        文件路径: Path of the minute-bar file to read.

    Returns:
        DataFrame with columns Open, High, Low, Close, Amount, Volume and
        Reserve, indexed by a DatetimeIndex with minute resolution.
    """
    df = pd.DataFrame(np.fromfile(文件路径, dtype=_MINUTE_RECORD))

    # Widen before arithmetic so the "+ 2004" cannot wrap in uint16.
    packed_date = df['m_date'].astype('int64')
    packed_time = df['m_time'].astype('int64')
    month_day = packed_date % 2048

    df.index = _build_datetime_index(
        year=packed_date // 2048 + 2004,
        month=month_day // 100,
        day=month_day % 100,
        hour=packed_time // 60,
        minute=packed_time % 60,
    )

    return df.drop(columns=['m_date', 'm_time'])


if __name__ == "__main__":
    # Demo driver: only runs when executed as a script, so importing this
    # module does not require the sample files to exist.
    日线文件路径 = r'C:\vipdoc\sh\lday\sh600025.day'
    五分文件路径 = r'C:\vipdoc\sh\fzline\sh600025.lc5'
    一分文件路径 = r'C:\vipdoc\sh\minline\sh600025.lc1'

    日线数据 = 读取日线数据(日线文件路径)
    五分数据 = 读取分钟数据(五分文件路径)
    一分数据 = 读取分钟数据(一分文件路径)

    print(日线数据.tail(10))
    print(五分数据.tail(10))
    print(一分数据.tail(10))
    print(一分数据.head(10))
Open High Low Close Amount Volume Reserve 2020-02-28 3.74 3.77 3.67 3.69 222089056.0 59854355 65536 2020-03-02 3.71 3.79 3.71 3.76 134752352.0 35872664 65536 2020-03-03 3.79 3.82 3.77 3.79 119396688.0 31502027 65536 2020-03-04 3.76 3.88 3.76 3.86 167259584.0 43594328 65536 2020-03-05 3.89 3.95 3.87 3.94 192437232.0 49077518 65536 2020-03-06 3.91 3.93 3.88 3.90 121692808.0 31218072 65536 2020-03-09 3.85 3.86 3.76 3.78 184477168.0 48633066 65536 2020-03-10 3.74 3.88 3.74 3.87 173113808.0 45416429 65536 2020-03-11 3.86 3.87 3.81 3.81 105091608.0 27373594 65536 2020-03-12 3.80 3.81 3.73 3.75 96933592.0 25786021 0 Open High Low Close Amount Volume Reserve 2020-03-12 14:15:00 3.75 3.75 3.74 3.74 1437680.0 384000 0 2020-03-12 14:20:00 3.74 3.75 3.74 3.75 1298536.0 347000 0 2020-03-12 14:25:00 3.75 3.75 3.74 3.74 608440.0 162600 0 2020-03-12 14:30:00 3.74 3.75 3.74 3.75 1224136.0 327200 0 2020-03-12 14:35:00 3.75 3.76 3.74 3.76 4984256.0 1329200 0 2020-03-12 14:40:00 3.75 3.76 3.75 3.75 2114376.0 563500 0 2020-03-12 14:45:00 3.76 3.78 3.75 3.75 3283496.0 872300 0 2020-03-12 14:50:00 3.75 3.77 3.75 3.76 1384568.0 368400 0 2020-03-12 14:55:00 3.76 3.77 3.75 3.75 2265616.0 602600 0 2020-03-12 15:00:00 3.75 3.76 3.75 3.75 568992.0 151500 0 Open High Low Close Amount Volume Reserve 2020-03-12 14:51:00 3.76 3.77 3.76 3.76 4.428390e+05 117700 0 2020-03-12 14:52:00 3.76 3.77 3.76 3.77 2.189030e+05 58200 0 2020-03-12 14:53:00 3.76 3.77 3.75 3.75 1.140250e+06 303300 0 2020-03-12 14:54:00 3.75 3.76 3.75 3.76 2.192300e+05 58400 0 2020-03-12 14:55:00 3.75 3.76 3.75 3.75 2.441970e+05 65000 0 2020-03-12 14:56:00 3.75 3.76 3.75 3.76 1.799150e+05 47900 0 2020-03-12 14:57:00 3.75 3.76 3.75 3.76 2.011590e+05 53500 0 2020-03-12 14:58:00 3.76 3.76 3.76 3.76 0.000000e+00 0 0 2020-03-12 14:59:00 3.76 3.76 3.76 3.76 0.000000e+00 0 0 2020-03-12 15:00:00 3.75 3.75 3.75 3.75 1.878750e+05 50100 0 Open High Low Close Amount Volume Reserve 2019-11-14 09:31:00 4.36 4.36 4.35 4.35 7.074900e+05 162300 0 
2019-11-14 09:32:00 4.36 4.36 4.35 4.36 4.887820e+05 112200 0 2019-11-14 09:33:00 4.36 4.36 4.35 4.36 2.453460e+05 56300 0 2019-11-14 09:34:00 4.36 4.36 4.35 4.36 2.981950e+05 68400 0 2019-11-14 09:35:00 4.36 4.36 4.35 4.36 2.192820e+05 50300 0 2019-11-14 09:36:00 4.35 4.36 4.35 4.36 5.482161e+05 125900 0 2019-11-14 09:37:00 4.36 4.36 4.35 4.35 1.130302e+06 259800 0 2019-11-14 09:38:00 4.35 4.35 4.34 4.34 5.735700e+05 131900 0 2019-11-14 09:39:00 4.34 4.35 4.34 4.35 4.512571e+05 103800 0 2019-11-14 09:40:00 4.34 4.34 4.33 4.33 1.674892e+06 386000 0
好,感觉你好牛,要是能编程下载数据集就好了!
感谢分享!