This is an introduction to saving pandas DataFrame data in HDF5 format (an .h5 file) to speed up data reading.
Save: store.put('key (where to put the data in the h5 file)', DataFrame)  Read: store['key (the location to read the data from in the h5 file)']
sample.py
from pathlib import Path

import numpy as np
import pandas as pd
# Path of the HDF5 file that will hold the DataFrame.
DATA_STORE = './data/asset.h5'

# Sample rows; every value (including the numeric-looking ones) is a string.
dat = [
    ['2019-07-01', '9997', '740'],
    ['2019-07-02', '9997', '749'],
    ['2019-07-03', '9997', '757'],
    ['2019-07-04', '9997', '769'],
    ['2019-07-05', '9997', '762'],
    ['2019-07-08', '9997', '860'],
]
df4 = pd.DataFrame(dat, columns=["A", "B", "C"])
print("df4", df4)

# Opening the store fails when the parent directory is missing, so create
# ./data first (no-op if it already exists).
Path(DATA_STORE).parent.mkdir(parents=True, exist_ok=True)

# Write DataFrame df4 into the h5 file under the key 'general/test'.
# Format: store.put('key inside the h5 file', DataFrame)
with pd.HDFStore(DATA_STORE) as store:
    store.put('general/test', df4)

# Read the DataFrame back from the same key in the h5 file.
# Format: store['key inside the h5 file']
with pd.HDFStore(DATA_STORE) as store:
    df5 = store['general/test']

# Label the output the same way as df4 so the two printouts can be compared.
print("df5", df5)
output
df4
A B C
0 2019-07-01 9997 740
1 2019-07-02 9997 749
2 2019-07-03 9997 757
3 2019-07-04 9997 769
4 2019-07-05 9997 762
5 2019-07-08 9997 860
df5
A B C
0 2019-07-01 9997 740
1 2019-07-02 9997 749
2 2019-07-03 9997 757
3 2019-07-04 9997 769
4 2019-07-05 9997 762
5 2019-07-08 9997 860
Recommended Posts