I wrote this script because I wanted an easy way to insert a large amount of data into MSSQL when the data is held in a DataFrame (for example, loaded with .read_csv). BULK INSERT is not used, on the assumption that you do not have the permissions for it.
It takes about 30 minutes to INSERT 1 million rows, so please let me know if there is a better way.
mssql_insert.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# python 3.5
import pymssql
import pandas as pd

# Preparation
table_name = '[db].[dbo].[sample_table]'
# Placeholders matching the column types of the destination table (%s: string, %d: number)
columns = ['%s', '%s', '%d', '%d', '%d', '%d']

# Create a DataFrame
df = pd.DataFrame(...)

# Build the INSERT statement
args = dict(table=table_name, columns=', '.join(columns))
sql = 'INSERT INTO {table} VALUES ({columns})'.format(**args)

# Convert NaN to None so it is inserted as NULL
df = df.astype(object).where(pd.notnull(df), None)
params = [tuple(x) for x in df.values]

# Execute the SQL
cnn = pymssql.connect(host="server", user="sa", password="password", database="db")
cur = cnn.cursor()
cur.executemany(sql, params)
cnn.commit()
cur.close()
cnn.close()
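On the question of a better way: since executemany ends up issuing one INSERT per row, one approach that is sometimes faster is to batch many rows into a single multi-row INSERT, which SQL Server accepts with up to 1,000 row value expressions per statement. The following is only a minimal, unmeasured sketch, assuming the same table_name, columns, and params as in the script above; whether it actually beats executemany here would need to be benchmarked.

# Sketch: multi-row INSERT batching (assumes table_name, columns, and params
# are defined as in the script above).
BATCH_SIZE = 1000  # SQL Server allows at most 1000 row value expressions per INSERT
row = '({})'.format(', '.join(columns))  # e.g. (%s, %s, %d, %d, %d, %d)
cnn = pymssql.connect(host="server", user="sa", password="password", database="db")
cur = cnn.cursor()
for i in range(0, len(params), BATCH_SIZE):
    batch = params[i:i + BATCH_SIZE]
    # One statement that inserts len(batch) rows at once
    sql = 'INSERT INTO {} VALUES {}'.format(table_name, ', '.join([row] * len(batch)))
    # Flatten the batch into a single parameter tuple for the combined statement
    cur.execute(sql, tuple(v for r in batch for v in r))
cnn.commit()
cur.close()
cnn.close()

This trades statement-parsing overhead for longer statements; tuning BATCH_SIZE (or committing per batch) may matter depending on row width and server settings.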