To add a column at an arbitrary position in a pandas DataFrame you can use `pandas.DataFrame.insert`, but it has a couple of drawbacks:
- It is not immutable: the DataFrame is rewritten directly (it is a destructive method, and there is no `inplace` option to turn that off).
- The insertion position must be specified as a numeric index.
I wrote a convenient function to solve these problems.
from typing import Union, Optional
import pandas as pd
def insert_columns(
    df: pd.DataFrame,
    data: Union[pd.Series, pd.DataFrame],
    *,
    before: Optional[str] = None,
    after: Optional[str] = None,
    allow_duplicates: bool = False,
    inplace: bool = False,
) -> pd.DataFrame:
    """Insert a Series, or every column of a DataFrame, next to a named column.

    Args:
        df: DataFrame to insert into.
        data: A Series (inserted under ``data.name``) or a DataFrame (all of
            its columns are inserted, keeping their relative order).
        before: Label of the column that the new data is placed in front of.
        after: Label of the column that the new data is placed right behind.
        allow_duplicates: Forwarded to ``DataFrame.insert``; permits inserting
            a label that already exists in ``df``.
        inplace: If True, ``df`` itself is modified; otherwise a copy is
            modified and ``df`` is left untouched.

    Returns:
        The DataFrame containing the new columns (``df`` itself when
        ``inplace`` is True, otherwise the modified copy).

    Raises:
        ValueError: If both or neither of ``before`` / ``after`` are given.
        TypeError: If ``data`` is neither a Series nor a DataFrame.
        KeyError: If the anchor label is not a column of ``df`` (raised by
            ``Index.get_loc``).
    """
    # Validate before copying so a bad call does not pay for a full copy.
    # Exactly one anchor is required: reject "both" and "neither" alike.
    if (before is None) == (after is None):
        raise ValueError('Specify only "before" or "after"')
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise TypeError(
            f"data must be a pandas Series or DataFrame, got {type(data).__name__}"
        )

    if not inplace:
        df = df.copy()

    # Compare against None rather than relying on truthiness, so that falsy
    # labels such as "" or 0 are still accepted as the ``before`` anchor.
    if before is not None:
        loc = df.columns.get_loc(before)
    else:
        loc = df.columns.get_loc(after) + 1

    if isinstance(data, pd.Series):
        df.insert(loc, data.name, data, allow_duplicates=allow_duplicates)
    else:
        # Every column is inserted at the same ``loc``, so walk the columns
        # from last to first to keep their original left-to-right order.
        # Positional access keeps each value one-dimensional even when
        # ``data`` itself carries repeated column labels.
        for pos in reversed(range(data.shape[1])):
            df.insert(
                loc,
                data.columns[pos],
                data.iloc[:, pos],
                allow_duplicates=allow_duplicates,
            )
    return df
- Specify the anchor column by name with `before` or `after`.
- By default, the input DataFrame is not rewritten directly.
- Pass `inplace=True` to rewrite the input DataFrame directly.
Use sklearn's iris dataset as sample data.
# Load the iris sample dataset that ships with scikit-learn.
from sklearn import datasets
iris = datasets.load_iris()
# Build a DataFrame from the feature data, one column per feature name.
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Look up the entry of target_names for each row's target value and keep
# the result as a Series named 'target'.
target = pd.Series(iris.target_names[iris.target], name='target')
# Preview the first rows of each object (the result is only displayed when
# run interactively, e.g. in a notebook).
df.head()
target.head()
Try adding `target` right after the `sepal width (cm)` column of `df`.
# inplace is left at its default (False), so the result is a new DataFrame
# with 'target' placed right after 'sepal width (cm)'; df is not modified.
insert_columns(df, target, after='sepal width (cm)')
In this example a Series is added, but a DataFrame can also be passed, in which case all of its columns are inserted.
Recommended Posts