Pandas exercises (editing)

draft

python


# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np

#File path definition
IN_FPATH  = "./input/q1_input.csv"
OUT_FPATH = "./output/q1_output.csv"



#Read file
df = pd.read_csv(IN_FPATH)
df.date = pd.to_datetime(df.date)

# q1-1)
#Add the last phase column
df_pre_phase = df.phase.copy()
df_diff_phase = df.phase.diff()
df_pre_phase = df_pre_phase - df_diff_phase
df_pre_phase[ df_diff_phase == 0 ] = np.nan
df_pre_phase = df_pre_phase.fillna(method='ffill')
df["pre_phase"] = df_pre_phase

#Add the phase column two times before
df_pre_pre_phase = df.pre_phase.copy()
df_diff_phase = df.pre_phase.diff()
df_pre_pre_phase = df_pre_pre_phase - df_diff_phase
df_pre_pre_phase[ df_diff_phase == 0 ] = np.nan
df_pre_pre_phase = df_pre_pre_phase.fillna(method='ffill')
df["df_pre_pre_phase"] = df_pre_pre_phase


# q1-2)
#The day when the transition from Phase 4 to 5"1"Add the column
df_phase4_5 = df.phase.copy()
df_diff_phase = df.phase.diff()
df_phase4_5[ df.phase!=5 ] = 0  #np.nan
df_phase4_5[ (df.phase==5) & (df_pre_phase==4) ] = 1
df_phase4_5[ df_diff_phase != 1 ] = 0  #np.nan
df["Phase4_5"] = df_phase4_5


# q1-3)
#The day when the transition from Phase 5 to 6"1"Add the column
df_phase5_6 = df.phase.copy()
df_diff_phase = df.phase.diff()
df_phase5_6[ df.phase!=6 ] = 0  #np.nan
df_phase5_6[ (df.phase==6) & (df_pre_phase==5) ] = 1
df_phase5_6[ df_diff_phase != 1 ] = 0  #np.nan
df["Phase5_6"] = df_phase5_6


# q1-4)
#Add a column that counts up every time a phase transitions from 5 to 6
df_5_6_countup = df_phase5_6.cumsum()
df["5_6_countup"] = df_5_6_countup


# q1-5)
#Add a column that counts up every time a phase transitions from 5 to 6
#However, clear the count to 0 when the phase changes from 4 to 5.
df_phase4_5_counter = df_5_6_countup.copy()
df_phase4_5_counter[ df_phase4_5==0  ] = np.nan
df_phase4_5_counter = df_phase4_5_counter.fillna(method='ffill')
df_5_6_counter_phase4_clear = df_5_6_countup - df_phase4_5_counter
df["5_6_countup_4clear"] = df_5_6_counter_phase4_clear


# q1-6)
#Add a column for the cumulative time of Phase 5
#However, if the previous phase was 4, it is excluded from the cumulative total.
df_phase5_erapsed = df.date.copy()
df_diff_phase = df.phase.diff()
df_phase5_erapsed[ (df.phase!=5)|(df_diff_phase==0) ] = np.nan
df_phase5_erapsed = df_phase5_erapsed.fillna(method='ffill')
df_phase5_erapsed = df_phase5_erapsed.fillna(method='bfill')
df_phase5_erapsed = df.date - df_phase5_erapsed
df_phase5_erapsed = df_phase5_erapsed.dt.total_seconds()
df_phase5_erapsed[ (df_phase5_6!=1) ] = np.nan
df["phase5_erapsed"] = df_phase5_erapsed
df["phase5_erapsed_1st"] = df_phase5_erapsed[df_pre_pre_phase==4]

df_phase5_erapsed_cumsum = df_phase5_erapsed.copy()
df_phase5_erapsed_cumsum = df_phase5_erapsed_cumsum.cumsum()

df_phase5_erapsed_cumsum_st = df_phase5_erapsed_cumsum.copy()
df_phase5_erapsed_cumsum_st[ (df["Phase5_6"]!=1)|(df["df_pre_pre_phase"]!=4) ] = np.nan
df_phase5_erapsed_cumsum_st = df_phase5_erapsed_cumsum_st.fillna(method='ffill')
df_phase5_erapsed_cumsum_st = df_phase5_erapsed_cumsum_st.fillna(0)
df_phase5_erapsed_cumsum = df_phase5_erapsed_cumsum - df_phase5_erapsed_cumsum_st
df["phase5_erapsed_cumsum"] = df_phase5_erapsed_cumsum
df["phase5_erapsed_cumsum"] = df["phase5_erapsed_cumsum"].fillna(method="ffill")
df["phase5_erapsed_mean"] = df["phase5_erapsed_cumsum"]/(df["5_6_countup_4clear"]-1)
df["phase5_erapsed_mean"] = df["phase5_erapsed_mean"].fillna(method="ffill")

# q1-7)
#Add a column for cumulative time in Phase 6
df_phase6_5 = df.phase.copy()
df_phase6_5[ df.phase!=5 ] = 0
df_phase6_5[ (df.phase==5) & (df_pre_phase==6) ] = 1
df_phase6_5[ df_diff_phase != -1 ] = 0  #np.nan

df_phase6_erapsed = df.date.copy()
df_diff_phase = df.phase.diff()
df_phase6_erapsed[ (df.phase!=6)|(df_diff_phase==0) ] = np.nan
df_phase6_erapsed = df_phase6_erapsed.fillna(method='ffill')
df_phase6_erapsed = df_phase6_erapsed.fillna(method='bfill')
df_phase6_erapsed = df.date - df_phase6_erapsed
df_phase6_erapsed = df_phase6_erapsed.dt.total_seconds()
df_phase6_erapsed[ (df_phase6_5!=1) ] = np.nan

df_phase6_erapsed_cumsum = df_phase6_erapsed.cumsum()

df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum.copy()
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum_st.fillna(method='ffill')
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum_st.fillna(0)
df_phase6_erapsed_cumsum_st[ (df["Phase5_6"]!=1)|(df["df_pre_pre_phase"]!=4) ] = np.nan
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum_st.fillna(method='ffill')
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum_st.fillna(0)
df["phase6_erapsed_cumsum"] = df_phase6_erapsed_cumsum - df_phase6_erapsed_cumsum_st
df["phase6_erapsed_cumsum"] = df["phase6_erapsed_cumsum"].fillna(method="ffill")
df["phase6_erapsed_mean"] = ( df_phase6_erapsed_cumsum - df_phase6_erapsed_cumsum_st )/(df["5_6_countup_4clear"])
df["phase6_erapsed_mean"] = df["phase6_erapsed_mean"].fillna(method="ffill")

#File output
df.to_csv(OUT_FPATH, encoding="shift-jis")



Recommended Posts

Pandas exercises (editing)
Pandas
Pandas memo
Pandas basics
Pandas notes
Pandas memorandum
Algorithm exercises 13
Pandas basics
pandas memorandum
pandas memo
pandas SettingWithCopyWarning