In the previous article (https://qiita.com/matsxxx/items/652e58f77558faecfd23), I showed an example of separating moving objects in a video with Dynamic Mode Decomposition (DMD). In this article, we will try moving object separation with the Robust Principal Component Analysis (Robust PCA) of the Principal Component Pursuit (PCP) algorithm [1].
Robust PCA via PCP
Robust PCA is a method of decomposing into a low rank structure and a sparse structure. In the case of a moving body, the background has a low rank structure and the moving body has a sparse structure. This time, I will try Robust PCA, which is a PCP algorithm. Decomposes the original data matrix $ M $ into a low rank matrix $ L $ and a sparse matrix $ S $.
M = L + S
PCP decomposes into $ L $ and $ S $ by minimizing the following extended Lagrangian $ l $.
l(L, S, Y) = \| L \|_* + \lambda\|S\|_1 +<Y, M - L - S> + \frac{\mu}{2}\|M-L-s\|^2_F
$ L $ is a low-rank matrix, $ S $ is a sparse matrix, $ Y $ is a Lagrange multiplier matrix, and $ \ lambda $ and $ \ mu $ are PCP parameters. $ \ <, * > $ is the dot product. The dimensions of each variable are $ M, L, S, Y \ in \ mathbb {R} ^ {n_1 \ times n_2}
\begin{align}
&||A||_* = \mathrm{tr}(\sqrt{A^\mathrm{T}A})= \sum^{\mathrm{min}(n_1,n_2)}_{i=1} \sigma_i\\
&||A||_1 = \underset{1\leq j \leq n_2}{\mathrm{max}}\sum^{n_1}_{i=1}|a_{ij}|\\
&||A||_F = \sqrt{\sum^{n_1}_{i=1}\sum^{n_2}_{j=1}|a_{ij}|^2} = \sqrt{\mathrm{tr}(A^\mathrm{T}A)} = \sqrt{\sum^{\mathrm{min}(n_1,n_2)}_{i=1}\sigma^2_i}
\end{align}
It will be. The subscript $ \ mathrm {T} $ represents the transposed matrix. The formula that describes the norm is [Wikipedia description](https://ja.wikipedia.org/wiki/%E8%A1%8C%E5%88%97%E3%83%8E%E3%83%AB% I referred to E3% 83% A0). The sparse matrix $ S $ is updated with the shrinkage operator, and the low rank matrix $ L $ is updated with the singular value thresholding operator. The parameter $ \ tau $ shrinkage opertor $ S_ \ tau $ is
\begin{align}
&S_\tau(x) = \mathrm{sgn}(x) \mathrm{max}(|x|-\tau, 0)
\end{align}
It is expressed as. singular value thresholding operator $ D_ \ tau $ uses shrinkage oprator $ S_ \ tau $ and singular value decomposition.
D_{\tau}(X) = US_\tau(\Sigma)V^\mathrm{T}, \
\mathrm{where} \ X = U\Sigma V^\mathrm{T}
It will be. $ U, \ Sigma, and V $ are the left singular vector, singular value, and right singular vector of $ X $, respectively. Use the shrinkage operator $ S_ \ tau $ and the singular value thresholding operator $ D_ \ tau $ to update the low rank matrix $ L $ and the sparse matrix $ S $. In the update formula, the variable $ X $ is written as $ X_k $ before the update and $ X_ {k + 1} $ after the update, and the PCP parameters $ \ tau, \ mu $ are entered and written.
\begin{align}
&L_{k+1} = D_{1/\mu}(M - S_k - \mu^{-1}Y_k) \\
&S_{k+1} = S_{\lambda/\mu}(M - L_{k+1} + \mu^{-1}Y_k)
\end{align}
It will be. The $ S $ symbol is confusing, but it follows the notation in reference [1], except that the conjugate transpose is paraphrased as the transpose matrix. Also, the method of setting the parameters $ \ mu, \ lambda $ of shrinkage operator $ S_ \ tau $ and singular value thresholding operator $ D_ \ tau $ is wrong in the 2009 PCP paper placed in arXiv. , Please note (2009 arXiv version is $ D_ \ mu, S_ {\ lambda \ mu} $, but it should be $ D_ {1/ \ mu}, S_ {\ lambda / \ mu} It looks like $). The Lagrange multiplier matrix $ Y $ is calculated by the extended Lagrange method.
Y_{k+1} = Y_k + \mu(M - L_{k+1} - S_{k+1})
And update. The PCP parameters $ \ lambda $ and $ \ mu $ are, in reference [1], as the data matrix $ M \ in \ mathbb {R} ^ {n_1 \ times n_2} $, respectively.
\begin{align}
&\lambda = \frac{1}{\sqrt{\mathrm{max}(n_1,n_2)}} \\
&\mu = \frac{n_1 n_2}{4\|M\|_1}
\end{align}
Is set. The following is a summary of the PCP calculation procedure.
\begin{align}
&\mathrm{initialize:} S_0 = 0, Y_0 =0 \\
& \mathrm{while}\ \mathrm{not}\ \mathrm{converged}:\\
&\qquad L_{k+1} = D_{1/\mu}(M - S_k + \mu^{-1}Y_k) \\
&\qquad S_{k+1} = S_{\lambda/\mu}(M - L_{k+1} + \mu^{-1}Y_k) \\
&\qquad Y_{k+1} = Y_k + \mu(M - L_{k+1} - S_{k+1}) \\
&\mathrm{end} \ \mathrm{while} \\
&\mathrm{output:}L,S.
\end{align}
Convergence is determined by the following Frobenius norm.
\|M -L - S\|_F \leq \delta\|M\|_F \quad \mathrm{with} \ \delta=10^{-7}
Code Robust PCA of the above PCP algorithm in Python.
import numpy as np
import numpy.linalg as LA
def rpca(M, max_iter=800,p_interval=50):
def shrinkage_operator(x, tau):
return np.sign(x) * np.maximum((np.abs(x) - tau), np.zeros_like(x))
def svd_thresholding_operator(X, tau):
U, S, Vh = LA.svd(X, full_matrices=False)
return U @ np.diag(shrinkage_operator(S, tau)) @ Vh
i = 0
S = np.zeros_like(M)
Y = np.zeros_like(M)
error = np.Inf
tol = 1e-7 * LA.norm(M, ord="fro")
mu = M.shape[0] * M.shape[1]/(4 * LA.norm(M, ord=1))
mu_inv = 1/mu
lam = 1/np.sqrt(np.max(M.shape))
while i < max_iter:
L = svd_thresholding_operator(M - S + mu_inv * Y, mu_inv)
S = shrinkage_operator(M - L + mu_inv * Y, lam * mu_inv)
Y = Y + mu * (M - L - S)
error = LA.norm(M - L - S, ord='fro')
if i % p_interval == 0:
print("step:{} error:{}".format(i, error))
if error <= tol:
print("converted! error:{}".format(error))
break
i+=1
else:
print("Not converged")
return L, S
Previous article Similarly, using the Atrium of dataset in here I will. I am using 120frame to 170frame. Use Robust PCA to separate the walking person from the background.
import cv2
def prepare_frames_to_use(video_path="./atrium_video.avi",#Image path
start_frame=120,#First frame
end_frame=170,#Last frame
r=4,#Image reduction parameters
):
#Image loading
cap = cv2.VideoCapture(video_path)
#Get image resolution, frame rate, number of frames
wid = cap.get(cv2.CAP_PROP_FRAME_WIDTH)#side
wid_r = int(wid/r)
hei = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)#Vertical
hei_r = int(hei/r)
fps = cap.get(cv2.CAP_PROP_FPS)#frame rate
count = cap.get(cv2.CAP_PROP_FRAME_COUNT)#Number of frames
dt = 1/fps#Seconds between frames
print(f"wid:{wid}", f" hei:{hei}", f" fps:{fps}", f" count:{count}", f"dt:{dt}")
#Extraction of target frame
cat_frames = []
cap.set(cv2.CAP_PROP_POS_FRAMES,start_frame)
for i in range(end_frame - start_frame):
ret, img = cap.read()
if not ret:
print("no image")
break
buf = cv2.cvtColor(cv2.resize(img,(wid_r, hei_r)),
cv2.COLOR_BGR2GRAY).flatten()#
cat_frames.append(buf)
cap.release()
cat_frames = np.array(cat_frames).T
return cat_frames, wid_r, hei_r, fps
Use Robust PCA to find low rank and sparse matrices. Just enter the video data matrix into the rpca function.
def detect_moving_object():
#Acquisition of frames used for calculation
frames, wid, hei, fps = prepare_frames_to_use()
#Run Robust pca
L, S = rpca(frames)
#Video output of sparse structure
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter("sparse_video.mp4",
fourcc, fps, (int(wid), int(hei)))
for i in range(S.shape[1]):
s_buf = S[:,i]
#Brightness adjustment
lum_min = np.abs(s_buf.min())
lum_max = np.abs(s_buf.max())
lum_w = lum_max + lum_min
s_buf = (s_buf + lum_min)/lum_w * 255
s_buf = cv2.cvtColor(s_buf.reshape(int(hei), -1).astype('uint8'),
cv2.COLOR_GRAY2BGR)
writer.write(s_buf)
writer.release()
If you look at the video of the sparse matrix in the figure on the right, you can see that the walking people are clearly separated.
import numpy as np
import numpy.linalg as LA
def rpca(M, max_iter=800,p_interval=50):
def shrinkage_operator(x, tau):
return np.sign(x) * np.maximum((np.abs(x) - tau), np.zeros_like(x))
def svd_thresholding_operator(X, tau):
U, S, Vh = LA.svd(X, full_matrices=False)
return U @ np.diag(shrinkage_operator(S, tau)) @ Vh
i = 0
S = np.zeros_like(M)
Y = np.zeros_like(M)
error = np.Inf
tol = 1e-7 * LA.norm(M, ord="fro")
mu = M.shape[0] * M.shape[1]/(4 * LA.norm(M, ord=1))
mu_inv = 1/mu
lam = 1/np.sqrt(np.max(M.shape))
while i < max_iter:
L = svd_thresholding_operator(M - S + mu_inv * Y, mu_inv)
S = shrinkage_operator(M - L + mu_inv * Y, lam * mu_inv)
Y = Y + mu * (M - L - S)
error = LA.norm(M - L - S, ord='fro')
if i % p_interval == 0:
print("step:{} error:{}".format(i, error))
if error <= tol:
print("converted! error:{}".format(error))
break
i+=1
else:
print("Not converged")
return L, S
import cv2
def prepare_frames_to_use(video_path="./atrium_video.avi",#Image path
start_frame=120,#First frame
end_frame=170,#Last frame
r=4,#Image reduction parameters
):
#Image loading
cap = cv2.VideoCapture(video_path)
#Get image resolution, frame rate, number of frames
wid = cap.get(cv2.CAP_PROP_FRAME_WIDTH)#side
wid_r = int(wid/r)
hei = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)#Vertical
hei_r = int(hei/r)
fps = cap.get(cv2.CAP_PROP_FPS)#frame rate
count = cap.get(cv2.CAP_PROP_FRAME_COUNT)#Number of frames
dt = 1/fps#Seconds between frames
print(f"wid:{wid}", f" hei:{hei}", f" fps:{fps}", f" count:{count}", f"dt:{dt}")
#Extraction of target frame
cat_frames = []
cap.set(cv2.CAP_PROP_POS_FRAMES,start_frame)
for i in range(end_frame - start_frame):
ret, img = cap.read()
if not ret:
print("no image")
break
buf = cv2.cvtColor(cv2.resize(img,(wid_r, hei_r)),
cv2.COLOR_BGR2GRAY).flatten()#
cat_frames.append(buf)
cap.release()
cat_frames = np.array(cat_frames).T
return cat_frames, wid_r, hei_r, fps
def detect_moving_object():
#Acquisition of frames used for calculation
frames, wid, hei, fps = prepare_frames_to_use()
#Run Robust pca
L, S = rpca(frames)
#Video output of sparse structure
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter("sparse_video.mp4",
fourcc, fps, (int(wid), int(hei)))
for i in range(S.shape[1]):
s_buf = S[:,i]
#Brightness adjustment
lum_min = np.abs(s_buf.min())
lum_max = np.abs(s_buf.max())
lum_w = lum_max + lum_min
s_buf = (s_buf + lum_min)/lum_w * 255
s_buf = cv2.cvtColor(s_buf.reshape(int(hei), -1).astype('uint8'),
cv2.COLOR_GRAY2BGR)
writer.write(s_buf)
writer.release()
if __name__ == "__main__":
detect_moving_object()