Introduction

I am a VBA user who started studying machine learning. As a memorandum, I would like to summarize the Python / R grammar while comparing it with VBA.

table of contents

String operation

Combine strings
String length
Retrieve string
Search string
Replace string
Convert string -Convert uppercase and lowercase -Conversion between full-width and half-width
Invert string
Repeat string
Space -Space string -Delete unnecessary spaces before and after
About string vector

Summary -List -Whole program

String manipulation

In the previous article, I compared Python and R string operations with VBA. In this continuation, I will try the same operations using stringr, an R package for string manipulation.

The reference article states that "character string processing is possible even with the functions provided by the R standard base package, but stringr is easier to use, with rational behavior and a unified interface." When you actually use it, it is indeed easy to use, because the function naming and the order of the arguments are consistent. Reference: stringr — Process R strings in a decent way

String concatenation

R(stringr)

`R`


library(stringr)

# Concatenate three strings; as the result below shows, the pieces are
# joined with nothing between them.
s1 <- "abc"
s2 <- "def"
s3 <- "ghij"
str_c(s1, s2, s3)
# "abcdefghij"

String length

R(stringr)

`R`


# Count the characters in a string.
s <- "abcdefghij"
str_length(s)
# 10

Extract string

R(stringr)

`R`


# Extract a substring by start and end position (1-based, both inclusive).
s <- "abcdefghij"
str_sub(s, 1, 2)    # first two characters
# "ab"
str_sub(s, -2, -1)  # negative positions count back from the end
# "ij"
str_sub(s, 4, 6)    # 4th through 6th characters
# "def"

Search for strings

R(stringr)

`R`


# Detect, count, and locate occurrences of "def" in a string.
s <- "abcdefghij"
t <- str_c(s, s, sep="") # "abcdefghijabcdefghij" (s repeated twice)
str_detect(s, "def")     # does the pattern occur at all?
# TRUE
str_detect(t, "def")
# TRUE
str_count(s, "def")      # how many times does it occur?
# 1
str_count(t, "def")
# 2
str_locate(s, "def")     # start/end position of the match
#      start end
# [1,]     4   6
str_locate(t, "def")     # only the first of the two matches is reported
#      start end
# [1,]     4   6
class(str_locate(t, "def"))
# "matrix" "array"   (R >= 4.0; older R versions print just "matrix")
str_locate_all(t, "def") # every match, one matrix per input string
# [[1]]
#      start end
# [1,]     4   6
# [2,]    14  16
class(str_locate_all(t, "def"))
# "list"

String replacement

R(stringr)

`R`


# Replace occurrences of "def" with "DEF".
s <- "abcdefghij"
t <- str_c(s, s, sep="") # "abcdefghijabcdefghij" (s repeated twice)
str_replace(s, "def", "DEF")
# "abcDEFghij"
str_replace(t, "def", "DEF")     # only the first occurrence is replaced
# "abcDEFghijabcdefghij"
str_replace_all(t, "def", "DEF") # every occurrence is replaced
# "abcDEFghijabcDEFghij"

String conversion

Uppercase and lowercase conversion

R(stringr)

`R`


# Case conversion of a single mixed-case string.
s <- "abcDEFghij"
str_to_upper(s)    #Uppercase
# "ABCDEFGHIJ"
str_to_lower(s)    #To lowercase
# "abcdefghij"
str_to_title(s)    #Uppercase at the beginning of each word, lowercase otherwise
# "Abcdefghij"
str_to_sentence(s) #Uppercase only at the beginning, lowercase otherwise
# "Abcdefghij"

# With several words the two functions differ: str_to_title() capitalizes
# every word, str_to_sentence() only the first one.
ss <- "abc def ghij"
str_to_title(ss)
# "Abc Def Ghij"
str_to_sentence(ss)
# "Abc def ghij"

# Swap case. stringr has no dedicated function, so split the string into
# single characters, flip each one, and join them again.
pos <- seq_len(str_length(s))      # seq_len() is safe for an empty string
chars <- str_sub(s, pos, pos)      # one element per character
# Compute both masks before modifying anything so that the second test
# still sees the original characters.
is_lower <- chars == str_to_lower(chars)
is_upper <- chars == str_to_upper(chars)
chars[is_lower] <- str_to_upper(chars[is_lower])
chars[!is_lower & is_upper] <- str_to_lower(chars[!is_lower & is_upper])
t <- str_c(chars, collapse = "")
t                     #Swapping uppercase and lowercase letters
# "ABCdefGHIJ"
s == str_to_upper(s)  #Judgment of all uppercase letters
# FALSE
s == str_to_lower(s)  #Judgment of all lowercase letters
# FALSE

Full-width and half-width conversion

R(stringr)

`R`

Inversion of string

R(stringr)

`R`


# Reverse a string. stringr has no dedicated function, so pick the
# characters from last to first (negative positions count from the end)
# and join them in one step instead of growing the result in a loop.
s <- "abcdefghij"
pos <- seq_len(str_length(s))
t <- str_c(str_sub(s, -pos, -pos), collapse = "")
t
# "jihgfedcba"

Repeat string

R(stringr)

`R`


# Repeat a string a given number of times.
str_dup("A", 3)
# "AAA"
str_dup("def", 3)
# "defdefdef"

space

Space string

R(stringr)

`R`


# Build a run of spaces by repeating " "; the "-" markers make the
# spaces visible in the printed result.
str_c("-", str_dup(" ", 3), "-")
# "-   -"
# Build a test string with 2, 3, 4 and 5 spaces around the letters d, e, f.
s <- str_c(str_dup(" ", 2), "d",
           str_dup(" ", 3), "e",
           str_dup(" ", 4), "f",
           str_dup(" ", 5))
str_c("-", s, "-")
# "-  d   e    f     -"

Delete unnecessary space before and after

R(stringr)

`R`


# Remove leading and/or trailing spaces. `s` is the padded string
# "  d   e    f     " built in the previous snippet; spaces inside the
# string are left untouched.
str_trim(s, side="left")
# "d   e    f     "
str_trim(s, side="right")
# "  d   e    f"
str_trim(s, side="both")
# "d   e    f"

About string vector

The functions in the stringr package can be used not only on single strings, but also on string vectors and data frames. For example, if you apply the str_length function to a string vector consisting of 3 strings, it is applied to each element, and a vector consisting of 3 numbers is returned.

R(stringr)

`R`


# Apply the stringr functions to a character vector: each function is
# applied element by element and returns one result per element.
s1 <- "abcdefghij"
s2 <- "cdefghijkl"
s3 <- "efghijklmn"
ss <- c(s1, s2, s3)
ss
# [1] "abcdefghij" "cdefghijkl" "efghijklmn"

# Concatenation: the single suffix is appended to every element.
str_c(ss, "_1")
# [1] "abcdefghij_1" "cdefghijkl_1" "efghijklmn_1"

# Length of each element.
str_length(ss)
# [1] 10 10 10

# Substring of each element.
str_sub(ss, 1, 2)
# [1] "ab" "cd" "ef"
str_sub(ss, -2, -1)
# [1] "ij" "kl" "mn"
str_sub(ss, 2, 3)
# [1] "bc" "de" "fg"

# Search: the third element does not contain "def".
str_detect(ss, "def")
# [1]  TRUE  TRUE FALSE
str_count(ss, "def")
# [1] 1 1 0
str_locate(ss, "def")     # one row per element; NA where there is no match
#      start end
# [1,]     4   6
# [2,]     2   4
# [3,]    NA  NA
str_locate_all(ss, "def") # one matrix per element; empty where no match
# [[1]]
#      start end
# [1,]     4   6
#
# [[2]]
#      start end
# [1,]     2   4
#
# [[3]]
#      start end
#

# Replacement: elements without a match are returned unchanged.
str_replace(ss, "def", "DEF")
# [1] "abcDEFghij" "cDEFghijkl" "efghijklmn"
str_replace_all(ss, "def", "DEF")
# [1] "abcDEFghij" "cDEFghijkl" "efghijklmn"

# Case conversion.
str_to_upper(ss)
# [1] "ABCDEFGHIJ" "CDEFGHIJKL" "EFGHIJKLMN"
str_to_lower(ss)
# [1] "abcdefghij" "cdefghijkl" "efghijklmn"
str_to_title(ss)
# [1] "Abcdefghij" "Cdefghijkl" "Efghijklmn"
str_to_sentence(ss)
# [1] "Abcdefghij" "Cdefghijkl" "Efghijklmn"

# Element-wise test for all-uppercase / all-lowercase.
ss == str_to_upper(ss)
# [1] FALSE FALSE FALSE
ss == str_to_lower(ss)
# [1] TRUE TRUE TRUE

# Repetition of each element.
str_dup(ss, 2)
# [1] "abcdefghijabcdefghij" "cdefghijklcdefghijkl" "efghijklmnefghijklmn"

# Trimming: pad each element with spaces first, then strip them again.
tt <- str_c(" ", ss, " _1 ")
tt
# [1] " abcdefghij _1 " " cdefghijkl _1 " " efghijklmn _1 "
str_trim(tt)               # both sides; the inner space is kept
# [1] "abcdefghij _1" "cdefghijkl _1" "efghijklmn _1"
str_trim(tt, side="left")
# [1] "abcdefghij _1 " "cdefghijkl _1 " "efghijklmn _1 "
str_trim(tt, side="right")
# [1] " abcdefghij _1" " cdefghijkl _1" " efghijklmn _1"

I would like to summarize vectors and data frames in another article.

Summary

List

List the character string manipulation functions used in each language. For comparison, the calculation in EXCEL is also shown. The variables are assumed to be s1 = "abc", s2 = "def", s3 = "ghij", s = "abcdefghij", t = "abcdefghijabcdefghij", u = "abcDEFghij", v = "ａｂｃＤＥＦｇｈｉｊ", and w = "  d   e    f     ". In EXCEL, the cells are assumed to contain A1: ="abc", A2: ="def", A3: ="ghij", A4: ="abcdefghij", A5: ="abcdefghijabcdefghij", A6: ="abcDEFghij", A7: ="ａｂｃＤＥＦｇｈｉｊ", and A8: ="  d   e    f     ".

Basic operation of strings

	Python	R	R(stringr)	VBA	EXCEL	result
Join	s1 + s2 + s3	paste0(s1, s2, s3) paste(s1, s2, s3, sep="")	str_c(s1, s2, s3)	s1 & s2 & s3	=A1&A2&A3 =CONCATENATE( A1,A2,A3)	abcdefghij
length	len(s)	nchar(s)	str_length(s)	Len(s)	=LEN(A4)	10
Inversion	s[::-1]			StrReverse(s)		jihgfedcba
repetition	'A' * 3		str_dup("A", 3)	String(3, "A")	=REPT("A",3)	AAA
repetition	'def' * 3		str_dup("def", 3)		=REPT("def",3)	defdefdef

Extract string

	Python	R	R(stringr)	VBA	EXCEL	result
From the left	s[0:2] s[:2]	substr(s, 1, 2) substring(s, 1, 2)	str_sub(s, 1, 2)	Left(s, 2)	=LEFT(A4,2)	ab
From the right	s[8:10] s[len(s)-2:len(s)] s[-2:]	substr(s, nchar(s)-2+1, nchar(s))	str_sub(s, -2, -1)	Right(s, 2)	=RIGHT(A4,2)	ij
On the way	s[3:6]	substr(s, 4, 6)	str_sub(s, 4, 6)	Mid(s, 4, 3)	=MID(A4,4,3)	def

Note) Regarding the extraction of the character string "in the middle", the Python and R functions specify "where to where", but the VBA and EXCEL functions specify "where and how many characters".

Search for strings

	Python	R(stringr)	VBA	EXCEL	result
Search	s.find('def')	str_locate(s, "def")	InStr(1, s, "def")	=FIND("def",A4,1) =SEARCH("def",A4,1)	3 (Python, 0-based) / 4 (R, VBA, EXCEL, 1-based)
Search from behind	t.rfind('def')		InStrRev(t, "def")		13 (Python, 0-based) / 14 (VBA, 1-based)
count	t.count('def')	str_count(t, "def")			2

Note) See above for the str_detect and str_locate functions.

String replacement

	Python	R	R(stringr)	VBA	EXCEL	result
Replacement	s.replace('def', 'DEF')	sub("def", "DEF", s)	str_replace(s, "def", "DEF")	Replace(s, "def", "DEF")	=SUBSTITUTE( A4,"def","DEF") =REPLACE(A4, FIND("def",A4), LEN("def"),"DEF")	abcDEFghij
Replace only the first one		sub("def", "DEF", t)	str_replace(t, "def", "DEF")			abcDEFghijabcdefghij
Replace all	t.replace('def', 'DEF')	gsub("def", "DEF", t)	str_replace_all(t, "def", "DEF")	Replace(t, "def", "DEF")	=SUBSTITUTE( A5,"def","DEF")	abcDEFghijabcDEFghij

String conversion

	Python	R	R(stringr)	VBA	EXCEL	result
Uppercase	u.upper()	toupper(u)	str_to_upper(u)	UCase(u)	=UPPER(A6)	ABCDEFGHIJ
To lowercase	u.lower()	tolower(u)	str_to_lower(u)	LCase(u)	=LOWER(A6)	abcdefghij
Uppercase only at the beginning, lowercase otherwise	u.capitalize()		str_to_title(u) str_to_sentence(u)	StrConv(u, vbProperCase)	=PROPER(A6)	Abcdefghij
Swap uppercase and lowercase	u.swapcase()	chartr("A-Za-z", "a-zA-Z", u)				ABCdefGHIJ
Judgment of capital letters	u.isupper()	u == toupper(u)	u == str_to_upper(u)			False
Judgment of lowercase letters	u.islower()	u == tolower(u)	u == str_to_lower(u)			False
Full-width		chartr("A-Za-z", "Ａ-Ｚａ-ｚ", u)		StrConv(u, vbWide)	=JIS(A6)	ａｂｃＤＥＦｇｈｉｊ
Half-width		chartr("Ａ-Ｚａ-ｚ", "A-Za-z", v)		StrConv(v, vbNarrow)	=ASC(A7)	abcDEFghij

String space

	Python	R(stringr)	VBA	EXCEL	result
space	' ' * 3	str_dup(" ", 3)	Space(3)	=REPT(" ",3)	"   "
Remove spaces on both sides	w.strip(' ')	str_trim(w, side="both")	Trim(w)	=TRIM(A8)	"d   e    f"
Delete left space	w.lstrip(' ')	str_trim(w, side="left")	LTrim(w)		"d   e    f     "
Delete right space	w.rstrip(' ')	str_trim(w, side="right")	RTrim(w)		"  d   e    f"

Note) EXCEL's TRIM function also reduces each run of spaces inside the character string to a single space, so its result is "d e f".

Whole program

The whole program used for reference is shown. See Last article for Python and VBA code.

R(stringr)

`R`


library(stringr)

# String concatenation
# The three pieces are joined with nothing between them.
s1 <- "abc"
s2 <- "def"
s3 <- "ghij"
str_c(s1, s2, s3)
# "abcdefghij"

# String length
s <- "abcdefghij"
str_length(s)
# 10

# Extract string
# Positions are 1-based and inclusive; negative positions count back
# from the end of the string.
s <- "abcdefghij"
str_sub(s, 1, 2)
# "ab"
str_sub(s, -2, -1)
# "ij"
str_sub(s, 4, 6)
# "def"

# Search for strings
s <- "abcdefghij"
t <- str_c(s, s, sep="") # "abcdefghijabcdefghij" (s repeated twice)
str_detect(s, "def")     # does the pattern occur at all?
# TRUE
str_detect(t, "def")
# TRUE
str_count(s, "def")      # how many times does it occur?
# 1
str_count(t, "def")
# 2
str_locate(s, "def")     # start/end position of the match
#      start end
# [1,]     4   6
str_locate(t, "def")     # only the first of the two matches is reported
#      start end
# [1,]     4   6
class(str_locate(t, "def"))
# "matrix" "array"   (R >= 4.0; older R versions print just "matrix")
str_locate_all(t, "def") # every match, one matrix per input string
# [[1]]
#      start end
# [1,]     4   6
# [2,]    14  16
class(str_locate_all(t, "def"))
# "list"

# String replacement
s <- "abcdefghij"
t <- str_c(s, s, sep="") # "abcdefghijabcdefghij" (s repeated twice)
str_replace(s, "def", "DEF")
# "abcDEFghij"
str_replace(t, "def", "DEF")     # only the first occurrence is replaced
# "abcDEFghijabcdefghij"
str_replace_all(t, "def", "DEF") # every occurrence is replaced
# "abcDEFghijabcDEFghij"

# Converting case of character string
s <- "abcDEFghij"
str_to_upper(s)    #Uppercase
# "ABCDEFGHIJ"
str_to_lower(s)    #To lowercase
# "abcdefghij"
str_to_title(s)    #Uppercase at the beginning of each word, lowercase otherwise
# "Abcdefghij"
str_to_sentence(s) #Uppercase only at the beginning, lowercase otherwise
# "Abcdefghij"

# With several words the two functions differ: str_to_title() capitalizes
# every word, str_to_sentence() only the first one.
ss <- "abc def ghij"
str_to_title(ss)
# "Abc Def Ghij"
str_to_sentence(ss)
# "Abc def ghij"

# Swap case. stringr has no dedicated function, so split the string into
# single characters, flip each one, and join them again.
pos <- seq_len(str_length(s))      # seq_len() is safe for an empty string
chars <- str_sub(s, pos, pos)      # one element per character
# Compute both masks before modifying anything so that the second test
# still sees the original characters.
is_lower <- chars == str_to_lower(chars)
is_upper <- chars == str_to_upper(chars)
chars[is_lower] <- str_to_upper(chars[is_lower])
chars[!is_lower & is_upper] <- str_to_lower(chars[!is_lower & is_upper])
t <- str_c(chars, collapse = "")
t                     #Swapping uppercase and lowercase letters
# "ABCdefGHIJ"
s == str_to_upper(s)  #Judgment of all uppercase letters
# FALSE
s == str_to_lower(s)  #Judgment of all lowercase letters
# FALSE

# Inversion of string
# stringr has no dedicated function, so pick the characters from last to
# first (negative positions count from the end) and join them in one step
# instead of growing the result in a loop.
s <- "abcdefghij"
pos <- seq_len(str_length(s))
t <- str_c(str_sub(s, -pos, -pos), collapse = "")
t
# "jihgfedcba"

# Repeat string
str_dup("A", 3)
# "AAA"
str_dup("def", 3)
# "defdefdef"

# String space
# Build a run of spaces by repeating " "; the "-" markers make the
# spaces visible in the printed result.
str_c("-", str_dup(" ", 3), "-")
# "-   -"
# Build a test string with 2, 3, 4 and 5 spaces around the letters d, e, f.
s <- str_c(str_dup(" ", 2), "d",
           str_dup(" ", 3), "e",
           str_dup(" ", 4), "f",
           str_dup(" ", 5))
str_c("-", s, "-")
# "-  d   e    f     -"

# Remove spaces before and after the string
# Spaces inside the string are left untouched.
str_trim(s, side="left")
# "d   e    f     "
str_trim(s, side="right")
# "  d   e    f"
str_trim(s, side="both")
# "d   e    f"


# String vector
# Each stringr function is applied element by element and returns one
# result per element.
s1 <- "abcdefghij"
s2 <- "cdefghijkl"
s3 <- "efghijklmn"
ss <- c(s1, s2, s3)
ss
# [1] "abcdefghij" "cdefghijkl" "efghijklmn"

# Concatenation: the single suffix is appended to every element.
str_c(ss, "_1")
# [1] "abcdefghij_1" "cdefghijkl_1" "efghijklmn_1"

# Length of each element.
str_length(ss)
# [1] 10 10 10

# Substring of each element.
str_sub(ss, 1, 2)
# [1] "ab" "cd" "ef"
str_sub(ss, -2, -1)
# [1] "ij" "kl" "mn"
str_sub(ss, 2, 3)
# [1] "bc" "de" "fg"

# Search: the third element does not contain "def".
str_detect(ss, "def")
# [1]  TRUE  TRUE FALSE
str_count(ss, "def")
# [1] 1 1 0
str_locate(ss, "def")     # one row per element; NA where there is no match
#      start end
# [1,]     4   6
# [2,]     2   4
# [3,]    NA  NA
str_locate_all(ss, "def") # one matrix per element; empty where no match
# [[1]]
#      start end
# [1,]     4   6
#
# [[2]]
#      start end
# [1,]     2   4
#
# [[3]]
#      start end
#

# Replacement: elements without a match are returned unchanged.
str_replace(ss, "def", "DEF")
# [1] "abcDEFghij" "cDEFghijkl" "efghijklmn"
str_replace_all(ss, "def", "DEF")
# [1] "abcDEFghij" "cDEFghijkl" "efghijklmn"

# Case conversion.
str_to_upper(ss)
# [1] "ABCDEFGHIJ" "CDEFGHIJKL" "EFGHIJKLMN"
str_to_lower(ss)
# [1] "abcdefghij" "cdefghijkl" "efghijklmn"
str_to_title(ss)
# [1] "Abcdefghij" "Cdefghijkl" "Efghijklmn"
str_to_sentence(ss)
# [1] "Abcdefghij" "Cdefghijkl" "Efghijklmn"

# Element-wise test for all-uppercase / all-lowercase.
ss == str_to_upper(ss)
# [1] FALSE FALSE FALSE
ss == str_to_lower(ss)
# [1] TRUE TRUE TRUE

# Repetition of each element.
str_dup(ss, 2)
# [1] "abcdefghijabcdefghij" "cdefghijklcdefghijkl" "efghijklmnefghijklmn"

# Trimming: pad each element with spaces first, then strip them again.
tt <- str_c(" ", ss, " _1 ")
tt
# [1] " abcdefghij _1 " " cdefghijkl _1 " " efghijklmn _1 "
str_trim(tt)               # both sides; the inner space is kept
# [1] "abcdefghij _1" "cdefghijkl _1" "efghijklmn _1"
str_trim(tt, side="left")
# [1] "abcdefghij _1 " "cdefghijkl _1 " "efghijklmn _1 "
str_trim(tt, side="right")
# [1] " abcdefghij _1" " cdefghijkl _1" " efghijklmn _1"

reference

R -stringr — Process R strings in a decent way

VBA user tried using Python / R: String manipulation (continued)

Introduction

String manipulation

String concatenation

R

String length

R

Extract string

R

Search for strings

R

String replacement

R

String conversion

Uppercase and lowercase conversion

R

Full-width and half-width conversion

R

Inversion of string

R

Repeat string

R

space

Space string

R

Delete unnecessary space before and after

R

About string vector

R

Summary

List

Basic operation of strings

Extract string

Search for strings

String replacement

String conversion

String space

Whole program

R

reference

`R`

`R`

`R`

`R`

`R`

`R`

`R`

`R`

`R`

`R`

`R`

`R`

`R`