I am a vocational school student, about 20 years old. It has been a year since I enrolled. I had never done any programming before entering the school. Now that I have learned the basics of Python, I would like to keep studying by building various things!
For discord.py, I referred to this article: https://qiita.com/mizunana/items/4afddc71f37df555078e For Custom Vision and OpenVINO, I referred to this notebook: https://github.com/hiouchiy/IntelAI_and_Cloud/blob/master/Azure/demo1/Lesson1_AzureCognitiveService_and_OpenVINO_Collaboration.ipynb
class Model(object):
    """Base image classifier: loads the label list and offers pre-processing helpers.

    Subclasses are expected to implement :meth:`predict`.
    """

    def __init__(self, labels_filename="labels.txt"):
        """Read the class labels from a text file, one label per line.

        Args:
            labels_filename: Path of the labels file. Defaults to
                ``"labels.txt"``, the name that used to be hard-coded.
        """
        # Create a list of labels (surrounding whitespace/newlines stripped).
        with open(labels_filename, 'rt') as lf:
            self.labels = [line.strip() for line in lf]

    def predict(self, imageFile):
        """Classify the image stored at *imageFile*; must be overridden."""
        raise NotImplementedError

    def convert_to_opencv(self, image):
        """Return *image* as an array with its channels in B, G, R order.

        Args:
            image: Object with a ``convert('RGB')`` method whose result
                ``np.array`` can turn into a (rows, columns, 3) array.

        Returns:
            A contiguous array of the same shape with the last axis reversed.
        """
        # RGB -> BGR conversion is performed as well.
        rgb = np.array(image.convert('RGB'))
        return np.ascontiguousarray(rgb[:, :, ::-1])

    def crop_center(self, img, cropx, cropy):
        """Return the centered ``cropx`` x ``cropy`` window of *img*.

        Args:
            img: Array whose first two axes are (rows, columns).
            cropx: Width of the window in columns.
            cropy: Height of the window in rows.

        Returns:
            A slice of *img*. If the requested window is larger than the
            image along an axis, the whole extent of that axis is returned.
        """
        h, w = img.shape[:2]
        # Clamp at 0: a negative start would be read as "count from the end"
        # by the slice below and silently return the wrong region.
        startx = max(0, w // 2 - (cropx // 2))
        starty = max(0, h // 2 - (cropy // 2))
        return img[starty:starty + cropy, startx:startx + cropx]

    def resize_down_to_1600_max_dim(self, image):
        """Shrink *image* so that its largest side becomes 1600, keeping aspect ratio.

        Images whose sides are both below 1600 are returned unchanged (the
        very same object).
        """
        h, w = image.shape[:2]
        if h < 1600 and w < 1600:
            return image
        # cv2.resize takes the size as (width, height). max(1, ...) keeps the
        # short side from being rounded down to 0 for extreme aspect ratios.
        if h > w:
            new_size = (max(1, 1600 * w // h), 1600)
        else:
            new_size = (1600, max(1, 1600 * h // w))
        return cv2.resize(image, new_size, interpolation=cv2.INTER_LINEAR)

    def resize_to_256_square(self, image):
        """Resize *image* to 256x256 with linear interpolation (aspect ratio is not kept)."""
        return cv2.resize(image, (256, 256), interpolation=cv2.INTER_LINEAR)

    def update_orientation(self, image):
        """Flip/transpose *image* according to its EXIF orientation tag, if present.

        Args:
            image: Image object. Only objects exposing ``_getexif`` are
                inspected; they must also provide ``transpose``.

        Returns:
            The re-oriented image, or *image* itself when it carries no
            orientation information.
        """
        exif_orientation_tag = 0x0112
        if hasattr(image, '_getexif'):
            exif = image._getexif()
            if exif is not None and exif_orientation_tag in exif:
                orientation = exif.get(exif_orientation_tag, 1)
                # orientation is 1 based, shift to zero based and flip/transpose based on 0-based values
                orientation -= 1
                if orientation >= 4:
                    image = image.transpose(Image.TRANSPOSE)
                if orientation in (2, 3, 6, 7):
                    image = image.transpose(Image.FLIP_TOP_BOTTOM)
                if orientation in (1, 2, 5, 6):
                    image = image.transpose(Image.FLIP_LEFT_RIGHT)
        return image
class OpenVINOModel(Model):
    """Gorilla/human image classifier that runs an IR model through the OpenVINO Inference Engine.

    Output index 0 is treated as "gorilla" and index 1 as "human", as shown
    by the log lines printed for each case.
    """

    # Thresholds applied to the probability of the class that did NOT win.
    _NEGLIGIBLE = 1e-12
    _SMALL = 1e-6

    # winning class index -> (name used in the log line, replies ordered from
    # "the other class is practically absent" to "the other class is noticeable").
    _REPLIES = {
        1: ("Human", (
            "You have a low gorilla coefficient.\n If you want to raise it, you should eat 3 bananas every day.",
            "You, maybe your ancestors are gorillas(Lie)",
            "Maybe your previous life was a gorilla?",
        )),
        0: ("Gorilla", (
            "You are a genuine gorilla\n You can become a legendary gorilla!",
            "The gorilla coefficient is a little low\n If you want to get the most out of it, run around the jungle and help the gorillas around you!",
            "You are a gorilla than a person!",
        )),
    }

    def __init__(self, target_device, model_xml='model.xml', model_bin='model.bin'):
        """Load the labels and the IR network, and prepare it for inference.

        Args:
            target_device: Device name handed to ``IEPlugin``.
                This sample supports these 3 names: CPU, GPU, MYRIAD.
            model_xml: Path of the IR topology file. The default is the
                name used by exported models.
            model_bin: Path of the IR weights file. The default is the
                name used by exported models.
        """
        super(OpenVINOModel, self).__init__()
        # Plugin initialization for specified device and load extensions library if specified
        ie = IEPlugin(device=target_device, plugin_dirs='')
        # Read IR
        self.net = IENetwork(model=model_xml, weights=model_bin)
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))
        self.net.batch_size = 1
        # Loading model to the plugin
        self.exec_net = ie.load(network=self.net)

    def predict(self, imageFile):
        """Classify one image file and build the reply for the user.

        Args:
            imageFile: Path (or file object) accepted by ``Image.open``.

        Returns:
            A ``(scores, message)`` tuple: ``scores`` is the first row of the
            network's output blob and ``message`` is the reply text chosen
            from it.

        Raises:
            ValueError: If the winning class index is neither 0 nor 1.
        """
        # Load from a file; the ``with`` block closes the file handle once
        # the pixel data has been copied into an array.
        with Image.open(imageFile) as pil_image:
            # Update orientation based on EXIF tags, if the file has orientation info.
            oriented = self.update_orientation(pil_image)
            # Convert to OpenCV format
            image = self.convert_to_opencv(oriented)

        # If the image has either w or h greater than 1600 we resize it down respecting
        # aspect ratio such that the largest dimension is 1600
        image = self.resize_down_to_1600_max_dim(image)

        # We next get the largest center square
        h, w = image.shape[:2]
        min_dim = min(w, h)
        max_square_image = self.crop_center(image, min_dim, min_dim)

        # Resize that square down to 256x256
        augmented_image = self.resize_to_256_square(max_square_image)

        # Get the input size of the model
        n, c, h, w = self.net.inputs[self.input_blob].shape

        # Crop the center for the specified network_input_Size
        augmented_image = self.crop_center(augmented_image, w, h)

        # Move the last axis (channels) to the front to match the (c, h, w)
        # layout of one network input.
        augmented_image = augmented_image.transpose((2, 0, 1))
        images = np.zeros(shape=(n, c, h, w))
        images[0] = augmented_image

        predictions = self.exec_net.infer(inputs={self.input_blob: images})
        scores = predictions[self.out_blob][0]
        return scores, self._select_message(scores)

    def _select_message(self, scores):
        """Log the detected class/level and return the matching reply text.

        Args:
            scores: One row of class probabilities (index 0 gorilla, 1 human).

        Raises:
            ValueError: If the highest-scoring index is neither 0 nor 1.
        """
        top = int(scores.argsort()[-1])
        if top not in self._REPLIES:
            # Previously this fell through every branch and failed later with
            # an UnboundLocalError on the return statement.
            raise ValueError(
                "unexpected class index {}; only 0 (gorilla) and 1 (human) are supported".format(top)
            )
        kind, messages = self._REPLIES[top]
        other = scores[1 - top]

        # NOTE(review): the original compared with ">" (first "> 1e-12", then
        # "> 1e-6"), which made the "class2" branch unreachable because every
        # value above 1e-6 is also above 1e-12. With "<" all three levels are
        # reachable and the replies escalate consistently with the
        # probability of the other class - confirm this matches the intent.
        if other < self._NEGLIGIBLE:
            level = 0
        elif other < self._SMALL:
            level = 1
        else:
            level = 2

        print("{} class{}".format(kind, level + 1))
        return messages[level]
def download_img(url, file_name, timeout=10):
    """Download *url* and save the response body to *file_name*.

    Args:
        url: Address of the file to fetch.
        file_name: Destination path; the file is opened in binary write mode.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller forever.

    Returns:
        True if the server answered with status 200 and the body was
        written, False otherwise (nothing is written in that case).
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as r:
        if r.status_code != 200:
            return False
        with open(file_name, 'wb') as f:
            # Write the body chunk by chunk instead of loading it into memory at once.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return True
def run_inference(target_device='CPU'):
    """Run the classifier on a randomly chosen image from the ``images`` directory.

    Args:
        target_device: Device name forwarded to ``OpenVINOModel``
            (previously ignored in favour of a hard-coded ``'CPU'``).

    Returns:
        Whatever ``OpenVINOModel.predict`` returns for the chosen image.

    Raises:
        IndexError: If no file matches the image pattern.
    """
    model = OpenVINOModel(target_device)
    # NOTE: the pattern ends with a space on purpose - it has to match the
    # path "images/image.png " that on_message hands to download_img.
    file_list = glob.glob("images/*.png ")
    if not file_list:
        # random.choice would raise a bare IndexError; keep the type but say why.
        raise IndexError("no image found matching 'images/*.png '")
    img_path = random.choice(file_list)
    return model.predict(img_path)
# Bot token passed to client.run(); the value here is a placeholder.
TOKEN = 'Insert the bot token here'
# Discord client on which this file's handlers are registered via @client.event.
client = discord.Client()
#Processing that operates at startup @client.event async def on_ready(): #When started, a login notification will be displayed in the terminal print('You are now logged')
#Processing that operates when receiving a message
@client.event
async def on_message(message):
    """Answer a ``/pic`` message that carries an image with its gorilla coefficient.

    The first attachment is downloaded to ``"images/image.png "``, inference
    is run, and the score plus the reply text are sent to the channel the
    message came from. Messages from bots, messages that do not start with
    ``/pic`` and ``/pic`` messages without any attachment are ignored.

    Args:
        message: The received message object.
    """
    #Ignore if the message sender is a bot
    if message.author.bot:
        return

    #Judgment of whether it was spoken
    if not message.content.startswith('/pic'):
        return

    # "/pic" without an image: nothing to classify (indexing attachments[0]
    # used to raise IndexError here).
    if not message.attachments:
        return

    # Read the URL from the attachment itself instead of cutting it out of
    # the attachment's string representation.
    get_img = message.attachments[0].url
    download_img(get_img, "images/image.png ")

    # NOTE(review): download_img and run_inference are synchronous, so this
    # handler does not yield to the event loop while they run.
    deta = run_inference(target_device='CPU')
    await message.channel.send("Your gorilla coefficient is" + str(deta[0][0]) + "\n" + deta[1])
# Start the bot with the token defined in TOKEN.
client.run(TOKEN)
Most of this code comes from the sites I referred to above.
## Where I struggled
This was my first time making a Discord bot, so I knew nothing about it. What I struggled with most was how to detect that an image had been sent, and how to save that image and pass it to the model.
First, I solved the detection problem by having the user send a comment (the `/pic` command) together with the image.
Saving the image was also a little difficult.
First, I wrote **download_img** by following the site I referred to.
I had to change it slightly, because the code from that site did not work as-is due to differences between its environment and my development environment.
OpenVINO
OpenVINO is a toolkit that Intel has released to speed up processing such as machine learning inference on Intel hardware such as CPUs.
I first used it in class, and it changed the speed tremendously. At first I thought it was not necessary to be that fast, but when working with video rather than still images, less lag is clearly better. I had come to like it and wanted to use it, so I used it this time.
# Impressions
I entered a vocational school because I was interested in AI. At first we only covered the basics of programming and I couldn't do much with AI, so I started by trying to make something simple.
It's easy to build a model with Custom Vision, and it's not too difficult to make a Discord bot!
From now on, I want to make my own model and make interesting things while making various improvements!
As an aside, I am at the tax office because my family needed someone to file their tax return and did not have time to do it themselves, but the wait is **100 minutes**, which is on par with a popular Disneyland attraction... I'll do my best.