prediction_service.py execution command:

python prediction_service.py "k_prediction/language.txt" "language-identifier" "Project ID"

<img width="841" alt="Screenshot 2017-05-20 13.05.20.png" src="https://qiita-image-store.s3.amazonaws.com/0/95254/cc3adb89-1330-77e9-bf2a-f32fa94f5b55.png">
#!/usr/bin/env python
"""Simple command-line sample for the Google Prediction API Command-line application that trains on your input data. This sample does the same thing as the Hello Prediction! example. You might want to run the setup.sh script to load the sample data to Google Storage. Usage: $ python prediction_service.py "bucket/object" "model_id" "project_id" "my-xxxxx.json" You can also get help on all the command-line flags the program understands by running: $ python prediction_service.py --help To get detailed log output run: $ python prediction_service.py --logging_level=DEBUG """ from future import print_function
author = ('[email protected] (Joe Gregorio), ' '[email protected] (Marc Cohen)')
import argparse import os from pprint import pprint as pp import sys import time import MySQLdb
sys.path.append( os.path.join(os.path.dirname(os.path.realpath(file)), 'lib') )
import httplib2 from apiclient import discovery from apiclient import sample_tools from oauth2client import client from oauth2client.service_account import ServiceAccountCredentials from googleapiclient import discovery from oauth2client import tools
SLEEP_TIME = 10 scopes=['https://www.googleapis.com/auth/prediction','https://www.googleapis.com/auth/devstorage.read_only']
→ 1. Define authentication information
#Declare command-line flags. argparser = argparse.ArgumentParser(add_help=False) argparser.add_argument('object_name', help='Bucket name/hoge.txt(Training data)') argparser.add_argument('model_id', help='Model ID') argparser.add_argument('project_id', help='Project ID') argparser.add_argument('credential', help='client_secrets.json')
def print_header(line): '''Format and print header block sized to length of line''' header_str = '=' header_line = header_str * len(line) print('\n' + header_line) print(line) print(header_line)
def main(argv):
parents=[argparser] parent_parsers = [tools.argparser] parent_parsers.extend(parents) parser = argparse.ArgumentParser( description=doc, formatter_class=argparse.RawDescriptionHelpFormatter, parents=parent_parsers) flags = parser.parse_args(argv[1:]) credential_file = os.path.join(os.path.dirname(os.path.realpath(file)), flags.credential) credentials = ServiceAccountCredentials.from_json_keyfile_name( credential_file, scopes=scopes)
http = credentials.authorize(http = httplib2.Http()) service = discovery.build('prediction', 'v1.6', http=http)
→ 2. Access the Google Prediction API
try: # Get access to the Prediction API. papi = service.trainedmodels()
→ 3. List models.
# List models.
print_header('Fetching list of first ten models')
result = papi.list(maxResults=10, project=flags.project_id).execute()
print('List results:')
pp(result)
→ 4. Start training request on a data set.
# Start training request on a data set.
print_header('Submitting model training request')
body = {'id': flags.model_id, 'storageDataLocation': flags.object_name}
start = papi.insert(body=body, project=flags.project_id).execute()
print('Training results:')
pp(start)
→ 5. wait for the training to complete.
# Wait for the training to complete.
print_header('Waiting for training to complete')
while True:
status = papi.get(id=flags.model_id, project=flags.project_id).execute()
state = status['trainingStatus']
print('Training state: ' + state)
if state == 'DONE':
break
elif state == 'RUNNING':
time.sleep(SLEEP_TIME)
continue
else:
raise Exception('Training Error: ' + state)
# Job has completed.
print('Training completed:')
pp(status)
break
→ 6. Describe model
# Describe model.
print_header('Fetching model description')
result = papi.analyze(id=flags.model_id, project=flags.project_id).execute()
print('Analyze results:')
pp(result)
→ 7. Get the data to be predicted from the database
#DB call
print('================')
print('Get the data to predict from the database')
print('================')
if __name__ == "__main__":
connector = MySQLdb.connect(host="??????", db="??????", user="??????", passwd="??????", charset="utf8")
cursor = connector.cursor()
sql = "SELECT id,message FROM `posts` WHERE id = (select MAX(id) from posts)"
cursor.execute(sql)
records = cursor.fetchall()
for record in records:
print (record[0])
record_id=record[0]
record_1=record[1].encode('utf-8')
cursor.close()
connector.close()
#tst
# Make some predictions using the newly trained model.
print_header('Making some predictions')
for sample_text in [record_1]:
body = {'input': {'csvInstance': [sample_text]}}
result = papi.predict(
body=body, id=flags.model_id, project=flags.project_id).execute()
print('Prediction results for "%s"...' % sample_text)
pp(result)
import json
array = json.dumps(result)
data=json.loads(array)
data2 = data['outputMulti']
print(data2)
→ 8. Display the response data from the API
print('================')
print('Display response data from API')
print('================')
print(data2[0]['label'])
print(data2[0]['score'])
print(data2[1]['label'])
print(data2[1]['score'])
data_score=float(data2[0]['score'])-float(data2[1]['score'])
→ 9. Judge the response data from the API
print('================')
print('Judge response data from API')
print('================')
if data_score > 0 :
pacentage=float(data2[0]['score'])*100
print( "This message is'"+str(pacentage)+"%''true'is.")
evaluate = data2[0]['label']
score = data2[0]['score']
else:
pacentage=float(data2[1]['score'])*100
print( "This message is'"+str(pacentage)+"%''false'is.")
evaluate = data2[1]['label']
score = data2[1]['score']
print(record_id)
#DB call
→ 10. Reflect the result in the database
print('================')
print('Reflect the result in the database')
print('================')
if __name__ == "__main__":
connector = MySQLdb.connect(host="?????", db="?????", user="?????", passwd="?????", charset="utf8")
cursor = connector.cursor()
cursor.execute('UPDATE posts SET evaluate = (%s) WHERE id = (%s)',([evaluate],record_id))
cursor.execute('UPDATE posts SET score = (%s) WHERE id = (%s)',([score],record_id))
connector.commit()
connector.close()
if name == 'main': main(sys.argv)
→ 11. Reflect the judgment result saved in the DB in the newly posted article by the WEB application.
Reference site
https://github.com/google/google-api-python-client/tree/master/samples/prediction
<h2>Preparation</h2>
- Create a Cloud Platform Console project (obtain a project ID)
- Enable the Prediction and Google Cloud Storage APIs for your project.
- Create a bucket
- Upload the training data to Cloud Storage.
- Define the model name
- Get the API authentication file (client_secrets.json)
https://cloud.google.com/prediction/docs/quickstart?hl=ja
<h3>How to get client_secrets.json</h3>
I used client_secrets.json to do API authentication.
I will briefly describe the procedure for creating client_secrets.json.
procedure
→ Register the project on the Google Developer Console
→ Enable the Google Prediction API from "APIs & auth" → "APIs" on the left
→ From "APIs & auth" → "Credentials" on the left, press "Create new Client ID" → choose "Installed application (other)", then "Download JSON" and save the file as client_secrets.json
File name: client_secrets.json
{ "web": { "client_id": "?????", "client_secret": "?????", "redirect_uris": [], "auth_uri": "https://accounts.google.com/o/oauth2/auth", "token_uri": "https://accounts.google.com/o/oauth2/token" } }
<h3>Create training data</h3>
What is training data?
This is the data that is the basis for judgment by machine learning.
In explaining the training data, I will briefly explain machine learning.
There are two main types of problems handled by machine learning: "supervised learning" and "unsupervised learning."
The Google Prediction API is the former supervised learning.
In supervised learning, the purpose is to correctly predict the output (invisible) for input data (visible) given. However, the machine does not know what to output even if the input is just input.
Therefore, multiple cases of input / output pairs called training data (or teacher data) are given.
In other words, ask humans to give some examples that if you can put this in, put it out.
Based on this, the purpose is to create a machine (= function) that outputs the correct output when new input data arrives.
Of course, if you receive exactly the same input as one you have seen before, it seems enough to select the training data with the same input and output its paired output. In practice, however, some input data does not appear in the training data you have.
For such data, the main theme of supervised learning is to design a learning procedure (learning algorithm) that generalizes the given training data and increases, as much as possible, the ability to deal with data whose output is unknown.
<h3>Training data</h3>
As explained above, this data is a set of input / output pair examples.
It returns true when the character hoge is entered and false when the character hoge_false is entered.
train.txt
"true","hoge" "true","hoge" "true","hoge" "true","hoge" "true","hoge" "true","hoge" "true","hoge" "true","hoge" "true","hoge" "true","hoge" "false","hoge_false" "false","hoge_false" "false","hoge_false" "false","hoge_false" "false","hoge_false" "false","hoge_false" "false","hoge_false" "false","hoge_false"
<h2>Building the environment</h2>
Describes the information necessary to build a development environment.
<h3>Python environment</h3>
<h3>Install necessary tools (such as gcc) before installing MySQL-python</h3>
$ sudo yum groupinstall "Development Tools"

If it doesn't work:

$ sudo yum install gcc

MySQL-python install:

$ pip install MySQL-python
<h3>Check with MySQL connection script</h3>
import _mysql import sys from pprint import pprint as pp try: con = _mysql.connect('localhost', 'root', 'root', 'prediction_dev') #Edit here help(con) except _mysql.Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit(1) finally: if con: con.close()
$ python test_connection_mysql.py <_mysql.connection open to 'localhost' at 265f940>
You can connect without any error.
This completes the environment construction for machine learning with Python.
Recommended Posts