Explanation ↓
(Version 1) Machine-learned vector embedding based on document contents and metadata, where two documents that have similar technical content have a high dot product score of their embedding vectors.
Note: Keeping it running all the time incurs GCP charges, so it may be stopped without notice. There is no load balancing either, so it can take some time to produce output.
First, create the Python application in an AI Notebook on GCP. It consists of one .ipynb file and two .py files, as shown below.
To make it available on the web: after working through the Streamlit tutorial, refer to "Run Streamlit remotely" and open the port on the external IP address of the GCE instance where the AI Notebook is running.
VPC Network ⇒ Firewall: specify TCP port 8501 (the Streamlit port). Set the source IP range to 0.0.0.0/0 (accept connections from all addresses).
There are 3 files.
!streamlit run claimgen.py
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
#Third created py file
import um
def drawfig(dataframe, cc):
    """Build the 2D patent map figure and save it as an HTML file.

    A 2D histogram contour of the ``xpos``/``ypos`` columns is drawn
    first; every row is then overlaid as a small blue scattergl marker
    whose text is an HTML link assembled from the ``url``, ``index``,
    ``applicant`` and ``title`` columns (applicant and title are cut to
    their first 15 characters).

    Args:
        dataframe: Table providing the ``xpos``, ``ypos``, ``url``,
            ``index``, ``applicant`` and ``title`` columns.
        cc: String used as the prefix of the output file name,
            ``<cc>_heatmap.html``.

    Returns:
        The figure that was built (it is also written to disk).
    """
    label_family = 'Raleway'

    # Density layer: contour plot of the point distribution.
    density = go.Histogram2dContour(
        x=dataframe['xpos'].values,
        y=dataframe['ypos'].values,
        zmin=0,
        colorscale='Jet',
        contours={
            'showlabels': True,
            'labelfont': {'family': label_family, 'color': 'white'},
        },
        hoverlabel={
            'bgcolor': 'white',
            'bordercolor': 'black',
            'font': {'family': label_family, 'color': 'black'},
        },
    )
    fig = go.Figure(density)

    # Per-row text: a link to the document showing the index plus
    # truncated applicant and title.
    hover_html = (
        "<a href='" + dataframe["url"]
        + "' style='color: rgb(255,255,255)'>" + dataframe["index"]
        + "<br>" + dataframe["applicant"].str[0:15]
        + "…<br>" + dataframe["title"].str[0:15]
        + "…</a>"
    )

    # Point layer: one marker per row on top of the contour.
    fig.add_scattergl(
        x=dataframe["xpos"],
        y=dataframe["ypos"],
        mode="markers",
        marker={"size": 3.5, "color": "blue"},
        text=hover_html,
        name="The entire",
    )

    fig.update_layout(
        height=800,
        width=800,
        bargap=0,
        hovermode='closest',
        showlegend=False,
    )

    # Side effect: a standalone HTML copy is written next to the script.
    fig.write_html(cc + "_heatmap.html")
    return fig
# Page header: a title and a markdown blurb that links to the Google Patents
# research dataset.
st.title('Patent document 2D map')
st.markdown('IPC and keywords on this site@The population specified in the title has been calculated[embedding](https://console.cloud.google.com/marketplace/product/google_patents_public_datasets/google-patents-research-data?filter=solution-type:dataset)Itisdisplayedasatwo-dimensionalmapwithUMAPdbasedon.Theembeddingdataofthepatentdatausedfortheplotis[this](https://console.cloud.google.com/marketplace/product/google_patents_public_datasets/google-patents-research-data)')
# NOTE(review): the original indentation was lost, so it is unclear which of
# the following statements belong to this `if` body - confirm against the
# real file. If the input widgets are nested under the button, note that
# st.button is presumably only True on the rerun triggered by the click, so
# editing a widget would hide everything again - verify.
if st.button('Search condition specification'):
# Search word; the second argument ('apparatus') is the initial value.
text = st.text_input('Search word input', 'apparatus')
# IPC code chosen from a fixed list of four-character codes.
ipc = st.selectbox(
'ipc selection',
(
'G06F','A61K','H01L','G01N','H04L','A61B','H04N','C07D','H04W','C12N','G02B','B01D','B65D','H01M','G06Q','B29C','G01R','H04B','A61M','C07C','A61F','H01R','C08L','B01J','B41J','B65G','G11B','H05K','G06K','C02F','G02F','E21B','B23K','H02J','B32B','H02K','B60R','A23L','G03G','C07K','H04M','H05B','C04B','G01B','C08G','A01G','F16H','A01N','C08F','G06T','H01H','F24F','B62D','A01K','C12Q','F16L','A63F','C23C','F16K','B65H','G05B','A47J','E04B','G09G','H01J','B21D','C09D','G11C','F21S','C09K','B65B','A61L','C01B','H02M','G01M','C22C','H04R','G01S','G03B','A63B','C08J','H01F','B23Q','E02D','B60K','H01Q','G01C','B24B','H01B','F16D','E06B','G03F','F04D','A47L','A47B','F21V','F02M','A61N','H02G','F16C','F04B','G09B','G08B','B08B','G09F','F02D','E04H','A47G','B22D','D06F','F16B','B01F','E05B','H03K','A47C','B66B','F25B','E04F','B25J','F25D','G01F','B23B','B05B','B02C','F01N','B25B','E04G','C12P','B23P','H02P','G05D','B60N','H04J','C09J','B60T','B60C','A61H','G10L','G01L','B66C','G01D','F01D','G01V','H01S','G08G','C11D','A61G','A61C','F16F','H03M','C08K','F02B','A01D','H02B','B63B','H02H','F26B','A47K','C12M','B41F','B60W','C07F','B05D','B60L','A41D','C03C','F24C','H01G','F15B','C03B','G01J','F03D','A01H','C10G','F24H','B64C','E02F','B21B','C21D','G07F','B26D','A01C','F04C','B22F','E02B','E21D','A43B','A45D','C22B','C07H','E01C','H04Q','B60J','C25D','B28B','A61J','B41M','B05C','A23K','F16J'
))
# Country code; passed on to the query and used by drawfig as the output
# file-name prefix.
cc = st.selectbox(
'Country selection',
('JP', 'MX', 'US','AU','EP','CN'))
# um.get_xypos_df is defined outside this view; presumably it returns the
# matching documents with their 2D coordinates - confirm.
df = um.get_xypos_df(ipc,text,cc)
# reset_index() moves the index into a column; drawfig reads an "index"
# column, which assumes the index is unnamed (or named "index") - TODO confirm.
plotly_fig = drawfig(df.reset_index(),cc)
st.write(plotly_fig)
# NOTE(review): get_apprank is not defined or imported in the visible code;
# confirm where it comes from (possibly the um module).
st.write(get_apprank(df))
# Finally show the raw data table under a "data" label.
st.write("data")
st.write(df)
import umap
import pandas as pd
from google.cloud import bigquery
#bigquery part
# Builds a BigQuery query joining the Google Patents research table
# (top terms, URL, embedding_v1) with the patents publications table
# (title, applicants, filing date) on the publication number.
# NOTE(review): the remainder of this function is omitted in this view, so how
# the query is executed and what is returned cannot be confirmed here.
def get_df(s_ipc,s_text,cc):
client = bigquery.Client()
# The query uses three named placeholders: @cc (compared with the first two
# characters of the publication number), @s_text (LIKE pattern on the title)
# and @s_ipc (LIKE pattern on the IPC code). Presumably they are bound from
# the cc, s_text and s_ipc arguments in the omitted part - confirm.
# The joined result is capped at 1000 rows by the final LIMIT.
sql = """
WITH gpat AS (
SELECT
publication_number as pubnum,
top_terms,
url,
embedding_v1 as emb
FROM patents-public-data.google_patents_research.publications
),
pat AS (
SELECT publication_number as pubnum,
filing_date as appday,
STRING_AGG(DISTINCT title.text) as title,
#STRING_AGG(DISTINCT abstract.text) as abst,
STRING_AGG(DISTINCT appls.name,'|') as applicants
FROM `patents-public-data.patents.publications`,UNNEST(title_localized) as title,UNNEST(assignee_harmonized) as appls,UNNEST(ipc) as ipcs
WHERE SUBSTR(publication_number,0,2) = @cc
AND title.text LIKE @s_text
AND ipcs.code LIKE @s_ipc
#AND filing_date > 20000101
GROUP BY pubnum,filing_date
)
SELECT gpat.pubnum,
gpat.url,
gpat.top_terms,
pat.title,
#pat.abst,
pat.applicants,
pat.appday,
gpat.emb
FROM gpat
INNER JOIN pat
ON gpat.pubnum = pat.pubnum
LIMIT 1000
"""
~~ The rest is omitted because it is long.
Created using this subject as an example: Sonos vs. Google.
# Download query results: submit the query, wait for it to finish, then load
# the rows into a pandas DataFrame through the BigQuery Storage client.
query_string = """
~~ What a kettle
"""
query_job = bqclient.query(query_string)
row_iterator = query_job.result()
dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorageclient)
The documentation says this can be done with to_dataframe(), but an error occurs and I could not resolve it. As a workaround, I extracted the returned rows as tuples and loaded them into pandas. I would like to find a better way.
Recommended Posts