Last time, I graphed the Poisson distribution and the Poisson cumulative distribution in Python and Java, respectively. I plotted the data created in Java with Matplotlib; the part that bridged the data between the two with CSV has now been rewritten to use SQLite.
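For the SQLite API, the Python side uses the sqlite3 module from the Python 3.6 standard library, and the Java side uses the SQLite JDBC driver (`org.sqlite.JDBC`).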
The code below creates the tables Poisson and PoissonCDF in math_data.db and stores the probability mass and the cumulative probability for each λ.
poisson.java
/**
 * Rebuilds the Poisson and PoissonCDF tables in math_data.db.
 *
 * For every lambda in the hard-coded list and every k in 0..12, one row
 * (lam, count, p) is written to each table: Poisson holds the probability
 * mass, PoissonCDF holds the running sum of the mass up to and including k.
 * ClassNotFoundException and SQLException are caught and their stack traces
 * printed; nothing is rethrown.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Calc c = new Calc();
    // Changed the method to store the λ value in the list
    int[] lamList = {1, 4, 8, 10};
    try {
        Class.forName("org.sqlite.JDBC");
        // try-with-resources closes the statement first, then the connection,
        // even when an SQLException is thrown part-way through.
        try (Connection connection = DriverManager.getConnection("jdbc:sqlite:math_data.db");
             Statement statement = connection.createStatement()) {
            connection.setAutoCommit(false);

            // Create table
            statement.executeUpdate("DROP TABLE IF EXISTS Poisson");
            statement.executeUpdate("CREATE TABLE IF NOT EXISTS Poisson( lam integer, count integer , p real )");
            // Probability that an event occurring λ times on average in a period occurs k times in that period
            try (PreparedStatement preparedStatement =
                     connection.prepareStatement("INSERT INTO Poisson values (?, ?, ?);")) {
                for (int lam : lamList) {
                    for (int k = 0; k <= 12; k++) {
                        preparedStatement.setInt(1, lam);
                        preparedStatement.setInt(2, k);
                        preparedStatement.setDouble(3, c.poisson(lam, k));
                        preparedStatement.addBatch();
                    }
                }
                // Batch write
                System.out.println(preparedStatement.executeBatch().length + "Register the batch.");
            }
            connection.commit();

            // Cumulative distribution
            statement.executeUpdate("DROP TABLE IF EXISTS PoissonCDF");
            statement.executeUpdate("CREATE TABLE IF NOT EXISTS PoissonCDF( lam integer, count integer , p real )");
            // Cumulative probability that the event occurs k times or fewer in the period
            try (PreparedStatement preparedStatement =
                     connection.prepareStatement("INSERT INTO PoissonCDF values (?, ?, ?);")) {
                for (int lam : lamList) {
                    // Running sum restarts for each λ
                    double pTotal = 0;
                    for (int k = 0; k <= 12; k++) {
                        pTotal += c.poisson(lam, k);
                        preparedStatement.setInt(1, lam);
                        preparedStatement.setInt(2, k);
                        preparedStatement.setDouble(3, pTotal);
                        preparedStatement.addBatch();
                    }
                }
                // Batch write
                System.out.println(preparedStatement.executeBatch().length + "Register the batch.");
            }
            connection.commit();
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (SQLException e) {
        e.printStackTrace();
    }
}
}
/**
 * Small numeric helpers for the Poisson distribution.
 */
class Calc {
    /**
     * Returns n! as an int.
     *
     * @param n non-negative integer, at most 12
     * @return n!
     * @throws IllegalArgumentException if n is negative
     * @throws ArithmeticException if n is greater than 12, because 13! no longer fits in an int
     */
    int factorial(int n) {
        if (n < 0) {
            throw new IllegalArgumentException("n must be non-negative: " + n);
        }
        if (n > 12) {
            // 12! = 479001600 is the largest factorial below Integer.MAX_VALUE.
            throw new ArithmeticException("factorial overflows int for n = " + n);
        }
        int result = 1;
        for (int i = 2; i <= n; i++) {
            result *= i;
        }
        return result;
    }

    /**
     * Probability mass that an event with average rate lam occurs exactly k times.
     *
     * Computed as exp(-lam) * prod_{i=1..k} (lam / i), which avoids forming k!
     * explicitly and therefore stays correct for k &gt; 12.
     *
     * @param lam average number of occurrences in the period
     * @param k   number of occurrences
     * @return lam^k * e^(-lam) / k!, or 0 when k is negative
     */
    double poisson(double lam, int k) {
        if (k < 0) {
            return 0;
        }
        double term = Math.exp(-lam);
        for (int i = 1; i <= k; i++) {
            term *= lam / i;
        }
        return term;
    }

    /**
     * Cumulative probability that an event with average rate lam occurs k times or fewer (0 included).
     *
     * @param lam average number of occurrences in the period
     * @param k   upper bound on the number of occurrences
     * @return sum of poisson(lam, i) for i = 0..k; 0 when k is negative
     */
    double poisson_cdf(double lam, int k) {
        double term = Math.exp(-lam);
        double total = 0;
        for (int i = 0; i <= k; i++) {
            if (i > 0) {
                // Advance P(i-1) to P(i) instead of recomputing each term from scratch.
                term *= lam / i;
            }
            total += term;
        }
        return total;
    }
}
//Execution result
//Register 52 batches.
//Register 52 batches.
Execution completed. Next, check that the data was stored correctly.
// Dump every row of both tables as "lam,count,p" lines so the inserts can be checked by eye.
ResultSet resultSet;
String[] checkQueries = {"select * from Poisson", "select * from PoissonCDF"};
String[] columnNames = {"lam", "count", "p"};
for (String query : checkQueries) {
    resultSet = statement.executeQuery(query);
    while (resultSet.next()) {
        for (int col = 0; col < columnNames.length; col++) {
            // Comma goes between fields only, never before the first one
            if (col > 0) {
                System.out.print(",");
            }
            System.out.print(resultSet.getString(columnNames[col]));
        }
        System.out.println();
    }
}
/*
Execution result
1,0,0.367879441171442
1,1,0.367879441171442
1,2,0.183939720585721
1,3,0.0613132401952404
1,4,0.0153283100488101
*/
The data is stored correctly. Next, I would like to read it in Python and plot it with Matplotlib.
plotting.py
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# Plot the Poisson mass and cumulative tables from math_data.db side by side.

# (panel title, query) for each subplot, left to right. Rows are requested in
# (lam, count) order so every curve is drawn with increasing k regardless of
# how the rows were inserted.
_PANELS = (
    ('Poisson', 'SELECT lam, "count", p FROM Poisson ORDER BY lam, "count"'),
    ('PoissonCDF', 'SELECT lam, "count", p FROM PoissonCDF ORDER BY lam, "count"'),
)


def _plot_panel(ax, rows, title):
    """Draw one curve per lambda from ``(lam, count, p)`` rows onto *ax*."""
    # Group the rows by lambda: lam -> ([k, ...], [p, ...])
    series = {}
    for lam, count, p in rows:
        ks, ps = series.setdefault(lam, ([], []))
        ks.append(count)
        ps.append(p)

    ticks = set()
    for lam in sorted(series):
        ks, ps = series[lam]
        # Raw string: '\l' is not a valid escape sequence in a normal literal.
        ax.plot(ks, ps, marker='o', label=r'$\lambda=%.2f$' % lam)
        ticks.update(ks)

    ax.set_xlabel('k')
    ax.set_ylabel('probability')
    ax.set_title(title)
    ax.legend()
    ax.grid(True)
    # Tick at every k that actually occurs in the data
    ax.set_xticks(sorted(ticks))


sns.set(style="darkgrid")

conn = sqlite3.connect("math_data.db")
try:
    c = conn.cursor()
    tables = [(title, c.execute(query).fetchall()) for title, query in _PANELS]
finally:
    # Release the database even if one of the queries fails
    conn.close()

fig, axe = plt.subplots(1, 2, constrained_layout=True, figsize=(12, 5))
for ax, (title, rows) in zip(axe, tables):
    _plot_panel(ax, rows, title)

# No trailing space in the file name: the output format is inferred from the
# extension, and "png " is not a format Matplotlib knows.
plt.savefig("poisson_n_cdf.png")
plt.show()
I was able to plot it neatly. You can also plot various Poisson distributions by changing the λ array passed on the Java side.
I created the DB with rows and columns swapped and tried to build all the arrays by brute force from the number at the start of each row, but the code became complicated. To make the data easier to use later, the output layout has to be thought through at the stage of creating the DB. Especially if the data deals with a small number of fixed points, it gets out of hand quickly, so I want to avoid having to rework the structure too much later.
Recommended Posts