-- JRuby's #open throws an error in a Windows 10 environment. -- So let's implement it on the Java side. -- If you use JISAutoDetect, UTF-8 cannot be handled. -- That led to the code below.
-- Arguments -- URL (variable name: link) -- Timeout (variable name: time_limit)
JavaOpen.java
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
 * Downloads the body of a URL and decodes it to text by trying a fixed list of
 * character encodings.
 *
 * <p>Written as a Java-side replacement for JRuby's {@code open}.
 */
public class JavaOpen {
    /** Encodings tried by {@link #bytesToHtml(byte[])}, in this order. */
    private static final String[] CANDIDATE_CHARSETS = {
        "UTF8", "SJIS", "EUC_JP", "EUC_JP_LINUX", "EUC_JP_Solaris"
    };

    /** Number of bytes requested from the stream per read call. */
    private static final int BUFFER_SIZE = 8192;

    /** Connect timeout, in milliseconds, granted per unit of {@code time_limit}. */
    private static final int CONNECT_MILLIS_PER_UNIT = 300;

    /** Read timeout, in milliseconds, granted per unit of {@code time_limit}. */
    private static final int READ_MILLIS_PER_UNIT = 700;

    /**
     * Fetches {@code link} and returns its body decoded by {@link #bytesToHtml(byte[])}.
     *
     * <p>This method is best-effort: any {@link IOException} (malformed URL, connection
     * failure, timeout) or {@link RuntimeException} (for example a negative timeout) is
     * printed to standard error and an empty string is returned instead of propagating.
     *
     * @param link       the URL to fetch
     * @param time_limit timeout budget; it is multiplied by 300 to get the connect timeout
     *                   and by 700 to get the read timeout, both in milliseconds (so one
     *                   unit is 1000 ms in total, split 3:7). Per the URLConnection API a
     *                   resulting timeout of zero means "wait indefinitely".
     * @return the decoded body, or {@code ""} if the fetch failed or no candidate encoding
     *         matched
     */
    public static String open(String link, int time_limit) {
        String html = "";
        try {
            URL url = new URL(link);
            URLConnection con = url.openConnection();
            con.setConnectTimeout(scaleTimeout(time_limit, CONNECT_MILLIS_PER_UNIT));
            con.setReadTimeout(scaleTimeout(time_limit, READ_MILLIS_PER_UNIT));
            try (InputStream is = con.getInputStream()) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                byte[] chunk = new byte[BUFFER_SIZE];
                int n;
                // read() signals end of stream with -1.
                while ((n = is.read(chunk)) != -1) {
                    baos.write(chunk, 0, n);
                }
                html = bytesToHtml(baos.toByteArray());
            }
        } catch (IOException | RuntimeException e) {
            // Report the failure instead of discarding it via a return inside finally;
            // Errors (e.g. OutOfMemoryError) are deliberately left to propagate.
            e.printStackTrace();
        }
        return html;
    }

    /**
     * Decodes {@code src} with the first candidate encoding that reproduces the input
     * bytes exactly when the decoded text is encoded again.
     *
     * <p>Candidates that the running JVM does not support are skipped.
     *
     * @param src the raw bytes to decode; {@code null} is treated as undecodable
     * @return the decoded text, or {@code ""} if {@code src} is {@code null} or no
     *         supported candidate round-trips the bytes
     * @throws UnsupportedEncodingException never thrown by this implementation; the
     *         clause is retained so existing callers keep compiling
     */
    public static String bytesToHtml(byte[] src) throws UnsupportedEncodingException {
        if (src == null) {
            return "";
        }
        for (String name : CANDIDATE_CHARSETS) {
            if (!Charset.isSupported(name)) {
                // A missing optional charset must not prevent trying the remaining ones.
                continue;
            }
            Charset charset = Charset.forName(name);
            String decoded = new String(src, charset);
            // Malformed input is replaced during decoding, so a wrong guess fails to
            // round-trip back to the original bytes.
            if (Arrays.equals(src, decoded.getBytes(charset))) {
                return decoded;
            }
        }
        return "";
    }

    /** Multiplies in long arithmetic and saturates to the int range to avoid overflow. */
    private static int scaleTimeout(int units, int millisPerUnit) {
        long millis = (long) units * millisPerUnit;
        return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, millis));
    }
}
-- String[] char_codes = {"UTF8", "SJIS", "EUC_JP", "EUC_JP_LINUX", "EUC_JP_Solaris"};
lists the character encodings of the pages you expect to access, so feel free to change it.
-- I wanted to come up with a better variable name than time_limit ...
-- I split the time limit as setConnectTimeout : setReadTimeout = 3 : 7, but what ratio is normally used?
-- I wanted to understand how reading 8192 bytes at a time works, but I lost the motivation to look into it.
-try-with-resources statement --ORACLE -Simple character code judgment in Java --Qiita --Supported encodings --ORACLE
Recommended Posts