2011년 6월 13일 월요일

J2SE 5.0 SCANNER API 사용법

TEST ONE
poem.txt 문서 내용
서시 윤동주
죽는 날까지 하늘을 우러러
한 점 부끄럼이 없기를,
잎새에 이는 바람에도
나는 괴로와했다.
별을 노래하는 마음으로
모든 죽어가는 것을 사랑해야지.
그리고 나한테 주어진 길을
걸어가야겠다.
오늘 밤에도 별이 바람에 스치운다.


기존에 File IO방식을 이용한 Text 읽기 방식입니다.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.File;
/**
 * Prints a text file to standard output, one line at a time, using the classic
 * {@code java.io} reader stack ({@link FileReader} wrapped in a
 * {@link BufferedReader}).
 */
public class TextReader {

    /**
     * Echoes every line of the given file to {@code System.out}.
     *
     * <p>The file is decoded with the platform default charset, because that is
     * what {@link FileReader} uses. Any {@link IOException} is reported with
     * {@code printStackTrace()} and otherwise ignored.
     *
     * @param fileName path of the file to print
     */
    private static void readFile(String fileName) {
        try {
            File file = new File(fileName);
            BufferedReader in = new BufferedReader(new FileReader(file));
            try {
                String line;
                while ((line = in.readLine()) != null) {
                    System.out.println(line);
                }
            } finally {
                // Close on every path: a failing readLine() must not leak the file handle.
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the contents of {@code poem.txt} from the working directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        readFile("poem.txt");
    }
}


“java -Xrunhprof:cpu=samples,thread=y” 옵션으로 실행했을 때의 로그입니다.
THREAD START (obj=50000122, id = 200003, name="Signal Dispatcher", group="system")
THREAD START (obj=50000123, id = 200000, name="main", group="main")
THREAD END (id = 200000)
THREAD START (obj=50000160, id = 200004, name="DestroyJavaVM", group="main")
THREAD END (id = 200004)
TRACE 300042: (thread=200000)
java.io.FileInputStream.readBytes(FileInputStream.java:Unknown line)
java.io.FileInputStream.read(FileInputStream.java:194)
sun.nio.cs.StreamDecoder$CharsetSD.readBytes(StreamDecoder.java:411)
sun.nio.cs.StreamDecoder$CharsetSD.implRead(StreamDecoder.java:453)
CPU SAMPLES BEGIN (total = 2) Sat Jan 15 10:25:48 2005
rank self accum count trace method
1 100.00% 100.00% 2 300042 java.io.FileInputStream.readBytes
CPU SAMPLES END


JDK 5.0 Scanner API를 이용한 방식입니다.
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Scanner;
/**
 * Prints a text file to standard output, one line at a time, using the
 * J2SE 5.0 {@link Scanner} API.
 */
public class TextScanner {

    /**
     * Echoes every line of the given file to {@code System.out}.
     *
     * <p>Lines are read with {@code hasNextLine()}/{@code nextLine()} rather than
     * a delimiter built from the {@code line.separator} system property. That
     * property describes the platform running the program, not the file being
     * read, so a file with different line endings would be split incorrectly.
     * {@code nextLine()} recognises all common line terminators and also keeps
     * blank lines.
     *
     * <p>The file is decoded with the platform default charset. A missing file
     * is reported with {@code printStackTrace()} and otherwise ignored.
     *
     * @param fileName path of the file to print
     */
    private static void readFile(String fileName) {
        try {
            File file = new File(fileName);
            Scanner scanner = new Scanner(file);
            try {
                while (scanner.hasNextLine()) {
                    System.out.println(scanner.nextLine());
                }
            } finally {
                // Release the underlying file even if printing fails part-way through.
                scanner.close();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the contents of {@code poem.txt} from the working directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        readFile("poem.txt");
    }
}


“java -Xrunhprof:cpu=samples,thread=y” 옵션으로 실행했을 때의 로그입니다.
THREAD START (obj=50000122, id = 200003, name="Signal Dispatcher", group="system")
THREAD START (obj=50000123, id = 200000, name="main", group="main")
THREAD END (id = 200000)
THREAD START (obj=500001f1, id = 200004, name="DestroyJavaVM", group="main")
THREAD END (id = 200004)
THREAD END (id = 200001)
TRACE 300049: (thread=200000)
java.util.regex.Pattern$categoryNames.<clinit>(Pattern.java:5637)
java.util.regex.Pattern.retrieveCategoryNode(Pattern.java:2353)
java.util.regex.Pattern.family(Pattern.java:2335)
java.util.regex.Pattern.sequence(Pattern.java:1740)
TRACE 300105: (thread=200000)
java.lang.String.equals(String.java:846)
java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1709)
java.lang.ClassLoader.loadLibrary(ClassLoader.java:1668)
java.lang.Runtime.loadLibrary0(Runtime.java:822)
TRACE 300141: (thread=200000)
java.lang.String.indexOf(String.java:1395)
java.lang.String.indexOf(String.java:1352)
java.util.ResourceBundle.setLocale(ResourceBundle.java:362)
java.util.ResourceBundle.findBundle(ResourceBundle.java:942)
CPU SAMPLES BEGIN (total = 3) Sat Jan 15 10:41:52 2005
rank self accum count trace method
1 33.33% 33.33% 1 300049 java.util.regex.Pattern$categoryNames.<clinit>
2 33.33% 66.67% 1 300105 java.lang.String.equals
3 33.33% 100.00% 1 300141 java.lang.String.indexOf
CPU SAMPLES END


TEST TWO
text.txt 문서 내용
가,기,구,게,고
나,니,누,네,노
다,디,두,데,도
라,리,루,레,로
마,미,무,메,모
바,비,부,베,보
사,시,수,세,소


기존에 File IO방식을 이용한 Text 파싱
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.File;
import java.util.StringTokenizer;
/**
 * Parses a comma-separated text file with the classic {@code java.io} reader
 * stack and {@link StringTokenizer}, printing each field on its own line.
 */
public class TextReader2 {

    /**
     * Reads the given file line by line, splits each line on {@code ','} and
     * prints every non-empty field to {@code System.out}.
     *
     * <p>{@link StringTokenizer} never returns empty tokens, so consecutive
     * commas produce no output. The file is decoded with the platform default
     * charset. Any {@link IOException} is reported with
     * {@code printStackTrace()} and otherwise ignored.
     *
     * @param fileName path of the file to parse
     */
    private static void readFile(String fileName) {
        try {
            File file = new File(fileName);
            BufferedReader in = new BufferedReader(new FileReader(file));
            try {
                String line;
                while ((line = in.readLine()) != null) {
                    StringTokenizer fields = new StringTokenizer(line, ",");
                    while (fields.hasMoreTokens()) {
                        System.out.println(fields.nextToken());
                    }
                }
            } finally {
                // Close on every path: a failing readLine() must not leak the file handle.
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code text.txt} from the working directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        readFile("text.txt");
    }
}


“java -Xrunhprof:cpu=samples,thread=y” 옵션으로 실행했을 때의 로그입니다.
THREAD START (obj=50000122, id = 200003, name="Signal Dispatcher", group="system")
THREAD START (obj=50000123, id = 200000, name="main", group="main")
THREAD END (id = 200000)
THREAD START (obj=50000160, id = 200004, name="DestroyJavaVM", group="main")
THREAD END (id = 200004)
TRACE 300015: (thread=200000)
java.util.zip.ZipFile.open(ZipFile.java:Unknown line)
java.util.zip.ZipFile.<init>(ZipFile.java:204)
java.util.jar.JarFile.<init>(JarFile.java:132)
java.util.jar.JarFile.<init>(JarFile.java:70)
TRACE 300044: (thread=200000)
java.lang.String.charAt(String.java:557)
java.util.StringTokenizer.scanToken(StringTokenizer.java:257)
java.util.StringTokenizer.nextToken(StringTokenizer.java:334)
TextReader2.readFile(TextReader2.java:18)
TRACE 300045: (thread=200000)
sun.nio.cs.ext.DoubleByteEncoder.encodeArrayLoop(DoubleByteEncoder.java:70)
sun.nio.cs.ext.DoubleByteEncoder.encodeLoop(DoubleByteEncoder.java:192)
java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:542)
sun.nio.cs.StreamEncoder$CharsetSE.implWrite(StreamEncoder.java:384)
TRACE 300006: (thread=200000)
sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:579)
sun.misc.URLClassPath$JarLoader.<init>(URLClassPath.java:546)
sun.misc.URLClassPath$3.run(URLClassPath.java:324)
java.security.AccessController.doPrivileged(AccessController.java:Unknown line)
TRACE 300024: (thread=200000)
sun.misc.URLClassPath$3.run(URLClassPath.java:319)
java.security.AccessController.doPrivileged(AccessController.java:Unknown line)
sun.misc.URLClassPath.getLoader(URLClassPath.java:313)
sun.misc.URLClassPath.getLoader(URLClassPath.java:290)
TRACE 300026: (thread=200000)
java.io.FileInputStream.readBytes(FileInputStream.java:Unknown line)
java.io.FileInputStream.read(FileInputStream.java:194)
sun.misc.Resource.getBytes(Resource.java:77)
java.net.URLClassLoader.defineClass(URLClassLoader.java:256)
TRACE 300027: (thread=200000)
java.net.URLClassLoader.defineClass(URLClassLoader.java:259)
java.net.URLClassLoader.access$100(URLClassLoader.java:56)
java.net.URLClassLoader$1.run(URLClassLoader.java:195)
java.security.AccessController.doPrivileged(AccessController.java:Unknown line)
TRACE 300041: (thread=200000)
java.lang.ClassLoader.findBootstrapClass(ClassLoader.java:Unknown line)
java.lang.ClassLoader.findBootstrapClass0(ClassLoader.java:891)
java.lang.ClassLoader.loadClass(ClassLoader.java:301)
java.lang.ClassLoader.loadClass(ClassLoader.java:299)
CPU SAMPLES BEGIN (total = 11) Sat Jan 15 11:31:06 2005
rank self accum count trace method
1 18.18% 18.18% 2 300015 java.util.zip.ZipFile.open
2 18.18% 36.36% 2 300044 java.lang.String.charAt
3 18.18% 54.55% 2 300045 sun.nio.cs.ext.DoubleByteEncoder.encodeArrayLoop
4 9.09% 63.64% 1 300006 sun.misc.URLClassPath$JarLoader.getJarFile
5 9.09% 72.73% 1 300024 sun.misc.URLClassPath$3.run
6 9.09% 81.82% 1 300026 java.io.FileInputStream.readBytes
7 9.09% 90.91% 1 300027 java.net.URLClassLoader.defineClass
8 9.09% 100.00% 1 300041 java.lang.ClassLoader.findBootstrapClass
CPU SAMPLES END


JDK 5.0 Scanner API를 이용한 방식입니다.
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Scanner;
/**
 * Parses a comma-separated text file with the J2SE 5.0 {@link Scanner} API,
 * printing each field on its own line.
 */
public class TextScanner2 {

    /**
     * Delimiter regex: one or more commas or line-break characters.
     *
     * <p>A bare {@code ","} would leave line breaks inside tokens, so the last
     * field of one line and the first field of the next would come back as a
     * single token. The {@code +} quantifier collapses runs of delimiters so no
     * empty tokens are produced.
     */
    private static final String FIELD_DELIMITER = "[,\\r\\n]+";

    /**
     * Splits the given file into fields on commas and line breaks and prints
     * every field to {@code System.out}.
     *
     * <p>The file is decoded with the platform default charset. A missing file
     * is reported with {@code printStackTrace()} and otherwise ignored.
     *
     * @param fileName path of the file to parse
     */
    private static void readFile(String fileName) {
        try {
            File file = new File(fileName);
            Scanner scanner = new Scanner(file);
            try {
                scanner.useDelimiter(FIELD_DELIMITER);
                while (scanner.hasNext()) {
                    System.out.println(scanner.next());
                }
            } finally {
                // Release the underlying file even if printing fails part-way through.
                scanner.close();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code text.txt} from the working directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        readFile("text.txt");
    }
}


“java -Xrunhprof:cpu=samples,thread=y” 옵션으로 실행했을 때의 로그입니다.
THREAD START (obj=50000122, id = 200003, name="Signal Dispatcher", group="system")
THREAD START (obj=50000123, id = 200000, name="main", group="main")
THREAD END (id = 200000)
THREAD START (obj=500001f1, id = 200004, name="DestroyJavaVM", group="main")
THREAD END (id = 200004)
THREAD END (id = 200001)
TRACE 300049: (thread=200000)
java.util.regex.Pattern$categoryNames.<clinit>(Pattern.java:5637)
java.util.regex.Pattern.retrieveCategoryNode(Pattern.java:2353)
java.util.regex.Pattern.family(Pattern.java:2335)
java.util.regex.Pattern.sequence(Pattern.java:1740)
TRACE 300105: (thread=200000)
java.lang.String.equals(String.java:846)
java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1709)
java.lang.ClassLoader.loadLibrary(ClassLoader.java:1668)
java.lang.Runtime.loadLibrary0(Runtime.java:822)
TRACE 300141: (thread=200000)
java.lang.String.indexOf(String.java:1395)
java.lang.String.indexOf(String.java:1352)
java.lang.ClassLoader.checkName(ClassLoader.java:744)
java.lang.ClassLoader.findBootstrapClass0(ClassLoader.java:889)
TRACE 300155: (thread=200000)
java.util.regex.Matcher.usePattern(Matcher.java:249)
java.util.Scanner.getCompleteTokenInBuffer(Scanner.java:916)
java.util.Scanner.next(Scanner.java:1308)
TextScanner2.readFile(TextScanner2.java:13)
CPU SAMPLES BEGIN (total = 4) Sat Jan 15 11:17:08 2005
rank self accum count trace method
1 25.00% 25.00% 1 300049 java.util.regex.Pattern$categoryNames.<clinit>
2 25.00% 50.00% 1 300105 java.lang.String.equals
3 25.00% 75.00% 1 300141 java.lang.String.indexOf
4 25.00% 100.00% 1 300155 java.util.regex.Matcher.usePattern
CPU SAMPLES END


결과
단순히 텍스트를 읽어서 가공하지 않고 화면에 출력하는 방식을 비교한다면 Scanner API보다는 기본 방식이 퍼포먼스 면에서 더 좋은 성능을 보여준다. 하지만 Scanner API는 useDelimiter를 이용하여 읽어들인 내용을 가공한다면 큰 효과와 유연성을 얻을 수 있음을 알 수 있다.

댓글 없음:

댓글 쓰기

ETL 솔루션 환경

ETL 솔루션 환경 하둡은 대용량 데이터를 값싸고 빠르게 분석할 수 있는 길을 만들어줬다. 통계분석 엔진인 “R”역시 하둡 못지 않게 관심을 받고 있다. 빅데이터 역시 데이터라는 점을 볼때 분산처리와 분석 그 이전에 데이터 품질 등 데이...