找出多个文本中频率高的单词(2)
Jun 07, 2016 pm 03:32 PM接上篇,我打算用 用concurrent包里的CountDownLatch类 去实现。 还是直接上代码吧: Main.java package com.anders.thread;import java.util.HashMap;import java.util.Map;import java.util.concurrent.CountDownLatch;import java.util.concurrent.Execut
接上篇,这次我打算用 java.util.concurrent 包里的 CountDownLatch 类去实现。
还是直接上代码吧:
Main.java
package com.anders.thread;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Entry point of the word-frequency program.
 *
 * <p>Starts a fixed-size pool of {@link SingleThreadStatistics} workers, waits on a
 * {@link CountDownLatch} until every worker has finished, then merges the per-worker
 * word counts and prints the totals.
 */
public class Main {

    /**
     * Reads the worker count from the "ThreadNumber" property, runs the workers and
     * prints the merged word counts.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int threadNumber = Integer.parseInt(PropertiesUtil.get("ThreadNumber"));
        ExecutorService es = Executors.newFixedThreadPool(threadNumber);
        SingleThreadStatistics[] threads = new SingleThreadStatistics[threadNumber];
        try {
            CountDownLatch doneSignals = new CountDownLatch(threadNumber);
            // Written for the case where there are more files than threads. With fewer
            // files, a surplus worker finds no file and counts down right away.
            // NOTE(review): the loop header/body and the await() call were lost when the
            // listing was extracted; they are restored here from the surrounding code
            // (worker array, worker constructor, InterruptedException handler) - confirm.
            for (int i = 0; i < threadNumber; i++) {
                threads[i] = new SingleThreadStatistics(doneSignals);
                es.execute(threads[i]);
            }
            // Block until every worker has called countDown(); only then are the
            // per-worker maps complete and safe to read from this thread.
            doneSignals.await();
            Map<String, Integer> map = mergeThreadMap(threads);
            display(map);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            es.shutdown();
        }
    }

    /**
     * Sums the word counts of all workers into a single map.
     *
     * @param threads the finished workers whose maps are merged
     * @return a new map from word to total count across all workers
     */
    private static Map<String, Integer> mergeThreadMap(SingleThreadStatistics[] threads) {
        Map<String, Integer> map = new HashMap<String, Integer>();
        for (SingleThreadStatistics singleThreadStatistics : threads) {
            Map<String, Integer> threadMap = singleThreadStatistics.getMap();
            for (Map.Entry<String, Integer> entry : threadMap.entrySet()) {
                String threadWord = entry.getKey();
                Integer threadWordCount = entry.getValue();
                Integer wordCount = map.get(threadWord);
                if (wordCount == null) {
                    map.put(threadWord, threadWordCount);
                } else {
                    map.put(threadWord, threadWordCount + wordCount);
                }
            }
        }
        return map;
    }

    /**
     * Prints every entry as {@code word ,count}, one entry per line.
     *
     * @param map the merged word counts
     */
    private static void display(Map<String, Integer> map) {
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.print(entry.getKey());
            System.out.println(" ," + entry.getValue());
        }
    }
}
SingleThreadStatistics.java
package com.anders.thread;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

/**
 * Worker that repeatedly takes a file from {@link FileManager}, counts its words into
 * a map owned by this worker, and signals a shared {@link CountDownLatch} when no
 * files are left.
 */
public class SingleThreadStatistics implements Runnable {

    /** Word counts collected by this worker only; read by the caller after the latch opens. */
    private final Map<String, Integer> map = new HashMap<String, Integer>();

    /** Latch counted down exactly once when this worker finishes. */
    private final CountDownLatch doneSignals;

    /**
     * @param doneSignals latch to count down once this worker has finished
     */
    public SingleThreadStatistics(CountDownLatch doneSignals) {
        this.doneSignals = doneSignals;
    }

    /**
     * Processes files until {@code FileManager.getFile()} returns {@code null}, then
     * counts the latch down.
     */
    @Override
    public void run() {
        try {
            File file;
            while ((file = FileManager.getFile()) != null) {
                FileManager.parseFile(file, map);
            }
        } finally {
            // Always signal completion: if an unchecked exception escaped the loop
            // without this, the thread awaiting the latch would block forever.
            doneSignals.countDown();
        }
    }

    // --------getter/setter------------

    /**
     * @return the word counts gathered by this worker
     */
    public Map<String, Integer> getMap() {
        return map;
    }
}
FileManager.java
package com.anders.thread; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Manages the list of input files, hands out one file at a time to each requesting thread, and parses a file's words into a count map. * * @author Anders * */ public class FileManager { private static List<file> fileList; private static int index = 0; static { String dirPath = PropertiesUtil.get("DirName"); String path = FileManager.class.getClassLoader().getResource(dirPath).getPath(); fileList = getFiles(path); } /** Returns the next file that has not been handed out yet, or null once all files have been handed out; synchronized so the bounds check and the index increment happen under one lock. */ public synchronized static File getFile() { if (index == fileList.size()) { return null; } File file = fileList.get(index); index++; return file; } /** Lists the files directly inside dirPath whose names match the "FileType" property pattern; returns an empty list when dirPath does not exist or is not a directory. */ private static List<file> getFiles(String dirPath) { File dir = new File(dirPath); if (!dir.exists() || !dir.isDirectory()) { return Collections.emptyList(); } File[] files = dir.listFiles(); //keep only files whose name matches the configured FileType pattern (txt files in the sample config) Pattern pattern = Pattern.compile(PropertiesUtil.get("FileType")); List<file> list = new ArrayList<file>(); for (File file : files) { Matcher matcher = pattern.matcher(file.getName()); if (matcher.matches()) { list.add(file); } } return list; } //read the file using a java.nio FileChannel and ByteBuffer public static void parseFile(File file, Map<string integer> map) { FileInputStream ins = null; try { ins = new FileInputStream(file); FileChannel fIns = ins.getChannel(); ByteBuffer buffer = ByteBuffer.allocate(1024); while (true) { buffer.clear(); int r = fIns.read(buffer); if (r == -1) { break; } buffer.flip(); buffer2word(buffer, map); } fIns.close(); } catch (Exception e) { e.printStackTrace(); } finally { try { if (ins != null) { ins.close(); } } catch (IOException e) { e.printStackTrace(); } } } //extract the English letters from the content that was read private static void buffer2word(ByteBuffer buffer, Map<string integer> map) { StringBuilder str = new StringBuilder(); for (int i = 0; i map) { 
/* Increment the stored count for word, inserting 1 the first time the word is seen. */ Integer count = map.get(word); if (null == count) { map.put(word, 1); } else { map.put(word, ++count); } } /* NOTE(review): the range test below survives only as "b > 65 && b 97 && b" (operators lost in extraction); 'A' is 65 and 'a' is 97, so a strict '>' would exclude both letters - confirm against the original source. */ //check whether the byte is an English letter private static boolean isEnglishChar(byte b) { //decided by ASCII code if (b > 65 && b 97 && b <br> <p><br> </p> config.properties<br> <pre class="brush:php;toolbar:false">ThreadNumber=3 DirName=txt FileType=.*.txt
其实我觉得最重要的代码是 FileManager里的
/**
 * Hands out the next file that no thread has taken yet.
 *
 * <p>The method is {@code static synchronized}, so the "any files left?" check and the
 * advance of {@code index} run under the same class-level lock.
 *
 * @return the next file, or {@code null} once every file has been handed out
 */
public synchronized static File getFile() {
    if (index != fileList.size()) {
        File next = fileList.get(index);
        index += 1;
        return next;
    }
    return null;
}
还有很重要的一点:判断 index 是否已经取完所有文件的检查,必须和 index++ 放在同一个同步块里面,不然会引起线程安全问题(例如两个线程可能取到同一个文件,或者 index 越界)。

Hot Article

Hot tools Tags

Hot Article

Hot Article Tags

Notepad++7.3.1
Easy-to-use and free code editor

SublimeText3 Chinese version
Chinese version, very easy to use

Zend Studio 13.0.1
Powerful PHP integrated development environment

Dreamweaver CS6
Visual web development tools

SublimeText3 Mac version
God-level code editing software (SublimeText3)

Hot Topics

Which has a greater impact on performance, memory frequency or timing?

How to search for text across all tabs in Chrome and Edge

Using large models to create a new paradigm for text summary training

Try new ringtones and text tones: Experience the latest sound alerts on iPhone in iOS 17

After chatting online for a month, the pig-killing scammer was actually defeated by AI! 2 million netizens shouted shocked

How to copy text from screenshots on Windows 11

How to use Microsoft Reader Coach with Immersive Reader

What to do if win7 system cannot open txt text
