Home Database Mysql Tutorial 找出多个文本中频率高的单词(2)

找出多个文本中频率高的单词(2)

Jun 07, 2016 pm 03:32 PM
word Multiple find out text frequency High

接上篇,我打算用 用concurrent包里的CountDownLatch类 去实现。 还是直接上代码吧: Main.java package com.anders.thread;import java.util.HashMap;import java.util.Map;import java.util.concurrent.CountDownLatch;import java.util.concurrent.Execut

接上篇,我打算用用concurrent包里的CountDownLatch类去实现。


还是直接上代码吧:

Main.java

package com.anders.thread;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class Main {

	public static void main(String[] args) {

		int threadNumber = Integer.parseInt(PropertiesUtil.get("ThreadNumber"));

		ExecutorService es = Executors.newFixedThreadPool(threadNumber);
		SingleThreadStatistics[] threads = new SingleThreadStatistics[threadNumber];
		try {
			CountDownLatch doneSignals = new CountDownLatch(threadNumber);

			// 这是在 文件数比线程数多的情况下,若文件比线程数少的话,加个判断就可以了
			for (int i = 0; i  map = mergeThreadMap(threads);

			display(map);

		} catch (InterruptedException e) {
			e.printStackTrace();
		} finally {
			es.shutdown();
		}

	}

	private static Map<string integer> mergeThreadMap(SingleThreadStatistics[] threads) {
		Map<string integer> map = new HashMap<string integer>();

		for (SingleThreadStatistics singleThreadStatistics : threads) {
			Map<string integer> threadMap = singleThreadStatistics.getMap();

			for (Map.Entry<string integer> entry : threadMap.entrySet()) {
				String threadWord = entry.getKey();
				Integer threadWordCount = entry.getValue();
				Integer wordCount = map.get(threadWord);

				if (wordCount == null) {
					map.put(threadWord, threadWordCount);
				} else {
					map.put(threadWord, threadWordCount + wordCount);
				}
			}
		}

		return map;
	}

	private static void display(Map<string integer> map) {

		for (Map.Entry<string integer> entry : map.entrySet()) {
			System.out.print(entry.getKey());
			System.out.println("   ," + entry.getValue());
		}

	}

}
</string></string></string></string></string></string></string>
Copy after login

SingleThreadStatistics.java
package com.anders.thread;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

public class SingleThreadStatistics implements Runnable {

	private Map<string integer> map = new HashMap<string integer>();
	private CountDownLatch doneSignals;

	public SingleThreadStatistics(CountDownLatch doneSignals) {
		this.doneSignals = doneSignals;
	}

	@Override
	public void run() {

		while (true) {
			File file = FileManager.getFile();
			if (file == null) {
				break;
			}
			FileManager.parseFile(file, map);
		}

		doneSignals.countDown();

	}

	// --------getter/setter------------

	public Map<string integer> getMap() {
		return map;
	}

}
</string></string></string>
Copy after login

FileManager.java
package com.anders.thread;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Manage files and offer single for every thread
 * 
 * @author Anders
 * 
 */
public class FileManager {

	private static List<file> fileList;
	private static int index = 0;

	static {
		String dirPath = PropertiesUtil.get("DirName");
		String path = FileManager.class.getClassLoader().getResource(dirPath).getPath();
		fileList = getFiles(path);
	}

	public synchronized static File getFile() {
		if (index == fileList.size()) {
			return null;
		}
		File file = fileList.get(index);
		index++;
		return file;
	}

	private static List<file> getFiles(String dirPath) {

		File dir = new File(dirPath);
		if (!dir.exists() || !dir.isDirectory()) {
			return Collections.emptyList();
		}

		File[] files = dir.listFiles();

		//判断 是不是  以txt结尾的文件
		Pattern pattern = Pattern.compile(PropertiesUtil.get("FileType"));
		List<file> list = new ArrayList<file>();

		for (File file : files) {
			Matcher matcher = pattern.matcher(file.getName());
			if (matcher.matches()) {
				list.add(file);
			}
		}

		return list;
	}

	//读取文件  使用的是java.nio的filechannel 和bytebuffer
	public static void parseFile(File file, Map<string integer> map) {
		FileInputStream ins = null;
		try {
			ins = new FileInputStream(file);
			FileChannel fIns = ins.getChannel();
			ByteBuffer buffer = ByteBuffer.allocate(1024);

			while (true) {
				buffer.clear();
				int r = fIns.read(buffer);
				if (r == -1) {
					break;
				}
				buffer.flip();
				buffer2word(buffer, map);
			}
			fIns.close();

		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				if (ins != null) {
					ins.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

	}

	//这个是  将读取的内容,提取出  英语字母
	private static void buffer2word(ByteBuffer buffer, Map<string integer> map) {
		StringBuilder str = new StringBuilder();
		for (int i = 0; i  map) {
		Integer count = map.get(word);
		if (null == count) {
			map.put(word, 1);
		} else {
			map.put(word, ++count);
		}
	}

	//看看是否是  英语字符
	private static boolean isEnglishChar(byte b) {
		//通过ASCLL码  判断
		if (b > 65 && b  97 && b <br>

<p><br>
</p>
config.properties<br>


<pre class="brush:php;toolbar:false">ThreadNumber=3
DirName=txt
FileType=.*.txt
Copy after login


其实我觉得最重要的代码是  FileManager里的

public synchronized static File getFile() {
		if (index == fileList.size()) {
			return null;
		}
		File file = fileList.get(index);
		index++;
		return file;
	}
Copy after login
这部分代码,因为只要  每个thread 分别得到不同的文件,就可以了。

而且还有一个很重要的一点就是  验证index是否已经读取完所有的文件  要和index++放在一个同步块里面,不然会引起线程安全问题


Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Hot AI Tools

Undresser.AI Undress

Undresser.AI Undress

AI-powered app for creating realistic nude photos

AI Clothes Remover

AI Clothes Remover

Online AI tool for removing clothes from photos.

Undress AI Tool

Undress AI Tool

Undress images for free

Clothoff.io

Clothoff.io

AI clothes remover

Video Face Swap

Video Face Swap

Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Tools

Notepad++7.3.1

Notepad++7.3.1

Easy-to-use and free code editor

SublimeText3 Chinese version

SublimeText3 Chinese version

Chinese version, very easy to use

Zend Studio 13.0.1

Zend Studio 13.0.1

Powerful PHP integrated development environment

Dreamweaver CS6

Dreamweaver CS6

Visual web development tools

SublimeText3 Mac version

SublimeText3 Mac version

God-level code editing software (SublimeText3)

Hot Topics

Java Tutorial
1662
14
PHP Tutorial
1262
29
C# Tutorial
1235
24
Try new ringtones and text tones: Experience the latest sound alerts on iPhone in iOS 17 Try new ringtones and text tones: Experience the latest sound alerts on iPhone in iOS 17 Oct 12, 2023 pm 11:41 PM

In iOS 17, Apple has overhauled its entire selection of ringtones and text tones, offering more than 20 new sounds that can be used for calls, text messages, alarms, and more. Here's how to see them. Many new ringtones are longer and sound more modern than older ringtones. They include arpeggio, broken, canopy, cabin, chirp, dawn, departure, dolop, journey, kettle, mercury, galaxy, quad, radial, scavenger, seedling, shelter, sprinkle, steps, story time , tease, tilt, unfold and valley. Reflection remains the default ringtone option. There are also 10+ new text tones available for incoming text messages, voicemails, incoming mail alerts, reminder alerts, and more. To access new ringtones and text tones, first, make sure your iPhone

What to do if win7 system cannot open txt text What to do if win7 system cannot open txt text Jul 06, 2023 pm 04:45 PM

What should I do if win7 system cannot open txt text? When we need to edit text files on our computers, the easiest way is to use text tools. However, some users find that their computers cannot open txt text files. So how to solve this problem? Let’s take a look at the detailed tutorial to solve the problem of unable to open txt text in win7 system. Tutorial to solve the problem that win7 system cannot open txt text. 1. Right-click any txt file on the desktop. If there is no txt file, you can right-click to create a new text document, and then select properties, as shown below: 2. In the opened txt properties window , find the change button under the general options, as shown in the figure below: 3. In the pop-up open mode setting

Which has a greater impact on performance, memory frequency or timing? Which has a greater impact on performance, memory frequency or timing? Feb 19, 2024 am 08:58 AM

Memory is one of the most important components in the computer, and it has a significant impact on the performance and stability of the computer. When choosing memory, people tend to focus on two important parameters, namely timing and frequency. So, for memory performance, which is more important, timing or frequency? First, let's understand the concepts of timing and frequency. Timing refers to the time interval required for a memory chip to receive and process data. It is usually represented by a CL value (CASLatency). The smaller the CL value, the faster the memory processing speed. The frequency is within

How to search for text across all tabs in Chrome and Edge How to search for text across all tabs in Chrome and Edge Feb 19, 2024 am 11:30 AM

This tutorial shows you how to find specific text or phrases on all open tabs in Chrome or Edge on Windows. Is there a way to do a text search on all open tabs in Chrome? Yes, you can use a free external web extension in Chrome to perform text searches on all open tabs without having to switch tabs manually. Some extensions like TabSearch and Ctrl-FPlus can help you achieve this easily. How to search text across all tabs in Google Chrome? Ctrl-FPlus is a free extension that makes it easy for users to search for a specific word, phrase or text across all tabs of their browser window. This expansion

Using large models to create a new paradigm for text summary training Using large models to create a new paradigm for text summary training Jun 10, 2023 am 09:43 AM

1. Text task This article mainly discusses the method of generative text summarization, and how to use contrastive learning and large models to implement the latest generative text summarization training paradigm. It mainly involves two articles, one is BRIO: Bringing Order to Abstractive Summarization (2022), which uses contrastive learning to introduce ranking tasks in the generative model; the other is OnLearning to Summarize with Large Language Models as References (2023), which further introduces large models to generate high-quality training data based on BRIO. 2. Generative text summarization training methods and

ASUS TUF Z790 Plus is compatible with ASUS MCP79 memory frequency ASUS TUF Z790 Plus is compatible with ASUS MCP79 memory frequency Jan 03, 2024 pm 04:18 PM

ASUS tufz790plus supports memory frequency. ASUS TUFZ790-PLUS motherboard is a high-performance motherboard that supports dual-channel DDR4 memory and supports up to 64GB of memory. Its memory frequency is very powerful, up to 4800MHz. Specific supported memory frequencies include 2133MHz, 2400MHz, 2666MHz, 2800MHz, 3000MHz, 3200MHz, 3600MHz, 3733MHz, 3866MHz, 4000MHz, 4133MHz, 4266MHz, 4400MHz, 4533MHz, 4600MHz, 4733MHz and 4800MHz. Whether it is daily use or high performance needs

How to copy text from screenshots on Windows 11 How to copy text from screenshots on Windows 11 Sep 20, 2023 pm 05:57 PM

Download the new Snipping Tool with text actions Although the new Snipping Tool is limited to development and canary versions, if you don’t want to wait, you can install the updated Windows 11 Snipping Tool (version number 11.2308.33.0) now. How this works: 1. Go ahead and open this website (visit) on your Windows PC. 2. Next, select "Product ID" and paste "9MZ95KL8MR0L" into the text field. 3. Switch to the "Quick" ring from the right drop-down menu and click Search. 4. Now, look for this version “2022.2308.33.0” in the search results that appear. 5. Right click on the one with MSIXBUNDLE extension and in context menu

How to use Microsoft Reader Coach with Immersive Reader How to use Microsoft Reader Coach with Immersive Reader Mar 09, 2024 am 09:34 AM

In this article, we will show you how to use Microsoft Reading Coach in Immersive Reader on Windows PC. Reading guidance features help students or individuals practice reading and develop their literacy skills. You start by reading a passage or a document in a supported application, and based on this, your reading report is generated by the Reading Coach tool. The reading report shows your reading accuracy, the time it took you to read, the number of words correct per minute, and the words you found most challenging while reading. You will also be able to practice the words, which will help develop your reading skills in general. Currently, only Office or Microsoft365 (including OneNote for Web and Word for We

See all articles