首頁 > 後端開發 > Python教學 > python3+PyQt5實作支援多執行緒的頁面索引器應用程式

python3+PyQt5實作支援多執行緒的頁面索引器應用程式

不言
發布: 2018-04-20 14:31:33
原創
2758 人瀏覽過

這篇文章主要為大家詳細介紹了如何用 python3+PyQt5 實現支援多執行緒的頁面索引器應用程式,具有一定的參考價值,感興趣的小伙伴們可以參考一下。

本文透過 Python3+PyQt5 實作了《Python Qt GUI 快速編程》第19章的頁面索引器應用程式範例。

/home/yrd/eric_workspace/chap19/walker_ans.py

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

#!/usr/bin/env python3

 

import codecs

import html.entities

import re

import sys

from PyQt5.QtCore import (QMutex, QThread,pyqtSignal,Qt)

 

class Walker(QThread):
    """Background thread that indexes the words of a batch of HTML files.

    Each file is read, stripped of tags, has its character entities
    decoded, is lower-cased and split into words.  Accepted words are
    recorded in the shared ``filenamesForWords`` mapping; a word whose file
    set has grown past ``COMMON_WORDS_THRESHOLD`` is moved into the shared
    ``commonWords`` set instead.  Every access to those two containers is
    guarded by the read/write ``lock`` passed to the constructor.
    """

    # Emitted once at the end of run() with (completed, index).
    # NOTE(review): this shares its name with QThread's own ``finished``
    # signal; the name is kept because it is part of the class interface.
    finished = pyqtSignal(bool, int)
    # Emitted after each successfully indexed file with (filename, index).
    indexed = pyqtSignal(str, int)

    # A word already recorded for more files than this is treated as common.
    COMMON_WORDS_THRESHOLD = 250
    MIN_WORD_LEN = 3
    MAX_WORD_LEN = 25
    # Words starting or ending with one of these characters are skipped.
    INVALID_FIRST_OR_LAST = frozenset("0123456789_")
    STRIPHTML_RE = re.compile(r"<[^>]*?>", re.IGNORECASE|re.MULTILINE)
    # Group 1: named entity (&amp;), group 2: numeric entity (&#38;).
    # The "&#" prefix is required; without it any "digits;" sequence in the
    # page text would be mistaken for an entity.
    ENTITY_RE = re.compile(r"&(\w+?);|&#(\d+?);")
    SPLIT_RE = re.compile(r"\W+", re.IGNORECASE|re.MULTILINE)

    def __init__(self, index, lock, files, filenamesForWords,
                 commonWords, parent=None):
        """Store the batch to process and the shared index structures.

        index             -- value echoed back in both signals
        lock              -- lock object providing lockForRead(),
                             lockForWrite() and unlock()
        files             -- iterable of file names to index
        filenamesForWords -- shared mapping word -> set of file names;
                             indexed with ``[word]`` for unseen words, so a
                             mapping that creates missing entries is
                             expected
        commonWords       -- shared set of words excluded from the index
        parent            -- optional parent passed to QThread
        """
        super(Walker, self).__init__(parent)
        self.index = index
        self.lock = lock
        self.files = files
        self.filenamesForWords = filenamesForWords
        self.commonWords = commonWords
        self.stopped = False
        # Guards self.stopped, which is written by stop() and read by
        # isStopped().
        self.mutex = QMutex()
        self.completed = False

    def stop(self):
        """Ask the thread to stop; processFiles() checks the flag."""
        # Acquire before entering ``try`` so ``finally`` never unlocks a
        # mutex that was not taken.
        self.mutex.lock()
        try:
            self.stopped = True
        finally:
            self.mutex.unlock()

    def isStopped(self):
        """Return True once stop() has been called."""
        self.mutex.lock()
        try:
            return self.stopped
        finally:
            self.mutex.unlock()

    def run(self):
        """Index the batch, mark the thread stopped and emit ``finished``."""
        self.processFiles()
        self.stop()
        self.finished.emit(self.completed, self.index)

    @staticmethod
    def _entityToChar(match):
        """Return the character for an ENTITY_RE match, or "" if unknown."""
        text = match.group(match.lastindex)
        if text.isdigit():
            try:
                return chr(int(text))
            except (ValueError, OverflowError):
                # Code point outside the Unicode range, or a "digit" that
                # int() cannot parse: drop the entity instead of crashing.
                return ""
        codepoint = html.entities.name2codepoint.get(text)
        return chr(codepoint) if codepoint is not None else ""

    def _extractWords(self, text):
        """Return the set of indexable, not-yet-common words in ``text``."""
        text = self.STRIPHTML_RE.sub("", text)
        text = self.ENTITY_RE.sub(self._entityToChar, text)
        words = set()
        for word in self.SPLIT_RE.split(text.lower()):
            if not (self.MIN_WORD_LEN <= len(word) <= self.MAX_WORD_LEN):
                continue
            if (word[0] in self.INVALID_FIRST_OR_LAST or
                    word[-1] in self.INVALID_FIRST_OR_LAST):
                continue
            # The read lock is taken per word, not around the whole loop.
            self.lock.lockForRead()
            try:
                common = word in self.commonWords
            finally:
                self.lock.unlock()
            if not common:
                words.add(word)
        return words

    def _recordWords(self, fname, words):
        """Add ``fname`` to the file set of every word in ``words``."""
        for word in words:
            self.lock.lockForWrite()
            try:
                # The word may have been promoted to a common word since
                # _extractWords() looked; do not re-create its entry.
                if word in self.commonWords:
                    continue
                files = self.filenamesForWords[word]
                if len(files) > self.COMMON_WORDS_THRESHOLD:
                    del self.filenamesForWords[word]
                    self.commonWords.add(word)
                else:
                    files.add(str(fname))
            finally:
                self.lock.unlock()

    def processFiles(self):
        """Index every file in self.files, honouring stop requests.

        Files that cannot be read are reported on stderr and skipped.
        ``indexed`` is emitted after each file that was indexed, and
        ``self.completed`` is set to True only if the loop was not cut
        short by a stop request.
        """
        for fname in self.files:
            if self.isStopped():
                return
            try:
                # Undecodable bytes are ignored rather than treated as
                # errors.
                with codecs.open(fname, "r", "UTF8", "ignore") as fh:
                    text = fh.read()
            except EnvironmentError as e:
                sys.stderr.write("Error: {0}\n".format(e))
                continue
            if self.isStopped():
                return
            words = self._extractWords(text)
            if self.isStopped():
                return
            self._recordWords(fname, words)
            self.indexed.emit(fname, self.index)
        self.completed = True

 

 

/home/yrd/eric_workspace/chap19/pageindexer_ans.pyw

 

#!/usr/bin/env python3

 

import collections

import os

import sys

from PyQt5.QtCore import (QDir, QReadWriteLock, QMutex,Qt)

from PyQt5.QtWidgets import (QApplication, QDialog, QFileDialog, QFrame,

        QHBoxLayout, QLCDNumber, QLabel, QLineEdit, QListWidget,

        QPushButton, QVBoxLayout)

import walker_ans as walker

 

 

def isAlive(qobj):
    """Return True if the wrapped C++ object behind ``qobj`` still exists.

    ``sip.unwrapinstance()`` raises RuntimeError when the underlying object
    has already been deleted; that is reported here as False.
    """
    # Newer PyQt5 releases ship sip as the private module ``PyQt5.sip``;
    # older ones expose a top-level ``sip`` module.  Try both.
    try:
        from PyQt5 import sip
    except ImportError:
        import sip
    try:
        sip.unwrapinstance(qobj)
    except RuntimeError:
        return False
    return True

 

 

class Form(QDialog):
    """Dialog that indexes the HTML files below a directory.

    Choosing a path walks the directory tree, hands the ``.htm``/``.html``
    files to ``walker.Walker`` threads in batches and shows progress in LCD
    widgets.  Once files are indexed, a word typed into the find box lists
    the files that contain it.
    """

    # Number of files handed to a single Walker thread.
    FILES_PER_WALKER = 1000

    def __init__(self, parent=None):
        """Build the widgets, lay them out and connect the signals."""
        super(Form, self).__init__(parent)

        # Held around the widget updates and the fileCount update in the
        # methods below.
        self.mutex = QMutex()
        self.fileCount = 0
        # word -> set of file names containing it (shared with walkers).
        self.filenamesForWords = collections.defaultdict(set)
        # Words excluded from the index (shared with walkers).
        self.commonWords = set()
        # Guards filenamesForWords and commonWords.
        self.lock = QReadWriteLock()
        self.path = QDir.homePath()

        pathLabel = QLabel("Indexing path:")
        self.pathLabel = QLabel()
        self.pathLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)
        self.pathButton = QPushButton("Set &Path...")
        self.pathButton.setAutoDefault(False)
        findLabel = QLabel("&Find word:")
        self.findEdit = QLineEdit()
        findLabel.setBuddy(self.findEdit)
        commonWordsLabel = QLabel("&Common words:")
        self.commonWordsListWidget = QListWidget()
        commonWordsLabel.setBuddy(self.commonWordsListWidget)
        filesLabel = QLabel("Files containing the &word:")
        self.filesListWidget = QListWidget()
        filesLabel.setBuddy(self.filesListWidget)
        filesIndexedLabel = QLabel("Files indexed")
        self.filesIndexedLCD = QLCDNumber()
        self.filesIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        wordsIndexedLabel = QLabel("Words indexed")
        self.wordsIndexedLCD = QLCDNumber()
        self.wordsIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        commonWordsLCDLabel = QLabel("Common words")
        self.commonWordsLCD = QLCDNumber()
        self.commonWordsLCD.setSegmentStyle(QLCDNumber.Flat)
        self.statusLabel = QLabel("Click the 'Set Path' "
                                  "button to start indexing")
        self.statusLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)

        topLayout = QHBoxLayout()
        topLayout.addWidget(pathLabel)
        topLayout.addWidget(self.pathLabel, 1)
        topLayout.addWidget(self.pathButton)
        topLayout.addWidget(findLabel)
        topLayout.addWidget(self.findEdit, 1)
        leftLayout = QVBoxLayout()
        leftLayout.addWidget(filesLabel)
        leftLayout.addWidget(self.filesListWidget)
        rightLayout = QVBoxLayout()
        rightLayout.addWidget(commonWordsLabel)
        rightLayout.addWidget(self.commonWordsListWidget)
        middleLayout = QHBoxLayout()
        middleLayout.addLayout(leftLayout, 1)
        middleLayout.addLayout(rightLayout)
        bottomLayout = QHBoxLayout()
        bottomLayout.addWidget(filesIndexedLabel)
        bottomLayout.addWidget(self.filesIndexedLCD)
        bottomLayout.addWidget(wordsIndexedLabel)
        bottomLayout.addWidget(self.wordsIndexedLCD)
        bottomLayout.addWidget(commonWordsLCDLabel)
        bottomLayout.addWidget(self.commonWordsLCD)
        bottomLayout.addStretch()
        layout = QVBoxLayout()
        layout.addLayout(topLayout)
        layout.addLayout(middleLayout)
        layout.addLayout(bottomLayout)
        layout.addWidget(self.statusLabel)
        self.setLayout(layout)

        # Walker threads of the current run, and one "done" flag per
        # walker at the same position.
        self.walkers = []
        self.completed = []
        self.pathButton.clicked.connect(self.setPath)
        self.findEdit.returnPressed.connect(self.find)
        self.setWindowTitle("Page Indexer")

    def _setStatus(self, text):
        """Show ``text`` in the status label while holding self.mutex."""
        # Acquire before ``try`` so ``finally`` never unlocks a mutex that
        # was not taken.
        self.mutex.lock()
        try:
            self.statusLabel.setText(text)
        finally:
            self.mutex.unlock()

    def stopWalkers(self):
        """Stop every running walker, wait for it, and forget them all."""
        # Ask all of them to stop first, then wait, so they wind down in
        # parallel.  The loop variable is deliberately not called
        # ``walker``, which is the name of the imported module.
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.stop()
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.wait()
        self.walkers = []
        self.completed = []

    def setPath(self):
        """Let the user pick a directory and start indexing its HTML files."""
        self.stopWalkers()
        self.pathButton.setEnabled(False)
        path = QFileDialog.getExistingDirectory(self,
                "Choose a Path to Index", self.path)
        if not path:
            self.statusLabel.setText("Click the 'Set Path' "
                                     "button to start indexing")
            self.pathButton.setEnabled(True)
            return
        self.statusLabel.setText("Scanning directories...")
        QApplication.processEvents() # Needed for Windows
        self.path = QDir.toNativeSeparators(path)
        self.findEdit.setFocus()
        self.pathLabel.setText(self.path)
        self.statusLabel.clear()
        self.filesListWidget.clear()
        # The previous run's common words no longer apply to the new path.
        self.commonWordsListWidget.clear()
        self.fileCount = 0
        self.filenamesForWords = collections.defaultdict(set)
        self.commonWords = set()

        started = False
        files = []
        index = 0
        for root, dirs, fnames in os.walk(str(self.path)):
            for name in fnames:
                if not name.endswith((".htm", ".html")):
                    continue
                files.append(os.path.join(root, name))
                if len(files) == self.FILES_PER_WALKER:
                    self.processFiles(index, files)
                    files = []
                    index += 1
                    started = True
        if files:
            # Remaining files that did not fill a whole batch.
            self.processFiles(index, files)
            started = True
        if not started:
            self.finishedIndexing()
            self.statusLabel.setText(
                    "No HTML files found in the given path")

    def processFiles(self, index, files):
        """Create, register and start a Walker for ``files``.

        ``index`` is the walker's position in self.walkers and
        self.completed; the walker echoes it back in its signals.
        """
        thread = walker.Walker(index, self.lock, files,
                self.filenamesForWords, self.commonWords, self)
        thread.indexed[str,int].connect(self.indexed)
        thread.finished[bool,int].connect(self.finished)
        thread.finished.connect(thread.deleteLater)
        self.walkers.append(thread)
        self.completed.append(False)
        thread.start()
        thread.wait(300) # Needed for Windows

    def find(self):
        """List the indexed files containing the first word in the find box."""
        # split() also copes with whitespace-only input, for which
        # ``word.split()[0]`` would raise IndexError.
        parts = str(self.findEdit.text()).lower().split()
        if not parts:
            self._setStatus("Enter a word to find in files")
            return
        word = parts[0]

        self.mutex.lock()
        try:
            self.statusLabel.clear()
            self.filesListWidget.clear()
        finally:
            self.mutex.unlock()

        # Take one consistent snapshot of both shared containers.
        self.lock.lockForRead()
        try:
            common = word in self.commonWords
            files = self.filenamesForWords.get(word, set()).copy()
        finally:
            self.lock.unlock()

        if common:
            self._setStatus("Common words like '{0}' "
                            "are not indexed".format(word))
            return
        if not files:
            self._setStatus("No indexed file contains "
                            "the word '{0}'".format(word))
            return

        files = [QDir.toNativeSeparators(name) for name in
                 sorted(files, key=str.lower)]
        self.mutex.lock()
        try:
            self.filesListWidget.addItems(files)
            self.statusLabel.setText(
                    "{0} indexed files contain the word '{1}'".format(
                    len(files), word))
        finally:
            self.mutex.unlock()

    def indexed(self, fname, index):
        """Slot for Walker.indexed: count the file and refresh the display.

        The LCDs are refreshed on every 25th file; otherwise the common
        words list is refreshed on every 101st file.
        """
        self.mutex.lock()
        try:
            self.statusLabel.setText(fname)
            self.fileCount += 1
            count = self.fileCount
        finally:
            self.mutex.unlock()

        if count % 25 == 0:
            self.lock.lockForRead()
            try:
                indexedWordCount = len(self.filenamesForWords)
                commonWordCount = len(self.commonWords)
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.filesIndexedLCD.display(count)
                self.wordsIndexedLCD.display(indexedWordCount)
                self.commonWordsLCD.display(commonWordCount)
            finally:
                self.mutex.unlock()
        elif count % 101 == 0:
            self.lock.lockForRead()
            try:
                words = self.commonWords.copy()
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.commonWordsListWidget.clear()
                self.commonWordsListWidget.addItems(sorted(words))
            finally:
                self.mutex.unlock()

    def finished(self, completed, index):
        """Slot for Walker.finished: finish up once every walker is done.

        ``completed`` is accepted for signal compatibility but not used;
        a walker is marked done whether or not it ran to completion.
        """
        if self.walkers:
            # A late signal from an earlier run may carry an index that no
            # longer exists after stopWalkers() reset the list.
            if 0 <= index < len(self.completed):
                self.completed[index] = True
            if not all(self.completed):
                return
        self._setStatus("Finished")
        self.finishedIndexing()

    def reject(self):
        """Stop indexing if it is still running; otherwise close the dialog."""
        if not all(self.completed):
            self.stopWalkers()
            self.finishedIndexing()
        else:
            self.accept()

    def closeEvent(self, event=None):
        """Stop all walkers when the window is closed."""
        self.stopWalkers()

    def finishedIndexing(self):
        """Show the final counts and re-enable the path button."""
        self.filesIndexedLCD.display(self.fileCount)
        self.wordsIndexedLCD.display(len(self.filenamesForWords))
        self.commonWordsLCD.display(len(self.commonWords))
        self.pathButton.setEnabled(True)
        QApplication.processEvents() # Needed for Windows

 

 

if __name__ == "__main__":
    # Start the GUI only when this file is run as a script, and hand the
    # event loop's return value back to the shell as the exit status.
    app = QApplication(sys.argv)
    form = Form()
    form.show()
    sys.exit(app.exec_())

登入後複製

執行結果:

相關推薦:

python3 PyQt5 Qt Designer實作堆疊視窗元件

python3 PyQt5 Qt Designer實作擴充對話框

以上是python3+PyQt5實作支援多執行緒的頁面索引器應用程式的詳細內容。更多資訊請關注PHP中文網其他相關文章!

相關標籤:
本網站聲明
本文內容由網友自願投稿,版權歸原作者所有。本站不承擔相應的法律責任。如發現涉嫌抄襲或侵權的內容,請聯絡admin@php.cn
作者最新文章
最新問題
熱門教學
更多>
最新下載
更多>
網站特效
網站源碼
網站素材
前端模板