在学习boost的时候测试了一下boost的algorithm中的三个search函数,分别是Boyer-Moore Search,Boyer-Moore-Horspool Search,Knuth-Morris-Pratt Search,同时也测试了std的search函数,测试的结果有点意外,std的search花的时间比前面三个快了两个数量级,问题出在哪儿?测试的代码如下:
#define _CRT_SECURE_NO_WARNINGS
#include <boost/algorithm/searching/boyer_moore.hpp>
#include <boost/algorithm/searching/boyer_moore_horspool.hpp>
#include <boost\algorithm\searching\knuth_morris_pratt.hpp>
#include <string>
#include <vector>
#include <algorithm>
#include <iostream>
#include <Windows.h>
#include <time.h>
using namespace std;
using namespace boost::algorithm;
void printMessage(char* msg, int startX, int startY)
{
HANDLE hd;
COORD pos;
pos.X = startX;
pos.Y = startY;
hd = GetStdHandle(STD_OUTPUT_HANDLE);
SetConsoleCursorPosition(hd, pos);
printf("%s", msg);
}
int main(int argc, char ** argv)
{
printf("Generating test set\n");
vector<int> vec;
srand(unsigned int(time(NULL)));
for (int i = 0; i < 10000; i++)
{
vec.push_back(rand() % 15000);
char msg[50];
sprintf(msg,"%.2f%%\n", i*1.0/99.99);
printMessage(msg, 0, 1);
}
LARGE_INTEGER t1, t2, tc;
QueryPerformanceFrequency(&tc);
QueryPerformanceCounter(&t1);
printf("Testing boyer moore search\n");
int count = 0;
for (int i = 0; i < 1000; i++)
{
vector<int> key;
key.push_back(rand() % 15000);
if (boyer_moore_search(vec.begin(), vec.end(), key.begin(), key.end()) != vec.end())
{
count++;
}
char msg[50];
sprintf(msg,"%.2f%%\n", i / 9.99);
printMessage(msg, 0, 3);
}
cout << "Hit " << count << "times\n";
QueryPerformanceCounter(&t2);
printf("Costing time: %.2f s\n", (t2.QuadPart - t1.QuadPart)*1.0 / tc.QuadPart);
system("pause");
QueryPerformanceCounter(&t1);
printf("Testing boyer_moore_horspool search\n");
count = 0;
for (int i = 0; i < 1000; i++)
{
vector<int> key;
key.push_back(rand() % 15000);
if (boyer_moore_horspool_search(vec.begin(), vec.end(), key.begin(), key.end()) != vec.end())
{
count++;
}
char msg[50];
sprintf(msg, "%.2f%%\n", i / 9.99);
printMessage(msg, 0, 8);
}
cout << "Hit " << count << "times\n";
QueryPerformanceCounter(&t2);
printf("Costing time: %.2f s\n", (t2.QuadPart - t1.QuadPart)*1.0 / tc.QuadPart);
system("pause");
QueryPerformanceCounter(&t1);
printf("Testing knuth_morris_pratt search\n");
count = 0;
for (int i = 0; i < 1000; i++)
{
vector<int> key;
key.push_back(rand() % 15000);
if (knuth_morris_pratt_search(vec.begin(), vec.end(), key.begin(), key.end()) != vec.end())
{
count++;
}
char msg[50];
sprintf(msg, "%.2f%%\n", i/ 9.99);
printMessage(msg, 0, 13);
}
cout << "Hit " << count << "times\n";
QueryPerformanceCounter(&t2);
printf("Costing time: %.2f s\n", (t2.QuadPart - t1.QuadPart)*1.0 / tc.QuadPart);
system("pause");
QueryPerformanceCounter(&t1);
printf("Tesing std search\n");
count = 0;
for (int i = 0; i < 1000; i++)
{
vector<int> key;
key.push_back(rand() % 15000);
if (search(vec.begin(), vec.end(), key.begin(), key.end()) != vec.end())
{
count++;
}
char msg[50];
sprintf(msg, "%.2f%%\n", i/ 9.99);
printMessage(msg, 0, 18);
}
cout << "Hit " << count << "times\n";
QueryPerformanceCounter(&t2);
printf("Costing time: %.2f s\n", (t2.QuadPart - t1.QuadPart)*1.0 / tc.QuadPart);
system("pause");
return 0;
}
测试环境是vs2013,测试结果如下图:
是什么导致了这么大的差距?
The problem is that the source strings used in several tests are the same, but the target strings are different; it should be ensured that the target strings in several tests are also the same in order to compare the differences in the algorithms themselves ("control variable method").
For example, brute force string matching:
The time complexity in the worst case is O(mn), where m and n are the source and target string lengths respectively. For example, there are two strings:
source: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab
target: aaaaab
In this case, the complexity will be O(mn), but if the target is: baaaaa
Then it is O(m).
You can put the program's key filling into the vec filling loop. It doesn't matter if they are all random numbers. Of course, you can also use a separate loop.
Personally, I think that by simply using random numbers as source and target strings, the performance of several algorithms will be close to O(m); if you want to highlight the complexity differences of several algorithms, you can construct special cases for specific algorithms (such as the above The source and target special cases are constructed based on the specific implementation of the violence law)