從字串取得 IPv4 位址最快的方法
有問題的原始程式碼:
/*
 * Parse a dotted-decimal IPv4 string into a 32-bit value.
 *
 * Each '.'-separated field is accumulated in base 10 and shifted into the
 * result one byte at a time, so the first field ends up in the most
 * significant byte ("192.168.1.1" -> 0xC0A80101).
 *
 * p must point to a NUL-terminated string. No validation is performed:
 * characters other than digits and '.' are folded into the current field
 * as (c - '0'), and fields are not range-checked. An empty field
 * (leading, trailing or doubled '.') contributes 0.
 *
 * Returns the packed address; an empty string yields 0.
 */
UINT32 GetIP(const char *p)
{
    UINT32 dwIP = 0;
    UINT32 dwIP_Part = 0;

    for (;;) {
        if (p[0] == '\0') {
            /* End of string: commit the last field. */
            dwIP = (dwIP << 8) | dwIP_Part;
            break;
        }
        if (p[0] == '.') {
            /* Field separator: commit the finished field and start a new one. */
            dwIP = (dwIP << 8) | dwIP_Part;
            dwIP_Part = 0;
            p++;
            /*
             * Go back to the top so the character after the '.' is checked
             * for '\0' (and '.') before being treated as a digit. Without
             * this, a trailing '.' makes the loop consume the terminator
             * and keep reading past the end of the string.
             */
            continue;
        }
        dwIP_Part = (dwIP_Part * 10) + (UINT32)(p[0] - '0');
        p++;
    }
    return dwIP;
}
更快的向量化解決方案:
利用 x86 的 SSE 系列 SIMD 指令集,更有效率的解決方案如下(注意:此函式固定從 str 讀取 16 個位元組,呼叫端必須確保這段記憶體可讀):
UINT32 MyGetIP(const char *str) { // Load and convert input __m128i input = _mm_lddqu_si128((const __m128i*)str); input = _mm_sub_epi8(input, _mm_set1_epi8('0')); // Generate shuffled array __m128i cmp = input; UINT32 mask = _mm_movemask_epi8(cmp); __m128i shuf = shuffleTable[mask]; __m128i arr = _mm_shuffle_epi8(input, shuf); // Calculate coefficients __m128i coeffs = _mm_set_epi8(0, 100, 10, 1, 0, 100, 10, 1, 0, 100, 10, 1, 0, 100, 10, 1); // Multiply and accumulate __m128i prod = _mm_maddubs_epi16(coeffs, arr); prod = _mm_hadd_epi16(prod, prod); // Reorder result __m128i imm = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 4, 2, 0); prod = _mm_shuffle_epi8(prod, imm); // Extract result return _mm_extract_epi32(prod, 0); }
shuffleTable 的預計算:
/*
 * Precompute the entries of shuffleTable that MyGetIP looks up.
 *
 * Every combination of four field lengths (each 1..3 characters) is
 * enumerated. For one combination the 16-byte shuffle pattern places the
 * characters of field i into lanes (3-i)*4 .. (3-i)*4+2 in reversed order,
 * and leaves all other lanes at -1. The table index has one bit set at the
 * position following each field; the bits above the used length are filled
 * with every possible value, so the same pattern is stored under each of
 * those indices.
 *
 * Entries of shuffleTable not reached by this enumeration are left untouched.
 */
void MyInit() {
    /* 3^4 = 81 length combinations; the first field is the most significant
     * base-3 digit, so combinations are visited with the last field varying
     * fastest. */
    for (int combo = 0; combo < 3 * 3 * 3 * 3; combo++) {
        const int fieldLen[4] = {
            combo / 27 + 1,
            combo / 9 % 3 + 1,
            combo / 3 % 3 + 1,
            combo % 3 + 1
        };

        char lanes[16];
        for (int k = 0; k < 16; k++) {
            lanes[k] = -1;
        }

        int sepBits = 0;   /* one bit per position that follows a field */
        int cursor = 0;    /* current character position in the input */
        for (int field = 0; field < 4; field++) {
            const int base = (3 - field) * 4;
            /* The first character of the field goes to the highest lane of
             * its group, the last character to the lowest. */
            for (int lane = fieldLen[field] - 1; lane >= 0; lane--) {
                lanes[base + lane] = cursor++;
            }
            sepBits |= 1 << cursor;
            cursor++;
        }

        /* cursor now equals the four field lengths plus four separators. */
        const int usedLen = cursor;
        const int spare = 16 - usedLen;
        const __m128i pattern = _mm_loadu_si128((__m128i*)lanes);

        /* Store the pattern under every value of the unused upper index bits. */
        for (int filler = 0; filler < (1 << spare); filler++) {
            _mm_store_si128(&shuffleTable[sepBits | (filler << usedLen)], pattern);
        }
    }
}
評估:
由於向量化技術,此解決方案的速度明顯加快,效能比原始程式碼高出7.8倍。它在 3.4 GHz 處理器的單核心上每秒可處理約 3.36 億個 IP 位址。
以上是「如何使用向量化技術來加速 IPv4 位址從字串到整數的轉換?」的詳細內容。更多資訊請關注 PHP 中文網的其他相關文章!