sse2.hpp 11 KB


  1. //
  2. // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
  3. // Vinnie Falco (vinnie.falco@gmail.com)
  4. // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
  5. //
  6. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  7. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. //
  9. // Official repository: https://github.com/boostorg/json
  10. //
  11. #ifndef BOOST_JSON_DETAIL_SSE2_HPP
  12. #define BOOST_JSON_DETAIL_SSE2_HPP
  13. #include <boost/json/detail/config.hpp>
  14. #include <boost/json/detail/utf8.hpp>
  15. #include <cstddef>
  16. #include <cstring>
  17. #ifdef BOOST_JSON_USE_SSE2
  18. # include <emmintrin.h>
  19. # include <xmmintrin.h>
  20. # ifdef _MSC_VER
  21. # include <intrin.h>
  22. # endif
  23. #endif
  24. namespace boost {
  25. namespace json {
  26. namespace detail {
  27. #ifdef BOOST_JSON_USE_SSE2
  // Scans [p, end) for the first byte that ends a run of plain string
  // characters: a double quote (0x22), a backslash, or a control
  // character (any byte <= 0x1F). Returns a pointer to that byte, or
  // `end` when no such byte is present.
  //
  // The AllowBadUTF8 parameter is not consulted in this body; bytes
  // >= 0x80 are skipped without any UTF-8 validation.
  template<bool AllowBadUTF8>
  inline
  const char*
  count_valid(
      char const* p,
      const char* end) noexcept
  {
      __m128i const quote     = _mm_set1_epi8( '\x22' );
      __m128i const backslash = _mm_set1_epi8( '\\' );
      __m128i const ctrl_max  = _mm_set1_epi8( 0x1F );

      // Vector phase: inspect 16 bytes per iteration.
      for( ; end - p >= 16; p += 16 )
      {
          __m128i const chunk = _mm_loadu_si128(
              reinterpret_cast<__m128i const*>( p ) );
          __m128i const is_special = _mm_or_si128(
              _mm_cmpeq_epi8( chunk, quote ),
              _mm_cmpeq_epi8( chunk, backslash ) );
          // min(x, 0x1F) == x holds exactly when x <= 0x1F (unsigned).
          __m128i const is_ctrl = _mm_cmpeq_epi8(
              _mm_min_epu8( chunk, ctrl_max ), chunk );
          int const mask = _mm_movemask_epi8(
              _mm_or_si128( is_special, is_ctrl ) );
          if( mask == 0 )
              continue;
          // The lowest set bit is the offset of the first stop byte.
  #if defined(__GNUC__) || defined(__clang__)
          return p + ( __builtin_ffs( mask ) - 1 );
  #else
          unsigned long bit;
          _BitScanForward( &bit, mask );
          return p + static_cast<int>( bit );
  #endif
      }

      // Scalar phase: fewer than 16 bytes remain.
      for( ; p != end; ++p )
      {
          unsigned char const ch = *p;
          if( ch < 0x20 || ch == '\\' || ch == '\x22' )
              return p;
      }
      return p;
  }
  71. template<>
  72. inline
  73. const char*
  74. count_valid<false>(
  75. char const* p,
  76. const char* end) noexcept
  77. {
  78. __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
  79. __m128i const q2 = _mm_set1_epi8( '\\' );
  80. __m128i const q3 = _mm_set1_epi8( 0x20 );
  81. while(end - p >= 16)
  82. {
  83. __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
  84. __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
  85. __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
  86. __m128i v4 = _mm_cmplt_epi8( v1, q3 );
  87. __m128i v5 = _mm_or_si128( v2, v3 );
  88. __m128i v6 = _mm_or_si128( v5, v4 );
  89. int w = _mm_movemask_epi8( v6 );
  90. if( w != 0 )
  91. {
  92. int m;
  93. #if defined(__GNUC__) || defined(__clang__)
  94. m = __builtin_ffs( w ) - 1;
  95. #else
  96. unsigned long index;
  97. _BitScanForward( &index, w );
  98. m = index;
  99. #endif
  100. p += m;
  101. break;
  102. }
  103. p += 16;
  104. }
  105. while(p != end)
  106. {
  107. const unsigned char c = *p;
  108. if(c == '\x22' || c == '\\' || c < 0x20)
  109. break;
  110. if(c < 0x80)
  111. {
  112. ++p;
  113. continue;
  114. }
  115. // validate utf-8
  116. uint16_t first = classify_utf8(c);
  117. uint8_t len = first & 0xFF;
  118. if(BOOST_JSON_UNLIKELY(end - p < len))
  119. break;
  120. if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
  121. break;
  122. p += len;
  123. }
  124. return p;
  125. }
  126. #else
  // Portable scan of [p, end) for the first byte that ends a run of
  // plain string characters: a double quote (0x22), a backslash, or a
  // control character (< 0x20). Returns a pointer to that byte, or
  // `end` when none is found. AllowBadUTF8 is not consulted here, and
  // no UTF-8 validation is performed.
  template<bool AllowBadUTF8>
  char const*
  count_valid(
      char const* p,
      char const* end) noexcept
  {
      for( ; p != end; ++p )
      {
          unsigned char const ch = *p;
          bool const plain =
              ch >= 0x20 && ch != '\x22' && ch != '\\';
          if( ! plain )
              return p;
      }
      return p;
  }
  142. template<>
  143. inline
  144. char const*
  145. count_valid<false>(
  146. char const* p,
  147. char const* end) noexcept
  148. {
  149. while(p != end)
  150. {
  151. const unsigned char c = *p;
  152. if(c == '\x22' || c == '\\' || c < 0x20)
  153. break;
  154. if(c < 0x80)
  155. {
  156. ++p;
  157. continue;
  158. }
  159. // validate utf-8
  160. uint16_t first = classify_utf8(c);
  161. uint8_t len = first & 0xFF;
  162. if(BOOST_JSON_UNLIKELY(end - p < len))
  163. break;
  164. if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
  165. break;
  166. p += len;
  167. }
  168. return p;
  169. }
  170. #endif
  171. // KRYSTIAN NOTE: does not stop to validate
  172. // count_unescaped
  173. #ifdef BOOST_JSON_USE_SSE2
  // Vectorized count of leading bytes in [s, s + n) that are neither
  // a double quote (0x22), a backslash, nor a control character
  // (<= 0x1F).
  //
  // Only whole 16-byte chunks are examined: scanning stops at the
  // first stop byte found inside a chunk, or as soon as fewer than 16
  // bytes remain. The result is therefore a lower bound on the true
  // prefix length (it is 0 whenever n < 16); the unexamined tail is
  // left to the caller.
  //
  // Returns the number of bytes skipped from s.
  inline
  std::size_t
  count_unescaped(
      char const* s,
      std::size_t n) noexcept
  {
      __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
      __m128i const q2 = _mm_set1_epi8( '\\' );   // '\\'
      __m128i const q3 = _mm_set1_epi8( 0x1F );   // highest control char

      char const* const s0 = s;

      while( n >= 16 )
      {
          __m128i v1 = _mm_loadu_si128(
              reinterpret_cast<__m128i const*>( s ) );

          __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
          __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
          __m128i v4 = _mm_or_si128( v2, v3 );   // quote or backslash

          // min(x, 0x1F) == x holds exactly when x <= 0x1F (unsigned).
          __m128i v5 = _mm_min_epu8( v1, q3 );
          __m128i v6 = _mm_cmpeq_epi8( v5, v1 ); // controls

          __m128i v7 = _mm_or_si128( v4, v6 );   // any stop byte

          int const w = _mm_movemask_epi8( v7 );
          if( w != 0 )
          {
              // The lowest set bit is the offset of the first stop byte.
  #if defined(__GNUC__) || defined(__clang__)
              s += __builtin_ffs( w ) - 1;
  #else
              unsigned long index;
              _BitScanForward( &index, w );
              s += static_cast<int>( index );
  #endif
              break;
          }

          s += 16;
          n -= 16;
      }

      return static_cast<std::size_t>( s - s0 );
  }
  212. #else
  // Stand-in used when the SSE2 implementation is not compiled in:
  // it examines nothing and always reports that zero bytes were
  // skipped.
  inline
  std::size_t
  count_unescaped(
      char const* /*s*/,
      std::size_t /*n*/) noexcept
  {
      std::size_t const skipped = 0;
      return skipped;
  }
  221. #endif
  222. // count_digits
  223. #ifdef BOOST_JSON_USE_SSE2
  // Counts how many of the 16 bytes starting at p are decimal digits
  // before the first non-digit. Returns a value in [0, 16].
  // All 16 bytes p[0]..p[15] are loaded, so they must be readable.
  inline int count_digits( char const* p ) noexcept
  {
      __m128i const chunk = _mm_loadu_si128(
          reinterpret_cast<__m128i const*>( p ) );

      // Adding 70 moves '0'..'9' (48..57) onto 118..127, the top of the
      // signed 8-bit range. Every other byte value ends up below 118
      // after wrap-around, so one signed compare marks the non-digits.
      __m128i const shifted = _mm_add_epi8( chunk, _mm_set1_epi8( 70 ) );
      __m128i const non_digit =
          _mm_cmplt_epi8( shifted, _mm_set1_epi8( 118 ) );

      int const mask = _mm_movemask_epi8( non_digit );
      if( mask == 0 )
          return 16;

      // The lowest set bit is the offset of the first non-digit.
  #if defined(__GNUC__) || defined(__clang__)
      return __builtin_ffs( mask ) - 1;
  #else
      unsigned long bit;
      _BitScanForward( &bit, mask );
      return static_cast<int>( bit );
  #endif
  }
  248. #else
  // Counts how many of the bytes starting at p are decimal digits
  // before the first non-digit, looking at no more than 16 bytes.
  // Returns a value in [0, 16]. The bytes p[0]..p[15] are expected to
  // be readable.
  inline int count_digits( char const* p ) noexcept
  {
      int count = 0;
      while( count < 16 && p[count] >= '0' && p[count] <= '9' )
          ++count;
      return count;
  }
  260. #endif
  261. // parse_unsigned
  // Appends the n decimal digits at p to the running value r, i.e.
  // computes r = r * 10 + digit for each character in order, and
  // returns the result. The characters are not checked for being
  // digits and the arithmetic is not checked for overflow.
  inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
  {
      // Main loop: four digits per iteration.
      for( ; n >= 4; p += 4, n -= 4 )
      {
  #ifdef __clang__
          // The straightforward form is faster on clang for x86
          // (and slower on gcc, which takes the other branch).
          r = r * 10 + p[0] - '0';
          r = r * 10 + p[1] - '0';
          r = r * 10 + p[2] - '0';
          r = r * 10 + p[3] - '0';
  #else
          // Load four characters at once and subtract '0' from each
          // byte lane in a single operation.
          uint32_t packed;
          std::memcpy( &packed, p, 4 );
          packed -= 0x30303030;

          // Extract the lanes so that d0..d3 follow memory order.
  # ifdef BOOST_JSON_BIG_ENDIAN
          unsigned const d0 = ( packed >> 24 );
          unsigned const d1 = ( packed >> 16 ) & 0xFF;
          unsigned const d2 = ( packed >> 8 ) & 0xFF;
          unsigned const d3 = packed & 0xFF;
  # else
          unsigned const d0 = packed & 0xFF;
          unsigned const d1 = ( packed >> 8 ) & 0xFF;
          unsigned const d2 = ( packed >> 16 ) & 0xFF;
          unsigned const d3 = ( packed >> 24 );
  # endif
          r = r * 10 + d0;
          r = r * 10 + d1;
          r = r * 10 + d2;
          r = r * 10 + d3;
  #endif
      }

      // Tail: at most three digits are left.
      for( ; n != 0; --n, ++p )
          r = r * 10 + *p - '0';

      return r;
  }
  309. // KRYSTIAN: this function is unused
  310. // count_leading
  311. /*
  312. #ifdef BOOST_JSON_USE_SSE2
  313. // assumes p..p+15
  314. inline std::size_t count_leading( char const * p, char ch ) noexcept
  315. {
  316. __m128i const q1 = _mm_set1_epi8( ch );
  317. __m128i v = _mm_loadu_si128( (__m128i const*)p );
  318. __m128i w = _mm_cmpeq_epi8( v, q1 );
  319. int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
  320. std::size_t n;
  321. if( m == 0 )
  322. {
  323. n = 16;
  324. }
  325. else
  326. {
  327. #if defined(__GNUC__) || defined(__clang__)
  328. n = __builtin_ffs( m ) - 1;
  329. #else
  330. unsigned long index;
  331. _BitScanForward( &index, m );
  332. n = index;
  333. #endif
  334. }
  335. return n;
  336. }
  337. #else
  338. // assumes p..p+15
  339. inline std::size_t count_leading( char const * p, char ch ) noexcept
  340. {
  341. std::size_t n = 0;
  342. for( ; n < 16 && *p == ch; ++p, ++n );
  343. return n;
  344. }
  345. #endif
  346. */
  347. // count_whitespace
  348. #ifdef BOOST_JSON_USE_SSE2
  // Skips leading JSON whitespace (space, '\t', '\r', '\n') in
  // [p, end). Returns a pointer to the first byte that is not one of
  // those four characters, or `end` if the whole range is whitespace.
  inline const char* count_whitespace( char const* p, const char* end ) noexcept
  {
      // Quick exit: empty range, or a first byte above 0x20, which
      // cannot be any of the four whitespace characters.
      if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
          return p;

      __m128i const sp   = _mm_set1_epi8( ' ' );
      __m128i const lf   = _mm_set1_epi8( '\n' );
      __m128i const bit2 = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
      __m128i const cr   = _mm_set1_epi8( '\r' );

      // Vector phase: 16 bytes per iteration.
      for( ; end - p >= 16; p += 16 )
      {
          __m128i const chunk = _mm_loadu_si128(
              reinterpret_cast<__m128i const*>( p ) );

          __m128i const sp_or_lf = _mm_or_si128(
              _mm_cmpeq_epi8( chunk, sp ),
              _mm_cmpeq_epi8( chunk, lf ) );
          // OR-ing in bit 2 folds '\t' onto '\r', so a single compare
          // matches both.
          __m128i const tab_or_cr = _mm_cmpeq_epi8(
              _mm_or_si128( chunk, bit2 ), cr );

          // Invert the 16-bit mask so that set bits mark NON-whitespace.
          int const non_ws = _mm_movemask_epi8(
              _mm_or_si128( sp_or_lf, tab_or_cr ) ) ^ 0xFFFF;
          if( non_ws == 0 )
              continue;

  #if defined(__GNUC__) || defined(__clang__)
          std::size_t const offset = __builtin_ffs( non_ws ) - 1;
  #else
          unsigned long bit;
          _BitScanForward( &bit, non_ws );
          std::size_t const offset = bit;
  #endif
          return p + offset;
      }

      // Scalar phase: fewer than 16 bytes remain.
      for( ; p != end; ++p )
      {
          char const ch = *p;
          if( !( ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' ) )
              return p;
      }
      return p;
  }
  397. /*
  398. // slightly faster on msvc-14.2, slightly slower on clang-win
  399. inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
  400. {
  401. char const * p0 = p;
  402. while( n > 0 )
  403. {
  404. char ch = *p;
  405. if( ch == '\n' || ch == '\r' )
  406. {
  407. ++p;
  408. --n;
  409. continue;
  410. }
  411. if( ch != ' ' && ch != '\t' )
  412. {
  413. break;
  414. }
  415. ++p;
  416. --n;
  417. while( n >= 16 )
  418. {
  419. std::size_t n2 = count_leading( p, ch );
  420. p += n2;
  421. n -= n2;
  422. if( n2 < 16 )
  423. {
  424. break;
  425. }
  426. }
  427. }
  428. return p - p0;
  429. }
  430. */
  431. #else
  // Skips leading JSON whitespace (space, '\n', '\r', '\t') in
  // [p, end). Returns a pointer to the first byte that is not one of
  // those four characters, or `end` if the whole range is whitespace.
  inline const char* count_whitespace( char const* p, const char* end ) noexcept
  {
      while( p != end )
      {
          switch( *p )
          {
          case ' ':
          case '\n':
          case '\r':
          case '\t':
              ++p;
              continue; // next iteration of the enclosing while
          default:
              return p;
          }
      }
      return p;
  }
  441. #endif
  442. } // detail
  443. } // namespace json
  444. } // namespace boost
  445. #endif