@@ -3991,16 +3991,20 @@ namespace {
39913991
39923992const size_t _Byte_size = _Needle_length *sizeof (_Ty);
39933993
3994+ constexpr size_t _Vec_size =sizeof (_Mask);
3995+ constexpr size_t _Vec_mask = _Vec_size -1 ;
3996+ static_assert ((_Vec_size & _Vec_mask) ==0 );
3997+
39943998const void * _Stop = _Needle_ptr;
3995- _Advance_bytes (_Stop, _Byte_size & ~size_t { 0x1F } );
3996- for (; _Needle_ptr != _Stop; _Needle_ptr +=32 /sizeof (_Ty)) {
3999+ _Advance_bytes (_Stop, _Byte_size & ~_Vec_mask );
4000+ for (; _Needle_ptr != _Stop; _Needle_ptr +=_Vec_size /sizeof (_Ty)) {
39974001const __m128i _Data =_mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Needle_ptr));
39984002if (!_mm_testz_si128 (_Mask, _Data)) {
39994003return false ;
40004004 }
40014005 }
40024006
4003- _Advance_bytes (_Stop, _Byte_size &0x1E );
4007+ _Advance_bytes (_Stop, _Byte_size &_Vec_mask );
40044008for (; _Needle_ptr != _Stop; ++_Needle_ptr) {
40054009if ((*_Needle_ptr & ~_Ty{0xFF }) !=0 ) {
40064010return false ;