fannkuch-redux C++ g++ #6 program

source code

// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// Contributed by Andrei Simion (with patch from Vincent Yu)
// Based on "fannkuch-redux C++ g++ #5", contributed by Dave Compton,
// which in turn was based on the C program by Jeremy Zerfasm
// which in turn was based on the Ada program by Jonathan Parker and 
// Georg Bauhaus which in turn was based on code by Dave Fladebo, 
// Eckehard Berns, Heiner Marxen, Hongwei Xi, and The Anh Tran and 
// also the Java program by Oleg Mazurov.

#include <array>
#include <iostream>
#include <algorithm>
#include <immintrin.h>

using small_int_t = char;
using big_int_t   = int64_t;

constexpr small_int_t max_n = 16;
constexpr int max_blocks = 24;

struct Masks
{
    __m128i masks_reverse[16];
    __m128i masks_shift[16];
   
    Masks()
    {
        // first elements are never accessed
        masks_reverse[1] = _mm_setr_epi8(1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_reverse[2] = _mm_setr_epi8(2, 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_reverse[3] = _mm_setr_epi8(3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_reverse[4] = _mm_setr_epi8(4, 3, 2, 1, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_reverse[5] = _mm_setr_epi8(5, 4, 3, 2, 1, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_reverse[6] = _mm_setr_epi8(6, 5, 4, 3, 2, 1, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_reverse[7] = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_reverse[8] = _mm_setr_epi8(8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 10, 11, 12, 13, 14, 15);
        masks_reverse[9] = _mm_setr_epi8(9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 10, 11, 12, 13, 14, 15);
        masks_reverse[10] = _mm_setr_epi8(10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 11, 12, 13, 14, 15);
        masks_reverse[11] = _mm_setr_epi8(11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 12, 13, 14, 15);
        masks_reverse[12] = _mm_setr_epi8(12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 13, 14, 15);
        masks_reverse[13] = _mm_setr_epi8(13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 14, 15);
        masks_reverse[14] = _mm_setr_epi8(14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15);
        masks_reverse[15] = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);

        masks_shift[1] = _mm_setr_epi8(1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_shift[2] = _mm_setr_epi8(1, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_shift[3] = _mm_setr_epi8(1, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_shift[4] = _mm_setr_epi8(1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_shift[5] = _mm_setr_epi8(1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_shift[6] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_shift[7] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        masks_shift[8] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, 11, 12, 13, 14, 15);
        masks_shift[9] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 14, 15);
        masks_shift[10] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 11, 12, 13, 14, 15);
        masks_shift[11] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 12, 13, 14, 15);
        masks_shift[12] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 13, 14, 15);
        masks_shift[13] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 14, 15);
        masks_shift[14] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 15);
        masks_shift[15] = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0);
    }
};

std::array<big_int_t, max_n> compute_factorials(small_int_t n)
{
    std::array<big_int_t, max_n> factorials;

    factorials[0] = 1;
    for (small_int_t i = 1; i <= n; ++i)
        factorials[i] = factorials[i-1] * i;

    return factorials;
} 

std::array<int, 2> get_blocks_and_size(small_int_t n, 
                                       std::array<big_int_t, max_n>& factorials)
{
    int blocks = max_blocks;
    if (blocks > factorials[n])
        blocks = 1;
    int block_size = factorials[n] / blocks;
    
    return {blocks, block_size};
}

std::array<big_int_t, max_n> create_count(small_int_t n, big_int_t start,
                                          std::array<big_int_t, max_n>& factorials)
{
    std::array<big_int_t, max_n> count;
   
    for (small_int_t i = n - 1; i >= 0; --i) 
    {
        big_int_t d = start / factorials[i];
        start = start % factorials[i];
        count[i] = d;
    }

    return count;
}

__m128i create_current(small_int_t n, 
                              std::array<big_int_t, max_n>& count)
{
    // not a "hotspot", using intrinsics for rotations don't bring
    // much benefit here considering the increased complexity
    alignas(16) std::array<small_int_t, max_n> current_aux = 
	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};  
    
    for (small_int_t i = n - 1; i >= 0; --i) 
    {
        std::rotate(current_aux.begin(), 
                    current_aux.begin() + count[i],
                    current_aux.begin() + i + 1);
    }
    __m128i current = _mm_load_si128((__m128i*)current_aux.data());
    return current;
}

inline __m128i increment_permutation(std::array<big_int_t, max_n>& count,
			          __m128i current, const Masks& masks)
{
    for (small_int_t i = 1; ; ++i) 
    {
        current = _mm_shuffle_epi8(current, masks.masks_shift[i]);  
        if (++count[i] <= i)
            break;
        count[i] = 0;
    }
    return current;
}

inline __m128i reverse(__m128i x, small_int_t idx, const Masks& masks)
{
    return _mm_shuffle_epi8(x, masks.masks_reverse[idx]);
}


int main(int argc, char **argv)
{   
    int n = atoi(argv[1]);
    auto factorials = compute_factorials(n);
    auto [blocks, block_size] = get_blocks_and_size(n, factorials);

    small_int_t max_flips = 0;
    big_int_t checksum  = 0;

    // iterate over each block.
    #pragma omp parallel for \
        reduction(max:max_flips) \
        reduction(+:checksum)
    for (big_int_t block_start = 0; 
            block_start < factorials[n];
            block_start += block_size) 
    {
        Masks masks;
        
        std::array<big_int_t, max_n> count = create_count(n, block_start, factorials);      

        __m128i current = create_current(n, count);
        __m128i current_start = current;	

        small_int_t first = _mm_extract_epi8(current, 0);

        // iterate over each permutation in the block.
        big_int_t crt_idx = block_start;
        big_int_t block_end = block_start + block_size;
	
        while (crt_idx < block_end) 
        { 
            if (first > 0)
            {   
                small_int_t flips = 0;
                while (first != 0) 
                {
                    // patch from Vincent Yu
                    auto next = reinterpret_cast<char(&)[16]>(current)[first];
                    current = reverse(current, first, masks);
                    first = next;
                    ++flips;
                } 
                
                checksum += (crt_idx % 2) == 0 ? flips : -flips;

                if (flips > max_flips)
                    max_flips = flips;
            }

            current = increment_permutation(count, current_start, masks);
            current_start = current;
            
            first = _mm_extract_epi8(current, 0);
            ++crt_idx;
        }
    }
    std::cout << checksum << "\nPfannkuchen(" << n << ") = " << (int)max_flips << "\n";
}

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
g++ (Ubuntu 14.2.0-4ubuntu2) 14.2.0


 Fri, 10 Jan 2025 03:42:36 GMT

MAKE:
/usr/bin/g++ -c -pipe -O3 -fomit-frame-pointer -march=ivybridge  -std=c++17 -fopenmp fannkuchredux.gpp-6.c++ -o fannkuchredux.gpp-6.c++.o &&  \
        /usr/bin/g++ fannkuchredux.gpp-6.c++.o -o fannkuchredux.gpp-6.gpp_run -fopenmp 
In file included from /usr/include/c++/14/bits/stl_pair.h:61,
                 from /usr/include/c++/14/bits/stl_algobase.h:64,
                 from /usr/include/c++/14/array:43,
                 from fannkuchredux.gpp-6.c++:12:
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 32 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 32 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 33 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [1, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 33 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 34 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [2, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 34 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 35 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [3, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 35 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 36 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [4, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 36 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 37 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [5, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 37 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 38 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [6, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 38 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 39 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [7, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 39 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 40 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [8, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 40 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 41 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [9, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 41 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 42 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [10, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 42 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 43 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [11, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 43 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 44 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [12, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 44 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 45 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [13, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 45 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
In function ‘std::_Require<std::__not_<std::__is_tuple_like<_Tp> >, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp> > std::swap(_Tp&, _Tp&) [with _Tp = char]’,
    inlined from ‘void std::iter_swap(_ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:185:11,
    inlined from ‘_ForwardIterator2 std::swap_ranges(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2) [with _ForwardIterator1 = char*; _ForwardIterator2 = char*]’ at /usr/include/c++/14/bits/stl_algobase.h:215:16,
    inlined from ‘_RandomAccessIterator std::_V2::__rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, std::random_access_iterator_tag) [with _RandomAccessIterator = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1265:20,
    inlined from ‘_FIter std::_V2::rotate(_FIter, _FIter, _FIter) [with _FIter = char*]’ at /usr/include/c++/14/bits/stl_algo.h:1357:27,
    inlined from ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’ at fannkuchredux.gpp-6.c++:112:20:
/usr/include/c++/14/bits/move.h:222:11: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
  222 |       __a = _GLIBCXX_MOVE(__b);
      |           ^
/usr/include/c++/14/array: In function ‘__m128i create_current(small_int_t, std::array<long int, 16>&)’:
/usr/include/c++/14/array:115:55: note: at offset 46 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
  115 |       typename __array_traits<_Tp, _Nm>::_Type        _M_elems;
      |                                                       ^~~~~~~~
/usr/include/c++/14/array:115:55: note: at offset [14, 16] into destination object ‘std::array<char, 16>::_M_elems’ of size 16
/usr/include/c++/14/array:115:55: note: at offset 46 into destination object ‘std::array<char, 16>::_M_elems’ of size 16
rm fannkuchredux.gpp-6.c++

4.64s to complete and log all make actions

COMMAND LINE:
 ./fannkuchredux.gpp-6.gpp_run 12

PROGRAM OUTPUT:
3968050
Pfannkuchen(12) = 65