std::swap vs std::exchange vs swap operator

user1508519 picture user1508519 · Dec 27, 2013 · Viewed 13k times · Source

An implementation of std::swap might look like this:

// Generic swap: exchanges the values of a and b using one move construction
// followed by two move assignments.
template <class T>
void swap(T& a, T& b)
{
  T tmp(std::move(a));   // park a's value in a temporary
  a = std::move(b);      // a takes b's value
  b = std::move(tmp);    // b takes a's original value
}

// Array swap: exchanges two arrays of the same length element by element,
// delegating each element to swap.
template <class T, size_t N>
void swap(T (&a)[N], T (&b)[N])
{
  size_t idx = 0;
  while (idx < N)
  {
    swap(a[idx], b[idx]);
    ++idx;
  }
}

An implementation of std::exchange (proposed in N3668) might look like this:

 // Stores new_val into obj and returns the value obj held beforehand.
 // U defaults to T so that a braced initializer can be passed as new_val.
 template <class T, class U = T>
 T exchange(T& obj, U&& new_val)
 {
   // Move the current value out before it is overwritten.
   T previous = std::move(obj);

   // Forward new_val so that rvalue arguments are moved rather than copied.
   obj = std::forward<U>(new_val);

   return previous;
 }

It says:

For primitive types, this is equivalent to the obvious implementation, while for more complex types, this definition

  • Avoids copying the old value when that type defines a move constructor
  • Accepts any type as the new value, taking advantage of any converting assignment operator
  • Avoids copying the new value if it's a temporary or moved.

I chose the name for symmetry with atomic_exchange, since they behave the same except for this function not being atomic.

n3746 also proposes a built-in swap operator that looks like this:

inline C& C::operator :=: (C&& y) &  { see below; return *this; } 
inline C& C::operator :=: (C& y)  &  { return *this :=: std::move(y); }

From what I gather, the proposals would like all three of these options to live side by side, rather than replacing each other. Why is it necessary to have three different ways to swap objects?

Answer

Howard Hinnant picture Howard Hinnant · Dec 30, 2013

std::swap vs std::exchange

swap(x, y) and exchange(x, y) are not the same thing. exchange(x, y) never assigns a new value to y. You could do so if you use it like this: y = exchange(x, y). But that isn't the main use case for exchange(x, y). N3668 includes the statement:

The benefit isn't huge, but neither is the specification cost.

(with regard to standardizing exchange).

N3668 was voted into the C++1y working draft at the Bristol meeting, April 2013. The meeting minutes indicate that there was some discussion about the best name for this function in the Library Working Group, and that ultimately there was no objection to putting it up for a formal vote in full committee. The formal vote was strongly in favor of putting it into the working draft, but not unanimous.

Bottom line: exchange is a minor utility, does not compete with swap(x, y), and has far fewer use cases.

std::swap vs swap operator

N3553, a previous revision to N3746, was discussed in the Evolution Working Group at the April 2013 meeting in Bristol. The meeting minutes acknowledge "annoying ADL problems" with std::swap(x, y), but conclude that a swap operator would not address those problems. Because of backwards compatibility, the EWG also believed that if accepted, std::swap and the swap operator would forever co-exist. The EWG decided in Bristol not to proceed with N3553.

The Sep. 2013 Chicago EWG meeting minutes make no mention of N3746. I was not present at that meeting but presume that the EWG declined to look at N3746 because of its previous decision in Bristol on N3553.

Bottom line: The C++ committee does not appear to be moving forward with a swap operator at this time.

Update: Can std::exchange be faster than std::swap?

Preview: No. At best exchange will be just as fast as swap. At worst, it can be slower.

Consider a test like this:

using T = int;

// Swaps x and y through an unqualified call to swap, with std::swap made
// visible by a using-declaration.
void
test_swap(T& x, T& y)
{
    using std::swap;  // makes std::swap a candidate for the unqualified call below
    swap(x, y);       // unqualified, so argument-dependent lookup can also find a swap declared for T
}

// Performs the same swap of x and y, but expressed with std::exchange.
void
test_exchange(T& x, T& y)
{
    // std::exchange assigns the moved y to x and returns x's previous value,
    // which is then assigned back to y.
    y = std::exchange(x, std::move(y));
}

Which generates faster code?

Using clang -O3, they both generate identical code (except for the mangled names of the functions):

__Z9test_swapRiS_:                      ## @_Z9test_swapRiS_
    .cfi_startproc
## BB#0:                                ## %entry
    pushq   %rbp
Ltmp0:
    .cfi_def_cfa_offset 16
Ltmp1:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
Ltmp2:
    .cfi_def_cfa_register %rbp
    movl    (%rdi), %eax
    movl    (%rsi), %ecx
    movl    %ecx, (%rdi)
    movl    %eax, (%rsi)
    popq    %rbp
    retq
    .cfi_endproc

For some arbitrary type X, which does not have a specialized swap function, both tests will generate one call to X(X&&) (assuming move members exist for X), and two calls to X& operator=(X&&):

test_swap

__Z9test_swapR1XS0_:                    ## @_Z9test_swapR1XS0_
    .cfi_startproc
## BB#0:                                ## %entry
    pushq   %rbp
Ltmp0:
    .cfi_def_cfa_offset 16
Ltmp1:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
Ltmp2:
    .cfi_def_cfa_register %rbp
    pushq   %r15
    pushq   %r14
    pushq   %rbx
    pushq   %rax
Ltmp3:
    .cfi_offset %rbx, -40
Ltmp4:
    .cfi_offset %r14, -32
Ltmp5:
    .cfi_offset %r15, -24
    movq    %rsi, %r14
    movq    %rdi, %rbx
    leaq    -32(%rbp), %r15
    movq    %r15, %rdi
    movq    %rbx, %rsi
    callq   __ZN1XC1EOS_
    movq    %rbx, %rdi
    movq    %r14, %rsi
    callq   __ZN1XaSEOS_
    movq    %r14, %rdi
    movq    %r15, %rsi
    callq   __ZN1XaSEOS_
    addq    $8, %rsp
    popq    %rbx
    popq    %r14
    popq    %r15
    popq    %rbp
    retq
    .cfi_endproc

test_exchange

    .globl  __Z13test_exchangeR1XS0_
    .align  4, 0x90
__Z13test_exchangeR1XS0_:               ## @_Z13test_exchangeR1XS0_
    .cfi_startproc
## BB#0:                                ## %entry
    pushq   %rbp
Ltmp6:
    .cfi_def_cfa_offset 16
Ltmp7:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
Ltmp8:
    .cfi_def_cfa_register %rbp
    pushq   %r14
    pushq   %rbx
    subq    $16, %rsp
Ltmp9:
    .cfi_offset %rbx, -32
Ltmp10:
    .cfi_offset %r14, -24
    movq    %rsi, %r14
    movq    %rdi, %rbx
    leaq    -24(%rbp), %rdi
    movq    %rbx, %rsi
    callq   __ZN1XC1EOS_
    movq    %rbx, %rdi
    movq    %r14, %rsi
    callq   __ZN1XaSEOS_
    leaq    -32(%rbp), %rsi
    movq    %r14, %rdi
    callq   __ZN1XaSEOS_
    addq    $16, %rsp
    popq    %rbx
    popq    %r14
    popq    %rbp
    retq
    .cfi_endproc

Again nearly the same code.

But for types that have an optimized swap, test_swap is likely to generate far superior code. Consider:

using T = std::string;

(using libc++)

test_swap

    .globl  __Z9test_swapRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEES6_
    .align  4, 0x90
__Z9test_swapRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEES6_: ## @_Z9test_swapRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEES6_
    .cfi_startproc
## BB#0:                                ## %entry
    pushq   %rbp
Ltmp0:
    .cfi_def_cfa_offset 16
Ltmp1:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
Ltmp2:
    .cfi_def_cfa_register %rbp
    movq    16(%rdi), %rax
    movq    %rax, -8(%rbp)
    movq    (%rdi), %rax
    movq    8(%rdi), %rcx
    movq    %rcx, -16(%rbp)
    movq    %rax, -24(%rbp)
    movq    16(%rsi), %rax
    movq    %rax, 16(%rdi)
    movq    (%rsi), %rax
    movq    8(%rsi), %rcx
    movq    %rcx, 8(%rdi)
    movq    %rax, (%rdi)
    movq    -8(%rbp), %rax
    movq    %rax, 16(%rsi)
    movq    -24(%rbp), %rax
    movq    -16(%rbp), %rcx
    movq    %rcx, 8(%rsi)
    movq    %rax, (%rsi)
    popq    %rbp
    retq
    .cfi_endproc

test_exchange

    .globl  __Z13test_exchangeRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEES6_
    .align  4, 0x90
__Z13test_exchangeRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEES6_: ## @_Z13test_exchangeRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEES6_
Lfunc_begin0:
    .cfi_startproc
    .cfi_personality 155, ___gxx_personality_v0
    .cfi_lsda 16, Lexception0
## BB#0:                                ## %entry
    pushq   %rbp
Ltmp9:
    .cfi_def_cfa_offset 16
Ltmp10:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
Ltmp11:
    .cfi_def_cfa_register %rbp
    pushq   %r14
    pushq   %rbx
    subq    $32, %rsp
Ltmp12:
    .cfi_offset %rbx, -32
Ltmp13:
    .cfi_offset %r14, -24
    movq    %rsi, %r14
    movq    %rdi, %rbx
    movq    16(%rbx), %rax
    movq    %rax, -32(%rbp)
    movq    (%rbx), %rax
    movq    8(%rbx), %rcx
    movq    %rcx, -40(%rbp)
    movq    %rax, -48(%rbp)
    movq    $0, 16(%rbx)
    movq    $0, 8(%rbx)
    movq    $0, (%rbx)
Ltmp3:
    xorl    %esi, %esi
                                        ## kill: RDI<def> RBX<kill>
    callq   __ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7reserveEm
Ltmp4:
## BB#1:                                ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE5clearEv.exit.i.i
    movq    16(%r14), %rax
    movq    %rax, 16(%rbx)
    movq    (%r14), %rax
    movq    8(%r14), %rcx
    movq    %rcx, 8(%rbx)
    movq    %rax, (%rbx)
    movq    $0, 16(%r14)
    movq    $0, 8(%r14)
    movq    $0, (%r14)
    movw    $0, (%r14)
Ltmp6:
    xorl    %esi, %esi
    movq    %r14, %rdi
    callq   __ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7reserveEm
Ltmp7:
## BB#2:                                ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEaSEOS5_.exit
    movq    -32(%rbp), %rax
    movq    %rax, 16(%r14)
    movq    -48(%rbp), %rax
    movq    -40(%rbp), %rcx
    movq    %rcx, 8(%r14)
    movq    %rax, (%r14)
    xorps   %xmm0, %xmm0
    movaps  %xmm0, -48(%rbp)
    movq    $0, -32(%rbp)
    leaq    -48(%rbp), %rdi
    callq   __ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED1Ev
    addq    $32, %rsp
    popq    %rbx
    popq    %r14
    popq    %rbp
    retq
LBB1_3:                                 ## %terminate.lpad.i.i.i.i
Ltmp5:
    movq    %rax, %rdi
    callq   ___clang_call_terminate
LBB1_4:                                 ## %terminate.lpad.i.i.i
Ltmp8:
    movq    %rax, %rdi
    callq   ___clang_call_terminate
Lfunc_end0:
    .cfi_endproc
    .section    __TEXT,__gcc_except_tab
    .align  2
GCC_except_table1:
Lexception0:
    .byte   255                     ## @LPStart Encoding = omit
    .byte   155                     ## @TType Encoding = indirect pcrel sdata4
    .asciz  "\242\200\200"          ## @TType base offset
    .byte   3                       ## Call site Encoding = udata4
    .byte   26                      ## Call site table length
Lset0 = Ltmp3-Lfunc_begin0              ## >> Call Site 1 <<
    .long   Lset0
Lset1 = Ltmp4-Ltmp3                     ##   Call between Ltmp3 and Ltmp4
    .long   Lset1
Lset2 = Ltmp5-Lfunc_begin0              ##     jumps to Ltmp5
    .long   Lset2
    .byte   1                       ##   On action: 1
Lset3 = Ltmp6-Lfunc_begin0              ## >> Call Site 2 <<
    .long   Lset3
Lset4 = Ltmp7-Ltmp6                     ##   Call between Ltmp6 and Ltmp7
    .long   Lset4
Lset5 = Ltmp8-Lfunc_begin0              ##     jumps to Ltmp8
    .long   Lset5
    .byte   1                       ##   On action: 1
    .byte   1                       ## >> Action Record 1 <<
                                        ##   Catch TypeInfo 1
    .byte   0                       ##   No further actions
                                        ## >> Catch TypeInfos <<
    .long   0                       ## TypeInfo 1
    .align  2

So in summary, never use std::exchange to perform a swap.