EDIT: I edited both the question and its title to be more precise.
Considering the following source code:
#include <vector>
// Minimal element type for the experiment: a single int with hand-written
// default construction, copy construction and copy assignment.
struct xyz {
    // Deliberately empty. v has no initializer here, so after default
    // construction its value is indeterminate (it is NOT guaranteed to be 0).
    xyz() { }

    // Copies the payload of another instance.
    xyz(const xyz& other) : v(other.v) { }

    // Overwrites the payload with that of another instance and returns *this.
    xyz& operator=(const xyz& other) {
        this->v = other.v;
        return *this;
    }

    int v; // the only data member; untouched by xyz()
};
// Builds and returns a vector holding 1024 xyz elements.
// The sized constructor still writes every element, which is the O(n) fill
// visible in the generated assembly quoted further down.
std::vector<xyz> test() {
    std::vector<xyz> filled(1024);
    return filled;
}
...how can I prevent the memory allocated by the vector<> from being initialized with copies of its first element? That is an O(n) operation I'd rather skip for the sake of speed, since my default constructor does nothing.
A g++ specific solution will do, if no generic one exists (but I couldn't find any attribute to do that).
EDIT: generated code follows (command line: arm-elf-g++-4.5 -O3 -S -fno-verbose-asm -o - test.cpp | arm-elf-c++filt | grep -vE '^[[:space:]]+[.@].*$' )
test():
mov r3, #0
stmfd sp!, {r4, lr}
mov r4, r0
str r3, [r0, #0]
str r3, [r0, #4]
str r3, [r0, #8]
mov r0, #4096
bl operator new(unsigned long)
add r1, r0, #4096
add r2, r0, #4080
str r0, [r4, #0]
stmib r4, {r0, r1}
add r2, r2, #12
b .L4 @
.L8: @
add r0, r0, #4 @
.L4: @
cmp r0, #0 @ fill the memory
movne r3, #0 @
strne r3, [r0, #0] @
cmp r0, r2 @
bne .L8 @
str r1, [r4, #4]
mov r0, r4
ldmfd sp!, {r4, pc}
EDIT: For the sake of completeness, here is the assembly for x86_64:
.globl test()
test():
LFB450:
pushq %rbp
LCFI0:
movq %rsp, %rbp
LCFI1:
pushq %rbx
LCFI2:
movq %rdi, %rbx
subq $8, %rsp
LCFI3:
movq $0, (%rdi)
movq $0, 8(%rdi)
movq $0, 16(%rdi)
movl $4096, %edi
call operator new(unsigned long)
leaq 4096(%rax), %rcx
movq %rax, (%rbx)
movq %rax, 8(%rbx)
leaq 4092(%rax), %rdx
movq %rcx, 16(%rbx)
jmp L4 @
L8: @
addq $4, %rax @
L4: @
testq %rax, %rax @ memory-filling loop
je L2 @
movl $0, (%rax) @
L2: @
cmpq %rdx, %rax @
jne L8 @
movq %rcx, 8(%rbx)
movq %rbx, %rax
addq $8, %rsp
popq %rbx
leave
LCFI4:
ret
LFE450:
EH_frame1:
LSCIE1:
LECIE1:
LSFDE1:
LASFDE1:
LEFDE1:
EDIT: I think the conclusion is to not use std::vector<>
when you want to avoid unneeded initialization. I ended up rolling my own templated container, which performs better (and has specialized versions for neon and armv7).
The initialization of the allocated elements is controlled by the Allocator template argument; if you need it customized, customize it. But remember that this can easily wind up in the realm of dirty hacking, so use it with caution. For instance, here is a pretty dirty solution. It will avoid the initialization, but it will most probably perform worse; it is shown for demonstration's sake (as people have said this is impossible!... impossible is not in a C++ programmer's vocabulary!):
// Allocator whose element construction can be switched on and off at run time.
//
// It behaves like std::allocator<T> for allocation, but both construct()
// overloads consult a caller-owned bool through the should_init pointer:
//   - pointer is null, or *should_init is false -> construct() does nothing,
//     leaving the storage untouched;
//   - *should_init is true -> the element is constructed with placement new.
//
// The allocator never owns the flag; the caller must keep the bool alive for
// as long as any copy of the allocator may still be asked to construct.
// destroy() is inherited unchanged, so skipping construction is only sensible
// for element types whose destructor is trivial.
template <typename T>
class switch_init_allocator : public std::allocator< T > {
  private:
    // Every specialization is a friend so that the rebinding constructor
    // below can read should_init from a switch_init_allocator<U>.
    template <typename U> friend class switch_init_allocator;

    bool* should_init; // not owned; null means "never construct"
  public:
    template <typename U>
    struct rebind {
      typedef switch_init_allocator<U> other;
    };

    // No-throw constructors / destructor required of an allocator.
    switch_init_allocator(bool* aShouldInit = NULL) throw()
      : std::allocator<T>(), should_init(aShouldInit) { }

    switch_init_allocator(const switch_init_allocator<T>& rhs) throw()
      : std::allocator<T>(rhs), should_init(rhs.should_init) { }

    // Rebinding constructor. An explicitly supplied aShouldInit wins;
    // otherwise the flag pointer is inherited from rhs so that a rebound
    // allocator keeps obeying the same switch instead of silently losing it.
    template <typename U>
    switch_init_allocator(const switch_init_allocator<U>& rhs, bool* aShouldInit = NULL) throw()
      : std::allocator<T>(rhs),
        should_init(aShouldInit ? aShouldInit : rhs.should_init) { }

    ~switch_init_allocator() throw() { }

    // Import the required typedefs from the base allocator.
    typedef typename std::allocator<T>::value_type value_type;
    typedef typename std::allocator<T>::pointer pointer;
    typedef typename std::allocator<T>::reference reference;
    typedef typename std::allocator<T>::const_pointer const_pointer;
    typedef typename std::allocator<T>::const_reference const_reference;
    typedef typename std::allocator<T>::size_type size_type;
    typedef typename std::allocator<T>::difference_type difference_type;

    // Copy-constructs *p from cr when the switch is on; otherwise does nothing.
    // Hides the base-class version.
    void construct(pointer p, const_reference cr) {
      if (should_init && *should_init)
        new (static_cast<void*>(p)) T(cr);
    }

    // Value-constructs *p when the switch is on; otherwise does nothing.
    // C++11-and-later containers default-insert elements by calling
    // construct(p) with no value argument. Without this overload that call is
    // ill-formed on this class and std::allocator_traits falls back to its own
    // placement new, bypassing the switch entirely.
    void construct(pointer p) {
      if (should_init && *should_init)
        new (static_cast<void*>(p)) T();
    }
};
// Wrapper that owns both a std::vector using switch_init_allocator and the
// bool that the allocator consults before constructing elements.
//
// The sized constructor starts with the switch off and hands the allocator a
// pointer to switch_flag; set_switch(true) turns construction back on for
// later operations on get_vector().
//
// NOTE(review): the class also inherits from the same vector type. That base
// subobject is separate from `vec` and is default-constructed by the sized
// constructor; the element storage lives in `vec`. The inheritance is kept
// only so existing uses of the type keep compiling.
template <typename T>
class my_vector : public std::vector<T, switch_init_allocator<T> > {
  public:
    typedef std::vector<T, switch_init_allocator<T> > base_type;
    typedef switch_init_allocator<T> allocator_type;
    typedef std::vector<T, allocator_type > vector_type;
    typedef typename base_type::size_type size_type;
  private:
    // Declaration order matters: switch_flag must be initialized before vec,
    // because vec's allocator is given its address and may read it while vec
    // is being constructed.
    bool switch_flag;
    vector_type vec;
  public:
    // Creates aCount elements with construction switched off.
    my_vector(size_type aCount)
      : switch_flag(false), vec(aCount, allocator_type(&switch_flag)) { }

    // Copying must not copy vec's allocator: that allocator points at
    // rhs.switch_flag and would dangle once rhs is destroyed. The copy gets
    // its own flag (same value as rhs's) and an allocator bound to it, so
    // elements are copied exactly when rhs's switch is on.
    my_vector(const my_vector& rhs)
      : base_type(rhs),
        switch_flag(rhs.switch_flag),
        vec(rhs.vec.begin(), rhs.vec.end(), allocator_type(&switch_flag)) { }

    // Copies the base part, the flag value and the elements; vec keeps using
    // the allocator bound to this object's own switch_flag.
    my_vector& operator=(const my_vector& rhs) {
      if (this != &rhs) {
        base_type::operator=(rhs);
        switch_flag = rhs.switch_flag;
        vec = rhs.vec;
      }
      return *this;
    }

    //... and the rest of this wrapper class...

    // Access to the wrapped vector that actually holds the elements.
    vector_type& get_vector() { return vec; }
    const vector_type& get_vector() const { return vec; }

    // Turns element construction on (true) or off (false).
    void set_switch(bool value) { switch_flag = value; }
};
// Empty placeholder element type used to instantiate my_vector in main().
class xyz{};
int main(){
my_vector<xyz> v(1024); //this won't initialize the memory at all.
v.set_switch(true); //set back to true to turn initialization back on (needed for resizing and such)
}
Of course, the above is awkward and not recommended, and it certainly won't be any better than actually letting the memory get filled with copies of the first element (especially since the flag check is paid on every element construction). But it is an avenue to explore when looking to optimize the allocation and initialization of elements in an STL container, so I wanted to show it. The point is that the only place you can inject code that will stop the std::vector container from calling the copy-constructor to initialize your elements is in the construct function of the vector's allocator object.
Also, you could do away with the "switch" and simply do a "no-init-allocator", but then, you also turn off copy-construction which is needed to copy the data during resizing (which would make this vector class much less useful).