在C++标准库中,vector是最常用的容器之一,但很多开发者只是停留在使用层面。实际上,亲手实现一个简易版的vector,能让你深入理解以下几个关键点:
我在刚接触STL时,曾经因为不清楚vector内部机制而踩过不少坑。比如在循环中同时进行插入和删除操作导致迭代器失效,或者因为频繁插入导致多次扩容影响性能。这些问题只有当你真正动手实现过,才会有深刻体会。
我们先从最基本的类模板开始:
cpp复制template <typename T>
class Vector {
public:
// 类型别名
using value_type = T;
using pointer = T*;
using reference = T&;
using const_reference = const T&;
using size_type = size_t;
using difference_type = ptrdiff_t;
// 构造函数和析构函数
Vector();
explicit Vector(size_type count);
Vector(size_type count, const T& value);
~Vector();
// 迭代器相关
pointer begin();
pointer end();
// 容量相关
size_type size() const;
size_type capacity() const;
bool empty() const;
// 元素访问
reference operator[](size_type pos);
const_reference operator[](size_type pos) const;
reference at(size_type pos);
const_reference at(size_type pos) const;
// 修改操作
void push_back(const T& value);
void pop_back();
void clear();
private:
T* m_data; // 数据存储指针
size_type m_size; // 当前元素数量
size_type m_cap; // 当前容量
};
这个基础框架已经包含了vector最核心的接口。注意我们使用了模板来支持任意类型,这与STL的设计理念一致。
动态数组的核心在于内存的动态分配和释放。我们需要考虑以下几个关键点:
经过多次实践测试,我发现2倍扩容在大多数场景下能提供较好的平衡:
cpp复制void reserve(size_type new_cap) {
if (new_cap <= m_cap) return;
// 计算新的容量
size_type new_capacity = m_cap ? m_cap * 2 : 1;
if (new_capacity < new_cap) {
new_capacity = new_cap;
}
// 分配新内存
T* new_data = static_cast<T*>(::operator new(new_capacity * sizeof(T)));
// 迁移数据
for (size_type i = 0; i < m_size; ++i) {
try {
new (&new_data[i]) T(std::move(m_data[i]));
} catch (...) {
// 如果构造失败,析构已构造的对象并释放内存
for (size_type j = 0; j < i; ++j) {
new_data[j].~T();
}
::operator delete(new_data);
throw;
}
m_data[i].~T();
}
// 释放旧内存
::operator delete(m_data);
m_data = new_data;
m_cap = new_capacity;
}
这里使用了placement new来在已分配的内存上构造对象,确保异常安全。如果中间有任何构造失败,我们会清理已构造的对象并重新抛出异常。
构造函数需要考虑多种情况:
cpp复制// 默认构造函数
Vector() : m_data(nullptr), m_size(0), m_cap(0) {}
// 指定数量的构造函数
explicit Vector(size_type count) : m_data(nullptr), m_size(0), m_cap(0) {
resize(count);
}
// 指定数量和初始值的构造函数
Vector(size_type count, const T& value) : m_data(nullptr), m_size(0), m_cap(0) {
resize(count, value);
}
// 析构函数
~Vector() {
clear();
::operator delete(m_data);
}
实现安全的元素访问需要考虑边界检查:
cpp复制reference operator[](size_type pos) {
return m_data[pos];
}
const_reference operator[](size_type pos) const {
return m_data[pos];
}
reference at(size_type pos) {
if (pos >= m_size) {
throw std::out_of_range("Vector::at - index out of range");
}
return m_data[pos];
}
const_reference at(size_type pos) const {
if (pos >= m_size) {
throw std::out_of_range("Vector::at - index out of range");
}
return m_data[pos];
}
push_back是vector最常用的操作之一,需要特别注意扩容时的异常安全:
cpp复制void push_back(const T& value) {
if (m_size >= m_cap) {
reserve(m_cap ? m_cap * 2 : 1);
}
try {
new (&m_data[m_size]) T(value);
++m_size;
} catch (...) {
// 如果构造失败,不需要处理内存,因为空间已经预留
throw;
}
}
虽然我们使用了原生指针作为迭代器,但为了完整性和未来扩展性,可以定义一个迭代器类:
cpp复制class iterator {
public:
using iterator_category = std::random_access_iterator_tag;
using value_type = T;
using difference_type = ptrdiff_t;
using pointer = T*;
using reference = T&;
iterator(pointer ptr) : m_ptr(ptr) {}
reference operator*() const { return *m_ptr; }
pointer operator->() const { return m_ptr; }
iterator& operator++() { ++m_ptr; return *this; }
iterator operator++(int) { iterator tmp = *this; ++m_ptr; return tmp; }
// 其他必要操作...
private:
pointer m_ptr;
};
现代C++中,移动语义可以显著提升性能:
cpp复制// 移动构造函数
Vector(Vector&& other) noexcept
: m_data(other.m_data), m_size(other.m_size), m_cap(other.m_cap) {
other.m_data = nullptr;
other.m_size = 0;
other.m_cap = 0;
}
// 移动赋值运算符
Vector& operator=(Vector&& other) noexcept {
if (this != &other) {
clear();
::operator delete(m_data);
m_data = other.m_data;
m_size = other.m_size;
m_cap = other.m_cap;
other.m_data = nullptr;
other.m_size = 0;
other.m_cap = 0;
}
return *this;
}
// 移动版本的push_back
void push_back(T&& value) {
if (m_size >= m_cap) {
reserve(m_cap ? m_cap * 2 : 1);
}
new (&m_data[m_size]) T(std::move(value));
++m_size;
}
我们的实现应该提供基本的异常安全保证:
为了优化小vector的性能,可以实现小型缓冲区优化(SBO):
cpp复制template <typename T, size_t SmallSize = 16>
class SmallVector {
union {
T* m_data;
char m_small[SmallSize * sizeof(T)];
};
size_type m_size;
size_type m_cap;
bool is_small() const { return m_cap <= SmallSize; }
// 其他实现...
};
这种优化对小尺寸的vector特别有效,可以避免频繁的内存分配。
编写全面的测试用例是确保实现正确性的关键:
cpp复制void test_vector() {
// 默认构造
Vector<int> v1;
assert(v1.size() == 0);
assert(v1.empty());
// 带大小构造
Vector<int> v2(10);
assert(v2.size() == 10);
// 带初始值构造
Vector<int> v3(5, 42);
assert(v3.size() == 5);
assert(v3[0] == 42);
// push_back
v1.push_back(1);
assert(v1.size() == 1);
assert(v1[0] == 1);
// 扩容测试
for (int i = 0; i < 100; ++i) {
v1.push_back(i);
}
assert(v1.size() == 101);
// 异常安全测试
struct ThrowOnCopy {
ThrowOnCopy() = default;
ThrowOnCopy(const ThrowOnCopy&) { throw std::runtime_error("copy failed"); }
};
Vector<ThrowOnCopy> v4;
bool caught = false;
try {
v4.push_back(ThrowOnCopy{});
} catch (...) {
caught = true;
}
assert(caught);
assert(v4.empty());
}
比较我们的实现与std::vector的性能差异:
cpp复制void benchmark() {
const size_t count = 1000000;
// 测试push_back性能
auto start = std::chrono::high_resolution_clock::now();
Vector<int> v1;
for (size_t i = 0; i < count; ++i) {
v1.push_back(i);
}
auto end = std::chrono::high_resolution_clock::now();
std::cout << "Our vector: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
<< " ms\n";
start = std::chrono::high_resolution_clock::now();
std::vector<int> v2;
for (size_t i = 0; i < count; ++i) {
v2.push_back(i);
}
end = std::chrono::high_resolution_clock::now();
std::cout << "std::vector: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
<< " ms\n";
}
vector的迭代器在以下操作后会失效:
解决方案:
cpp复制// 错误示例
for (auto it = vec.begin(); it != vec.end(); ++it) {
if (*it == value) {
vec.erase(it); // 错误!it已经失效
}
}
// 正确写法
for (auto it = vec.begin(); it != vec.end(); ) {
if (*it == value) {
it = vec.erase(it); // erase返回下一个有效迭代器
} else {
++it;
}
}
确保在以下情况下正确释放内存:
以下是核心实现的完整代码:
cpp复制template <typename T>
class Vector {
public:
using value_type = T;
using pointer = T*;
using reference = T&;
using const_reference = const T&;
using size_type = size_t;
using difference_type = ptrdiff_t;
Vector() : m_data(nullptr), m_size(0), m_cap(0) {}
explicit Vector(size_type count) : Vector() {
resize(count);
}
Vector(size_type count, const T& value) : Vector() {
resize(count, value);
}
~Vector() {
clear();
::operator delete(m_data);
}
Vector(const Vector& other) : Vector() {
reserve(other.m_size);
for (size_type i = 0; i < other.m_size; ++i) {
push_back(other.m_data[i]);
}
}
Vector(Vector&& other) noexcept
: m_data(other.m_data), m_size(other.m_size), m_cap(other.m_cap) {
other.m_data = nullptr;
other.m_size = 0;
other.m_cap = 0;
}
Vector& operator=(const Vector& other) {
if (this != &other) {
clear();
reserve(other.m_size);
for (size_type i = 0; i < other.m_size; ++i) {
push_back(other.m_data[i]);
}
}
return *this;
}
Vector& operator=(Vector&& other) noexcept {
if (this != &other) {
clear();
::operator delete(m_data);
m_data = other.m_data;
m_size = other.m_size;
m_cap = other.m_cap;
other.m_data = nullptr;
other.m_size = 0;
other.m_cap = 0;
}
return *this;
}
pointer begin() { return m_data; }
pointer end() { return m_data + m_size; }
size_type size() const { return m_size; }
size_type capacity() const { return m_cap; }
bool empty() const { return m_size == 0; }
reference operator[](size_type pos) { return m_data[pos]; }
const_reference operator[](size_type pos) const { return m_data[pos]; }
reference at(size_type pos) {
if (pos >= m_size) throw std::out_of_range("Vector::at");
return m_data[pos];
}
const_reference at(size_type pos) const {
if (pos >= m_size) throw std::out_of_range("Vector::at");
return m_data[pos];
}
void push_back(const T& value) {
if (m_size >= m_cap) {
reserve(m_cap ? m_cap * 2 : 1);
}
new (&m_data[m_size]) T(value);
++m_size;
}
void push_back(T&& value) {
if (m_size >= m_cap) {
reserve(m_cap ? m_cap * 2 : 1);
}
new (&m_data[m_size]) T(std::move(value));
++m_size;
}
template <typename... Args>
void emplace_back(Args&&... args) {
if (m_size >= m_cap) {
reserve(m_cap ? m_cap * 2 : 1);
}
new (&m_data[m_size]) T(std::forward<Args>(args)...);
++m_size;
}
void pop_back() {
if (m_size > 0) {
--m_size;
m_data[m_size].~T();
}
}
void clear() {
for (size_type i = 0; i < m_size; ++i) {
m_data[i].~T();
}
m_size = 0;
}
void reserve(size_type new_cap) {
if (new_cap <= m_cap) return;
size_type new_capacity = m_cap ? m_cap * 2 : 1;
if (new_capacity < new_cap) {
new_capacity = new_cap;
}
T* new_data = static_cast<T*>(::operator new(new_capacity * sizeof(T)));
for (size_type i = 0; i < m_size; ++i) {
try {
new (&new_data[i]) T(std::move(m_data[i]));
} catch (...) {
for (size_type j = 0; j < i; ++j) {
new_data[j].~T();
}
::operator delete(new_data);
throw;
}
m_data[i].~T();
}
::operator delete(m_data);
m_data = new_data;
m_cap = new_capacity;
}
void resize(size_type new_size) {
if (new_size > m_cap) {
reserve(new_size);
}
if (new_size > m_size) {
for (size_type i = m_size; i < new_size; ++i) {
new (&m_data[i]) T();
}
} else if (new_size < m_size) {
for (size_type i = new_size; i < m_size; ++i) {
m_data[i].~T();
}
}
m_size = new_size;
}
void resize(size_type new_size, const T& value) {
if (new_size > m_cap) {
reserve(new_size);
}
if (new_size > m_size) {
for (size_type i = m_size; i < new_size; ++i) {
new (&m_data[i]) T(value);
}
} else if (new_size < m_size) {
for (size_type i = new_size; i < m_size; ++i) {
m_data[i].~T();
}
}
m_size = new_size;
}
private:
T* m_data;
size_type m_size;
size_type m_cap;
};
STL容器支持自定义分配器,我们的实现也可以添加这一特性:
cpp复制template <typename T, typename Allocator = std::allocator<T>>
class VectorWithAllocator {
// 使用Allocator进行内存管理
// 实现略...
};
可以通过以下方式提升异常安全等级:
现代C++中可以考虑添加并行操作支持:
实现与其他STL容器的互操作性:
通过实现这些完整的vector功能,你会对C++的内存管理、异常安全、模板编程等核心概念有更深入的理解。这种理解将帮助你在实际开发中更高效地使用STL容器,并在需要时能够实现自己的定制化容器。