C++标准模板库(STL)中的string类是一个专门用于处理字符串的容器类,它封装了字符序列的存储与管理,提供了丰富的字符串操作接口。与C语言风格的字符数组相比,string类具有三大核心优势:自动内存管理(无需手动分配、扩容和释放缓冲区)、更高的安全性(长度由对象自身维护,at()等接口提供边界检查)以及丰富的操作接口(查找、拼接、比较、子串等均有现成的成员函数)。
从设计角度看,早期的部分实现(例如GCC 5之前的libstdc++)采用过"写时复制"(Copy-On-Write)的优化策略:复制字符串时先共享缓冲区,只在修改时才创建真正的副本。但自C++11起,标准对迭代器/引用失效以及并发访问的规定实际上已不允许COW实现,现代标准库的string在拷贝时会立即复制数据,并依靠SSO和移动语义在保证安全性的同时兼顾性能效率。
注意:虽然现代编译器对string的实现各有差异,但都遵循C++标准规定的接口规范。例如GCC使用SSO(Small String Optimization)优化,对小字符串直接存储在对象内部,避免堆内存分配。
string类提供了7种主要构造函数重载,满足不同初始化需求:
// Overview of the main std::string constructor overloads.
// Default constructor: creates an empty string.
string();
// Construct from a NUL-terminated C string.
string(const char* s);
// Copy constructor.
string(const string& str);
// Substring constructor: copies up to len characters of str, starting at pos.
string(const string& str, size_t pos, size_t len = npos);
// Fill constructor: creates a string holding n copies of character c.
string(size_t n, char c);
// Buffer constructor: copies the first n characters of s.
string(const char* s, size_t n);
// Move constructor (added in C++11).
string(string&& str) noexcept;
// Comparison of initialization forms.
string s1;                  // default construction: empty string
string s2(10, 'x');         // fill construction: ten 'x' characters
string s3("hello");         // construction from a string literal
string s4(s3);              // copy construction: s4 gets its own copy of the characters
// Copy-initialization from a literal. The temporary is elided (guaranteed since
// C++17), so this costs the same as s3; it only requires a non-explicit constructor.
string s6 = "hello";
// Not recommended: substr(0) builds a needless temporary; string s7(s3) is enough.
string s7 = s3.substr(0);
// Move construction (C++11) transfers the buffer. It is done last because s3 is
// left in a valid but unspecified state and must not be read afterwards.
string s5(std::move(s3));
经验法则:优先使用直接初始化语法,避免不必要的临时对象。对于大字符串,考虑使用移动语义(C++11+)提升性能。
push_back:尾部追加单个字符
// push_back appends a single character at the end.
string str("abc");
str.push_back('d');  // "abcd"
insert:任意位置插入
// insert places the given text before the character at the given index.
string str("abc");
str.insert(1, "xyz");  // "axyzbc"
append:尾部追加字符串
// append adds a whole string at the end.
string str("abc");
str.append("def");  // "abcdef"
operator+=:复合赋值追加
// operator+= is the compound-assignment spelling of append.
string str("abc");
str += "def";  // "abcdef"
string logMsg = "[INFO] Processing data...";
// Remove the 7-character "[INFO] " prefix (tag plus the space that follows it).
logMsg.erase(0, 7);  // "Processing data..."
// Remove the trailing dots: search for the FIRST '.', then erase to the end.
size_t dotPos = logMsg.find('.');
if (dotPos != string::npos) {
    logMsg.erase(dotPos);  // "Processing data"
}
// Bounds-safe erase: does nothing when pos is at or past the end of str, and
// otherwise removes at most the characters that actually exist after pos.
void safeErase(string& str, size_t pos, size_t len) {
    const size_t total = str.length();
    if (pos < total) {
        const size_t available = total - pos;
        str.erase(pos, len < available ? len : available);
    }
}
string config = "Timeout=300;Retry=3;CacheSize=1024;";
// Look up the value of one key=value pair.
const string key = "Timeout=";
size_t pos = config.find(key);
if (pos != string::npos) {
    // The value starts right after the key; derive the offset from the key
    // itself instead of hard-coding its length.
    const size_t valueStart = pos + key.size();
    size_t end = config.find(';', valueStart);
    // Without a terminating ';' the value runs to the end of the string.
    const size_t valueLen = (end == string::npos) ? string::npos : end - valueStart;
    string value = config.substr(valueStart, valueLen);
    cout << "Timeout value: " << value << endl;
}
// Search for any character out of a set.
string forbidden = "@#$%";
size_t found = config.find_first_of(forbidden);
if (found != string::npos) {
    cerr << "Invalid character at position: " << found << endl;
}
operator==:简单相等比较
cpp复制if (input == "quit") exit(0);
compare方法:详细比较结果
cpp复制int result = str1.compare(str2);
if (result < 0) {
cout << "str1 < str2" << endl;
}
STL算法:复杂比较需求
cpp复制// 不区分大小写比较
bool equal = std::equal(str1.begin(), str1.end(),
str2.begin(), str2.end(),
[](char a, char b) {
return tolower(a) == tolower(b);
});
// Watch capacity grow while characters are appended one at a time.
string str;
cout << "初始容量: " << str.capacity() << endl;  // commonly 15 or 22, depending on the library
for (int i = 0; i < 100; ++i) {
    str.push_back('x');
    // Report on every tenth iteration; size() is i + 1 at this point.
    if (i % 10 == 0) {
        cout << "Size: " << str.size()
             << " Capacity: " << str.capacity() << endl;
    }
}
典型输出模式:
code复制初始容量: 15
Size: 1 Capacity: 15
Size: 11 Capacity: 15
Size: 21 Capacity: 31
Size: 31 Capacity: 47
...
// 糟糕的做法:频繁重新分配
// Builds a string made of n copies of "chunk" without reserving space first,
// so the buffer may be reallocated several times while it grows.
// A non-positive n yields an empty string.
string buildString(int n) {
    string out;
    for (int remaining = n; remaining > 0; --remaining) {
        out.append("chunk");
    }
    return out;
}
// Optimized version: reserve the final size once, then append.
// Returns n copies of "chunk"; a non-positive n yields an empty string.
string buildStringOptimized(int n) {
    static const string kChunk = "chunk";
    string result;
    if (n <= 0) {
        // Guard before reserving: a negative count converted to size_t would
        // request an enormous buffer and make reserve() throw.
        return result;
    }
    // Size the buffer from the real chunk length (5 bytes), in size_t arithmetic.
    result.reserve(static_cast<size_t>(n) * kChunk.size());
    for (int i = 0; i < n; ++i) {
        result += kChunk;
    }
    return result;
}
string str = "Hello";
// 1. Subscript operator (no bounds checking).
char c1 = str[1];  // 'e'
str[1] = 'E';      // modify in place
// 2. at() (bounds-checked).
try {
    char c2 = str.at(10);  // throws std::out_of_range
} catch (const std::out_of_range& e) {
    cerr << e.what() << endl;
}
// 3. Iterator access.
// toupper/tolower take an int that must be representable as unsigned char,
// so each character is converted through unsigned char first.
for (auto it = str.begin(); it != str.end(); ++it) {
    *it = static_cast<char>(toupper(static_cast<unsigned char>(*it)));
}
// 4. Range-based for loop (C++11).
for (char& c : str) {
    c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
}
// 危险代码:可能越界
// Overwrites the character at index with 'X' through operator[], which does
// no bounds checking; the caller must guarantee that index is in range.
void unsafeAccess(string& s, size_t index) {
    char& slot = s[index];  // unchecked access
    slot = 'X';
}
// Checked version: validates index before writing.
// Throws std::out_of_range("Index out of range") when index >= s.length().
void safeAccess(string& s, size_t index) {
    if (index >= s.length()) {
        throw out_of_range("Index out of range");
    }
    s[index] = 'X';
}
// Alternative that delegates the range check to string::at(), which throws
// std::out_of_range for an invalid index.
void saferAccess(string& s, size_t index) {
    char& slot = s.at(index);
    slot = 'X';
}
数值转换:
// String -> number: stod parses the leading floating-point value.
string numStr = "3.14159";
double pi = stod(numStr);
// Number -> string.
int value = 42;
string strVal = to_string(value);
字符串视图(string_view):
// Receives the text as a string_view: the characters are referenced, not
// copied, so the caller's buffer must stay alive for the duration of the call.
void processText(string_view sv) {
    // Locate the "key:" marker; npos means it is absent.
    [[maybe_unused]] size_t pos = sv.find("key:");
    // ...
}
字面量运算符:
// The s suffix from std::string_literals turns a literal into a std::string.
using namespace std::string_literals;
auto str = "hello"s;  // deduced as std::string
// It also combines with raw string literals; the line break is part of the value.
auto multiline = R"(Line1
Line2)"s;
cpp复制// 拼接大量字符串的低效做法
string result;
for (const auto& item : items) {
result += item; // 可能多次重新分配
}
// 高效拼接方案1:预计算总长度
size_t totalLength = 0;
for (const auto& item : items) {
totalLength += item.length();
}
result.reserve(totalLength);
// 高效拼接方案2:使用ostringstream
ostringstream oss;
for (const auto& item : items) {
oss << item;
}
string result = oss.str();
// 高效拼接方案3(C++11):move语义
string result = std::accumulate(items.begin(), items.end(), string(),
[](string&& a, const string& b) {
return std::move(a) + b;
});
// Minimal logger: prefixes each message with a level tag and a timestamp and
// hands the formatted line to writeToFile().
class Logger {
public:
    enum Level { INFO, WARNING, ERROR };

    // Formats "<tag><timestamp> <message>" and makes sure the line ends with
    // a newline before it is written.
    void log(Level level, const string& message) {
        string prefix;
        switch (level) {
            case INFO:    prefix = "[INFO] ";  break;
            case WARNING: prefix = "[WARN] ";  break;
            case ERROR:   prefix = "[ERROR] "; break;
        }
        string formatted = prefix + getTimestamp() + " " + message;
        if (!formatted.empty() && formatted.back() != '\n') {
            formatted.push_back('\n');
        }
        writeToFile(formatted);
    }

private:
    // Returns the current local time as "YYYY-MM-DD HH:MM:SS", or an empty
    // string when the time cannot be converted or formatted.
    string getTimestamp() {
        const time_t now = time(nullptr);
        // NOTE(review): localtime uses shared static storage; switch to a
        // reentrant variant if log() is called from several threads.
        const auto* local = localtime(&now);
        if (local == nullptr) {
            return string();
        }
        char buf[80];
        // strftime returns the number of characters written and 0 on failure,
        // in which case buf is unspecified; build the string from that count.
        const size_t written = strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", local);
        return string(buf, written);
    }

    // Placeholder for the real file output.
    void writeToFile(const string& content) {
        (void)content;
        // actual file I/O goes here...
    }
};
// Parser for simple "key=value" configuration files.
class ConfigParser {
public:
    // Reads filename line by line and stores every "key=value" pair.
    // Blank lines, lines starting with '#', and lines without '=' are skipped.
    // A file that cannot be opened yields no entries (no error is reported).
    void parse(const string& filename) {
        ifstream file(filename);
        string line;
        while (getline(file, line)) {
            trim(line);
            if (line.empty() || line[0] == '#') continue;
            const size_t delimPos = line.find('=');
            if (delimPos == string::npos) continue;
            string key = line.substr(0, delimPos);
            string value = line.substr(delimPos + 1);
            trim(key);
            trim(value);
            configMap[key] = value;
        }
    }

    // Returns the value stored for key, or an empty string when it is absent.
    string get(const string& key) const {
        const auto it = configMap.find(key);
        if (it == configMap.end()) {
            return string();
        }
        return it->second;
    }

private:
    unordered_map<string, string> configMap;

    // Strips leading and trailing blanks in place. '\r' counts as a blank so
    // that files with CRLF line endings do not leave a stray carriage return
    // at the end of each value.
    static void trim(string& str) {
        static const char* const kBlanks = " \t\r";
        const size_t first = str.find_first_not_of(kBlanks);
        if (first == string::npos) {
            str.clear();  // nothing but blanks
            return;
        }
        str.erase(str.find_last_not_of(kBlanks) + 1);
        str.erase(0, first);
    }
};
cpp复制// UTF-8字符串处理工具类
class UTF8Util {
public:
static size_t length(const string& utf8str) {
size_t len = 0;
for (char c : utf8str) {
// 统计非连续字节(UTF-8首字节)
if ((c & 0xC0) != 0x80) ++len;
}
return len;
}
static string substring(const string& utf8str, size_t start, size_t length) {
string result;
size_t currentPos = 0;
size_t count = 0;
while (currentPos < utf8str.size() && count < start + length) {
size_t charLen = getCharLength(utf8str, currentPos);
if (count >= start) {
result.append(utf8str, currentPos, charLen);
}
currentPos += charLen;
++count;
}
return result;
}
private:
static size_t getCharLength(const string& str, size_t pos) {
unsigned char c = str[pos];
if ((c & 0x80) == 0) return 1; // ASCII
if ((c & 0xE0) == 0xC0) return 2;
if ((c & 0xF0) == 0xE0) return 3;
if ((c & 0xF8) == 0xF0) return 4;
return 1; // 无效UTF-8,保守处理
}
};
// Returns a copy of input in which every line ending is a single '\n':
// both a lone CR and a CRLF pair collapse to LF, everything else is copied.
string normalizeLineEndings(const string& input) {
    string output;
    output.reserve(input.size());  // the result is never longer than the input
    for (size_t i = 0; i < input.size(); ++i) {
        const char ch = input[i];
        if (ch != '\r') {
            output += ch;
            continue;
        }
        output += '\n';
        // CRLF: the LF belongs to the same line ending, so skip it.
        if (i + 1 < input.size() && input[i + 1] == '\n') {
            ++i;
        }
    }
    return output;
}
// Custom allocator example (simplified).
// Stateless: every instance is interchangeable, which the comparison
// operators below express as required of an allocator.
template <typename T>
class PoolAllocator {
public:
    using value_type = T;

    PoolAllocator() noexcept = default;

    // Rebinding constructor: lets containers convert between element types.
    template <typename U>
    PoolAllocator(const PoolAllocator<U>&) noexcept {}

    // Allocates uninitialized storage for n objects of type T.
    // Throws std::bad_alloc when n * sizeof(T) would overflow or memory runs out.
    T* allocate(size_t n) {
        if (n > static_cast<size_t>(-1) / sizeof(T)) {
            throw std::bad_alloc();
        }
        // A real implementation would draw from a memory pool here.
        return static_cast<T*>(::operator new(n * sizeof(T)));
    }

    // Releases storage previously obtained from allocate().
    void deallocate(T* p, size_t) noexcept {
        ::operator delete(p);
    }
};

// All PoolAllocator instances compare equal: memory allocated through one can
// be released through any other.
template <typename T, typename U>
bool operator==(const PoolAllocator<T>&, const PoolAllocator<U>&) noexcept {
    return true;
}

template <typename T, typename U>
bool operator!=(const PoolAllocator<T>&, const PoolAllocator<U>&) noexcept {
    return false;
}
// String type that uses the custom allocator: std::basic_string over char
// with PoolAllocator<char> in place of the default allocator.
using PoolString = basic_string<char, char_traits<char>, PoolAllocator<char>>;
// Creates a PoolString and move-constructs a second one from it.
void processLargeData() {
    PoolString s1("Initial value");
    PoolString s2(std::move(s1));  // move construction
    // bulk string operations would follow here...
}
Small String Optimization(SSO)是大多数现代string实现的优化策略,通常对15-22字节以下的字符串直接存储在string对象内部的缓冲区中(而不是另行申请堆内存),从而避免堆分配。
// Prints the capacity of an empty string, then appends characters one at a
// time (up to 30) and reports the first size at which the capacity changes.
void checkSSOThreshold() {
    string s;
    // Reference value: the capacity before anything has been appended.
    const size_t initialCapacity = s.capacity();
    cout << "Empty string capacity: " << initialCapacity << endl;
    for (size_t i = 1; i <= 30; ++i) {
        s.push_back('x');
        // Compare with the initial capacity, not with size(): capacity and
        // size already differ after the very first push_back.
        if (s.capacity() != initialCapacity) {
            cout << "SSO breakpoint at size " << i
                 << ", new capacity: " << s.capacity() << endl;
            break;
        }
    }
}
// std::string wrapper that counts live instances, for spotting objects that
// are never destroyed.
// NOTE: std::string has no virtual destructor, so never delete an
// InstrumentedString through a std::string pointer.
class InstrumentedString : public string {
public:
    static int instanceCount;  // number of objects currently alive

    InstrumentedString() : string() { ++instanceCount; }
    InstrumentedString(const char* s) : string(s) { ++instanceCount; }
    // Copies and moves create new objects too; they must be counted, otherwise
    // their destructors drive the counter below zero.
    InstrumentedString(const InstrumentedString& other) : string(other) { ++instanceCount; }
    InstrumentedString(InstrumentedString&& other) noexcept
        : string(std::move(other)) { ++instanceCount; }
    // Assignment changes contents only; the number of objects stays the same.
    InstrumentedString& operator=(const InstrumentedString&) = default;
    InstrumentedString& operator=(InstrumentedString&&) = default;
    // other constructors...
    ~InstrumentedString() { --instanceCount; }
};
int InstrumentedString::instanceCount = 0;
void checkStringLeaks() {
{
InstrumentedString s1("test");
auto s2 = s1;
cout << "Current instances: " << InstrumentedString::instanceCount << endl;
}
cout << "After scope: " << InstrumentedString::instanceCount << endl;
}
#include <chrono>
// Times one million appends of "append" done three ways (operator+=,
// append(), and operator+= after reserve()) and prints each duration in ms.
void benchmarkStringOperations() {
    const int iterations = 1000000;
    // steady_clock is monotonic, so an interval measured with it cannot be
    // distorted by wall-clock adjustments.
    using Clock = chrono::steady_clock;
    const auto report = [](const char* label, Clock::time_point from, Clock::time_point to) {
        cout << label
             << chrono::duration_cast<chrono::milliseconds>(to - from).count()
             << " ms" << endl;
    };

    // operator+=
    auto start = Clock::now();
    string s1;
    for (int i = 0; i < iterations; ++i) {
        s1 += "append";
    }
    auto end = Clock::now();
    report("+= time: ", start, end);

    // append()
    start = Clock::now();
    string s2;
    for (int i = 0; i < iterations; ++i) {
        s2.append("append");
    }
    end = Clock::now();
    report("append time: ", start, end);

    // Pre-allocated buffer.
    start = Clock::now();
    string s3;
    s3.reserve(static_cast<size_t>(iterations) * 6);  // "append" is 6 bytes
    for (int i = 0; i < iterations; ++i) {
        s3 += "append";
    }
    end = Clock::now();
    report("pre-allocated time: ", start, end);
}
// Traditional interface: an argument that is not already a std::string (for
// example a string literal) is first converted to a temporary std::string,
// which may copy the characters.
void processString(const string& str) {
    // ...
}
// Modern interface: takes a string_view, so string literals, std::string
// objects and other character ranges can be passed without conversion.
void modernProcess(string_view sv) {
    // Example lookup: find the "key:" marker and view everything after it.
    const size_t marker = sv.find("key:");
    if (marker == string_view::npos) {
        return;
    }
    string_view value = sv;
    value.remove_prefix(marker + 4);  // skip past "key:"
    // process the value...
}
// Calls modernProcess with three different kinds of string argument.
void testStringView() {
    // C string literal.
    modernProcess("literal string");
    // std::string object.
    const string owned = "string object";
    modernProcess(owned);
    // Substring: substr() returns a new std::string, viewed for the call.
    const string head = owned.substr(0, 6);
    modernProcess(head);
}
C++20引入的format库提供了更强大的字符串格式化能力:
#include <format>
// Demonstrates the C++20 format() function: automatic "{}" fields, a
// precision specifier, explicit argument indices, and a constexpr format string.
void demoFormat() {
string name = "Alice";
int age = 30;
double score = 95.5;
// Type-safe formatting: "{}" takes the next argument, "{:.1f}" prints one decimal.
string info = format("Name: {}, Age: {}, Score: {:.1f}", name, age, score);
// Positional arguments: {1} refers to "Alice", {0} to "Bob".
string msg = format("Hello {1}, this is {0}", "Bob", "Alice");
// Compile-time format check (C++20): the format string is a constant expression.
constexpr string_view fmtStr = "The answer is {}";
string answer = format(fmtStr, 42);
}
// 不安全的共享string访问
// Counter-example: two threads append to the same string with no
// synchronization. Do not copy this pattern.
void unsafeConcurrentAccess() {
string sharedStr;
// Captures sharedStr by reference, so both threads modify the same object.
auto worker = [&sharedStr](int id) {
for (int i = 0; i < 1000; ++i) {
sharedStr += to_string(id); // data race: unsynchronized concurrent writes
}
};
thread t1(worker, 1);
thread t2(worker, 2);
t1.join();
t2.join();
cout << "Result length: " << sharedStr.length() << endl; // result is not deterministic
}
// Thread-safe version: each worker builds its text in a private string and
// takes the mutex only once, to append the finished text to the shared string.
void safeConcurrentAccess() {
    string sharedStr;
    mutex mtx;
    const auto worker = [&sharedStr, &mtx](int id) {
        // Unshared work: no lock is needed while building the local string.
        string localStr;
        for (int round = 0; round != 1000; ++round) {
            localStr += to_string(id);
        }
        // Publish the result under the lock.
        const lock_guard<mutex> lock(mtx);
        sharedStr.append(localStr);
    };
    thread first(worker, 1);
    thread second(worker, 2);
    first.join();
    second.join();
    // Two workers, 1000 one-digit ids each: the length is always 2000.
    cout << "Safe result length: " << sharedStr.length() << endl;
}
// String guarded by a mutex: every member function locks mtx_ before it
// touches str_.
class ThreadSafeString {
public:
    // Returns a copy of the current contents; the copy is made under the lock.
    string getSnapshot() const {
        lock_guard<mutex> lock(mtx_);
        return str_;
    }

    // Appends sv to the string under the lock.
    void append(string_view sv) {
        lock_guard<mutex> lock(mtx_);
        str_ += sv;
    }

    // Read access without copying: calls func with a const reference to the
    // string while the lock is held. func must not call back into this object
    // (the mutex is not recursive) and must not keep the reference.
    template <typename Func>
    void readWith(Func&& func) const {
        lock_guard<mutex> lock(mtx_);
        func(str_);
    }

private:
    string str_;
    mutable mutex mtx_;  // mutable so that const readers can lock it
};
// Reads the whole file into a byte vector.
// Throws std::runtime_error when the file cannot be opened, its size cannot
// be determined, or the read comes up short.
vector<uint8_t> loadBinaryFile(const string& filename) {
    // ios::ate opens positioned at the end, so tellg() yields the file size.
    ifstream file(filename, ios::binary | ios::ate);
    if (!file) {
        throw runtime_error("Cannot open file: " + filename);
    }
    const streamsize size = file.tellg();
    if (size < 0) {
        // tellg() reports failure as -1, which must not become a vector size.
        throw runtime_error("Failed to read file: " + filename);
    }
    file.seekg(0, ios::beg);
    vector<uint8_t> buffer(static_cast<size_t>(size));
    if (size > 0 && !file.read(reinterpret_cast<char*>(buffer.data()), size)) {
        throw runtime_error("Failed to read file: " + filename);
    }
    return buffer;
}
// Encodes the bytes as an uppercase hexadecimal string, two digits per byte.
string binaryToHexString(const vector<uint8_t>& data) {
    constexpr char kAlphabet[] = "0123456789ABCDEF";
    // Allocate the exact output size up front and fill it in place.
    string hex(data.size() * 2, '0');
    size_t out = 0;
    for (size_t i = 0; i < data.size(); ++i) {
        const uint8_t value = data[i];
        hex[out++] = kAlphabet[value >> 4];    // high nibble
        hex[out++] = kAlphabet[value & 0x0F];  // low nibble
    }
    return hex;
}
// Shows that std::string can hold arbitrary bytes, embedded zeros included,
// as long as the length-taking overloads are used.
void processBinaryData() {
    // Build the byte sequence 00 FF 01 02 03.
    string binaryData;
    binaryData.push_back('\0');
    binaryData.push_back(static_cast<char>(0xFF));  // explicit: 0xFF does not fit a signed char
    binaryData.append("\x01\x02\x03", 3);

    // Compare bytes as char values; the cast keeps both sides the same type.
    const bool isEqual = binaryData.size() == 5 &&
                         binaryData[0] == '\0' &&
                         binaryData[1] == static_cast<char>(0xFF);
    (void)isEqual;

    // Binary search: the explicit length 2 keeps the pattern independent of
    // any terminating zero.
    size_t pos = binaryData.find("\x01\x02", 0, 2);
    if (pos != string::npos) {
        cout << "Pattern found at position: " << pos << endl;
    }

    // Binary replace: overwrite two bytes at offset 2 with AA BB.
    binaryData.replace(2, 2, "\xAA\xBB", 2);
}
namespace StringUtils {

// Splits str at every character contained in delimiters.
// With skipEmpty (the default) empty tokens are dropped; otherwise adjacent
// delimiters and leading/trailing delimiters produce empty tokens.
inline vector<string> split(string_view str, string_view delimiters, bool skipEmpty = true) {
    vector<string> tokens;
    size_t start = 0;
    size_t end = str.find_first_of(delimiters);
    while (end != string_view::npos) {
        const string_view token = str.substr(start, end - start);
        if (!token.empty() || !skipEmpty) {
            tokens.emplace_back(token);
        }
        start = end + 1;
        end = str.find_first_of(delimiters, start);
    }
    // Whatever follows the last delimiter is the final token.
    const string_view lastToken = str.substr(start);
    if (!lastToken.empty() || !skipEmpty) {
        tokens.emplace_back(lastToken);
    }
    return tokens;
}

// Converts str to a number of type T (int, long or double).
// Returns nullopt when str is not a number, is out of range for T, or has
// characters left over after the number (e.g. "12abc"). base applies to the
// integer types only. Any other T is rejected at compile time.
template <typename T>
optional<T> toNumber(string_view str, int base = 10) {
    const string text(str);
    size_t consumed = 0;
    try {
        T value{};
        if constexpr (is_same_v<T, int>) {
            value = stoi(text, &consumed, base);
        } else if constexpr (is_same_v<T, long>) {
            value = stol(text, &consumed, base);
        } else if constexpr (is_same_v<T, double>) {
            value = stod(text, &consumed);
        } else {
            // Without this branch an unsupported T would fall off the end of
            // the function without returning a value.
            static_assert(sizeof(T) == 0, "toNumber supports int, long and double only");
        }
        if (consumed != text.size()) {
            return nullopt;  // trailing characters: not a clean number
        }
        return value;
    } catch (const logic_error&) {
        // invalid_argument and out_of_range both derive from logic_error;
        // unrelated failures such as bad_alloc still propagate.
        return nullopt;
    }
}

}  // namespace StringUtils
#include <regex>
// Finds the email addresses in a sample text, prints each address with its
// user name, then prints the text with every address redacted.
void regexWithString() {
    string text = "Emails: alice@example.com, bob@test.org";
    // Group 1 captures only the part before '@', so match[1] is the user name
    // while match[0] remains the complete address.
    regex emailPattern(R"((\w+)@[\w\.-]+\.\w+)");

    // Iterate over all matches.
    sregex_iterator it(text.begin(), text.end(), emailPattern);
    sregex_iterator end;
    for (; it != end; ++it) {
        const smatch& match = *it;
        cout << "Found email: " << match.str() << endl;
        cout << "Username: " << match[1].str() << endl;
    }

    // Replace every complete match.
    string anonymized = regex_replace(text, emailPattern, "[REDACTED]");
    cout << "Anonymized: " << anonymized << endl;
}
// KMP算法实现
vector<size_t> kmpSearch(string_view text, string_view pattern) {
vector<size_t> positions;
if (pattern.empty()) return positions;
// 构建部分匹配表
vector<int> lps(pattern.size(), 0);
for (size_t i = 1, len = 0; i < pattern.size(); ) {
if (pattern[i] == pattern[len]) {
lps[i++] = ++len;
} else {
if (len != 0) {
len = lps[len - 1];
} else {
lps[i++] = 0;
}
}
}
// 执行搜索
for (size_t i = 0, j = 0; i < text.size(); ) {
if (text[i] == pattern[j]) {
++i;
++j;
if (j == pattern.size()) {
positions.push_back(i - j);
j = lps[j - 1];
}
} else {
if (j != 0) {
j = lps[j - 1];
} else {
++i;
}
}
}
return positions;
}
// Runs kmpSearch on a sample text and prints the index of every match.
void testKMP() {
    const string text = "ABABDABACDABABCABAB";
    const string pattern = "ABABCABAB";
    const auto matches = kmpSearch(text, pattern);
    for (auto it = matches.begin(); it != matches.end(); ++it) {
        cout << "Pattern found at index: " << *it << endl;
    }
}
// LSD radix sort for byte strings.
// Sorts strings in ascending byte order, treating positions past the end of a
// string as '\0'. Only the first maxLen characters take part in the ordering.
void radixSort(vector<string>& strings, size_t maxLen = 256) {
    const size_t bucketCount = 256;  // one bucket per possible byte value

    // Positions at or beyond the longest string put every string into bucket
    // 0 and leave the order unchanged, so those passes are skipped.
    size_t longest = 0;
    for (const auto& str : strings) {
        longest = max(longest, str.size());
    }
    const size_t passes = min(maxLen, longest);

    vector<vector<string>> buckets(bucketCount);
    // Process character positions from last to first; every pass is stable.
    for (size_t pos = passes; pos-- > 0;) {
        for (auto& bucket : buckets) bucket.clear();
        // Distribute, moving the strings instead of copying them.
        for (auto& str : strings) {
            const unsigned char key =
                pos < str.size() ? static_cast<unsigned char>(str[pos]) : 0;
            buckets[key].push_back(std::move(str));
        }
        // Collect the buckets back in key order.
        strings.clear();
        for (auto& bucket : buckets) {
            for (auto& str : bucket) {
                strings.push_back(std::move(str));
            }
        }
    }
}
// Sorts a small list of words with radixSort and prints one word per line.
void testStringSort() {
    vector<string> words{
        "banana", "apple", "orange", "grape",
        "kiwi", "melon", "pear", "peach"
    };
    radixSort(words);
    for (auto it = words.cbegin(); it != words.cend(); ++it) {
        cout << *it << endl;
    }
}
#include <codecvt>
#include <locale>
// Converts a wide string to UTF-8 bytes with wstring_convert over
// codecvt_utf8<wchar_t>.
string wideToUTF8(const wstring& wstr) {
    using Utf8Codec = codecvt_utf8<wchar_t>;
    return wstring_convert<Utf8Codec>().to_bytes(wstr);
}
// Converts UTF-8 bytes to a wide string with wstring_convert over
// codecvt_utf8<wchar_t>.
wstring utf8ToWide(const string& str) {
    using Utf8Codec = codecvt_utf8<wchar_t>;
    return wstring_convert<Utf8Codec>().from_bytes(str);
}
// Round-trips a wide string through UTF-8, prints both lengths, and asserts
// that the round trip reproduces the original.
void testEncodingConversion() {
    const wstring wideStr = L"中文测试";
    const string utf8Str = wideToUTF8(wideStr);
    const wstring convertedBack = utf8ToWide(utf8Str);
    cout << "UTF-8 length: " << utf8Str.length() << endl;   // length in bytes
    cout << "Wide length: " << wideStr.length() << endl;    // length in wchar_t units
    assert(wideStr == convertedBack);
}
// Conceptual skeleton of an encoding converter; a real project would call
// iconv or a platform API here.
// Only GBK -> UTF-8 is recognized, and even that is simulated: the input is
// returned unchanged behind a "Converted:" marker, not actually transcoded.
// Throws std::runtime_error for every other encoding pair.
string convertEncoding(const string& input,
                       const string& fromEncoding,
                       const string& toEncoding) {
    if (fromEncoding != "GBK" || toEncoding != "UTF-8") {
        throw runtime_error("Unsupported encoding conversion: " +
                            fromEncoding + " to " + toEncoding);
    }
    // Simulated GBK -> UTF-8 conversion.
    return "Converted:" + input;
}
// Passes a GBK-encoded sample through convertEncoding and prints the result,
// or the error message when the conversion throws.
void processMultibyteText() {
    const string gbkText = "\xC4\xE3\xBA\xC3";  // "你好" encoded in GBK
    try {
        const string utf8Text = convertEncoding(gbkText, "GBK", "UTF-8");
        cout << "Converted text: " << utf8Text << endl;
    } catch (const exception& error) {
        cerr << "Conversion failed: " << error.what() << endl;
    }
}
// 编译期字符串长度计算
// Length of a NUL-terminated string, usable in constant expressions.
// str must not be null.
constexpr size_t strLength(const char* str) {
    size_t count = 0;
    while (str[count] != '\0') {
        ++count;
    }
    return count;
}
// Compile-time concatenation of two character arrays.
// N1 and N2 are the array sizes including the terminating zero, so the result
// needs N1 + N2 - 1 characters: both texts plus one terminator.
template <size_t N1, size_t N2>
struct ConcatStrings {
    constexpr ConcatStrings(const char (&s1)[N1], const char (&s2)[N2]) {
        // Copy s1 without its terminator...
        for (size_t i = 0; i < N1 - 1; ++i) {
            value[i] = s1[i];
        }
        // ...then s2 including its terminator.
        for (size_t i = 0; i < N2; ++i) {
            value[N1 - 1 + i] = s2[i];
        }
    }

    // Value-initialized: before C++20 a constexpr constructor must initialize
    // every member, so an uninitialized array is rejected by the compiler.
    char value[N1 + N2 - 1] = {};
};
void testCompileTimeStrings() {
constexpr const char hello[] = "Hello";
constexpr const char world[] = " World!";
constexpr ConcatStrings<sizeof(hello), sizeof(world)> helloWorld(hello, world);
static_assert(helloWorld.value[5] == ' ', "Compile-time check failed");
cout << helloWorld.value << endl;
}
// Minimal checked formatter: replaces each "{}" in fmt with the next argument,
// written through operator<<.
// fmt is only known at run time, so the placeholder count cannot be verified
// with static_assert; a mismatch is reported by throwing instead.
// Throws std::invalid_argument when the number of "{}" placeholders differs
// from the number of arguments.
template <typename... Args>
string formatChecked(const string& fmt, const Args&... args) {
    size_t placeholders = 0;
    for (size_t at = fmt.find("{}"); at != string::npos; at = fmt.find("{}", at + 2)) {
        ++placeholders;
    }
    if (placeholders != sizeof...(args)) {
        throw invalid_argument("Number of arguments doesn't match format specifiers");
    }

    ostringstream out;
    size_t cursor = 0;
    // Emits the literal text up to the next placeholder, then the argument.
    [[maybe_unused]] const auto emit = [&](const auto& arg) {
        const size_t hole = fmt.find("{}", cursor);
        out << fmt.substr(cursor, hole - cursor) << arg;
        cursor = hole + 2;
    };
    (emit(args), ...);
    out << fmt.substr(cursor);  // text after the last placeholder
    return out.str();
}
// Calls formatChecked with a format string whose two placeholders match the
// two arguments supplied.
void testTypeSafeFormat() {
    const string name = "Alice";
    const int age = 30;
    // Placeholder count and argument count agree, so the check passes.
    const string info = formatChecked("Name: {}, Age: {}", name, age);
    // A call with mismatched counts is rejected by formatChecked:
    // string error = formatChecked("Name: {}", name, age);
}
// Minimal owning string: a heap buffer plus a length. An empty string owns no
// buffer at all.
class CompactString {
public:
    CompactString() : data_(nullptr), size_(0) {}

    // Copies the NUL-terminated string str. A null pointer is treated as an
    // empty string instead of being passed to strlen.
    explicit CompactString(const char* str)
        : data_(nullptr), size_(str != nullptr ? strlen(str) : 0) {
        if (size_ > 0) {
            data_ = new char[size_ + 1];
            memcpy(data_, str, size_ + 1);  // includes the terminator
        }
    }

    ~CompactString() { delete[] data_; }

    // Number of characters, not counting the terminator.
    size_t size() const noexcept { return size_; }

    // NUL-terminated contents; never null, an empty string yields "".
    const char* c_str() const noexcept { return data_ ? data_ : ""; }

    // Copying is disabled (simplified example): the class owns a raw buffer
    // and defines no deep copy.
    CompactString(const CompactString&) = delete;
    CompactString& operator=(const CompactString&) = delete;

private:
    char* data_;   // owned buffer, or nullptr when empty
    size_t size_;
};
void testCompactString() {
CompactString s1;
CompactString s2("hello");
cout << "Empty size: " << s1.size() << endl;
cout << "Content: " << s2.c_str() << endl;
}
// String interning pool: equal strings share one stored copy, so the returned
// pointers can be compared instead of the contents.
// Not synchronized; callers that intern from several threads must add locking.
class StringPool {
public:
    // Process-wide instance, created on first use.
    static StringPool& instance() {
        static StringPool pool;
        return pool;
    }

    // Returns the pooled copy of str, adding it on first sight. Equal inputs
    // always yield the same pointer. The pointer stays valid while the pool
    // exists: unordered_set never relocates its elements, not even on rehash.
    // A null input yields nullptr.
    const char* intern(const char* str) {
        if (str == nullptr) {
            return nullptr;
        }
        // emplace() returns the existing element when the string is already
        // pooled, so one call covers both lookup and insertion.
        return pool_.emplace(str).first->c_str();
    }

private:
    StringPool() = default;
    unordered_set<string> pool_;
};
void testStringPool() {
const char* s1 = StringPool::instance().intern("hello");
const char* s2 = StringPool::instance().intern("world");
const char* s3 = StringPool::instance().intern("hello");
cout << "s1: " << s1 << " (" << (void*)s1 << ")" << endl;
cout << "s2: " << s2 << " (" << (void*)s2 << ")" << endl;
cout