random memes }

Building a better string class - what advantage in copy-on-write for std::string?

[Summary] [Continued]

The prior set of measurements in - Building a better string class - comparing class implementations - did not really measure the advantage (if any) of the use of copy-on-write in the std::string implementation. Did some measurements to see whether this helps improve the relative standing of std::string.

First, the same "mixed" measurement as in the prior tests, but using the test strings as classes (rather than const char*). Second, let's do everything we can to give std::string the advantage (assuming copy-on-write is an advantage) by measuring only assignments.

RATE ZString string mixed calls 126.9 MB/second (1000.0 MB in 7.9 seconds) RATE C++ string mixed calls 39.7 MB/second (1000.0 MB in 25.2 seconds) RATE ZString string assign calls 144.9 MB/second (1000.0 MB in 6.9 seconds) RATE C++ string assign calls 92.3 MB/second (1000.0 MB in 10.8 seconds)

The results seem to match up with my expectations. With mixed operations the std::string class performs relatively poorly. With an assignment-only test std::string does quite a lot better, but is still slower.

This matches up to my expectation as the amount of code executed to implement the copy-on-write "optimization" is in fact slower than simply copying the buffer. At least this is true for small strings (what I see as the more usual case). Tried increasing the test strings from 26 to 62 characters to see if std::string might fare better with longer strings.

RATE ZString string mixed calls 229.6 MB/second (1200.0 MB in 5.2 seconds) RATE C++ string mixed calls 88.3 MB/second (1200.0 MB in 13.6 seconds) RATE ZString string assign calls 268.7 MB/second (1200.0 MB in 4.5 seconds) RATE C++ string assign calls 221.9 MB/second (1200.0 MB in 5.4 seconds)

Apparently not :).

Repeating the same test, but this time compiled with GNU C++ on Linux.

RATE ZString string mixed calls 192.3 MB/second (1200.0 MB in 6.2 seconds) RATE C++ string mixed calls 108.8 MB/second (1200.0 MB in 11.0 seconds) RATE ZString string assign calls 225.1 MB/second (1200.0 MB in 5.3 seconds) RATE C++ string assign calls 324.3 MB/second (1200.0 MB in 3.7 seconds)

Interesting! The GNU std::string implementation is apparently better than Microsoft's, and does indeed yield an advantage - at least when doing only assignments.

Repeating the same test on Linux, but this time with the (original) smaller strings.

RATE ZString string mixed calls 134.2 MB/second (1000.0 MB in 7.5 seconds) RATE C++ string mixed calls 59.6 MB/second (1000.0 MB in 16.8 seconds) RATE ZString string assign calls 155.5 MB/second (1000.0 MB in 6.4 seconds) RATE C++ string assign calls 152.0 MB/second (1000.0 MB in 6.6 seconds)

For smaller strings, even with the better GNU implementation, a simple buffer copy edges out copy-on-write in assignment.

The test program and ZString class sources follow.

string.cpp

#include <stdlib.h> #include <stdio.h> #include <time.h> #include <string.h>

// Optimization that seems to work best (at least with an Athlon). //#pragma function(strlen,strcpy,strcat)

#include "ZString.h" #include

// test strings

//const int nLoop = 10000000; //static const char sOut1[] = "abcdefghjklmnopqrstuvwxyz"; //static const char sOut2[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";

const int nLoop = 5000000; static const char sOut1[] = "abcdefghjklmnopqrstuvwxyz0123456789ABCDEFGHJKLMNOPQRSTUVWXYZ"; static const char sOut2[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ0123456789abcdefghjklmnopqrstuvwxyz";

int nLength = ::strlen(sOut1);

void report_times(const char* s,int dt,int cb) { //printf("TIME %s %d ticks for %d characters written\n",s,dt,cb); double ts = ((double)dt) / CLOCKS_PER_SEC; double mb = ((double)cb) / 1000000; double rate = mb / ts; printf("RATE %s %0.1f MB/second (%0.1f MB in %0.1f seconds)\n",s,rate,mb,ts); }

// // Perform a function for a fixed number of iterations and return time. //

int dtLoop = 0;

typedef void (doit)(const char,const char*);

int time_function(doit fn) { clock_t t0 = ::clock(); for (int i=0; i<nLoop; ++i) { const char* s1 = sOut1 + (15 & i); const char* s2 = sOut2 + nLength - (15 & i); (*fn)(s1,s2); } return (int)(::clock() - t0) - dtLoop; }

// // Time C string operations. //

int nTotal = 0;

void do_total(const char* s1,const char* s2) { nTotal += 4 * nLength; }

void do_z_string_mixed_calls(const char* _s1,const char* _s2) { ZString s1 = _s1; ZString s2 = _s2; ZString sWork; sWork.strcpy(s1); sWork.strcat(s2); sWork.strcpy(s2); sWork.strcat(s1); sWork.strcpy(s1); sWork.strcat(s1); sWork.strcpy(s2); sWork.strcat(s2); }

void do_cpp_string_mixed_calls(const char* _s1,const char* _s2) { std::string s1 = _s1; std::string s2 = _s2; std::string sWork; sWork.assign(s1); sWork.append(s2); sWork.assign(s2); sWork.append(s1); sWork.assign(s1); sWork.append(s1); sWork.assign(s2); sWork.append(s2); }

void do_z_string_assign_calls(const char* _s1,const char* _s2) { ZString s1 = _s1; ZString s2 = _s2; ZString sWork; sWork.strcpy(s1); sWork.strcpy(s2); sWork.strcpy(s2); sWork.strcpy(s1); sWork.strcpy(s1); sWork.strcpy(s1); sWork.strcpy(s2); sWork.strcpy(s2); }

void do_cpp_string_assign_calls(const char* _s1,const char* _s2) { std::string s1 = _s1; std::string s2 = _s2; std::string sWork; sWork.assign(s1); sWork.assign(s2); sWork.assign(s2); sWork.assign(s1); sWork.assign(s1); sWork.assign(s1); sWork.assign(s2); sWork.assign(s2); }

int main(int ac,char** av) { dtLoop = time_function(do_total); report_times("ZString string mixed calls",time_function(do_z_string_mixed_calls),nTotal); report_times("C++ string mixed calls",time_function(do_cpp_string_mixed_calls),nTotal); report_times("ZString string assign calls",time_function(do_z_string_assign_calls),nTotal); report_times("C++ string assign calls",time_function(do_cpp_string_assign_calls),nTotal); return 0; }

ZString.h

#ifndef ZSTRING_H #define ZSTRING_H

// // Simple string class. //

class ZString {

protected: char* sBuffer; int cbBufferMax;

public: ZString() { upsizeTo(1); } ZString(const char* s) { upsizeTo(::strlen(s)); ::strcpy(sBuffer,s); } ~ZString() { recycle(); }

protected: void recycle(); void upsizeTo(int); void sizeTo(int n) { if (cbBufferMax <= n) { upsizeTo(n); } }

public: operator const char*() { return sBuffer; } char* getBuffer() { return sBuffer; } char* getBuffer(int n) { sizeTo(n); return sBuffer; } int getBufferSize() { return cbBufferMax; }

public: void operator=(const char* s) { strcpy(s); }

public: void strcpy(const char* s) { sizeTo(::strlen(s)); ::strcpy(sBuffer,s); } void strcpy(const char* s,int n) { sizeTo(n); ::strncpy(sBuffer,s,n); sBuffer[n] = 0; } void strcat(const char* s) { int n1 = ::strlen(sBuffer); int n2 = ::strlen(s); sizeTo(n1 + n2); ::strcpy(sBuffer+n1,s); } void strcat(const char* s,int n) { int n1 = ::strlen(sBuffer); sizeTo(n1 + n); ::strncpy(sBuffer+n1,s,n); sBuffer[n1+n] = 0; } void strlwr() { ::strlwr(sBuffer); } void strupr() { ::strupr(sBuffer); }

};

#endif

ZString.cpp

#include <stdlib.h> #include <string.h> #include "ZString.h"

// // Out-of-line string methods and free list maintenance. //

enum { STRING_BUFFER_SIZE = 256 }; static void* g_pFreeList;

void ZString::recycle() { if (STRING_BUFFER_SIZE != cbBufferMax) { delete sBuffer; sBuffer = 0; return; } ((void*)sBuffer) = g_pFreeList; g_pFreeList = sBuffer; sBuffer = 0; cbBufferMax = 0; }

void ZString::upsizeTo(int n) { // Allocate oversize strings. if (STRING_BUFFER_SIZE <= n) { // round up to a quanta n = ((n + STRING_BUFFER_SIZE + 1) / STRING_BUFFER_SIZE) * STRING_BUFFER_SIZE; char* p = new char[n]; ::strcpy(p,sBuffer); recycle(); sBuffer = p; cbBufferMax = n; return; } cbBufferMax = STRING_BUFFER_SIZE; // Grab a buffer from the free list if present (the usual case). if (g_pFreeList) { sBuffer = (char*) g_pFreeList; g_pFreeList = ((void*)g_pFreeList); } else { // Allocate a stock-sized buffer. sBuffer = new char[STRING_BUFFER_SIZE]; } *sBuffer = 0; }