[Summary] [Continued]

The prior set of measurements in - Building a better string class - comparing class implementations - did not really measure the advantage (if any) of the use of copy-on-write in the std::string implementation. Did some measurements to see whether this helps improve the relative standing of std::string.

First, the same “mixed” measurement as in the prior tests, but using the test strings as classes (rather than const char*). Second, let’s do everything we can to give std::string the advantage (assuming copy-on-write is an advantage) by measuring only assignments.

RATE ZString string mixed calls 126.9 MB/second (1000.0 MB in 7.9 seconds)
RATE C++ string mixed calls 39.7 MB/second (1000.0 MB in 25.2 seconds)
RATE ZString string assign calls 144.9 MB/second (1000.0 MB in 6.9 seconds)
RATE C++ string assign calls 92.3 MB/second (1000.0 MB in 10.8 seconds)

The results seem to match up with my expectations. With mixed operations the std::string class performs relatively poorly. With an assignment-only test std::string does quite a lot better, but is still slower.

This matches up to my expectation as the amount of code executed to implement the copy-on-write “optimization” is in fact slower than simply copying the buffer. At least this is true for small strings (what I see as the more usual case). Tried increasing the test strings from 26 to 62 characters to see if std::string might fare better with longer strings.

RATE ZString string mixed calls 229.6 MB/second (1200.0 MB in 5.2 seconds)
RATE C++ string mixed calls 88.3 MB/second (1200.0 MB in 13.6 seconds)
RATE ZString string assign calls 268.7 MB/second (1200.0 MB in 4.5 seconds)
RATE C++ string assign calls 221.9 MB/second (1200.0 MB in 5.4 seconds)

Apparently not :).

Repeating the same test, but this time compiled with GNU C++ on Linux.

RATE ZString string mixed calls 192.3 MB/second (1200.0 MB in 6.2 seconds)
RATE C++ string mixed calls 108.8 MB/second (1200.0 MB in 11.0 seconds)
RATE ZString string assign calls 225.1 MB/second (1200.0 MB in 5.3 seconds)
RATE C++ string assign calls 324.3 MB/second (1200.0 MB in 3.7 seconds)

Interesting! The GNU std::string implementation is apparently better than Microsoft’s, and does indeed yield an advantage - at least when doing only assignments.

Repeating the same test on Linux, but this time with the (original) smaller strings.

RATE ZString string mixed calls 134.2 MB/second (1000.0 MB in 7.5 seconds)
RATE C++ string mixed calls 59.6 MB/second (1000.0 MB in 16.8 seconds)
RATE ZString string assign calls 155.5 MB/second (1000.0 MB in 6.4 seconds)
RATE C++ string assign calls 152.0 MB/second (1000.0 MB in 6.6 seconds)

For smaller strings, even with the better GNU implementation, a simple buffer copy edges out copy-on-write in assignment.

The test program and ZString class sources follow.

string.cpp

#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <string.h>

// Optimization that seems to work best (at least with an Athlon).
//#pragma function(strlen,strcpy,strcat)

#include "ZString.h"
#include <string>

// test strings

//const int nLoop = 10000000;
//static const char sOut1[] = "abcdefghjklmnopqrstuvwxyz";
//static const char sOut2[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ";

const int nLoop = 5000000;
static const char sOut1[] = "abcdefghjklmnopqrstuvwxyz0123456789ABCDEFGHJKLMNOPQRSTUVWXYZ";
static const char sOut2[] = "ABCDEFGHJKLMNOPQRSTUVWXYZ0123456789abcdefghjklmnopqrstuvwxyz";

int nLength = ::strlen(sOut1);

void report_times(const char* s,int dt,int cb)
{
    //printf("TIME %s %d ticks for %d characters written\n",s,dt,cb);
    double ts = ((double)dt) / CLOCKS_PER_SEC;
    double mb = ((double)cb) / 1000000;
    double rate = mb / ts;
    printf("RATE %s %0.1f MB/second (%0.1f MB in %0.1f seconds)\n",s,rate,mb,ts);
}

//
//  Perform a function for a fixed number of iterations and return time.
//

int dtLoop = 0;

typedef void (*doit)(const char*,const char*);

int time_function(doit fn)
{
    clock_t t0 = ::clock();
    for (int i=0; i<nLoop; ++i) {
        const char* s1 = sOut1 + (15 & i);
        const char* s2 = sOut2 + nLength - (15 & i);
        (*fn)(s1,s2);
    }
    return (int)(::clock() - t0) - dtLoop;
}

//
//  Time C string operations.
//

int nTotal = 0;

void do_total(const char* s1,const char* s2)
{
    nTotal += 4 * nLength;
}

void do_z_string_mixed_calls(const char* _s1,const char* _s2)
{
    ZString s1 = _s1;
    ZString s2 = _s2;
    ZString sWork;
    sWork.strcpy(s1);
    sWork.strcat(s2);
    sWork.strcpy(s2);
    sWork.strcat(s1);
    sWork.strcpy(s1);
    sWork.strcat(s1);
    sWork.strcpy(s2);
    sWork.strcat(s2);
}

void do_cpp_string_mixed_calls(const char* _s1,const char* _s2)
{
    std::string s1 = _s1;
    std::string s2 = _s2;
    std::string sWork;
    sWork.assign(s1);
    sWork.append(s2);
    sWork.assign(s2);
    sWork.append(s1);
    sWork.assign(s1);
    sWork.append(s1);
    sWork.assign(s2);
    sWork.append(s2);
}

void do_z_string_assign_calls(const char* _s1,const char* _s2)
{
    ZString s1 = _s1;
    ZString s2 = _s2;
    ZString sWork;
    sWork.strcpy(s1);
    sWork.strcpy(s2);
    sWork.strcpy(s2);
    sWork.strcpy(s1);
    sWork.strcpy(s1);
    sWork.strcpy(s1);
    sWork.strcpy(s2);
    sWork.strcpy(s2);
}

void do_cpp_string_assign_calls(const char* _s1,const char* _s2)
{
    std::string s1 = _s1;
    std::string s2 = _s2;
    std::string sWork;
    sWork.assign(s1);
    sWork.assign(s2);
    sWork.assign(s2);
    sWork.assign(s1);
    sWork.assign(s1);
    sWork.assign(s1);
    sWork.assign(s2);
    sWork.assign(s2);
}


int main(int ac,char** av)
{
    dtLoop = time_function(do_total);
    report_times("ZString string mixed calls",time_function(do_z_string_mixed_calls),nTotal);
    report_times("C++ string mixed calls",time_function(do_cpp_string_mixed_calls),nTotal);
    report_times("ZString string assign calls",time_function(do_z_string_assign_calls),nTotal);
    report_times("C++ string assign calls",time_function(do_cpp_string_assign_calls),nTotal);
    return 0;
}

ZString.h

#ifndef __ZSTRING_H__
#define __ZSTRING_H__

//
//  Simple string class.
//

class ZString
{

protected:
    char* sBuffer;
    int cbBufferMax;

public:
    ZString() { upsizeTo(1); }
    ZString(const char* s) { upsizeTo(::strlen(s)); ::strcpy(sBuffer,s); }
    ~ZString() { recycle(); }

protected:
    void recycle();
    void upsizeTo(int);
    void sizeTo(int n) {
        if (cbBufferMax <= n) {
            upsizeTo(n);
        }
    }

public:
    operator const char*()          { return sBuffer; }
    char* getBuffer()               { return sBuffer; }
    char* getBuffer(int n)          { sizeTo(n); return sBuffer; }
    int getBufferSize()             { return cbBufferMax; }

public:
    void operator=(const char* s) {
        strcpy(s);
    }

public:
    void strcpy(const char* s) {
        sizeTo(::strlen(s));
        ::strcpy(sBuffer,s);
    }
    void strcpy(const char* s,int n) {
        sizeTo(n);
        ::strncpy(sBuffer,s,n);
        sBuffer[n] = 0;
    }
    void strcat(const char* s) {
        int n1 = ::strlen(sBuffer);
        int n2 = ::strlen(s);
        sizeTo(n1 + n2);
        ::strcpy(sBuffer+n1,s);
    }
    void strcat(const char* s,int n) {
        int n1 = ::strlen(sBuffer);
        sizeTo(n1 + n);
        ::strncpy(sBuffer+n1,s,n);
        sBuffer[n1+n] = 0;
    }
    void strlwr() {
        ::strlwr(sBuffer);
    }
    void strupr() {
        ::strupr(sBuffer);
    }

};

#endif

ZString.cpp

#include <stdlib.h>
#include <string.h>
#include "ZString.h"

//
//  Out-of-line string methods and free list maintenance.
//

enum { STRING_BUFFER_SIZE = 256 };
static void* g_pFreeList;

void ZString::recycle()
{
    if (STRING_BUFFER_SIZE != cbBufferMax) {
        delete sBuffer;
        sBuffer = 0;
        return;
    }
    *((void**)sBuffer) = g_pFreeList;
    g_pFreeList = sBuffer;
    sBuffer = 0;
    cbBufferMax = 0;
}

void ZString::upsizeTo(int n)
{
    // Allocate oversize strings.
    if (STRING_BUFFER_SIZE <= n) {
        // round up to a quanta
        n = ((n + STRING_BUFFER_SIZE + 1) / STRING_BUFFER_SIZE) * STRING_BUFFER_SIZE;
        char* p = new char[n];
        ::strcpy(p,sBuffer);
        recycle();
        sBuffer = p;
        cbBufferMax = n;
        return;
    }
    cbBufferMax = STRING_BUFFER_SIZE;
    // Grab a buffer from the free list if present (the usual case).
    if (g_pFreeList) {
        sBuffer = (char*) g_pFreeList;
        g_pFreeList = *((void**)g_pFreeList);
    } else {
        // Allocate a stock-sized buffer.
        sBuffer = new char[STRING_BUFFER_SIZE];
    }
    *sBuffer = 0;
}