Below is a function I created and have found extremely useful for splitting strings based on a particular delimiter. The implementation only requires STL which makes it easy to port to any OS that supports STL. The function is fairly lightweight although I haven't done extensive performance testing.
The delimiter can be any number of characters, represented as a
string. The parts of the input string between delimiters are then put
into a string vector. The class StringUtils contains one
static function, SplitString. The int
returned is the number of delimiters found within the input string.
I used this utility mainly for parsing strings that were being passed
across platform boundaries. Whether you are using raw sockets or
middleware such as TIBCO® it is uncomplicated to pass string
data.
I found it more efficient to pass delimited string data versus
repeated calls or messages. Another place I used this was in passing
BSTRs back and forth between a Visual
Basic client and an ATL COM DLL. It proved to
be easier than passing a SAFEARRAY as an [in] or [out]
parameter. This was also beneficial when I did not want the added
overhead of MFC and hence could not use CString.
The SplitString function uses the STL string
functions find and substr to iterate
through the input string. The hardest part was figuring out how to
get the substring of the input string based on the offsets of the
delimiter, not forgetting to take into account the length of the
delimiter. Another hurdle was making sure not to call substr
with an offset greater than the length of the input string.
#ifndef STRINGUTILS_H_
#define STRINGUTILS_H_
#include <string>
#include <vector>
// Retained from the original header: other code written against it uses
// unqualified string/vector and would stop compiling without it.
using namespace std;

/// Holder class for string helper functions; it only has static members.
class StringUtils
{
public:
    /// Splits @p input around every occurrence of @p delimiter.
    ///
    /// The pieces before, between and after the delimiters are appended to
    /// @p results in order (existing elements are left in place).
    ///
    /// @param input          String to split.
    /// @param delimiter      Delimiter of one or more characters.
    /// @param results        Receives the pieces.
    /// @param includeEmpties When false, empty pieces are not appended.
    /// @return Number of delimiters found. Returns 0 and appends nothing
    ///         when @p input is empty, @p delimiter is empty, or the
    ///         delimiter does not occur in @p input.
    static int SplitString(const std::string& input,
                           const std::string& delimiter,
                           std::vector<std::string>& results,
                           bool includeEmpties = true);
};
#endif

// Implementation of StringUtils::SplitString. It sits outside the include
// guard, as in the original listing.
// NOTE(review): presumably this belongs in a separate .cpp file - confirm.
int StringUtils::SplitString(const std::string& input,
                             const std::string& delimiter,
                             std::vector<std::string>& results,
                             bool includeEmpties)
{
    if( input.empty() || delimiter.empty() )
    {
        return 0;
    }

    // Offsets stay in size_type so they are never narrowed to int and
    // npos is compared directly instead of through a negative value.
    std::string::size_type hit = input.find(delimiter);
    if( hit == std::string::npos )
    {
        // No delimiter at all: by contract nothing is appended.
        return 0;
    }

    int numFound = 0;
    std::string::size_type pieceStart = 0;
    while( hit != std::string::npos )
    {
        ++numFound;
        const std::string piece = input.substr(pieceStart, hit - pieceStart);
        if( includeEmpties || !piece.empty() )
        {
            results.push_back(piece);
        }
        // Resume right after the delimiter so matches never overlap.
        pieceStart = hit + delimiter.size();
        hit = input.find(delimiter, pieceStart);
    }

    // Whatever follows the last delimiter. pieceStart is at most
    // input.size(), so substr() cannot throw here.
    const std::string tail = input.substr(pieceStart);
    if( includeEmpties || !tail.empty() )
    {
        results.push_back(tail);
    }
    return numFound;
}
// Sample invocation: main.exe "|mary|had|a||little|lamb||" "|"
int SplitString(
const string& input,
const string& delimiter,
vector<string>& results,
bool includeEmpties = true
)
-------------------------------------------------------
input = |mary|had|a||little|lamb||
delimiter = |
return value = 8 // Number of delimiters found
results.size() = 9
results[0] = ''
results[1] = 'mary'
results[2] = 'had'
results[3] = 'a'
results[4] = ''
results[5] = 'little'
results[6] = 'lamb'
results[7] = ''
results[8] = ''
int SplitString(
const string& input,
const string& delimiter,
vector<string>& results,
bool includeEmpties = false
)
-------------------------------------------------------
input = |mary|had|a||little|lamb||
delimiter = |
return value = 8 // Number of delimiters found
results.size() = 5
results[0] = 'mary'
results[1] = 'had'
results[2] = 'a'
results[3] = 'little'
results[4] = 'lamb'
For those of you who absolutely cannot use STL and are committed
to MFC, I made a few minor changes to the above implementation. It
uses CString instead of std::string and a
CStringArray instead of a std::vector:
//------------------------------------------------------------
// SplitString in MFC
//
// Splits input around every occurrence of delimiter and appends
// the non-empty pieces to results, in order. Empty pieces (from
// leading, trailing or adjacent delimiters) are always dropped.
//
// input:     String to split.
// delimiter: Delimiter of one or more characters.
// results:   Receives the pieces; existing elements are kept.
//
// Returns the number of delimiters found. Returns 0 and appends
// nothing when input is empty, delimiter is empty, or the
// delimiter does not occur in input.
//------------------------------------------------------------
int StringUtils::SplitString(const CString& input,
    const CString& delimiter, CStringArray& results)
{
    const int delimLen = delimiter.GetLength();
    const int inputLen = input.GetLength();
    // An empty delimiter can never advance the scan, so reject it up front.
    if( inputLen == 0 || delimLen == 0 ) { return 0; }

    int hit = input.Find(delimiter, 0);
    if( hit < 0 ) { return 0; }

    int numFound = 0;
    int pieceStart = 0;
    // Find() returns -1 once no further delimiter exists. Testing for that
    // (rather than comparing with the previous position) also handles a
    // delimiter sitting at index 0.
    while( hit >= 0 )
    {
        ++numFound;
        if( hit > pieceStart )
            results.Add(input.Mid(pieceStart, hit - pieceStart));
        // Resume directly after the delimiter so adjacent delimiters
        // are each found.
        pieceStart = hit + delimLen;
        hit = input.Find(delimiter, pieceStart);
    }

    // Text after the last delimiter, if any.
    if( pieceStart < inputLen )
        results.Add(input.Mid(pieceStart));
    return numFound;
}
I added this version in case you might need to use it with any
type of string. The only requirement is the string class must have a
constructor that takes a char*. The code only depends on
the STL vector. I also added the option to not include empty strings
in the results, which will occur if delimiters are adjacent:
//-----------------------------------------------------------
// split: break a C string into the pieces around a delimiter.
//
// StrT:    Type of string to be constructed.
//          Must be constructible from a const char*.
// str:     NUL-terminated string to be parsed.
// delim:   NUL-terminated delimiter (one or more characters).
// results: Vector of StrT for strings between delimiter.
//          Pieces are appended; existing elements are kept.
// empties: Include empty strings in the results.
//
// Returns results.size() after appending, so elements that
// were already in results on entry are counted as well.
//
// A null str or delim appends nothing. An empty delim never
// matches, so str is handled as one single piece.
//-----------------------------------------------------------
template< typename StrT >
int split(const char* str, const char* delim,
          std::vector<StrT>& results, bool empties = true)
{
    if( str == NULL || delim == NULL )
    {
        return static_cast<int>(results.size());
    }

    const size_t dlen = strlen(delim);
    const char* pstr = str;
    // strstr() reports a match at the cursor for an empty needle; with
    // dlen == 0 the cursor would never advance, so skip the search.
    const char* r = (dlen > 0) ? strstr(pstr, delim) : NULL;
    while( r != NULL )
    {
        if( r != pstr || empties )
        {
            // std::string owns the temporary copy, so nothing leaks if
            // the StrT constructor or push_back throws.
            const std::string piece(pstr, static_cast<size_t>(r - pstr));
            results.push_back(StrT(piece.c_str()));
        }
        pstr = r + dlen;
        r = strstr(pstr, delim);
    }

    // Remainder after the last delimiter (or the whole string when the
    // delimiter never occurred).
    if( *pstr != '\0' || empties )
    {
        results.push_back(StrT(pstr));
    }
    return static_cast<int>(results.size());
}
// using CString
//------------------------------------------
// Index shared by the loops below; size_t matches the type returned by
// vector::size(), so the loop conditions compare like with like.
size_t i = 0;
vector<CString> results;
split("a-b-c--d-e-", "-", results);
for( i=0; i < results.size(); ++i )
{
    // The LPCTSTR conversion gives read-only access to the characters;
    // a GetBuffer/ReleaseBuffer pair is only needed for writing.
    cout << static_cast<LPCTSTR>(results[i]) << endl;
}
// using std::string
//------------------------------------------
vector<string> stdResults;
split("a-b-c--d-e-", "-", stdResults);
for( i=0; i < stdResults.size(); ++i )
{
    cout << stdResults[i].c_str() << endl;
}
// using std::string without empties
//------------------------------------------
// Start from an empty vector: split() appends to whatever is already there.
stdResults.clear();
split("a-b-c--d-e-", "-", stdResults, false);
for( i=0; i < stdResults.size(); ++i )
{
    cout << stdResults[i].c_str() << endl;
}