Board index » cppbuilder » String Parsing Optimization
Junk Mail
![]() CBuilder Developer |
String Parsing Optimization 2003-09-30 07:45:37 AM cppbuilder114 Thanks, Gambit, for helping out with my last problem with memory issues. I've run across another little stumbling block. It works great when I only have a couple of delimiters, but when I start using a lot of delimiters, say 30, it really starts to slow down the processing. Basically, I loop through my delimiters and determine which is the closest delimiter to the front of the string. The one that is closest I use as my delimiter to extract my next token from the string. Does anyone know of a quicker way of determining this, or how I could optimise my loop? The code that's in between the comments that look like this is what needs to be optimized: //======= //********* code //********* //======= I'm just including these so you know the types of everything: ... private: AnsiString FBaseString; TStringList* FDelimiters; TStringList* FTokens; TStringList* FAlphabetizedTokens; bool FNeedsAlphabetized; bool FNeedsTokenCount; int* FTokenCount; int FTotalTokensCount; int FUniqueTokensCount; ... 
void CTokenizer::Parse() { AnsiString strTempBase; int delimiterCount = FDelimiters->Count; if(delimiterCount) strTempBase = FBaseString + FDelimiters->Strings[0]; int maxPos = strTempBase.Length(); FTokens->Clear(); int pos = 1; AnsiString strDelimiter; AnsiString strToken; FTotalTokensCount = 0; char* ptrStart = strTempBase.c_str(); char* ptrEnd = ptrStart; char* ptrEnd2; char* ptrMaxTempBase; int whichDelimiter = -1; while(pos < maxPos) { if(delimiterCount) { strDelimiter = FDelimiters->Strings[0]; whichDelimiter = 0; } ptrEnd = AnsiStrPos(ptrStart, strDelimiter.c_str()); //======================================= //****************************************************** // This is the section of code that needs to be optimized //basically I'm looping thourgh my TStringList of delimiters and //determining which one is the closest delimiter if(delimiterCount>1) { for(int i = 1; i < delimiterCount; i++) { ptrEnd2 = AnsiStrPos(ptrStart, FDelimiters->Strings[i].c_str()); if((ptrEnd2) && (*ptrEnd2 != '\0') && (ptrEnd2 < ptrEnd)) { ptrEnd = ptrEnd2; whichDelimiter = i; } } } //****************************************************** //======================================= if(ptrEnd) { strToken = strTempBase.SubString(pos, ptrEnd - ptrStart); if(strToken != "") { FTokens->Add(strToken); FTotalTokensCount++; } ptrEnd += FDelimiters->Strings[whichDelimiter].Length(); pos += ptrEnd - ptrStart; ptrStart = ptrEnd; } } FNeedsAlphabetized = true; } if you would like me post the entire class I will. 
if interested I was using these as my delimiters /* Example setup: registers the delimiter strings on a CTokenizer instance. */ CTokenizer tokenizer; tokenizer.Delimiters->Add(" "); tokenizer.Delimiters->Add("<br>"); /* NOTE(review): "<br>" is added before "<"; Parse() only replaces its candidate when another delimiter is found strictly earlier, so on a tie the lower index wins and "<br>" is consumed whole. */ tokenizer.Delimiters->Add(""); /* NOTE(review): empty delimiter - presumably an entity or control character lost when the post was rendered; confirm the intended value. */ tokenizer.Delimiters->Add(" "); /* NOTE(review): duplicates the first " " entry unless this was originally a different whitespace character - confirm. */ tokenizer.Delimiters->Add("\r"); tokenizer.Delimiters->Add("\n"); tokenizer.Delimiters->Add(";"); tokenizer.Delimiters->Add("."); tokenizer.Delimiters->Add("<"); tokenizer.Delimiters->Add(">"); tokenizer.Delimiters->Add("/"); tokenizer.Delimiters->Add("!"); tokenizer.Delimiters->Add("*"); tokenizer.Delimiters->Add("-"); tokenizer.Delimiters->Add("'"); tokenizer.Delimiters->Add("\""); tokenizer.Delimiters->Add("["); tokenizer.Delimiters->Add("]"); tokenizer.Delimiters->Add("|"); tokenizer.Delimiters->Add("("); tokenizer.Delimiters->Add(")"); tokenizer.Delimiters->Add(","); tokenizer.Delimiters->Add("?"); tokenizer.Delimiters->Add("{"); tokenizer.Delimiters->Add("}"); tokenizer.Delimiters->Add("+"); tokenizer.Delimiters->Add("="); tokenizer.Delimiters->Add("&"); tokenizer.Delimiters->Add(":"); |