views:

151

answers:

4

I'm using Delphi 2009. In my program, I have been working very hard to optimize all my Delphi code for speed and memory use, especially my Unicode string handling.

I have the following statement:

    Result := Result + GetFirstLastName(IndiID, 1);

When I debug that line, upon return from the GetFirstLastName function, it traces into the routine _UStrArrayClr in the System unit:

{ Thin forwarder: transfers control directly to _LStrArrayClr.
  Nothing is pushed or modified here, so _LStrArrayClr sees the same
  StrArray / Count arguments this routine was entered with and returns
  straight to this routine's caller. }
procedure _UStrArrayClr(var StrArray; Count: Integer);
asm
        JMP     _LStrArrayClr   { jump, not CALL: no return to this routine }
end;

This calls _LStrArrayClr:

{ Clears cnt consecutive string references starting at StrArray.

  StrArray : first of cnt pointer-sized slots, each holding a string
             pointer or nil.
  cnt      : number of slots to process.

  PUREPASCAL build: calls _LStrClr on each slot in turn.
  Assembler build : does the work inline for each slot - a nil slot is
  skipped; otherwise the slot is zeroed, and the reference count stored in
  the string's StrRec header is decremented unless it is negative. When the
  count reaches zero the block is handed to _FreeMem. }
procedure       _LStrArrayClr(var StrArray; cnt: longint);
{$IFDEF PUREPASCAL}
var
  P: Pointer;
begin
  P := @StrArray;
  while cnt > 0 do
  begin
    _LStrClr(P^);                       // release the string held in this slot
    Dec(cnt);
    Inc(Integer(P), sizeof(Pointer));   // step to the next slot
  end;
end;
{$ELSE}
asm
        { ->    EAX pointer to str      }
        {       EDX cnt         }

        PUSH    EBX                             { saved here, restored before exit }
        PUSH    ESI
        MOV     EBX,EAX                         { EBX := address of current slot   }
        MOV     ESI,EDX                         { ESI := slots still to process    }

        { NOTE(review): the count is only tested after the first iteration,
          whereas the PUREPASCAL loop tests cnt > 0 up front. Presumably
          callers never pass cnt = 0 - confirm. }
@@loop:
        MOV     EDX,[EBX]                       { fetch str                     }
        TEST    EDX,EDX                         { if nil, nothing to do         }
        JE      @@doneEntry
        MOV     dword ptr [EBX],0               { clear str                     }
        MOV     ECX,[EDX-skew].StrRec.refCnt    { fetch refCnt                  }
        DEC     ECX                             { if < 0: literal str           }
        JL      @@doneEntry
   LOCK DEC     [EDX-skew].StrRec.refCnt        { threadsafe dec refCount       }
        JNE     @@doneEntry
        { EAX := address of the StrRec.codePage field; presumably this is the
          first field of the header, i.e. the start of the allocated block -
          confirm against the StrRec declaration. }
        LEA     EAX,[EDX-skew].StrRec.codePage  { if refCnt now zero, deallocate}
        CALL    _FreeMem
@@doneEntry:
        ADD     EBX,4                           { advance to the next 4-byte slot  }
        DEC     ESI
        JNE     @@loop                          { repeat until the count hits zero }

        POP     ESI
        POP     EBX
end;
{$ENDIF}

and runs through the loop once for each character, and on exit from there it calls _UStrCat:

{ Appends Source to Dest in place (Dest := Dest + Source).

  Outline of the paths below:
    - Source is nil            : return immediately, Dest untouched.
    - Dest is nil              : jump to _UStrAsg with the original arguments.
    - Source and Dest are the
      same string pointer      : @@appendSelf - Dest is resized to twice its
                                 length and its first half is copied onto
                                 its second half.
    - otherwise                : either operand whose StrRec.elemSize is not
                                 2 is passed through _EnsureUnicodeString
                                 (Source via a temporary kept in a stack
                                 slot), Dest is resized to the combined
                                 length and Source's characters are moved
                                 in behind the old contents.
  A combined length with either of the top two bits set is routed to
  _IntOver. }
procedure _UStrCat(var Dest: UnicodeString; const Source: UnicodeString);
asm
        { ->    EAX     pointer to dest }
        {       EDX source              }

        TEST    EDX,EDX       // Source empty, nop.
        JE      @@exit

        MOV     ECX,[EAX]     // ECX := Dest
        TEST    ECX,ECX       // Nil dest => assignment
        JE      _UStrAsg      // EAX/EDX still hold @Dest/Source

        PUSH    EBX
        PUSH    ESI
        PUSH    EDI
        MOV     EBX,EAX         // EBX := @Dest
        MOV     ESI,EDX         // ESI := Source
        CMP     ESI,ECX         // same string pointer on both sides?
        JE      @@appendSelf

        CMP     [ECX-skew].StrRec.elemSize,2
        JE      @@destIsUnicode
        CALL    _EnsureUnicodeString  // EAX is still @Dest here
        MOV     EDI,EAX
        MOV     ECX,EAX         // ECX := value returned for Dest

@@destIsUnicode:
        PUSH    0               // stack slot for an optional temp string (nil = unused)
        CMP     [ESI-skew].StrRec.elemSize,2
        JE      @@sourceIsUnicode

        MOV     EDI,ECX         // keep Dest in EDI across the calls below
        MOV     EAX,ESI
        MOV     [ESP],ESI       // temp slot := Source
        CALL    _UStrAddRef     // called with EAX = Source
        MOV     EAX,ESP         // EAX := address of the temp slot
        CALL    _EnsureUnicodeString
        MOV     ESI,[ESP]       // ESI := temp slot contents, used as Source from here on
        MOV     ECX,EDI         // ECX := Dest again

@@sourceIsUnicode:
        MOV     EDI,[ECX-skew].StrRec.length  // EDI := Length(Dest)
        MOV     EDX,[ESI-skew].StrRec.length  // EDX := Length(Source)
        ADD     EDX,EDI         // EDX := Length(Source) + Length(Dest)
        TEST    EDX,$C0000000   // either of the top two bits set?
        JNZ     @@lengthOverflow

        MOV     EAX,EBX
        CALL    _UStrSetLength  // Set length of Dest
        MOV     EAX,ESI         // EAX := Source
        MOV     ECX,[ESI-skew].StrRec.length // ECX := Length(Source)

        // Shared tail. Entered with EAX = copy source, ECX = chars to copy,
        // EDI = old Length(Dest) and a temp slot (possibly nil) on top of
        // the stack; also reached by JMP from @@appendSelf.
@@noTemp:
        MOV     EDX,[EBX]       // EDX := Dest
        SHL     EDI,1           // EDI to bytes (Length(Dest) * 2)
        ADD     EDX,EDI         // Offset EDX for destination of move
        SHL     ECX,1           // convert Length(Source) to bytes
        CALL    Move            // Move(Source, Dest + Length(Dest)*2, Length(Source)*2)
        MOV     EAX,ESP         // Need to clear out the temp we may have created above
        MOV     EDX,[EAX]
        TEST    EDX,EDX
        JE      @@tempEmpty

        CALL    _LStrClr        // EAX = address of the temp slot

@@tempEmpty:
        POP     EAX             // discard the temp slot
        POP     EDI
        POP     ESI
        POP     EBX
        RET

@@appendSelf:
        CMP     [ECX-skew].StrRec.elemSize,2
        JE      @@selfIsUnicode // EAX is still @Dest on this branch
        MOV     EAX,EBX
        XOR     EDX,EDX
        CALL    _EnsureUnicodeString
        MOV     ECX,EAX
        MOV     EAX,EBX         // EAX := @Dest for _UStrSetLength below

@@selfIsUnicode:
        MOV     EDI,[ECX-skew].StrRec.length  // EDI := Length(Dest)
        MOV     EDX,EDI
        SHL     EDX,1           // EDX := 2 * Length(Dest), the new length
        TEST    EDX,$C0000000
        JNZ     @@lengthOverflow
        CALL    _UStrSetLength
        MOV     EAX,[EBX]       // copy source := Dest as it is after the resize
        MOV     ECX,EDI         // chars to copy := old Length(Dest)
        PUSH    0               // nil temp slot so the shared tail's POP EAX stays balanced
        JMP     @@noTemp

        // NOTE(review): reached with EBX/ESI/EDI (and, on the non-self path,
        // the temp slot) still pushed; presumably _IntOver raises and never
        // returns here - confirm.
@@lengthOverflow:
        JMP     _IntOver

@@exit:
end;

and runs through the whole of that routine.

My "Result" is a string and is thus Unicode. And my GetFirstLastName returns a string which is Unicode. No conversion of character set should be needed.

I can't really tell what these System procedures are doing, but they are adding a lot of overhead to my routine.

What are they doing? Are they necessary? If they aren't necessary, how can I prevent the compiler from calling those routines?

+7  A: 

LStrArrayClr isn't running over a loop once per character; it's running once per string in the array, to decrement the ref count and free the string if it hits 0. This is inserted by the compiler to clean up any strings allocated as local variables, or any temporary strings it creates to hold the results of two strings being concatenated.

UStrCat is the string concatenation routine. It's what string1 + string2 translates to under the hood. The compiler determines that it's supposed to result in a Unicode string, so it takes the two input strings, tests both of them to see if they're Unicode themselves, converts them if they're not (but yours are, so the conversion gets skipped,) then sets the size of the result and copies the data.

UStrCat is necessary, and there's not much you can do about it. LStrArrayClr is where things get a bit fuzzier. When you create a routine that works with strings, the compiler has to allocate enough temporary strings to handle everything you could do in there, whether or not you ever do it. And then it has to clear them afterwards. So cutting down on unnecessary string manipulation by moving uncommon tasks to other functions can help, especially in a tight loop.

For example, how often do you see something like this?

if SomethingIsVeryWrong then
   raise ETimeToPanic.Create('Everybody panic! File ' + filename + ' is corrupt at address ' + intToStr(FailureAddress) + '!!!');

This error message contains 5 different substrings. Even if it manages to optimize things by reusing them, it still needs to allocate at least two temporary strings to make this work. Let's say this is taking place inside a tight loop and you don't expect this error to happen frequently, if at all. You can eliminate the temporary strings by offloading the string concatenation into a Format call. That's such a convenient optimization, in fact, that it's built into Exception.

if SomethingIsVeryWrong then
   raise ETimeToPanic.CreateFmt('Everybody panic! File %s is corrupt at address %d!!!', [filename, FailureAddress]);

Yes, a call to Format will run significantly slower than straight concatenation, but if something goes wrong, it only runs once and performance is the least of your worries anyway.

Mason Wheeler
@Mason: Well, I wouldn't want to offload into a Format call. But maybe the fact that I'm appending a string to the string result of a function call is causing the extra temporary. Would doing this: var S: string; S := GetFirstLastName(IndiID, 1); Result := Result + S; prevent the creation of some extra code because now that temporary S is known to be a Unicode string?
lkessler
@lkessler: No, that's just substituting an explicit temporary string for an automatic one. The compiler still has to clear it either way. Any time you work with strings, these calls will show up. You can reduce it by reducing unnecessary string processing, as in my Exception.CreateFmt example, but you can't really get rid of it. One other thing you can do is to pass strings as **const** if possible. If the compiler knows you won't change them, it doesn't have to update the refcount for that string.
Mason Wheeler
@Mason: Yes, I do attempt to use const whenever possible, and var at other times when possible. My declaration of GetFirstLastName is: function GetFirstLastName(const IndiLoc: pointer; const NameNum: integer): string;
lkessler
@lkessler: The other thing to bear in mind is that these routines are optimized ASM and they're *very fast.* Try running your code through Sampling Profiler and see if you're spending any significant amount of time in there. Optimization is great, but it's a waste of time if you're going after the wrong problem...
Mason Wheeler
Excellent answer, Mason!
Allen Bauer
+6  A: 

The compiler will often create temporaries in which to hold the intermediate values of expressions. These temporaries need to be "finalized" or cleaned up. Since the compiler doesn't know whether or not a certain temp has actually been used (it will skip the finalization if it sees that the variable is still nil), it will always attempt a cleanup pass.

Allen Bauer
@Allen: This is extremely useful to know. Are there certain ways to code Delphi to avoid the creation of these "temporaries" in the critical parts of my program?
lkessler
Assembler ;-). Nearly all expressions will require some form of temporary. For simple integer/integral mathematical expressions, many times these "temporaries" live in CPU registers, which are the fastest form of temporary since they take no memory and never need to leave the CPU (let's ignore task switching). For more complex types, such as strings, they usually cannot live in a register because of the need to use the memory manager to allocate storage for the temp, thus the need to finalize them. Another technique for strings is to allocate a large buffer and fill it in incrementally.
Allen Bauer
+2  A: 

You may also be interested in these:

Alexander
Thanks for the links. I already knew about $STRINGCHECKS and have that compiler option turned off for my program, which is not the default.
lkessler
+2  A: 

Take a look at TStringBuilder class.

Alexander
I have looked at it and at what Marco Cantu has said about TStringBuilder. It is mainly for .NET compatibility and is usually slower than simple concatenation.
lkessler
I think you've misread this post. Do you *usually* append 1 character? This post says that TStringBuilder can be slower in certain specific conditions.
Alexander