Delphi Knowledge Base: február 2004

2004. február 29., vasárnap

Determine which application is associated with a specific file name extension

Problem/Question/Abstract:

Determine which application is associated with a specific file name extension

Answer:

To determine which application is associated with a specific file name extension you can look in the registry under HKEY_CLASSES_ROOT.

Easier is to use the Windows API function FindExecutable() - see the following example. The code shows the fully qualified associated application or the last error (using GetLastError).

program Project1;

uses
  Forms, Dialogs, ShellAPI, SysUtils, Windows;

{$R *.RES}

var
  sApp: array[0..256] of char;
begin
  // this should return something like c:\program files\office\excel.exe

  if FindExecutable('c:\temp\xxx.xls', nil, sApp) >= 32 then
    ShowMessage(sApp)
  else
    ShowMessage(SysErrorMessage(GetLastError));
end.

2004. február 28., szombat

How to get the number of matches when searching a string

Problem/Question/Abstract:

I want to check how many of the individual characters in a search string are matching a source string. When comparing the strings, each character position should be checked individually. Examples:
Seach string: 'ABC' / Source string: 'ABX' / Result: 2 matching positions
Seach string: 'ABC' / Source string: 'AXB' / Result: 1 matching position

Answer:

Solve 1:

Assuming the compare always starts from the beginnig of the source string, i.e. Search string: 'ABC' and Source string: 'AXAB' returns 1:

function MyStrComp(strSearch, strSource: string): integer;
var
  LenSearch, LenSource, Len: integer;
  LoopCounter: integer;
begin
  LenSearch := length(strSearch);
  LenSource := length(strSource);
  if LenSearch > LenSource then
    Len := LenSource
  else
    Len := LenSearch;
  if Len = 0 then
    Result := 0
  else
  begin
    LoopCounter := 1;
    repeat
      if (strSearch[LoopCounter] <> strSource[LoopCounter] then
        break;
        Inc(LoopCounter);
    until
    (LoopCounter > Len);
    Result := LoopCounter - 1;
  end;
end;

The above function may not be the fastest, but it should be pretty fast. If you are going to call this compare function in a loop and the search string will not change within the loop, find the length of the search string before getting in the loop, then pass it to the compare function as a parameter can save a call to the length function in the compare function.

If you know that the source string is always longer than the search string, you can skip comparing the lengths and use the length of the search string to control your repeat loop.

Solve 2:

From AdvanceStringCore www.excommunicant.co.uk. You may use this routine for non commercial purposes providing appropriate credit is given.

{Concurrent matching char elements}

function ConcurrentCountMatchingCharElements(const SourceChars, MaskChars: string;
  const CSensitive: Boolean): Integer;

{Version 1.0 (September 2000 - Lachlan Fairgrieve)
(C)2000 Excommunicant www.excommunicant.co.uk
Returns the number of times [mask[i]] and [source[i]] characters match}

var
  {initialise :
  validate strings (for length)
  Count Lengths
  normalise lengths
  Prepare Pointers (For case insensitive and standard search)
  Source Scan : For Every [Source] Char check against every [mask] char}
  NewSource, NewMask: string; {Hold normalised strings}
  SourceP, MaskP: PChar;
  index: Integer; {Source and Mask index vars}
  ScanLength: Integer; {The number of chrs to scan}
begin
  {Initialise}
  Result := 0;
  if SourceChars = '' then
    exit;
  if MaskChars = '' then
    exit;
  ScanLength := Length(SourceChars);
  if Length(MaskChars) < ScanLength then
    ScanLength := Length(MaskChars);
  {prepare string pointers}
  if not CSensitive then
  begin
    NewSource := uppercase(SourceChars);
    NewMask := uppercase(MaskChars);
    SourceP := Pointer(NewSource);
    MaskP := Pointer(NewMask);
  end
  else
  begin
    SourceP := Pointer(SourceChars);
    MaskP := Pointer(MaskChars);
  end;
  {source scan}
  for index := 1 to ScanLength do
  begin
    if MaskP^ = SourceP^ then
      inc(result);
    inc(MaskP);
    inc(SourceP);
  end;
end;

2004. február 27., péntek

Roll up and restore a TForm when clicking on the title bar

Problem/Question/Abstract:

How to roll up and restore a TForm when clicking on the title bar

Answer:

The standard behaviour for double clicking a title bar is to maximize/ restore the form. The following class changes the double click action to add a new effect which is RollUp/ Restore. Copy the following unit and place it in a directory which is recognised by Delphi's search path.

unit OrckaForm;

interface

{$B-}

uses
  Messages, Forms, Classes;

type
  TOrckaForm = class(TForm)
  private
    FOldHeight: Longint;
    FRollUp, FRolledUp: Boolean;
  protected
    procedure WMNCLDblClick(var Msg: TMessage); message WM_NCLBUTTONDBLCLK;
    procedure WMGetMinMaxInfo(var Msg: TMessage); message WM_GETMINMAXINFO;
  public
    constructor Create(AOwner: TComponent); override;
    property RollUp: Boolean read FRollUp write FRollUp;
  end;

implementation

uses
  Windows;

procedure TOrckaForm.WMNCLDblClick(var Msg: TMessage);
begin
  if (Msg.wParam = HTCAPTION) and (FRollUp) then
  begin
    if FRolledUp then
    begin
      FRolledUp := False;
      Height := FOldHeight;
    end
    else
    begin
      FRolledUp := True;
      FOldHeight := Height;
      Height := 0
    end;
  end
  else
    inherited;
end;

constructor TOrckaForm.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  FOldHeight := Height;
  FRollUp := True;
  FRolledUp := False;
end;

procedure TOrckaForm.WMGetMinMaxInfo(var Msg: TMessage);
begin
  inherited;
  if FRolledUp then
    pMinMaxInfo(Msg.lParam)^.ptMaxTrackSize.y := Height;
end;

end.

To use the form create a new form which will look something like..

unit Unit3;

interface

uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs;

type
  TForm3 = class(TForm)
  private
    { Private declarations }

Add OrckaForm to the uses clause and change the following line

TForm3 = class(TForm)

to

TForm3 = class(TOrckaForm)

Run your project, whenever you double click the title the form will roll up/ restore.

2004. február 26., csütörtök

Search for shared folders in a network

Problem/Question/Abstract:

How to search for shared folders in a network

Answer:

You will need a listbox, a radiogroup with 3 radio buttons and of course a button. It takes a while depending on the size of your network.

procedure TForm1.EnumNetResources(List: TStrings);

  procedure EnumFunc(NetResource: PNetResource);
  var
    Enum: THandle;
    Count, BufferSize: DWORD;
    Buffer: array[0..16384 div SizeOf(TNetResource)] of TNetResource;
    i: Integer;
  begin
    if WNetOpenEnum(RESOURCE_GLOBALNET, RESOURCETYPE_ANY, 0, NetResource,
      Enum) = NO_ERROR then
    try
      Count := $FFFFFFFF;
      BufferSize := SizeOf(Buffer);
      while WNetEnumResource(Enum, Count, @Buffer, BufferSize) = NO_ERROR do
        for i := 0 to Count - 1 do
        begin
          case RadioGroup1.ItemIndex of
            0:
              begin {Network Machines}
                if Buffer[i].dwType = RESOURCETYPE_ANY then
                  List.Add(Buffer[i].lpRemoteName);
              end;
            1:
              begin {Shared Drives}
                if Buffer[i].dwType = RESOURCETYPE_DISK then
                  List.Add(Buffer[i].lpRemoteName);
              end;
            2:
              begin {Printers}
                if Buffer[i].dwType = RESOURCETYPE_PRINT then
                  List.Add(Buffer[i].lpRemoteName);
              end;
          end;
          if (Buffer[i].dwUsage and RESOURCEUSAGE_CONTAINER) > 0 then
            EnumFunc(@Buffer[i])
        end;
    finally
      WNetCloseEnum(Enum);
    end;
  end;

begin
  EnumFunc(nil);
end;

procedure TForm1.Button1Click(Sender: TObject);
begin
  Listbox1.Clear;
  Screen.Cursor := crHourglass;
  EnumNetResources(ListBox1.Items);
  Screen.Cursor := crDefault;
end;

2004. február 25., szerda

Parsing the Web

Problem/Question/Abstract:

Three Classes for Grabbing HTML/XML Information

Answer:

I recently bought one of the new digital satellite dishes and ran across an interesting challenge - figuring out just what was on, and when. DirectTV provided a Web site with a basic search engine, but the Web-based search was slow and very narrowly focused. As a typical programmer, I knew I could provide a better, more powerful UI, if I could just figure out how their Web search engine worked.

A quick scan of the HTML source pointed out a relatively simple search form that I could easily duplicate, but the HTML results came back in a mildly complicated HTML table. Brute force code would have been simple enough to construct to parse through the table, but I'd been looking for a reason to build a more general parser, so off I went. If I'd known just how lax the HTML rules are, and just how many hacks there are, I'd have just stuck with the brute force method and saved myself a lot of agony, but since I'm here now ...

The Basics

To put together a parser for HTML, an understanding of the rules is required. HTML originated as an SGML-like syntax, and over time has grown to fit more closely within the confines of SGML. These days the syntax is described within an SGML Data Type Definition (DTD) bringing it into a reasonably well-understood and managed domain. Given that SGML now establishes the underpinnings of HTML, the parser should apply the SGML syntax rules as a starting point. This also allows for the consideration of some simple extensions that allow parsing of XML.

Therefore, the parser is built to work on SGML in general, with specific handlers for exceptions and extensions that occur in HTML and XML. The rules for SGML are straightforward, and provide five basic constructs that we care about:

elements,
attributes,
comments,
SGML directives, and
"everything else"

Elements are the facet of the SGML content with which we are most concerned, and around which the parser is established. Elements have a start tag, content, and an end tag, for example:

<TITLE>HTML Parsing</TITLE>

where TITLE is considered to be the "name" of the tag. Element names are case-insensitive. So we start with the following parsing rules:

Element start and end tags are surrounded by < and > characters.
Element end tags are denoted by the / character immediately following a <.
Element content is surrounded by start and end tags.

HTML Extensions

In HTML, we immediately note that there are exceptions to these rules. For some elements - most notably <P> - the end tags may be omitted, even though the element may have contents. This offers perhaps the most annoying challenge of the HTML parsing rule set, because there are several methods by which the element may be terminated.

To start with, we note another syntax rule from SGML: elements may not span. That is, if an element's start tag is contained within another element's start and end tags, its end tag must also appear there. Put simply, if we encounter an end tag, all omitted end tags are considered "closed" back up to the matching start tag. Also, by observation (I couldn't find a formal rule for this in the HTML specification), virtually all elements with optional end tags close when they encounter another of themselves, <LI> and <OPTION> being fine examples. Further, the HTML reference material does indicate that <P> elements that omit their end tags are terminated by "block elements." The HTML DTD must be consulted to determine which elements are considered block elements. Unfortunately, all of this prevents us from using a general rule, and requires that we become concerned with the HTML DTD.

A quick consideration of the DTD is therefore in order. The DTD calls out which elements must have end tags, and which may omit (or are forbidden to have) end tags. For example, the DTD fragment for <P> is:

<!ELEMENT P - O (%inline;)* >

The important thing to note here is - O. The - indicates the start tag is required, and O means the end tag may be omitted. Compare this to the fragment for <BR>:

<!ELEMENT BR - O EMPTY >

where - O EMPTY indicates that the start tag is required. Since the element is EMPTY, however, the optional end tag is now expressly forbidden. In the fragment for <P>, the (%inline;)* shows the legal contents of the P element. In this case, %inline; refers to a list of elements defined earlier in the DTD. Notably missing from the %inline; list is <P> itself. A perusal of the other ambiguous elements reinforces the observation that in general, an element may not immediately contain itself (although this ambitiously general rule is certainly not guaranteed to remain valid for future releases of the HTML DTD). In the same way that %inline; is defined, so there exists a list name %block; which contains the list of block elements.

This leads to another set of parsing rules:

<P> with an omitted end tag is terminated when a block element is encountered.
Elements are terminated when another element of the same name is encountered.
Elements are terminated if a parent's end tag is encountered; no spans are allowed.

Attributes

Attributes represent the various properties of elements. By definition, attributes appear in name/value pairs within the start tag of the element. For example, in:

<BODY bgcolor="#FFFFFF">

the BODY element has an attribute name bgcolor, and the attribute has a value of #FFFFFF. Double quotation marks or single quotation marks are required to delimit the value, unless the value contains only letters, digits, hyphens, and periods. If the value contains single quotation marks, it should be delimited with double quotation marks, and vice versa. Attribute names are case-insensitive. Also worth noting is that not all attributes have a value. For instance, the NOWRAP attribute of the <TD> element.

Attributes appear within an element's start tag
Attributes are delimited by a space character (ASCII 32)
Attribute values are delimited by " or '

Comments

SGML also provides that its content may include comments. Comments are of the form:



The <! is a markup declaration open delimiter, and indicates that an SGML directive is to follow. Comments are specifically denoted by -- following the open delimiter (white space is not allowed between the <! and the --, but is allowed between the closing -- and >). Further, a comment may contain < and > characters. Comments may not include other comments.

 terminates a comment.
< and > are ignored while parsing a comment.

The remaining SGML directives are denoted by the beginning markup declaration open delimiter <!. To further complicate things, comments may exist within the directives delimited by --.

<! denotes an SGML directive (if it's not a comment)
Comments within the directives are delimited by --

Lastly, we consider what remains. Content of elements not contained within a start tag, end tag, or comment is considered by the parser to be PCData (parsed character data).

Store text not included in element start/end tags or comments as PCData.

XML Extensions

As mentioned before, XML is also derived from SGML. While HTML is basically a DTD described within and using SGML, XML is a subset of SGML capable both of representing data and containing other DTDs of its own. XML also demonstrates that those working on the standards in the programming community actually do learn from the mistakes of those that went before them. For instance, the rules of containment are much more formal in XML than they are in HTML, making parsing a great deal simpler. This means that while a DTD may be included in an XML document for syntax checking purposes, it isn't necessarily required for the actual parsing of the XML content, as it is for HTML.

Knowing this, we can add two more rules and provide DTD-less XML parsing as well. For one, empty elements in HTML are simply called out as such in the DTD with their end tags forbidden (<BR> for example). If an element in XML is to be empty (that is, it will have no content) its start tag may have a / just before the closing > indicating that no content and no end tag will follow. Additionally, XML directives may appear with the ? character rather than !.

Empty elements in XML may be terminated by a / just before the > in the start tag, e.g. <partno/>.
Additional directives appear using ?, instead of the ! character.

Everything Else

Any items encountered in the content that are not contained in element start or end tags, comments, or DTD items are considered by the parser to be PCData. The content of elements fits this bill, as do carriage returns and line feeds encountered by the parser. This leads to the final parsing rule:

Any content located outside of start/end tags, comments, or DTD items is PCData.

Additional Considerations

It is also worth noting that syntax errors and occurrences of "browser-tolerated" HTML inconsistencies are frequently encountered, and as such, should not raise exceptions except in extreme cases. Instead, a warning should be noted and parsing should continue if possible.

The Parser

The goal of parsing the SGML content is to place the data it represents into a form more readily accessible to other components. Since SGML calls out a hierarchical structure a hierarchy is probably the most accurate way to store the parsed content. With that in mind, the parser is built from two primary classes, and a third supporting class.

First and foremost is the THTMLParser class. Its Parse method accepts the content to be parsed, and places the processed results in the Tree property.
Next is the TTagNode class in which the parsed results are contained. This class is a hierarchical storage container with Parent pointing to the TTagNode that contains the current node, and Children containing a list of children immediately owned by the current node.
TTagNodeList is provided as a list container for a collection of TTagNode objects, typically produced by a call to the GetTags method of the TTagNode class.

A Simple Example

Consider the sample HTML shown in Figure 1. The parser would produce from the HTML a hierarchy that can be visualized as in Figure 2. Each of the boxes in the tree represents a TTagNode instance.

<HTML>
<HEAD>
<TITLE>Example HTML</TITLE>
</HEAD>
<BODY>

<H1>Example HTML</H1>
Plain old text right out there in the middle of the document.
<P>Text contained within in paragraph</P>
<B>Unordered List</B>
<UL>
<LI>Item #1
<LI>Item #2
<LI>Item #3
</UL>
</BODY>
</HTML>
Figure 1: Sample HTML.

Figure 2: Hierarchical representation of parsed HTML.

Each node has a NodeType property that indicates what type of node it is. All node types except ntePCData also have text in the Caption property that provides more information about the node's contents. See the table in Figure 3 for details.

Node Contents
NodeType
Caption
HTML elements
nteElement
Element name
Text
ntePCData

Comments
nteComment
!
SGML/XML/DTD directives
nteDTDItem
! or ? and directive

Figure 3: TTagNode node types.

For example, the HTML node in Figure 2 has a NodeType of nteElement, while the comment tag is of type nteComment, and the PCData nodes are of type ntePCData.

The content or text for each HTML element is contained in a node in its Children list. For example, the TITLE node in Figure 2 has a PCData node whose Text property contains "Example HTML". The GetPCData method of a TTagNode returns the PCData text for all children of the node for which it's called. Note, this method is recursive and will return the PCData text for all nodes in the tree beneath the node upon which it's called.

Retrieving Elements

The GetTags method of a TTagNode will return a list of all children that match an element name. If '*' or '' is specified as the element name, then all children will be returned. Note that this method is recursive. The result list is a TTagNodeList.

The code in Figure 4 illustrates how the GetTags method is used to collect a list of all <LI> elements in the HTML from Figure 1 and insert their contents into a list box. The process is as follows:

Create a container for the results, i.e. the Elements list.
Call the GetTags method, passing the desired element name and the container.
Iterate through the container, placing the text for each element in a list box.
Destroy the container.

procedure Button1OnClick(Sender: TObject);
var
  Elements: TTagNodeList;
  Counter: Integer;
begin
  Elements := TTagNodeList.Create;
  HTMLParser1.Tree.GetTags('LI', Elements);
  for Counter := 0 to Elements.Count - 1 do
    ListBox1.Items.Add(Elements[Counter].GetPCData);
  Elements.Free;
end;
Figure 4: Using GetTags.

TTagNodeList has two methods that offer another approach (assuming that a result set has already been acquired): GetTagCount and FindTagByIndex. GetTagCount returns a count of the occurrences of an element name. FindTagByIndex returns the index within the list of the specified occurrence of an element name. For instance, the statement:

ShowMessage(Elements.FindTagByIndex('li', 1).GetPCData);

were it included in Figure 4, would display the text for the second occurrence of the <LI> element in the Elements container. This can prove exceptionally useful for locating a specific tag from target HTML content. For example, if the third <TABLE> in the HTML contained the desired data, the following code would make quick work of locating the root <TABLE> node:

HTMLParser1.GetTags('*', Elements);
Node := Elements.FindTagByIndex('table', 2);
if Assigned(Node) then
begin
  // Perform processing on <TABLE> node.
end;

Working with Results as a Hierarchy

The procedure in Figure 5 provides yet another method for accessing the contents of the Tree (albeit a more brute force approach). In this example, the HTML content is assumed to be fairly straightforward: <TABLE> elements contain <TR> elements, which contain <TH> and <TD> elements. Given this reasonably accurate assumption, the code will walk the children of a <TABLE> node, and for each <TR> node found will walk its children looking for occurrences of either <TH> or <TR>, and add their text (contained in PCData nodes) to a TStrings container, e.g. the Lines property of a TMemo control.

// Return TableNode's contents in a TStrings container.

procedure GetTable(TableNode: TTagNode; Lines: TStrings);
var
  RowCtr,
    DataCtr: Integer;
  Node,
    RowNode: TTagNode;
  TempStr: string;
begin
  Lines.Clear;
  if CompareText(TableNode.Caption, 'table') = 0 then
  begin
    for RowCtr := 0 to TableNode.ChildCount - 1 do
    begin
      RowNode := TableNode.Children[RowCtr];
      if CompareText(RowNode.Caption, 'tr') = 0 then
      begin
        TempStr := '';
        for DataCtr := 0 to RowNode.ChildCount - 1 do
        begin
          Node := RowNode.Children[DataCtr];
          if CompareText(Node.Caption, 'td') = 0 then
            TempStr := TempStr + Node.GetPCData + #9
          else if CompareText(Node.Caption, 'th') = 0 then
            TempStr := TempStr + Node.GetPCData + #9;
        end;
        TempStr := Trim(TempStr);
        if TempStr <> '' then
          Lines.Add(TempStr);
      end;
    end;
  end;
end;
Figure 5: Working with results as a hierarchy.

As an illustration of just how difficult HTML processing can be, the following caveats apply to the code provided in Figure 5 and would have to be handled to provide a robust solution:

Subtables are not handled. That is, <TABLE> elements encountered within a <TD> element are ignored.
Row and column spanning is not handled.
<TBODY>, <THEAD> and a host of other table elements are not considered (although admittedly they are rare).

Working with Results as a List

The procedure in Figure 6 demonstrates working with the parsed results as a list. The goal here is to retrieve a list of all comments, links, meta tags, and images from the document, with the <TITLE> thrown in for good measure. The code does this in several simple steps:

Parse the HTML.
Create a container for the list of matching nodes from the tree.
Call the GetTags method passing '*', and the container ('*' indicates that all items in the tree should be returned).
Iterate through the container collecting matches and place their contents in the StringList.
Destroy the container.

procedure TForm1.Parse(HTML: string; Lines: TStrings);
const
  cKnownTags = '|title|img  |a    |meta |!    ';
  cTITLE = 0;
  cIMG = 1;
  cA = 2;
  cMETA = 3;
  cComment = 4;
var
  Index,
    Counter: Integer;
  TempStr: string;
  Nodes: TTagNodeList;
begin
  HTMLParser1.Parse(HTML);
  Nodes := TTagNodeList.Create;
  // Retrieve all nodes.
  HTMLParser1.Tree.GetTags('*', Nodes);
  for Counter := 0 to Nodes.Count - 1 do
  begin
    TempStr := '|' + LowerCase(Nodes[Counter].Caption);
    // Index of element name.
    Index := Pos(TempStr, cKnownTags);
    if Index > 0 then
    begin
      Index := Index div 6;
      case Index of
        cTITLE:
          Lines.Add('Title=' +
            HTMLDecode(Nodes[Counter].GetPCData));
        cIMG:
          begin
            TempStr := Nodes[Counter].Params.Values['src'];
            if TempStr <> '' then
              Lines.Add(
                Nodes[Counter].Params.Values['src']);
          end;
        cA:
          begin
            TempStr :=
              Nodes[Counter].Params.Values['href'];
            if TempStr <> '' then
              Lines.Add(TempStr + '=' +
                HTMLDecode(Nodes[Counter].GetPCData));
          end;
        cMETA:
          with Nodes[Counter].Params do
            Lines.Add(Values['name'] + '=' +
              Values['content']);
        cComment:
          Lines.Add('[Comment] ' +
            HTMLDecode(Nodes[Counter].Text));
      end; {  case Index }
    end; {  if Index > 0 }
  end; {  for Counter := 0 to Nodes.Count - 1  }
  Nodes.Free;
end;
Figure 6: Working with results as a list.

The important thing to understand here is that the TTagNodeList class is just a list of pointers to nodes from the Tree. This is quite beneficial in that once a desired node is located in the list, it may be used as if it had been acquired by traversing the tree. For example, when the case statement in Figure 6 encounters a TITLE element, its contents are retrieved by making a call to the TITLE node's GetPCData method (which depends on the parsed tree structure behaving as it appears to in Figure 2). Note that the PCData often contains encoded items such as > and < (< and > respectively). HTMLDecode is provided for handling most simple cases, but doesn't handle all cases (notably non-US character encoding).

This example also demonstrates the use of the attributes from an element. When an <A> element is encountered, the HREF attribute is examined. If it exists, the <A> element is treated as a link to some other resource. If the HREF attribute were not specified, this might be an instance of <A> serving as an anchor instead of a link. For more details on how the Params property of the TTagNode behaves, see the Delphi help for the Names and Values properties of the TStrings class.

Searching the www.directv.com Program Guide

Applying the HTML parser to the original need turns out to be another simple (albeit involved) exercise. First, an understanding of the CGI scripts that allow searching of the program guide is required. As it turns out, the search script is rather crude and accepts only three parameters: timezone, category, and search text. timezone is simply a number representing Eastern, Central, Mountain, or Pacific. category allows the search to span all programs, or to be narrowed to certain types of programs such as movies or sports. The search text should be all, or a portion of, the desired program name. The search is specific to program names, and doesn't consider program descriptions.

The results of the search are returned as an HTML table including channel, date, time, duration, and program name. The program name is contained within a link to the program description, which will need to be retrieved as well. This is slightly complicated by the fact that the link provided is written using JavaScript which we cannot simply call. However, the URL produced by the JavaScript function is easy to replicate, as it contains a program ID number that can be passed to another CGI script that returns the desired description.

The next step is to parse the HTML and process the results into a more useful format. TStringTable is provided as a simple container for just this purpose. The TStringTable offers a non-visual equivalent to TStringGrid with a few additional methods to make manipulating the data a bit easier. Once the HTML table has been processed and placed in the string table, a bit of house cleaning is required. For one, there are rows at the end of the HTML table that need to be ignored, as they contain images, not program content. Also, the channel appears only in the first row of a set of programs that occur on that channel.

The contents can now be added to a ListView. Once that task is complete, the descriptions can be fetched using the program IDs to call the description CGI, and then added to the ListView.

Further Study

While these examples are not terribly glamorous, the parser can be applied to more meaningful problems. For instance, a friend of mine has put together an extremely handy application using the Pricewatch site (http://www.pricewatch.com/) to monitor prices on PC hardware. Pricewatch offers a current snapshot of pricing of a particular piece of hardware from various vendors, usually sorted from least expensive to most. However, it doesn't allow for viewing of several different pieces of hardware at once, and it doesn't track the history of the price changes for the hardware. So, the application provides a way to build a list of hardware to be tracked, and then offers a simple trend analysis by gathering and saving off the price information on a regular basis. This provides a useful picture for the consumer of just how quickly the prices are moving downward on a particular item. If the pricing is in a steep downward curve, waiting to purchase might be wise. If the curve is flat, the time to purchase might be at hand.

The parser is used not only to retrieve the pricing data, but also to help deal with one of the more significant issues facing those attempting to interface with sites they do not control: unexpected changes in the target site's contents. In this case, a review of the <form> elements from the main page is performed to ensure that the query mechanism remains intact. As a further safeguard, the search results page is also examined to verify a match against the expected HTML format. If unexpected items are found in either case, processing cannot continue, but at least the user can be warned of the situation.

In a more interesting demonstration of the parser's abilities, it has been combined with a database to create a poor man's OODB (object-oriented database). XML is used to wrap the data, and is then stored in text fields in the database. When needed, the XML is retrieved and the parser used to extract the data. Without going in to detail, this is useful because the data stored in the database can carry semantic information with it (the XML elements) that provides information about the data's structure. In systems where the data structure is dynamic, this provides a simple way to avoid excessive database maintenance and further provides a clean, easily understood mechanism for the exchange of data between various applications and platforms. In the case mentioned here, a legacy defect (bug) tracking system hosted on a Solaris platform was wrapped in a Delphi based UI.

Additional Demonstration Applications

To further demonstrate the power of the classes presented in this article, two additional applications accompany this article. An HTML Engine Demo application (see Figure 7) displays a great deal of information about any selected URL, including meta tags, links, and images.

Figure 7: The HTML Engine Demo application.

The Parser Test application parses any URL, or SGML/HTML/XML document, and displays the results in a TreeView (see Figure 8). It can also display links, selected tags, text, etc.

Figure 8: The Parser Test application.

Room for Improvement

This parser builds a reasonable basis for parsing of HTML and XML but, offers significant room for further development.

Incorporation of a DTD processor (DTDs can be parsed with the existing parser, but no handling of the parsed contents is provided). This would provide two main benefits: more thorough parsing of elements based a true understanding of their legal contents and no need for hard coding a representation of the HTML DTD within the parser. Further, DTD-based XML parsing would then be possible.

A DOM container model to complement the TTagNode model. DOM represents a fairly well understood and commonly encountered model for representing the parsed contents of XML. While it doesn't suit all needs, it does provide a useful, standard way to communicate about the parsed elements.

XQL or other suitable extended model query mechanism. The GetTags method is reasonably sufficient, but for more exhaustive queries against XML contents a more advanced mechanism is desired. For example, it would be extremely handy if GetTags could be passed 'order/partno' to indicate that we're searching for all PARTNO elements that are immediately below an ORDER element.

Further performance tuning. While some attention has been paid to this area, no extreme efforts to speed things up were applied. Most notably, Delphi's string routines are not considered to be as fast as those provided in some third-party string-handling collections (notably HyperString).

Resources and Alternatives

For more information and examples, please visit http://www.dallas.net/~richardp/delphi/components/delphicomps.htm.

HTML 4.0 Specification -http://www.w3.org/TR/REC-html40. This is the single most useful resource to those seeking HTML enlightenment. It is extremely detailed and well written.

HTML 4.0 Loose DTD -http://www.w3.org/TR/REC-html40/loose.dtd. A part of the HTML 4.0 specification, this offers the exact specification of just what the HTML rules are. It is upon this DTD (as opposed to the "strict" DTD) that the parser is designer to operate.

"XML: Creating Structures of Meaning," Visual Developer, Nov/Dec 1998, Vol. 9 No. 4. A quick survey of XML for the beginner. Syntax and use are explored here with an eye to bringing the novice on board.

"Using Internet Explorer's HTML Parser," Dr. Dobb's Journal, #302, August 1999 (http://www.ddj.com/articles/1999/9908/9908toc.htm). This article offers an examination of using the HTML parser that is available within Microsoft's Internet Explorer via COM interface. The source for the article is in C++, but it's not difficult to follow.

"XML from Delphi," Delphi Informant Magazine, July 1999, Vol. 5 No. 7. A beginner's explanation of XML, and an excellent example of using the XML parser included in Internet Explorer 4.0.

Begin Listing One - Searching www.directv.com
unit Unit1;

interface

uses
  Windows, Messages, SysUtils, Classes, Controls, Forms,
  Dialogs, ComCtrls, StdCtrls, ExtCtrls, GetURL, HTMLMisc,
  HTMLParser, StringTable;

type
  TForm1 = class(TForm)
    WIGetURL1: TWIGetURL;
    StatusBar1: TStatusBar;
    Panel2: TPanel;
    pbSearch: TButton;
    ebSearchText: TEdit;
    Panel3: TPanel;
    ListView1: TListView;
    HTMLParser1: THTMLParser;
    procedure pbSearchClick(Sender: TObject);
    procedure WIGetURL1Status(Sender: TObject;
      Status: Integer; StatusInformation: Pointer;
      StatusInformationLength: Integer);
  private
    procedure GetTable(TableNode: TTagNode;
      Table: TStringTable);
    function GetDescription(Node: TTagNode): string;
  end;

var
  Form1: TForm1;

implementation

{$R *.DFM}

const
  cDirecTVSearchURL =
    'http:// 206.17.88.15/cgi-bin/pgm_search.cgi/';
  cDirecTVDescURL =
    'http:// 206.17.88.15/cgi-bin/pgm_desc.cgi/';

  // Return TableNode's contents in Table

procedure TForm1.GetTable(TableNode: TTagNode;
  Table: TStringTable);
var
  RowCtr,
    DataCtr: Integer;
  Node,
    RowNode: TTagNode;
begin
  Table.Clear;
  if LowerCase(TableNode.Caption) = 'table' then
  begin
    for RowCtr := 0 to TableNode.ChildCount - 1 do
    begin
      RowNode := TableNode.Children[RowCtr];
      if LowerCase(RowNode.Caption) = 'tr' then
      begin
        Table.NewRow;
        for DataCtr := 0 to RowNode.ChildCount - 1 do
        begin
          Node := RowNode.Children[DataCtr];
          if LowerCase(Node.Caption) = 'td' then
            Table.AddColumnObject(Node.GetPCData, Node)
          else if LowerCase(Node.Caption) = 'th' then
            Table.AddHeader(Node.GetPCData);
        end;
        if Table.Row[Table.RowCount - 1].Count <= 0 then
          Table.DeleteRow(Table.RowCount - 1);
      end;
    end;
  end;
end;

function TForm1.GetDescription(Node: TTagNode): string;
var
  TempStr: string;
begin
  Result := '';
  if Node.ChildCount > 0 then
    TempStr := Node.Children[0].Params.Values['href']
  else
    TempStr := '';

  if TempStr <> '' then
  begin
    // Parse out the description id
    Delete(TempStr, 1, Pos('(', TempStr));
    Delete(TempStr, Pos(')', TempStr), Length(TempStr));
    WIGetURL1.URL := cDirecTVDescURL + TempStr;
    Screen.Cursor := crHourglass;
    Application.ProcessMessages;
    WIGetURL1.GetURL;
    Screen.Cursor := crDefault;
    StatusBar1.Panels.Items[0].Text := '';
    if WIGetURL1.Status = wiSuccess then
    begin
      TempStr := WIGetURL1.Text;
      // Use brute force to scrape out program description.
      if Pos('<BLOCKQUOTE>', TempStr) > 0 then
      begin
        Delete(TempStr, 1, Pos('<BLOCKQUOTE>', TempStr) + 11);
        Delete(TempStr, Pos('</BLOCKQUOTE>', TempStr),
          Length(TempStr));
      end;
      Result := TempStr;
    end;
  end
end;

procedure TForm1.pbSearchClick(Sender: TObject);
const
  tzPacific = '0'; // Time zones.
  tzMountain = '1';
  tzCentral = '2';
  tzEastern = '3';
  cgMovies = '0'; // Categories.
  cgSports = '1';
  cgSpecials = '2';
  cgSeries = '3';
  cgNews = '4';
  cgShopping = '5';
  cgAllCategories = '-1';
var
  Cols,
    Rows: Integer;
  NewItem: TListItem;
  Node: TTagNode;
  Nodes: TTagNodeList;
  ResultTable: TStringTable;
  TempStr: string;
begin
  if ebSearchText.Text = '' then
    Exit;
  WIGetURL1.URL := cDirecTVSearchURL + tzCentral + '/' +
    cgAllCategories + '/' + urlEncode(ebSearchText.Text);
  Screen.Cursor := crHourglass;
  Application.ProcessMessages;
  WIGetURL1.GetURL;
  Screen.Cursor := crDefault;
  StatusBar1.Panels.Items[0].Text := '';
  if WIGetURL1.Status = wiSuccess then
  begin
    if Pos('No program titles that match',
      WIGetURL1.Text) > 0 then
      ShowMessage('No matches found')
    else
    begin
      // Attempt to parse HTML table we're looking for.
      HTMLParser1.Parse(WIGetURL1.Text);
      Nodes := TTagNodeList.Create;
      HTMLParser1.Tree.GetTags('table', Nodes);
      if Nodes.Count > 0 then
      begin
        ResultTable := TStringTable.Create;
        GetTable(Nodes[0], ResultTable);
        // Get rid of image tags at bottom of search
        // response (the 2nd column has no contents).
        with ResultTable do
          for Rows := RowCount - 1 downto 0 do
            if Cells[1, Rows] = '' then
              DeleteRow(Rows);
        // Ensure all cells are filled appropriately
        // (in the HTML table, a RowSpan attribute
        // allows the "Channel" to be displayed in
        // one cell for several programs).
        with ResultTable do
          for Rows := 0 to RowCount - 1 do
            for Cols := 0 to ColCount - 1 do
              if Cells[Cols, Rows] = '' then
                if Rows > 0 then
                  Cells[Cols, Rows] :=
                    Cells[Cols, Rows - 1];
        // Add items to ListView (Program, Channel,
        // Data, Time).
        ListView1.Items.Clear;
        for Rows := 0 to
          ResultTable.RowCount - 1 do
        begin
          NewItem := ListView1.Items.Add;
          NewItem.Caption :=
            ResultTable.Cells[4, Rows];
          NewItem.SubItems.Add(
            ResultTable.Cells[0, Rows]);
          NewItem.SubItems.Add(
            ResultTable.Cells[1, Rows]);
          NewItem.SubItems.Add(
            ResultTable.Cells[2, Rows]);
        end;
        // Retrieve program descriptions (program id
        // contained in 4th column's node).
        for Rows := 0 to
          ResultTable.RowCount - 1 do
        begin
          // It's rude to whack the server :-)
          Sleep(500);
          Node :=
            TTagNode(ResultTable.Objects[4, Rows]);
          TempStr := GetDescription(Node);
          ListView1.Items[Rows].
            SubItems.Add(TempStr);
        end;
        ResultTable.Free;
      end // if Nodes.Count > 0 ...
      else
        ShowMessage(
          'Error - Expected table...found none');
    end; // else of Pos('No program titles that...
  end // WIGetURL1.Status = wiSuccess...
  else
    ShowMessage('Unable to contact search server [' +
      WIGetURL1.ErrorMessage + ']');
end;

end.
End Listing One

Component Download: http://www.baltsoft.com/files/dkb/attachment/Parsing_the_Web.zip

2004. február 24., kedd

Certificate basics

Problem/Question/Abstract:

Key management can be an issue in security-related tasks. Certificates define a standard way of managing keys and associated information. They are also used in SSL/TLS protocol and S/MIME.

Answer:

Definition

Two types of encryption algorithms are used - symmetric and asymmetric. Symmetric encryption algorithms deal with one secret key, which is used to both encrypt and decrypt information. Asymmetric algorithms usually deal with a pair of keys, one of which is private (secret) and another one is public (known to everybody). Certificate is a block of information in standard format that contains a public key and supplementary information about this key. In broader meaning certificate can also include an associated private key. The private key is not the part of certificate binary data, but logically they create one entity.

There exist different certificate formats, designed for different applications. Most known formats are PGP and X.509. X.509 certificates are used in SSL/TLS protocol, which is the base for secure Internet communications. Format of X.509 certificates is defined in RFC 2459.

X.509 certificate structure is defined using Abstract Syntax Notation 1 (ASN.1). This notation allows uniform identification of binary data structures on any system, not dependent from byte size and byte order. The details of the structure can be found below.

Features and possible uses

The certificate contains the following information:

Information about the person or organization, to which it was issued,

Information about the organization that issued the certificate

Date range when the certificate is valid.

(Optional) information, which lets the application automatically verify validity of the certificate and find out whether the certificate was revoked.

Also certificate can contain predefined or user-defined information about intended use of the certificate. This means that the application can find out user's credentials and scope of operations that the user is permitted to do. Each application can define it's own flags for the certificates it uses or manages.

Due to its nature, certificate is currently the best choice for authenticating the person and information that is distributed by this person, and also for defining the scope of operations allowed to the person.

Certificates are indispensable for providing easy-to-manage access control system for client-server and n-tier applications. In this scenario the server issues certificates, in which it places information about allowed uses of the certificate. Certificates together with their private keys are a good alternative to regular username/password pairs. When the user connects to the server, it uses the certificate in the process of establishing connection with the server. The server is able not only to validate the user's authenticity, but also determine, what actions this user is allowed to take, or to which user group it belongs.

Utilizing SSL/TLS protocol is the best way to involve the certificates into the "conversation", but if your middleware doesn't provide an easy way to add SSL to data exchange, you will need to manage certificates by hand. In both cases SecureBlackbox package will assist you in certificate management. And if you need SSL, SecureBlackbox is a complete SSL/TLS solution.

Certificate management

Creation of the certificate is called issuing. The organizations that issue certificates are called Certificate Authorities (CA). The person or organization to which the certificate is given (for which it is issued) is called Subject.

The certificate is valid during some time period. If the certificate is used out of this time period, it is either "not yet valid" or "expired".

When the certificate is issued, the certificate authority signs it and the signature of this certificate authority is put to the certificate.

Certificate authority can claim the certificate to be not valid anymore in some cases. For example, when Subject claims that the certificate's private key was stolen. Another case is when Certificate Authority's private key has become known to third parties and the signature doesn't identify authenticity anymore. Such certificate is called revoked. Certificate authority keeps the list of revoked certificates and each certificate can contain the address (URI) of certificate revocation list.

There can be several type of CAs. If you utilize certificates for some n-tier software solution (for authentication/security purposes), the certificates will most likely be generated by the server part of the software solution. In this case the user of the server part will act as a CA. If the certificate is issued by some organization to itself (i.e. the CA is the Subject), the certificate is called self-signed.

If the data exchange, in which the certificates are involved, is public (i.e. more than one organization participates in the process), self-signed certificates are a security compromise. The side, which receives the certificate during data exchange session, can't reliably verify the identity of the other side unless there is some repository, which can confirm the identity of that other side. Certificate Authority itself is a good repository. There are several commercial organizations that issue and manage certificates (most of them are affiliates of VeriSign company). They act as certificate repositories and their authenticity is well known to public.

Certificate authorities, that publicly issue certificates, usually require that the person, who requests the certificate, prove his identity by passing regular (paper) documents (passport, driver's license etc).

Another type of public activity that involves certificates is electronic document signature or encryption. In this case there is one party that signs/encrypts the data, and any number of parties that get the information and must check it for validity.

When one side of data exchange session receives the certificate from other side, it is supposed to validate the certificate, i.e. check the following:

Certificate integrity must not be corrupted. Each certificate contains the data that can be used to check certificate integrity. Information in corrupted certificate can't be used at all;

If the certificate is not self-signed, CA's authenticity should be verified either. This is done by looking for this CA's certificate in the repository. Repository can be local to the application that validates the certificate (for example Windows 32 API provides API for accessing built-in Windows certificate storage). Custom storage can be used (it can be located on other system including public certificate repositories. If the certificate is found, we use CA certificate to validate the certificate in question. If the certificate is not found, we can't confirm authenticity of the certificate in question;

Certificate validity time period must correspond to the data signing/encryption time. One should not use expired or not yet valid certificates to sign/encrypt the data;

Certificate should not be present in certificate revocation list. As mentioned, each certificate can contain the reference to the certificate revocation list. The validating side should check the list if the reference is present. If the certificate is not self-signed, and there is no reference to certificate revocation list inside, it is possible to check the CA's certificate for such a reference and use it instead.

(optional). Certificate purpose must correspond to the action being certified. Each certificate can have a list of purposes, for which it is applicable.

Usually, certificate is only valid if all of the above conditions are true. If any of conditions is false, the data source can't be fully trusted. SecureBlackbox offers methods to validate the certificate (except revocation list lookup).

Supplementary services

One of the most important actions in certificate management is key recovery.

When the key is lost (because of data storage corruption or some accident or just erased by mistake), it's probably lost forever. This means that if the key pair was used to encrypt the data, the data is also unrecoverable. One of the ways to prevent this is to put the key (or key pair) to the digital envelope and store this envelope somewhere. The envelope, however, must be secure too. This is achieved by involving a third party (trusted person) to the process. Trusted person gives it's public key for envelope creation. Trusted person itself doesn't necessarily have access to the envelope. If the original key is lost, the owner of the envelope can contact the trusted person, who will apply it's secret key to the envelope and open the envelope.

The disadvantage is that the trusted person must remain trusted and it must remain available. If its security is compromised or the person becomes unavailable, all users of trusted person's services must recreate their envelopes. To overcome the security risk of involving a trusted person it is possible to involve several trusted persons. Each person's public key is used to encrypt only a part of the information being secured (it's either secret key or symmetric key). Then single person can't discover the secret information even when it has the

When it is necessary to recover the key, each of those persons should be contacted to decrypt part of the envelope. Unfortunately in this case if any of those persons becomes unavailable, the whole envelope is useless.

In business it makes sense to use key recovery services, which are provided by public companies. These companies can be related to certificate authorities somehow or can be independent organizations. As with the trusted person mentioned above, the recovery service can have access to the keys used for signing documents or to decrypt data. The benefit of the recovery service is that it usually has secure infrastructure for key keeping and transfer.

If the certificate authority (CA) issues or plans to issue lots of certificates, it makes sense to delegate some of its functions to registration authorities. Those authorities receive certificate requests from the organizations or individuals, check their identity and issue certificates. They also manage certificate revocation lists and re-issue expired certificates.

Certificate management is quite a complicated thing, which can involve other types of services and actions. We don't review them here because the topic is enough for a book, not just an article.

Certificate structure

X.509 defines the following blocks of data in the certificate:

Version. This is the version of the certificate format. Currently can be 1, 2, and 3.

Certificate serial number.

Signature algorithm identifier, which identifies the algorithm used to sign the certificate and parameters for the algorithm.

Issuer name. This is the name of the company that has issued (generated and signed) the certificate.

Validity period. This field defines lifetime of the certificate.

Subject name. This field contains the name of the person/company, to which the certificate is issued.

Public key information. Contains the body of the public key, information about its algorithm and parameters.

Extensions (see below)

Signature algorithm identifier (the same as described above).

Signature value (body of the signature).

There were several revisions of certificate standard. The latest is X.509.3. Revisions 2 and 3 define extensions to the original format.

Extensions can be critical or non-critical. If they are critical, the application, which doesn't handle such extension, should not process such certificate further. Non-critical extensions can be ignored.

The certificate itself contains a flag, which defines whether certificate extension is critical or non- critical.

X.509.2 defines the following extensions:

Issuer Unique Identifier

Subject Unique Identifier

X.509.2 extensions are not currently recommended for use by RFC 2459.

X.509.3 defines the following extensions:

Authority Key Identifier. Critical for not self-signed certificates.

Subject Key Identifier. Critical for not self-signed certificates and recommended for self-signed certificates.

Key Usage. This field is used to restrict the set of operations, for which the certificate is eligible. The standard recommends flagging this extension as critical.

Extended Key usage. This field is similar to Key Usage field, but RFC 2459 doesn't constrain possible values of this field and any application can define it's own flags.

CRL Distribution Points. This is the URL of the certificate revocation list, which can be used to check validity of the certificate. This extension is non-critical, but it is recommended that the applications support it.

Private Key usage period. This field defines the lifetime of the private key, associated with the certificate. If the field is not present, the lifetime is the same as the lifetime of the certificate itself. RFC 2459 doesn't recommend using this field.

Other, more specific extensions. Full list can be found in RFC 2459

Conclusions

Certificates are very powerful and advanced technology, which, when used properly, can provide the highest level of security in many areas of electronic activity. Basic certificate management, though, is not too complicated thing and can be done by any application, especially with the help of SecureBlackbox package.

Component Download: http://www.secureblackbox.com/files/blackboxeval.zip

2004. február 23., hétfő

Determine whether the value in a variant type variable is an integer or a string

Problem/Question/Abstract:

I am using a function which has a variant in its parameters. Depending on that value - if that is an integer or a string - I create different returns. What is the appropriate way to know if the value in a variant type variable is an integer or a string?

Answer:

{ ... }
var
  vt: TVarType;
begin
  vt := VarType(v);
  if vt in [varSmallInt, varInteger, varShortInt, varByte, varWord, varLongWord,
    varInt64] then
    { ... }
  else if vt in [varOleStr, varStrArg, varString] then
    { ... }
  else
    raise EVariantError.Create('Unsupported variant type');
end;

2004. február 22., vasárnap

Wrapping filters around TStream classes

Problem/Question/Abstract:

In Java there are various predefined stream classes that provide filters for other stream classes - the filter classes essentially "wrap" the streams they operate on. The filters can often be applied to further filters. This article demonstrates how we can do this in Delphi in a way that is extendable - i.e. we can wrap filters around other filters.

Answer:

First of all, let's look at why we want to do this. Well say you want to write some data primitives as text to a stream and the text to be formatted to fit on a page, word wrapping properly. Then if we can wrap a filter that formats the primitives around another that formats the text and this filter is wrapped round a file stream object, then all we have to do is access the methods of the first class and the rest of the process happens automatically.

The approach I've taken is to define a class, TStreamWrapper, that provides a base class for any filters that we want to define. Any TStreamWrapper performs it's i/o using another TStream object - the wrapped object. The key point is that TStreamWrapper is itself derived from TStream, so that it can also wrap other TSteamWrapper objects - giving the extensibility we need. TStreamWrapper can also cause a wrapped stream to be freed when it is itself freed - allowing the wrapped streams to be created "on the fly" when the TStreamWrapper constructor is called.

There is no additional functionality built in to TStreamWrapper - this is to be provided by derived classes. A small example class is demonstrated here.

First to TStreamWrapper. Here's the class declaration:

type
  TStreamWrapper = class(TStream)
  private
    FBaseStream: TStream; {The "wrapped" stream}
    FCloseStream: Boolean; {Free wrapped stream on destruction?}
  protected
    procedure SetSize(NewSize: Longint); override;
    {Sets the size of the stream to the given value if the operation is
    supported by the underlying stream}
    property BaseStream: TStream read FBaseStream;
    {Gives access to the underlying stream to descended classes}
  public
    constructor Create(const Stream: TStream;
      const CloseStream: Boolean = False); virtual;
    {If CloseStream is true the given underlying stream is freed when
    this object is freed}
    destructor Destroy; override;
    // Implementation of abstract methods of TStream
    function Read(var Buffer; Count: Longint): Longint; override;
    function Write(const Buffer; Count: Longint): Longint; override;
    function Seek(Offset: Longint; Origin: Word): Longint; override;
  end;

and the implementation is:

constructor TStreamWrapper.Create(const Stream: TStream;
  const CloseStream: Boolean);
begin
  inherited Create;
  // Record wrapped stream and if we free it on destruction
  FBaseStream := Stream;
  FCloseStream := CloseStream;
end;

destructor TStreamWrapper.Destroy;
begin
  // Close wrapped stream if required
  if FCloseStream then
    FBaseStream.Free;
  inherited Destroy;
end;

function TStreamWrapper.Read(var Buffer; Count: Integer): Longint;
begin
  // Simply call underlying stream's Read method
  Result := FBaseStream.Read(Buffer, Count);
end;

function TStreamWrapper.Seek(Offset: Integer; Origin: Word): Longint;
begin
  // Simply call the same method in the wrapped stream
  Result := FBaseStream.Seek(Offset, Origin);
end;

procedure TStreamWrapper.SetSize(NewSize: Integer);
begin
  // Set the size property of the wrapped stream
  FBaseStream.Size := NewSize;
end;

function TStreamWrapper.Write(const Buffer; Count: Integer): Longint;
begin
  // Simply call the same method in the wrapped stream
  Result := FBaseStream.Write(Buffer, Count);
end;

We can now derive a small filter class - TStrStream. As it stands it's not particularly useful, but does demostrate the techniques. The class reads writes strings (which are preceded by their lengths) to any stream. The declaration is:

type
  TStrStream = class(TStreamWrapper)
  public
    procedure WriteString(AString: string);
    function ReadString: string;
  end;

  The class is implemented as follows:

function TStrStream.ReadString: string;
var
  StrLen: Integer; // the length of the string
  PBuf: PChar; // buffer to hold the string that is read
begin
  // Get length of string (as 32 bit integer)
  ReadBuffer(StrLen, SizeOf(Integer));
  // Now get string
  // allocate enough memory to hold string
  GetMem(PBuf, StrLen);
  try
    // read chars into buffer and set resulting string
    ReadBuffer(PBuf^, StrLen);
    SetString(Result, PBuf, StrLen);
  finally
    // deallocate buffer
    FreeMem(PBuf, StrLen);
  end;
end;

procedure TStrStream.WriteString(AString: string);
var
  Len: Integer; // length of string
begin
  // Write out length of string as 32 bit integer
  Len := Length(AString);
  WriteBuffer(Len, SizeOf(Integer));
  // Now write out the string's characters
  WriteBuffer(PChar(AString)^, Len);
end;

The following code should demonstrate how to write a string to a file and read it back in again. Here we use a file stream that is created on the fly and automatically closed when we are done. Of course you could create the stream and close it separately.

procedure WriteText(const Txt: string);
var
  TS: TStrStream;
begin
  // This opens stream on a file stream that will be closed when this stream closes
  TS := TStrStream.Create(TFileStream.Create('test.dat', fmCreate), True);
  TS.WriteString(Txt);
  TS.Free; // this closes wrapped file stream
end;

function ReadText: string;
var
  TS: TStrStream;
begin
  TS := TStrStream.Create(TFileStream.Create('test.dat', fmOpenRead), True);
  Result := TS.ReadString;
  TS.Free;
end;

The filter in this example provides additional methods to those in TStreamWrapper. We can also provide filters that override the Read and Write methods to alter the way that files are written. My resource file classes (available for download from my website) use this method to allow data to be written to RCDATA resource files - the classes take care of maintaining the correct file structure.

Component Download: http://www.delphidabbler.com/download.php?file=streamwrapdemo.zip

2004. február 21., szombat

Ensure that at least one item in a TListView is always selected

Problem/Question/Abstract:

The problem is that if the user clicks on a TListView outside the items the current selected item is deselected. I want my TListView to always keep one item selected so this never happens. What is the easiest way to accomplish this?

Answer:

{ ... }
Listview1.HideSelection := False;
{ ... }

{ ... }
var
  FPrevItem: TListItem;
  { ... }

procedure TForm1.ListView1MouseDown(Sender: TObject; Button: TMouseButton;
  Shift: TShiftState; X, Y: Integer);
begin
  if not Assigned(Listview1.Selected) then
    Listview1.Selected := FPrevItem;
end;

procedure TForm1.ListView1Change(Sender: TObject; Item: TListItem; Change:
  TItemChange);
begin
  FPrevItem := Item;
end;

2004. február 20., péntek

Undocumented: How to change class inheritance during runtime

Problem/Question/Abstract:

Can the inheritance of a class be changed during runtime?
Yes, it can be! Here is how...

Answer:

This demo replaces the standard TPanel with a TMyPanel class. Part of this code is from the book "Delphi Win32 Losungen" written by Andreas Kosch.

This code is just a demo to show what kind of fun stuff you can do with the runtime type information (RTTI). Learn from it, play with it, have fun with it, impress your friends, etc. But: you must NEVER use this code in commercial or otherway important programs!

A good designed class hierarchy does not need runtime changes to the inheritance structure.

unit main;

interface

uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
  StdCtrls, extCtrls;

type
  TClassReplaceDemo = class(TForm)
    Button1: TButton;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
    FPanel: TPanel;
  public
    { Public declarations }
  end;

  TMyPanel = class(TCustomControl)
  protected
    procedure WMSize(var Message: TWMSize); message WM_Size;
  end;

var
  ClassReplaceDemo: TClassReplaceDemo;

implementation

{$R *.DFM}

procedure ReplaceParentClass(DelphiClass, OldParent, NewParent: TClass);
var
  AClassPointer: ^Byte;
  pVCl, pNew: ^Pointer;
  Protect: DWord;

begin
  // check if parameters are legal
  if Assigned(NewParent) and Assigned(DelphiClass) then
  begin
    // Find the correct parent
    while (DelphiClass.ClassParent <> OldParent) do
    begin
      with DelphiClass do
      begin
        // Is the class parent ok?
        if (ClassParent = nil) or (ClassParent = NewParent) then
          raise Exception.Create('Illegal class parent');
        // move one up in
        DelphiClass := ClassParent;
      end;
    end;

    // Get the classpointer of the delphi class
    AClassPointer := Pointer(DelphiClass);
    Inc(AClassPointer, vmtParent);
    pVCL := Pointer(AClassPointer);

    // get the classpointer of the new class
    AClassPointer := Pointer(NewParent);
    Inc(AClassPointer, vmtSelfPtr);
    pNew := Pointer(AClassPointer);

    // insert the new class
    VirtualProtect(pVCL, SizeOf(Pointer), PAGE_READWRITE, @Protect);
    try
      pVCL^ := pNEW;
    finally
      VirtualProtect(pVCL, SizeOf(Pointer), Protect, @Protect);
    end;
  end;
end;

{ TMyPanel }

procedure TMyPanel.WMSize(var Message: TWMSize);
begin
  Caption := Format('Width: %d  Height: %d', [Width, Height]);
end;

{ TForm1 }

procedure TClassReplaceDemo.Button1Click(Sender: TObject);
begin
  if FPanel = nil then
  begin
    // Create a 'normal' panel
    FPanel := TPanel.Create(Self);

    // put it on the form
    FPanel.Parent := Self;

    // define it's size
    FPanel.BoundsRect := Rect(10, 50, 150, 100);

    // You will now see the caption is automagicly set
  end;
end;

initialization
  // Replace the normal TPanel with our own TMyPanel
  ReplaceParentClass(TPanel, TCustomControl, TMyPanel);
finalization
  // cleanup the mess we made
  ReplaceParentClass(TPanel, TMyPanel, TCustomControl);
end.

2004. február 19., csütörtök

Get the number of occurrences of a substring within a string

Problem/Question/Abstract:

How to get the number of occurrences of a substring within a string

Answer:

Answer 1:

I don't know of any existing code in Delphi to do this. If performance isn't critical you can always use brute force. The following works but can easily be improved:

function CountSubString(SubStr, St: string): integer;
var
  i: integer;
begin
  Result := 0;
  for i := 1 to Length(st) do
    if (copy(st, i, length(SubStr)) = SubStr) then
      inc(Result);
end;

Answer 2:

While you can fiddle with the pos function and truncating the containing string after each "find," I think the brute force method is easiest to code (untested):

function SubstrCount(sub, container: string): integer;
var
  index: integer;
begin
  result := 0;
  for index := 1 to (length(container) - length(sub) + 1) do
    if copy(container, index, length(sub)) = sub then
      inc(result);
end;

If you need to skip over the found substring, say to count two occurrences of "AA" within "AAAA" instead of three, change to:

function SubstrCount(sub, container: string): integer;
var
  index: integer;
begin
  result := 0;
  index := 1;
  while index <= (length(container) - length(sub) + 1) do
  begin
    if copy(container, index, length(sub)) = sub then
    begin
      inc(result);
      index := index + length(sub);
    end
    else
      inc(index);
  end;
end;

Answer 3:

function SubStringCount(const str, substr: string): Integer;
var
  start: integer;
begin
  result := 0;
  start := 0;
  repeat
    start := posex(substr, str, start + 1);
    if (start = 0) then
      break
    else
      inc(Result);
  until
    false;
end;

2004. február 18., szerda

Making an application a TCP/IP Client...

Problem/Question/Abstract:

Connecting to a TCP/IP Server from a Delphi client

Answer:

As every Delphi Developer knows that in order to make a Delphi Application as a TCP/IP Client, we can use the TclientSocket Component.

I faced a problem when I try to connect to a TCP/IP server (another computer) and send data to that machine and get data back.

In form’s OnShow event, I set the Address and Port properties of the TclientSocket component to the TCP/IP server’s IP Address and Port Number and set Active to true. After that I tried to send the data in the same event. I was not able to send the data. I found that the Active property as True(meaning connected).

So I thought the problem could be with the form’s OnShow event; Then I put the same code (setting the IP address and port number and active to true) in FormCreate/OnClick event; I was not able to come out of that problem.

The thing is that I did try to connect to the TCP/IP server and try to send the data at the same time. This doesn’t seem to work properly. After that I referred the Delphi Help carefully and got the solution.

The Solution I found is:

First we need to set the IP Address/Port Number of the TCP/IP server in the Tclientsocket component properties in the main form’s OnCreate event of the project and set Active to true. After that we can use the Open and Close methods of the TclientSocket component to connect/disconnect the TCP/IP server.

If we wish to send data to a TCP/IP server often from different forms in a project, we can use a DataModule and put a TclientSocket component in it and use it everywhere throughout the project by including that datamodule in all the unit files.

And one more thing, in the ClientSocketRead event we need to put some time delay while reading data back from the TCP/IP server. This time delay could be some milliseconds and depends on the network traffic since we may not read all the data sent from the TCP/IP server at a time even if you keep your buffer a large one. So you may need to wait for some milliseconds between reads.

I used the clienttype of the TclientSocket as ctNonBlocking; We can also use ctBlocking as clienttype; but in that case the TCP/IP server should be a threaded one.

Even though it seems to be a simple stuff, I just wanted to share with all of our friends in Delphi.

2004. február 17., kedd

Converting enumerated type values into strings

Problem/Question/Abstract:

How to convert font style values into string values?

Answer:

For converting enumerated type values into string we should use the GetEnumName function from TypInfo unit. Below is example how to perform this action for TFontStyle type and for our custom type:

type
TOurType = (otFirst, otSecond, otThird, otForth, otFifth, otLast);

procedure TForm1.Button1Click(Sender: TObject);
var
OT: TOurType;
FT: TFontStyle;
begin
// TFontStyle values
Memo1.Lines.Add('The TFontStyle values:');
for FT := Low(TFontStyle) to High(TFontStyle) do
Memo1.Lines.Add(GetEnumName(TypeInfo(TFontStyle), ord(FT)));

// The custom TOurType values
Memo1.Lines.Add('The TOurType values:');
for OT := Low(TOurType) to High(TOurType) do
Memo1.Lines.Add(GetEnumName(TypeInfo(TOurType), ord(OT)));
end;

2004. február 16., hétfő

Exception or Event Logger

Problem/Question/Abstract:

Each project has logical errors or by runtime. It would be fine to write these exceptions down to a file in order to find out, what's happened weeks ago ;) or to track component events

Answer:

Exceptions are mistakes and errors due to some run-time problem. This is obviously a wishy-washy definition, but generally run-time problems would be things like running out of memory whilst adding a data object or an index out of bounds. In our team we wrote a procedure (months ago), which intercepts those nasty things like exceptions by assigning a new event-handler in the main-unit:

{$IFDEF DEBUG}
Application.OnException := AppOnException;
{$ENDIF}

If DEBUG is not set the code runs at full speed, but I advise to set the handler all the time, cause then you can analyse each applications exception file. Normal testing should identify programming mistakes, whereas the other type of error are exceptions to the norm.
The event-handler goes like this:

procedure TCWForm.AppOnException(sender: TObject; E: Exception);
var
Addr: string[9];
FErrorLog: System.Text;
FileNamePath: string;
begin //writes errorlog.txt file
FileNamePath := extractFilePath(application.exeName) + 'errorlog.txt';
AssignFile(FErrorLog, FileNamePath);
try
System.Append(FErrorlog);
except
on EInOutError do
Rewrite(FErrorLog);
end;
Addr := IntToHex(Seg(ErrorAddr), 4) + ';' + IntToHex(Ofs(ErrorAddr), 4);
Writeln(ErrorLog, format('%s[%s]%s%s', [DateTimeToStr(Now),
getNetUserName, E.Message, Addr]));
System.Close(FErrorLog);
MessageDlg('CW5' + E.Message + '. occured at: ' + Addr, mtError, [mbOK], 0);
end;

To avoid scope conflicts, Assign File replaces the Assign procedure that was available in previous versions of Delphi. The Addr also depends on the OS. Note, that you want still see exceptions on the screen, you get it with the last MessageDlg in the AppOnException-routine.

Then you get an output in a well shaped manner:

*************************ERRORLOG************************************
26.09.99 12:09:16 [MAX] List index out of bounds 52FF;1226
26.09.99 13:05:28 [MAX] Database BezSpr not found 5F6F;1226
26.09.99 13:21:37 [THOMAS] List index out of bounds 69DF;1226
26.09.99 13:43:35 [MAX] GP fault in module CW5.EXE at 0002:3588 2A9F;1226
30.09.99 14:32:23 [SIMON] Cannot perform this operation on a closed dataset 320F;1254
30.09.99 14:35:36 [MAX] Record locked by another user. Table:GBK.DB

Maybe, the function getNetUserName has to be changed, it depends on the operating-system or the database you deal with:

function getNetUserName: string;
var
szVar: array[0..32] of char;
begin
DBIGetNetUserName(szVar);
result := StrPas(@szVar);
end;

Instead of Addr := by an Exception, use in a 32 - bit environment:

mem: TMemoryStatus;
mem.dwLength := sizeOf(TMemoryStatus);
GlobalMemoryStatus(mem);
edit3.text := intToStr((mem.dwAvailPageFile) div 1024);
edit4.text := intToStr((mem.dwAvailPhys) div 1024);

Component Event Logger

On the other side you want to know component events to track down user or system behavior. This is also usefull to show the events on runtime in a listbox or to store it in a file.
First your declare a procedure in your class:

events: TListBox;

procedure LogEvent(const EventStr: string; Component: TComponent = nil);

Second you define the procedure with a listbox as events:

procedure TransAct.LogEvent(const EventStr: string;
Component: TComponent = nil);
var
ItemCount: Integer;
begin
if (csDestroying in ComponentState) or not Events.Visible then
Exit;
if (Component <> nil) and (Component.Name <> '') then
Events.Items.Add(Format('%s(%s)', [EventStr, Component.Name]))
else
Events.Items.Add(EventStr);
ItemCount := Events.Items.Count;
Events.ItemIndex := ItemCount - 1;
if ItemCount > (Events.ClientHeight div Events.ItemHeight) then
Events.TopIndex := ItemCount - 1; //tracing
end;

Third you call the procedure LogEvent in your code as you want, e.g.:

LogEvent('OnDataChange', Sender as TComponent);
LogEvent('BeforeOpen', DataSet);
LogEvent('AfterClose', DataSet);

procedure TransAct.DataSetBeforeClose(DataSet: TDataSet);
begin
LogEvent('BeforeClose');
end;

procedure TransAct.DataSetError(DataSet: TDataSet;
E: EDatabaseError; var Action: TDataAction);
begin
LogEvent('OnDelete/OnEdit/OnPost Errors', DataSet);
end;

procedure TransAct.Disconnect(Connection: TADOConnection;
var EventStatus: TEventStatus);
begin
LogEvent('Disconnect', Connection);
end;

function TScanner.SaveLogData(const UserData: WideString; const CheckSum: DWORD):
Boolean;
var
SL: TStringList;
FileName: string;
begin
SL := TStringList.Create;
FileName :=
'D:\Scanner\LogData\' + FormatDateTime('yyyymmdd-hhnnsszzz', Now) + '.txt';
SL.Text := UserData;
SL.SaveToFile(FileName);
SL.Free;
Result := True;
end;

2004. február 15., vasárnap

What's a buffer overflow and how to avoid it in Delphi?

Problem/Question/Abstract:
Answer:

This article tries to explain what a buffer overflow is and what countermeasures (or counterattacks;) can be taken to avoid it.

A buffer is a contiguous allocated block of memory, such as an array or a pointer in Pascal. In C and C++, there are no automatic bounds checking on the buffer (responsability by the programmer), which means a hacker can spoil the buffer.
for example:

int main () {
int buffer[5];
buffer[7] = 10;
}

In Delphi I mean as far as Delphi uses Pascal style strings, programs are much safer than those made in C/C++.  Strong types with internal checks of operations and ranges by the compiler at design-time can prevent an overflow.
Delphi can use Pascal strings as well as generic windows strings (PChar). When interfacing with Win API there is no other option except using Pchar.
This mean that potentially Delphi is a bit safer, because the potential problems can be isolated - more experienced developers are allowed to work with PChar, while less experienced allowed to work only with encapsulated functionality using Pascal String.

Internal represantation of a Delphi String:

------------------------I-------------I---------------I--------------I
reference counter 32bit  length(32bit)  payload(nbyte)  unused space
------------------------I-------------I---------------I--------------I
I--->string variable

The Delphi compiler hiddens the fact that the string variable is a heap pointer
to the above structure but setting the memory in advance is advisable:

path: string;
setLength(path, 1025);
setLength(path, getSystemDirectory(pChar(path),length(path)-1));

Is it really safer? Ok. we can say it's safer against character-array buffer overflows or Delphi isn't used enough to make attacking it interesting. But most attacks succeded on the STACK but not on a HEAP structure, cause overriding a heap is more difficult . This does not imply that the language is overall safer - there could be very well significant damage elsewhere, and char-based overflows are only one attack method.
For example you declare a PChar buffer, you have the possibility to check with GetMem() the capacitiy otherwise  you get an EOutOfMemory-exception:

Buffer: PChar;  //not a buffer before you use getmem
Size:= FileSize(F);
GetMem(Buffer, Size); //allocates n-Bytes on the heap
BlockRead(F, Buffer^, Size);

What's the danger?
------------------------------------------------------------
Most of "unsecure" programs are valid, and "almost" every compiler can compile it without any errors. However, the attacked program attempts to write beyond the allocated buffer memory, which might result in unexpected behavior. Because malicious code (assembly instructions to spawn a root shell) is an input argument to the program, it resides in the stack and not in the code segment. Therefore, the simplest solution is to invalidate the stack to execute any instructions.
Execution is done by overwriting a function's return address (in the stack), an intelligent hacker might want to spawn a shell (with root permissions) by jumping the execution path to such code.
A hacker or attacker places the code they are trying to execute in the buffer's overflowing area. With the  manipulated return address it points back to the buffer and executes the intended code.
That's it.

Avoid it
------------------------------------------------------------
So Delphi is mostly secure against buffer overflows (Imho), cause of compiler and conceptual basics.
Hence, the best way to deal with buffer overflow problems is to not allow them to occur in the first place. Developers should be educated about how to minimize the use of these vulnerable functions.
It is advisable to make a buffer too large in the first place or implement a circular buffer (that you may receive data quicker than you process it, resulting in a buffer overflow):

var
Source,
Dest: PChar;
CopyLen: Integer;
begin
Source:= aSource;
Dest:= @FData[FBufferEnd];
if BufferWriteSize < Count then
raise EFIFOStream.Create('Buffer over-run.');

Or you check with GetMem or a simple count-logic possible overflows:

GetMem(Buffer, BufferSize);
Ptr:= Buffer;
Count:= 0;
for I:= 0 to ListBox.Items.Count - 1 do begin
Line:= ListBox.Items.strings[I];
// Check buffer overflow
Count:= Count + Length(Line) + 3;
if Count = BufferSize then
Break;
//the function aborts immediately.

Or use try finally/raise to check buffer with a DefSize and free it:

begin
if FBufFixed then
BSize := FBufSize
else
BSize:= DefSize;
GetMem(Buffer, BSize);
try
BufEnd:= Buffer;
Count:= Stream.Read(Buffer[0], BSize);
BufEnd:= BufEnd + Count;
if Count < BSize then BufEnd[0]:= #0 else begin
raise EStreamError.Create(LoadStr(SLineTooLong));
end;
SetText(Buffer);
finally
FreeMem(Buffer, BSize);
end;
end;

Apart from education or experience, modern compiler like Delphi change the way a program is compiled, allowing bounds checking to go into compiled code automatically, without changing the source code. These compilers generate the code with built-in safeguards that try to prevent the use of illegal addresses. Any code that tries to access an illegal address is not allowed to execute.

Or a tool does it by protecting the return address on the stack from being altered. It places a canary word next to the return address whenever a function is called. If the canary word has been altered when the function returns, then some attacks or attempt has been made on the overflow buffers.

Furthermore, it may affect the application's performance to a great extent. In some case, executable size and execution time may increase a certain way. That's the prize for more code security.
Max Kleiner

2004. február 14., szombat

Create a DBExpress-Connection at Runtime

Problem/Question/Abstract:

If you have a Webservice or a nonvisual component, you can't put a TSQLConnection on a form so you have to call the connection at runtime

Answer:

The normal way for Delphi and Kylix is just to check dbExpress, put a TSQLConnection on a form then double-click the TSQLConnection to display the Connection Editor and set parameter values (database path, connection name etc.) to indicate the settings.

But in our example, all goes by runtime (path and login) with dbExpress we don't need an alias or the BDE either.

procedure TVCLScanner.PostUser(const Email, FirstName, LastName: WideString);
var
Connection: TSQLConnection;
DataSet: TSQLDataSet;
begin
Connection := TSQLConnection.Create(nil);
with Connection do
begin
ConnectionName := 'VCLScanner';
DriverName := 'INTERBASE';
LibraryName := 'dbexpint.dll';
VendorLib := 'GDS32.DLL';
GetDriverFunc := 'getSQLDriverINTERBASE';
Params.Add('User_Name=SYSDBA');
Params.Add('Password=masterkey');
Params.Add('Database=milo2:D:\frank\webservices\umlbank.gdb');
LoginPrompt := False;
Open;
end;
DataSet := TSQLDataSet.Create(nil);
with DataSet do
begin
SQLConnection := Connection;
CommandText := Format('INSERT INTO kings VALUES("%s","%s","%s")',
[Email, FirstN, LastN]);
try
ExecSQL;
except
end;
end;
Connection.Close;
DataSet.Free;
Connection.Free;
end;

Sending commands to the server

Another possibilities is to send commands like CreateTable to the Server. For TSQLConnection, Execute takes three parameters: a string that specifies a single SQL statement that you want to execute, a TParams object that supplies any parameter values for that statement, and a pointer that can receive a TCustomSQLDataSet that is created to return records.

Note: Execute can only execute one SQL statement at a time. It is not possible to execute multiple SQL statements with a single call to Execute, as you can with SQL scripting utilities. To execute more than one statement, call Execute repeatedly.

It is relatively easy to execute a statement that does not include any parameters. For example, the following code in our example executes a CREATE TABLE statement (DataDefinitionLanguage) without any parameters on a TSQLConnection component:

procedure createUserTable;
var
Connection: TSQLConnection;
SQLstmt: string;
begin
Connection := TSQLConnection.Create(nil);
with Connection do
begin
ConnectionName := 'VCLScanner';
DriverName := 'INTERBASE';
LibraryName := 'dbexpint.dll';
VendorLib := 'GDS32.DLL';
GetDriverFunc := 'getSQLDriverINTERBASE';
Params.Add('User_Name=SYSDBA');
Params.Add('Password=masterkey');
with TWebModule1.create(nil) do
begin
getFile_DataBasePath;
Params.Add(dbPath);
free;
end;
LoginPrompt := False;
Connected := True;
SQLstmt := 'CREATE TABLE NewMaxCusts ' +
'( ' +
'  CustNo INTEGER NOT NULL, ' +
'  Company CHAR(40), ' +
'  State CHAR(2), ' +
'  PRIMARY KEY (CustNo) ' +
')';
try
Execute(SQLstmt, nil, nil);
except
raise
end;
Close;
Free;
end; //end Connection
end;

2004. február 13., péntek

Retrieve CPU information

Problem/Question/Abstract:

How to retrieve CPU information?

Answer:

Sometimes u need to know some information about the CPU like: brand id, factory speed, wich instruction set supported etc. If so, than u can use this code.

unit main;

interface

uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
StdCtrls, ExtCtrls;

type
Tfrm_main = class(TForm)
img_info: TImage;
procedure FormShow(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
procedure info(s1, s2: string);
end;

var
frm_main: Tfrm_main;
gn_speed_y: Integer;
gn_text_y: Integer;
const
gn_speed_x: Integer = 8;
gn_text_x: Integer = 15;
gl_start: Boolean = True;

implementation

{$R *.DFM}

procedure Tfrm_main.FormShow(Sender: TObject);
var
_eax, _ebx, _ecx, _edx: Longword;
i: Integer;
b: Byte;
b1: Word;
s, s1, s2, s3, s_all: string;
begin
//Set the startup colour of the image
img_info.Canvas.Brush.Color := clblue;
img_info.Canvas.FillRect(rect(0, 0, img_info.Width, img_info.Height));

gn_text_y := 5; //position of the 1st text

asm                //asm call to the CPUID inst.
mov eax,0         //sub. func call
db $0F,$A2         //db $0F,$A2 = CPUID instruction
mov _ebx,ebx
mov _ecx,ecx
mov _edx,edx
end;

for i := 0 to 3 do //extract vendor id
begin
b := lo(_ebx);
s := s + chr(b);
b := lo(_ecx);
s1 := s1 + chr(b);
b := lo(_edx);
s2 := s2 + chr(b);
_ebx := _ebx shr 8;
_ecx := _ecx shr 8;
_edx := _edx shr 8;
end;
info('CPU', '');
info('   - ' + 'Vendor ID: ', s + s2 + s1);

asm
mov eax,1
db $0F,$A2
mov _eax,eax
mov _ebx,ebx
mov _ecx,ecx
mov _edx,edx
end;
//06B1
//|0000| |0000 0000| |0000| |00| |00| |0110| |1011| |0001|
b := lo(_eax) and 15;
info('   - ' + 'Stepping ID: ', IntToStr(b));
b := lo(_eax) shr 4;
info('   - ' + 'Model Number: ', IntToHex(b, 1));
b := hi(_eax) and 15;
info('   - ' + 'Family Code: ', IntToStr(b));
b := hi(_eax) shr 4;
info('   - ' + 'Processor Type: ', IntToStr(b));
//31.   28. 27.   24. 23.   20. 19.   16.
//  0 0 0 0   0 0 0 0   0 0 0 0   0 0 0 0
b := lo((_eax shr 16)) and 15;
info('   - ' + 'Extended Model: ', IntToStr(b));

b := lo((_eax shr 20));
info('   - ' + 'Extended Family: ', IntToStr(b));

b := lo(_ebx);
info('   - ' + 'Brand ID: ', IntToStr(b));
b := hi(_ebx);
info('   - ' + 'Chunks: ', IntToStr(b));
b := lo(_ebx shr 16);
info('   - ' + 'Count: ', IntToStr(b));
b := hi(_ebx shr 16);
info('   - ' + 'APIC ID: ', IntToStr(b));

//Bit 18 =? 1     //is serial number enabled?
if (_edx and $40000) = $40000 then
info('   - ' + 'Serial Number ', 'Enabled')
else
info('   - ' + 'Serial Number ', 'Disabled');

s := IntToHex(_eax, 8);
asm                  //determine the serial number
mov eax,3
db $0F,$A2
mov _ecx,ecx
mov _edx,edx
end;
s1 := IntToHex(_edx, 8);
s2 := IntToHex(_ecx, 8);
Insert('-', s, 5);
Insert('-', s1, 5);
Insert('-', s2, 5);
info('   - ' + 'Serial Number: ', s + '-' + s1 + '-' + s2);

asm
mov eax,1
db $0F,$A2
mov _edx,edx
end;
info('', '');
//Bit 23 =? 1
if (_edx and $800000) = $800000 then
info('MMX ', 'Supported')
else
info('MMX ', 'Not Supported');

//Bit 24 =? 1
if (_edx and $01000000) = $01000000 then
info('FXSAVE & FXRSTOR Instructions ', 'Supported')
else
info('FXSAVE & FXRSTOR Instructions Not ', 'Supported');

//Bit 25 =? 1
if (_edx and $02000000) = $02000000 then
info('SSE ', 'Supported')
else
info('SSE ', 'Not Supported');

//Bit 26 =? 1
if (_edx and $04000000) = $04000000 then
info('SSE2 ', 'Supported')
else
info('SSE2 ', 'Not Supported');

info('', '');

asm     //execute the extended CPUID inst.
mov eax,$80000000   //sub. func call
db $0F,$A2
mov _eax,eax
end;

if _eax > $80000000 then //any other sub. funct avail. ?
begin
info('Extended CPUID: ', 'Supported');
info('   - Largest Function Supported: ', IntToStr(_eax - $80000000));
asm     //get brand ID
mov eax,$80000002
db $0F
db $A2
mov _eax,eax
mov _ebx,ebx
mov _ecx,ecx
mov _edx,edx
end;
s := '';
s1 := '';
s2 := '';
s3 := '';
for i := 0 to 3 do
begin
b := lo(_eax);
s3 := s3 + chr(b);
b := lo(_ebx);
s := s + chr(b);
b := lo(_ecx);
s1 := s1 + chr(b);
b := lo(_edx);
s2 := s2 + chr(b);
_eax := _eax shr 8;
_ebx := _ebx shr 8;
_ecx := _ecx shr 8;
_edx := _edx shr 8;
end;

s_all := s3 + s + s1 + s2;

asm
mov eax,$80000003
db $0F
db $A2
mov _eax,eax
mov _ebx,ebx
mov _ecx,ecx
mov _edx,edx
end;
s := '';
s1 := '';
s2 := '';
s3 := '';
for i := 0 to 3 do
begin
b := lo(_eax);
s3 := s3 + chr(b);
b := lo(_ebx);
s := s + chr(b);
b := lo(_ecx);
s1 := s1 + chr(b);
b := lo(_edx);
s2 := s2 + chr(b);
_eax := _eax shr 8;
_ebx := _ebx shr 8;
_ecx := _ecx shr 8;
_edx := _edx shr 8;
end;
s_all := s_all + s3 + s + s1 + s2;

asm
mov eax,$80000004
db $0F
db $A2
mov _eax,eax
mov _ebx,ebx
mov _ecx,ecx
mov _edx,edx
end;
s := '';
s1 := '';
s2 := '';
s3 := '';
for i := 0 to 3 do
begin
b := lo(_eax);
s3 := s3 + chr(b);
b := lo(_ebx);
s := s + chr(b);
b := lo(_ecx);
s1 := s1 + chr(b);
b := lo(_edx);
s2 := s2 + chr(b);
_eax := _eax shr 8;
_ebx := _ebx shr 8;
_ecx := _ecx shr 8;
_edx := _edx shr 8;
end;
info('Brand String: ', '');
if s2[Length(s2)] = #0 then
setlength(s2, Length(s2) - 1);
info('', '   - ' + s_all + s3 + s + s1 + s2);
end
else
info('   - Extended CPUID ', 'Not Supported.');
end;

procedure Tfrm_main.info(s1, s2: string);
begin
if s1 <> '' then
begin
img_info.Canvas.Brush.Color := clblue;
img_info.Canvas.Font.Color := clyellow;
img_info.Canvas.TextOut(gn_text_x, gn_text_y, s1);
end;
if s2 <> '' then
begin
img_info.Canvas.Brush.Color := clblue;
img_info.Canvas.Font.Color := clWhite;
img_info.Canvas.TextOut(gn_text_x + img_info.Canvas.TextWidth(s1), gn_text_y, s2);
end;
Inc(gn_text_y, 13);
end;

end.