Home > Delphi, nodeJS, Object Pascal, Smart Mobile Studio > Heres a little parser I wrote

Heres a little parser I wrote

January 22, 2015 Leave a comment Go to comments

If you plan on parsing source-code in some form or another, you need a proper text-buffer that let’s you parse the text as simple as possible.

Here is a cool one I wrote during my lunch break. It’s written in Smart Pascal (Smart Mobile Studio) but should be easy enough to port back to Delphi or Lazarus.

Here is how you use it to dump out word-by-word from a snippet:

procedure TForm1.testNewBuffer;
var
  mbuffer:  TPascalTextBuffer;
  mText:  String;
begin
  mBuffer:=TPascalTextBuffer.Create;
  try
    mBuffer.Buffer:=#"procedure TForm1.test(first,second,third);
      begin
        // this is cool
      end;";

    mBuffer.first;

    while not mBuffer.EOF do
    begin
      mtext:=mBuffer.ReadWord;
      if mtext.length>0 then
      writeln(mText) else
      break;
    end;

  finally
    mBuffer.free;
  end;
end;

Which gives you the following output:

Much easier to handle tokens and symbol names this way

Much easier to handle tokens and symbol names this way

 

unit uniparse;

interface

uses 
  SmartCL.System;


  type

  (*  About:    Generic string buffer class
      Comments: This class allows you to traverse a text-buffer using
                next/back/first/last methods. The current character is
                always exposed in the "current" property.

                It also contains simple bookmark features, allowing you do
                recurse into the buffer for validation, and jump back
                with a single line.
  *)
  TCustomTextBuffer = Class(TObject)
  private
    FBuffer:    string;
    Fpos:       Integer;
    FBookmarks: Array of Integer;
  protected
    procedure setBuffer(Value:String);
    procedure setPosition(Value:Integer);
    function  getLength:Integer;
    function  getEmpty:Boolean;
    function  getBOF:Boolean;
    function  getEOF:Boolean;
    function  getCurrent:String;
  public
    property  Position:Integer read Fpos write setPosition;
    property  Buffer:String read FBuffer write setBuffer;
    Property  BufferLength:Integer read getLength;
    Property  Empty:Boolean read getEmpty;
    property  BOF:Boolean read getBOF;
    property  EOF:Boolean read getEOF;
    Property  Current:String read getCurrent;

    procedure Skip(Value:Array of String);

    function  PeekAhead(count:Integer):String;
    Function  CompareAhead(value:String):Boolean;

    function  Peek(count:Integer):String;
    function  Compare(Value:String):Boolean;

    procedure Bookmark;
    procedure UnBookmark;

    procedure Next;
    procedure Back;
    Procedure First;
    procedure Last;

    procedure Clear;virtual;
  end;

  (*  About:    word based buffer class
      Comments: This class extents TCustomTextBuffer with word-reading
                capabilities. This means you can traverse a text buffer
                word-by-word rather than character-by-character.

      Note:     The parser automatically skips un-readable characters like
                space, linefeed, cariage return and tab.

      Note:     "Breaker" characters will count as a single word.
                For instance, a text like this:

                "Procedure test(sender:TObject)"

                will break down into:
                  - Procedure
                  - test
                  - (
                  - sender
                  - :
                  - TObject
                  - )

                This makes it much easier to write parsers which deals with
                expected sequences and text structures. It also makes it much
                easier to deal with character-combinations, such as "(*" and
                "/*" combinations which are typically used for remarks.

      Note:     Expected sequences can be validated easily with the Ensure()
                method.
  *)
  TWordTextBuffer = Class(TCustomTextBuffer)
  public
    function  ReadTo(aBreakChars:Array of String):String;
    function  ReadWord:String;
    function  &Ensure(Sequence:Array of string):Boolean;
  end;

  TLanguageBuffer = Class(TWordTextBuffer)
  public
    function  Remark:Boolean;virtual;abstract;
    procedure SkipRemark;virtual;abstract;
  end;

  TNPPTextBuffer = Class(TWordTextBuffer)
  end;

  TPascalTextBuffer = Class(TLanguageBuffer)
  public
    function  Remark:Boolean;override;
    procedure SkipRemark;override;
  end;

implementation

//#############################################################################
//  TPascalTextBuffer
//#############################################################################

function  TPascalTextBuffer.Remark:Boolean;
Begin
  result:=((Current ='/') and (PeekAhead(1)='/'))
    or    ((current ='(') and (PeekAhead(1)='*'))
    or    ((current ='/') and (PeekAhead(1)='*'));
end;

procedure TPascalTextBuffer.SkipRemark;
Begin
  if not Empty
  and not EOF then
  begin
    if ((Current ='/') and (PeekAhead(1)='/')) then
    Begin
      self.ReadTo([#13]);
      next;
    end else

    if ((current ='(') and (PeekAhead(1)='*')) then
    Begin
      next;
      next;
      self.ReadTo(['*']);
      if PeekAhead(1)=')' then
      next;
    end else

    if ((current ='/') and (PeekAhead(1)='*')) then
    begin
      next;
      next;
      ReadTo(['*']);
      if PeekAhead(1)='/' then
      next;
    end;

  end;
end;

//#############################################################################
//  TWordTextBuffer
//#############################################################################

function TWordTextBuffer.ReadTo(aBreakChars:Array of String):String;
Begin
  setLength(result,0);
  if not Empty then
  Begin
    while not EOF do
    begin
      if not (current in aBreakChars) then
      result+=Current else
      break;
      next;
    end;
  end;
end;

function TWordTextBuffer.&Ensure(Sequence:Array of string):Boolean;
var
  x:      Integer;
  mRead:  String;
Begin
  result:=False;
  if not Empty
  and not EOF then
  begin
    if sequence.length>0 then
    begin
      for x:=sequence.low to sequence.high do
      begin
        mRead:=readWord;
        result:=sameText(mRead,Sequence[x]);
        if not result then
        break;
      end;
    end;
  end;
end;

function TWordTextBuffer.ReadWord:String;
Begin
  if not Empty
  and not EOF then
  begin
    Skip([' ',#9,#10,#13]);

    while not EOF do
    begin
      if (current in ['A'..'Z','a'..'z','0'..'9','_']) then
      result += current else
      break;
      next;
    end;

    if (result.length=0)
    and not EOF then
    Begin
      result:=Current;
      next;
    end;
  end;
end;

//#############################################################################
//  TCustomTextBuffer
//#############################################################################

Function TCustomTextBuffer.CompareAhead(value:String):Boolean;
var
  mText:  String;
begin
  if not empty
  and not EOF then
  Begin
    mText:=peekAhead(Length(Value));
    result:=Sametext(mText,Value);
  end else
  result:=False;
end;

function TCustomTextBuffer.Compare(Value:String):Boolean;
var
  mText:  String;
  mlen:   Integer;
begin
  if not empty
  and not EOF then
  Begin
    mLen:=Length(Value);
    mText:=Current + peekAhead(mLen-1);
    result:=Sametext(mText,Value);
  end else
  result:=False;
end;

function TCustomTextBuffer.Peek(count:Integer):String;
begin
  if Count>0 then
  begin
    bookmark;
    try
      setLength(result,0);
      if not EOF then
      Begin
        while length(result)<Count do
        Begin
          result:=result + current;
          if not EOF then
          next else
          break;
        end;
      end;
    finally
      unBookmark;
    end;
  end;
end;

function TCustomTextBuffer.PeekAhead(count:Integer):String;
Begin
  if Count>0 then
  begin
    bookmark;
    try
      setLength(result,0);
      next;
      if not EOF then
      Begin
        while length(result)<Count do
        Begin
          result:=result + current;
          if not EOF then
          next else
          break;
        end;
      end;
    finally
      unBookmark;
    end;
  end;
end;

procedure TCustomTextBuffer.Skip(Value:Array of String);
Begin
  if not empty
  and not EOF then
  Begin
    repeat
      if (current in Value) then
      next else
      break;
    until EOF;
  end;
end;

procedure TCustomTextBuffer.Bookmark;
Begin
  if not Empty then
  FBookmarks.add(FPos) else
  raise exception.create('Failed to add bookmark, buffer is empty error');
end;

procedure TCustomTextBuffer.UnBookmark;
Begin
  if FBookmarks.Length>0 then
  begin
    FPos:=FBookmarks[FBookmarks.high];
    FBookmarks.delete(FBookmarks.high,1);
  end else
  raise Exception.Create
  ('Failed to revert to bookmark, no bookmarks found error');
end;

function TCustomTextBuffer.getCurrent:String;
begin
  if not Empty
  and not EOF
  and not BOF then
  result:=FBuffer[FPos] else
  raise exception.Create('Read failed, invalid position');
end;

Procedure TCustomTextBuffer.First;
Begin
  if not empty then
  Fpos:=1;
end;

procedure TCustomTextBuffer.Last;
Begin
  if not empty then
  Fpos:=Length(FBuffer);
end;

procedure TCustomTextBuffer.Next;
Begin
  if not Empty
  and not EOF then
  inc(FPos);
end;

procedure TCustomTextBuffer.Back;
Begin
  if not empty
  and not BOF then
  dec(Fpos);
end;

function TCustomTextBuffer.getBOF:Boolean;
Begin
  result:=FPos<1;
end;

function TCustomTextBuffer.getEOF:Boolean;
Begin
  result:=FPos>Length(FBuffer);
end;

function TCustomTextBuffer.getEmpty:Boolean;
Begin
  result:=Length(FBuffer)<1;
end;

procedure TCustomTextBuffer.Clear;
begin
  Fbookmarks.Clear;
  FBuffer:='';
  FPos:=0;
end;

procedure TCustomTextBuffer.setBuffer(Value: String);
begin
  FBuffer:=Value;
  FBookmarks.Clear;
  FPos:=0;
end;

function TCustomTextBuffer.getLength: Integer;
begin
  result:=Length(FBuffer);
end;

procedure TCustomTextBuffer.setPosition(Value: Integer);
begin
  if (value>0) and (value<length(FBuffer)) then
  FPos:=value;
end;

end.

Advertisements
  1. January 22, 2015 at 3:53 pm

    Damn dude, you eat slow or code fast!
    Total respect 🙂

  1. January 29, 2015 at 5:50 pm

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: