Archive
Archive for January 22, 2015
Heres a little parser I wrote
January 22, 2015
2 comments
If you plan on parsing source-code in some form or another, you need a proper text-buffer that let’s you parse the text as simple as possible.
Here is a cool one I wrote during my lunch break. It’s written in Smart Pascal (Smart Mobile Studio) but should be easy enough to port back to Delphi or Lazarus.
Here is how you use it to dump out word-by-word from a snippet:
procedure TForm1.testNewBuffer; var mbuffer: TPascalTextBuffer; mText: String; begin mBuffer:=TPascalTextBuffer.Create; try mBuffer.Buffer:=#"procedure TForm1.test(first,second,third); begin // this is cool end;"; mBuffer.first; while not mBuffer.EOF do begin mtext:=mBuffer.ReadWord; if mtext.length>0 then writeln(mText) else break; end; finally mBuffer.free; end; end;
Which gives you the following output:
unit uniparse; interface uses SmartCL.System; type (* About: Generic string buffer class Comments: This class allows you to traverse a text-buffer using next/back/first/last methods. The current character is always exposed in the "current" property. It also contains simple bookmark features, allowing you do recurse into the buffer for validation, and jump back with a single line. *) TCustomTextBuffer = Class(TObject) private FBuffer: string; Fpos: Integer; FBookmarks: Array of Integer; protected procedure setBuffer(Value:String); procedure setPosition(Value:Integer); function getLength:Integer; function getEmpty:Boolean; function getBOF:Boolean; function getEOF:Boolean; function getCurrent:String; public property Position:Integer read Fpos write setPosition; property Buffer:String read FBuffer write setBuffer; Property BufferLength:Integer read getLength; Property Empty:Boolean read getEmpty; property BOF:Boolean read getBOF; property EOF:Boolean read getEOF; Property Current:String read getCurrent; procedure Skip(Value:Array of String); function PeekAhead(count:Integer):String; Function CompareAhead(value:String):Boolean; function Peek(count:Integer):String; function Compare(Value:String):Boolean; procedure Bookmark; procedure UnBookmark; procedure Next; procedure Back; Procedure First; procedure Last; procedure Clear;virtual; end; (* About: word based buffer class Comments: This class extents TCustomTextBuffer with word-reading capabilities. This means you can traverse a text buffer word-by-word rather than character-by-character. Note: The parser automatically skips un-readable characters like space, linefeed, cariage return and tab. Note: "Breaker" characters will count as a single word. For instance, a text like this: "Procedure test(sender:TObject)" will break down into: - Procedure - test - ( - sender - : - TObject - ) This makes it much easier to write parsers which deals with expected sequences and text structures. It also makes it much easier to deal with character-combinations, such as "(*" and "/*" combinations which are typically used for remarks. Note: Expected sequences can be validated easily with the Ensure() method. *) TWordTextBuffer = Class(TCustomTextBuffer) public function ReadTo(aBreakChars:Array of String):String; function ReadWord:String; function &Ensure(Sequence:Array of string):Boolean; end; TLanguageBuffer = Class(TWordTextBuffer) public function Remark:Boolean;virtual;abstract; procedure SkipRemark;virtual;abstract; end; TNPPTextBuffer = Class(TWordTextBuffer) end; TPascalTextBuffer = Class(TLanguageBuffer) public function Remark:Boolean;override; procedure SkipRemark;override; end; implementation //############################################################################# // TPascalTextBuffer //############################################################################# function TPascalTextBuffer.Remark:Boolean; Begin result:=((Current ='/') and (PeekAhead(1)='/')) or ((current ='(') and (PeekAhead(1)='*')) or ((current ='/') and (PeekAhead(1)='*')); end; procedure TPascalTextBuffer.SkipRemark; Begin if not Empty and not EOF then begin if ((Current ='/') and (PeekAhead(1)='/')) then Begin self.ReadTo([#13]); next; end else if ((current ='(') and (PeekAhead(1)='*')) then Begin next; next; self.ReadTo(['*']); if PeekAhead(1)=')' then next; end else if ((current ='/') and (PeekAhead(1)='*')) then begin next; next; ReadTo(['*']); if PeekAhead(1)='/' then next; end; end; end; //############################################################################# // TWordTextBuffer //############################################################################# function TWordTextBuffer.ReadTo(aBreakChars:Array of String):String; Begin setLength(result,0); if not Empty then Begin while not EOF do begin if not (current in aBreakChars) then result+=Current else break; next; end; end; end; function TWordTextBuffer.&Ensure(Sequence:Array of string):Boolean; var x: Integer; mRead: String; Begin result:=False; if not Empty and not EOF then begin if sequence.length>0 then begin for x:=sequence.low to sequence.high do begin mRead:=readWord; result:=sameText(mRead,Sequence[x]); if not result then break; end; end; end; end; function TWordTextBuffer.ReadWord:String; Begin if not Empty and not EOF then begin Skip([' ',#9,#10,#13]); while not EOF do begin if (current in ['A'..'Z','a'..'z','0'..'9','_']) then result += current else break; next; end; if (result.length=0) and not EOF then Begin result:=Current; next; end; end; end; //############################################################################# // TCustomTextBuffer //############################################################################# Function TCustomTextBuffer.CompareAhead(value:String):Boolean; var mText: String; begin if not empty and not EOF then Begin mText:=peekAhead(Length(Value)); result:=Sametext(mText,Value); end else result:=False; end; function TCustomTextBuffer.Compare(Value:String):Boolean; var mText: String; mlen: Integer; begin if not empty and not EOF then Begin mLen:=Length(Value); mText:=Current + peekAhead(mLen-1); result:=Sametext(mText,Value); end else result:=False; end; function TCustomTextBuffer.Peek(count:Integer):String; begin if Count>0 then begin bookmark; try setLength(result,0); if not EOF then Begin while length(result)<Count do Begin result:=result + current; if not EOF then next else break; end; end; finally unBookmark; end; end; end; function TCustomTextBuffer.PeekAhead(count:Integer):String; Begin if Count>0 then begin bookmark; try setLength(result,0); next; if not EOF then Begin while length(result)<Count do Begin result:=result + current; if not EOF then next else break; end; end; finally unBookmark; end; end; end; procedure TCustomTextBuffer.Skip(Value:Array of String); Begin if not empty and not EOF then Begin repeat if (current in Value) then next else break; until EOF; end; end; procedure TCustomTextBuffer.Bookmark; Begin if not Empty then FBookmarks.add(FPos) else raise exception.create('Failed to add bookmark, buffer is empty error'); end; procedure TCustomTextBuffer.UnBookmark; Begin if FBookmarks.Length>0 then begin FPos:=FBookmarks[FBookmarks.high]; FBookmarks.delete(FBookmarks.high,1); end else raise Exception.Create ('Failed to revert to bookmark, no bookmarks found error'); end; function TCustomTextBuffer.getCurrent:String; begin if not Empty and not EOF and not BOF then result:=FBuffer[FPos] else raise exception.Create('Read failed, invalid position'); end; Procedure TCustomTextBuffer.First; Begin if not empty then Fpos:=1; end; procedure TCustomTextBuffer.Last; Begin if not empty then Fpos:=Length(FBuffer); end; procedure TCustomTextBuffer.Next; Begin if not Empty and not EOF then inc(FPos); end; procedure TCustomTextBuffer.Back; Begin if not empty and not BOF then dec(Fpos); end; function TCustomTextBuffer.getBOF:Boolean; Begin result:=FPos<1; end; function TCustomTextBuffer.getEOF:Boolean; Begin result:=FPos>Length(FBuffer); end; function TCustomTextBuffer.getEmpty:Boolean; Begin result:=Length(FBuffer)<1; end; procedure TCustomTextBuffer.Clear; begin Fbookmarks.Clear; FBuffer:=''; FPos:=0; end; procedure TCustomTextBuffer.setBuffer(Value: String); begin FBuffer:=Value; FBookmarks.Clear; FPos:=0; end; function TCustomTextBuffer.getLength: Integer; begin result:=Length(FBuffer); end; procedure TCustomTextBuffer.setPosition(Value: Integer); begin if (value>0) and (value<length(FBuffer)) then FPos:=value; end; end.
You must be logged in to post a comment.