1
1
unit HtmlParserEx;
2
- { ' $DEFINE UseXPath }
2
+ { $DEFINE UseXPath }
3
3
{ $IF RTLVersion < 24.0}
4
4
{ $MESSAGE ERROR 'Only XE3 and later versions are supported'}
5
5
{ $ENDIF}
@@ -24,12 +24,6 @@ interface
24
24
LowStrIndex = low(string); // Mobile platform=0, PC platform=1
25
25
26
26
type
27
- { $IFNDEF MSWINDOWS}
28
- { The interface uses WideString so that it can be used by languages such as C++, VB, etc.
29
- But if you leave the Windows platform, other platforms do not have the COM data type of WideString.
30
- }
31
- WideString = string;
32
- { $ENDIF}
33
27
IHtmlElement = interface ;
34
28
IHtmlElementList = interface ;
35
29
TElementEachEvent = reference to procedure(AIndex:Integer; AEl:IHtmlElement);
@@ -38,19 +32,19 @@ interface
38
32
IHtmlElement = interface
39
33
[' {8C75239C-8CFA-499F-B115-7CEBEDFB421B}' ]
40
34
function GetParent :IHtmlElement; stdcall;
41
- function GetTagName :WideString ; stdcall;
42
- procedure SetTagName (Value :WideString ); stdcall;
43
- function GetContent :WideString ; stdcall;
44
- function GetOrignal :WideString ; stdcall;
35
+ function GetTagName :String ; stdcall;
36
+ procedure SetTagName (Value :String ); stdcall;
37
+ function GetContent :String ; stdcall;
38
+ function GetOrignal :String ; stdcall;
45
39
function GetChildrenCount :Integer; stdcall;
46
40
function GetChildren (Index:Integer):IHtmlElement; stdcall;
47
41
function GetCloseTag :IHtmlElement; stdcall;
48
- function GetInnerHtml ():WideString ; stdcall;
49
- function GetOuterHtml ():WideString ; stdcall;
50
- function GetInnerText ():WideString ; stdcall;
51
- procedure SetInnerText (Value :WideString ); stdcall;
52
- function GetAttributes (Key:WideString):WideString ; stdcall;
53
- procedure SetAttributes (Key:WideString ; Value :WideString ); stdcall;
42
+ function GetInnerHtml ():String ; stdcall;
43
+ function GetOuterHtml ():String ; stdcall;
44
+ function GetInnerText ():String ; stdcall;
45
+ procedure SetInnerText (Value :String ); stdcall;
46
+ function GetAttributes (Key:String):String ; stdcall;
47
+ procedure SetAttributes (Key:String ; Value :String ); stdcall;
54
48
procedure RemoveAttr (AAttrName:string); stdcall;
55
49
function GetSourceLineNum ():Integer; stdcall;
56
50
function GetSourceColNum ():Integer; stdcall;
@@ -59,35 +53,35 @@ interface
59
53
procedure Remove ; stdcall;
60
54
function AppedChild (const ATag:string):IHtmlElement; stdcall;
61
55
// Returns whether the named attribute exists on this element
62
- function HasAttribute (AttributeName:WideString ):Boolean; stdcall;
56
+ function HasAttribute (AttributeName:String ):Boolean; stdcall;
63
57
{ Find Element using CSS selector syntax, "pseudo-class" is not supported
64
58
http://www.w3.org/TR/CSS2/selector.html
65
59
}
66
- function SimpleCSSSelector (const selector:WideString ):IHtmlElementList; stdcall;
67
- function Find (const selector:WideString ):IHtmlElementList; stdcall;
60
+ function SimpleCSSSelector (const selector:String ):IHtmlElementList; stdcall;
61
+ function Find (const selector:String ):IHtmlElementList; stdcall;
68
62
{ $IFDEF UseXPath}
69
- function FindX (const AXPath:WideString ):IHtmlElementList; stdcall;
63
+ function FindX (const AXPath:String ):IHtmlElementList; stdcall;
70
64
{ $ENDIF}
71
65
// Enumerate attribute names by index
72
- function EnumAttributeNames (Index:Integer):WideString ; stdcall;
73
- property TagName:WideString read GetTagName write SetTagName;
66
+ function EnumAttributeNames (Index:Integer):String ; stdcall;
67
+ property TagName:String read GetTagName write SetTagName;
74
68
property ChildrenCount:Integer read GetChildrenCount;
75
69
property Children[index:Integer]:IHtmlElement read GetChildren; default;
76
70
property CloseTag:IHtmlElement read GetCloseTag;
77
- property Content:WideString read GetContent;
78
- property Orignal:WideString read GetOrignal;
71
+ property Content:String read GetContent;
72
+ property Orignal:String read GetOrignal;
79
73
property Parent:IHtmlElement read GetParent;
80
74
// Get the position of an element in the source code
81
75
property SourceLineNum:Integer read GetSourceLineNum;
82
76
property SourceColNum:Integer read GetSourceColNum;
83
77
//
84
- property InnerHtml:WideString read GetInnerHtml;
85
- property OuterHtml:WideString read GetOuterHtml;
86
- property InnerText:WideString read GetInnerText write SetInnerText;
87
- property Text:WideString read GetInnerText write SetInnerText;
88
- property Attributes[Key:WideString]:WideString read GetAttributes write SetAttributes;
78
+ property InnerHtml:String read GetInnerHtml;
79
+ property OuterHtml:String read GetOuterHtml;
80
+ property InnerText:String read GetInnerText write SetInnerText;
81
+ property Text:String read GetInnerText write SetInnerText;
82
+ property Attributes[Key:String]:String read GetAttributes write SetAttributes;
89
83
// ying32: shorthand alias for Attributes (same getter and setter); the original property is left unchanged
90
- property Attrs[Key:WideString]:WideString read GetAttributes write SetAttributes;
84
+ property Attrs[Key:String]:String read GetAttributes write SetAttributes;
91
85
end ;
92
86
93
87
@@ -110,15 +104,15 @@ THtmlListEnumerator = class
110
104
procedure RemoveAll ; stdcall;
111
105
procedure Remove (ANode:IHtmlElement); stdcall;
112
106
procedure Each (f:TElementEachEvent); stdcall;
113
- function GetText :WideString ; stdcall;
107
+ function GetText :String ; stdcall;
114
108
function GetEnumerator :THtmlListEnumerator;
115
- property Text:WideString read GetText;
109
+ property Text:String read GetText;
116
110
property Count:Integer read GetCount;
117
111
property Items[index:Integer]:IHtmlElement read GetItems; default;
118
112
end ;
119
113
120
114
121
- function ParserHTML (const Source:WideString ):IHtmlElement; stdcall;
115
+ function ParserHTML (const Source:String ):IHtmlElement; stdcall;
122
116
function DecodeHtmlEntities (S:string):string; forward ;
123
117
124
118
implementation
@@ -217,19 +211,19 @@ THtmlElement = class(TInterfacedObject, IHtmlElement)
217
211
protected
218
212
// ying32
219
213
function GetParent :IHtmlElement; stdcall;
220
- function GetTagName :WideString ; stdcall;
221
- procedure SetTagName (Value :WideString ); stdcall;
222
- function GetContent :WideString ; stdcall;
223
- function GetOrignal :WideString ; stdcall;
214
+ function GetTagName :String ; stdcall;
215
+ procedure SetTagName (Value :String ); stdcall;
216
+ function GetContent :String ; stdcall;
217
+ function GetOrignal :String ; stdcall;
224
218
function GetChildrenCount :Integer; stdcall;
225
219
function GetChildren (Index:Integer):IHtmlElement; stdcall;
226
220
function GetCloseTag :IHtmlElement; stdcall;
227
- function GetInnerHtml ():WideString ; stdcall;
228
- function GetOuterHtml ():WideString ; stdcall;
229
- function GetInnerText ():WideString ; stdcall;
230
- procedure SetInnerText (Value :WideString ); stdcall;
231
- function GetAttributes (Key:WideString):WideString ; stdcall;
232
- procedure SetAttributes (Key:WideString ; Value :WideString ); stdcall;
221
+ function GetInnerHtml ():String ; stdcall;
222
+ function GetOuterHtml ():String ; stdcall;
223
+ function GetInnerText ():String ; stdcall;
224
+ procedure SetInnerText (Value :String ); stdcall;
225
+ function GetAttributes (Key:String):String ; stdcall;
226
+ procedure SetAttributes (Key:String ; Value :String ); stdcall;
233
227
procedure RemoveAttr (AAttrName:string); stdcall;
234
228
function GetSourceLineNum ():Integer; stdcall;
235
229
function GetSourceColNum ():Integer; stdcall;
@@ -239,32 +233,32 @@ THtmlElement = class(TInterfacedObject, IHtmlElement)
239
233
function AppedChild (const ATag:string):IHtmlElement; stdcall;
240
234
241
235
// Returns whether the named attribute exists on this element
242
- function HasAttribute (AttributeName:WideString ):Boolean; stdcall;
236
+ function HasAttribute (AttributeName:String ):Boolean; stdcall;
243
237
{ Find Element with CSS selector syntax, does not support "pseudo-class"
244
238
http://www.w3.org/TR/CSS2/selector.html
245
239
}
246
- function SimpleCSSSelector (const selector:WideString ):IHtmlElementList; stdcall;
247
- function Find (const selector:WideString ):IHtmlElementList; stdcall;
240
+ function SimpleCSSSelector (const selector:String ):IHtmlElementList; stdcall;
241
+ function Find (const selector:String ):IHtmlElementList; stdcall;
248
242
{ $IFDEF UseXPath}
249
- function FindX (const AXPath:WideString ):IHtmlElementList; stdcall;
243
+ function FindX (const AXPath:String ):IHtmlElementList; stdcall;
250
244
{ $ENDIF}
251
245
// Enumerate attribute names by index
252
- function EnumAttributeNames (Index:Integer):WideString ; stdcall;
253
- property TagName:WideString read GetTagName write SetTagName;
246
+ function EnumAttributeNames (Index:Integer):String ; stdcall;
247
+ property TagName:String read GetTagName write SetTagName;
254
248
property ChildrenCount:Integer read GetChildrenCount;
255
249
property Children[index:Integer]:IHtmlElement read GetChildren; default;
256
250
property CloseTag:IHtmlElement read GetCloseTag;
257
- property Content:WideString read GetContent;
258
- property Orignal:WideString read GetOrignal;
251
+ property Content:String read GetContent;
252
+ property Orignal:String read GetOrignal;
259
253
property Parent:IHtmlElement read GetParent;
260
254
// Get the position of an element in the source code
261
255
property SourceLineNum:Integer read GetSourceLineNum;
262
256
property SourceColNum:Integer read GetSourceColNum;
263
257
//
264
- property InnerHtml:WideString read GetInnerHtml;
265
- property OuterHtml:WideString read GetOuterHtml;
266
- property InnerText:WideString read GetInnerText;
267
- property Attributes[Key:WideString]:WideString read GetAttributes write SetAttributes;
258
+ property InnerHtml:String read GetInnerHtml;
259
+ property OuterHtml:String read GetOuterHtml;
260
+ property InnerText:String read GetInnerText;
261
+ property Attributes[Key:String]:String read GetAttributes write SetAttributes;
268
262
property Childrens:IHtmlElementList read GetChildrens;
269
263
private
270
264
FClosed:Boolean;
@@ -305,7 +299,7 @@ TIHtmlElementList = class(TInterfacedObject, IHtmlElementList)
305
299
procedure RemoveAll ; stdcall;
306
300
procedure Remove (ANode:IHtmlElement); stdcall;
307
301
procedure Each (f:TElementEachEvent); stdcall;
308
- function GetText :WideString ; stdcall;
302
+ function GetText :String ; stdcall;
309
303
public
310
304
constructor Create;
311
305
destructor Destroy; override;
@@ -1040,7 +1034,7 @@ function BuildTree(ElementList:THtmlElementList):THtmlElement;
1040
1034
end ;
1041
1035
1042
1036
1043
- function ParserHTML (const Source:WideString ):IHtmlElement; stdcall;
1037
+ function ParserHTML (const Source:String ):IHtmlElement; stdcall;
1044
1038
var
1045
1039
ElementList:THtmlElementList;
1046
1040
begin
@@ -1592,7 +1586,7 @@ function TIHtmlElementList.GetItems(Index:Integer):IHtmlElement;
1592
1586
end ;
1593
1587
1594
1588
1595
- function TIHtmlElementList.GetText :WideString ;
1589
+ function TIHtmlElementList.GetText :String ;
1596
1590
var
1597
1591
LEL:IHtmlElement;
1598
1592
begin
@@ -1673,7 +1667,7 @@ function THtmlElement.AppedChild(const ATag:string):IHtmlElement;
1673
1667
end ;
1674
1668
1675
1669
1676
- function THtmlElement.EnumAttributeNames (Index:Integer):WideString ;
1670
+ function THtmlElement.EnumAttributeNames (Index:Integer):String ;
1677
1671
var
1678
1672
Attrs:TStringDynArray;
1679
1673
begin
@@ -1684,7 +1678,7 @@ function THtmlElement.EnumAttributeNames(Index:Integer):WideString;
1684
1678
end ;
1685
1679
1686
1680
1687
- function THtmlElement.GetAttributes (Key:WideString):WideString ;
1681
+ function THtmlElement.GetAttributes (Key:String):String ;
1688
1682
begin
1689
1683
Result := ' ' ;
1690
1684
Key := LowerCase(Key);
@@ -1717,7 +1711,7 @@ function THtmlElement.GetCloseTag:IHtmlElement;
1717
1711
end ;
1718
1712
1719
1713
1720
- function THtmlElement.GetContent :WideString ;
1714
+ function THtmlElement.GetContent :String ;
// Read accessor for the Content property: returns the FContent field as stored,
// with no transformation applied.
1721
1715
begin
1722
1716
Result := FContent;
1723
1717
end ;
@@ -1878,7 +1872,7 @@ procedure THtmlElement._SimpleCSSSelector(const ItemGroup:TCSSSelectorItemGroup;
1878
1872
end ;
1879
1873
1880
1874
1881
- function THtmlElement.GetInnerHtml :WideString ;
1875
+ function THtmlElement.GetInnerHtml :String ;
1882
1876
var
1883
1877
Sb:TStringBuilder;
1884
1878
begin
@@ -1889,7 +1883,7 @@ function THtmlElement.GetInnerHtml:WideString;
1889
1883
end ;
1890
1884
1891
1885
1892
- function THtmlElement.GetInnerText :WideString ;
1886
+ function THtmlElement.GetInnerText :String ;
1893
1887
var
1894
1888
Sb:TStringBuilder;
1895
1889
begin
@@ -1900,13 +1894,13 @@ function THtmlElement.GetInnerText:WideString;
1900
1894
end ;
1901
1895
1902
1896
1903
- function THtmlElement.GetOrignal :WideString ;
1897
+ function THtmlElement.GetOrignal :String ;
// Read accessor for the Orignal property: returns the FOrignal field as stored.
// NOTE(review): "Orignal" (sic) is the spelling used by the public interface,
// so it cannot be corrected here without breaking callers.
1904
1898
begin
1905
1899
Result := FOrignal;
1906
1900
end ;
1907
1901
1908
1902
1909
- function THtmlElement.GetOuterHtml :WideString ;
1903
+ function THtmlElement.GetOuterHtml :String ;
1910
1904
var
1911
1905
Sb:TStringBuilder;
1912
1906
begin
@@ -1935,13 +1929,13 @@ function THtmlElement.GetSourceLineNum:Integer;
1935
1929
end ;
1936
1930
1937
1931
1938
- function THtmlElement.GetTagName :WideString ;
1932
+ function THtmlElement.GetTagName :String ;
// Read accessor for the TagName property: returns the FTagName field as stored.
1939
1933
begin
1940
1934
Result := FTagName;
1941
1935
end ;
1942
1936
1943
1937
1944
- function THtmlElement.HasAttribute (AttributeName:WideString ):Boolean;
1938
+ function THtmlElement.HasAttribute (AttributeName:String ):Boolean;
// Reports whether FAttributes contains an entry for AttributeName.
// The name is lower-cased before the lookup, so the check does not depend on
// the letter case the caller passes in.
// Returns True when the lower-cased key is present, False otherwise.
1945
1939
begin
1946
1940
Result := FAttributes.ContainsKey(LowerCase(AttributeName));
1947
1941
end ;
@@ -1973,27 +1967,27 @@ procedure THtmlElement.RemoveAttr(AAttrName:string);
1973
1967
end ;
1974
1968
1975
1969
1976
- procedure THtmlElement.SetAttributes (Key:WideString ; Value :WideString );
1970
+ procedure THtmlElement.SetAttributes (Key:String ; Value :String );
// Write accessor for the Attributes property: stores Value in FAttributes under
// the lower-cased Key, adding the entry if it is absent or overwriting it if
// it already exists. Value itself is stored unchanged.
1977
1971
begin
1978
1972
FAttributes.AddOrSetValue(LowerCase(Key), Value );
1979
1973
end ;
1980
1974
1981
1975
1982
- procedure THtmlElement.SetInnerText (Value :WideString );
1976
+ procedure THtmlElement.SetInnerText (Value :String );
// Write accessor for InnerText: assigns Value to the FContent field.
// Nothing else is modified by this procedure.
1983
1977
begin
1984
1978
FContent := Value ;
1985
1979
end ;
1986
1980
1987
1981
1988
- procedure THtmlElement.SetTagName (Value :WideString );
1982
+ procedure THtmlElement.SetTagName (Value :String );
// Write accessor for the TagName property.
// Stores the upper-cased form of Value in FTagName and, when this element has
// an associated close tag (FCloseTag is not nil), assigns the same name to
// that close tag so the pair stays in sync.
1989
1983
begin
1990
1984
FTagName := UpperCase(Value );
1991
1985
if FCloseTag <> nil then
1992
1986
// Propagate the already upper-cased name rather than the raw Value.
FCloseTag.TagName := Self.FTagName;
1993
1987
end ;
1994
1988
1995
1989
1996
- function THtmlElement.SimpleCSSSelector (const selector:WideString ):IHtmlElementList;
1990
+ function THtmlElement.SimpleCSSSelector (const selector:String ):IHtmlElementList;
1997
1991
var
1998
1992
r:TIHtmlElementList;
1999
1993
begin
@@ -2003,7 +1997,7 @@ function THtmlElement.SimpleCSSSelector(const selector:WideString):IHtmlElementL
2003
1997
end ;
2004
1998
2005
1999
2006
- function THtmlElement.Find (const selector:WideString ):IHtmlElementList;
2000
+ function THtmlElement.Find (const selector:String ):IHtmlElementList;
// Shorter-named wrapper: passes selector straight to SimpleCSSSelector and
// returns its result unchanged.
2007
2001
begin
2008
2002
Result := SimpleCSSSelector(selector);
2009
2003
end ;
@@ -2012,7 +2006,7 @@ function THtmlElement.Find(const selector:WideString):IHtmlElementList;
2012
2006
function XPathToCSSSelector (const AXPath:string):string; forward ;
2013
2007
2014
2008
2015
- function THtmlElement.FindX (const AXPath:WideString ):IHtmlElementList;
2009
+ function THtmlElement.FindX (const AXPath:String ):IHtmlElementList;
// XPath flavour of Find: converts AXPath to a CSS selector string with
// XPathToCSSSelector, then runs that selector through SimpleCSSSelector and
// returns the resulting element list.
2016
2010
begin
2017
2011
Result := SimpleCSSSelector(XPathToCSSSelector(AXPath));
2018
2012
end ;
0 commit comments