@@ -8,36 +8,7 @@ import (
88)
99
1010// CallFunc write function interface
11- //
12- // Builtin Functions
13- //
14- // - text() get element text, return string, this is default function, if not define function in struct tag.
15- //
16- // - eachText() get each element text, return []string.
17- //
18- // - html() get element inner html, return string.
19- //
20- // - eachHtml() get each element inner html, return []string.
21- //
22- // - outerHtml() get element outer html, return string.
23- //
24- // - eachOutHtml() get each element outer html, return []string.
25- //
26- // - attr(name) get element attribute value, return string.
27- //
28- // - eachAttr() get each element attribute value, return []string.
29- //
30- // - attrInt(name, defaultValue) get element attribute value and to int, return int.
31- //
32- // - attrSplit(name, sep) get attribute value and split by separator to array string.
33- //
34- // - value() get element attribute value by name is `value`, return string, eg: <input value='xxxx' /> will return "xxx".
35- //
36- // - split(sep) get element text and split by separator to array string, return []string.
37- //
38- // - eachJoin(sep) get each element text and join to string, return string.
39- //
40- //
11+
4112// # Define Global Function
4213//
4314// func MyFunc(node *goquery.Selection, args ...string) (out interface{}, err error) {
@@ -70,43 +41,52 @@ import (
7041// Define your own function interface
7142type CallFunc func (node * goquery.Selection , args ... string ) (out interface {}, err error )
7243
73- var sysFuncs = map [string ]CallFunc {
74- "text" : text ,
75- "eachText" : eachText ,
76- "html" : html ,
77- "eachHtml" : eachHtml ,
78- "outerHtml" : outHtml ,
79- "eachOutHtml" : eachOutHtml , //
80- "attr" : attr , //
81- "eachAttr" : eachAttr ,
82- "attrInt" : attrInt ,
83- "attrSplit" : attrSplit ,
84- "value" : value ,
85- "split" : split ,
86- "eachJoin" : eachJoin ,
87- }
88-
89- // text() string
90- func text (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
44+ //Builtin Functions
45+ type BuiltinFunctions struct {
46+ }
47+
48+ var builtinFuncObj BuiltinFunctions
49+ var builtinFuncMap = map [string ]CallFunc {
50+ "text" : builtinFuncObj .Text ,
51+ "eachText" : builtinFuncObj .EachText ,
52+ "html" : builtinFuncObj .Html ,
53+ "eachHtml" : builtinFuncObj .EachHtml ,
54+ "outerHtml" : builtinFuncObj .OutHtml ,
55+ "eachOutHtml" : builtinFuncObj .EachOutHtml , //
56+ "attr" : builtinFuncObj .Attr , //
57+ "eachAttr" : builtinFuncObj .EachAttr ,
58+ "attrInt" : builtinFuncObj .AttrInt ,
59+ "attrSplit" : builtinFuncObj .AttrSplit ,
60+ "value" : builtinFuncObj .Value ,
61+ "split" : builtinFuncObj .Split ,
62+ "eachJoin" : builtinFuncObj .EachJoin ,
63+ "eq" : builtinFuncObj .Eq ,
64+ "eqAndAttr" : builtinFuncObj .EqAndAttr ,
65+ "eqAndHtml" : builtinFuncObj .EqAndHtml ,
66+ "eqAndOutHtml" : builtinFuncObj .EqAndOutHtml ,
67+ }
68+
69+ // text() get element text, return string, this is default function, if not define function in struct tag.
70+ func (builtin BuiltinFunctions ) Text (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
9171 return strings .TrimSpace (node .Text ()), nil
9272}
9373
94- // eachText() []string
95- func eachText (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
74+ // eachText() get each element text, return []string.
75+ func ( builtin BuiltinFunctions ) EachText (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
9676 list := make ([]string , 0 )
9777 node .Each (func (i int , selection * goquery.Selection ) {
9878 list = append (list , strings .TrimSpace (selection .Text ()))
9979 })
10080 return list , nil
10181}
10282
103- // html() string
104- func html (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
83+ // html() get element inner html, return string.
84+ func ( builtin BuiltinFunctions ) Html (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
10585 return node .Html ()
10686}
10787
108- // eachHtml() []string
109- func eachHtml (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
88+ // eachHtml() get each element inner html, return []string.
89+ func ( builtin BuiltinFunctions ) EachHtml (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
11090 list := make ([]string , 0 )
11191 node .EachWithBreak (func (i int , selection * goquery.Selection ) bool {
11292 var html string
@@ -123,17 +103,17 @@ func eachHtml(node *goquery.Selection, args ...string) (out interface{}, err err
123103 return list , nil
124104}
125105
126- // outHtml () string
127- func outHtml (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
106+ // outerHtml () get element outer html, return string.
107+ func ( builtin BuiltinFunctions ) OutHtml (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
128108 html , err := goquery .OuterHtml (node )
129109 if err != nil {
130110 return "" , err
131111 }
132112 return html , nil
133113}
134114
135- // eachOutHtml() []string
136- func eachOutHtml (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
115+ // eachOutHtml() get each element outer html, return []string.
116+ func ( builtin BuiltinFunctions ) EachOutHtml (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
137117 list := make ([]string , 0 )
138118 node .EachWithBreak (func (i int , selection * goquery.Selection ) bool {
139119 var html string
@@ -150,8 +130,8 @@ func eachOutHtml(node *goquery.Selection, args ...string) (out interface{}, err
150130 return list , nil
151131}
152132
153- // attr(name) string
154- func attr (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
133+ // attr(name) get element attribute value, return string.
134+ func ( builtin BuiltinFunctions ) Attr (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
155135 if len (args ) <= 0 {
156136 return "" , fmt .Errorf ("attr(xxx) must has name" )
157137 }
@@ -160,8 +140,8 @@ func attr(node *goquery.Selection, args ...string) (out interface{}, err error)
160140 return val , nil
161141}
162142
163- // eachAttr(name) []string
164- func eachAttr (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
143+ // eachAttr() get each element attribute value, return []string.
144+ func ( builtin BuiltinFunctions ) EachAttr (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
165145 if len (args ) <= 0 {
166146 return "" , fmt .Errorf ("attr(xxx) must has name" )
167147 }
@@ -173,8 +153,8 @@ func eachAttr(node *goquery.Selection, args ...string) (out interface{}, err err
173153 return list , nil
174154}
175155
176- // attrInt(name) int
177- func attrInt (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
156+ // attrInt(name, defaultValue) get element attribute value and to int, return int.
157+ func ( builtin BuiltinFunctions ) AttrInt (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
178158 if len (args ) < 2 {
179159 return "" , fmt .Errorf ("attrInt(name,defaultValue) must has name and default value, eg: attrInt(id,-1)" )
180160 }
@@ -188,8 +168,8 @@ func attrInt(node *goquery.Selection, args ...string) (out interface{}, err erro
188168 return outVal , nil
189169}
190170
191- // attrSplit(name, sep) [] string
192- func attrSplit (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
171+ // attrSplit(name, sep) get attribute value and split by separator to array string.
172+ func ( builtin BuiltinFunctions ) AttrSplit (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
193173 if len (args ) <= 0 {
194174 return "" , fmt .Errorf ("attr(xxx) must has name" )
195175 }
@@ -201,22 +181,22 @@ func attrSplit(node *goquery.Selection, args ...string) (out interface{}, err er
201181 return strings .Split (node .AttrOr (name , "" ), sep ), nil
202182}
203183
204- // value() string
205- func value (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
184+ // value() get element attribute value by name is `value`, return string
185+ func ( builtin BuiltinFunctions ) Value (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
206186 return node .AttrOr ("value" , "" ), nil
207187}
208188
209- // split(sep) []string
210- func split (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
189+ // split(sep) get element text and split by separator to array string, return []string.
190+ func ( builtin BuiltinFunctions ) Split (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
211191 sep := ","
212192 if len (args ) > 0 {
213193 sep = args [0 ]
214194 }
215195 return strings .Split (node .Text (), sep ), nil
216196}
217197
218- // eachJoin(sep) string
219- func eachJoin (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
198+ // eachJoin(sep) get each element text and join to string, return string.
199+ func ( builtin BuiltinFunctions ) EachJoin (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
220200 sep := ","
221201 if len (args ) > 0 {
222202 sep = args [0 ]
@@ -228,6 +208,59 @@ func eachJoin(node *goquery.Selection, args ...string) (out interface{}, err err
228208 return strings .Join (list , sep ), nil
229209}
230210
211+ // eq(index) reduces the set of matched elements to the one at the specified index, return string.
212+ func (builtin BuiltinFunctions ) Eq (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
213+ if len (args ) <= 0 {
214+ return "" , fmt .Errorf ("eq(index) must has index" )
215+ }
216+ indexValue := strings .TrimSpace (args [0 ])
217+ idx , err := strconv .Atoi (indexValue )
218+ if err != nil {
219+ return "" , fmt .Errorf ("index=`" + indexValue + "` is not number: " + err .Error ())
220+ }
221+ return node .Eq (idx ).Text (), nil
222+ }
223+
224+ // eqAndAttr(index, name) reduces the set of matched elements to the one at the specified index, and attr() return string.
225+ func (builtin BuiltinFunctions ) EqAndAttr (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
226+ if len (args ) <= 1 {
227+ return "" , fmt .Errorf ("eq(index) must has index" )
228+ }
229+ indexValue := strings .TrimSpace (args [0 ])
230+ idx , err := strconv .Atoi (indexValue )
231+ if err != nil {
232+ return "" , fmt .Errorf ("index=`" + indexValue + "` is not number: " + err .Error ())
233+ }
234+ name := strings .TrimSpace (args [1 ])
235+ return node .Eq (idx ).AttrOr (name , "" ), nil
236+ }
237+
238+ // eqAndHtml(index) reduces the set of matched elements to the one at the specified index, and html() return string.
239+ func (builtin BuiltinFunctions ) EqAndHtml (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
240+ if len (args ) <= 1 {
241+ return "" , fmt .Errorf ("eq(index) must has index" )
242+ }
243+ indexValue := strings .TrimSpace (args [0 ])
244+ idx , err := strconv .Atoi (indexValue )
245+ if err != nil {
246+ return "" , fmt .Errorf ("index=`" + indexValue + "` is not number: " + err .Error ())
247+ }
248+ return node .Eq (idx ).Html ()
249+ }
250+
251+ // eqAndOutHtml(index) reduces the set of matched elements to the one at the specified index, and outHtml() return string.
252+ func (builtin BuiltinFunctions ) EqAndOutHtml (node * goquery.Selection , args ... string ) (out interface {}, err error ) {
253+ if len (args ) <= 1 {
254+ return "" , fmt .Errorf ("eq(index) must has index" )
255+ }
256+ indexValue := strings .TrimSpace (args [0 ])
257+ idx , err := strconv .Atoi (indexValue )
258+ if err != nil {
259+ return "" , fmt .Errorf ("index=`" + indexValue + "` is not number: " + err .Error ())
260+ }
261+ return goquery .OuterHtml (node .Eq (idx ))
262+ }
263+
231264// RegisterFunc register function for parse
232265func (p * Pagser ) RegisterFunc (name string , fn CallFunc ) error {
233266 p .funcs [name ] = fn
0 commit comments