Skip to content

Commit a9d58ea

Browse files
committed
function output
1 parent 454a96c commit a9d58ea

3 files changed

Lines changed: 109 additions & 73 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ type ExamData struct {
199199
200200
> - eachJoin(sep) get each element text and join to string, return string.
201201
202+
> - ...
203+
204+
For more builtin functions, see the docs: <https://pkg.go.dev/github.com/foolin/pagser>
202205

203206
#### Extensions functions
204207

function.go

Lines changed: 105 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -8,36 +8,7 @@ import (
88
)
99

1010
// CallFunc write function interface
11-
//
12-
// Builtin Functions
13-
//
14-
// - text() get element text, return string, this is default function, if not define function in struct tag.
15-
//
16-
// - eachText() get each element text, return []string.
17-
//
18-
// - html() get element inner html, return string.
19-
//
20-
// - eachHtml() get each element inner html, return []string.
21-
//
22-
// - outerHtml() get element outer html, return string.
23-
//
24-
// - eachOutHtml() get each element outer html, return []string.
25-
//
26-
// - attr(name) get element attribute value, return string.
27-
//
28-
// - eachAttr() get each element attribute value, return []string.
29-
//
30-
// - attrInt(name, defaultValue) get element attribute value and to int, return int.
31-
//
32-
// - attrSplit(name, sep) get attribute value and split by separator to array string.
33-
//
34-
// - value() get element attribute value by name is `value`, return string, eg: <input value='xxxx' /> will return "xxx".
35-
//
36-
// - split(sep) get element text and split by separator to array string, return []string.
37-
//
38-
// - eachJoin(sep) get each element text and join to string, return string.
39-
//
40-
//
11+
4112
// # Define Global Function
4213
//
4314
// func MyFunc(node *goquery.Selection, args ...string) (out interface{}, err error) {
@@ -70,43 +41,52 @@ import (
7041
// Define your own function interface
7142
type CallFunc func(node *goquery.Selection, args ...string) (out interface{}, err error)
7243

73-
var sysFuncs = map[string]CallFunc{
74-
"text": text,
75-
"eachText": eachText,
76-
"html": html,
77-
"eachHtml": eachHtml,
78-
"outerHtml": outHtml,
79-
"eachOutHtml": eachOutHtml, //
80-
"attr": attr, //
81-
"eachAttr": eachAttr,
82-
"attrInt": attrInt,
83-
"attrSplit": attrSplit,
84-
"value": value,
85-
"split": split,
86-
"eachJoin": eachJoin,
87-
}
88-
89-
// text() string
90-
func text(node *goquery.Selection, args ...string) (out interface{}, err error) {
44+
//Builtin Functions
45+
type BuiltinFunctions struct {
46+
}
47+
48+
var builtinFuncObj BuiltinFunctions
49+
var builtinFuncMap = map[string]CallFunc{
50+
"text": builtinFuncObj.Text,
51+
"eachText": builtinFuncObj.EachText,
52+
"html": builtinFuncObj.Html,
53+
"eachHtml": builtinFuncObj.EachHtml,
54+
"outerHtml": builtinFuncObj.OutHtml,
55+
"eachOutHtml": builtinFuncObj.EachOutHtml, //
56+
"attr": builtinFuncObj.Attr, //
57+
"eachAttr": builtinFuncObj.EachAttr,
58+
"attrInt": builtinFuncObj.AttrInt,
59+
"attrSplit": builtinFuncObj.AttrSplit,
60+
"value": builtinFuncObj.Value,
61+
"split": builtinFuncObj.Split,
62+
"eachJoin": builtinFuncObj.EachJoin,
63+
"eq": builtinFuncObj.Eq,
64+
"eqAndAttr": builtinFuncObj.EqAndAttr,
65+
"eqAndHtml": builtinFuncObj.EqAndHtml,
66+
"eqAndOutHtml": builtinFuncObj.EqAndOutHtml,
67+
}
68+
69+
// text() get element text, return string, this is default function, if not define function in struct tag.
70+
func (builtin BuiltinFunctions) Text(node *goquery.Selection, args ...string) (out interface{}, err error) {
9171
return strings.TrimSpace(node.Text()), nil
9272
}
9373

94-
// eachText() []string
95-
func eachText(node *goquery.Selection, args ...string) (out interface{}, err error) {
74+
// eachText() get each element text, return []string.
75+
func (builtin BuiltinFunctions) EachText(node *goquery.Selection, args ...string) (out interface{}, err error) {
9676
list := make([]string, 0)
9777
node.Each(func(i int, selection *goquery.Selection) {
9878
list = append(list, strings.TrimSpace(selection.Text()))
9979
})
10080
return list, nil
10181
}
10282

103-
// html() string
104-
func html(node *goquery.Selection, args ...string) (out interface{}, err error) {
83+
// html() get element inner html, return string.
84+
func (builtin BuiltinFunctions) Html(node *goquery.Selection, args ...string) (out interface{}, err error) {
10585
return node.Html()
10686
}
10787

108-
// eachHtml() []string
109-
func eachHtml(node *goquery.Selection, args ...string) (out interface{}, err error) {
88+
// eachHtml() get each element inner html, return []string.
89+
func (builtin BuiltinFunctions) EachHtml(node *goquery.Selection, args ...string) (out interface{}, err error) {
11090
list := make([]string, 0)
11191
node.EachWithBreak(func(i int, selection *goquery.Selection) bool {
11292
var html string
@@ -123,17 +103,17 @@ func eachHtml(node *goquery.Selection, args ...string) (out interface{}, err err
123103
return list, nil
124104
}
125105

126-
// outHtml() string
127-
func outHtml(node *goquery.Selection, args ...string) (out interface{}, err error) {
106+
// outerHtml() get element outer html, return string.
107+
func (builtin BuiltinFunctions) OutHtml(node *goquery.Selection, args ...string) (out interface{}, err error) {
128108
html, err := goquery.OuterHtml(node)
129109
if err != nil {
130110
return "", err
131111
}
132112
return html, nil
133113
}
134114

135-
// eachOutHtml() []string
136-
func eachOutHtml(node *goquery.Selection, args ...string) (out interface{}, err error) {
115+
// eachOutHtml() get each element outer html, return []string.
116+
func (builtin BuiltinFunctions) EachOutHtml(node *goquery.Selection, args ...string) (out interface{}, err error) {
137117
list := make([]string, 0)
138118
node.EachWithBreak(func(i int, selection *goquery.Selection) bool {
139119
var html string
@@ -150,8 +130,8 @@ func eachOutHtml(node *goquery.Selection, args ...string) (out interface{}, err
150130
return list, nil
151131
}
152132

153-
// attr(name) string
154-
func attr(node *goquery.Selection, args ...string) (out interface{}, err error) {
133+
// attr(name) get element attribute value, return string.
134+
func (builtin BuiltinFunctions) Attr(node *goquery.Selection, args ...string) (out interface{}, err error) {
155135
if len(args) <= 0 {
156136
return "", fmt.Errorf("attr(xxx) must has name")
157137
}
@@ -160,8 +140,8 @@ func attr(node *goquery.Selection, args ...string) (out interface{}, err error)
160140
return val, nil
161141
}
162142

163-
// eachAttr(name) []string
164-
func eachAttr(node *goquery.Selection, args ...string) (out interface{}, err error) {
143+
// eachAttr() get each element attribute value, return []string.
144+
func (builtin BuiltinFunctions) EachAttr(node *goquery.Selection, args ...string) (out interface{}, err error) {
165145
if len(args) <= 0 {
166146
return "", fmt.Errorf("attr(xxx) must has name")
167147
}
@@ -173,8 +153,8 @@ func eachAttr(node *goquery.Selection, args ...string) (out interface{}, err err
173153
return list, nil
174154
}
175155

176-
// attrInt(name) int
177-
func attrInt(node *goquery.Selection, args ...string) (out interface{}, err error) {
156+
// attrInt(name, defaultValue) get element attribute value and to int, return int.
157+
func (builtin BuiltinFunctions) AttrInt(node *goquery.Selection, args ...string) (out interface{}, err error) {
178158
if len(args) < 2 {
179159
return "", fmt.Errorf("attrInt(name,defaultValue) must has name and default value, eg: attrInt(id,-1)")
180160
}
@@ -188,8 +168,8 @@ func attrInt(node *goquery.Selection, args ...string) (out interface{}, err erro
188168
return outVal, nil
189169
}
190170

191-
// attrSplit(name, sep) []string
192-
func attrSplit(node *goquery.Selection, args ...string) (out interface{}, err error) {
171+
// attrSplit(name, sep) get attribute value and split by separator to array string.
172+
func (builtin BuiltinFunctions) AttrSplit(node *goquery.Selection, args ...string) (out interface{}, err error) {
193173
if len(args) <= 0 {
194174
return "", fmt.Errorf("attr(xxx) must has name")
195175
}
@@ -201,22 +181,22 @@ func attrSplit(node *goquery.Selection, args ...string) (out interface{}, err er
201181
return strings.Split(node.AttrOr(name, ""), sep), nil
202182
}
203183

204-
// value() string
205-
func value(node *goquery.Selection, args ...string) (out interface{}, err error) {
184+
// value() get element attribute value by name is `value`, return string
185+
func (builtin BuiltinFunctions) Value(node *goquery.Selection, args ...string) (out interface{}, err error) {
206186
return node.AttrOr("value", ""), nil
207187
}
208188

209-
// split(sep) []string
210-
func split(node *goquery.Selection, args ...string) (out interface{}, err error) {
189+
// split(sep) get element text and split by separator to array string, return []string.
190+
func (builtin BuiltinFunctions) Split(node *goquery.Selection, args ...string) (out interface{}, err error) {
211191
sep := ","
212192
if len(args) > 0 {
213193
sep = args[0]
214194
}
215195
return strings.Split(node.Text(), sep), nil
216196
}
217197

218-
// eachJoin(sep) string
219-
func eachJoin(node *goquery.Selection, args ...string) (out interface{}, err error) {
198+
// eachJoin(sep) get each element text and join to string, return string.
199+
func (builtin BuiltinFunctions) EachJoin(node *goquery.Selection, args ...string) (out interface{}, err error) {
220200
sep := ","
221201
if len(args) > 0 {
222202
sep = args[0]
@@ -228,6 +208,59 @@ func eachJoin(node *goquery.Selection, args ...string) (out interface{}, err err
228208
return strings.Join(list, sep), nil
229209
}
230210

211+
// eq(index) reduces the set of matched elements to the one at the specified index, return string.
212+
func (builtin BuiltinFunctions) Eq(node *goquery.Selection, args ...string) (out interface{}, err error) {
213+
if len(args) <= 0 {
214+
return "", fmt.Errorf("eq(index) must has index")
215+
}
216+
indexValue := strings.TrimSpace(args[0])
217+
idx, err := strconv.Atoi(indexValue)
218+
if err != nil {
219+
return "", fmt.Errorf("index=`" + indexValue + "` is not number: " + err.Error())
220+
}
221+
return node.Eq(idx).Text(), nil
222+
}
223+
224+
// eqAndAttr(index, name) reduces the set of matched elements to the one at the specified index, and attr() return string.
225+
func (builtin BuiltinFunctions) EqAndAttr(node *goquery.Selection, args ...string) (out interface{}, err error) {
226+
if len(args) <= 1 {
227+
return "", fmt.Errorf("eq(index) must has index")
228+
}
229+
indexValue := strings.TrimSpace(args[0])
230+
idx, err := strconv.Atoi(indexValue)
231+
if err != nil {
232+
return "", fmt.Errorf("index=`" + indexValue + "` is not number: " + err.Error())
233+
}
234+
name := strings.TrimSpace(args[1])
235+
return node.Eq(idx).AttrOr(name, ""), nil
236+
}
237+
238+
// eqAndHtml(index) reduces the set of matched elements to the one at the specified index, and html() return string.
239+
func (builtin BuiltinFunctions) EqAndHtml(node *goquery.Selection, args ...string) (out interface{}, err error) {
240+
if len(args) <= 1 {
241+
return "", fmt.Errorf("eq(index) must has index")
242+
}
243+
indexValue := strings.TrimSpace(args[0])
244+
idx, err := strconv.Atoi(indexValue)
245+
if err != nil {
246+
return "", fmt.Errorf("index=`" + indexValue + "` is not number: " + err.Error())
247+
}
248+
return node.Eq(idx).Html()
249+
}
250+
251+
// eqAndOutHtml(index) reduces the set of matched elements to the one at the specified index, and outHtml() return string.
252+
func (builtin BuiltinFunctions) EqAndOutHtml(node *goquery.Selection, args ...string) (out interface{}, err error) {
253+
if len(args) <= 1 {
254+
return "", fmt.Errorf("eq(index) must has index")
255+
}
256+
indexValue := strings.TrimSpace(args[0])
257+
idx, err := strconv.Atoi(indexValue)
258+
if err != nil {
259+
return "", fmt.Errorf("index=`" + indexValue + "` is not number: " + err.Error())
260+
}
261+
return goquery.OuterHtml(node.Eq(idx))
262+
}
263+
231264
// RegisterFunc register function for parse
232265
func (p *Pagser) RegisterFunc(name string, fn CallFunc) error {
233266
p.funcs[name] = fn

pagser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func NewWithConfig(cfg Config) (*Pagser, error) {
3737
return &Pagser{
3838
config: cfg,
3939
tagers: make(map[string]*Tager, 0),
40-
funcs: sysFuncs,
40+
funcs: builtinFuncMap,
4141
}, nil
4242
}
4343

0 commit comments

Comments
 (0)