
Commit ac64c68

document location and scraper
1 parent d811517 commit ac64c68

File tree

2 files changed: 37 additions, 1 deletion


location/location.go

Lines changed: 8 additions & 0 deletions
@@ -16,6 +16,8 @@ var locationRegexp = regexp.MustCompile(`Location: (.*)`)
 
 var _ scraper.Extension = &LocationExtension{}
 
+// NewLocationExtension creates a new LocationExtension using a Google Maps API key with access
+// to the Geocoding API.
 func NewLocationExtension(apiKey string) (*LocationExtension, error) {
 	c, err := maps.NewClient(maps.WithAPIKey(apiKey))
 	if err != nil {
@@ -25,21 +27,27 @@ func NewLocationExtension(apiKey string) (*LocationExtension, error) {
 	return &LocationExtension{c}, nil
 }
 
+// LocationExtension implements scraper.Extension, and adds geolocation features to the TalkScraper.
+// LocationExtension only works together with speakerdeck.TalkScraper at the moment.
 type LocationExtension struct {
 	c *maps.Client
 }
 
+// Name returns the LocationExtension name
 func (_ *LocationExtension) Name() string {
 	return "LocationExtension"
 }
 
+// Hook returns the hook for this extension
 func (le *LocationExtension) Hook() scraper.Hook {
 	return scraper.Hook{
 		DOMPath: ".deck-description.mb-4 p",
 		Handler: le.onDescription,
 	}
 }
 
+// onDescription processes the location given in the Talk description field, and registers the geocoded
+// response on the Talk object.
 func (le *LocationExtension) onDescription(e *colly.HTMLElement, data interface{}) (*string, error) {
 	// Fail fast, only consider descriptions with the "Location" substring
 	if !strings.Contains(e.Text, "Location") {
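
For context, here is a minimal sketch of how the new LocationExtension might be wired into a Scrape() call, using the ScrapeOptions.Extensions field documented in scraper/scraper.go below. The import paths, the speakerdeck.NewTalkScraper constructor, and the placeholder API key and URL are assumptions for illustration only; from this commit come only location.NewLocationExtension, scraper.Scrape, scraper.ScrapeOptions and scraper.Extension.

package main

import (
	"fmt"
	"log"

	// NOTE: placeholder import paths; substitute the repository's real module path.
	"example.com/talks/location"
	"example.com/talks/scraper"
	"example.com/talks/speakerdeck"
)

func main() {
	// Build the extension with a Google Maps API key that has Geocoding API access.
	le, err := location.NewLocationExtension("<google-maps-api-key>")
	if err != nil {
		log.Fatal(err)
	}

	// speakerdeck.NewTalkScraper is a hypothetical constructor for the
	// speakerdeck.TalkScraper mentioned in the LocationExtension doc comment.
	ts := speakerdeck.NewTalkScraper()

	// Register the extension for this Scrape() call via ScrapeOptions.Extensions.
	data, err := scraper.Scrape("https://speakerdeck.com/<user>/<talk>", ts, &scraper.ScrapeOptions{
		Extensions: []scraper.Extension{le},
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%+v\n", data)
}

With the extension registered, its hook fires on the talk description element and, per the onDescription doc comment above, geocodes any "Location: ..." line and records the result on the shared data struct.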

scraper/scraper.go

Lines changed: 29 additions & 1 deletion
@@ -49,31 +49,59 @@ import (
 	log "github.com/sirupsen/logrus"
 )
 
+// HookFn is a callback function for processing HTML data at a given place in the DOM tree.
+// The first e argument gives access to the DOM, and the second data argument carries a pointer
+// to the data struct you want to save important information in. You can cast data to what's
+// returned by Scraper.InitialData(). The return values are an optional string which tells the
+// scraper to also scrape another page, and an error.
 type HookFn func(e *colly.HTMLElement, data interface{}) (*string, error)
 
+// Hook maps a handler of type HookFn to a DOMPath in the tree. The DOMPath can be any valid CSS selector.
 type Hook struct {
+	// DOMPath specifies one or many elements in the DOM tree using a CSS selector
 	DOMPath string
+
+	// Handler specifies the handler to be invoked for all of the elements on the HTML page matched by the CSS selector
 	Handler HookFn
 }
 
+// Scraper is an interface which scraping implementations should implement.
+// Any struct that satisfies this interface may be passed to the generic Scrape function in this package.
 type Scraper interface {
+	// Name returns a user-friendly name of the scraper
 	Name() string
 
+	// Hooks returns the hooks for all HTML elements that should be matched and their handlers.
 	Hooks() []Hook
 
+	// InitialData returns the struct pointer which is then shared between/passed to all hook handlers.
 	InitialData() interface{}
 }
 
+// Extension is an interface which allows for adding extensions on-demand to scraping implementations.
+// Upon calling Scrape(), you may pass extra extension implementations in ScrapeOptions. The extension
+// can register its own extra hook for processing the DOM. The extension shares/manipulates the same
+// data as the Scraper it's used together with.
 type Extension interface {
+	// Name returns the name of the extension
 	Name() string
+
+	// Hook is the hook registered by this extension
	Hook() Hook
 }
 
+// ScrapeOptions contains extra parameters used when scraping
 type ScrapeOptions struct {
+	// Extensions allows registering extensions to a Scrape() call
 	Extensions []Extension
-	LogLevel   *log.Level
+	// LogLevel specifies the logrus log level for the Scrape() function
+	LogLevel *log.Level
 }
 
+// Scrape takes in a Scraper struct, a URL to scrape, and optionally extra options.
+// This function calls handlers from the Scraper.Hooks() for the given DOM paths, and
+// shares the Scraper.InitialData() struct pointer between them. The return value is that
+// struct pointer, and/or possibly an error.
 func Scrape(url string, s Scraper, opts *ScrapeOptions) (interface{}, error) {
 	c := colly.NewCollector()
 	mux := &sync.Mutex{}
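
To make the interfaces above concrete, here is a minimal sketch of a custom Scraper implementation. The TitleScraper type, the titleData struct, the example.com URL and the import paths are made up for illustration; only the Scraper, Hook, HookFn, ScrapeOptions and Scrape names come from scraper/scraper.go.

package example

import (
	"fmt"

	"github.com/gocolly/colly" // assumes colly v1; use .../colly/v2 if the repository does

	"example.com/talks/scraper" // placeholder import path for the scraper package above
)

// titleData is the struct shared between all hook handlers via InitialData().
type titleData struct {
	Title string
}

// TitleScraper is a made-up Scraper that records the page's <title> text.
type TitleScraper struct{}

// Name returns a user-friendly name of the scraper.
func (*TitleScraper) Name() string { return "TitleScraper" }

// Hooks matches the <title> element and registers onTitle as its handler.
func (*TitleScraper) Hooks() []scraper.Hook {
	return []scraper.Hook{
		{DOMPath: "title", Handler: onTitle},
	}
}

// InitialData returns the pointer that Scrape() shares between all handlers.
func (*TitleScraper) InitialData() interface{} {
	return &titleData{}
}

// onTitle is a HookFn: it casts data back to *titleData and stores the element text.
// Returning (nil, nil) asks the scraper not to visit any further page.
func onTitle(e *colly.HTMLElement, data interface{}) (*string, error) {
	d := data.(*titleData)
	d.Title = e.Text
	return nil, nil
}

// Example shows a Scrape() call against the sketched scraper.
func Example() {
	data, err := scraper.Scrape("https://example.com", &TitleScraper{}, &scraper.ScrapeOptions{})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(data.(*titleData).Title)
}

Scrape() shares the *titleData pointer returned by InitialData() with every handler, so onTitle can cast data back and fill it in; the optional *string return value is how a handler tells the scraper to also scrape another page.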
