package main

import (
 "context"
 "log"
 "time"
    "github.com/chromedp/chromedp"
)

func main() {
 // create chrome instance
 ctx, cancel := chromedp.NewContext(
  context.Background(),
  // chromedp.WithDebugf(log.Printf),
 )
 defer cancel()

 // create a timeout
 ctx, cancel = context.WithTimeout(ctx, 15*time.Second)
 defer cancel()

 // navigate to a page, wait for an element, click
 var example string
 err := chromedp.Run(ctx,
  chromedp.Navigate(`https://pkg.go.dev/time`),
  // wait for footer element is visible (ie, page is loaded)
  chromedp.WaitVisible(`body > footer`),
  // find and click "Example" link
  chromedp.Click(`#example-After`, chromedp.NodeVisible),
  // retrieve the text of the textarea
  chromedp.Value(`#example-After textarea`, &example),
 )
 if err != nil {
  log.Fatal(err)
 }
 log.Printf("Go's time.After example:\n%s", example)
}

高级使用

除了基本用法之外，chromedp还提供了许多高级功能。

连接远程 docker headless-shell

包含 Chrome headless shell 的最小化容器，适用于自动化操作和驱动网页。

https://hub.docker.com/r/chromedp/headless-shell/

使用方法:

package main

import (
	"context"
	"fmt"
	"github.com/chromedp/cdproto/page"
	"log"
	"os"

	"github.com/chromedp/chromedp"
)

// main attaches to an already running Chrome instance (for example the
// chromedp/headless-shell Docker container) through its DevTools websocket
// URL, prints a web page to PDF and writes the result to sample.pdf.
func main() {
	// Connect to the remote allocator URL (the Docker headless-shell)
	// instead of launching a local browser.
	allocCtx, cancelAlloc := chromedp.NewRemoteAllocator(context.Background(), "ws://127.0.0.1:9222/")
	defer cancelAlloc()

	// Keep the cancel function returned by NewContext instead of discarding
	// it, so the browser context is released explicitly on return. Deferred
	// calls run in reverse order, so it is cancelled before the allocator.
	ctx, cancel := chromedp.NewContext(allocCtx)
	defer cancel()

	// Capture the page as a PDF.
	var buf []byte
	if err := chromedp.Run(ctx, printToPDF(`https://www.google.com/`, &buf)); err != nil {
		log.Fatal(err)
	}

	if err := os.WriteFile("sample.pdf", buf, 0o644); err != nil {
		log.Fatal(err)
	}
	fmt.Println("wrote sample.pdf")
}

// printToPDF returns the tasks that navigate to urlstr and then print the
// loaded page to PDF with background printing disabled, storing the PDF bytes
// in *res. On failure *res is left untouched and the error is returned.
func printToPDF(urlstr string, res *[]byte) chromedp.Tasks {
	render := chromedp.ActionFunc(func(ctx context.Context) error {
		params := page.PrintToPDF().WithPrintBackground(false)
		// The second result of Do is not needed here and is ignored.
		data, _, err := params.Do(ctx)
		if err == nil {
			*res = data
		}
		return err
	})

	return chromedp.Tasks{chromedp.Navigate(urlstr), render}
}

截屏

将网页截取成图片有两个函数：chromedp.Screenshot和chromedp.FullScreenshot。其中chromedp.Screenshot按选择器截取网页中的某个元素（例如某个div），而chromedp.FullScreenshot截取整个网页。我们来看下面的例子：

package main

import (
 "context"
 "log"
 "os"

 "github.com/chromedp/chromedp"
)

func main() {
 // create context
 ctx, cancel := chromedp.NewContext(
  context.Background(),
  // chromedp.WithDebugf(log.Printf),
 )
 defer cancel()

 // capture screenshot of an element
 var buf []byte
 if err := chromedp.Run(ctx, elementScreenshot(`https://pkg.go.dev/`, `img.Homepage-logo`, &buf)); err != nil {
  log.Fatal(err)
 }
 if err := os.WriteFile("elementScreenshot.png", buf, 0o644); err != nil {
  log.Fatal(err)
 }

 // capture entire browser viewport, returning png with quality=90
 if err := chromedp.Run(ctx, fullScreenshot(`https://brank.as/`, 90, &buf)); err != nil {
  log.Fatal(err)
 }
 if err := os.WriteFile("fullScreenshot.png", buf, 0o644); err != nil {
  log.Fatal(err)
 }

 log.Printf("wrote elementScreenshot.png and fullScreenshot.png")
}

// elementScreenshot takes a screenshot of a specific element.
func elementScreenshot(urlstr, sel string, res *[]byte) chromedp.Tasks {
 return chromedp.Tasks{
  chromedp.Navigate(urlstr),
  chromedp.Screenshot(sel, res, chromedp.NodeVisible),
 }
}

// fullScreenshot takes a screenshot of the entire browser viewport.
//
// Note: chromedp.FullScreenshot overrides the device's emulation settings. Use
// device.Reset to reset the emulation and viewport settings.
func fullScreenshot(urlstr string, quality int, res *[]byte) chromedp.Tasks {
 return chromedp.Tasks{
  chromedp.Navigate(urlstr),
  chromedp.FullScreenshot(res, quality),
 }
}

该示例通过elementScreenshot函数截取了https://pkg.go.dev/中img.Homepage-logo标签对应的图片，并通过fullScreenshot函数截取了https://brank.as/网站的长图。因为图像较大，大家可以运行代码查看具体的效果。

其他功能

模拟表单提交：可以使用chromedp.Submit函数模拟表单提交。
模拟鼠标滚动：可以使用chromedp.ScrollIntoView函数模拟鼠标滚动。
模拟键盘输入：可以使用chromedp.KeyEvent函数模拟键盘输入。

github上也给出了具体的示例代码，大家可以自行查看。示例链接：https://github.com/chromedp/examples

chromedp的应用场景

由于chromedp具有高效、稳定、可靠的特点，因此在以下场景中得到了广泛的应用：

1. 数据采集：可以使用chromedp对各类网站进行数据采集。
2. 自动化测试：可以使用chromedp对Web应用进行自动化测试。
3. 网络爬虫：可以使用chromedp对各类网站进行爬取。
4. 数据分析：可以使用chromedp对采集到的数据进行分析和处理。

使用代码案例

《在Linux(Centos 8)上使用Headless Chrome无头浏览器采集的真实体验(附上Golang的采集代码)》- 掘金

最近在采集微信文章的时候，遇到了点棘手的问题：通过搜狗搜索的微信搜索模式，使用普通的直接抓取页面的方式，无法绕过搜狗搜索的验证，因此无法使用gorequest成功地采集到微信文章。选择chromedp……

https://juejin.cn/post/7048448245860663310

总结

chromedp基于Chrome DevTool协议实现。可以对网页内容进行采集、模拟点击、提交数据、将网页内容转换成pdf、抓取网页长图等功能。

华为开发者联盟HarmonyOS专区

鸿蒙生态一站式服务平台。

更多推荐

【grafana】使用教程

华为开发者联盟HarmonyOS专区

【PX4-AutoPilot教程-开发环境】使用VMware虚拟机安装Ubuntu系统并搭建PX4开发环境（ROS+mavros+jMAVSim+gazebo+QGC+QT）

学习PX4开发需要先配置好开发环境，对于新手推荐使用VMware虚拟机搭建Ubuntu系统，并下载PX4源码，配置好编译环境和工具链（ROS操作系统+mavros通信包+jMAVSim仿真+gazebo仿真+QGC地面站+QT开发平台）。教程中使用的是Ubuntu18.04系统（官方推荐使用版本），PX4固件版本为v1.13.0，飞控板为pixhawk2.4.8版本。