我正在尝试渲染一个网页,以获取 HTML 页面中人眼可见对象的坐标。我可以用 Selenium 获取某个特定对象的坐标,但我需要所有对象的坐标。主要问题是如何区分承载可见信息的 div 与透明的 div(见附图),我该怎么做?[图片]

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep, strftime
from selenium.webdriver.common.by import By

# Print the on-screen (x, y) position of the "Newest" link.
driver = webdriver.Chrome()
# Maximize so the screen's top-left corner coincides with the browser
# window's top-left corner.
driver.maximize_window()

# NOTE(review): no page is loaded before the lookup below; a
# driver.get(<url>) call is presumably missing from this snippet -- confirm.
# find_element(By.LINK_TEXT, ...) replaces find_element_by_link_text, which
# was removed in Selenium 4.3 and works on older releases as well.
question = driver.find_element(By.LINK_TEXT, "Newest")

# Read the location once so x and y come from the same snapshot instead of
# two separate WebDriver round-trips.
location = question.location

# Difference between the window's outer and inner height, used here as the
# height of the browser's own UI above the page content.
browser_navigation_panel_height = driver.execute_script(
    'return window.outerHeight - window.innerHeight;')

x_absolute_coord = location['x']
y_relative_coord = location['y']
y_absolute_coord = y_relative_coord + browser_navigation_panel_height
print(x_absolute_coord, y_absolute_coord)


import cv2
import numpy as np
from selenium import webdriver

from selenium.webdriver.common.by import By

# Collect the bounding box of every <div> that is not hidden through an inline
# "display:none" style, then draw the boxes onto a saved screenshot.
driver = webdriver.Chrome()
driver.set_window_size(1920, 1080)
# NOTE(review): no page is loaded here; a driver.get(<url>) call is presumably
# missing from this snippet -- confirm.

# The offset is the same for every element, so query it once rather than once
# per loop iteration.
browser_navigation_panel_height = driver.execute_script(
    'return window.outerHeight - window.innerHeight;')

# find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was
# removed in Selenium 4.3 and works on older releases as well.
list_of_visible_elements = driver.find_elements(
    By.XPATH, "//div[not(contains(@style,'display:none'))]")

contours = []
for element in list_of_visible_elements:
    location = element.location
    size = element.size
    # NOTE(review): the browser-UI offset is only correct if the screenshot
    # includes the browser's own UI; confirm how 'screenshot.png' is captured.
    box = (
        location['x'],
        location['y'] + browser_navigation_panel_height,
        size['width'],
        size['height'],
    )
    # The original assigned the box to a throwaway variable, so `contours`
    # stayed empty and nothing was drawn. cv2.rectangle also needs integer
    # pixel coordinates.
    contours.append(tuple(int(round(value)) for value in box))

img = cv2.imread('screenshot.png')
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("could not read image 'screenshot.png'")
result = img.copy()
for x, y, w, h in contours:
    cv2.rectangle(result, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow("bounding_box", result)
# Without waitKey the HighGUI window is never given a chance to render.
cv2.waitKey(0)
cv2.destroyAllWindows()

更新:我对代码做了一些改进,看起来可以正常工作,但结果仍然不够准确,该如何改进?

import cv2
import os
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By

# Collect the bounding box of every <div> that is not hidden through an inline
# "display:none" style, skip degenerate boxes, and draw the rest onto a saved
# screenshot.
options = Options()

# `options=` is the supported keyword; `chrome_options=` is deprecated.
driver = webdriver.Chrome(options=options)
driver.set_window_size(1920, 1080)
# NOTE(review): no page is loaded here; a driver.get(<url>) call is presumably
# missing from this snippet -- confirm.

# The offset is the same for every element, so query it once rather than once
# per loop iteration.
browser_navigation_panel_height = driver.execute_script(
    'return window.outerHeight - window.innerHeight;')

# find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was
# removed in Selenium 4.3 and works on older releases as well.
list_of_visible_elements = driver.find_elements(
    By.XPATH, "//div[not(contains(@style,'display:none'))]")

contours = []
for element in list_of_visible_elements:
    try:
        location = element.location
        size = element.size
    except StaleElementReferenceException:
        # The original ended with a dangling `except: pass` (a syntax error).
        # Its best-effort intent is kept, but narrowed to elements that
        # disappear from the DOM while being inspected.
        continue
    # NOTE(review): the browser-UI offset is only correct if the screenshot
    # includes the browser's own UI; confirm how 's.png' is captured.
    box = (
        location['x'],
        location['y'] + browser_navigation_panel_height,
        size['width'],
        size['height'],
    )
    # Same filter as the original: drop the box if any component is zero.
    # NOTE(review): this also drops elements flush with the left edge
    # (x == 0); confirm that is intended.
    if 0 in box:
        continue
    # The original assigned the box to a throwaway variable, so `contours`
    # stayed empty and nothing was drawn. cv2.rectangle also needs integer
    # pixel coordinates.
    contours.append(tuple(int(round(value)) for value in box))

img = cv2.imread('s.png')
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("could not read image 's.png'")
result = img.copy()
for x, y, w, h in contours:
    cv2.rectangle(result, (x, y), (x + w, y + h), (0, 0, 255), 1)
cv2.imshow("bounding_box", result)
# Without waitKey the HighGUI window is never given a chance to render.
cv2.waitKey(0)
cv2.destroyAllWindows()


You can use driver.execute_script to run a recursive generator function in Javascript that will traverse all the visible DOM nodes and return only those coordinates which are within the browser window height:

from selenium import webdriver
# Walk the DOM in the browser and collect element rectangles in one call.
d = webdriver.Chrome('/path/to/chromedriver')
# NOTE(review): no page is loaded here; a d.get(<url>) call is presumably
# missing from this snippet -- confirm.

# The script defines a recursive JavaScript generator that, for every element
# whose computed `display` is not 'none', may yield
# [x, y, width, height] from getBoundingClientRect() and then recurses into
# its element children (nodeType === 1). `r` receives the spread result, i.e.
# a list of 4-item lists.
# NOTE(review): the inner condition compares root.offsetTop against
# (window.outerHeight - window.innerHeight), not against the viewport height;
# confirm this is the intended visibility bound.
# The closing braces and the end of the string/call were missing in the
# original, which made the snippet a syntax error; they are restored below.
r = d.execute_script("""
function* get_nodes(root){
   var style = window.getComputedStyle(root)
   if (style.getPropertyValue('display') != 'none'){
       if (root.offsetTop <= (window.outerHeight - window.innerHeight)){
          var b_d = root.getBoundingClientRect()
          yield [b_d.x, b_d.y, b_d.width, b_d.height]
       }
       for (var i of root.childNodes){
          if (i.nodeType === 1){
             yield* get_nodes(i)
          }
       }
   }
}
return [...get_nodes(document.body)]
""")


