Python – Scrapy: connection refused
Tags: python, scrapy, selenium, selenium-webdriver, web-scraping
There are 10 links I want to catch. When I run the spider, I can get the links in the JSON file, but there are still errors like the ones below. It seems like Selenium runs twice. What is the problem? Please guide me. Thank you.
2014-08-06 10:30:26+0800 [spider2] DEBUG: Scraped from <200 http://www.test/a/1>
{'link': u'http://www.test/a/1'}
2014-08-06 10:30:26+0800 [spider2] ERROR: Spider error processing <GET
http://www.test/a/1>
Traceback (most recent call last):
........
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 571, in create_connection
raise err
socket.error: [Errno 61] Connection refused
Here is my code:
from selenium import webdriver
from scrapy.spider import Spider
from ta.items import TaItem
from selenium.webdriver.support.wait import WebDriverWait
from scrapy.http.request import Request

class ProductSpider(Spider):
    name = "spider2"
    start_urls = ['http://www.test.com/']

    def __init__(self):
        self.driver = webdriver.Firefox()

    def parse(self, response):
        self.driver.get(response.url)
        self.driver.implicitly_wait(20)
        next = self.driver.find_elements_by_css_selector("div.body .heading a")
        for a in next:
            item = TaItem()
            item['link'] = a.get_attribute("href")
            yield Request(url=item['link'], meta={'item': item}, callback=self.parse_detail)

    def parse_detail(self, response):
        item = response.meta['item']
        yield item
        self.driver.close()
The problem is that you close the driver too early. You should close it only when the spider finishes its work, by listening for the spider_closed signal:
from scrapy import signals
from scrapy.xlib.pydispatch import dispatcher
from selenium import webdriver
from scrapy.spider import Spider
from ta.items import TaItem
from scrapy.http.request import Request

class ProductSpider(Spider):
    name = "spider2"
    start_urls = ['http://www.test.com/']

    def __init__(self):
        self.driver = webdriver.Firefox()
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def parse(self, response):
        self.driver.get(response.url)
        self.driver.implicitly_wait(20)
        next = self.driver.find_elements_by_css_selector("div.body .heading a")
        for a in next:
            item = TaItem()
            item['link'] = a.get_attribute("href")
            yield Request(url=item['link'], meta={'item': item}, callback=self.parse_detail)

    def parse_detail(self, response):
        item = response.meta['item']
        yield item

    def spider_closed(self, spider):
        self.driver.close()
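Note that scrapy.xlib.pydispatch has since been removed from Scrapy. On a recent release the same idea can be expressed through the from_crawler hook and the crawler's signal manager (a sketch, assuming a modern Scrapy where spiders live in scrapy.spiders):

from scrapy import signals
from scrapy.spiders import Spider
from selenium import webdriver

class ProductSpider(Spider):
    name = "spider2"
    start_urls = ['http://www.test.com/']

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super().from_crawler(crawler, *args, **kwargs)
        # Register the handler through the crawler's signal manager
        # instead of the removed pydispatch module.
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.driver = webdriver.Firefox()

    # parse() / parse_detail() as above

    def spider_closed(self, spider):
        # quit() also shuts down the driver process; close() only
        # closes the current browser window.
        self.driver.quit()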
So I was using TOR to act as a way of changing my proxies and IP addresses for my webdriver. Here is the code. All dependencies are installed (including Geckodriver and the latest version of Firefox).
from stem import Signal
from stem.control import Controller
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup

def switchIP():
    with Controller.from_port(port=9051) as controller:
        controller.authenticate()
        controller.signal(Signal.NEWNYM)

def my_proxy(PROXY_HOST, PROXY_PORT):
    fp = webdriver.FirefoxProfile()
    fp.set_preference("network.proxy.type", 1)
    fp.set_preference("network.proxy.socks", PROXY_HOST)
    fp.set_preference("network.proxy.socks_port", int(PROXY_PORT))
    fp.update_preferences()
    options = Options()
    options.headless = True
    return webdriver.Firefox(options=options, firefox_profile=fp)

for x in range(10):
    proxy = my_proxy("127.0.0.1", 9050)
    proxy.get("https://whatsmyip.com/")
    html = proxy.page_source
    soup = BeautifulSoup(html, 'lxml')
    print(soup.find("span", {"id": "ipv4"}))
    print(soup.find("span", {"id": "ipv6"}))
    switchIP()
This error message...

SocketError: [Errno 61] Connection refused

...implies that the connection was refused by the server.

However, I don't see any issues with your code block as such, but while using Tor to change the proxies and IP addresses, you need to start the tor.exe application using the popen() command as follows (example for Windows):
import os
torexe = os.popen(r'C:\Users\user_name\path\to\Tor Browser\Browser\TorBrowser\Tor\tor.exe')
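On macOS or Linux the same idea works with subprocess.Popen (a minimal sketch; the binary path is hypothetical and depends on your install):

import subprocess
import time

# Hypothetical path -- point this at wherever your tor binary lives.
tor_process = subprocess.Popen(['/usr/local/bin/tor'])
time.sleep(10)  # crude wait for Tor to bootstrap and open its SOCKS port

# ... run the scraping loop above, then shut Tor down:
tor_process.terminate()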
I have client and server Python programs, and they work fine when run on the same machine, and when the client connects to my machine's local IP (not 127.0.0.1, but the IP assigned to my machine). I have not been able to get this to work with my public IP.

I get a [Errno 61] Connection refused error when I try to get the client to connect to my router's public IP address. My server binds to all interfaces using bind(("0.0.0.0", 50000)), and I have already set up port forwarding on my router. I verified with netstat -an | grep LISTEN that the program is listening on that port. I can also seemingly reach the port through an online port-checking tool, which shows the port as open when my program is running and closed when I close it; my program also registers the connection from this tool. The client connects like this:
import socket

tcp_client = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
tcp_client.connect(('my_public_ip', 50000))
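I suspect the problem is simply that you cannot, from inside your home network, connect to your router's public IP address, likely because many consumer routers do not support hairpin NAT (reaching the public IP from inside the LAN). I tried the same thing with my local network and ran into the same behavior.

For completeness, a minimal sketch of the server side described in the question (assuming the standard library socket API; error handling omitted):

import socket

# Bind to all interfaces on port 50000, as the question describes.
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(("0.0.0.0", 50000))
server.listen(1)
conn, addr = server.accept()  # blocks until a client connects
print("connection from", addr)
conn.close()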
I've searched high and low and can't find a fix that seems to fit my use case. I've had some Python 3 install issues on my M1 Mac, but I think it's now running okay. I'm just trying to learn / work with the requests library, but when I try to run this very simple Python code:
import requests
response = requests.get("https://api.open-notify.org/this-api-doesnt-exist")
print(response.status_code)
Connection Refused means that the host (api.open-notify.org) is not listening on the port (HTTPS is on port 443) in your request. I tried connecting to port 80 (plain HTTP) instead, and that worked for me:
>>> response = requests.get("http://api.open-notify.org/this-api-doesnt-exist")
>>> print(response.status_code)
404
For laughs, I tried connecting to the base URL:
>>> response = requests.get("http://api.open-notify.org")
>>> print(response.status_code)
500
>>> response
<Response [500]>
>>> response.text
'<html>\r\n<head><title>500 Internal Server Error</title></head>\r\n<body bgcolor="white">\r\n<center><h1>500 Internal Server Error</h1></center>\r\n<hr><center>nginx/1.10.3</center>\r\n</body>\r\n</html>\r\n'
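If you would rather handle the refused connection in code than read it out of a traceback, requests raises requests.exceptions.ConnectionError for this case (a minimal sketch, assuming the HTTPS port is still unreachable):

import requests

try:
    # Port 443 is not listening on this host, so the TCP connect fails.
    response = requests.get("https://api.open-notify.org/this-api-doesnt-exist", timeout=5)
    print(response.status_code)
except requests.exceptions.ConnectionError as err:
    # This is where "[Errno 61] Connection refused" ends up.
    print("connection failed:", err)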