a zeV+@sddlZddlZddlZddlZddlZddlmZddlm Z ddl m Z m Z GdddZ Gddde ejZGd d d e ejZGd d d e ejZGd dde ejZGddde ZGdddeejZGdddeejZGdddeZGddde ejZGddde ejZGddde ejZGddde ejZGdd d eZGd!d"d"e ejZGd#d$d$e ejZGd%d&d&e ejZGd'd(d(e ejZGd)d*d*eejZ Gd+d,d,e ejZ!Gd-d.d.e Z"Gd/d0d0ejZ#Gd1d2d2ejZ$e%d3kre&dS)4N)support) socket_helper)BaseHTTPRequestHandler HTTPServerc@sHeZdZdZdZgZgZdZddZddZ dd Z d d Z d d Z dS) BaseRobotTestZtest_robotparserNcCs,t|j}tj|_|j|dSN) ioStringIO robots_txt readlinesurllib robotparserRobotFileParserparserparse)selflinesrI/opt/bitninja-python-dojo/embedded/lib/python3.9/test/test_robotparser.pysetUps zBaseRobotTest.setUpcCs$t|tr|\}}||fS|j|fSr) isinstancetupleagentrurlrrrrget_agent_and_urls zBaseRobotTest.get_agent_and_urlc Cs`|jD]T}||\}}|j||d$||j||Wdq1sP0YqdSN)rr)goodrsubTest assertTruer can_fetchrrrrtest_good_urlss zBaseRobotTest.test_good_urlsc Cs`|jD]T}||\}}|j||d$||j||Wdq1sP0YqdSr)badrr assertFalserr!rrrr test_bad_urls#s zBaseRobotTest.test_bad_urlscCs||j|jdSr) assertEqualr site_mapsrrrrtest_site_maps)szBaseRobotTest.test_site_maps) __name__ __module__ __qualname__r rrr#r'rrr"r%r)rrrrr src@s eZdZdZddgZgdZdS)UserAgentWildcardTestzUser-agent: * Disallow: /cyberworld/map/ # This is an infinite virtual URL space Disallow: /tmp/ # these will soon disappear Disallow: /foo.html / /test.html)/cyberworld/map/index.htmlz/tmp/xxx /foo.htmlNr*r+r,r rr#rrrrr--sr-c@seZdZdZgdZdgZdS)CrawlDelayAndCustomAgentTestz# robots.txt for http://www.example.com/ User-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space # Cybermapper knows where to go. User-agent: cybermapper Disallow: )r.r/)Z cybermapperr0r0Nr2rrrrr38s r3c@s&eZdZdZddgZdgZddgZdS) SitemapTesta# robots.txt for http://www.example.com/ User-agent: * Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml Sitemap: http://www.google.com/hostednews/sitemap_index.xml Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space r.r/r0z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlN)r*r+r,r rr#r'rrrrr4Is  r4c@seZdZdZgZgdZdS)RejectAllRobotsTestz(# go away User-agent: * Disallow: / )r0r./tmp/Nr2rrrrr5Zsr5c@seZdZdZdZddZdS)BaseRequestRateTestNc Cs|j}|j|jD]}||\}}|j||dz||||j||}|||j|jdur||t j j ||j |jj ||j |jj Wdq1s0YqdSr)rrr#rrr& crawl_delay request_rateZassertIsInstancer r RequestRateZrequestsZseconds)rrrrZparsed_request_raterrrtest_request_ratehs(  z%BaseRequestRateTest.test_request_rate)r*r+r,r9r8r;rrrrr7dsr7c@seZdZdZdgZdS) EmptyFileTestrz/fooN)r*r+r,r rrrrrr<sr<c@s4eZdZdZdZejddZdZ dgZ gdZ dS) CrawlDelayAndRequestRateTestzUser-agent: figtree Crawl-delay: 3 Request-rate: 9/30 Disallow: /tmp Disallow: /a%3cd.html Disallow: /a%2fb.html Disallow: /%7ejoe/index.html figtree )r>r1)/tmpz /tmp.html /tmp/a.html /a%3cd.html /a%3Cd.htmlz /a%2fb.htmlz/~joe/index.htmlN) r*r+r,r rr rr:r9r8rr#rrrrr=s  r=c@seZdZdZdS)DifferentAgentTestzFigTree Robot libwww-perl/5.04Nr*r+r,rrrrrrFsrFc@s"eZdZdZdgZgdZdZdS)InvalidRequestRateTestzUser-agent: * Disallow: /tmp/ Disallow: /a%3Cd.html Disallow: /a/b.html Disallow: /%7ejoe/index.html Crawl-delay: 3 Request-rate: 9/banana rB)r6rCrDrEz /a/b.htmlz/%7Ejoe/index.htmlrAN)r*r+r,r rr#r8rrrrrHs rHc@seZdZdZdgZgZdS)InvalidCrawlDelayTestz2User-Agent: * Disallow: /. Crawl-delay: pears r1Nr2rrrrrIsrIc@s eZdZdZdZdgZdgZdS)AnotherInvalidRequestRateTestzeUser-agent: Googlebot Allow: /folder1/myfile.html Disallow: /folder1/ Request-rate: whale/banana Googlebot/folder1/myfile.html/folder1/anotherfile.htmlNr*r+r,r rrr#rrrrrJsrJc@seZdZdZdZdgZdS)UserAgentOrderingTestzMUser-agent: Googlebot Disallow: / User-agent: Googlebot-Mobile Allow: / rKz/something.jpgN)r*r+r,r rr#rrrrrOsrOc@seZdZdZdS)UserAgentGoogleMobileTestzGooglebot-MobileNrGrrrrrPsrPc@s eZdZdZdZdgZdgZdS)GoogleURLOrderingTestzJUser-agent: Googlebot Allow: /folder1/myfile.html Disallow: /folder1/ Z googlebotrLrMNrNrrrrrQsrQc@seZdZdZdgZdgZdS)DisallowQueryStringTestz2User-agent: * Disallow: /some/path?name=value /some/pathz/some/path?name=valueNr2rrrrrRsrRc@seZdZdZdgZdgZdS)UseFirstUserAgentWildcardTestzNUser-agent: * Disallow: /some/path User-agent: * Disallow: /another/path z /another/pathrSNr2rrrrrTsrTc@seZdZdZdgZdgZdS)EmptyQueryStringTestz>User-agent: * Allow: /some/path? Disallow: /another/path? z /some/path?z/another/path?Nr2rrrrrUsrUc@s0eZdZdZejddZdZddgZ dgZ dS) DefaultEntryTestzOUser-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ rAr.r/r0N) r*r+r,r r rr:r9r8rr#rrrrrVs rVc@seZdZdZdZddZdS)StringFormattingTestzUser-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space # Cybermapper knows where to go. User-agent: cybermapper Disallow: /some/path zxUser-agent: cybermapper Disallow: /some/path User-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/cCs|t|j|jdSr)r&strrexpected_outputr(rrrtest_string_formatting)sz+StringFormattingTest.test_string_formattingN)r*r+r,r r[r\rrrrrYs  rYc@seZdZddZddZdS) RobotHandlercCs|dddS)NizForbidden access)Z send_errorr(rrrdo_GET/szRobotHandler.do_GETcGsdSrr)rformatargsrrr log_message2szRobotHandler.log_messageN)r*r+r,r^rarrrrr]-sr]c@s*eZdZddZddZejddZdS)PasswordProtectedSiteTestCasecCsP|tjjttjdft|_t j d|jj ddid|_ d|j _ |j dS)NrzHTTPServer servingZ poll_intervalg{Gz?)nametargetkwargsT)Z addCleanupr Zrequest urlcleanuprrHOSTr]server threadingThreadZ serve_forevertdaemonstartr(rrrr8sz#PasswordProtectedSiteTestCase.setUpcCs"|j|j|jdSr)rhshutdownrkjoinZ server_closer(rrrtearDownHs  z&PasswordProtectedSiteTestCase.tearDowncCs\|jj}dtjdt|d}|d}tj}||| | | d|dS)Nzhttp://:rXz /robots.txt*) rhZserver_addressrrgrZr rrZset_urlreadr$r!)raddrrZ robots_urlrrrrtestPasswordProtectedSiteMs  z7PasswordProtectedSiteTestCase.testPasswordProtectedSiteN)r*r+r,rrprZ reap_threadsrurrrrrb6srbc@sFeZdZdZdeZeddZddZddZ d d Z d d Z d S)NetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtcCsTtdt|j*tj|j|_ |j Wdn1sF0YdS)NZnetwork) rZrequiresrZtransient_internetbase_urlr rrr rrs)clsrrr setUpClass]s zNetworkTestCase.setUpClasscCs$d|j|tj|dsdndS)Nz{}{}{}rXr.r)r_rwospathsplitext)rr{rrrrdszNetworkTestCase.urlcCsV||jj||jj||jd||jd||jddS)Nrrr)r$r disallow_all allow_allZ assertGreatermtimer8r9r(rrr test_basicis zNetworkTestCase.test_basiccCs||jd|d||jd|j||jd|d||jd|d||jd|d||jd|jdS)NrrZ elsewhereZNutchZbrianZwebstats)r rr!rr$rwr(rrrtest_can_fetchps zNetworkTestCase.test_can_fetchcCsftj|d}|||j||j| | d| | d| | ddS)Nz i-robot.txtrrr)r rrrrsr r~r$r}r&rZ assertIsNoner8r9)rrrrr test_read_404xs  zNetworkTestCase.test_read_404N) r*r+r,rwr_r classmethodryrrrrrrrrrvXs  rv__main__)'r rzriZunittestZurllib.robotparserr testrZ test.supportrZ http.serverrrrZTestCaser-r3r4r5r7r<r=rFrHrIrJrOrPrQrRrTrUrVrYr]rbrvr*mainrrrrs@  "          ")