#!python

# Note that in order to run this script the file
# remote_scraping_session.py must be in the same
# directory.

from remote_scraping_session import RemoteScrapingSession

# Prompt until the user enters a non-empty search term.
while True:
    search_term = raw_input("Enter a keyword you want to search: ")
    if search_term:
        break

if search_term != "":
    # Instantiate a remote scraping session (also known
    # as screen-scraper's Python driver).
    session = RemoteScrapingSession()

    # Initialize the session. If you're running screen-scraper on a
    # different computer from the one this Python file resides on,
    # you'll want to modify and use the first (commented-out) call
    # instead of the second one.
    #session.initialize( "Shopping Site", "192.168.0.5", 8778 )
    session.initialize( "Shopping Site" )

    # Set the variables.
    # Remember that these first two variables correspond to the POST
    # parameters we use for the "Login" scrapeable file.
    session.setVariable( "EMAIL_ADDRESS", "test@test.com" )
    session.setVariable( "PASSWORD", "testing" )

    # This parameter is the search term the user designated.
    session.setVariable( "SEARCH", search_term )

    # We start screen-scraper at page 1 of the search results.
    # Note that we could have also done this in an "initialize"
    # script within screen-scraper, which is common.
    session.setVariable( "PAGE", "1" )

    # Tell the session to scrape. This method call might take
    # a little while since it will need to wait for screen-scraper
    # to fully extract the data before it returns.
    session.scrape()

    # Check for errors.
    if session.isError():
        print "An error occurred: " + session.getErrorMessage()
        raise SystemExit

    # We increase the size of the buffer in case we get a lot
    # of data back.
    session.setBufferSize( 64000 )

    # Get the data set that was stored by screen-scraper in a
    # session variable. This data set corresponds to the "PRODUCTS"
    # extractor pattern found under the "Details page" scrapeable
    # file.
    data_set = session.getVariable( "PRODUCTS" )

    # Check whether we got any records back.
    if not data_set:
        # Output a message if we didn't find any results.
        print "Sorry, no results were found for '" + search_term + "'."
    else:
        # Output the table headers.
        print "%20s%8s%15s%18s%18s" % ("Title", "Price", "Model", "Shipping Weight", "Manufactured By")

        # Iterate through each of the data records screen-scraper
        # extracted, outputting each of them to the console.
        for i in range(len(data_set)):
            print "%20.20s%8.8s%15.15s%18.18s%18.18s" % (data_set[i]["TITLE"].strip(),
                data_set[i]["PRICE"], data_set[i]["MODEL"],
                data_set[i]["SHIPPING_WEIGHT"], data_set[i]["MANUFACTURED_BY"])

    # Be sure to disconnect from the server.
    session.disconnect()
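
# A minimal optional sketch, not part of the original script: the same
# records could be written to a CSV file instead of (or in addition to)
# being printed. It assumes data_set behaves as demonstrated above --
# indexable, with each record keyed by the extractor pattern tokens --
# and the output filename "results.csv" is arbitrary. It could be
# called, for example, as write_results_csv(data_set) right after the
# print loop.
import csv

def write_results_csv(data_set, filename="results.csv"):
    # Column order matches the table printed above.
    fields = ["TITLE", "PRICE", "MODEL", "SHIPPING_WEIGHT", "MANUFACTURED_BY"]
    out = open(filename, "wb")
    try:
        writer = csv.writer(out)
        writer.writerow(fields)
        for i in range(len(data_set)):
            writer.writerow([data_set[i][field] for field in fields])
    finally:
        out.close()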