Input from multiple files
Many sites require the user to enter a zip code when performing a search. For example, when searching for car listings, a site will ask for the zip code where you would like to find a car (and perhaps the distance from the entered zip code that would be acceptable). The following script is designed to iterate through a set of input files, each of which contains a list of zip codes for one state. The input files in this case are located within a folder named "input" in the screen-scraper directory. The files are named in the format "zips_CA.csv", for example, which would contain California's zip codes.
import java.io.*;
// Abbreviations used to build each input file name ("input/zips_<ABBR>.csv").
String[] states = {"AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"};

// Iterate through each state abbreviation in the array above
for (int i = 0; i < states.length; i++) {
    // The file changes depending on what state we are scraping
    session.setVariable("INPUT_FILE", "input/zips_" + states[i] + ".csv");

    BufferedReader buffer = new BufferedReader(new FileReader(session.getVariable("INPUT_FILE")));
    try {
        String line;
        while ((line = buffer.readLine()) != null) {
            // The input file in this case will have one zip code per line
            session.setVariable("ZIPCODE", line);
            session.log("***Beginning zip code " + session.getVariable("ZIPCODE"));

            // Scrape the "Search Results" with the new zip code retrieved from the
            // current state's file
            session.scrapeFile("Search Results");
        }
    } finally {
        // Release the file handle before moving on to the next state's file,
        // even if reading or scraping fails part-way through. try/finally is
        // used (rather than try-with-resources) so the script stays valid for
        // older Java-syntax interpreters.
        buffer.close();
    }
}
// Abbreviations used to build each input file name ("input/zips_<ABBR>.csv").
String[] states = {"AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"};

// Iterate through each state abbreviation in the array above
for (int i = 0; i < states.length; i++) {
    // The file changes depending on what state we are scraping
    session.setVariable("INPUT_FILE", "input/zips_" + states[i] + ".csv");

    BufferedReader buffer = new BufferedReader(new FileReader(session.getVariable("INPUT_FILE")));
    try {
        String line;
        while ((line = buffer.readLine()) != null) {
            // The input file in this case will have one zip code per line
            session.setVariable("ZIPCODE", line);
            session.log("***Beginning zip code " + session.getVariable("ZIPCODE"));

            // Scrape the "Search Results" with the new zip code retrieved from the
            // current state's file
            session.scrapeFile("Search Results");
        }
    } finally {
        // Release the file handle before moving on to the next state's file,
        // even if reading or scraping fails part-way through. try/finally is
        // used (rather than try-with-resources) so the script stays valid for
        // older Java-syntax interpreters.
        buffer.close();
    }
}
Attachment | Size |
---|---|
zips_AL.csv | 5.73 KB |
zips_AR.csv | 4.16 KB |
zips_AZ.csv | 3.03 KB |
zips_CA.csv | 20.7 KB |
zips_CO.csv | 4.53 KB |
scraper on 07/16/2010 at 4:24 pm
- Printer-friendly version
- Login or register to post comments