Write to CSV
Probably the easiest way to write to a comma-separated value (CSV) document is to use screen-scraper's included CsvWriter. If for some reason you can't or don't wish to use the CsvWriter, the following code will also accomplish the task. CSV files are very useful for viewing in spreadsheets or inserting values into a database.
Also, you'll notice that the session variables are cleared out at the end of the script. This would be done when you don't want a session variable to persist into the next dataRecord. For more about scope and dataRecords please go here.
import java.util.Date;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
// Date/time string to add to filename or column.
// Returns the current local date and time formatted as yyyy-MM-dd_HHmmss.
// The hour uses the 24-hour clock ("HH"): a 12-hour "hh" with no AM/PM marker
// gives the same stamp for e.g. 03:15 and 15:15, which makes file names collide
// and sort out of order.
String getDateTime()
{
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd_HHmmss");
    Date date = new Date();
    return dateFormat.format(date);
}
// Fix format issues, and return wrapped in quotes.
// Prepares one value for use as a quoted CSV field:
//   - HTML entities are decoded via sutil.convertHTMLEntities,
//   - embedded double quotes are escaped by doubling them (the CSV convention),
//     so the original text survives a round trip instead of being rewritten
//     with apostrophes,
//   - every run of whitespace, including a lone tab or line break, becomes one
//     space so a record always stays on a single line,
//   - leading and trailing whitespace is trimmed.
// A null value yields an empty quoted field ("").
String fixString(String value)
{
    if (value == null)
    {
        return "\"\"";
    }
    value = sutil.convertHTMLEntities(value);
    // Literal (non-regex) replacement: " becomes ""
    value = value.replace("\"", "\"\"");
    value = value.replaceAll("\\s+", " ");
    value = value.trim();
    return "\"" + value + "\"";
}
// Normalize a phone number to "(AAA) PPP-NNNN", followed by " x<digits>" when
// more than ten digits remain (treated as an extension).
// All non-digit characters are dropped first, and a leading "1" is removed.
// Returns an empty string when phone is null or fewer than ten digits remain.
String fixPhone(String phone)
{
    // Typed declarations keep these variables local to the method. Untyped
    // assignments could read or overwrite same-named variables in the enclosing
    // script scope, letting a stale newPhone leak into the result.
    String newPhone = "";
    if (phone != null)
    {
        phone = phone.replaceAll("\\D", "");
        // If there is a leading 1, remove it
        if (phone.startsWith("1"))
        {
            session.log("+++Starts with a one, so removing.");
            phone = phone.substring(1);
        }
        if (phone.length() >= 10)
        {
            String area = phone.substring(0, 3);
            String prefix = phone.substring(3, 6);
            String number = phone.substring(6, 10);
            newPhone = "(" + area + ") " + prefix + "-" + number;
            // Deal with extensions: anything past the tenth digit
            if (phone.length() > 10)
            {
                newPhone += " x" + phone.substring(10);
            }
        }
    }
    return newPhone;
}
// Set name of file to write to
// outputFile = "output/" + session.getName() + "_" + getDateTime() + ".csv";
outputFile = "output/" + session.getName() + ".csv";
// Set columns to write
// Will look for tokens of same name using usual naming convention
// (column name upper-cased, whitespace replaced by underscores, e.g. "Post code" -> POST_CODE)
String[] names = {
    "Dealer",
    "Address1",
    "Address2",
    "City",
    "State",
    "Post code",
    "Country",
    "Phone",
    "Fax"
};
// Held outside the try so the finally block can release the file handle
out = null;
try
{
    File file = new File(outputFile);
    // Must be checked before the writer is opened, since opening creates the file
    fileExists = file.exists();
    // Open up the file to be appended to
    out = new FileWriter(outputFile, true);
    session.log("Writing data to a file");
    if (!fileExists)
    {
        // Write headers (only once, when the file is first created)
        for (i=0; i<names.length; i++)
        {
            out.write(names[i]);
            if (i<names.length-1)
                out.write(",");
        }
        out.write("\n");
    }
    // Write columns
    for (i=0; i<names.length; i++)
    {
        var = names[i];
        var = var.toUpperCase();
        var = var.replaceAll("\\s", "_");
        out.write(fixString(dataRecord.get(var)));
        if (i<names.length-1)
            out.write(",");
    }
    out.write( "\n" );
    // Close up the file; a failure here is reported by the catch below
    out.close();
    out = null;
    // Add to controller (only after the record has been written and closed)
    session.addToNumRecordsScraped(1);
}
catch( Exception e )
{
    session.log( "An error occurred while writing the data to a file: " + e.getMessage() );
}
finally
{
    // Still non-null only if something failed before the normal close;
    // release the file handle so it does not leak.
    if (out != null)
    {
        try
        {
            out.close();
        }
        catch( Exception ignored )
        {
            // The original failure has already been logged above
        }
    }
}
import java.text.DateFormat;
import java.text.SimpleDateFormat;
// Date/time string to add to filename or column.
// Returns the current local date and time formatted as yyyy-MM-dd_HHmmss.
// The hour uses the 24-hour clock ("HH"): a 12-hour "hh" with no AM/PM marker
// gives the same stamp for e.g. 03:15 and 15:15, which makes file names collide
// and sort out of order.
String getDateTime()
{
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd_HHmmss");
    Date date = new Date();
    return dateFormat.format(date);
}
// Fix format issues, and return wrapped in quotes.
// Prepares one value for use as a quoted CSV field:
//   - HTML entities are decoded via sutil.convertHTMLEntities,
//   - embedded double quotes are escaped by doubling them (the CSV convention),
//     so the original text survives a round trip instead of being rewritten
//     with apostrophes,
//   - every run of whitespace, including a lone tab or line break, becomes one
//     space so a record always stays on a single line,
//   - leading and trailing whitespace is trimmed.
// A null value yields an empty quoted field ("").
String fixString(String value)
{
    if (value == null)
    {
        return "\"\"";
    }
    value = sutil.convertHTMLEntities(value);
    // Literal (non-regex) replacement: " becomes ""
    value = value.replace("\"", "\"\"");
    value = value.replaceAll("\\s+", " ");
    value = value.trim();
    return "\"" + value + "\"";
}
// Normalize a phone number to "(AAA) PPP-NNNN", followed by " x<digits>" when
// more than ten digits remain (treated as an extension).
// All non-digit characters are dropped first, and a leading "1" is removed.
// Returns an empty string when phone is null or fewer than ten digits remain.
String fixPhone(String phone)
{
    // Typed declarations keep these variables local to the method. Untyped
    // assignments could read or overwrite same-named variables in the enclosing
    // script scope, letting a stale newPhone leak into the result.
    String newPhone = "";
    if (phone != null)
    {
        phone = phone.replaceAll("\\D", "");
        // If there is a leading 1, remove it
        if (phone.startsWith("1"))
        {
            session.log("+++Starts with a one, so removing.");
            phone = phone.substring(1);
        }
        if (phone.length() >= 10)
        {
            String area = phone.substring(0, 3);
            String prefix = phone.substring(3, 6);
            String number = phone.substring(6, 10);
            newPhone = "(" + area + ") " + prefix + "-" + number;
            // Deal with extensions: anything past the tenth digit
            if (phone.length() > 10)
            {
                newPhone += " x" + phone.substring(10);
            }
        }
    }
    return newPhone;
}
// Set name of file to write to
// outputFile = "output/" + session.getName() + "_" + getDateTime() + ".csv";
outputFile = "output/" + session.getName() + ".csv";
// Set columns to write
// Will look for tokens of same name using usual naming convention
// (column name upper-cased, whitespace replaced by underscores, e.g. "Post code" -> POST_CODE)
String[] names = {
    "Dealer",
    "Address1",
    "Address2",
    "City",
    "State",
    "Post code",
    "Country",
    "Phone",
    "Fax"
};
// Held outside the try so the finally block can release the file handle
out = null;
try
{
    File file = new File(outputFile);
    // Must be checked before the writer is opened, since opening creates the file
    fileExists = file.exists();
    // Open up the file to be appended to
    out = new FileWriter(outputFile, true);
    session.log("Writing data to a file");
    if (!fileExists)
    {
        // Write headers (only once, when the file is first created)
        for (i=0; i<names.length; i++)
        {
            out.write(names[i]);
            if (i<names.length-1)
                out.write(",");
        }
        out.write("\n");
    }
    // Write columns
    for (i=0; i<names.length; i++)
    {
        var = names[i];
        var = var.toUpperCase();
        var = var.replaceAll("\\s", "_");
        out.write(fixString(dataRecord.get(var)));
        if (i<names.length-1)
            out.write(",");
    }
    out.write( "\n" );
    // Close up the file; a failure here is reported by the catch below
    out.close();
    out = null;
    // Add to controller (only after the record has been written and closed)
    session.addToNumRecordsScraped(1);
}
catch( Exception e )
{
    session.log( "An error occurred while writing the data to a file: " + e.getMessage() );
}
finally
{
    // Still non-null only if something failed before the normal close;
    // release the file handle so it does not leak.
    if (out != null)
    {
        try
        {
            out.close();
        }
        catch( Exception ignored )
        {
            // The original failure has already been logged above
        }
    }
}
scraper on 07/16/2010 at 4:24 pm
- Printer-friendly version
- Login or register to post comments