On occasion, rather than downloading an entire web page, you may only want to know when it was last updated, or perhaps its content type. This can be done via an HTTP HEAD request (as opposed to a GET or POST). This script shows you how to go about that.
/**
* This script allows you to retrieve a specific HTTP header
* from a server, such as the content-length (i.e., the size
* of the file).
*/
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.contrib.ssl.EasySSLProtocolSocketFactory;
import org.apache.commons.httpclient.methods.*;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.httpclient.protocol.Protocol;
// The address whose headers will be requested.
urlString = "http://www.google.com/";

// Declared outside the try block so the finally clause can release the
// connection regardless of where a failure occurs.
HeadMethod method = null;

try
{
    HttpClient client = new HttpClient();

    // NOTE: "session" is not defined in this script; it is presumably supplied
    // by the hosting scripting environment.
    session.setProxySettingsOnHttpClient( client, client.getHostConfiguration() );

    URL url = new URL( urlString );

    if( "https".equalsIgnoreCase( url.getProtocol() ) )
    {
        // Honor an explicit port in the URL; fall back to 443 otherwise.
        int port = url.getPort() == -1 ? 443 : url.getPort();

        // Attach the lenient SSL socket factory to the client's own host
        // configuration (the same object the proxy settings were applied to).
        Protocol easyHTTPS = new Protocol( "https", new EasySSLProtocolSocketFactory(), 443 );
        client.getHostConfiguration().setHost( url.getHost(), port, easyHTTPS );

        // A relative URI is used on purpose: when the method carries an
        // absolute URL, HttpClient re-resolves the protocol from the URL's
        // scheme and the custom protocol set above would not be used.
        String file = url.getFile();
        method = new HeadMethod( file.length() == 0 ? "/" : file );
    }
    else
    {
        // Plain HTTP needs no special host configuration; the absolute URL
        // carries everything required.
        method = new HeadMethod( urlString );
    }

    // Provide a custom retry handler: up to 3 retries, and do not retry a
    // request that has already been sent.
    method.getParams().setParameter
    (
        HttpMethodParams.RETRY_HANDLER,
        new DefaultHttpMethodRetryHandler( 3, false )
    );

    // Execute the method.
    int statusCode = client.executeMethod( method );
    if( statusCode != HttpStatus.SC_OK )
    {
        // Logged but not fatal: the response headers are still inspected below.
        session.logError( "Error received status code: " + statusCode );
    }

    // Retrieve just the Content-Length header value. The header may be absent
    // from the response, in which case getResponseHeader returns null.
    Header contentLengthHeader = method.getResponseHeader( "Content-Length" );
    if( contentLengthHeader == null )
    {
        session.log( "No Content-Length header was returned." );
    }
    else
    {
        session.log( "Content length: " + contentLengthHeader.getValue() );
    }
}
catch( MalformedURLException mfue )
{
    session.logError( "MalformedURLException: " + mfue, mfue );
}
catch( Exception e )
{
    // Pass the throwable along so the cause is not lost.
    session.logError( "An exception occurred: " + e.getMessage(), e );
}
finally
{
    // Release the connection, if a method was ever created.
    if( method != null )
    {
        method.releaseConnection();
    }
}