Problems with Website using Javascript, the 2nd

Hi,

HELP, HELP, I'm stuck!

I have found the reason for the behaviour that puzzled me yesterday (see Problems with Website using Javascript):

The site sends a page containing
- the error message
- JavaScript code,
-- that sets a cookie
-- calls a function redirect("reload"); // which brings up the "real" page, I presume.

-> Question 1: how can I accomplish this "reload"?

To make things worse, the value of the above mentioned cookie is calculated by JS code which is sent as two escaped strings. String1 contains HTML objects, String2 contains JS functions. These functions reference objects from String1 and calculate the cookie's value (genPid()).

That situation could be handled if the functions in String2 were always the same or followed a rigid scheme. Unfortunately this is not the case! It seems that there is a whole collection of possible functions!!

-> Question 2: how can I write an HTML file containing objects and functions from strings 1 and 2, have this "executed" and retrieve the resulting information?

-> Question 3: What is the Javascript equivalent to Interpreted Java's FileWriter?

At this point I cannot see another way of overcoming the block in "www.bazar.at" which is a key source of information for us.

best regards

Christian Pieler

for page code see below.

<html>
<head>
    <title></title>
    <meta http-equiv="Expires" content="28FEB2002" />
    <meta http-equiv="CACHE-CONTROL" content="NO-CACHE" />
    <script type="text/javascript">
        function addFields(formObj) {  }
        function redirect(commitType) {
            var cookieenabled = false;
            if (navigator.cookieEnabled) {
                if (navigator.cookieEnabled==true) {
                    var exdate=new Date();
                    exdate.setDate(exdate.getDate()+1);
                    document.cookie="PRID=" +escape(genPid())+";path=/; expires="+exdate.toGMTString()+"";
                    cookieenabled=(document.cookie.indexOf("PRID")!=-1)? true : false; } }
            if (cookieenabled) {
                if (commitType=="reload")
                    window.location.reload(true);
                else {
                    var oFrm = document.createElement("form");
                    var oEnvlp = document.getElementById("frmPlsHldr");
                    oFrm.method = "post";
                    addFields(oFrm);
                    oEnvlp.appendChild (oFrm);
                    oFrm.submit(); } }
            else {
                var oJSCookieMSGObj = document.getElementById('JSCookieMSG');
                oJSCookieMSGObj.style.display = 'block'; } }
    </script>
</head>
<body style="background-color:white">
<div style='display:none' id='sbbhscc'></div><script type="text/javascript">sbbvscc='%3c%73%70%61%6e%20%73%74%79%6c%65%3d"%64%69%73%70%6c%61%79%3a%6e%6f%6e%65"%20%74%69%74%6c%65%3d"%72%6a%79%45%69%4d%62%51%20%77%72%70%20%63%72%65%72%76%6f%55%20%47%74%78%72%46%20%4d%6e%72%53%4c%57%7a%20%57%46%4e%6c%75%41%78%72%20%4c%4f%72%49%7a"%20%69%64%3d"%73%62%62%5f%65%50%71%6e%4c%5a%77"%3e%50%5a%72%69%68%79%67%6b%20%77%6a%74%42%4b%72%20%67%72%61%4c%58%4a%20%72%50%75%6e%47%74%20%50%52%69%68%75%72%20%6e%66%72%67%4b%78%4d%20%68%72%79%72%6d%61%3c%2f%73%70%61%6e%3e%3c%2f%4a%53%51%3e'; sbbgscc='%66%75%6e%63%74%69%6f%6e%20%73%62%62%5f%54%5a%4b%57%28%29%20%7b%20%6f%4c%72%47%78%79%54%20%3d%20%74%79%70%65%6f%66%20%66%75%6e%63%74%69%6f%6e%20%28%29%20%7b%7d%3b%20%72%65%74%75%72%6e%20%53%74%72%69%6e%67%2e%66%72%6f%6d%43%68%61%72%43%6f%64%65%28%6f%4c%72%47%78%79%54%2e%63%68%61%72%43%6f%64%65%41%74%28%31%29%5e%36%32%29%3b%20%7d%66%75%6e%63%74%69%6f%6e%20%73%62%62%5f%4d%50%65%55%58%53%51%28%29%20%7b%20%73%62%62%4f%62%6a%20%3d%20%64%6f%63%75%6d%65%6e%74%2e%67%65%74%45%6c%65%6d%65%6e%74%42%79%49%64%28%27%73%62%62%5f%65%50%71%6e%4c%5a%77%27%29%3b%20%72%65%74%75%72%6e%20%73%62%62%4f%62%6a%2e%67%65%74%41%74%74%72%69%62%75%74%65%28%27%74%69%74%6c%65%27%29%2e%73%75%62%73%74%72%28%30%2c%31%29%3b%20%7d; function genPid() {return %73%62%62%5f%54%5a%4b%57%28%29+%73%62%62%5f%4d%50%65%55%58%53%51%28%29; }';</script><div id='sbbfrcc' style='position: absolute; top: -10px; left: -3px; font-size:1px'></div>

<div id='JSCookieMSG' style="display:none"><h2 class="info">Ein Fehler ist aufgetreten</h2>

    <p class="content">Bitte aktivieren Sie Cookies in Ihrem Browser, um diese Seite richtig angezeigt zu bekommen.  <a href="http://wiki.jappy.de/wiki/Cookies" target="_blank">Hilfe zu Cookies</a><br/><br/>

        Fehler-ID: 50a648944d390 <br/><br/>

    </p>

</div>
<div id='JSOffMSG'><noscript><h2 class="info">Ein Fehler ist aufgetreten</h2>

    <p class="content">Bitte aktivieren Sie Javascript in Ihrem Browser, um diese Seite richtig angezeigt zu bekommen.  <a href="http://wiki.jappy.de/wiki/Javascript" target="_blank">Hilfe zu Javascript</a><br/><br/>

        Fehler-ID: 50a648944d390 <br/><br/>

    </p>

</noscript></div>
<div id='frmPlsHldr'></div>
<script type="text/javascript">
    oJSOffMSG = document.getElementById('JSOffMSG');
    oJSOffMSG.style.display = 'none';
    try{ y=unescape(sbbvscc.replace(/^<\!\-\-\s*|\s*\-\->$/g,''));
        document.getElementById('sbbhscc').innerHTML=y;
        x=unescape(sbbgscc.replace(/^<\!\-\-\s*|\s*\-\->$/g,'')); }
    catch(e){
        x='function genPid() {return "jser"; }'; }
    document.write ('<'+'script type="text/javascri'+'pt">'+x+' redirect("reload");</'+'script>');
</script>
=======
collection of HTML code and functions sent in String1 and String2

HTML ============
<p style="display:none" id="sbb_cGxdLzj">SJF JuwVpx DjJV RJQ iScFKeJ VJgFNW yJoqzA</p></RFG>
HTML END ============
FUNCTIONS ============
/**
 * First PRID fragment: takes the character at index 2 of `typeof 1619`,
 * XORs its character code with 53 and returns the resulting character.
 *
 * @returns {string} A one-character string.
 */
function sbb_qBwoSGX() {
    // Declared locally; the undeclared assignment threw in strict mode.
    var ITW = typeof 1619;
    return String.fromCharCode(ITW.charCodeAt(2) ^ 53);
}

/**
 * Second PRID fragment: the character at index 1 of the innerHTML of the
 * element with id "sbb_cGxdLzj".
 *
 * @returns {string} A one-character string ("" if the text is too short).
 */
function sbb_cqUjxnf() {
    var sbbObj = document.getElementById('sbb_cGxdLzj');
    // charAt(1) yields the same result as the deprecated substr(1, 1).
    return sbbObj.innerHTML.charAt(1);
}

/**
 * Concatenates both fragments into the value stored in the PRID cookie.
 *
 * @returns {string}
 */
function genPid() {
    return sbb_qBwoSGX() + sbb_cqUjxnf();
}
FUNCTIONS END ============
HTML ============
<form id="sbb_Qhx" method="post" style="display:none"><input name='sbb_mkNlQcw' type='checkbox'  value='iuSR'/><input name='sbb_mkNlQcw' type='checkbox'  value='OheB'/><input name='sbb_mkNlQcw' type='checkbox'  value='SJG'/><input name='sbb_mkNlQcw' type='checkbox'  value='IzEwPfd'/><input name='sbb_mkNlQcw' type='checkbox'  value='vBPlbXz'/><input name='sbb_mkNlQcw' type='checkbox'  value='KaXpQyHJ'/><input name='sbb_mkNlQcw' type='checkbox'  CHECKED  value='zUer'/><input name='sbb_mkNlQcw' type='checkbox'  value='Kwi'/><input name='sbb_mkNlQcw' type='checkbox'  value='WhcsmuOz'/></form>
HTML END ============
FUNCTIONS ============
/**
 * First PRID fragment: checks the checkbox at index 4 of the group named
 * "sbb_mkNlQcw", concatenates the values of all checked boxes of that group
 * (read through the form "sbb_Qhx") and returns the character at index 4.
 *
 * Side effect: the checkbox at index 4 is left checked.
 *
 * @returns {string} A one-character string ("" if the text is too short).
 */
function sbb_YQaOEwvT() {
    /** Joins the values of every checked entry of objArr, in order. */
    function sbb_wvHKN(objArr) {
        var ts = '';
        // The loop index is local now; it used to leak as a global.
        for (var i = 0; i < objArr.length; i++) {
            if (objArr[i].checked === true) {
                ts += objArr[i].value;
            }
        }
        return ts;
    }

    var sbbObj = document.getElementsByName('sbb_mkNlQcw');
    var sbbFrm = document.getElementById('sbb_Qhx');
    sbbObj[4].checked = true;
    // Kept local so the page-level variable x is no longer overwritten.
    var x = sbb_wvHKN(sbbFrm.sbb_mkNlQcw);
    // charAt(4) yields the same result as the deprecated substr(4, 1).
    return x.charAt(4);
}

/**
 * Second PRID fragment: the character at index 4 of `typeof 1559`
 * (the XOR with 0 leaves the character code unchanged).
 *
 * @returns {string} A one-character string.
 */
function sbb_xRkZigYB() {
    var COgwRty = typeof 1559;
    return String.fromCharCode(COgwRty.charCodeAt(4) ^ 0);
}

/**
 * Concatenates both fragments into the value stored in the PRID cookie.
 *
 * @returns {string}
 */
function genPid() {
    return sbb_YQaOEwvT() + sbb_xRkZigYB();
}
FUNCTIONS END ============
HTML ============
<table id="sbb_jWxCmVqt" style="display:none"><tr><td onclick="return 'UWD'"><span>nHRMAQ</span></td><td onclick="return 'cST'"><span>HRiAobMT</span></td><td onclick="return 'mKxHjD'"><span>HuVy</span></td><td onclick="return 'ZGnuv'"><span>sKHwj</span></td><td onclick="return 'kdFl'"><span>CfTMBiVP</span></td></tr><tr><td onclick="return 'wyFH'"><span>nOlj</span></td><td onclick="return 'wjtQNks'"><span>qyCAglnD</span></td><td onclick="return 'oRjc'"><span>leXF</span></td><td onclick="return 'PLHsWr'"><span>Gdihp</span></td><td onclick="return 'idvf'"><span>siEA</span></td></tr><tr><td onclick="return 'seVK'"><span>bmws</span></td><td onclick="return 'EIpc'"><span>dZlk</span></td><td onclick="return 'HZR'"><span>ULC</span></td><td onclick="return 'nMljId'"><span>vUg</span></td><td onclick="return 'EXtag'"><span>KFbi</span></td></tr><tr><td onclick="return 'HQtRnpCm'"><span>MlkFn</span></td><td onclick="return 'PbeAq'"><span>qtw</span></td><td onclick="return 'xAW'"><span>WmbOq</span></td><td onclick="return 'rPj'"><span>HlT</span></td><td onclick="return 'pzovMl'"><span>rGTWS</span></td></tr><tr><td onclick="return 'KobX'"><span>lwBN</span></td><td onclick="return 'pCTj'"><span>Iyw</span></td><td onclick="return 'OTD'"><span>IhBW</span></td><td onclick="return 'XoKT'"><span>dgEuNi</span></td><td onclick="return 'utL'"><span>AMBlr</span></td></tr><tr><td onclick="return 'cfgt'"><span>FjyMW</span></td><td onclick="return 'dck'"><span>mzsPk</span></td><td onclick="return 'aWt'"><span>QFrM</span></td><td onclick="return 'qxjsd'"><span>zEnGCU</span></td><td onclick="return 'ktJE'"><span>wKaup</span></td></tr></table>
HTML END ============
FUNCTIONS ============
/**
 * First PRID fragment: Base64-decodes the embedded string 'VFZw' with a
 * hand-written decoder and returns the first decoded character.
 *
 * @returns {string} A one-character string.
 */
function sbb_bCxm() {
    var data = 'VFZw';
    // Index 64 is '=', the padding character.
    var uEp = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    var h1, h2, h3, h4, bits, o1, o2, o3;
    var i = 0;
    var tmp_arr = [];

    do {
        // Four 6-bit symbols are packed into 24 bits ...
        h1 = uEp.indexOf(data.charAt(i++));
        h2 = uEp.indexOf(data.charAt(i++));
        h3 = uEp.indexOf(data.charAt(i++));
        h4 = uEp.indexOf(data.charAt(i++));
        bits = h1 << 18 | h2 << 12 | h3 << 6 | h4;

        // ... and split into three bytes.
        o1 = bits >> 16 & 0xff;
        o2 = bits >> 8 & 0xff;
        o3 = bits & 0xff;

        // Padding in the third or fourth position shortens the group.
        if (h3 == 64) {
            tmp_arr.push(String.fromCharCode(o1));
        } else if (h4 == 64) {
            tmp_arr.push(String.fromCharCode(o1, o2));
        } else {
            tmp_arr.push(String.fromCharCode(o1, o2, o3));
        }
    } while (i < data.length);

    // charAt(0) yields the same result as the deprecated substr(0, 1).
    return tmp_arr.join('').charAt(0);
}

/**
 * Second PRID fragment: invokes the onclick handlers of five fixed cells of
 * the table "sbb_jWxCmVqt", concatenates their return values in order and
 * returns the character at index 5.
 *
 * @returns {string} A one-character string ("" if the text is too short).
 */
function sbb_vtdFzJ() {
    // [row, cell] pairs, in the order the handlers are invoked.
    var picks = [[0, 3], [1, 4], [4, 1], [0, 2], [5, 3]];
    var sbbObj = document.getElementById('sbb_jWxCmVqt');
    var finalStr = '';
    for (var k = 0; k < picks.length; k++) {
        // Called as a method so that `this` stays bound to the cell.
        finalStr += sbbObj.rows[picks[k][0]].cells[picks[k][1]].onclick();
    }
    return finalStr.charAt(5);
}

/**
 * Concatenates both fragments into the value stored in the PRID cookie.
 *
 * @returns {string}
 */
function genPid() {
    return sbb_bCxm() + sbb_vtdFzJ();
}
FUNCTIONS END ============
HTML ============
<select id="sbb_twf" style="display:none"><option  value='jpQs'>SEF</option><option  value='MEXf'>TpQ</option><option  value='iAd'>uEMaL</option><option  value='EtIb'>ClQ</option><option  SELECTED  value='xXZNyq'>syCAIXOx</option><option  value='etlWQnzX'>tfM</option><option  value='EMk'>SyVXqJGn</option><option  value='FTHiV'>Xec</option></select>
HTML END ============
FUNCTIONS ============
/**
 * First PRID fragment: evaluates a fixed arithmetic expression and returns
 * the character whose code is the result.
 *
 * @returns {string} A one-character string.
 */
function sbb_ocJb() {
    // Declared locally; the undeclared assignment threw in strict mode.
    var mvJPFX = (-5931606) + Math.floor((((((((((((52) * 75) * 39) * 39) + 29) - 66) - 77) + 27) - 75) + 45) - 72) - 34);
    return String.fromCharCode(mvJPFX);
}

/**
 * Second PRID fragment: the first character of the value of the currently
 * selected option of the select element "sbb_twf".
 *
 * @returns {string} A one-character string ("" if the value is empty).
 */
function sbb_UHdZGEJ() {
    var sbbObj = document.getElementById('sbb_twf');
    // charAt(0) yields the same result as the deprecated substr(0, 1).
    return sbbObj.options[sbbObj.selectedIndex].value.charAt(0);
}

/**
 * Concatenates both fragments into the value stored in the PRID cookie.
 *
 * @returns {string}
 */
function genPid() {
    return sbb_ocJb() + sbb_UHdZGEJ();
}
FUNCTIONS END ============
HTML ============
<p style="display:none" name="sbbnamePRgKiH" title="PRgKiH" id="sbb_VwGbcmOI">DSHTl cSupLH rHD HKO HQA yHs HeuHFK</p>
HTML END ============
FUNCTIONS ============
/**
 * First PRID fragment: the character at index 2 of the innerHTML of the
 * element with id "sbb_VwGbcmOI".
 *
 * @returns {string} A one-character string ("" if the text is too short).
 */
function sbb_TLKHbEjz() {
    // Declared locally; the undeclared assignment threw in strict mode.
    var sbbObj = document.getElementById('sbb_VwGbcmOI');
    // charAt(2) yields the same result as the deprecated substr(2, 1).
    return sbbObj.innerHTML.charAt(2);
}

/**
 * Second PRID fragment: evaluates a fixed arithmetic expression and returns
 * the character whose code is the result.
 *
 * @returns {string} A one-character string.
 */
function sbb_KdErUP() {
    var WvaeB = (-387303) + Math.floor((((((((((((72) + 20) + 98) * 24) * 86) - 47) + 97) / 91) + 45) - 3) * 89) + 92);
    return String.fromCharCode(WvaeB);
}

/**
 * Concatenates both fragments into the value stored in the PRID cookie.
 *
 * @returns {string}
 */
function genPid() {
    return sbb_TLKHbEjz() + sbb_KdErUP();
}
FUNCTIONS END ============

We often have cases where one

  1. We often have cases where one page sets some session values, and then redirects you. In this case you'd just have separate scrapeable files for each request.
  2. This is the tricky one. You would likely need to scrape the JavaScript function, then pass the function to the rhino interpreter. It's already included in screen-scraper, and this post on stackoverflow should get you started.
  3. JavaScript does have a File System class, but I don't think you'll need it.

rhino

The article about rhino says that it cannot manipulate HTML objects, which would be necessary to execute the scripts that the site sends.
I have tried to write HTML + SCRIPTS to a HTML file that I process with Chrome called as an external process. That works fine, but I cannot get Chrome to write a "result file" that I can scrape with SS.
Will Mozilla do this? Nota bene: my scrape runs on a Windows machine.

best regards

Christian Pieler