views:

107

answers:

1

Hi All,

After searching a lot on the internet, we found the following code, which gets only the body part of the web page loaded into the web browser control:

// Retrieves the inner HTML of the document's "body" property through
// IDispatch late binding (GetIDsOfNames + Invoke).
//
// All COM references are held in ATL smart wrappers so they are released
// automatically, and every HRESULT is checked before the corresponding
// out-pointer is used.
CComPtr<IPIEHTMLDocument2> pHTMLDocument;
CComPtr<IPIEHTMLElement> pBodyElement;
CComPtr<IDispatch> spDispDoc;

HRESULT res = m_spWebBrowser2->get_Document(&spDispDoc);

// Guard against a null document pointer even when the call reports success.
if (SUCCEEDED(res) && spDispDoc)
{
    res = spDispDoc->QueryInterface(__uuidof(IPIEHTMLDocument2),
                                    reinterpret_cast<void**>(&pHTMLDocument));
}

if (SUCCEEDED(res) && pHTMLDocument)
{
    // GetIDsOfNames takes a writable LPOLESTR array, so keep the name in a
    // local buffer rather than formatting it into a scratch array.
    OLECHAR szBody[] = L"body";
    LPOLESTR szTemp = szBody;
    DISPID id = DISPID_UNKNOWN;

    // Resolve the DISPID of the "body" property.
    res = pHTMLDocument->GetIDsOfNames(IID_NULL, &szTemp, 1, LOCALE_USER_DEFAULT, &id);

    if (SUCCEEDED(res))
    {
        // CComVariant starts as VT_EMPTY and clears itself on scope exit,
        // which releases the IDispatch returned by Invoke.
        CComVariant varResult;
        DISPPARAMS dispparamsNoArgs = {NULL, NULL, 0, 0};

        // Read the property value.
        res = pHTMLDocument->Invoke(id, IID_NULL, LOCALE_USER_DEFAULT, DISPATCH_PROPERTYGET,
                                    &dispparamsNoArgs, &varResult, NULL, NULL);

        if (SUCCEEDED(res) && VT_DISPATCH == varResult.vt && NULL != varResult.pdispVal)
        {
            res = varResult.pdispVal->QueryInterface(IID_IPIEHTMLElement,
                                                     reinterpret_cast<void**>(&pBodyElement));

            if (SUCCEEDED(res) && pBodyElement)
            {
                // CComBSTR frees the returned string when it goes out of scope.
                CComBSTR bodyValue;
                res = pBodyElement->get_innerHTML(&bodyValue);
                // When res succeeds, bodyValue holds the body markup; use it here.
            }
        }
    }
}

But how do we get the remaining head and other tags' document text from the loaded web page? We even tried passing the "head" string to the GetIDsOfNames() method, but it returns a failure value, so we are stuck. Please provide a method to access/extract the entire web page content on Windows Mobile 6.0.

Thanks, Ramanand Bhat.

A: 
void  CBrowserWindow::ExtractWebPageDoc()

{
HRESULT                         hrResult           = E_FAIL; 
IDispatch                      *pIDisp             = NULL;
IPIEHTMLDocument3              *pIHTMLDocument     = NULL;
IPIEHTMLElementCollection      *pHTMLElementcol    = NULL;
IPIEHTMLImgElement             *pHTMLImgElement    = NULL; 


hrResult = m_spIWebBrowser2->get_Document( &pIDisp);
if (NULL != pIDisp)
{
 hrResult = pIDisp->QueryInterface( __uuidof(IPIEHTMLDocument3), (void**)&pIHTMLDocument);
 if( NULL != pIHTMLDocument)
 {
  IPIEHTMLElement* pElement = NULL;
  CComBSTR pHTMLElement;

  hrResult = pIHTMLDocument->get_documentElement( &pElement);
  if (SUCCEEDED(hrResult)) 
  {        
   pElement->get_innerHTML(&pHTMLElement.m_str);
   SaveToHTMLFile( pHTMLElement);
  }

  hrResult = pIHTMLDocument->get_images( &pHTMLElementcol);
  if (NULL != pHTMLElementcol)
  {
   CComBSTR  strImage;
   VARIANT vtBase, vtIndex;
   long pHTMLElementCollectionLength = 0;

   VariantInit( &vtBase);
   vtIndex.vt = VT_UINT;

   hrResult = pHTMLElementcol->get_length( &pHTMLElementCollectionLength);
   for (int ilen = 0; ilen < (int)pHTMLElementCollectionLength ; ilen++)
   {   
    vtIndex.lVal = ilen;

    pIDisp = NULL;
    hrResult =  pHTMLElementcol->item( vtBase, vtIndex , &pIDisp);
    if (NULL != pIDisp)
    {
     hrResult = pIDisp->QueryInterface( __uuidof(IPIEHTMLImgElement), (void**)&pHTMLImgElement);

     if (NULL != pHTMLImgElement)
     //CComQIPtr<IPIEHTMLImgElement> imgElement( pIDisp);
     //imgElement->get_src( &strImage.m_str);    //I get it here :)
     pHTMLImgElement->get_src( &strImage.m_str);
    }
   }
  }
 }
}

}

The above-mentioned code gets the entire web page content on Windows Mobile devices.

Ramanand Bhat