0
void CTest::parseHtml(CString m_csFilename)   //csFilename为html文件的绝对路径,例如D:\\test.html
{
    CString l_title = "";                                    //网页标题
    CString l_description = "";                       //网页描述
    CWaitCursor wait;
    if(m_csFilename.IsEmpty()){
        AfxMessageBox(_T("Please specify the file to parse"));
        return;
    }
    CFile f;

    //let's open file and read it into CString (u can use any buffer to read though
    if (f.Open(m_csFilename, CFile::modeRead|CFile::shareDenyNone)) {
        CString csWholeFile;
        f.Read(csWholeFile.GetBuffer(f.GetLength()), f.GetLength());
        csWholeFile.ReleaseBuffer(f.GetLength());
        f.Close();

        //declare our MSHTML variables and create a document
        MSHTML::IHTMLDocument2Ptr pDoc;
        MSHTML::IHTMLDocument3Ptr pDoc3;
        MSHTML::IHTMLElementCollectionPtr pCollection;
        MSHTML::IHTMLElementPtr pElement;

        HRESULT hr = CoCreateInstance(CLSID_HTMLDocument, NULL, CLSCTX_INPROC_SERVER, 
            IID_IHTMLDocument2, (void**)&pDoc);

        //put the code into SAFEARRAY and write it into document
        SAFEARRAY* psa = SafeArrayCreateVector(VT_VARIANT, 0, 1);
        VARIANT *param;
        bstr_t bsData = (LPCTSTR)csWholeFile;
        hr = SafeArrayAccessData(psa, (LPVOID*)&param);
        param->vt = VT_BSTR;
        param->bstrVal = (BSTR)bsData;

        hr = pDoc->write(psa);
        hr = pDoc->close();

        pDoc3 = pDoc;

        pCollection = pDoc3->getElementsByTagName(L"TITLE");
        pElement = pCollection->item(0, (long)0);
        if(pElement != NULL){
            l_title = (LPCTSTR)bstr_t(pElement->GetinnerText());
        }

        pCollection = pDoc3->getElementsByTagName(L"META");
        for(long i=0; i<pCollection->length; i++){
            pElement = pCollection->item(i, (long)0);
            if(pElement != NULL){
                CString l_temp = (LPCTSTR)bstr_t(pElement->getAttribute("NAME", 2));
                if (l_temp.CompareNoCase("DESCRIPTION")==0)
                {
                    l_description = (LPCTSTR)bstr_t(pElement->getAttribute("CONTENT", 2));
                    break;
                }
            }
        }
       AfxMessageBox(_T(“网页标题为:”+ l_title));
       AfxMessageBox(_T(“网页描述为:”+ l_description ));
}

关闭 返回顶部
联系我们
Copyright © 2011. 聚财吧. All rights reserved.