VC解析HTML文件
2025/5/1 21:43:52
// Parses a local HTML file with MSHTML and shows its <title> text and the
// CONTENT of its <meta name="description"> tag in two message boxes.
//
// m_csFilename: absolute path of the HTML file, e.g. D:\\test.html.
//
// The MSHTML wrapper calls used here (write, close, getElementsByTagName, ...)
// report failures by throwing _com_error; such exceptions are not caught here
// and propagate to the caller.
// NOTE(review): CoCreateInstance presumably requires COM to be initialized on
// the calling thread already (CoInitialize/AfxOleInit) - confirm at the call site.
void CTest::parseHtml(CString m_csFilename)
{
    // Releases the SAFEARRAY (and the VARIANT/BSTR it owns) on every exit path,
    // including when an MSHTML wrapper throws.
    struct SafeArrayGuard
    {
        SAFEARRAY* m_psa;
        explicit SafeArrayGuard(SAFEARRAY* psa) : m_psa(psa) {}
        ~SafeArrayGuard() { if (m_psa != NULL) SafeArrayDestroy(m_psa); }
    };

    CString l_title;        // page title
    CString l_description;  // page description
    CWaitCursor wait;

    if (m_csFilename.IsEmpty()) {
        AfxMessageBox(_T("Please specify the file to parse"));
        return;
    }

    // Read the whole file as raw bytes. A byte string (CStringA) is used so the
    // buffer size matches the byte count regardless of the TCHAR width.
    CFile f;
    if (!f.Open(m_csFilename, CFile::modeRead | CFile::shareDenyNone)) {
        AfxMessageBox(_T("Failed to open the file to parse"));
        return;
    }
    const ULONGLONG l_fileSize = f.GetLength();
    const ULONGLONG l_maxSize = 0x7FFFFFFF;  // largest length a CStringA buffer can take
    if (l_fileSize > l_maxSize) {
        f.Close();
        AfxMessageBox(_T("The file is too large to parse"));
        return;
    }
    const int l_size = static_cast<int>(l_fileSize);
    CStringA l_raw;
    const UINT l_read = f.Read(l_raw.GetBuffer(l_size), static_cast<UINT>(l_size));
    l_raw.ReleaseBuffer(static_cast<int>(l_read));  // keep only what was actually read
    f.Close();

    // Create an empty HTML document object.
    MSHTML::IHTMLDocument2Ptr pDoc;
    HRESULT hr = CoCreateInstance(CLSID_HTMLDocument, NULL, CLSCTX_INPROC_SERVER,
                                  IID_IHTMLDocument2, reinterpret_cast<void**>(&pDoc));
    if (FAILED(hr) || pDoc == NULL) {
        AfxMessageBox(_T("Failed to create the HTML document object"));
        return;
    }

    // IHTMLDocument2::write takes a SAFEARRAY of VARIANTs; wrap the markup in a
    // one-element array.
    SAFEARRAY* psa = SafeArrayCreateVector(VT_VARIANT, 0, 1);
    if (psa == NULL) {
        AfxMessageBox(_T("Failed to allocate the buffer for the HTML document"));
        return;
    }
    SafeArrayGuard l_guard(psa);

    VARIANT* param = NULL;
    hr = SafeArrayAccessData(psa, reinterpret_cast<void**>(&param));
    if (FAILED(hr) || param == NULL) {
        AfxMessageBox(_T("Failed to access the buffer for the HTML document"));
        return;
    }
    // Narrow-to-wide conversion is done by bstr_t's const char* constructor.
    bstr_t bsData(static_cast<LPCSTR>(l_raw));
    param->vt = VT_BSTR;
    // The array gets its own copy: SafeArrayDestroy frees the element's BSTR,
    // while bsData frees the one it holds, so they must not share a pointer.
    param->bstrVal = bsData.copy();
    SafeArrayUnaccessData(psa);

    pDoc->write(psa);
    pDoc->close();

    MSHTML::IHTMLDocument3Ptr pDoc3 = pDoc;
    if (pDoc3 == NULL) {
        AfxMessageBox(_T("The HTML document does not support element lookup"));
        return;
    }

    MSHTML::IHTMLElementCollectionPtr pCollection;
    MSHTML::IHTMLElementPtr pElement;

    // Title: inner text of the first <title> element, if there is one.
    pCollection = pDoc3->getElementsByTagName(L"TITLE");
    if (pCollection != NULL) {
        pElement = pCollection->item(_variant_t(0L), _variant_t(0L));
        if (pElement != NULL) {
            l_title = (LPCTSTR)bstr_t(pElement->GetinnerText());
        }
    }

    // Description: CONTENT of the first <meta> whose NAME is "description"
    // (compared case-insensitively).
    pCollection = pDoc3->getElementsByTagName(L"META");
    if (pCollection != NULL) {
        const long l_count = pCollection->length;
        for (long i = 0; i < l_count; ++i) {
            pElement = pCollection->item(_variant_t(i), _variant_t(0L));
            if (pElement == NULL) {
                continue;
            }
            // Flag 2 asks for the attribute value as a string. A <meta> without
            // the attribute does not yield a string variant, and converting that
            // to bstr_t would throw, so check the variant type first.
            const _variant_t l_name = pElement->getAttribute("NAME", 2);
            if (l_name.vt != VT_BSTR || l_name.bstrVal == NULL) {
                continue;
            }
            const CString l_temp = (LPCTSTR)bstr_t(l_name);
            if (l_temp.CompareNoCase(_T("DESCRIPTION")) == 0) {
                const _variant_t l_content = pElement->getAttribute("CONTENT", 2);
                if (l_content.vt == VT_BSTR && l_content.bstrVal != NULL) {
                    l_description = (LPCTSTR)bstr_t(l_content);
                }
                break;
            }
        }
    }

    AfxMessageBox(_T("网页标题为:") + l_title);
    AfxMessageBox(_T("网页描述为:") + l_description);
}