// Markup.h: interface for the CMarkup class. // // Markup Release 8.2 // Copyright (C) 1999-2006 First Objective Software, Inc. All rights reserved // Go to www.firstobject.com for the latest CMarkup and EDOM documentation // Use in commercial applications requires written permission // This software is provided "as is", with no warranty. #if !defined(AFX_MARKUP_H__948A2705_9E68_11D2_A0BF_00105A27C570__INCLUDED_) #define AFX_MARKUP_H__948A2705_9E68_11D2_A0BF_00105A27C570__INCLUDED_ #if _MSC_VER > 1000 #pragma once #pragma warning(disable:4996) // suppress VS 2005 deprecated function warnings #endif // _MSC_VER > 1000 #ifdef _DEBUG #define _DS(i) (i?&((LPCTSTR)m_strDoc)[m_aPos[i].nStart]:0) #define MARKUP_SETDEBUGSTATE m_pMainDS=_DS(m_iPos); m_pChildDS=_DS(m_iPosChild) #else #define MARKUP_SETDEBUGSTATE #endif class CMarkup { public: CMarkup() { SetDoc( NULL ); InitDocFlags(); }; CMarkup( LPCTSTR szDoc ) { SetDoc( szDoc ); InitDocFlags(); }; CMarkup( int nFlags ) { SetDoc( NULL ); m_nFlags = nFlags; }; CMarkup( const CMarkup& markup ) { *this = markup; }; void operator=( const CMarkup& markup ); ~CMarkup() {}; // Navigate bool Load( LPCTSTR szFileName ); bool SetDoc( LPCTSTR szDoc ); bool IsWellFormed(); bool FindElem( LPCTSTR szName=NULL ); bool FindChildElem( LPCTSTR szName=NULL ); bool IntoElem(); bool OutOfElem(); void ResetChildPos() { x_SetPos(m_iPosParent,m_iPos,0); }; void ResetMainPos() { x_SetPos(m_iPosParent,0,0); }; void ResetPos() { x_SetPos(0,0,0); }; CString GetTagName() const; CString GetChildTagName() const { return x_GetTagName(m_iPosChild); }; CString GetData() const { return x_GetData(m_iPos); }; CString GetChildData() const { return x_GetData(m_iPosChild); }; CString GetElemContent() const { return x_GetElemContent(m_iPos); }; CString GetAttrib( LPCTSTR szAttrib ) const { return x_GetAttrib(m_iPos,szAttrib); }; CString GetChildAttrib( LPCTSTR szAttrib ) const { return x_GetAttrib(m_iPosChild,szAttrib); }; CString GetAttribName( int n ) const; 
int FindNode( int nType=0 );
	// Returns m_nNodeType. Note: unlike GetError()/GetDocFlags() below, this
	// accessor is not declared const.
	int GetNodeType() { return m_nNodeType; };
	bool SavePos( LPCTSTR szPosName=_T("") );
	bool RestorePos( LPCTSTR szPosName=_T("") );
	// Plain accessors for m_strError and m_nFlags.
	const CString& GetError() const { return m_strError; };
	int GetDocFlags() const { return m_nFlags; };
	void SetDocFlags( int nFlags ) { m_nFlags = nFlags; };

	// Document-level flag bits (stored in m_nFlags via SetDocFlags).
	// MDF_IGNORECASE is tested by TokenPos::Match to choose a
	// case-insensitive comparison.
	enum MarkupDocFlags
	{
		MDF_IGNORECASE = 8,
	};

	// Flag bits accepted in the nFlags arguments of the public members.
	// NOTE(review): the meaning of each bit is implemented outside this
	// header excerpt; only the values are established here.
	enum MarkupNodeFlags
	{
		MNF_WITHCDATA      = 0x01,
		MNF_WITHNOLINES    = 0x02,
		MNF_WITHXHTMLSPACE = 0x04,
		MNF_WITHREFS       = 0x08,
		MNF_WITHNOEND      = 0x10,
		MNF_ESCAPEQUOTES  = 0x100,
		MNF_NONENDED   = 0x100000,
		MNF_ILLDATA    = 0x200000,
	};

	// Node type values. Each real type is a distinct bit.
	// MNT_EXCLUDE_WHITESPACE (0x7b) is the union of bits 0x01..0x40 with
	// MNT_WHITESPACE (0x04) removed.
	enum MarkupNodeType
	{
		MNT_ELEMENT					= 1,  // 0x01
		MNT_TEXT					= 2,  // 0x02
		MNT_WHITESPACE				= 4,  // 0x04
		MNT_CDATA_SECTION			= 8,  // 0x08
		MNT_PROCESSING_INSTRUCTION	= 16, // 0x10
		MNT_COMMENT					= 32, // 0x20
		MNT_DOCUMENT_TYPE			= 64, // 0x40
		MNT_EXCLUDE_WHITESPACE		= 123,// 0x7b
		MNT_LONE_END_TAG			= 128,// 0x80
		MNT_NODE_ERROR              = 32768 // 0x8000
	};

	// Create
	bool Save( LPCTSTR szFileName );
	// Returns a reference to the internal document string m_strDoc.
	const CString& GetDoc() const { return m_strDoc; };
	// All eight element creators forward to x_AddElem. "Insert" variants OR
	// MNF_INSERT into nFlags, "Child" variants OR MNF_CHILD into nFlags.
	// The first four take string data, the last four take an int value.
	bool AddElem( LPCTSTR szName, LPCTSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags); };
	bool InsertElem( LPCTSTR szName, LPCTSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_INSERT); };
	bool AddChildElem( LPCTSTR szName, LPCTSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_CHILD); };
	bool InsertChildElem( LPCTSTR szName, LPCTSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_INSERT|MNF_CHILD); };
	bool AddElem( LPCTSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags); };
	bool InsertElem( LPCTSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_INSERT); };
	bool AddChildElem( LPCTSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_CHILD); };
	bool InsertChildElem( LPCTSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_INSERT|MNF_CHILD); };
	bool AddAttrib( LPCTSTR
szAttrib, LPCTSTR szValue ) { return x_SetAttrib(m_iPos,szAttrib,szValue); };
	// The Add*Attrib members have the same bodies as the Set*Attrib members
	// in the Modify section: both forward to x_SetAttrib with m_iPos (main
	// position) or m_iPosChild (child position).
	bool AddChildAttrib( LPCTSTR szAttrib, LPCTSTR szValue ) { return x_SetAttrib(m_iPosChild,szAttrib,szValue); };
	bool AddAttrib( LPCTSTR szAttrib, int nValue ) { return x_SetAttrib(m_iPos,szAttrib,nValue); };
	bool AddChildAttrib( LPCTSTR szAttrib, int nValue ) { return x_SetAttrib(m_iPosChild,szAttrib,nValue); };
	// Subdocument members: the Add/Insert variants forward to x_AddSubDoc
	// with a flag value of 0, MNF_INSERT, MNF_CHILD or MNF_CHILD|MNF_INSERT;
	// the Get variants forward to x_GetSubDoc with m_iPos or m_iPosChild.
	bool AddSubDoc( LPCTSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,0); };
	bool InsertSubDoc( LPCTSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_INSERT); };
	CString GetSubDoc() const { return x_GetSubDoc(m_iPos); };
	bool AddChildSubDoc( LPCTSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_CHILD); };
	bool InsertChildSubDoc( LPCTSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_CHILD|MNF_INSERT); };
	CString GetChildSubDoc() const { return x_GetSubDoc(m_iPosChild); };
	// Node creators forward to x_AddNode; nType is presumably one of the
	// MarkupNodeType values (TODO confirm against x_AddNode).
	bool AddNode( int nType, LPCTSTR szText ) { return x_AddNode(nType,szText,0); };
	bool InsertNode( int nType, LPCTSTR szText ) { return x_AddNode(nType,szText,MNF_INSERT); };

	// Modify
	bool RemoveElem();
	bool RemoveChildElem();
	bool RemoveNode();
	// Attribute setters: forward to x_SetAttrib with the main or child
	// position index; overloaded for string and int values.
	bool SetAttrib( LPCTSTR szAttrib, LPCTSTR szValue ) { return x_SetAttrib(m_iPos,szAttrib,szValue); };
	bool SetChildAttrib( LPCTSTR szAttrib, LPCTSTR szValue ) { return x_SetAttrib(m_iPosChild,szAttrib,szValue); };
	bool SetAttrib( LPCTSTR szAttrib, int nValue ) { return x_SetAttrib(m_iPos,szAttrib,nValue); };
	bool SetChildAttrib( LPCTSTR szAttrib, int nValue ) { return x_SetAttrib(m_iPosChild,szAttrib,nValue); };
	// Data setters: forward to x_SetData with the main or child position
	// index. Only the string overloads accept nFlags.
	bool SetData( LPCTSTR szData, int nFlags=0 ) { return x_SetData(m_iPos,szData,nFlags); };
	bool SetChildData( LPCTSTR szData, int nFlags=0 ) { return x_SetData(m_iPosChild,szData,nFlags); };
	bool SetData( int nValue ) { return x_SetData(m_iPos,nValue); };
	bool SetChildData( int nValue ) { return x_SetData(m_iPosChild,nValue); };
	bool SetElemContent( LPCTSTR szContent ) { return x_SetElemContent(szContent); };

	// Utility
	static bool ReadTextFile( LPCTSTR szFileName, CString&
strDoc, CString* pstrError=NULL, int* pnFlags=NULL ); static bool WriteTextFile( LPCTSTR szFileName, CString& strDoc, CString* pstrError=NULL, int* pnFlags=NULL ); static CString EscapeText( LPCTSTR szText, int nFlags = 0 ); static CString UnescapeText( LPCTSTR szText, int nTextLength = -1 ); protected: #ifdef _DEBUG LPCTSTR m_pMainDS; LPCTSTR m_pChildDS; #endif CString m_strDoc; CString m_strError; int m_iPosParent; int m_iPos; int m_iPosChild; int m_iPosFree; int m_iPosDeleted; int m_nNodeType; int m_nNodeOffset; int m_nNodeLength; int m_nFlags; struct ElemPos { ElemPos() {}; ElemPos( const ElemPos& pos ) { *this = pos; }; enum { EP_STBITS=22, EP_STMASK=0x2fffff, EP_LEVMASK=0xffff }; int StartTagLen() const { return (nTagLengths & EP_STMASK); }; void SetStartTagLen( int n ) { nTagLengths = (nTagLengths & ~EP_STMASK) + n; }; void AdjustStartTagLen( int n ) { nTagLengths += n; }; int EndTagLen() const { return (nTagLengths >> EP_STBITS); }; void SetEndTagLen( int n ) { nTagLengths = (nTagLengths & EP_STMASK) + (n << EP_STBITS); }; bool IsEmptyElement() { return (StartTagLen()==nLength)?true:false; }; int StartContent() const { return nStart + StartTagLen(); }; int ContentLen() const { return nLength - StartTagLen() - EndTagLen(); }; int StartAfter() const { return nStart + nLength; }; int Level() const { return nFlags & EP_LEVMASK; }; void SetLevel( int nLev ) { nFlags = (nFlags & ~EP_LEVMASK) | nLev; }; void ClearVirtualParent() { memset(this,0,sizeof(ElemPos)); }; // Memory size: 8 32-bit integers == 32 bytes int nStart; int nLength; int nTagLengths; // 22 bits 4MB limit for start tag, 10 bits 1K limit for end tag int nFlags; // 16 bits flags, 16 bits level 65536 depth limit int iElemParent; int iElemChild; // first child int iElemNext; int iElemPrev; // if this is first child, iElemPrev points to last }; enum MarkupNodeFlagsInternal { MNF_REPLACE = 0x001000, MNF_INSERT = 0x002000, MNF_CHILD = 0x004000, MNF_QUOTED = 0x008000, MNF_EMPTY = 0x010000, MNF_DELETED = 
0x020000, MNF_FIRST = 0x080000, MNF_PUBLIC = 0x300000, MNF_ILLFORMED = 0x800000, MNF_USER = 0xf000000, }; struct NodePos { NodePos() {}; NodePos( int n ) { nFlags=n; nNodeType=0; nStart=0; nLength=0; }; int nNodeType; int nStart; int nLength; int nFlags; CString strMeta; }; struct TokenPos { TokenPos( LPCTSTR sz, int n ) { Clear(); szDoc=sz; nTokenFlags=n; }; void Clear() { nL=0; nR=-1; nNext=0; }; int Length() const { return nR - nL + 1; }; bool Match( LPCTSTR szName ) { int nLen = nR - nL + 1; if ( nTokenFlags & MDF_IGNORECASE ) return ( (_tcsncicmp( &szDoc[nL], szName, nLen ) == 0) && ( szName[nLen] == _T('\0') || _tcschr(_T(" =/[]"),szName[nLen]) ) ); else return ( (_tcsnccmp( &szDoc[nL], szName, nLen ) == 0) && ( szName[nLen] == _T('\0') || _tcschr(_T(" =/[]"),szName[nLen]) ) ); }; int nL; int nR; int nNext; LPCTSTR szDoc; int nTokenFlags; int nPreSpaceStart; int nPreSpaceLength; }; struct SavedPos { SavedPos() { nSavedPosFlags=0; iPos=0; }; CString strName; int iPos; int nSavedPosFlags; }; struct SavedPosMap { SavedPosMap() { pTable = NULL; }; ~SavedPosMap() { RemoveAll(); }; void RemoveAll() { if (pTable) Release(); pTable=NULL; }; enum { SPM_SIZE = 7, SPM_MAIN = 1, SPM_CHILD = 2, SPM_USED = 4, SPM_LAST = 8 }; void Release() { for (int n=0;n>PA_SEGBITS) + 1; }; ElemPos& operator[](int n) const { return pSegs[n>>PA_SEGBITS][n&PA_SEGMASK]; }; ElemPos** pSegs; int nSize; int nSegs; }; PosArray m_aPos; struct NodeStack { NodeStack() { nTop=-1; nSize=0; pN=NULL; }; ~NodeStack() { if (pN) delete [] pN; }; NodePos& Top() { return pN[nTop]; }; NodePos& At( int n ) { return pN[n]; }; void Add() { ++nTop; if (nTop==nSize) Alloc(nSize*2+6); }; void Remove() { --nTop; }; int TopIndex() { return nTop; }; protected: void Alloc( int nNewSize ) { NodePos* pNNew = new NodePos[nNewSize]; Copy(pNNew); nSize=nNewSize; }; void Copy( NodePos* pNNew ) { for(int n=0;n