随笔-90  评论-449  文章-0  trackbacks-0
    为了纪念自己为了明天的XML考试复习了半天,特地根据MSDN描述的标准做了一个XML的解析器。接口像是DOM和SAX的混合体。
    目前什么都不支持,不过以后打算做完DTD和XPath。XSD和XSL就先算了。反正是写着玩的。把代码贴出来先。这个代码使用了自己开发的一套C++基础库。

    接口:
  1 /*******************************************************************************
  2 Vczh Library++ 2.0
  3 XML
  4 开发者:陈梓瀚
  5 
  6 接口:
  7 类:
  8 函数:
  9 *******************************************************************************/
 10 
 11 #ifndef VL_XML
 12 #define VL_XML
 13 
 14 #include "..\Data\Data\VL_Data_String.h"
 15 #include "..\Data\Data\VL_Data_List.h"
 16 #include "..\Data\VL_Stream.h"
 17 #include "..\Data\VL_Uniop.h"
 18 
 19 
 20 namespace vl
 21 {
 22     namespace xml
 23     {
 24         using namespace collection;
 25         using namespace stream;
 26         using namespace uniop;
 27 
 28         enum VLE_XMLNodeType    // Node kind tag; this is the return type of VL_XMLNode::GetType().
 29         {    // Each enumerator is named after one VL_XML* node class; the GetType() bodies that return them are not shown here.
 30             vxntInstruction,
 31             vxntDTD,    // NOTE(review): VL_XMLDTD is only forward-declared in the visible part of this header.
 32             vxntElement,
 33             vxntComment,
 34             vxntText,
 35             vxntCData,
 36             vxntAttribute,
 37             vxntDocument
 38         };
 39 
 40         class VL_XMLInstruction;    // Forward declarations: VL_XMLNodePtr's typed getters and the friend lists name these classes before they are defined.
 41         class VL_XMLDTD;
 42         class VL_XMLElement;
 43         class VL_XMLComment;
 44         class VL_XMLText;
 45         class VL_XMLCData;
 46         class VL_XMLAttribute;
 47         class VL_XMLDocument;
 48 
 49         class VL_XMLError : public VL_Base    // Error record of the XML library; the /*Error:...*/ notes on the node classes name the Code values a call can report.
 50         {
 51         public:
 52             VUnicodeString                Message;    // Text supplied as aMessage to the constructors.
 53             VInt                        Start;    // NOTE(review): presumably the input position of the error; only the three-argument constructor receives it - confirm.
 54             enum CodeType    // Error categories referenced by the /*Error:...*/ annotations.
 55             {
 56                 UnavailableVersion,
 57                 UnavailableIndex,
 58                 UnavailableName,
 59                 DuplicatedName,
 60                 UnavailableText,
 61                 FileDestroyed,
 62                 FileMistake
 63             }                            Code;
 64             // NOTE(review): the value Start takes when the two-argument constructor is used is defined in the implementation (not shown).
 65             VL_XMLError(VUnicodeString aMessage , CodeType aCode);
 66             VL_XMLError(VUnicodeString aMessage , CodeType aCode , VInt aStart);
 67         };
 68 
 69         class VL_XMLNode : public VL_Base
 70         {
 71         protected:
 72             VL_XMLNode*                    FParent;
 73             VL_XMLDocument*                FDocument;
 74 
 75             VL_XMLNode(VL_XMLNode* Parent);
 76         public:
 77             ~VL_XMLNode();
 78 
 79             virtual VLE_XMLNodeType        GetType()=0;
 80             virtual void                GetXML(VL_UniBuilder& XML)=0;
 81             VUnicodeString                GetXMLText();
 82         };
 83 
 84         class VL_XMLNodePtr : public VL_Base    // Handle to a node; the Get*/Create* methods of the document and tag classes return it by value.
 85         {
 86         public:
 87             typedef VL_List<VL_XMLNodePtr , false>        List;    // Container type used for prolog, tail, attribute and child collections.
 88         protected:
 89             VL_AutoPtr<VL_XMLNode>        FNode;    // The held node, stored through the library's VL_AutoPtr.
 90         public:
 91             VL_XMLNodePtr();
 92             VL_XMLNodePtr(VL_XMLNode* Node);
 93             VL_XMLNodePtr(const VL_XMLNodePtr& Ptr);
 94             ~VL_XMLNodePtr();
 95             // Typed views of the held node. NOTE(review): the result when the node is of a different kind is not visible here - presumably null; confirm.
 96             VL_XMLInstruction*            GetInstruction();
 97             VL_XMLDTD*                    GetDTD();
 98             VL_XMLElement*                GetElement();
 99             VL_XMLComment*                GetComment();
100             VL_XMLText*                    GetText();
101             VL_XMLCData*                GetCData();
102             VL_XMLAttribute*            GetAttribute();
103             VL_XMLNode*                    operator->();    // Untyped access to the held node through the VL_XMLNode interface.
104         };
105 
106         class VL_XMLDecoder;
107         class VL_XMLDocument : public VL_XMLNode    // Document node: declaration fields, prolog nodes, one root element and tail nodes.
108         {
109         protected:
110             VUnicodeString                FVersion;
111             VUnicodeString                FEncoding;
112             VBool                        FStandAlone;
113             VBool                        FUseDeclaration;
114             VL_XMLNodePtr::List            FPrologs;
115             VL_XMLNodePtr::List            FTails;
116             VL_XMLNodePtr                FRootElement;
117             VL_XMLDecoder*                FDecoder;
118             // NOTE(review): presumably the shared implementation behind both Load overloads - confirm.
119             void                        InternalLoad(VUnicodeString String);
120         public:
121             VL_XMLDocument();
122             ~VL_XMLDocument();
123             // VL_XMLNode interface plus text output; how head and body text are split is defined in the implementation (not shown).
124             VLE_XMLNodeType                GetType();
125             void                        GetXML(VL_UniBuilder& XML);
126             VUnicodeString                GetHeadXMLText();
127             VUnicodeString                GetBodyXMLText();
128             // Serialization entry points; the trailing /*Error:...*/ notes list the VL_XMLError codes a call can report.
129             void                        Save(IVL_OutputStream* Stream , VBool WriteBOM);
130             void                        Save(VUnicodeString& String);
131             void                        Load(IVL_InputStream* Stream , VLE_CharEncode Encode=vceBOM);    /*Error:FileMistake*/
132             void                        Load(VUnicodeString String);                                    /*Error:FileMistake*/
133             void                        Clear();
134             // Accessors that, by name, pair with FVersion, FEncoding, FStandAlone and FUseDeclaration.
135             VUnicodeString                GetVersion();
136             void                        SetVersion(VUnicodeString Version);                                /*Error:UnavailableVersion*/
137             VUnicodeString                GetEncoding();
138             void                        SetEncoding(VUnicodeString Encoding);
139             VBool                        GetStandAlone();
140             void                        SetStandAlone(VBool Use);
141             VBool                        GetUseDeclaration();
142             void                        SetUseDeclaration(VBool Use);
143             // Prolog nodes. NOTE(review): the default Index=-1 presumably means "append at the end" - confirm.
144             VInt                        GetPrologCount();
145             VL_XMLNodePtr                GetProlog(VInt Index);                                            /*Error:UnavailableIndex*/
146             VL_XMLNodePtr                CreatePrologInstruction(VUnicodeString Name , VInt Index=-1);    /*Error:UnavailableName,UnavailableIndex*/
147             VL_XMLNodePtr                CreatePrologDTD(VInt Index=-1);                                    /*Error:UnavailableIndex*/
148             VL_XMLNodePtr                CreatePrologComment(VUnicodeString Value , VInt Index=-1);        /*Error:UnavailableText,UnavailableIndex*/
149             void                        DeleteProlog(VInt Index);                                        /*Error:UnavailableIndex*/
150             // Tail nodes; this interface only offers comment creation for the tail.
151             VInt                        GetTailCount();
152             VL_XMLNodePtr                GetTail(VInt Index);                                            /*Error:UnavailableIndex*/
153             VL_XMLNodePtr                CreateTailComment(VUnicodeString Value , VInt Index=-1);        /*Error:UnavailableText,UnavailableIndex*/
154             void                        DeleteTail(VInt Index);                                            /*Error:UnavailableIndex*/
155             // NOTE(review): IsValidatedName presumably implements the name check behind the UnavailableName errors - confirm.
156             VL_XMLNodePtr                GetRootElement();
157             VBool                        IsValidatedName(VUnicodeString Name);
158         };
159 
160         class VL_XMLTag : public VL_XMLNode    // Common base of VL_XMLInstruction and VL_XMLElement: a named node with an attribute list.
161         {
162         protected:
163             VUnicodeString                FName;
164             VL_XMLNodePtr::List            FAttributes;
165             // Helpers for derived classes. NOTE(review): the "not found" result of IndexOfAttribute is not visible here.
166             VInt                        IndexOfAttribute(VUnicodeString Name);
167             void                        GetNodeHeadXML(VL_UniBuilder& XML);
168             // Protected constructor: a bare tag is never instantiated directly.
169             VL_XMLTag(VL_XMLNode* Parent);
170         public:
171             ~VL_XMLTag();
172             // NOTE(review): GetNamespace/GetLocal presumably split the name at ':' - confirm in the implementation.
173             VUnicodeString                GetName();
174             void                        SetName(VUnicodeString Name);                                    /*Error:UnavailableName*/
175             VUnicodeString                GetNamespace();
176             VUnicodeString                GetLocal();
177             // Attribute access by name or by position; the /*Error:...*/ notes list the VL_XMLError codes a call can report.
178             VL_XMLNodePtr                CreateAttribute(VUnicodeString Name , VUnicodeString Value);    /*Error:UnavailableName,DuplicatedName,UnavailableText*/
179             VBool                        ContainsAttribute(VUnicodeString Name);
180             void                        DeleteAttribute(VUnicodeString Name);                            /*Error:UnavailableName*/
181             VInt                        GetAttributeCount();
182             VL_XMLNodePtr                GetAttribute(VUnicodeString Name);                                /*Error:UnavailableName*/
183             VL_XMLNodePtr                GetAttribute(VInt Index);                                        /*Error:UnavailableIndex*/
184         };
185 
186         class VL_XMLInstruction : public VL_XMLTag    // Instruction node; adds no members beyond the name and attributes inherited from VL_XMLTag.
187         {
188             friend class VL_XMLDocument;    // The constructor is protected; both friends declare Create*Instruction methods.
189             friend class VL_XMLElement;
190         protected:
191             VL_XMLInstruction(VL_XMLNode* Parent);
192         public:
193             ~VL_XMLInstruction();
194             // VL_XMLNode interface.
195             VLE_XMLNodeType                GetType();
196             void                        GetXML(VL_UniBuilder& XML);
197         };
198 
199         class VL_XMLElement : public VL_XMLTag    // Element node: a tag that additionally holds a list of child nodes.
200         {
201             friend class VL_XMLDocument;    // The constructor is protected; NOTE(review): presumably the document creates the root element - confirm.
202         protected:
203             VL_XMLNodePtr::List            FChildren;
204             // Protected constructor; children are created through the Create* methods below.
205             VL_XMLElement(VL_XMLNode* Parent);
206         public:
207             ~VL_XMLElement();
208             // VL_XMLNode interface.
209             VLE_XMLNodeType                GetType();
210             void                        GetXML(VL_UniBuilder& XML);
211             // Child creation. NOTE(review): the default Index=-1 presumably means "append at the end" - confirm.
212             VL_XMLNodePtr                CreateInstruction(VUnicodeString Name , VInt Index=-1);            /*Error:UnavailableName,UnavailableIndex*/
213             VL_XMLNodePtr                CreateElement(VUnicodeString Name , VInt Index=-1);                /*Error:UnavailableName,UnavailableIndex*/
214             VL_XMLNodePtr                CreateComment(VUnicodeString Text , VInt Index=-1);                /*Error:UnavailableText,UnavailableIndex*/
215             VL_XMLNodePtr                CreateText(VUnicodeString Text , VInt Index=-1);                /*Error:UnavailableText,UnavailableIndex*/
216             VL_XMLNodePtr                CreateCData(VUnicodeString Text , VInt Index=-1);                /*Error:UnavailableText,UnavailableIndex*/
217             VL_XMLNodePtr::List&        GetChildren();    // Returns the child list by non-const reference, so callers can modify it directly.
218         };
219 
220         class VL_XMLComment : public VL_XMLNode    // Comment node holding a single text value.
221         {
222             friend class VL_XMLDocument;    // The constructor is protected; both friends declare Create*Comment methods.
223             friend class VL_XMLElement;
224         protected:
225             VUnicodeString                FText;
226             // Protected constructor.
227             VL_XMLComment(VL_XMLNode* Parent);
228         public:
229             ~VL_XMLComment();
230             // VL_XMLNode interface.
231             VLE_XMLNodeType                GetType();
232             void                        GetXML(VL_UniBuilder& XML);
233             // Text accessors; SetText can report UnavailableText (which texts are rejected is decided in the implementation, not shown).
234             VUnicodeString                GetText();
235             void                        SetText(VUnicodeString Text);                                    /*Error:UnavailableText*/
236         };
237 
238         class VL_XMLText : public VL_XMLNode    // Text node holding a single text value.
239         {
240             friend class VL_XMLElement;    // The constructor is protected; VL_XMLElement declares CreateText.
241         protected:
242             VUnicodeString                FText;
243             // Protected constructor.
244             VL_XMLText(VL_XMLNode* Parent);
245         public:
246             ~VL_XMLText();
247             // VL_XMLNode interface.
248             VLE_XMLNodeType                GetType();
249             void                        GetXML(VL_UniBuilder& XML);
250             // Text accessors; SetText can report UnavailableText (which texts are rejected is decided in the implementation, not shown).
251             VUnicodeString                GetText();
252             void                        SetText(VUnicodeString Text);                                    /*Error:UnavailableText*/
253         };
254 
255         class VL_XMLCData : public VL_XMLNode    // CDATA node holding a single text value.
256         {
257             friend class VL_XMLElement;    // The constructor is protected; VL_XMLElement declares CreateCData.
258         protected:
259             VUnicodeString                FText;
260             // Protected constructor.
261             VL_XMLCData(VL_XMLNode* Parent);
262         public:
263             ~VL_XMLCData();
264             // VL_XMLNode interface.
265             VLE_XMLNodeType                GetType();
266             void                        GetXML(VL_UniBuilder& XML);
267             // Text accessors; SetText can report UnavailableText (which texts are rejected is decided in the implementation, not shown).
268             VUnicodeString                GetText();
269             void                        SetText(VUnicodeString Text);                                    /*Error:UnavailableText*/
270         };
271 
272         class VL_XMLAttribute : public VL_XMLNode    // Attribute node: a name plus a text value; this interface has no SetName.
273         {
274             friend class VL_XMLTag;    // The constructor is protected; VL_XMLTag declares CreateAttribute.
275         protected:
276             VUnicodeString                FName;
277             VUnicodeString                FText;
278             VBool                        FQuot;    // NOTE(review): meaning not visible here - presumably records which quote character to write; confirm.
279             // Protected constructor.
280             VL_XMLAttribute(VL_XMLNode* Parent);
281         public:
282             ~VL_XMLAttribute();
283             // VL_XMLNode interface.
284             VLE_XMLNodeType                GetType();
285             void                        GetXML(VL_UniBuilder& XML);
286             // NOTE(review): GetNamespace/GetLocal presumably split the name at ':' - confirm in the implementation.
287             VUnicodeString                GetName();
288             VUnicodeString                GetNamespace();
289             VUnicodeString                GetLocal();
290             VUnicodeString                GetText();
291             void                        SetText(VUnicodeString Text);                                    /*Error:UnavailableText*/
292         };
293     }
294 }
295 
296 #endif

    使用自己做的正则表达式库构造的一系列XML分析工具:
  1 /*********************************************************************************************************
  2 VL_XMLDecoder
  3 *********************************************************************************************************/
  4 
  5     struct VLS_XMLString
  6     {
  7         PWChar                    Start;
  8         VInt                    Length;
  9 
 10         VLS_XMLString()
 11         {
 12             Start=0;
 13             Length=0;
 14         }
 15 
 16         VLS_XMLString(PWChar aStart , VInt aLength)
 17         {
 18             Start=aStart;
 19             Length=aLength;
 20         }
 21 
 22         VBool Is(PCWChar String)
 23         {
 24             PCWChar Buffer=Start;
 25             while(*String)
 26             {
 27                 if(*Buffer++!=*String++)
 28                 {
 29                     return false;
 30                 }
 31             }
 32             return true;
 33         }
 34 
 35         VUnicodeString ToString()
 36         {
 37             return VUnicodeString(Start,Length);
 38         }
 39     };
 40 
 41     struct VLS_XMLAttribute    // One attribute as two slices: its name and its value.
 42     {
 43         typedef VL_List<VLS_XMLAttribute , true>    List;    // Collection type used by VLS_XMLElement::Attributes.
 44         // Both slices are VLS_XMLString values, i.e. pointer-plus-length views rather than owned strings.
 45         VLS_XMLString            Name;
 46         VLS_XMLString            Value;
 47     };
 48 
 49     struct VLS_XMLElement    // Output record filled in by VL_XMLDecoder::GetElement: tag name, attributes and marker characters.
 50     {
 51         VLS_XMLAttribute::List    Attributes;
 52         VLS_XMLString            Name;
 53         VWChar                    StartChar;    // GetElement stores L'?' after "<?" and L'!' after "<!"; this struct has no constructor that initialises it.
 54         VWChar                    EndChar;    // NOTE(review): presumably the marker before the closing '>' - its assignment is not visible here; confirm.
 55     };
 56 
 57     class VL_XMLDecoder : public VL_Base
 58     {
 59     public:
 60         VL_RegMatch Reg_CData;    // One matcher per lexical item; each is used by the Get* function of the same name.
 61         VL_RegMatch Reg_Text;
 62         VL_RegMatch Reg_Comment;
 63         VL_RegMatch Reg_Name;
 64         VL_RegMatch Reg_Value;
 65         // Pattern notes below assume VL_RegMatch follows conventional regex syntax - confirm against the library.
 66         VL_XMLDecoder():
 67             Reg_CData(L"\\<!\\[CDATA\\[([^\\]]|\\][^\\]]|\\]\\][^\\>])*\\]\\]\\>"),    // "<![CDATA[" ... "]]>". NOTE(review): appears to reject a payload ending in ']' (e.g. "<![CDATA[a]]]>") - verify.
 68             Reg_Text(L"[^\\<\\>]+"),    // One or more characters other than '<' and '>'.
 69             Reg_Comment(L"\\<!\\-\\-([^\\-]|\\-[^\\-]|\\-\\-[^\\>])*\\-\\-\\>"),    // "<!--" ... "-->".
 70             Reg_Name(L"[a-zA-Z_][a-zA-Z0-9\\-_.:]*"),    // ASCII letter or '_' first, then ASCII letters, digits, '-', '_', '.', ':'.
 71             Reg_Value(L"\"[^\"]*\"")    // A double-quoted value only; single-quoted values are not covered by this pattern.
 72         {
 73         }
 74 
 75         void PassWhite(VLS_XMLString& String)
 76         {
 77             while(String.Length)
 78             {
 79                 switch(*String.Start)
 80                 {
 81                 case L' ':case L'\t':case L'\r':case L'\n':
 82                     String.Start++;
 83                     String.Length--;
 84                     break;
 85                 default:
 86                     return;
 87                 }
 88             }
 89         }
 90 
 91         VLS_XMLString GetCData(VLS_XMLString& Input)
 92         {
 93             VInt Length=Reg_CData.MatchBuffer(Input.Start);
 94             if(Length==-1)
 95             {
 96                 return VLS_XMLString();
 97             }
 98             else
 99             {
100                 VLS_XMLString Result(Input.Start+9,Length-12);
101                 Input.Start+=Length;
102                 Input.Length-=Length;
103                 return Result;
104             }
105         }
106 
107         VLS_XMLString GetText(VLS_XMLString& Input)
108         {
109             VInt Length=Reg_Text.MatchBuffer(Input.Start);
110             if(Length==-1)
111             {
112                 return VLS_XMLString();
113             }
114             else
115             {
116                 VLS_XMLString Result(Input.Start,Length);
117                 Input.Start+=Length;
118                 Input.Length-=Length;
119                 return Result;
120             }
121         }
122 
123         VLS_XMLString GetComment(VLS_XMLString& Input , VBool PassWhiteCharacters=false)
124         {
125             if(PassWhiteCharacters)PassWhite(Input);
126             VInt Length=Reg_Comment.MatchBuffer(Input.Start);
127             if(Length==-1)
128             {
129                 return VLS_XMLString();
130             }
131             else
132             {
133                 VLS_XMLString Result(Input.Start+4,Length-7);
134                 Input.Start+=Length;
135                 Input.Length-=Length;
136                 return Result;
137             }
138         }
139 
140         VLS_XMLString GetName(VLS_XMLString& Input , VBool PassWhiteCharacters)
141         {
142             if(PassWhiteCharacters)PassWhite(Input);
143             VInt Length=Reg_Name.MatchBuffer(Input.Start);
144             if(Length==-1)
145             {
146                 return VLS_XMLString();
147             }
148             else
149             {
150                 VLS_XMLString Result(Input.Start,Length);
151                 Input.Start+=Length;
152                 Input.Length-=Length;
153                 return Result;
154             }
155         }
156 
157         VLS_XMLString GetValue(VLS_XMLString& Input , VBool PassWhiteCharacters)
158         {
159             if(PassWhiteCharacters)PassWhite(Input);
160             VInt Length=Reg_Value.MatchBuffer(Input.Start);
161             if(Length==-1)
162             {
163                 return VLS_XMLString();
164             }
165             else
166             {
167                 VLS_XMLString Result(Input.Start+1,Length-2);
168                 Input.Start+=Length;
169                 Input.Length-=Length;
170                 return Result;
171             }
172         }
173 
174         VBool Test(VLS_XMLString& Input , PCWChar String , VBool PassWhiteCharacters)
175         {
176             if(PassWhiteCharacters)PassWhite(Input);
177             PWChar Buffer=Input.Start;
178             while(*String)
179             {
180                 if(*Buffer++!=*String++)
181                 {
182                     return false;
183                 }
184             }
185             Input.Length-=Buffer-Input.Start;
186             Input.Start=Buffer;
187             return true;
188         }
189 
190         PCWChar GetElement(VLS_XMLString& Input , VLS_XMLElement& Element , VBool CompleteAttribute , VBool PassWhiteCharacters)
191         {
192             if(PassWhiteCharacters)PassWhite(Input);
193             if(Test(Input,L"<?",false))
194             {
195                 Element.StartChar=L'?';
196             }
197             else if(Test(Input,L"<!",false))
198             {
199                 Element.StartChar=L'!';
200             }
201             else if(Test(Input,L"</",false))
202 &