为了纪念自己为了明天的XML考试复习了半天,特地根据MSDN描述的标准做了一个XML的解析器。接口类似DOM和SAX的混合体。
目前什么都不支持,不过以后打算做完DTD和XPath。XSD和XSL就先算了。反正是写着玩的。把代码贴出来先。这个代码使用了自己开发的一套C++基础库。
接口:
1 /*******************************************************************************
2 Vczh Library++ 2.0
3 XML
4 开发者:陈梓瀚
5
6 接口:
7 类:
8 函数:
9 *******************************************************************************/
10
11 #ifndef VL_XML
12 #define VL_XML
13
14 #include "..\Data\Data\VL_Data_String.h"
15 #include "..\Data\Data\VL_Data_List.h"
16 #include "..\Data\VL_Stream.h"
17 #include "..\Data\VL_Uniop.h"
18
19
20 namespace vl
21 {
22 namespace xml
23 {
24 using namespace collection;
25 using namespace stream;
26 using namespace uniop;
27
// Tag identifying the kind of an XML node; this is the return type of
// VL_XMLNode::GetType(). The enumerator names mirror the VL_XML* node
// classes declared in this header.
enum VLE_XMLNodeType
{
    vxntInstruction,
    vxntDTD,
    vxntElement,
    vxntComment,
    vxntText,
    vxntCData,
    vxntAttribute,
    vxntDocument
};
39
// Forward declarations of the concrete node classes so that the classes
// below can refer to them by pointer before their full definitions.
class VL_XMLInstruction;
class VL_XMLDTD;
class VL_XMLElement;
class VL_XMLComment;
class VL_XMLText;
class VL_XMLCData;
class VL_XMLAttribute;
class VL_XMLDocument;
48
49 class VL_XMLError : public VL_Base
50 {
51 public:
52 VUnicodeString Message;
53 VInt Start;
54 enum CodeType
55 {
56 UnavailableVersion,
57 UnavailableIndex,
58 UnavailableName,
59 DuplicatedName,
60 UnavailableText,
61 FileDestroyed,
62 FileMistake
63 } Code;
64
65 VL_XMLError(VUnicodeString aMessage , CodeType aCode);
66 VL_XMLError(VUnicodeString aMessage , CodeType aCode , VInt aStart);
67 };
68
69 class VL_XMLNode : public VL_Base
70 {
71 protected:
72 VL_XMLNode* FParent;
73 VL_XMLDocument* FDocument;
74
75 VL_XMLNode(VL_XMLNode* Parent);
76 public:
77 ~VL_XMLNode();
78
79 virtual VLE_XMLNodeType GetType()=0;
80 virtual void GetXML(VL_UniBuilder& XML)=0;
81 VUnicodeString GetXMLText();
82 };
83
84 class VL_XMLNodePtr : public VL_Base
85 {
86 public:
87 typedef VL_List<VL_XMLNodePtr , false> List;
88 protected:
89 VL_AutoPtr<VL_XMLNode> FNode;
90 public:
91 VL_XMLNodePtr();
92 VL_XMLNodePtr(VL_XMLNode* Node);
93 VL_XMLNodePtr(const VL_XMLNodePtr& Ptr);
94 ~VL_XMLNodePtr();
95
96 VL_XMLInstruction* GetInstruction();
97 VL_XMLDTD* GetDTD();
98 VL_XMLElement* GetElement();
99 VL_XMLComment* GetComment();
100 VL_XMLText* GetText();
101 VL_XMLCData* GetCData();
102 VL_XMLAttribute* GetAttribute();
103 VL_XMLNode* operator->();
104 };
105
106 class VL_XMLDecoder;
107 class VL_XMLDocument : public VL_XMLNode
108 {
109 protected:
110 VUnicodeString FVersion;
111 VUnicodeString FEncoding;
112 VBool FStandAlone;
113 VBool FUseDeclaration;
114 VL_XMLNodePtr::List FPrologs;
115 VL_XMLNodePtr::List FTails;
116 VL_XMLNodePtr FRootElement;
117 VL_XMLDecoder* FDecoder;
118
119 void InternalLoad(VUnicodeString String);
120 public:
121 VL_XMLDocument();
122 ~VL_XMLDocument();
123
124 VLE_XMLNodeType GetType();
125 void GetXML(VL_UniBuilder& XML);
126 VUnicodeString GetHeadXMLText();
127 VUnicodeString GetBodyXMLText();
128
129 void Save(IVL_OutputStream* Stream , VBool WriteBOM);
130 void Save(VUnicodeString& String);
131 void Load(IVL_InputStream* Stream , VLE_CharEncode Encode=vceBOM); /*Error:FileMistake*/
132 void Load(VUnicodeString String); /*Error:FileMistake*/
133 void Clear();
134
135 VUnicodeString GetVersion();
136 void SetVersion(VUnicodeString Version); /*Error:UnavailableVersion*/
137 VUnicodeString GetEncoding();
138 void SetEncoding(VUnicodeString Encoding);
139 VBool GetStandAlone();
140 void SetStandAlone(VBool Use);
141 VBool GetUseDeclaration();
142 void SetUseDeclaration(VBool Use);
143
144 VInt GetPrologCount();
145 VL_XMLNodePtr GetProlog(VInt Index); /*Error:UnavailableIndex*/
146 VL_XMLNodePtr CreatePrologInstruction(VUnicodeString Name , VInt Index=-1); /*Error:UnavailableName,UnavailableIndex*/
147 VL_XMLNodePtr CreatePrologDTD(VInt Index=-1); /*Error:UnavailableIndex*/
148 VL_XMLNodePtr CreatePrologComment(VUnicodeString Value , VInt Index=-1); /*Error:UnavailableText,UnavailableIndex*/
149 void DeleteProlog(VInt Index); /*Error:UnavailableIndex*/
150
151 VInt GetTailCount();
152 VL_XMLNodePtr GetTail(VInt Index); /*Error:UnavailableIndex*/
153 VL_XMLNodePtr CreateTailComment(VUnicodeString Value , VInt Index=-1); /*Error:UnavailableText,UnavailableIndex*/
154 void DeleteTail(VInt Index); /*Error:UnavailableIndex*/
155
156 VL_XMLNodePtr GetRootElement();
157 VBool IsValidatedName(VUnicodeString Name);
158 };
159
160 class VL_XMLTag : public VL_XMLNode
161 {
162 protected:
163 VUnicodeString FName;
164 VL_XMLNodePtr::List FAttributes;
165
166 VInt IndexOfAttribute(VUnicodeString Name);
167 void GetNodeHeadXML(VL_UniBuilder& XML);
168
169 VL_XMLTag(VL_XMLNode* Parent);
170 public:
171 ~VL_XMLTag();
172
173 VUnicodeString GetName();
174 void SetName(VUnicodeString Name); /*Error:UnavailableName*/
175 VUnicodeString GetNamespace();
176 VUnicodeString GetLocal();
177
178 VL_XMLNodePtr CreateAttribute(VUnicodeString Name , VUnicodeString Value); /*Error:UnavailableName,DuplicatedName,UnavailableText*/
179 VBool ContainsAttribute(VUnicodeString Name);
180 void DeleteAttribute(VUnicodeString Name); /*Error:UnavailableName*/
181 VInt GetAttributeCount();
182 VL_XMLNodePtr GetAttribute(VUnicodeString Name); /*Error:UnavailableName*/
183 VL_XMLNodePtr GetAttribute(VInt Index); /*Error:UnavailableIndex*/
184 };
185
186 class VL_XMLInstruction : public VL_XMLTag
187 {
188 friend class VL_XMLDocument;
189 friend class VL_XMLElement;
190 protected:
191 VL_XMLInstruction(VL_XMLNode* Parent);
192 public:
193 ~VL_XMLInstruction();
194
195 VLE_XMLNodeType GetType();
196 void GetXML(VL_UniBuilder& XML);
197 };
198
199 class VL_XMLElement : public VL_XMLTag
200 {
201 friend class VL_XMLDocument;
202 protected:
203 VL_XMLNodePtr::List FChildren;
204
205 VL_XMLElement(VL_XMLNode* Parent);
206 public:
207 ~VL_XMLElement();
208
209 VLE_XMLNodeType GetType();
210 void GetXML(VL_UniBuilder& XML);
211
212 VL_XMLNodePtr CreateInstruction(VUnicodeString Name , VInt Index=-1); /*Error:UnavailableName,UnavailableIndex*/
213 VL_XMLNodePtr CreateElement(VUnicodeString Name , VInt Index=-1); /*Error:UnavailableName,UnavailableIndex*/
214 VL_XMLNodePtr CreateComment(VUnicodeString Text , VInt Index=-1); /*Error:UnavailableText,UnavailableIndex*/
215 VL_XMLNodePtr CreateText(VUnicodeString Text , VInt Index=-1); /*Error:UnavailableText,UnavailableIndex*/
216 VL_XMLNodePtr CreateCData(VUnicodeString Text , VInt Index=-1); /*Error:UnavailableText,UnavailableIndex*/
217 VL_XMLNodePtr::List& GetChildren();
218 };
219
220 class VL_XMLComment : public VL_XMLNode
221 {
222 friend class VL_XMLDocument;
223 friend class VL_XMLElement;
224 protected:
225 VUnicodeString FText;
226
227 VL_XMLComment(VL_XMLNode* Parent);
228 public:
229 ~VL_XMLComment();
230
231 VLE_XMLNodeType GetType();
232 void GetXML(VL_UniBuilder& XML);
233
234 VUnicodeString GetText();
235 void SetText(VUnicodeString Text); /*Error:UnavailableText*/
236 };
237
238 class VL_XMLText : public VL_XMLNode
239 {
240 friend class VL_XMLElement;
241 protected:
242 VUnicodeString FText;
243
244 VL_XMLText(VL_XMLNode* Parent);
245 public:
246 ~VL_XMLText();
247
248 VLE_XMLNodeType GetType();
249 void GetXML(VL_UniBuilder& XML);
250
251 VUnicodeString GetText();
252 void SetText(VUnicodeString Text); /*Error:UnavailableText*/
253 };
254
255 class VL_XMLCData : public VL_XMLNode
256 {
257 friend class VL_XMLElement;
258 protected:
259 VUnicodeString FText;
260
261 VL_XMLCData(VL_XMLNode* Parent);
262 public:
263 ~VL_XMLCData();
264
265 VLE_XMLNodeType GetType();
266 void GetXML(VL_UniBuilder& XML);
267
268 VUnicodeString GetText();
269 void SetText(VUnicodeString Text); /*Error:UnavailableText*/
270 };
271
272 class VL_XMLAttribute : public VL_XMLNode
273 {
274 friend class VL_XMLTag;
275 protected:
276 VUnicodeString FName;
277 VUnicodeString FText;
278 VBool FQuot;
279
280 VL_XMLAttribute(VL_XMLNode* Parent);
281 public:
282 ~VL_XMLAttribute();
283
284 VLE_XMLNodeType GetType();
285 void GetXML(VL_UniBuilder& XML);
286
287 VUnicodeString GetName();
288 VUnicodeString GetNamespace();
289 VUnicodeString GetLocal();
290 VUnicodeString GetText();
291 void SetText(VUnicodeString Text); /*Error:UnavailableText*/
292 };
293 }
294 }
295
296 #endif
使用自己做的正则表达式构造的一系列XML分析工具:
1 /*********************************************************************************************************
2 VL_XMLDecoder
3 *********************************************************************************************************/
4
5 struct VLS_XMLString
6 {
7 PWChar Start;
8 VInt Length;
9
10 VLS_XMLString()
11 {
12 Start=0;
13 Length=0;
14 }
15
16 VLS_XMLString(PWChar aStart , VInt aLength)
17 {
18 Start=aStart;
19 Length=aLength;
20 }
21
22 VBool Is(PCWChar String)
23 {
24 PCWChar Buffer=Start;
25 while(*String)
26 {
27 if(*Buffer++!=*String++)
28 {
29 return false;
30 }
31 }
32 return true;
33 }
34
35 VUnicodeString ToString()
36 {
37 return VUnicodeString(Start,Length);
38 }
39 };
40
41 struct VLS_XMLAttribute
42 {
43 typedef VL_List<VLS_XMLAttribute , true> List;
44
45 VLS_XMLString Name;
46 VLS_XMLString Value;
47 };
48
49 struct VLS_XMLElement
50 {
51 VLS_XMLAttribute::List Attributes;
52 VLS_XMLString Name;
53 VWChar StartChar;
54 VWChar EndChar;
55 };
56
57 class VL_XMLDecoder : public VL_Base
58 {
59 public:
60 VL_RegMatch Reg_CData;
61 VL_RegMatch Reg_Text;
62 VL_RegMatch Reg_Comment;
63 VL_RegMatch Reg_Name;
64 VL_RegMatch Reg_Value;
65
66 VL_XMLDecoder():
67 Reg_CData(L"\\<!\\[CDATA\\[([^\\]]|\\][^\\]]|\\]\\][^\\>])*\\]\\]\\>"),
68 Reg_Text(L"[^\\<\\>]+"),
69 Reg_Comment(L"\\<!\\-\\-([^\\-]|\\-[^\\-]|\\-\\-[^\\>])*\\-\\-\\>"),
70 Reg_Name(L"[a-zA-Z_][a-zA-Z0-9\\-_.:]*"),
71 Reg_Value(L"\"[^\"]*\"")
72 {
73 }
74
75 void PassWhite(VLS_XMLString& String)
76 {
77 while(String.Length)
78 {
79 switch(*String.Start)
80 {
81 case L' ':case L'\t':case L'\r':case L'\n':
82 String.Start++;
83 String.Length--;
84 break;
85 default:
86 return;
87 }
88 }
89 }
90
91 VLS_XMLString GetCData(VLS_XMLString& Input)
92 {
93 VInt Length=Reg_CData.MatchBuffer(Input.Start);
94 if(Length==-1)
95 {
96 return VLS_XMLString();
97 }
98 else
99 {
100 VLS_XMLString Result(Input.Start+9,Length-12);
101 Input.Start+=Length;
102 Input.Length-=Length;
103 return Result;
104 }
105 }
106
107 VLS_XMLString GetText(VLS_XMLString& Input)
108 {
109 VInt Length=Reg_Text.MatchBuffer(Input.Start);
110 if(Length==-1)
111 {
112 return VLS_XMLString();
113 }
114 else
115 {
116 VLS_XMLString Result(Input.Start,Length);
117 Input.Start+=Length;
118 Input.Length-=Length;
119 return Result;
120 }
121 }
122
123 VLS_XMLString GetComment(VLS_XMLString& Input , VBool PassWhiteCharacters=false)
124 {
125 if(PassWhiteCharacters)PassWhite(Input);
126 VInt Length=Reg_Comment.MatchBuffer(Input.Start);
127 if(Length==-1)
128 {
129 return VLS_XMLString();
130 }
131 else
132 {
133 VLS_XMLString Result(Input.Start+4,Length-7);
134 Input.Start+=Length;
135 Input.Length-=Length;
136 return Result;
137 }
138 }
139
140 VLS_XMLString GetName(VLS_XMLString& Input , VBool PassWhiteCharacters)
141 {
142 if(PassWhiteCharacters)PassWhite(Input);
143 VInt Length=Reg_Name.MatchBuffer(Input.Start);
144 if(Length==-1)
145 {
146 return VLS_XMLString();
147 }
148 else
149 {
150 VLS_XMLString Result(Input.Start,Length);
151 Input.Start+=Length;
152 Input.Length-=Length;
153 return Result;
154 }
155 }
156
157 VLS_XMLString GetValue(VLS_XMLString& Input , VBool PassWhiteCharacters)
158 {
159 if(PassWhiteCharacters)PassWhite(Input);
160 VInt Length=Reg_Value.MatchBuffer(Input.Start);
161 if(Length==-1)
162 {
163 return VLS_XMLString();
164 }
165 else
166 {
167 VLS_XMLString Result(Input.Start+1,Length-2);
168 Input.Start+=Length;
169 Input.Length-=Length;
170 return Result;
171 }
172 }
173
174 VBool Test(VLS_XMLString& Input , PCWChar String , VBool PassWhiteCharacters)
175 {
176 if(PassWhiteCharacters)PassWhite(Input);
177 PWChar Buffer=Input.Start;
178 while(*String)
179 {
180 if(*Buffer++!=*String++)
181 {
182 return false;
183 }
184 }
185 Input.Length-=Buffer-Input.Start;
186 Input.Start=Buffer;
187 return true;
188 }
189
190 PCWChar GetElement(VLS_XMLString& Input , VLS_XMLElement& Element , VBool CompleteAttribute , VBool PassWhiteCharacters)
191 {
192 if(PassWhiteCharacters)PassWhite(Input);
193 if(Test(Input,L"<?",false))
194 {
195 Element.StartChar=L'?';
196 }
197 else if(Test(Input,L"<!",false))
198 {
199 Element.StartChar=L'!';
200 }
201 else if(Test(Input,L"</",false))
202 &