00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "common.hpp"
00020 #include "format_string.hpp"
00021 #include "serialise/error.hpp"
00022 #include "serialise/token.hpp"
00023
00024 namespace
00025 {
00026 using namespace obby::serialise;
00027 typedef std::string::const_iterator string_iterator;
00028
00031 const char* _(const char* msgid)
00032 {
00033 return obby::_(msgid);
00034 }
00035
/**
 * Escapes a string in place so it can be written between double quotes.
 *
 * Newline, tab, backslash and double quote are replaced by the two-character
 * sequences backslash-n, backslash-t, backslash-backslash and
 * backslash-quote respectively. All other characters are copied verbatim.
 *
 * The result is built in a single pass into a fresh buffer instead of
 * calling replace() once per special character, which would shift the
 * remainder of the string on every hit.
 *
 * @param src String to escape; overwritten with the escaped text.
 */
void escape(
    std::string& src
)
{
    std::string result;
    // Lower bound for the final size; avoids most reallocations.
    result.reserve(src.size() );

    for(std::string::const_iterator it = src.begin();
        it != src.end();
        ++ it)
    {
        switch(*it)
        {
        case '\n':
            result += "\\n";
            break;
        case '\t':
            result += "\\t";
            break;
        case '\\':
            result += "\\\\";
            break;
        case '\"':
            result += "\\\"";
            break;
        default:
            result += *it;
            break;
        }
    }

    src.swap(result);
}
00065
00066 void unescape(
00067 std::string& src,
00068 unsigned int src_line
00069 )
00070 {
00071 std::string::size_type pos = 0;
00072 while( (pos = src.find('\\', pos)) != std::string::npos)
00073 {
00074 char replace_with;
00075
00076
00077
00078 switch(src[pos + 1])
00079 {
00080 case 'n':
00081 replace_with = '\n';
00082 break;
00083 case '\\':
00084 replace_with = '\\';
00085 break;
00086 case 't':
00087 replace_with = '\t';
00088 break;
00089 case '\"':
00090 replace_with = '\"';
00091 break;
00092 default:
00093 obby::format_string str(
00094 _("Unexpected escape sequence: \\%0%")
00095 );
00096
00097 str << src[pos + 1];
00098 throw error(str.str(), src_line);
00099 }
00100
00101 src.replace(pos, 2, 1, replace_with);
00102 ++ pos;
00103 }
00104 }
00105
00106 void tokenise_identifier(
00107 token_list& list,
00108 const std::string& src,
00109 string_iterator& iter,
00110 unsigned int& line
00111 )
00112 {
00113
00114 string_iterator orig = iter ++;
00115 for(; iter != src.end(); ++ iter)
00116 if(!isalnum(*iter) && *iter != '_')
00117 break;
00118
00119 list.add(token::TYPE_IDENTIFIER, std::string(orig, iter), line);
00120 }
00121
00122 void tokenise_comment(
00123 token_list& list,
00124 const std::string& src,
00125 string_iterator& iter,
00126 unsigned int& line
00127 )
00128 {
00129
00130 for(++ iter; iter != src.end(); ++ iter)
00131 if(*iter == '\n')
00132 break;
00133 }
00134
00135 void tokenise_string(
00136 token_list& list,
00137 const std::string& src,
00138 string_iterator& iter,
00139 unsigned int& line
00140 )
00141 {
00142 string_iterator orig = ++ iter;
00143 unsigned int orig_line = line;
00144 bool escaped = false;
00145
00146 for(; iter != src.end(); ++ iter)
00147 {
00148
00149 if(*iter == '\n')
00150 ++ line;
00151
00152
00153 if(!escaped)
00154 {
00155 if(*iter == '\\')
00156 escaped = true;
00157 else if(*iter == '\"')
00158 break;
00159 }
00160 else
00161 {
00162
00163
00164 escaped = false;
00165 }
00166 }
00167
00168
00169 if(iter == src.end() )
00170 throw error(_("String not closed"), orig_line);
00171
00172
00173 std::string unescaped(orig, iter);
00174 unescape(unescaped, orig_line);
00175 list.add(token::TYPE_STRING, unescaped, orig_line);
00176
00177
00178 ++ iter;
00179 }
00180
00181 void tokenise_indentation(
00182 token_list& list,
00183 const std::string& src,
00184 string_iterator& iter,
00185 unsigned int& line
00186 )
00187 {
00188
00189 string_iterator orig = iter;
00190 for(; iter != src.end(); ++ iter)
00191 if(!isspace(*iter) || *iter == '\n')
00192 break;
00193
00194
00195 if(*iter != '\n' && *iter != '\0' && iter != src.end() )
00196 {
00197 list.add(
00198 token::TYPE_INDENTATION,
00199 std::string(orig, iter),
00200 line
00201 );
00202 }
00203 }
00204
00205 void tokenise(
00206 token_list& list,
00207 const std::string& src
00208 )
00209 {
00210 unsigned int line = 1;
00211 for(string_iterator iter = src.begin();
00212 iter != src.end();)
00213 {
00214
00215 if(*iter == '\0')
00216 break;
00217
00218 if(*iter == '\n')
00219 {
00220
00221 ++ line;
00222
00223
00224 ++ iter;
00225 tokenise_indentation(
00226 list, src, iter, line
00227 );
00228
00229 continue;
00230 }
00231
00232
00233 if(*iter == '\"')
00234 {
00235 tokenise_string(list, src, iter, line);
00236 continue;
00237 }
00238
00239
00240 if(*iter == '#')
00241 {
00242 tokenise_comment(list, src, iter, line);
00243 continue;
00244 }
00245
00246
00247 if(isalnum(*iter) || *iter == '_')
00248 {
00249 tokenise_identifier(list, src, iter, line);
00250 continue;
00251 }
00252
00253
00254 if(isspace(*iter) )
00255 {
00256 ++ iter;
00257 continue;
00258 }
00259
00260
00261 token::type type = token::TYPE_UNKNOWN;
00262 switch(*iter)
00263 {
00264 case '!':
00265 type = token::TYPE_EXCLAMATION;
00266 break;
00267 case '=':
00268 type = token::TYPE_ASSIGNMENT;
00269 break;
00270 }
00271
00272 if(type == token::TYPE_UNKNOWN)
00273 {
00274 obby::format_string str(
00275 _("Unexpected token: '%0%'")
00276 );
00277
00278 str << *iter;
00279 throw error(str.str(), line);
00280 }
00281
00282 list.add(type, std::string(1, *iter), line);
00283
00284
00285 ++ iter;
00286 }
00287 }
00288
00289 void detokenise(const token_list& list, std::string& target)
00290 {
00291 bool line_begin = true;
00292 std::string escaped_string;
00293
00294 for(token_list::iterator iter = list.begin();
00295 iter != list.end();
00296 ++ iter)
00297 {
00298 switch(iter->get_type() )
00299 {
00300 case token::TYPE_INDENTATION:
00301 target.append("\n" + iter->get_text() );
00302 line_begin = true;
00303 break;
00304 case token::TYPE_STRING:
00305 escaped_string = iter->get_text();
00306 escape(escaped_string);
00307
00308 target.append("\"");
00309 target.append(escaped_string);
00310 target.append("\"");
00311
00312 line_begin = false;
00313 break;
00314 case token::TYPE_IDENTIFIER:
00315 if(!line_begin)
00316 target.append(" ");
00317
00318 default:
00319 target.append(iter->get_text() );
00320 if(iter->get_type() != token::TYPE_EXCLAMATION)
00321 line_begin = false;
00322 break;
00323 }
00324 }
00325 }
00326 }
00327
00328 obby::serialise::token::token(
00329 type type,
00330 const std::string& text,
00331 unsigned int line
00332 ) :
00333 m_type(type), m_text(text), m_line(line)
00334 {
00335 }
00336
00337 obby::serialise::token::type obby::serialise::token::get_type() const
00338 {
00339 return m_type;
00340 }
00341
00342 const std::string& obby::serialise::token::get_text() const
00343 {
00344 return m_text;
00345 }
00346
00347 unsigned int obby::serialise::token::get_line() const
00348 {
00349 return m_line;
00350 }
00351
00352 obby::serialise::token_list::token_list()
00353 {
00354 }
00355
00356 void obby::serialise::token_list::serialise(
00357 std::string& string
00358 ) const
00359 {
00360 detokenise(*this, string);
00361 }
00362
00363 void obby::serialise::token_list::deserialise(
00364 const std::string& string
00365 )
00366 {
00367 tokenise(*this, string);
00368 }
00369
00370 void obby::serialise::token_list::add(
00371 token::type type,
00372 const std::string& text,
00373 unsigned int line
00374 )
00375 {
00376 m_list.push_back(token(type, text, line) );
00377 }
00378
00379 obby::serialise::token_list::iterator obby::serialise::token_list::begin() const
00380 {
00381 return m_list.begin();
00382 }
00383
00384 obby::serialise::token_list::iterator obby::serialise::token_list::end() const
00385 {
00386 return m_list.end();
00387 }
00388
00389 void obby::serialise::token_list::next_token(
00390 iterator& iter
00391 ) const
00392 {
00393 unsigned int orig_line = iter->get_line();
00394 if(++ iter == m_list.end() )
00395 throw error(_("Unexpected end of input"), orig_line);
00396 }
00397