token.cpp

Go to the documentation of this file.
00001 /* libobby - Network text editing library
00002  * Copyright (C) 2005 0x539 dev group
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public
00006  * License as published by the Free Software Foundation; either
00007  * version 2 of the License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public
00015  * License along with this program; if not, write to the Free
00016  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00017  */
00018 
00019 #include "common.hpp"
00020 #include "format_string.hpp"
00021 #include "serialise/error.hpp"
00022 #include "serialise/token.hpp"
00023 
00024 namespace
00025 {
00026         using namespace obby::serialise;
00027         typedef std::string::const_iterator string_iterator;
00028 
00031         const char* _(const char* msgid)
00032         {
00033                 return obby::_(msgid);
00034         }
00035 
/** Escapes a string in place so it can be written as a quoted literal.
 *
 * Newline, tab, backslash and double quote are each replaced by a
 * backslash followed by 'n', 't', '\\' or '"' respectively. Every other
 * character is kept unchanged.
 *
 * @param src String to escape; overwritten with the escaped text.
 */
void escape(
        std::string& src
)
{
        std::string escaped;
        escaped.reserve(src.length() );

        for(std::string::const_iterator it = src.begin();
            it != src.end();
            ++ it)
        {
                if(*it == '\n')
                        escaped += "\\n";
                else if(*it == '\t')
                        escaped += "\\t";
                else if(*it == '\\')
                        escaped += "\\\\";
                else if(*it == '\"')
                        escaped += "\\\"";
                else
                        escaped += *it;
        }

        src.swap(escaped);
}
00065 
00066         void unescape(
00067                 std::string& src,
00068                 unsigned int src_line
00069         )
00070         {
00071                 std::string::size_type pos = 0;
00072                 while( (pos = src.find('\\', pos)) != std::string::npos)
00073                 {
00074                         char replace_with;
00075 
00076                         // \\ cannot be at end of string - terminating " would
00077                         // have been escaped
00078                         switch(src[pos + 1])
00079                         {
00080                         case 'n':
00081                                 replace_with = '\n';
00082                                 break;
00083                         case '\\':
00084                                 replace_with = '\\';
00085                                 break;
00086                         case 't':
00087                                 replace_with = '\t';
00088                                 break;
00089                         case '\"':
00090                                 replace_with = '\"';
00091                                 break;
00092                         default:
00093                                 obby::format_string str(
00094                                         _("Unexpected escape sequence: \\%0%")
00095                                 );
00096 
00097                                 str << src[pos + 1];
00098                                 throw error(str.str(), src_line);
00099                         }
00100 
00101                         src.replace(pos, 2, 1, replace_with);
00102                         ++ pos;
00103                 }
00104         }
00105 
00106         void tokenise_identifier(
00107                 token_list& list,
00108                 const std::string& src,
00109                 string_iterator& iter,
00110                 unsigned int& line
00111         )
00112         {
00113                 // Read to next non-alphanumerical character
00114                 string_iterator orig = iter ++;
00115                 for(; iter != src.end(); ++ iter)
00116                         if(!isalnum(*iter) && *iter != '_')
00117                                 break;
00118 
00119                 list.add(token::TYPE_IDENTIFIER, std::string(orig, iter), line);
00120         }
00121 
00122         void tokenise_comment(
00123                 token_list& list,
00124                 const std::string& src,
00125                 string_iterator& iter,
00126                 unsigned int& line
00127         )
00128         {
00129                 // Ignore rest of line
00130                 for(++ iter; iter != src.end(); ++ iter)
00131                         if(*iter == '\n')
00132                                 break;
00133         }
00134 
00135         void tokenise_string(
00136                 token_list& list,
00137                 const std::string& src,
00138                 string_iterator& iter,
00139                 unsigned int& line
00140         )
00141         {
00142                 string_iterator orig = ++ iter;
00143                 unsigned int orig_line = line;
00144                 bool escaped = false;
00145 
00146                 for(; iter != src.end(); ++ iter)
00147                 {
00148                         // Line counting
00149                         if(*iter == '\n')
00150                                 ++ line;
00151 
00152                         // Is this character escaped?
00153                         if(!escaped)
00154                         {
00155                                 if(*iter == '\\')
00156                                         escaped = true;
00157                                 else if(*iter == '\"')
00158                                         break;
00159                         }
00160                         else
00161                         {
00162                                 // This one was escaped, process normally with
00163                                 // next one
00164                                 escaped = false;
00165                         }
00166                 }
00167 
00168                 // Unexpected end of input
00169                 if(iter == src.end() )
00170                         throw error(_("String not closed"), orig_line);
00171 
00172                 // Add unescaped string literal
00173                 std::string unescaped(orig, iter);
00174                 unescape(unescaped, orig_line);
00175                 list.add(token::TYPE_STRING, unescaped, orig_line);
00176 
00177                 // Proceed after string termination ('\"')
00178                 ++ iter;
00179         }
00180 
00181         void tokenise_indentation(
00182                 token_list& list,
00183                 const std::string& src,
00184                 string_iterator& iter,
00185                 unsigned int& line
00186         )
00187         {
00188                 // Proceed to next char that is not a space character
00189                 string_iterator orig = iter;
00190                 for(; iter != src.end(); ++ iter)
00191                         if(!isspace(*iter) || *iter == '\n')
00192                                 break;
00193 
00194                 // Add indentation if this is not an empty line
00195                 if(*iter != '\n' && *iter != '\0' && iter != src.end() )
00196                 {
00197                         list.add(
00198                                 token::TYPE_INDENTATION,
00199                                 std::string(orig, iter),
00200                                 line
00201                         );
00202                 }
00203         }
00204 
00205         void tokenise(
00206                 token_list& list,
00207                 const std::string& src
00208         )
00209         {
00210                 unsigned int line = 1;
00211                 for(string_iterator iter = src.begin();
00212                     iter != src.end();)
00213                 {
00214                         // Nullbyte identifies end of string
00215                         if(*iter == '\0')
00216                                 break;
00217 
00218                         if(*iter == '\n')
00219                         {
00220                                 // Line counting
00221                                 ++ line;
00222 
00223                                 // Parse it
00224                                 ++ iter;
00225                                 tokenise_indentation(
00226                                         list, src, iter, line
00227                                 );
00228 
00229                                 continue;
00230                         }
00231 
00232                         // String literal
00233                         if(*iter == '\"')
00234                         {
00235                                 tokenise_string(list, src, iter, line);
00236                                 continue;
00237                         }
00238 
00239                         // Comment
00240                         if(*iter == '#')
00241                         {
00242                                 tokenise_comment(list, src, iter, line);
00243                                 continue;
00244                         }
00245 
00246                         // Identifier
00247                         if(isalnum(*iter) || *iter == '_')
00248                         {
00249                                 tokenise_identifier(list, src, iter, line);
00250                                 continue;
00251                         }
00252 
00253                         // Ignore whitespace
00254                         if(isspace(*iter) )
00255                         {
00256                                 ++ iter;
00257                                 continue;
00258                         }
00259 
00260                         // Special character?
00261                         token::type type = token::TYPE_UNKNOWN;
00262                         switch(*iter)
00263                         {
00264                         case '!':
00265                                 type = token::TYPE_EXCLAMATION;
00266                                 break;
00267                         case '=':
00268                                 type = token::TYPE_ASSIGNMENT;
00269                                 break;
00270                         }
00271 
00272                         if(type == token::TYPE_UNKNOWN)
00273                         {
00274                                 obby::format_string str(
00275                                         _("Unexpected token: '%0%'")
00276                                 );
00277 
00278                                 str << *iter;
00279                                 throw error(str.str(), line);
00280                         }
00281 
00282                         list.add(type, std::string(1, *iter), line);
00283 
00284                         // Go on with next character
00285                         ++ iter;
00286                 }
00287         }
00288 
00289         void detokenise(const token_list& list, std::string& target)
00290         {
00291                 bool line_begin = true;
00292                 std::string escaped_string;
00293 
00294                 for(token_list::iterator iter = list.begin();
00295                     iter != list.end();
00296                     ++ iter)
00297                 {
00298                         switch(iter->get_type() )
00299                         {
00300                         case token::TYPE_INDENTATION:
00301                                 target.append("\n" + iter->get_text() );
00302                                 line_begin = true;
00303                                 break;
00304                         case token::TYPE_STRING:
00305                                 escaped_string = iter->get_text();
00306                                 escape(escaped_string);
00307 
00308                                 target.append("\"");
00309                                 target.append(escaped_string);
00310                                 target.append("\"");
00311 
00312                                 line_begin = false;
00313                                 break;
00314                         case token::TYPE_IDENTIFIER:
00315                                 if(!line_begin)
00316                                         target.append(" ");
00317                                 // Fallthrough
00318                         default:
00319                                 target.append(iter->get_text() );
00320                                 if(iter->get_type() != token::TYPE_EXCLAMATION)
00321                                         line_begin = false;
00322                                 break;
00323                         }
00324                 }
00325         }
00326 }
00327 
00328 obby::serialise::token::token(
00329         type type,
00330         const std::string& text,
00331         unsigned int line
00332 ) :
00333         m_type(type), m_text(text), m_line(line)
00334 {
00335 }
00336 
00337 obby::serialise::token::type obby::serialise::token::get_type() const
00338 {
00339         return m_type;
00340 }
00341 
00342 const std::string& obby::serialise::token::get_text() const
00343 {
00344         return m_text;
00345 }
00346 
00347 unsigned int obby::serialise::token::get_line() const
00348 {
00349         return m_line;
00350 }
00351 
00352 obby::serialise::token_list::token_list()
00353 {
00354 }
00355 
00356 void obby::serialise::token_list::serialise(
00357         std::string& string
00358 ) const
00359 {
00360         detokenise(*this, string);
00361 }
00362 
00363 void obby::serialise::token_list::deserialise(
00364         const std::string& string
00365 )
00366 {
00367         tokenise(*this, string);
00368 }
00369 
00370 void obby::serialise::token_list::add(
00371         token::type type,
00372         const std::string& text,
00373         unsigned int line
00374 )
00375 {
00376         m_list.push_back(token(type, text, line) );
00377 }
00378 
00379 obby::serialise::token_list::iterator obby::serialise::token_list::begin() const
00380 {
00381         return m_list.begin();
00382 }
00383 
00384 obby::serialise::token_list::iterator obby::serialise::token_list::end() const
00385 {
00386         return m_list.end();
00387 }
00388 
00389 void obby::serialise::token_list::next_token(
00390         iterator& iter
00391 ) const
00392 {
00393         unsigned int orig_line = iter->get_line();
00394         if(++ iter == m_list.end() )
00395                 throw error(_("Unexpected end of input"), orig_line);
00396 }
00397 

Generated on Fri Jan 11 10:01:32 2008 for obby by  doxygen 1.5.1