用Boost.Spirit写了一个term下c++词法高亮

Posted on 2008-07-08 11:49 RichardHe 阅读(2198) 评论(1) 编辑收藏引用所属分类: [转]

出自http://hi.baidu.com/yesbaba/blog/item/79c3eb13215da0d2f7039ec9.html

1.1版，命令行下使用如1.0版，

增加更易用的方法：点击coco.exe，将文件拖入窗口，高亮文件输出到同名txt文件中

转成html的功能以后不懒的时候再写吧

1.0版：命令行下使用，输出到stdout，

可以重定向到文件，彩色复制就行了
coco.exe xxx.cpp > whateverfile
记事本打开whateverfile，复制，在bbs上彩色粘贴
编译这个大概需要2百多M内存

想直接现在用的，可以在这下载，这是水木的附件，不知道能有效到什么时候

http://www.newsmth.net/att.php?p.335.193566.17417.exe

论坛可以用它来实现代码高亮显示
以后再改进

正确的代码coco应该不会分析错，能分析正确的不一定是正确的代码

读代码可以先看spirit最基础的ch_p,str_p等，还有spirit文档中的utility

再看如何分析文件

http://hi.baidu.com/yesbaba/blog/item/091ca995d0fe6e49d1135e8b.html

gnu source highlight就可以实现这个功能了，我做这个一来是学spirit练练手，

二来是要做个课程设计，三来这个很实用，平常在水木贴代码也能用得上

///////////////////////////////////////////////////////////////////////////////
//
//       highlight c++ source code in ansi code format
//       COCO (COlorful COde) version 1.1
//       ibe@newsmth.net
//       [ 2007-08-13 ]
//
///////////////////////////////////////////////////////////////////////////////

#include <boost/spirit/core.hpp>
#include <boost/spirit/iterator/file_iterator.hpp>
#include <boost/spirit/utility/confix.hpp>
#include <boost/spirit/utility/escape_char.hpp>
#include <iostream>
#include <fstream>
///////////////////////////////////////////////////////////////////////////////
using namespace boost::spirit;
using namespace std;

////////////////////////////////////////////////////////////////////////////
//
//       Types
//types needed for file parsing
////////////////////////////////////////////////////////////////////////////
// Character type of the input that is parsed.
typedef char char_t;
// Iterator over the contents of the input file (Boost.Spirit file_iterator).
typedef file_iterator < char_t > iterator_t;
// Scanner that walks the input through iterator_t.
typedef scanner < iterator_t > scanner_t;
// Rule type shared by every lexical rule and by the top-level rule in main().
typedef rule < scanner_t > rule_t;

////////////////////////////////////////////////////////////////////////////
//To ansi，action函数，匹配后输出成ansi代码的彩色控制符，
//可参考各大高校bbs的asciiart版精华区，可以输出成需要的格式，如html
//
////////////////////////////////////////////////////////////////////////////
ofstream outfile;
namespace to_ansi
{
  void
   black (iterator_t first, iterator_t const &last)
   {
     outfile << "\033[1;30m";
    while (first != last)
       outfile << *first++;
     outfile << "\033[m";
   }

  void
   red (iterator_t first, iterator_t const &last)
   {
     outfile << "\033[1;31m";
    while (first != last)
       outfile << *first++;
     outfile << "\033[m";
   }

  void
   green (iterator_t first, iterator_t const &last)
   {
     outfile << "\033[1;32m";
    while (first != last)
       outfile << *first++;
     outfile << "\033[m";
   }

  void
   yellow (iterator_t first, iterator_t const &last)
   {
     outfile << "\033[1;33m";
    while (first != last)
       outfile << *first++;
     outfile << "\033[m";
   }

  void
   blue (iterator_t first, iterator_t const &last)
   {
     outfile << "\033[1;34m";
    while (first != last)
       outfile << *first++;
     outfile << "\033[m";
   }

  void
   magenta (iterator_t first, iterator_t const &last)
   {
     outfile << "\033[1;35m";
    while (first != last)
       outfile << *first++;
     outfile << "\033[m";
   }

  void
   cyan (iterator_t first, iterator_t const &last)
   {
     outfile << "\033[1;36m";
    while (first != last)
       outfile << *first++;
     outfile << "\033[m";
   }

  void
   white (iterator_t first, iterator_t const &last)
   {
     outfile << "\033[1;37m";
    while (first != last)
       outfile << *first++;
     outfile << "\033[m";
   }

  void
   echo (iterator_t first, iterator_t const &last)
   {
    while (first != last)
       outfile << *first++;
   }
}
////////////////////////////////////////////////////////////////////////////
//
//       cpp lex
//c++的词法描述，有了comment_p就方便多了，识别函数名还没实现
////////////////////////////////////////////////////////////////////////////
namespace cpp_lex
{
     rule_t comment = comment_p ("/*", "*/")
                     | comment_p ("//")
                     ;
     rule_t whitespace = space_p
                     ;
     rule_t include = str_p ("#include") >> *space_p >>
                     (comment_p ("<", ">") | comment_p ("\"", "\""))
                     ;
     rule_t preprocessor = (include | "##" | "#define" | "#error" | ("#if" >> space_p)
                         | "#ifdef" | "#ifndef" | "#else" | "#elif"
                         | "#endif" | "#line" | "#pragma" | "#undef" | "#"
                         | "__LINE__" | "__FILE__" | "__DATE__" | "__TIME__"
                         | "_cplusplus" | "__STDC__")
                         >> space_p
                         ;
     rule_t keyword_ = str_p ("asm") | "auto" | "bool" | "break" | "case"
                     | "catch" | "char" | "class" | "const" | "const_cast"
                     | "continue" | "default" | "delete" | "do" | "double"
                     | "dynamic_cast" | "else" | "enum" | "explicit"
                     | "extern" | "false" | "float" | "for" | "friend"
                     | "goto" | "if" | "inline" | "int" | "long" | "mutable"
                     | "namespace" | "new" | "operator" | "private"
                     | "protected" | "public" | "register" | "reinterpret_cast"
                     | "return" | "short" | "signed" | "sizeof" | "static"
                     | "static_cast" | "struct" | "switch" | "template"
                     | "this" | "throw" | "true" | "try" | "typedef" | "typeid"
                     | "typename" | "union" | "unsighed" | "using" | "virtual"
                     | "void" | "volatile" | "wchar_t" | "while"
                     ;
     rule_t keyword = keyword_ >> space_p
                     ;
     rule_t identifer = (alpha_p | '_') >> *(alnum_p | '_')
                     ;
         rule_t operators = punct_p - '`' - '@' - '$' - '\\'
                         ;
     rule_t number = real_p
                     ;
     rule_t str = confix_p ("\"", *c_escape_ch_p, "\"")
                     ;
     rule_t charcter = confix_p("\'", *c_escape_ch_p, "\'")
                     ;
     rule_t constant = number
                     | str
                     | charcter
                     ;

};

////////////////////////////////////////////////////////////////////////////
//
//       Main program
//
////////////////////////////////////////////////////////////////////////////
int
main (int argc, char *argv[])
{
         string filepath;
        if (2 > argc)
         {//把要处理的文件拖到窗口内
                 cout << "drag file to this windows\n";
                 string filepath_input;
                 getline(cin, filepath_input);
                 filepath_input.erase(filepath_input.end()-1); //去掉结尾的'\"'
                 filepath_input.erase(filepath_input.begin());   //去掉开头"
                for (int i = 0; filepath_input[i] != 0; i++) {
                         filepath.push_back(filepath_input[i]);
                        if (filepath_input[i] == '\\')
                                 filepath.push_back('\\');
                 }
         }else{
                // for console usage
                 filepath = argv[1];
         }
         iterator_t first (filepath);
        if (!first)   {
                 std::cerr << "Unable to open file!\n";
                return -1;
         }

        // Create an EOF iterator
         iterator_t last = first.make_end ();

         string filepath_output = filepath+".txt";
         outfile.open(filepath_output.c_str());
        // A simple rule词法对应的颜色在这里改就可以
         rule_t r = *(
                 cpp_lex::comment[&to_ansi::cyan]
                 | cpp_lex::constant[&to_ansi::yellow]
                 | cpp_lex::preprocessor[&to_ansi::red]
                 | cpp_lex::keyword[&to_ansi::green]
                 | cpp_lex::whitespace[&to_ansi::echo]
                 | cpp_lex::operators[&to_ansi::magenta]
                 | cpp_lex::identifer[&to_ansi::white]
                 )
                 ;
        //
        // Parse
        /*The parse_info structure
         The functions above return a parse_info structure parameterized by the iterator type passed in.
         The parse_info struct has these members:parse_info
         stop     Points to the final parse position (i.e The parser recognized and processed the input up to this point)
         hit     True if parsing is successful. This may be full: the parser consumed all the input, or partial: the parser consumed only a portion of the input.
         full     True when we have a full match (i.e The parser consumed all the input).
         length     The number of characters consumed by the parser. This is valid only if we have a successful match (either partial or full).
        */
         parse_info < iterator_t > info = parse (first, last, r);

        // This really shouldn't fail...
        if (info.full)
                 std::cout << "\nParse succeeded!\n";
        else
                 std::cout << "\nParse failed!\n";

        std::cout << "highlight file saved in " << filepath_output << endl;
         string end;
         getline (cin, end);//按回车键推出窗口

        return 0;
}

这只是个词法高亮程序，还不能识别语法，如函数等，用正则也能做，但boost.spirit更简单

Feedback

# re: 用Boost.Spirit写了一个term下c++词法高亮 回复 更多评论

2012-03-10 03:43 by 王小贱

你好，当程序文件第一行以哈哈开头，没有注释符号，会解析出错。请问怎么只对定义的那些符号进行着色，而别的字符只是照原本的方式输出呢？

刷新评论列表

只有注册用户登录后才能发表评论。
【推荐】100%开源！大型工业跨平台软件C++源码提供，建模，组态！

相关文章: 简介Boost.Regex boost::regex C/C++中宏的使用 extern "C" 推荐--丰富的游戏开发的站点用Boost.Spirit写了一个term下c++词法高亮回调函数内存分区 Boost.Singals 教程 [摘录]回调函数与转移表

网站导航: 博客园 IT新闻 BlogJava 博问 Chat2DB 管理

Richard He

学无止境!永远学下去!

文章分类(91)

随笔档案(94)

文章档案(94)

最新随笔

最新评论