糯米 - C++博客

[bash源码分析] 2 寻找入口点

2. 寻找入口点

--- 获得源码

    直接在主页就可以下载到了，用ubuntu的可以很方便的get到：
    apt-get source bash
    我的ubuntu是9.04，get到的是bash-3.2。没有打debian的补丁。

--- Makefile

    bash的Makefile是由autoconf工具根据Makefile.in和configure.in来生成的。
    Makefile中只有小部分的配置是可更改的，一般来说这小部分都是不重要的部分。
    所以./configure后生成出来的Makefile与Makefile.in相比差别不大。我们把Makefile.in视为Makefile。

--- 主要依赖关系

    打开Makefile.in。从all开始跟下去。

    all -> .made -> $(Program)

    Program = bash$(EXEEXT)
    $(Program): .build $(OBJECTS) $(BUILTINS_DEP) $(LIBDEP)

    LIBDEP = $(SHLIB_DEP) $(INTL_DEP) $(READLINE_DEP) $(HISTORY_DEP) $(TERMCAP_DEP) $(GLOB_DEP) \
         $(TILDE_DEP) $(MALLOC_DEP)

    BUILTINS_DEP = $(BUILTINS_LIBRARY)
    BUILTINS_LIBRARY = $(DEFDIR)/libbuiltins.a

    # Matching object files.
    OBJECTS     = shell.o eval.o y.tab.o general.o make_cmd.o print_cmd.o $(GLOBO) \
           dispose_cmd.o execute_cmd.o variables.o copy_cmd.o error.o \
           expr.o flags.o $(JOBS_O) subst.o hashcmd.o hashlib.o mailcheck.o \
           trap.o input.o unwind_prot.o pathexp.o sig.o test.o version.o \
           alias.o array.o arrayfunc.o braces.o bracecomp.o bashhist.o \
           bashline.o $(SIGLIST_O) list.o stringlib.o locale.o findcmd.o redir.o \
           pcomplete.o pcomplib.o syntax.o xmalloc.o $(SIGNAMES_O)

    简要的看了一下，LIBDEP和BUILTINS_DEP是一些静态库，单独实现一些功能的模块。我们可以先不看。
    而OBJECTS看起来就是bash的核心部分了。
    其中形似$(xxx_O)的变量是在./configure中指定的，不用理会。

--- 关键文件列表

    整理了一下

   1795 shell.c
    275 eval.c
   6277 y.tab.c
   1029 general.c
    856 make_cmd.c
   1307 print_cmd.c
    329 dispose_cmd.c
   4143 execute_cmd.c
   4270 variables.c
    422 copy_cmd.c
    452 error.c
   1348 expr.c
    355 flags.c
   8140 subst.c
    196 hashcmd.c
    442 hashlib.c
    438 mailcheck.c
    983 trap.c
    627 input.c
    318 unwind_prot.c
    438 pathexp.c
    595 sig.c
    825 test.c
     83 version.c
    574 alias.c
    932 array.c
    837 arrayfunc.c
    630 braces.c
    200 bracecomp.c
    823 bashhist.c
   3199 bashline.c
    137 list.c
    284 stringlib.c
    509 locale.c
    598 findcmd.c
   1086 redir.c
   1394 pcomplete.c
    225 pcomplib.c
    193 xmalloc.c
47564 总用量

    可见bash并不是个省油的灯，区区30多个核心文件就4w多行代码。比linux0.11还大。
    其中的subst.c更是巅峰造极，8000行。

    统计一下bash工程的总代码量：
    find -name '*.[ch]' | xargs cat | wc -l
    结果是13w+行。。真挺多的

--- 入口点

    这么多文件，没有理由一个个去找main函数。首先在源码根目录下执行ctags -R *。
    ctags看源码的时候也会用到的。然后 vi -t main。就可以列出所有main函数的定义。
    这时候我们发现有几十个main函数，就像剑圣的分身一样，真假难辩。
    从程序员的直觉可以得出shell.c里面的main函数是真身。
    其他的main函数都是测试用的。
    形如：
    #ifdef xxx_TEST
    main() { ... }
    #endif

    下一篇我们就从 shell.c 里的 main 开始分析。

--- bash 的生日

    shell.c 文件开头的那一段注释尾部：
    ...
    Birthdate:
    Sunday, January 10th, 1988.
    Initial author: Brian Fox
    */

    bash 居然已经诞生了20多年了，比我还大9个月。这么说来，也是个80后呢。
    呵呵，bash 都算是个富二代了：
    贵族出身（GNU），身边不乏追求者（贡献者），还搭上了一个90后mm（linux）。

posted @ 2010-07-25 10:18 糯米阅读(1792) | 评论 (1) | 编辑收藏

[bash源码分析] 1 目的和意义

--- bash是大多数linux发行版的默认shell
Ubuntu、Fedora、Puppy。。。
查询你现在使用的shell的方法：
env | grep SHELL

--- bash是内核与应用程序之间的桥梁
linux绝大部分操作是基于命令行，也就是通过bash来调用程序。
当运行了一个脚本，bash就要负责管理一系列进程，处理好进程的文件、管道、信号、同步等等。
而了解这些细节，对于我们日常使用也是很有帮助的。

--- Just for fun
这不是什么一定要完成的任务，纯粹是为了消磨时间，有一天我找到事情做了，我就不会继续写下去了，这很正常。

posted @ 2010-07-25 10:17 糯米阅读(643) | 评论 (0) | 编辑收藏

POJ 2050 Searching the Web 数据结构

摘要: 这题基本上没有算法，只用了一个字符串的hash。但代码很长，200+行。非常荣幸的1ac了！ #include <stdio.h>#include <string.h>#define MAX_LINES 2048#define MAX_LINE_LEN 128#define MAX_DOCS ... 阅读全文

posted @ 2010-07-23 13:24 糯米阅读(579) | 评论 (0) | 编辑收藏

POJ 1570 Exchange Rates 并查集

并查集加上分数运算就可以了。
两种物品之间的兑换比率可以用分数来表示，两种物品之间是否存在联系用并查集来表示。

#include <stdio.h>
#include <string.h>

typedef struct {
    int a, b;
} frac ;

#define MAX_ITEM 64

char item[MAX_ITEM][32];
int item_cnt;

struct {
    frac f;
    int p;
} set[MAX_ITEM];

int gcd(int a, int b)
{
    int t;

    if (a > b) {
        a ^= b;
        b ^= a;
        a ^= b;
    }

    while (a) {
        t = a;
        a = b % a;
        b = t;
    }

    return b;
}

frac init(int a, int b)
{
    frac r;
    int g = gcd(a, b);

    r.a = a / g;
    r.b = b / g;

    return r;
}

frac mul(frac a, frac b)
{
    return init(a.a * b.a, a.b * b.b);
}

frac div(frac a, frac b)
{
    return init(a.a * b.b, a.b * b.a);
}

int find(int i)
{
    int p;

    if (set[i].p == i)
        return i;

    p = find(set[i].p);
    set[i].f = mul(set[set[i].p].f, set[i].f);
    set[i].p = p;

    return p;
}

int insert(char *s)
{
    int i;

    for (i = 0; i < item_cnt; i++)
        if (!strcmp(s, item[i]))
            return i;
    strcpy(item[item_cnt], s);
    return item_cnt++;
}

int main()
{
    char op[16], sa[32], sb[32];
    int a, b, ia, ib, i, p;
    frac f;

    for (i = 0; i < MAX_ITEM; i++) {
        set[i].p = i;
        set[i].f = init(1, 1);
    }

    while (scanf("%s", op), op[0] != '.') {
        if (op[0] == '!') {
            scanf("%d%s%*s%d%s", &a, sa, &b, sb);
            ia = insert(sa);
            ib = insert(sb);
            find(ia);
            p = set[ia].p;
            set[p].p = ib;
            set[p].f = div(init(b, a), set[ia].f);
        } else {
            scanf("%s%*s%s", sa, sb);
            ia = insert(sa);
            ib = insert(sb);
            find(ia);
            find(ib);
            if (set[ia].p == set[ib].p) {
                f = div(set[ia].f, set[ib].f);
                printf("%d %s = %d %s\n", f.b, item[ia], f.a, item[ib]);
            } else
                printf("? %s = ? %s\n", item[ia], item[ib]);
        }
    }

    return 0;
}

posted @ 2010-07-22 11:59 糯米阅读(386) | 评论 (0) | 编辑收藏

POJ 1432 Decoding Morse Sequences 动态规划＋hash

思路如下：

匹配的递归过程如下。
把单词和正文都转换成mos码来表示。
从正文的头部开始匹配。
如果某个单词是正文的前缀，那么从前缀后面的部分递归下去。
统计一下所有方案的数目，就是答案了。
子问题就是“从位置k开始匹配，有多少种方案”，数组保存即可。

关键在于怎样快速发现某个单词是正文的前缀。
如果顺序查找，复杂度O(N)，超时。
如果枚举单词长度后二分查找，复杂度O(L*lgN)，应该不会超时，但代码不太自然，比较难写。
如果枚举单词长度后用hash查找，复杂度O(L)，不会超时，而且代码比较好写。

事实证明，经典的字符串hash函数同样可以用于mos码，在65536格的闭hash里面没有产生冲突！

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

char *mos[] = {
".-",
"-

",
    "-.-.",
    "-..",

    ".",
    "..-.",
    "--.",
    "

.",

    "..",
    ".---",
    "-.-",
    ".-..",

    "--",
    "-.",
    "---",
    ".--.",

    "--.-",
    ".-.",
    "

",
    "-",

    "..-",
    "

-",
    ".--",
    "-..-",

    "-.--",
    "--..",
};

#define HASH_SIZE 65536
#define INPUT_LEN 10032

struct _hash {
    int val, cnt;
};

struct _hash hash[HASH_SIZE];
int dp[INPUT_LEN];
int max_len;

int strhash(char *str, int len)
{
    int val;
    for (val = 0; len--; str++)
        val = val*31 + *str;
    return val & 0x7fffffff;
}

struct _hash *find(int val)
{
    int h;

    for (h = val % HASH_SIZE;
         hash[h].cnt && hash[h].val != val;
         h = (h + 1) % HASH_SIZE
        );
    return &hash[h];
}

void insert(char *str)
{
    int len = strlen(str);
    int val = strhash(str, len);
    struct _hash *h = find(val);

//    printf("insert %s\n", str);

    if (len > max_len)
        max_len = len;

    h->val = val;
    h->cnt++;
}

int calc(char *str, int start)
{
    struct _hash *h;
    int i, s;

//    printf("start %d %s\n", start, str + start);

    if (!str[start])
        return 1;

    if (dp[start] != -1)
        return dp[start];

    s = 0;
    for (i = 1; str[start + i - 1] && i <= max_len; i++) {
        h = find(strhash(str + start, i));
//        printf("len %d %s cnt %d\n", i, str + start, h->cnt);
        if (h->cnt)
            s += calc(str, start + i) * h->cnt;
    }

    dp[start] = s;
    return s;
}

void solve()
{
    int i, j, d, n;
    static char word[32], str[256], in[10032];

    memset(dp, -1, sizeof(dp));
    memset(hash, 0, sizeof(hash));

    scanf("%s%d", in, &n);
    for (i = 0; i < n; i++) {
        scanf("%s", word);
        str[0] = 0;
        for (j = 0; word[j]; j++)
            strcat(str, mos[word[j] - 'A']);
        insert(str);
    }
//    printf("max_len %d\n", max_len);
    printf("%d\n", calc(in, 0));
}

int main()
{
    int d;

    scanf("%d", &d);
    while (d--)
        solve();

    return 0;
}

posted @ 2010-07-21 14:27 糯米阅读(700) | 评论 (0) | 编辑收藏

POJ 1434 Fill the Cisterns!

回家待了几天，我觉得再继续颓废下去也不是个办法。还是得他妈的振作！振作！
跟以前一样，按照计划行事。
每天2题，难度随意。做不出来绝对不死磕，找标程或者数据弄懂再说。管他妈什么算法，我只管写代码。
项目紧的时候做项目，不紧的时候看点代码或者写点代码，啥都行，主要是保持一个感觉。
剩下的时间就练吉他。

今天开始做了第一题，结果悲剧。
我日你妈poj，能不能不要他妈的加数据，为啥子官方数据都过了还是过不了你那的。。

#include <stdio.h>
#include <stdlib.h>

#define MAX_N 50032

int K, N, V;
struct node {
    int base, area, sign;
};

struct node arr[MAX_N*2];

int cmp(const void *a, const void *b)
{
    return ((struct node *)a)->base - ((struct node *)b)->base;
}

int main()
{
    int b, h, w, d, i, a, v;

    scanf("%d", &K);
    while (K--) {
        scanf("%d", &N);
        for (i = 0; i < N; i++) {
            scanf("%d%d%d%d", &b, &d, &w, &h);
            arr[i*2].base = b;
            arr[i*2].area = w * h;
            arr[i*2].sign = 1;
            arr[i*2 + 1].base = b + d;
            arr[i*2 + 1].area = w * h;
            arr[i*2 + 1].sign = -1;
        }
        scanf("%d", &V);
        qsort(arr, N*2, sizeof(arr[0]), cmp);

        a = 0;
        for (i = 0; i < N*2; i++) {
            v = i ? a * (arr[i].base - arr[i - 1].base) : 0;
            if (V <= v) {
                printf("%.2lf\n", (double)V / a + arr[i - 1].base);
                break;
            }
            V -= v;
            a += arr[i].sign * arr[i].area;
        }
        if (i == N*2)
            printf("OVERFLOW\n");
    }

    return 0;
}

posted @ 2010-07-20 21:48 糯米阅读(519) | 评论 (0) | 编辑收藏

POJ 1229 Wild Domains 动态规划

摘要: 这题看上去很冷门~其实也是的，第一眼看上去想不到好的解法，但是将问题稍稍转化一下就很好办了。思路：两个pattern匹配的过程，如果没有通配符，那就是从左到右，逐个逐个的匹配。由于存在通配符，a的一个节点有可能匹配b的数个节点，同样，b的一个节点也有可能匹配a的数个节点。这就需要搜索了。但是一开始发现搜索的时候通配符的处理真的很麻烦。感觉就是代码稍微写错一点就会WA。于是想简化一下问题。重新定义三... 阅读全文

posted @ 2010-05-26 08:21 糯米阅读(776) | 评论 (0) | 编辑收藏

POJ 1226 Substrings 后缀Trie

思路：

将每个字符串的原文的所有后缀和反转后的所有后缀都插入到Trie中。
同时Trie中的节点维护一个值 --- 该节点下面包含了多少个不同单词的节点。
然后统计这个值等于N的最深的节点，其深度就是答案了。
后缀Trie并不是好的解法。有人说用后缀数组也能做的，但是想不出来。

#include <stdio.h>

#include <string.h>

struct node {

char ch;

int ts, cnt;

struct node *sib, *child;

};

struct node nodes[65536], root;

int nodes_cnt;

int N, T;

int ts, ans;

inline struct node *insert(struct node *q, char ch, int depth)

{

struct node *t;

for (t = q->child; t; t = t->sib)

if (t->ch == ch)

break;

if (!t) {

t = &nodes[nodes_cnt++];

t->ch = ch;

t->cnt = 0;

t->child = NULL;

t->sib = q->child;

q->child = t;

}

if (t->ts != ts) {

t->ts = ts;

t->cnt++;

}

if (t->cnt == N && depth > ans)

ans = depth;

return t;

}

int main()

{

int i, j, k, len;

char str[128];

struct node *t;

scanf("%d", &T);

while (T--) {

scanf("%d", &N);

ans = 0;

nodes_cnt = 0;

root.child = root.sib = NULL;

root.cnt = 0;

for (i = 0; i < N; i++) {

scanf("%s", str);

ts++;

len = strlen(str);

for (j = 0; j < len; j++) {

t = &root;

for (k = j; k < len; k++)

t = insert(t, str[k], k - j + 1);

}

for (j = len - 1; j >= 0; j--) {

t = &root;

for (k = j; k >= 0; k--)

t = insert(t, str[k], j - k + 1);

}

printf("%d\n", ans);

}

return 0;

}

posted @ 2010-05-26 08:05 糯米阅读(589) | 评论 (0) | 编辑收藏

POJ 1230 Pass-Muraille 贪心

思路：

考虑最左边的需要移除墙的列。这列是必定要移除一些墙的。
不妨移除右边界较大的那些墙。

实现的时候，可以用基数排序的方式来找到右边界较大的墙。
开两个数组如下：
map[i][j] = { 第i列中，从该列开始向右延伸，长度为j的墙的数目}
cnt[i] = {第i列中墙的数目}
这样代码比较方便，速度也快。

#include <stdio.h>

#include <string.h>

int T, N, K;

char map[128][128];

int cnt[128];

int main()

{

int x1, x2, y;

int i, j, i2, j2, ans;

scanf("%d", &T);

while (T--) {

scanf("%d%d", &N, &K);

memset(map, 0, sizeof(map));

memset(cnt, 0, sizeof(cnt));

while (N--) {

scanf("%d%d%d%d", &x1, &y, &x2, &y);

if (x1 > x2) {

x1 ^= x2;

x2 ^= x1;

x1 ^= x2;

}

for (i = x1; i <= x2; i++) {

map[i][x2 - i + 1]++;

cnt[i]++;

}

ans = 0;

for (i = 0; i <= 100; i++) {

if (cnt[i] <= K)

continue;

for (j = 100; cnt[i] > K && j > 0; j--) {

while (cnt[i] > K && map[i][j]) {

i2 = i;

j2 = j;

while (j2) {

map[i2][j2]--;

cnt[i2]--;

j2--;

i2++;

}

ans++;

}

printf("%d\n", ans);

}

return 0;

}

posted @ 2010-05-24 23:20 糯米阅读(876) | 评论 (0) | 编辑收藏

POJ 1231 The Alphabet Game 贪心

近来实验室给派了新活，跟原来做的东西，以及我们熟悉的东西都比较不搭边的，郁闷。
折腾了两个星期，昨天终于有了些进展。
今天做了两道水题~ 都是贪心

思路：
这题看上去挺唬人，提交的人也不多，实际上都是水题来的。
1. 对于同一种字母，求出它出现位置的最左边、最右边、最上边、最下边。这就构成了一个矩形。
2. 对于在x轴上投影重合的一系列矩形，他们必定处在同一个方格内。给这些方格编号。
3. 对于在y轴上投影重合的一系列矩形，如果其中两个编号相同，就不符合条件了。

#include <stdio.h>

#include <stdlib.h>

#include <algorithm>

using namespace std;

struct rect {

int left, right, top, bottom;

int rank_x;

} rec[32];

int T, K, P;

int cmp_x(const void *a, const void *b)

{

return ((struct rect *)a)->left - ((struct rect *)b)->left;

}

int cmp_y(const void *a, const void *b)

{

return ((struct rect *)a)->top - ((struct rect *)b)->top;

}

inline int solve()

{

int i, last, rank, mask;

qsort(rec, K, sizeof(rec[0]), cmp_x);

rank = 0;

for (i = 0; i < K; ) {

last = rec[i].right;

while (i < K && rec[i].left <= last) {

rec[i].rank_x = rank;

last = max(last, rec[i].right);

i++;

}

rank++;

}

qsort(rec, K, sizeof(rec[0]), cmp_y);

for (i = 0; i < K; ) {

mask = 0;

last = rec[i].bottom;

while (i < K && rec[i].top <= last) {

if (mask & (1 << rec[i].rank_x))

return 0;

mask |= 1 << rec[i].rank_x;

last = max(last, rec[i].bottom);

i++;

}

return 1;

}

int main()

{

int i, j, x, y;

scanf("%d", &T);

while (T--) {

scanf("%d%d", &K, &P);

for (i = 0; i < K; i++) {

rec[i].left = rec[i].top = 1000000;

rec[i].right = rec[i].bottom = 0;

for (j = 0; j < P; j++) {

scanf("%d%d", &x, &y);

if (x < rec[i].left)

rec[i].left = x;

if (x > rec[i].right)

rec[i].right = x;

if (y < rec[i].top)

rec[i].top = y;

if (y > rec[i].bottom)

rec[i].bottom = y;

}

printf("%s\n", solve() ? "YES" : "NO");

}

return 0;

}

posted @ 2010-05-24 23:14 糯米阅读(525) | 评论 (0) | 编辑收藏

糯米

[bash源码分析] 2 寻找入口点

[bash源码分析] 1 目的和意义

POJ 2050 Searching the Web 数据结构

POJ 1570 Exchange Rates 并查集

POJ 1432 Decoding Morse Sequences 动态规划＋hash

POJ 1434 Fill the Cisterns!

POJ 1229 Wild Domains 动态规划

POJ 1226 Substrings 后缀Trie

POJ 1230 Pass-Muraille 贪心

POJ 1231 The Alphabet Game 贪心

导航

随笔分类

Links

最新评论

阅读排行榜