Android Native 实现隐蔽采集数据

Brief

随着隐私越来越受到大家关注，对于设备信息的获取越来越困难。同时，一些基于Hook制作的隐私监控工具会将一些常见的获取隐私数据的接口全部都hook掉；为了能更加隐蔽的拿到这些设备ID，窃闻某些APP采用“SVC”指令的方式来获取一些设备唯一ID，感觉很有趣的样子。于是自己写了一些DEMO来体验这一新奇的方式，同时也思考下这种方式给未来检测场景带来的挑战；

什么是利用SVC来获取设备唯一ID

我们知道在Android上有许多可以作为设备指纹的id，都是存储在文件系统中的。采集这些ID可以使用Android提供的一些系统接口来获取，也可以直接通过读取相应文件的方式。而文件读取的操作，最终都会走到libc的open函数打开文件、read函数读取文件。那么通过Frida hook住libc中的相应关键方法就可以方便的时刻记录进程的文件操作了。

那么，有没有方法可以绕过这些函数而实现文件的读写操作呢？

以openat方法为例，看下其实现的过程：

.text:00000000000DDDA0 __openat ; CODE XREF: open64+D4↑p

.text:00000000000DDDA0 ; __open_2+48↑p ...

.text:00000000000DDDA0 ; __unwind {

.text:00000000000DDDA0 MOV X8, #0x38 ; '8'

.text:00000000000DDDA4 SVC 0

.text:00000000000DDDA8 CMN X0, #1,LSL#12

.text:00000000000DDDAC CINV X0, X0, HI

.text:00000000000DDDB0 B.HI __set_errno_internal

.text:00000000000DDDB4 RET

.text:00000000000DDDB4 ; } // starts at DDDA0

最后的最后，libc中的这些函数都会通过相应的syscall陷入内核，由内核来完成相应的功能。通过unistd.h文件可以了解全部可以被使用的系统调用（前提是没有seccomp等机制的拦截）。因此，如果我们绕开libc中的相应函数，直接使用syscall来完成相应调用的话，就可以绕开一些Hook工具而直接获取到我们想要的内容了。

由于在ARM架构中，最后都是通过SVC指令来完成内核陷入，因此才有了本章节标题的这种奇怪称呼。不过通过上面的描述，应该大体了解我要做什么了。

Demos

我测试了两种比较典型的直接使用系统调用来完成libc中函数的调用。

由于需要避免调用libc中的函数，需要写一些汇编代码来完成syscall的调用，比较简单的可以直接将syscall.S引入到项目中来编译。这里我直接写了一个函数来“包裹”syscall的调用：

/**
 * Issue a raw AArch64 Linux system call without going through any libc wrapper.
 *
 * @param a    System call number; moved into x8.
 * @param b    First system call argument; moved into x0.
 * @param ...  Up to five further arguments, moved into x1..x5. Pass each one
 *             as a 64-bit value (size_t or pointer) so the whole register is
 *             defined.
 * @return The raw value the kernel leaves in x0. Unlike the libc wrappers,
 *         errno is NOT set here: callers must interpret the result themselves
 *         (libc's own wrapper treats the top 4095 unsigned values as errors).
 *
 * The function is `naked`, so the compiler emits no prologue or epilogue: the
 * incoming arguments are still in x0..x6 when the assembly starts and the
 * stack pointer needs no fix-up before `ret`. The previous hand-written
 * `add sp, sp, 0xc0` only worked when the compiler happened to reserve
 * exactly 0xc0 bytes for the variadic save area, and corrupted the stack
 * otherwise (or when the call was inlined).
 */
__attribute__((naked, noinline)) inline size_t raw_syscall(size_t a, size_t b, ...) {
    __asm__ volatile(
        "mov x8, x0\n"   // syscall number
        "mov x0, x1\n"   // shift every argument down by one register
        "mov x1, x2\n"
        "mov x2, x3\n"
        "mov x3, x4\n"
        "mov x4, x5\n"
        "mov x5, x6\n"
        "svc 0\n"        // trap into the kernel; result comes back in x0
        "ret\n"
    );
}

Demo 1: 文件读写函数重写

我们以存储着MAC地址的/sys/class/net/p2p0/address 为例：

/**
 * Demo 1: read /sys/class/net/p2p0/address using only raw system calls
 * (openat / read / close), bypassing the libc wrappers.
 *
 * Exits with status 0 after printing the file content, or with -1 when the
 * file cannot be opened or a read fails.
 */
int main(int argc, char** argv) {
    (void) argc;
    (void) argv;

    const char *path = "/sys/class/net/p2p0/address";

    // Compile-time size so the buffer is a plain array rather than a VLA.
    const size_t BufferSize = 20;
    char buffer[BufferSize];
    memset(buffer, 0, BufferSize);

    // long fd = openat(AT_FDCWD, path, O_RDONLY);
    long fd = (long) raw_syscall(__NR_openat, (size_t) AT_FDCWD, (size_t) path, (size_t) O_RDONLY);
    printf("I got an fd : %ld\n", fd);
    if (fd < 0) {
        // Raw syscalls report failure as a negative value instead of setting errno.
        exit(-1);
    }

    // Accumulate the content, always leaving room for the terminating NUL that
    // the memset above already put in place.
    size_t used = 0;
    long got = 0;
    while (used < BufferSize - 1) {
        got = (long) raw_syscall(__NR_read, (size_t) fd, (size_t) (buffer + used),
                                 BufferSize - 1 - used);
        if (got <= 0) {
            // 0 is end-of-file, a negative value is an error; stop either way
            // instead of spinning forever on a failing read.
            break;
        }
        used += (size_t) got;
    }

    if (got < 0) {
        raw_syscall(__NR_close, (size_t) fd);
        exit(-1);
    }

    printf("read from file : %s\n", buffer);

    raw_syscall(__NR_close, (size_t) fd);
    exit(0);
}

Demo 2 : Socket 读写函数的重写

这个Demo的灵感来源是不久前在Github上关注到的一个项目，在Android Native中利用NetLink提供的接口来获取MAC地址；在ifaddrs.h中提供了一个函数getifaddrs 可以获取到ifconfig拿到的内容，仅需简单解析下就可以获取到MAC地址：

void normal_get_mac() {

    struct ifaddrs *ifap, *ifaptr;

    if (getifaddrs(&ifap) == 0) {

    for (ifaptr = ifap; ifaptr != NULL; ifaptr = (ifaptr)->ifa_next) {

        char macp[INET6_ADDRSTRLEN];

        if (ifaptr->ifa_addr != NULL) {

            if (((ifaptr)->ifa_addr)->sa_family == AF_PACKET) {

                struct sockaddr_ll *sockadd = (struct sockaddr_ll *) (ifaptr->ifa_addr);

                int i;

                int len = 0;

                for (i = 0; i < 6; i++) {

                    len += sprintf(macp + len, "%02X%s", sockadd->sll_addr[i],

                                (i < 5 ? ":" : ""));

                }

                if (strcmp(ifaptr->ifa_name, "wlan0") == 0) {

                    printf("I got device mac address : %s \n", macp);

                    return ;

               }

            }

        }

    }

    freeifaddrs(ifap);

    } else {

        char *err_msg = "getifaddrs!";

        errExit(err_msg);

    }    

}

通过AOSP提供的代码，可以获取到netlink相关的源代码：http://aospxref.com/android-10.0.0_r47/xref/bionic/libc/bionic/

/**
 * Re-implementation of getifaddrs() on top of NetlinkConnection.
 *
 * @param out  Receives the head of the resulting list; set to nullptr first
 *             and again on failure.
 * @return 0 on success, -1 when the netlink request or its responses fail.
 *
 * Only the RTM_GETLINK query is issued here; the RTM_GETADDR query of the
 * original implementation is intentionally left out.
 */
int mgetifaddrs(ifaddrs** out) {
    printf("N1rv0us : I got you from entrance of getifaddrs\n");

    // We construct the result directly into `out`, so terminate the list.
    *out = nullptr;

    // Open the netlink socket and ask for all the links.
    NetlinkConnection nc;
    const bool okay =
        nc.SendRequest(RTM_GETLINK) && nc.ReadResponses(__getifaddrs_callback, out);

    if (!okay) {
        // Release whatever was collected before the failure. `out` itself must
        // stay valid here: nulling the pointer first made `*out` a null
        // dereference.
        freeifaddrs(*out);
        // Ensure that callers crash if they forget to check for success.
        *out = nullptr;
        return -1;
    }

    return 0;
}

官方的接口通过NetlinkConnection创建了一个socket来获取信息，其中SendRequest发送请求，ReadResponses来读取返回内容。因此改写ReadResponses如下：

bool NetlinkConnection::ReadResponses(void callback(void*, nlmsghdr*), void* out) {

    // Read through all the responses, handing interesting ones to the callback.

    ssize_t bytes_read;

    printf("N1rv0us : I got you from entrance of NetlinkConnection::ReadResponses\n");

    while ((bytes_read = TEMP_FAILURE_RETRY(raw_syscall(__NR_recvfrom,fd_, data_, size_, 0, NULL, 0))) > 0) {

    // while ((bytes_read = recvfrom(fd_, data_, size_, 0, NULL, NULL)) > 0) {

        // 将拿到的data数据进行赋值

        printf("N1rv0us no result %s\n", data_);

        auto* hdr = reinterpret_cast<nlmsghdr*>(data_);

        for (; NLMSG_OK(hdr, static_cast<size_t>(bytes_read)); hdr = NLMSG_NEXT(hdr, bytes_read)) {

            //判断是否读取结束,否则读取callback

            if (hdr->nlmsg_type == NLMSG_DONE) return true;

            if (hdr->nlmsg_type == NLMSG_ERROR) {

                auto* err = reinterpret_cast<nlmsgerr*>(NLMSG_DATA(hdr));

                errno = (hdr->nlmsg_len >= NLMSG_LENGTH(sizeof(nlmsgerr))) ? -err->error : EIO;

                return false;

            }

            //处理具体逻辑

            callback(out, hdr);

        }

    }

    printf("N1rv0us no result %s\n", data_);

    // We only get here if recv fails before we see a NLMSG_DONE.

    return false;

}

一些额外的思考

使用上述的方式来采集一些设备信息，确实可以更加隐蔽且更加具有迷惑性。配合一些成熟代码混淆的技术，很难发现其采集设备信息的行为。但是，syscall本身同样可以作为一个用于监控的特征，后面，我也将尝试利用Frida框架来实现通用的监控方案。

除了提供隐蔽性以外，该方式并没有其他的作用。简单的说，它并不能实现访问原本无法访问到的数据。原本没有读取权限的文件仍然无法读取，通过netlink获取的MAC地址也仍然会受到随机MAC地址的影响。

最后，于我本人而言，这次经历还是稍微恢复了点点编码技术，也提升了点点IDA调试技巧。